remove ClockSelect module, use DummyPLL
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 """
21
22 from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
23 from nmigen.cli import rtlil
24
25 from soc.decoder.power_decoder2 import PowerDecodeSubset
26 from soc.decoder.power_regspec_map import regspec_decode_read
27 from soc.decoder.power_regspec_map import regspec_decode_write
28
29 from nmutil.picker import PriorityPicker
30 from nmutil.util import treereduce
31
32 from soc.fu.compunits.compunits import AllFunctionUnits
33 from soc.regfile.regfiles import RegFiles
34 from soc.decoder.decode2execute1 import Decode2ToExecute1Type
35 from soc.decoder.decode2execute1 import IssuerDecode2ToOperand
36 from soc.decoder.power_decoder2 import get_rdflags
37 from soc.decoder.decode2execute1 import Data
38 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
39 from soc.config.test.test_loadstore import TestMemPspec
40 from soc.decoder.power_enums import MicrOp
41 from soc.config.state import CoreState
42
43 import operator
44
45 from nmutil.util import rising_edge
46
47
48 # helper function for reducing a list of signals down to a parallel
49 # ORed single signal.
def ortreereduce(tree, attr="data_o"):
    """OR-combine one named attribute taken from every item of `tree`.

    `attr` (default "data_o") is looked up on each item with getattr and
    the resulting values are handed to treereduce with operator.or_ as
    the combining operator.
    """
    def fetch(item):
        return getattr(item, attr)

    return treereduce(tree, operator.or_, fetch)
52
53
def ortreereduce_sig(tree):
    """OR-combine the items of `tree` themselves (no attribute lookup).

    Each item is passed through unchanged and the items are handed to
    treereduce with operator.or_ as the combining operator.
    """
    def passthrough(item):
        return item

    return treereduce(tree, operator.or_, passthrough)
56
57
58 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the (regname, fspec) pairs of `fuspecs`, "full" names first.

    Pairs whose regname starts with "full" come first, followed by all
    remaining pairs; inside each group the dict's own iteration order is
    kept.  The result is a plain list of 2-tuples.
    """
    pairs = list(fuspecs.items())
    full_first = [pair for pair in pairs if pair[0].startswith("full")]
    the_rest = [pair for pair in pairs if not pair[0].startswith("full")]
    return full_first + the_rest
68
69
class NonProductionCore(Elaboratable):
    """Simple core: Function Units wired to Register Files via pickers.

    The constructor creates one L0 cache buffer, the set of Function
    Units, the register files, the main decode record (self.e) and one
    PowerDecodeSubset decoder for every Function Unit except TRAP.
    elaborate() then connects instruction issue, regfile read ports and
    regfile write ports, using one PriorityPicker per regfile port.
    """

    def __init__(self, pspec):
        """Create sub-components and the core's top-level signals.

        pspec is stored and passed on unchanged to TstL0CacheBuffer and
        AllFunctionUnits; elaborate() also checks it for a "nocore"
        attribute.
        """
        self.pspec = pspec

        # single LD/ST funnel for memory access
        self.l0 = TstL0CacheBuffer(pspec, n_units=1)
        # first data port of the L0 buffer: handed to the FUs via pilist
        pi = self.l0.l0.dports[0]

        # never executed (condition is the literal False).
        # NOTE(review): MMU and DCache are not imported in the visible
        # part of this file - enabling this branch as-is would fail.
        if False:
            # MMU / DCache
            self.mmu = MMU()
            self.dcache = DCache()

        # function units (only one each)
        self.fus = AllFunctionUnits(pspec, pilist=[pi])

        # register files (yes plural)
        self.regs = RegFiles()

        # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
        self.e = Decode2ToExecute1Type("core", opkls=IssuerDecode2ToOperand)

        self.state = CoreState("core")
        self.raw_insn_i = Signal(32) # raw instruction
        self.bigendian_i = Signal() # bigendian

        # issue/valid/busy signalling
        self.ivalid_i = Signal(reset_less=True) # instruction is valid
        self.issue_i = Signal(reset_less=True)
        self.busy_o = Signal(name="corebusy_o", reset_less=True)

        # start/stop and terminated signalling
        self.core_stopped_i = Signal(reset_less=True)
        self.core_terminate_o = Signal(reset=0) # indicates stopped

        # create per-FU instruction decoders (subsetted)
        self.decoders = {}  # FU name -> PowerDecodeSubset
        self.des = {}       # FU name -> that decoder's "do" record

        for funame, fu in self.fus.fus.items():
            f_name = fu.fnunit.name
            fnunit = fu.fnunit.value
            opkls = fu.opsubsetkls
            # the TRAP unit gets no subset decoder: its name is remembered
            # and elaborate() points self.des[trapunit] at self.e.do.
            # NOTE(review): self.trapunit is only assigned if an FU whose
            # fnunit name is 'TRAP' exists; elaborate() reads it
            # unconditionally.
            if f_name == 'TRAP':
                self.trapunit = funame
                continue
            self.decoders[funame] = PowerDecodeSubset(None, opkls, f_name,
                                                      final=True,
                                                      state=self.state)
            self.des[funame] = self.decoders[funame].do

    def elaborate(self, platform):
        """Build and return the nmigen Module for the core.

        If pspec has a true "nocore" attribute, only a module containing
        a toggling dummy signal is returned.  Otherwise the FUs, L0 buffer,
        regfiles and per-FU decoders are added and then wired up by
        connect_instruction, connect_rdports and connect_wrports.
        """
        m = Module()
        # for testing purposes, to cut down on build time in coriolis2
        if hasattr(self.pspec, "nocore") and self.pspec.nocore == True:
            x = Signal() # dummy signal
            m.d.sync += x.eq(~x)
            return m
        comb = m.d.comb

        m.submodules.fus = self.fus
        m.submodules.l0 = l0 = self.l0
        self.regs.elaborate_into(m, platform)
        regs = self.regs
        fus = self.fus.fus

        # connect decoders
        # each is added as submodule "dec_<fn_name>" and is fed the same
        # raw instruction and endianness inputs
        for k, v in self.decoders.items():
            setattr(m.submodules, "dec_%s" % v.fn_name, v)
            comb += v.dec.raw_opcode_in.eq(self.raw_insn_i)
            comb += v.dec.bigendian.eq(self.bigendian_i)

        # ssh, cheat: trap uses the main decoder because of the rewriting
        self.des[self.trapunit] = self.e.do

        # connect up Function Units, then read/write ports
        fu_bitdict = self.connect_instruction(m)
        self.connect_rdports(m, fu_bitdict)
        self.connect_wrports(m, fu_bitdict)

        return m

    def connect_instruction(self, m):
        """connect_instruction

        uses decoded (from PowerOp) function unit information from CSV files
        to ascertain which Function Unit should deal with the current
        instruction.

        some (such as OP_ATTN, OP_NOP) are dealt with here, including
        ignoring it and halting the processor. OP_NOP is a bit annoying
        because the issuer expects busy flag still to be raised then lowered.
        (this requires a fake counter to be set).

        returns fu_bitdict: a dict mapping each Function Unit name to its
        one-bit slice of the fu_enable signal created here.
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus

        # enable-signals for each FU, get one bit for each FU (by name)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_bitdict = {}
        for i, funame in enumerate(fus.keys()):
            fu_bitdict[funame] = fu_enable[i]

        # enable the required Function Unit based on the opcode decode
        # note: this *only* works correctly for simple core when one and
        # *only* one FU is allocated per instruction
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            enable = Signal(name="en_%s" % funame, reset_less=True)
            # enabled when the main decoder's fn_unit has any bit in
            # common with this FU's fnunit value
            comb += enable.eq((self.e.do.fn_unit & fnunit).bool())
            comb += fu_bitdict[funame].eq(enable)

        # sigh - need a NOP counter
        # while non-zero it counts down each clock and holds busy_o high
        counter = Signal(2)
        with m.If(counter != 0):
            sync += counter.eq(counter - 1)
            comb += self.busy_o.eq(1)

        with m.If(self.ivalid_i): # run only when valid
            with m.Switch(self.e.do.insn_type):
                # check for ATTN: halt if true
                with m.Case(MicrOp.OP_ATTN):
                    m.d.sync += self.core_terminate_o.eq(1)

                # NOP: no FU is involved, so busy is faked with the counter
                with m.Case(MicrOp.OP_NOP):
                    sync += counter.eq(2)
                    comb += self.busy_o.eq(1)

                with m.Default():
                    # connect up instructions. only one enabled at a time
                    for funame, fu in fus.items():
                        do = self.des[funame]
                        enable = fu_bitdict[funame]

                        # run this FunctionUnit if enabled
                        # route op, issue, busy, read flags and mask to FU
                        with m.If(enable):
                            # operand comes from the *local* decoder
                            comb += fu.oper_i.eq_from(do)
                            #comb += fu.oper_i.eq_from_execute1(e)
                            comb += fu.issue_i.eq(self.issue_i)
                            comb += self.busy_o.eq(fu.busy_o)
                            # rdmask, which is for registers, needs to come
                            # from the *main* decoder
                            rdmask = get_rdflags(self.e, fu)
                            comb += fu.rdmaskn.eq(~rdmask)

        return fu_bitdict

    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
        """Connect one regfile read port to every FU that reads through it.

        m:          Module being built
        fu_bitdict: FU name -> enable bit (from connect_instruction)
        rdpickers:  dict-of-dicts; the PriorityPicker created here is
                    stored at rdpickers[regfile][regname]
        regfile:    regfile name (lower-cased to index self.regs.rf)
        regname:    read port name within that regfile
        fspec:      either one tuple (rf, read, write, wid, fuspec) or a
                    list of such tuples; each fuspec is a list of
                    (funame, fu, idx) entries
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus
        regs = self.regs

        rpidx = regname

        # select the required read port. these are pre-defined sizes
        rfile = regs.rf[regfile.lower()]
        rport = rfile.r_ports[rpidx]
        print("read regfile", rpidx, regfile, regs.rf.keys(),
              rfile, rfile.unary)

        # normalise to a list so single and merged ports share one code path
        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        rdflags = []  # one read-flag Signal per fspec
        pplen = 0     # total number of picker inputs needed
        reads = []    # the "read" entry of each fspec
        ppoffs = []   # picker index at which each fspec's FUs start
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = fspec
            print ("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen) # record offset for picker
            pplen += len(fuspec)
            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name=name, reset_less=True)
            comb += rdflag.eq(rf)
            rdflags.append(rdflag)
            reads.append(read)

        print ("pplen", pplen)

        # create a priority picker to manage this port
        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

        rens = []   # per-FU read-enable contributions, OR-ed together below
        addrs = []  # per-FU address contributions (binary regfiles only)
        for i, fspec in enumerate(fspecs):
            (rf, read, write, wid, fuspec) = fspec
            # connect up the FU req/go signals, and the reg-read to the FU
            # and create a Read Broadcast Bus
            for pi, (funame, fu, idx) in enumerate(fuspec):
                # shift into this fspec's range of picker inputs
                pi += ppoffs[i]

                # connect request-read to picker input, and output to go-rd
                fu_active = fu_bitdict[funame]
                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
                addr_en = Signal.like(reads[i], name="addr_en_"+name)
                pick = Signal(name="pick_"+name) # picker input
                rp = Signal(name="rp_"+name) # picker output
                delay_pick = Signal(name="dp_"+name) # read-enable "underway"

                # exclude any currently-enabled read-request (mask out active)
                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
                                ~delay_pick)
                comb += rdpick.i[pi].eq(pick)
                comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick

                # if picked, select read-port "reg select" number to port
                comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
                sync += delay_pick.eq(rp) # delayed "pick"
                # zero unless picked, so un-picked FUs do not disturb the OR
                comb += addr_en.eq(Mux(rp, reads[i], 0))

                # the read-enable happens combinatorially (see mux-bus below)
                # but it results in the data coming out on a one-cycle delay.
                if rfile.unary:
                    rens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    rens.append(rp)

                # use the *delayed* pick signal to put requested data onto bus
                with m.If(delay_pick):
                    # connect regfile port to input, creating fan-out Bus
                    src = fu.src_i[idx]
                    print("reg connect widths",
                          regfile, regname, pi, funame,
                          src.shape(), rport.data_o.shape())
                    # all FUs connect to same port
                    comb += src.eq(rport.data_o)

        # or-reduce the muxed read signals
        if rfile.unary:
            # for unary-addressed
            comb += rport.ren.eq(ortreereduce_sig(rens))
        else:
            # for binary-addressed
            comb += rport.addr.eq(ortreereduce_sig(addrs))
            comb += rport.ren.eq(Cat(*rens).bool())
            print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    def connect_rdports(self, m, fu_bitdict):
        """connect read ports

        orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker.
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus
        regs = self.regs

        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        rdpickers = {}
        for regfile, spec in byregfiles_rd.items():
            fuspecs = byregfiles_rdspec[regfile]
            rdpickers[regfile] = {}

            # argh. an experiment to merge RA and RB in the INT regfile
            # (we have too many read/write ports)
            #if regfile == 'INT':
                #fuspecs['rabc'] = [fuspecs.pop('rb')]
                #fuspecs['rabc'].append(fuspecs.pop('rc'))
                #fuspecs['rabc'].append(fuspecs.pop('ra'))
            #if regfile == 'FAST':
            #    fuspecs['fast1'] = [fuspecs.pop('fast1')]
            #    if 'fast2' in fuspecs:
            #        fuspecs['fast1'].append(fuspecs.pop('fast2'))

            # for each named regfile port, connect up all FUs to that port
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect rd", regname, fspec)
                self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                    regname, fspec)

    def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
        """Connect one regfile write port to every FU that writes through it.

        m:          Module being built
        fu_bitdict: FU name -> enable bit (from connect_instruction)
        wrpickers:  dict-of-dicts; the PriorityPicker created here is
                    stored at wrpickers[regfile][regname]
        regfile:    regfile name (lower-cased to index self.regs.rf)
        regname:    write port name within that regfile
        fspec:      either one tuple (rf, read, write, wid, fuspec) or a
                    list of such tuples; each fuspec is a list of
                    (funame, fu, idx) entries
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus
        regs = self.regs

        print("connect wr", regname, fspec)
        rpidx = regname

        # select the required write port. these are pre-defined sizes
        print(regfile, regs.rf.keys())
        rfile = regs.rf[regfile.lower()]
        wport = rfile.w_ports[rpidx]

        # normalise to a list so single and merged ports share one code path
        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        pplen = 0    # total number of picker inputs needed
        writes = []  # (not filled in or read anywhere in this method)
        ppoffs = []  # picker index at which each fspec's FUs start
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = fspec
            print ("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen) # record offset for picker
            pplen += len(fuspec)

        # create a priority picker to manage this port
        wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
        setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

        wsigs = []  # per-FU latched outputs, OR-ed onto the write data bus
        wens = []   # per-FU write-enable contributions
        addrs = []  # per-FU address contributions (binary regfiles only)
        for i, fspec in enumerate(fspecs):
            # connect up the FU req/go signals and the reg-read to the FU
            # these are arbitrated by Data.ok signals
            (rf, read, write, wid, fuspec) = fspec
            for pi, (funame, fu, idx) in enumerate(fuspec):
                # shift into this fspec's range of picker inputs
                pi += ppoffs[i]

                # write-request comes from dest.ok
                dest = fu.get_out(idx)
                fu_dest_latch = fu.get_fu_out(idx) # latched output
                name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                # NOTE(review): wrflag is driven here but not used in
                # "pick" below (the "& wrflag" term is commented out)
                wrflag = Signal(name=name, reset_less=True)
                comb += wrflag.eq(dest.ok & fu.busy_o)

                # connect request-write to picker input, and output to go-wr
                fu_active = fu_bitdict[funame]
                pick = fu.wr.rel_o[idx] & fu_active # & wrflag
                comb += wrpick.i[pi].eq(pick)
                # create a single-pulse go write from the picker output
                wr_pick = Signal()
                comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
                comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

                # connect the regspec write "reg select" number to this port
                # only if one FU actually requests (and is granted) the port
                # will the write-enable be activated
                addr_en = Signal.like(write)
                wp = Signal()
                # (wr_pick is already ANDed with wrpick.en_o above)
                comb += wp.eq(wr_pick & wrpick.en_o)
                # zero unless picked, so un-picked FUs do not disturb the OR
                comb += addr_en.eq(Mux(wp, write, 0))
                if rfile.unary:
                    wens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    wens.append(wp)

                # connect regfile port to input
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      dest.shape(), wport.data_i.shape())
                wsigs.append(fu_dest_latch)

        # here is where we create the Write Broadcast Bus. simple, eh?
        comb += wport.data_i.eq(ortreereduce_sig(wsigs))
        if rfile.unary:
            # for unary-addressed
            comb += wport.wen.eq(ortreereduce_sig(wens))
        else:
            # for binary-addressed
            comb += wport.addr.eq(ortreereduce_sig(addrs))
            comb += wport.wen.eq(ortreereduce_sig(wens))

    def connect_wrports(self, m, fu_bitdict):
        """connect write ports

        orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker.

        note that the write-port wen, write-port data, and go_wr_i all need to
        be on the exact same clock cycle. as there is a combinatorial loop bug
        at the moment, these all use sync.

        NOTE(review): in the code visible in this file, connect_wrport
        assigns wen, data_i and go_wr_i in the comb domain, so the "use
        sync" remark above may be out of date - confirm.
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus
        regs = self.regs
        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same for write ports.
        # BLECH! complex code-duplication! BLECH!
        wrpickers = {}
        for regfile, spec in byregfiles_wr.items():
            fuspecs = byregfiles_wrspec[regfile]
            wrpickers[regfile] = {}

            # argh, more port-merging
            # the merged entry is a *list* of fspec tuples, which
            # connect_wrport accepts in place of a single tuple
            if regfile == 'INT':
                # 'o' and 'o1' are popped unconditionally: both must exist
                fuspecs['o'] = [fuspecs.pop('o')]
                fuspecs['o'].append(fuspecs.pop('o1'))
            if regfile == 'FAST':
                fuspecs['fast1'] = [fuspecs.pop('fast1')]
                if 'fast2' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast2'))

            for (regname, fspec) in sort_fuspecs(fuspecs):
                self.connect_wrport(m, fu_bitdict, wrpickers,
                                    regfile, regname, fspec)

    def get_byregfiles(self, readmode):
        """Group the FUs' register ports by regfile.

        readmode: True to collect source (read) ports, False to collect
                  destination (write) ports.

        returns (byregfiles, byregfiles_spec) where
          byregfiles[regfile][idx]          is a list of (funame, fu, idx)
          byregfiles_spec[regfile][regname] is a tuple
                                            (rdflag, read, write, wid, fuspecs)
        with fuspecs a list of (funame, fu, idx).  The first four tuple
        entries are taken from the first FU seen for that regname; later
        FUs only append to the list.
        """

        mode = "read" if readmode else "write"
        regs = self.regs
        fus = self.fus.fus
        e = self.e # decoded instruction to execute

        # dictionary of lists of regfile ports
        byregfiles = {}
        byregfiles_spec = {}
        for (funame, fu) in fus.items():
            print("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                if readmode:
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
                # the side that does not apply to this mode is left as None
                if readmode:
                    rdflag, read = regspec_decode_read(e, regfile, regname)
                    write = None
                else:
                    rdflag, read = None, None
                    wrport, write = regspec_decode_write(e, regfile, regname)
                if regfile not in byregfiles:
                    byregfiles[regfile] = {}
                    byregfiles_spec[regfile] = {}
                if regname not in byregfiles_spec[regfile]:
                    byregfiles_spec[regfile][regname] = \
                        (rdflag, read, write, wid, [])
                # here we start to create "lanes"
                if idx not in byregfiles[regfile]:
                    byregfiles[regfile][idx] = []
                fuspec = (funame, fu, idx)
                byregfiles[regfile][idx].append(fuspec)
                # element [4] of the spec tuple is the (mutable) FU list
                byregfiles_spec[regfile][regname][4].append(fuspec)

        # ok just print that out, for convenience
        for regfile, spec in byregfiles.items():
            print("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, read, write, wid, fuspec] = fspec
                print(" rf %s port %s lane: %s" % (mode, regfile, regname))
                print(" %s" % regname, wid, read, write, rdflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print(" ", funame, fu, idx, fusig)
                    print()

        return byregfiles, byregfiles_spec

    def __iter__(self):
        """Yield the ports of the FUs, the decode record and the L0 buffer."""
        yield from self.fus.ports()
        yield from self.e.ports()
        yield from self.l0.ports()
        # TODO: regs

    def ports(self):
        """Return everything yielded by __iter__ as a list."""
        return list(self)
535
536
if __name__ == '__main__':
    # standalone run: build the core from a test pspec, convert it to
    # RTLIL and save the text as test_core.il
    core_pspec = TestMemPspec(ldst_ifacetype='testpi', imem_ifacetype='',
                              addr_wid=48, mask_wid=8, reg_wid=64)
    core = NonProductionCore(core_pspec)
    rtlil_text = rtlil.convert(core, ports=core.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(rtlil_text)