Merge branch 'master' of git.libre-soc.org:soc
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 """
21
22 from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
23 from nmigen.cli import rtlil
24
25 from soc.decoder.power_decoder2 import PowerDecodeSubset
26 from soc.decoder.power_regspec_map import regspec_decode_read
27 from soc.decoder.power_regspec_map import regspec_decode_write
28
29 from nmutil.picker import PriorityPicker
30 from nmutil.util import treereduce
31
32 from soc.fu.compunits.compunits import AllFunctionUnits
33 from soc.regfile.regfiles import RegFiles
34 from soc.decoder.decode2execute1 import Decode2ToExecute1Type
35 from soc.decoder.decode2execute1 import IssuerDecode2ToOperand
36 from soc.decoder.power_decoder2 import get_rdflags
37 from soc.decoder.decode2execute1 import Data
38 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
39 from soc.config.test.test_loadstore import TestMemPspec
40 from soc.decoder.power_enums import MicrOp
41 from soc.config.state import CoreState
42
43 import operator
44
45 from nmutil.util import rising_edge
46
47
48 # helper function for reducing a list of signals down to a parallel
49 # ORed single signal.
def ortreereduce(tree, attr="data_o"):
    """OR together one attribute taken from every item of *tree*.

    Each item contributes ``getattr(item, attr)``; the values are combined
    with ``operator.or_`` by ``treereduce``, giving a single parallel-ORed
    signal.

    :param tree: sequence of objects that each carry attribute *attr*
    :param attr: name of the attribute to pick from each item
    :returns: whatever ``treereduce`` produces for the OR-reduction
    """
    def pick_attr(item):
        return getattr(item, attr)

    return treereduce(tree, operator.or_, pick_attr)
52
53
def ortreereduce_sig(tree):
    """OR together the items of *tree* themselves (no attribute lookup).

    Same reduction as ``ortreereduce`` but each element is used as-is.

    :param tree: sequence of values supporting the ``|`` operator
    :returns: whatever ``treereduce`` produces for the OR-reduction
    """
    def identity(item):
        return item

    return treereduce(tree, operator.or_, identity)
56
57
58 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the items of *fuspecs* with the "full" register ports first.

    Entries whose register name starts with ``"full"`` are placed ahead of
    all others.  Within each of the two groups the original dictionary
    order is kept (``sorted`` is stable and the key is a plain boolean).

    :param fuspecs: mapping of register-port name -> port specification
    :returns: a new list of ``(regname, fspec)`` tuples
    """
    # key is False (sorts first) for "full*" names, True for everything else
    return sorted(fuspecs.items(),
                  key=lambda item: not item[0].startswith("full"))
68
69
class NonProductionCore(Elaboratable):
    """Simple (non-production) core.

    Instantiates the Function Units, the Register Files, a single LD/ST
    funnel and one subset instruction decoder per Function Unit, then (in
    ``elaborate``) wires each regfile read/write port to the Function Units
    that need it through one PriorityPicker per port.

    Only the Function Unit whose ``fn_unit`` bit matches the decoded
    instruction is enabled at any one time.
    """

    def __init__(self, pspec):
        """Create the sub-components and the core's interface signals.

        :param pspec: parameter specification object.  The optional
                      attributes ``svp64``, ``regreduce`` and (in
                      ``elaborate``) ``nocore`` are honoured when present
                      and equal to True.
        """
        self.pspec = pspec

        # test if SVP64 is to be enabled
        self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)

        # test to see if regfile ports should be reduced
        self.regreduce_en = (hasattr(pspec, "regreduce") and
                             (pspec.regreduce == True))

        # single LD/ST funnel for memory access
        self.l0 = TstL0CacheBuffer(pspec, n_units=1)
        pi = self.l0.l0.dports[0]

        # function units (only one each)
        # only include mmu if enabled in pspec
        self.fus = AllFunctionUnits(pspec, pilist=[pi])

        # register files (yes plural)
        self.regs = RegFiles(pspec)

        # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
        self.e = Decode2ToExecute1Type("core", opkls=IssuerDecode2ToOperand,
                                       regreduce_en=self.regreduce_en)

        # SVP64 RA_OR_ZERO needs to know if the relevant EXTRA2/3 field is zero
        self.sv_a_nz = Signal()

        # state and raw instruction
        self.state = CoreState("core")
        self.raw_insn_i = Signal(32)  # raw instruction
        self.bigendian_i = Signal()  # bigendian - TODO, set by MSR.BE

        # issue/valid/busy signalling
        self.ivalid_i = Signal(reset_less=True)  # instruction is valid
        self.issue_i = Signal(reset_less=True)
        self.busy_o = Signal(name="corebusy_o", reset_less=True)

        # start/stop and terminated signalling
        self.core_stopped_i = Signal(reset_less=True)
        self.core_terminate_o = Signal(reset=0)  # indicates stopped

        # create per-FU instruction decoders (subsetted)
        self.decoders = {}
        self.des = {}

        # name of the TRAP function unit; stays None if the configuration
        # does not contain one (elaborate checks for this)
        self.trapunit = None

        for funame, fu in self.fus.fus.items():
            f_name = fu.fnunit.name
            opkls = fu.opsubsetkls
            if f_name == 'TRAP':
                # no subset decoder for TRAP: elaborate() points it at the
                # main decoder instead
                self.trapunit = funame
                continue
            self.decoders[funame] = PowerDecodeSubset(
                None, opkls, f_name,
                final=True,
                state=self.state,
                svp64_en=self.svp64_en,
                regreduce_en=self.regreduce_en)
            self.des[funame] = self.decoders[funame].do

        if "mmu0" in self.decoders:
            self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]

    def elaborate(self, platform):
        """Build the nmigen Module for the core.

        When ``pspec.nocore`` is True only a dummy toggling signal is
        created (to cut down build time); otherwise the function units,
        LD/ST funnel, register files and decoders are added as submodules
        and connected up.

        :param platform: passed through to ``RegFiles.elaborate_into``
        :returns: the populated Module
        """
        m = Module()
        # for testing purposes, to cut down on build time in coriolis2
        if hasattr(self.pspec, "nocore") and self.pspec.nocore == True:
            x = Signal()  # dummy signal
            m.d.sync += x.eq(~x)
            return m
        comb = m.d.comb

        m.submodules.fus = self.fus
        m.submodules.l0 = self.l0
        self.regs.elaborate_into(m, platform)

        # connect decoders
        for v in self.decoders.values():
            setattr(m.submodules, "dec_%s" % v.fn_name, v)
            comb += v.dec.raw_opcode_in.eq(self.raw_insn_i)
            comb += v.dec.bigendian.eq(self.bigendian_i)
            # sigh due to SVP64 RA_OR_ZERO detection connect these too
            comb += v.sv_a_nz.eq(self.sv_a_nz)

        # ssh, cheat: trap uses the main decoder because of the rewriting.
        # skipped when the configuration has no TRAP unit at all.
        if self.trapunit is not None:
            self.des[self.trapunit] = self.e.do

        # connect up Function Units, then read/write ports
        fu_bitdict = self.connect_instruction(m)
        self.connect_rdports(m, fu_bitdict)
        self.connect_wrports(m, fu_bitdict)

        return m

    def connect_instruction(self, m):
        """connect_instruction

        uses decoded (from PowerOp) function unit information from CSV files
        to ascertain which Function Unit should deal with the current
        instruction.

        some (such as OP_ATTN, OP_NOP) are dealt with here, including
        ignoring it and halting the processor.  OP_NOP is a bit annoying
        because the issuer expects busy flag still to be raised then lowered.
        (this requires a fake counter to be set).

        :param m: the Module being built
        :returns: dict mapping Function Unit name to its one-bit enable
                  (a slice of the ``fu_enable`` signal)
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus

        # enable-signals for each FU, get one bit for each FU (by name)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_bitdict = {funame: fu_enable[i]
                      for i, funame in enumerate(fus.keys())}

        # enable the required Function Unit based on the opcode decode
        # note: this *only* works correctly for simple core when one and
        # *only* one FU is allocated per instruction
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            enable = Signal(name="en_%s" % funame, reset_less=True)
            comb += enable.eq((self.e.do.fn_unit & fnunit).bool())
            comb += fu_bitdict[funame].eq(enable)

        # sigh - need a NOP counter: busy is held while it counts down
        counter = Signal(2)
        with m.If(counter != 0):
            sync += counter.eq(counter - 1)
            comb += self.busy_o.eq(1)

        with m.If(self.ivalid_i):  # run only when valid
            with m.Switch(self.e.do.insn_type):
                # check for ATTN: halt if true
                with m.Case(MicrOp.OP_ATTN):
                    sync += self.core_terminate_o.eq(1)

                with m.Case(MicrOp.OP_NOP):
                    sync += counter.eq(2)
                    comb += self.busy_o.eq(1)

                with m.Default():
                    # connect up instructions.  only one enabled at a time
                    for funame, fu in fus.items():
                        do = self.des[funame]
                        enable = fu_bitdict[funame]

                        # run this FunctionUnit if enabled
                        # route op, issue, busy, read flags and mask to FU
                        with m.If(enable):
                            # operand comes from the *local* decoder
                            comb += fu.oper_i.eq_from(do)
                            comb += fu.issue_i.eq(self.issue_i)
                            comb += self.busy_o.eq(fu.busy_o)
                            # rdmask, which is for registers, needs to come
                            # from the *main* decoder
                            rdmask = get_rdflags(self.e, fu)
                            comb += fu.rdmaskn.eq(~rdmask)

        return fu_bitdict

    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
        """Connect one regfile read port to every Function Unit using it.

        A PriorityPicker sized to the total number of requesting FU inputs
        is created for the port and stored in ``rdpickers``.  The picked
        request drives the port's read-enable/address combinatorially; the
        data is handed to the FU one cycle later, gated by a registered
        ("delayed") copy of the pick.

        :param m: the Module being built
        :param fu_bitdict: FU name -> enable bit (from connect_instruction)
        :param rdpickers: dict-of-dicts ``[regfile][regname]`` that receives
                          the new picker
        :param regfile: register file name (lower-cased to index regs.rf)
        :param regname: read port name within that register file
        :param fspec: either one ``(rdflag, read, write, wid, fuspec)``
                      tuple or a list of them (merged ports); ``fuspec`` is
                      a list of ``(funame, fu, idx)``
        """
        comb, sync = m.d.comb, m.d.sync
        regs = self.regs

        rpidx = regname

        # select the required read port.  these are pre-defined sizes
        rfile = regs.rf[regfile.lower()]
        rport = rfile.r_ports[rpidx]
        print("read regfile", rpidx, regfile, regs.rf.keys(),
              rfile, rfile.unary)

        # normalise: a single spec is treated as a one-entry list
        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        rdflags = []
        pplen = 0
        reads = []
        ppoffs = []
        for i, spec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, _write, _wid, fuspec) = spec
            print ("fpsec", i, spec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)
            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name=name, reset_less=True)
            comb += rdflag.eq(rf)
            rdflags.append(rdflag)
            reads.append(read)

        print ("pplen", pplen)

        # create a priority picker to manage this port
        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

        rens = []
        addrs = []
        for i, spec in enumerate(fspecs):
            fuspec = spec[4]
            # connect up the FU req/go signals, and the reg-read to the FU
            # and create a Read Broadcast Bus
            for pi, (funame, fu, idx) in enumerate(fuspec):
                pi += ppoffs[i]  # position within the shared picker

                # connect request-read to picker input, and output to go-rd
                fu_active = fu_bitdict[funame]
                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
                addr_en = Signal.like(reads[i], name="addr_en_"+name)
                pick = Signal(name="pick_"+name)      # picker input
                rp = Signal(name="rp_"+name)          # picker output
                delay_pick = Signal(name="dp_"+name)  # read-enable "underway"

                # exclude any currently-enabled read-request (mask out active)
                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
                                ~delay_pick)
                comb += rdpick.i[pi].eq(pick)
                comb += fu.go_rd_i[idx].eq(delay_pick)  # pass in *delayed* pick

                # if picked, select read-port "reg select" number to port
                comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
                sync += delay_pick.eq(rp)  # delayed "pick"
                comb += addr_en.eq(Mux(rp, reads[i], 0))

                # the read-enable happens combinatorially (see mux-bus below)
                # but it results in the data coming out on a one-cycle delay.
                if rfile.unary:
                    rens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    rens.append(rp)

                # use the *delayed* pick signal to put requested data onto bus
                with m.If(delay_pick):
                    # connect regfile port to input, creating fan-out Bus
                    src = fu.src_i[idx]
                    print("reg connect widths",
                          regfile, regname, pi, funame,
                          src.shape(), rport.data_o.shape())
                    # all FUs connect to same port
                    comb += src.eq(rport.data_o)

        # or-reduce the muxed read signals
        if rfile.unary:
            # for unary-addressed
            comb += rport.ren.eq(ortreereduce_sig(rens))
        else:
            # for binary-addressed
            comb += rport.addr.eq(ortreereduce_sig(addrs))
            comb += rport.ren.eq(Cat(*rens).bool())
            print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    def connect_rdports(self, m, fu_bitdict):
        """connect read ports

        orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker.

        :param m: the Module being built
        :param fu_bitdict: FU name -> enable bit (from connect_instruction)
        """
        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        rdpickers = {}
        for regfile in byregfiles_rd:
            fuspecs = byregfiles_rdspec[regfile]
            rdpickers[regfile] = {}

            # argh.  an experiment to merge RA, RB and RC in the INT regfile
            # onto one port (we have too many read/write ports)
            if self.regreduce_en:
                if regfile == 'INT':
                    fuspecs['rabc'] = [fuspecs.pop('rb')]
                    fuspecs['rabc'].append(fuspecs.pop('rc'))
                    fuspecs['rabc'].append(fuspecs.pop('ra'))
                if regfile == 'FAST':
                    fuspecs['fast1'] = [fuspecs.pop('fast1')]
                    if 'fast2' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast2'))

            # for each named regfile port, connect up all FUs to that port
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect rd", regname, fspec)
                self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                    regname, fspec)

    def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
        """Connect one regfile write port to every Function Unit using it.

        A PriorityPicker sized to the total number of requesting FU outputs
        is created for the port and stored in ``wrpickers``.  The latched
        FU outputs are OR-reduced onto the port's ``data_i`` (the Write
        Broadcast Bus) and the picker output drives the write-enable (and
        the address, for binary-addressed regfiles).

        :param m: the Module being built
        :param fu_bitdict: FU name -> enable bit (from connect_instruction)
        :param wrpickers: dict-of-dicts ``[regfile][regname]`` that receives
                          the new picker
        :param regfile: register file name (lower-cased to index regs.rf)
        :param regname: write port name within that register file
        :param fspec: either one ``(rdflag, read, write, wid, fuspec)``
                      tuple or a list of them (merged ports); ``fuspec`` is
                      a list of ``(funame, fu, idx)``
        """
        comb = m.d.comb
        regs = self.regs

        print("connect wr", regname, fspec)
        rpidx = regname

        # select the required write port.  these are pre-defined sizes
        print(regfile, regs.rf.keys())
        rfile = regs.rf[regfile.lower()]
        wport = rfile.w_ports[rpidx]

        # normalise: a single spec is treated as a one-entry list
        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        pplen = 0
        ppoffs = []
        for i, spec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            fuspec = spec[4]
            print ("fpsec", i, spec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)

        # create a priority picker to manage this port
        wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
        setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

        wsigs = []
        wens = []
        addrs = []
        for i, spec in enumerate(fspecs):
            # connect up the FU req/go signals and the reg-write to the FU
            # these are arbitrated by Data.ok signals
            (_rf, _read, write, _wid, fuspec) = spec
            for pi, (funame, fu, idx) in enumerate(fuspec):
                pi += ppoffs[i]  # position within the shared picker

                # write-request comes from dest.ok
                dest = fu.get_out(idx)
                fu_dest_latch = fu.get_fu_out(idx)  # latched output
                name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                # NOTE: wrflag is driven but not (currently) part of the
                # pick condition below
                wrflag = Signal(name=name, reset_less=True)
                comb += wrflag.eq(dest.ok & fu.busy_o)

                # connect request-write to picker input, and output to go-wr
                fu_active = fu_bitdict[funame]
                pick = fu.wr.rel_o[idx] & fu_active  # & wrflag
                comb += wrpick.i[pi].eq(pick)
                # create a single-pulse go write from the picker output
                wr_pick = Signal()
                comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
                comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

                # connect the regspec write "reg select" number to this port
                # only if one FU actually requests (and is granted) the port
                # will the write-enable be activated
                addr_en = Signal.like(write)
                wp = Signal()
                comb += wp.eq(wr_pick & wrpick.en_o)
                comb += addr_en.eq(Mux(wp, write, 0))
                if rfile.unary:
                    wens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    wens.append(wp)

                # connect regfile port to input
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      dest.shape(), wport.data_i.shape())
                wsigs.append(fu_dest_latch)

        # here is where we create the Write Broadcast Bus. simple, eh?
        comb += wport.data_i.eq(ortreereduce_sig(wsigs))
        if rfile.unary:
            # for unary-addressed
            comb += wport.wen.eq(ortreereduce_sig(wens))
        else:
            # for binary-addressed
            comb += wport.addr.eq(ortreereduce_sig(addrs))
            comb += wport.wen.eq(ortreereduce_sig(wens))

    def connect_wrports(self, m, fu_bitdict):
        """connect write ports

        orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker.

        note that the write-port wen, write-port data, and go_wr_i all need to
        be on the exact same clock cycle.  connect_wrport assigns all three
        in the comb domain.

        :param m: the Module being built
        :param fu_bitdict: FU name -> enable bit (from connect_instruction)
        """
        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same for write ports.
        # BLECH!  complex code-duplication!  BLECH!
        wrpickers = {}
        for regfile in byregfiles_wr:
            fuspecs = byregfiles_wrspec[regfile]
            wrpickers[regfile] = {}

            if self.regreduce_en:
                # argh, more port-merging
                if regfile == 'INT':
                    fuspecs['o'] = [fuspecs.pop('o')]
                    fuspecs['o'].append(fuspecs.pop('o1'))
                if regfile == 'FAST':
                    fuspecs['fast1'] = [fuspecs.pop('fast1')]
                    if 'fast2' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast2'))

            for (regname, fspec) in sort_fuspecs(fuspecs):
                self.connect_wrport(m, fu_bitdict, wrpickers,
                                    regfile, regname, fspec)

    def get_byregfiles(self, readmode):
        """Group the Function Units' register ports by register file.

        :param readmode: True to collect input (read) ports, False to
                         collect output (write) ports
        :returns: tuple ``(byregfiles, byregfiles_spec)`` where

            * ``byregfiles[regfile][idx]`` is a list of
              ``(funame, fu, idx)`` for every FU port with that index
            * ``byregfiles_spec[regfile][regname]`` is a tuple
              ``(rdflag, read, write, wid, fuspecs)``; ``rdflag``/``read``
              are None in write mode, ``write`` is None in read mode, and
              ``fuspecs`` lists the ``(funame, fu, idx)`` using the port
        """
        mode = "read" if readmode else "write"
        fus = self.fus.fus
        e = self.e  # decoded instruction to execute

        # dictionary of lists of regfile ports
        byregfiles = {}
        byregfiles_spec = {}
        for (funame, fu) in fus.items():
            print("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                if readmode:
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
                if readmode:
                    rdflag, read = regspec_decode_read(e, regfile, regname)
                    write = None
                else:
                    rdflag, read = None, None
                    _wrport, write = regspec_decode_write(e, regfile, regname)
                if regfile not in byregfiles:
                    byregfiles[regfile] = {}
                    byregfiles_spec[regfile] = {}
                # the first FU to mention a port fixes that port's spec
                if regname not in byregfiles_spec[regfile]:
                    byregfiles_spec[regfile][regname] = \
                        (rdflag, read, write, wid, [])
                # here we start to create "lanes"
                fuspec = (funame, fu, idx)
                byregfiles[regfile].setdefault(idx, []).append(fuspec)
                byregfiles_spec[regfile][regname][4].append(fuspec)

        # ok just print that out, for convenience
        for regfile in byregfiles:
            print("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, read, write, wid, fuspec] = fspec
                print(" rf %s port %s lane: %s" % (mode, regfile, regname))
                print(" %s" % regname, wid, read, write, rdflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print(" ", funame, fu, idx, fusig)
                    print()

        return byregfiles, byregfiles_spec

    def __iter__(self):
        """Yield the ports of the function units, decoder record and L0."""
        yield from self.fus.ports()
        yield from self.e.ports()
        yield from self.l0.ports()
        # TODO: regs

    def ports(self):
        """Return every port yielded by ``__iter__`` as a list."""
        return list(self)
552
553
if __name__ == '__main__':
    # stand-alone run: build the core with a test parameter spec, convert
    # it to RTLIL and write the result to test_core.il
    core_pspec = TestMemPspec(ldst_ifacetype='testpi',
                              imem_ifacetype='',
                              addr_wid=48,
                              mask_wid=8,
                              reg_wid=64)
    core = NonProductionCore(core_pspec)
    rtlil_text = rtlil.convert(core, ports=core.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(rtlil_text)