replace PartitionedSignal with SimdSignal
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 """
21
22 from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
23 from nmigen.cli import rtlil
24
25 from openpower.decoder.power_decoder2 import PowerDecodeSubset
26 from openpower.decoder.power_regspec_map import regspec_decode_read
27 from openpower.decoder.power_regspec_map import regspec_decode_write
28 from openpower.sv.svp64 import SVP64Rec
29
30 from nmutil.picker import PriorityPicker
31 from nmutil.util import treereduce
32
33 from soc.fu.compunits.compunits import AllFunctionUnits
34 from soc.regfile.regfiles import RegFiles
35 from openpower.decoder.decode2execute1 import Decode2ToExecute1Type
36 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
37 from openpower.decoder.power_decoder2 import get_rdflags
38 from openpower.decoder.decode2execute1 import Data
39 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
40 from soc.config.test.test_loadstore import TestMemPspec
41 from openpower.decoder.power_enums import MicrOp
42 from soc.config.state import CoreState
43
44 import operator
45
46 from nmutil.util import rising_edge
47
48
49 # helper function for reducing a list of signals down to a parallel
50 # ORed single signal.
def ortreereduce(tree, attr="o_data"):
    """OR together one attribute taken from every item of ``tree``.

    Each item is mapped to ``getattr(item, attr)`` and the mapped values
    are combined with ``operator.or_`` by ``treereduce``.

    tree -- the items to reduce (handed unchanged to ``treereduce``)
    attr -- name of the attribute to pick from each item
    """
    def pick_attr(item):
        return getattr(item, attr)

    return treereduce(tree, operator.or_, pick_attr)
53
54
def ortreereduce_sig(tree):
    """OR together the items of ``tree`` themselves.

    Same reduction as ``ortreereduce`` except that no attribute is looked
    up: every item is used as-is.
    """
    def identity(item):
        return item

    return treereduce(tree, operator.or_, identity)
57
58
59 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the ``(regname, fspec)`` pairs of ``fuspecs`` as a list,
    with every regname that starts with "full" placed first.

    Within the "full" group and within the remaining group the pairs keep
    the order in which the dict yields them.
    """
    def is_not_full(entry):
        regname = entry[0]
        return not regname.startswith("full")

    # sorted() is stable and False orders before True, so a boolean key
    # partitions the pairs without disturbing the order inside each group
    return sorted(fuspecs.items(), key=is_not_full)
69
70
71 class NonProductionCore(Elaboratable):
def __init__(self, pspec):
    """Create the core's building blocks from the parameter spec ``pspec``.

    Sets up the LD/ST buffer, the Function Units, the register files, the
    main decode record, the core's interface signals, and one subsetted
    decoder per Function Unit (all except the TRAP unit, whose name is
    remembered in ``self.trapunit`` instead).
    """
    self.pspec = pspec

    # optional pspec switches: a missing attribute counts as "off".
    # SVP64 support
    self.svp64_en = getattr(pspec, "svp64", False) == True
    # reduced number of regfile ports
    self.regreduce_en = getattr(pspec, "regreduce", False) == True

    # single LD/ST funnel for memory access; its first data port is the
    # one handed to the function units
    ldst_buf = TstL0CacheBuffer(pspec, n_units=1)
    self.l0 = ldst_buf
    ldst_port = ldst_buf.l0.dports[0]

    # function units (only one each)
    # only include mmu if enabled in pspec
    self.fus = AllFunctionUnits(pspec, pilist=[ldst_port])

    # link LoadStore1 into MMU (get_fu result is checked against None)
    mmu = self.fus.get_fu('mmu0')
    print ("core pspec", pspec.ldst_ifacetype)
    print ("core mmu", mmu)
    print ("core lsmem.lsi", ldst_buf.cmpi.lsmem.lsi)
    if mmu is not None:
        mmu.alu.set_ldst_interface(ldst_buf.cmpi.lsmem.lsi)

    # register files (yes plural)
    self.regs = RegFiles(pspec)

    # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
    self.e = Decode2ToExecute1Type("core",
                                   opkls=IssuerDecode2ToOperand,
                                   regreduce_en=self.regreduce_en)

    # SVP64 RA_OR_ZERO needs to know if the relevant EXTRA2/3 field is zero
    self.sv_a_nz = Signal()

    # state and raw instruction (and SVP64 ReMap fields)
    self.state = CoreState("core")
    self.raw_insn_i = Signal(32) # raw instruction
    self.bigendian_i = Signal() # bigendian - TODO, set by MSR.BE
    if self.svp64_en:
        self.sv_rm = SVP64Rec(name="core_svp64_rm") # SVP64 RM field
        self.is_svp64_mode = Signal() # set if SVP64 mode is enabled
        self.use_svp64_ldst_dec = Signal() # use alternative LDST decoder
        self.sv_pred_sm = Signal() # TODO: SIMD width
        self.sv_pred_dm = Signal() # TODO: SIMD width

    # issue/valid/busy signalling
    self.ivalid_i = Signal(reset_less=True) # instruction is valid
    self.issue_i = Signal(reset_less=True)
    self.busy_o = Signal(name="corebusy_o", reset_less=True)

    # start/stop and terminated signalling
    self.core_terminate_o = Signal(reset=0) # indicates stopped

    # per-FU instruction decoders (subsetted), and their decode records
    self.decoders = {}
    self.des = {}

    for funame, fu in self.fus.fus.items():
        f_name = fu.fnunit.name
        opkls = fu.opsubsetkls
        if f_name == 'TRAP':
            # TRAP decoder is the *main* decoder: no subset decoder here
            self.trapunit = funame
        else:
            decoder = PowerDecodeSubset(None, opkls, f_name,
                                        final=True,
                                        state=self.state,
                                        svp64_en=self.svp64_en,
                                        regreduce_en=self.regreduce_en)
            self.decoders[funame] = decoder
            self.des[funame] = decoder.do

    # the mmu0 decoder is given a reference to the spr0 decoder
    if "mmu0" in self.decoders:
        self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]
148
def elaborate(self, platform):
    """Assemble the core into a Module and return it.

    Adds the Function Units, LD/ST buffer and register files, feeds each
    per-FU decoder its inputs, then wires up instruction issue and the
    regfile read and write ports.  When ``pspec.nocore`` is set, a module
    holding only a dummy toggling signal is returned instead.
    """
    m = Module()

    # for testing purposes, to cut down on build time in coriolis2
    if getattr(self.pspec, "nocore", False) == True:
        x = Signal() # dummy signal
        m.d.sync += x.eq(~x)
        return m

    comb = m.d.comb

    m.submodules.fus = self.fus
    m.submodules.l0 = self.l0
    self.regs.elaborate_into(m, platform)

    # connect decoders: all of them see the same raw instruction
    for funame, decoder in self.decoders.items():
        setattr(m.submodules, "dec_%s" % decoder.fn_name, decoder)
        comb += decoder.dec.raw_opcode_in.eq(self.raw_insn_i)
        comb += decoder.dec.bigendian.eq(self.bigendian_i)
        # sigh due to SVP64 RA_OR_ZERO detection connect these too
        comb += decoder.sv_a_nz.eq(self.sv_a_nz)

        # everything below is SVP64-only
        if not self.svp64_en:
            continue
        comb += decoder.pred_sm.eq(self.sv_pred_sm)
        comb += decoder.pred_dm.eq(self.sv_pred_dm)

        if funame == self.trapunit:
            continue
        comb += decoder.sv_rm.eq(self.sv_rm) # pass through SVP64 ReMap
        comb += decoder.is_svp64_mode.eq(self.is_svp64_mode)
        # only the LDST PowerDecodeSubset *actually* needs to
        # know to use the alternative decoder.  this is all
        # a terrible hack
        if funame.lower().startswith("ldst"):
            comb += decoder.use_svp64_ldst_dec.eq(self.use_svp64_ldst_dec)

    # ssh, cheat: trap uses the main decoder because of the rewriting
    self.des[self.trapunit] = self.e.do

    # connect up Function Units, then read/write ports
    fu_bitdict = self.connect_instruction(m)
    self.connect_rdports(m, fu_bitdict)
    self.connect_wrports(m, fu_bitdict)

    return m
192
def connect_instruction(self, m):
    """connect_instruction

    uses decoded (from PowerOp) function unit information from CSV files
    to ascertain which Function Unit should deal with the current
    instruction.

    some (such as OP_ATTN, OP_NOP) are dealt with here, including
    ignoring it and halting the processor.  OP_NOP is a bit annoying
    because the issuer expects busy flag still to be raised then lowered.
    (this requires a fake counter to be set).

    m -- the Module under construction; statements are added to its
         comb and sync domains.

    returns fu_bitdict: a dict mapping each Function Unit name to its
    own bit of a single "fu_enable" Signal.  that bit is set when
    (self.e.do.fn_unit & fu.fnunit.value) is non-zero.
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus

    # enable-signals for each FU, get one bit for each FU (by name)
    fu_enable = Signal(len(fus), reset_less=True)
    fu_bitdict = {}
    for i, funame in enumerate(fus.keys()):
        fu_bitdict[funame] = fu_enable[i]

    # enable the required Function Unit based on the opcode decode
    # note: this *only* works correctly for simple core when one and
    # *only* one FU is allocated per instruction
    for funame, fu in fus.items():
        fnunit = fu.fnunit.value
        enable = Signal(name="en_%s" % funame, reset_less=True)
        comb += enable.eq((self.e.do.fn_unit & fnunit).bool())
        comb += fu_bitdict[funame].eq(enable)

    # sigh - need a NOP counter.  while it is non-zero it counts down
    # by one per clock and busy_o is held high
    counter = Signal(2)
    with m.If(counter != 0):
        sync += counter.eq(counter - 1)
        comb += self.busy_o.eq(1)

    with m.If(self.ivalid_i): # run only when valid
        with m.Switch(self.e.do.insn_type):
            # check for ATTN: halt if true
            with m.Case(MicrOp.OP_ATTN):
                m.d.sync += self.core_terminate_o.eq(1)

            # NOP: no FU is involved.  load the counter and raise busy_o
            with m.Case(MicrOp.OP_NOP):
                sync += counter.eq(2)
                comb += self.busy_o.eq(1)

            with m.Default():
                # connect up instructions.  only one enabled at a time
                for funame, fu in fus.items():
                    # decode record for this FU, looked up by FU name
                    do = self.des[funame]
                    enable = fu_bitdict[funame]

                    # run this FunctionUnit if enabled
                    # route op, issue, busy, read flags and mask to FU
                    with m.If(enable):
                        # operand comes from the *local* decoder
                        comb += fu.oper_i.eq_from(do)
                        #comb += fu.oper_i.eq_from_execute1(e)
                        comb += fu.issue_i.eq(self.issue_i)
                        comb += self.busy_o.eq(fu.busy_o)
                        # rdmask, which is for registers, needs to come
                        # from the *main* decoder.  the FU is handed
                        # the inverted mask (rdmaskn)
                        rdmask = get_rdflags(self.e, fu)
                        comb += fu.rdmaskn.eq(~rdmask)

    return fu_bitdict
259
def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
    """connect one regfile read port to every FU input that uses it

    a PriorityPicker is created for the port, each FU's read-request is
    fed to it, and the port's output data is fanned out to whichever FU
    was picked on the previous clock.

    m          -- the Module under construction
    fu_bitdict -- dict of FU name to that FU's enable bit
    rdpickers  -- dict of dicts: the picker created here is stored at
                  rdpickers[regfile][regname]
    regfile    -- regfile name; lower-cased to index self.regs.rf
    regname    -- name of the read port (key into the regfile's r_ports)
    fspec      -- one spec tuple, or a list of them.  each tuple is
                  (rf, read, write, wid, fuspec) where fuspec is a list
                  of (funame, fu, idx) entries
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus
    regs = self.regs

    rpidx = regname

    # select the required read port.  these are pre-defined sizes
    rfile = regs.rf[regfile.lower()]
    rport = rfile.r_ports[rpidx]
    print("read regfile", rpidx, regfile, regs.rf.keys(),
          rfile, rfile.unary)

    # normalise to a list so that merged and single ports share one path
    fspecs = fspec
    if not isinstance(fspecs, list):
        fspecs = [fspecs]

    # first pass: one read-flag signal per spec, plus the total number
    # of requesters (pplen) and each spec's starting picker index
    rdflags = []
    pplen = 0
    reads = []
    ppoffs = []
    for i, fspec in enumerate(fspecs):
        # get the regfile specs for this regfile port
        (rf, read, write, wid, fuspec) = fspec
        print ("fpsec", i, fspec, len(fuspec))
        ppoffs.append(pplen) # record offset for picker
        pplen += len(fuspec)
        name = "rdflag_%s_%s_%d" % (regfile, regname, i)
        rdflag = Signal(name=name, reset_less=True)
        comb += rdflag.eq(rf)
        rdflags.append(rdflag)
        reads.append(read)

    print ("pplen", pplen)

    # create a priority picker to manage this port
    rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
    setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

    # second pass: wire every requester to its own picker input/output
    rens = []
    addrs = []
    for i, fspec in enumerate(fspecs):
        (rf, read, write, wid, fuspec) = fspec
        # connect up the FU req/go signals, and the reg-read to the FU
        # and create a Read Broadcast Bus
        for pi, (funame, fu, idx) in enumerate(fuspec):
            # picker index is the position in this spec plus its offset
            pi += ppoffs[i]

            # connect request-read to picker input, and output to go-rd
            fu_active = fu_bitdict[funame]
            name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
            addr_en = Signal.like(reads[i], name="addr_en_"+name)
            pick = Signal(name="pick_"+name) # picker input
            rp = Signal(name="rp_"+name) # picker output
            delay_pick = Signal(name="dp_"+name) # read-enable "underway"

            # exclude any currently-enabled read-request (mask out active)
            comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
                            ~delay_pick)
            comb += rdpick.i[pi].eq(pick)
            comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick

            # if picked, select read-port "reg select" number to port
            comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
            sync += delay_pick.eq(rp) # delayed "pick"
            # reg-select value when picked, zero otherwise (so that the
            # values can be ORed together further down)
            comb += addr_en.eq(Mux(rp, reads[i], 0))

            # the read-enable happens combinatorially (see mux-bus below)
            # but it results in the data coming out on a one-cycle delay.
            if rfile.unary:
                rens.append(addr_en)
            else:
                addrs.append(addr_en)
                rens.append(rp)

            # use the *delayed* pick signal to put requested data onto bus
            with m.If(delay_pick):
                # connect regfile port to input, creating fan-out Bus
                src = fu.src_i[idx]
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      src.shape(), rport.o_data.shape())
                # all FUs connect to same port
                comb += src.eq(rport.o_data)

    # or-reduce the muxed read signals
    if rfile.unary:
        # for unary-addressed
        comb += rport.ren.eq(ortreereduce_sig(rens))
    else:
        # for binary-addressed
        comb += rport.addr.eq(ortreereduce_sig(addrs))
        comb += rport.ren.eq(Cat(*rens).bool())
        print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)
354
def connect_rdports(self, m, fu_bitdict):
    """connect read ports

    takes the read regspecs grouped by regfile and by regport name (as
    returned by get_byregfiles) and hands each named port, together with
    the FUs that want it, to connect_rdport.  with regreduce enabled,
    some INT and FAST ports are first merged into a single port.
    """
    # dictionary of lists of regfile read ports
    byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

    # okaay, now we need a PriorityPicker per regfile per regfile port
    # loootta pickers... peter piper picked a pack of pickled peppers...
    rdpickers = {}
    for regfile in byregfiles_rd:
        fuspecs = byregfiles_rdspec[regfile]
        rdpickers[regfile] = {}

        # argh.  an experiment to merge RA and RB in the INT regfile
        # (we have too many read/write ports)
        if self.regreduce_en:
            if regfile == 'INT':
                # rb, rc and ra (in that order) all share port "rabc"
                merged = [fuspecs.pop(port) for port in ('rb', 'rc', 'ra')]
                fuspecs['rabc'] = merged
            if regfile == 'FAST':
                # fast2 and fast3, where present, are folded into fast1
                merged = [fuspecs.pop('fast1')]
                for extra in ('fast2', 'fast3'):
                    if extra in fuspecs:
                        merged.append(fuspecs.pop(extra))
                fuspecs['fast1'] = merged

        # for each named regfile port, connect up all FUs to that port
        for regname, fspec in sort_fuspecs(fuspecs):
            print("connect rd", regname, fspec)
            self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                regname, fspec)
395
def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
    """connect one regfile write port to every FU output that uses it

    a PriorityPicker is created for the port and each FU's write-request
    is fed to it.  the port's data input is the OR of all the FUs'
    latched outputs, and the write-enable (plus address, for
    binary-addressed regfiles) is the OR of the per-FU enables.

    m          -- the Module under construction
    fu_bitdict -- dict of FU name to that FU's enable bit
    wrpickers  -- dict of dicts: the picker created here is stored at
                  wrpickers[regfile][regname]
    regfile    -- regfile name; lower-cased to index self.regs.rf
    regname    -- name of the write port (key into the regfile's w_ports)
    fspec      -- one spec tuple, or a list of them.  each tuple is
                  (rf, read, write, wid, fuspec) where fuspec is a list
                  of (funame, fu, idx) entries
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus
    regs = self.regs

    print("connect wr", regname, fspec)
    rpidx = regname

    # select the required write port.  these are pre-defined sizes
    print(regfile, regs.rf.keys())
    rfile = regs.rf[regfile.lower()]
    wport = rfile.w_ports[rpidx]

    # normalise to a list so that merged and single ports share one path
    fspecs = fspec
    if not isinstance(fspecs, list):
        fspecs = [fspecs]

    # first pass: total number of requesters (pplen) and each spec's
    # starting picker index.  ("writes" is not used further on.)
    pplen = 0
    writes = []
    ppoffs = []
    for i, fspec in enumerate(fspecs):
        # get the regfile specs for this regfile port
        (rf, read, write, wid, fuspec) = fspec
        print ("fpsec", i, fspec, len(fuspec))
        ppoffs.append(pplen) # record offset for picker
        pplen += len(fuspec)

    # create a priority picker to manage this port
    wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
    setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

    wsigs = []
    wens = []
    addrs = []
    for i, fspec in enumerate(fspecs):
        # connect up the FU req/go signals and the reg-write to the FU
        # these are arbitrated by Data.ok signals
        (rf, read, write, wid, fuspec) = fspec
        for pi, (funame, fu, idx) in enumerate(fuspec):
            # picker index is the position in this spec plus its offset
            pi += ppoffs[i]

            # write-request comes from dest.ok
            # NOTE: wrflag is driven here but is not part of "pick"
            # below (the "& wrflag" term is commented out)
            dest = fu.get_out(idx)
            fu_dest_latch = fu.get_fu_out(idx) # latched output
            name = "wrflag_%s_%s_%d" % (funame, regname, idx)
            wrflag = Signal(name=name, reset_less=True)
            comb += wrflag.eq(dest.ok & fu.busy_o)

            # connect request-write to picker input, and output to go-wr
            fu_active = fu_bitdict[funame]
            pick = fu.wr.rel_o[idx] & fu_active # & wrflag
            comb += wrpick.i[pi].eq(pick)
            # create a single-pulse go write from the picker output
            wr_pick = Signal()
            comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
            comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

            # connect the regspec write "reg select" number to this port
            # only if one FU actually requests (and is granted) the port
            # will the write-enable be activated
            addr_en = Signal.like(write)
            wp = Signal()
            comb += wp.eq(wr_pick & wrpick.en_o)
            # reg-select value when granted, zero otherwise (so that the
            # values can be ORed together further down)
            comb += addr_en.eq(Mux(wp, write, 0))
            if rfile.unary:
                wens.append(addr_en)
            else:
                addrs.append(addr_en)
                wens.append(wp)

            # connect regfile port to input
            print("reg connect widths",
                  regfile, regname, pi, funame,
                  dest.shape(), wport.i_data.shape())
            wsigs.append(fu_dest_latch)

    # here is where we create the Write Broadcast Bus. simple, eh?
    comb += wport.i_data.eq(ortreereduce_sig(wsigs))
    if rfile.unary:
        # for unary-addressed
        comb += wport.wen.eq(ortreereduce_sig(wens))
    else:
        # for binary-addressed
        comb += wport.addr.eq(ortreereduce_sig(addrs))
        comb += wport.wen.eq(ortreereduce_sig(wens))
481
def connect_wrports(self, m, fu_bitdict):
    """connect write ports

    takes the write regspecs grouped by regfile and by regport name (as
    returned by get_byregfiles) and hands each named port, together with
    the FUs that want it, to connect_wrport.  with regreduce enabled,
    some INT and FAST ports are first merged into a single port.

    note that the write-port wen, write-port data, and go_wr_i all need to
    be on the exact same clock cycle.
    """
    # dictionary of lists of regfile write ports
    byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

    # same for write ports.
    # BLECH!  complex code-duplication! BLECH!
    wrpickers = {}
    for regfile in byregfiles_wr:
        fuspecs = byregfiles_wrspec[regfile]
        wrpickers[regfile] = {}

        if self.regreduce_en:
            # argh, more port-merging
            if regfile == 'INT':
                # o1 is folded into o
                merged = [fuspecs.pop(port) for port in ('o', 'o1')]
                fuspecs['o'] = merged
            if regfile == 'FAST':
                # fast2 and fast3, where present, are folded into fast1
                merged = [fuspecs.pop('fast1')]
                for extra in ('fast2', 'fast3'):
                    if extra in fuspecs:
                        merged.append(fuspecs.pop(extra))
                fuspecs['fast1'] = merged

        for regname, fspec in sort_fuspecs(fuspecs):
            self.connect_wrport(m, fu_bitdict, wrpickers,
                                regfile, regname, fspec)
521
def get_byregfiles(self, readmode):
    """group the Function Units' register ports by regfile

    readmode -- True to walk each FU's source (read) specs, False to
                walk its destination (write) specs

    returns a pair of dicts, both keyed by regfile name:

    * byregfiles[regfile][idx] is a list of (funame, fu, idx) entries
    * byregfiles_spec[regfile][regname] is a tuple
      (rdflag, read, write, wid, fuspecs) where fuspecs is the list of
      (funame, fu, idx) entries using that port.  rdflag/read come from
      regspec_decode_read (None in write mode) and write comes from
      regspec_decode_write (None in read mode); all are taken from the
      first FU found to use the port.

    the collected information is also printed, for convenience.
    """
    mode = "read" if readmode else "write"
    e = self.e # decoded instruction to execute

    # dictionary of lists of regfile ports
    byregfiles = {}
    byregfiles_spec = {}
    for funame, fu in self.fus.fus.items():
        print("%s ports for %s" % (mode, funame))
        n_ports = fu.n_src if readmode else fu.n_dst
        get_spec = fu.get_in_spec if readmode else fu.get_out_spec
        for idx in range(n_ports):
            regfile, regname, wid = get_spec(idx)
            print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
            if readmode:
                rdflag, read = regspec_decode_read(e, regfile, regname)
                write = None
            else:
                rdflag, read = None, None
                _, write = regspec_decode_write(e, regfile, regname)

            # both dicts gain their per-regfile entry at the same time
            lanes = byregfiles.setdefault(regfile, {})
            portspecs = byregfiles_spec.setdefault(regfile, {})
            if regname not in portspecs:
                portspecs[regname] = (rdflag, read, write, wid, [])

            # here we start to create "lanes"
            fuspec = (funame, fu, idx)
            lanes.setdefault(idx, []).append(fuspec)
            portspecs[regname][4].append(fuspec)

    # ok just print that out, for convenience
    for regfile in byregfiles:
        print("regfile %s ports:" % mode, regfile)
        for regname, fspec in byregfiles_spec[regfile].items():
            rdflag, read, write, wid, fuspec = fspec
            print(" rf %s port %s lane: %s" % (mode, regfile, regname))
            print(" %s" % regname, wid, read, write, rdflag)
            for funame, fu, idx in fuspec:
                fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                print(" ", funame, fu, idx, fusig)
                print()

    return byregfiles, byregfiles_spec
573
def __iter__(self):
    """Yield the ports of the Function Units, then those of the main
    decode record, then those of the LD/ST buffer.
    """
    for source in (self.fus, self.e, self.l0):
        yield from source.ports()
    # TODO: regs
579
def ports(self):
    """Return everything produced by iterating over ``self``, as a list."""
    return [*self]
582
583
if __name__ == '__main__':
    # build a core from a test parameter spec and write it out as RTLIL
    settings = {
        "ldst_ifacetype": 'testpi',
        "imem_ifacetype": '',
        "addr_wid": 48,
        "mask_wid": 8,
        "reg_wid": 64,
    }
    pspec = TestMemPspec(**settings)
    dut = NonProductionCore(pspec)

    # convert first: the output file is only opened once that succeeded
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_core.il", "w") as outfile:
        outfile.write(il_text)