add in predicate mask bit detection when zeroing is enabled
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 """
21
22 from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
23 from nmigen.cli import rtlil
24
25 from openpower.decoder.power_decoder2 import PowerDecodeSubset
26 from openpower.decoder.power_regspec_map import regspec_decode_read
27 from openpower.decoder.power_regspec_map import regspec_decode_write
28 from openpower.sv.svp64 import SVP64Rec
29
30 from nmutil.picker import PriorityPicker
31 from nmutil.util import treereduce
32
33 from soc.fu.compunits.compunits import AllFunctionUnits
34 from soc.regfile.regfiles import RegFiles
35 from openpower.decoder.decode2execute1 import Decode2ToExecute1Type
36 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
37 from openpower.decoder.power_decoder2 import get_rdflags
38 from openpower.decoder.decode2execute1 import Data
39 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
40 from soc.config.test.test_loadstore import TestMemPspec
41 from openpower.decoder.power_enums import MicrOp
42 from soc.config.state import CoreState
43
44 import operator
45
46 from nmutil.util import rising_edge
47
48
def ortreereduce(tree, attr="data_o"):
    """Reduce a list of objects down to a single parallel-ORed value.

    The attribute named by *attr* (default ``"data_o"``) is read from every
    item of *tree*, and the results are combined with ``operator.or_`` by
    ``treereduce``.
    """
    def fetch(item):
        # pull the requested attribute off each item before it is ORed
        return getattr(item, attr)

    return treereduce(tree, operator.or_, fetch)
53
54
def ortreereduce_sig(tree):
    """OR together the items of *tree* themselves (no attribute lookup).

    Same reduction as ``ortreereduce`` but each item is passed to
    ``treereduce`` unchanged.
    """
    def passthrough(sig):
        # the items are already the values to be ORed
        return sig

    return treereduce(tree, operator.or_, passthrough)
57
58
def sort_fuspecs(fuspecs):
    """Return the items of *fuspecs* with "full" register names first.

    *fuspecs* is a dict keyed by register (port) name.  The result is a
    list of ``(regname, fspec)`` tuples: every entry whose name starts with
    "full" comes first, followed by all remaining entries.  Within each
    group the dict's own iteration order is kept.
    """
    full_first = []
    remainder = []
    for entry in fuspecs.items():
        # entry is (regname, fspec): route it to the matching group
        group = full_first if entry[0].startswith("full") else remainder
        group.append(entry)
    return full_first + remainder
69
70
class NonProductionCore(Elaboratable):
    """Simple (non-production) core.

    Instantiates the Function Units, the register files, a single LD/ST
    funnel and one subset instruction decoder per Function Unit, then (in
    elaborate) wires each Function Unit to the regfile read/write ports it
    asks for, one PriorityPicker per port.
    """

    def __init__(self, pspec):
        """Create the sub-blocks and the core-level signals.

        pspec is the parameter specification object; the optional
        attributes ``svp64`` and ``regreduce`` are read from it here, and
        it is passed on unchanged to the LD/ST buffer, Function Units and
        register files.
        """
        self.pspec = pspec

        # test if SVP64 is to be enabled
        self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)

        # test to see if regfile ports should be reduced
        self.regreduce_en = (hasattr(pspec, "regreduce") and
                             (pspec.regreduce == True))

        # single LD/ST funnel for memory access
        self.l0 = l0 = TstL0CacheBuffer(pspec, n_units=1)
        pi = l0.l0.dports[0]

        # function units (only one each)
        # only include mmu if enabled in pspec
        self.fus = AllFunctionUnits(pspec, pilist=[pi])

        # link LoadStore1 into MMU
        mmu = self.fus.get_fu('mmu0')
        # debug output
        print ("core pspec", pspec.ldst_ifacetype)
        print ("core mmu", mmu)
        print ("core lsmem.lsi", l0.cmpi.lsmem.lsi)
        if mmu is not None:
            mmu.alu.set_ldst_interface(l0.cmpi.lsmem.lsi)

        # register files (yes plural)
        self.regs = RegFiles(pspec)

        # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
        self.e = Decode2ToExecute1Type("core", opkls=IssuerDecode2ToOperand,
                                       regreduce_en=self.regreduce_en)

        # SVP64 RA_OR_ZERO needs to know if the relevant EXTRA2/3 field is zero
        self.sv_a_nz = Signal()

        # state and raw instruction (and SVP64 ReMap fields)
        self.state = CoreState("core")
        self.raw_insn_i = Signal(32) # raw instruction
        self.bigendian_i = Signal() # bigendian - TODO, set by MSR.BE
        # the SVP64 attributes below only exist when svp64_en is set
        if self.svp64_en:
            self.sv_rm = SVP64Rec(name="core_svp64_rm") # SVP64 RM field
            self.sv_pred_sm = Signal() # TODO: SIMD width
            self.sv_pred_dm = Signal() # TODO: SIMD width

        # issue/valid/busy signalling
        self.ivalid_i = Signal(reset_less=True) # instruction is valid
        self.issue_i = Signal(reset_less=True)
        self.busy_o = Signal(name="corebusy_o", reset_less=True)

        # start/stop and terminated signalling
        self.core_terminate_o = Signal(reset=0) # indicates stopped

        # create per-FU instruction decoders (subsetted)
        self.decoders = {}  # funame -> PowerDecodeSubset
        self.des = {}       # funame -> that decoder's "do" output

        for funame, fu in self.fus.fus.items():
            f_name = fu.fnunit.name
            fnunit = fu.fnunit.value  # NOTE(review): unused in this method
            opkls = fu.opsubsetkls
            # the TRAP unit gets no subset decoder: only its name is
            # recorded, and elaborate() later points it at the main decoder.
            # NOTE(review): self.trapunit is only assigned if a FU named
            # 'TRAP' exists, yet elaborate() reads it unconditionally.
            if f_name == 'TRAP':
                self.trapunit = funame
                continue
            self.decoders[funame] = PowerDecodeSubset(None, opkls, f_name,
                                                      final=True,
                                                      state=self.state,
                                                      svp64_en=self.svp64_en,
                                                      regreduce_en=self.regreduce_en)
            self.des[funame] = self.decoders[funame].do

        # hand the SPR decoder to the MMU decoder.
        # NOTE(review): "spr0" is indexed without a membership check, so
        # this raises KeyError if mmu0 is present but spr0 is not.
        if "mmu0" in self.decoders:
            self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]

    def elaborate(self, platform):
        """Build and return the Module for the core.

        Adds the Function Units, LD/ST buffer, register files and per-FU
        decoders as submodules, feeds the raw instruction to every decoder,
        then connects instruction issue and the regfile read/write ports.
        """
        m = Module()
        # for testing purposes, to cut down on build time in coriolis2
        if hasattr(self.pspec, "nocore") and self.pspec.nocore == True:
            x = Signal() # dummy signal
            m.d.sync += x.eq(~x)
            return m
        comb = m.d.comb

        m.submodules.fus = self.fus
        m.submodules.l0 = l0 = self.l0
        self.regs.elaborate_into(m, platform)
        regs = self.regs
        fus = self.fus.fus

        # connect decoders
        for k, v in self.decoders.items():
            setattr(m.submodules, "dec_%s" % v.fn_name, v)
            comb += v.dec.raw_opcode_in.eq(self.raw_insn_i)
            comb += v.dec.bigendian.eq(self.bigendian_i)
            # sigh due to SVP64 RA_OR_ZERO detection connect these too
            comb += v.sv_a_nz.eq(self.sv_a_nz)
            if self.svp64_en:
                comb += v.pred_sm.eq(self.sv_pred_sm)
                comb += v.pred_dm.eq(self.sv_pred_dm)
                # (the trap unit is never placed in self.decoders by
                # __init__, so this test is always true here)
                if k != self.trapunit:
                    comb += v.sv_rm.eq(self.sv_rm) # pass through SVP64 ReMap

        # ssh, cheat: trap uses the main decoder because of the rewriting
        self.des[self.trapunit] = self.e.do

        # connect up Function Units, then read/write ports
        fu_bitdict = self.connect_instruction(m)
        self.connect_rdports(m, fu_bitdict)
        self.connect_wrports(m, fu_bitdict)

        return m

    def connect_instruction(self, m):
        """connect_instruction

        uses decoded (from PowerOp) function unit information from CSV files
        to ascertain which Function Unit should deal with the current
        instruction.

        some (such as OP_ATTN, OP_NOP) are dealt with here, including
        ignoring it and halting the processor.  OP_NOP is a bit annoying
        because the issuer expects busy flag still to be raised then lowered.
        (this requires a fake counter to be set).

        returns fu_bitdict: a dict mapping each Function Unit name to its
        one-bit enable (a slice of the fu_enable Signal).
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus

        # enable-signals for each FU, get one bit for each FU (by name)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_bitdict = {}
        for i, funame in enumerate(fus.keys()):
            fu_bitdict[funame] = fu_enable[i]

        # enable the required Function Unit based on the opcode decode
        # note: this *only* works correctly for simple core when one and
        # *only* one FU is allocated per instruction
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            enable = Signal(name="en_%s" % funame, reset_less=True)
            # enabled when the main decoder's fn_unit has this FU's bit set
            comb += enable.eq((self.e.do.fn_unit & fnunit).bool())
            comb += fu_bitdict[funame].eq(enable)

        # sigh - need a NOP counter
        # while it is non-zero it counts down and holds busy_o high
        counter = Signal(2)
        with m.If(counter != 0):
            sync += counter.eq(counter - 1)
            comb += self.busy_o.eq(1)

        with m.If(self.ivalid_i): # run only when valid
            with m.Switch(self.e.do.insn_type):
                # check for ATTN: halt if true
                with m.Case(MicrOp.OP_ATTN):
                    m.d.sync += self.core_terminate_o.eq(1)

                # NOP: no FU is involved, so fake "busy" with the counter
                with m.Case(MicrOp.OP_NOP):
                    sync += counter.eq(2)
                    comb += self.busy_o.eq(1)

                with m.Default():
                    # connect up instructions.  only one enabled at a time
                    for funame, fu in fus.items():
                        do = self.des[funame]
                        enable = fu_bitdict[funame]

                        # run this FunctionUnit if enabled
                        # route op, issue, busy, read flags and mask to FU
                        with m.If(enable):
                            # operand comes from the *local* decoder
                            comb += fu.oper_i.eq_from(do)
                            #comb += fu.oper_i.eq_from_execute1(e)
                            comb += fu.issue_i.eq(self.issue_i)
                            comb += self.busy_o.eq(fu.busy_o)
                            # rdmask, which is for registers, needs to come
                            # from the *main* decoder
                            rdmask = get_rdflags(self.e, fu)
                            comb += fu.rdmaskn.eq(~rdmask)

        return fu_bitdict

    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
        """Connect one regfile read port to every FU that reads from it.

        m:          Module being built
        fu_bitdict: FU name -> enable bit (from connect_instruction)
        rdpickers:  dict-of-dicts; the PriorityPicker created here is
                    stored at rdpickers[regfile][regname]
        regfile:    regfile name (lower-cased to index self.regs.rf)
        regname:    read port name within that regfile
        fspec:      one (rf, read, write, wid, fuspec) tuple, or a list of
                    them when several ports have been merged onto this one.
                    fuspec is a list of (funame, fu, idx) entries.
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus
        regs = self.regs

        rpidx = regname

        # select the required read port.  these are pre-defined sizes
        rfile = regs.rf[regfile.lower()]
        rport = rfile.r_ports[rpidx]
        print("read regfile", rpidx, regfile, regs.rf.keys(),
              rfile, rfile.unary)

        # normalise to a list so that merged and unmerged ports share code
        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        rdflags = []  # one "read wanted" flag Signal per fspec
        pplen = 0     # total number of picker inputs needed
        reads = []    # the register-select value of each fspec
        ppoffs = []   # picker index of the first FU of each fspec
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = fspec
            print ("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen) # record offset for picker
            pplen += len(fuspec)
            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name=name, reset_less=True)
            comb += rdflag.eq(rf)
            rdflags.append(rdflag)
            reads.append(read)

        print ("pplen", pplen)

        # create a priority picker to manage this port
        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

        rens = []   # per-FU read-enable contributions
        addrs = []  # per-FU address contributions (binary regfiles only)
        for i, fspec in enumerate(fspecs):
            (rf, read, write, wid, fuspec) = fspec
            # connect up the FU req/go signals, and the reg-read to the FU
            # and create a Read Broadcast Bus
            for pi, (funame, fu, idx) in enumerate(fuspec):
                # shift to this FU's slot in the shared picker
                pi += ppoffs[i]

                # connect request-read to picker input, and output to go-rd
                fu_active = fu_bitdict[funame]
                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
                addr_en = Signal.like(reads[i], name="addr_en_"+name)
                pick = Signal(name="pick_"+name) # picker input
                rp = Signal(name="rp_"+name) # picker output
                delay_pick = Signal(name="dp_"+name) # read-enable "underway"

                # exclude any currently-enabled read-request (mask out active)
                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
                                ~delay_pick)
                comb += rdpick.i[pi].eq(pick)
                comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick

                # if picked, select read-port "reg select" number to port
                comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
                sync += delay_pick.eq(rp) # delayed "pick"
                comb += addr_en.eq(Mux(rp, reads[i], 0))

                # the read-enable happens combinatorially (see mux-bus below)
                # but it results in the data coming out on a one-cycle delay.
                if rfile.unary:
                    rens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    rens.append(rp)

                # use the *delayed* pick signal to put requested data onto bus
                with m.If(delay_pick):
                    # connect regfile port to input, creating fan-out Bus
                    src = fu.src_i[idx]
                    print("reg connect widths",
                          regfile, regname, pi, funame,
                          src.shape(), rport.data_o.shape())
                    # all FUs connect to same port
                    comb += src.eq(rport.data_o)

        # or-reduce the muxed read signals
        if rfile.unary:
            # for unary-addressed
            comb += rport.ren.eq(ortreereduce_sig(rens))
        else:
            # for binary-addressed
            comb += rport.addr.eq(ortreereduce_sig(addrs))
            comb += rport.ren.eq(Cat(*rens).bool())
            print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    def connect_rdports(self, m, fu_bitdict):
        """connect read ports

        orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker.
        """
        # NOTE(review): comb, sync, fus and regs are not used in this method
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus
        regs = self.regs

        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        rdpickers = {}
        for regfile, spec in byregfiles_rd.items():
            fuspecs = byregfiles_rdspec[regfile]
            rdpickers[regfile] = {}

            # argh.  an experiment to merge RA and RB in the INT regfile
            # (we have too many read/write ports)
            # the merged entries become *lists* of fspec tuples, which
            # connect_rdport accepts.  'rb', 'rc', 'ra' (INT) and 'fast1'
            # (FAST) are popped unconditionally; fast2/fast3 only if present.
            if self.regreduce_en:
                if regfile == 'INT':
                    fuspecs['rabc'] = [fuspecs.pop('rb')]
                    fuspecs['rabc'].append(fuspecs.pop('rc'))
                    fuspecs['rabc'].append(fuspecs.pop('ra'))
                if regfile == 'FAST':
                    fuspecs['fast1'] = [fuspecs.pop('fast1')]
                    if 'fast2' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast2'))
                    if 'fast3' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast3'))

            # for each named regfile port, connect up all FUs to that port
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect rd", regname, fspec)
                self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                    regname, fspec)

    def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
        """Connect one regfile write port to every FU that writes to it.

        m:          Module being built
        fu_bitdict: FU name -> enable bit (from connect_instruction)
        wrpickers:  dict-of-dicts; the PriorityPicker created here is
                    stored at wrpickers[regfile][regname]
        regfile:    regfile name (lower-cased to index self.regs.rf)
        regname:    write port name within that regfile
        fspec:      one (rf, read, write, wid, fuspec) tuple, or a list of
                    them when several ports have been merged onto this one.
                    fuspec is a list of (funame, fu, idx) entries.
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus
        regs = self.regs

        print("connect wr", regname, fspec)
        rpidx = regname

        # select the required write port.  these are pre-defined sizes
        print(regfile, regs.rf.keys())
        rfile = regs.rf[regfile.lower()]
        wport = rfile.w_ports[rpidx]

        # normalise to a list so that merged and unmerged ports share code
        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        pplen = 0    # total number of picker inputs needed
        writes = []  # NOTE(review): never appended to or read
        ppoffs = []  # picker index of the first FU of each fspec
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = fspec
            print ("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen) # record offset for picker
            pplen += len(fuspec)

        # create a priority picker to manage this port
        wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
        setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

        wsigs = []  # per-FU latched output data, ORed onto the port below
        wens = []   # per-FU write-enable contributions
        addrs = []  # per-FU address contributions (binary regfiles only)
        for i, fspec in enumerate(fspecs):
            # connect up the FU req/go signals and the reg-read to the FU
            # these are arbitrated by Data.ok signals
            (rf, read, write, wid, fuspec) = fspec
            for pi, (funame, fu, idx) in enumerate(fuspec):
                # shift to this FU's slot in the shared picker
                pi += ppoffs[i]

                # write-request comes from dest.ok
                dest = fu.get_out(idx)
                fu_dest_latch = fu.get_fu_out(idx) # latched output
                name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                wrflag = Signal(name=name, reset_less=True)
                comb += wrflag.eq(dest.ok & fu.busy_o)

                # connect request-write to picker input, and output to go-wr
                # (wrflag is assigned above but is commented out of the
                # pick expression, so it does not gate the request)
                fu_active = fu_bitdict[funame]
                pick = fu.wr.rel_o[idx] & fu_active # & wrflag
                comb += wrpick.i[pi].eq(pick)
                # create a single-pulse go write from the picker output
                wr_pick = Signal()
                comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
                comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

                # connect the regspec write "reg select" number to this port
                # only if one FU actually requests (and is granted) the port
                # will the write-enable be activated
                addr_en = Signal.like(write)
                wp = Signal()
                # (wr_pick already includes wrpick.en_o)
                comb += wp.eq(wr_pick & wrpick.en_o)
                comb += addr_en.eq(Mux(wp, write, 0))
                if rfile.unary:
                    wens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    wens.append(wp)

                # connect regfile port to input
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      dest.shape(), wport.data_i.shape())
                wsigs.append(fu_dest_latch)

        # here is where we create the Write Broadcast Bus. simple, eh?
        comb += wport.data_i.eq(ortreereduce_sig(wsigs))
        if rfile.unary:
            # for unary-addressed
            comb += wport.wen.eq(ortreereduce_sig(wens))
        else:
            # for binary-addressed
            comb += wport.addr.eq(ortreereduce_sig(addrs))
            comb += wport.wen.eq(ortreereduce_sig(wens))

    def connect_wrports(self, m, fu_bitdict):
        """connect write ports

        orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker.

        note that the write-port wen, write-port data, and go_wr_i all need to
        be on the exact same clock cycle.  as there is a combinatorial loop bug
        at the moment, these all use sync.

        NOTE(review): connect_wrport as written drives wen, data_i and
        go_wr_i from m.d.comb, not sync - the sentence above looks stale;
        confirm.
        """
        # NOTE(review): comb, sync, fus and regs are not used in this method
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus
        regs = self.regs
        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same for write ports.
        # BLECH!  complex code-duplication! BLECH!
        wrpickers = {}
        for regfile, spec in byregfiles_wr.items():
            fuspecs = byregfiles_wrspec[regfile]
            wrpickers[regfile] = {}

            if self.regreduce_en:
                # argh, more port-merging
                # 'o', 'o1' (INT) and 'fast1' (FAST) are popped
                # unconditionally; fast2/fast3 only if present.
                if regfile == 'INT':
                    fuspecs['o'] = [fuspecs.pop('o')]
                    fuspecs['o'].append(fuspecs.pop('o1'))
                if regfile == 'FAST':
                    fuspecs['fast1'] = [fuspecs.pop('fast1')]
                    if 'fast2' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast2'))
                    if 'fast3' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast3'))

            for (regname, fspec) in sort_fuspecs(fuspecs):
                self.connect_wrport(m, fu_bitdict, wrpickers,
                                    regfile, regname, fspec)

    def get_byregfiles(self, readmode):
        """Group the Function Units' register ports by regfile.

        readmode selects source (read) ports when true, destination
        (write) ports otherwise.

        returns a tuple (byregfiles, byregfiles_spec):

        * byregfiles[regfile][idx] is a list of (funame, fu, idx)
        * byregfiles_spec[regfile][regname] is a tuple
          (rdflag, read, write, wid, fuspecs) where fuspecs is the list of
          (funame, fu, idx) entries using that port.  rdflag/read are None
          in write mode and write is None in read mode.  the first four
          fields come from the first FU found that names the port.
        """

        mode = "read" if readmode else "write"
        regs = self.regs  # NOTE(review): unused in this method
        fus = self.fus.fus
        e = self.e # decoded instruction to execute

        # dictionary of lists of regfile ports
        byregfiles = {}
        byregfiles_spec = {}
        for (funame, fu) in fus.items():
            print("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                if readmode:
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
                if readmode:
                    rdflag, read = regspec_decode_read(e, regfile, regname)
                    write = None
                else:
                    rdflag, read = None, None
                    # wrport is decoded but not stored in the spec
                    wrport, write = regspec_decode_write(e, regfile, regname)
                if regfile not in byregfiles:
                    byregfiles[regfile] = {}
                    byregfiles_spec[regfile] = {}
                if regname not in byregfiles_spec[regfile]:
                    byregfiles_spec[regfile][regname] = \
                        (rdflag, read, write, wid, [])
                # here we start to create "lanes"
                if idx not in byregfiles[regfile]:
                    byregfiles[regfile][idx] = []
                fuspec = (funame, fu, idx)
                byregfiles[regfile][idx].append(fuspec)
                # element 4 of the spec tuple is the (mutable) list of users
                byregfiles_spec[regfile][regname][4].append(fuspec)

        # ok just print that out, for convenience
        for regfile, spec in byregfiles.items():
            print("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, read, write, wid, fuspec] = fspec
                print(" rf %s port %s lane: %s" % (mode, regfile, regname))
                print(" %s" % regname, wid, read, write, rdflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print(" ", funame, fu, idx, fusig)
                    print()

        return byregfiles, byregfiles_spec

    def __iter__(self):
        """Yield the ports of the Function Units, main decoder and LD/ST
        buffer, in that order (register file ports are not included yet).
        """
        yield from self.fus.ports()
        yield from self.e.ports()
        yield from self.l0.ports()
        # TODO: regs

    def ports(self):
        """Return everything yielded by __iter__ as a list."""
        return list(self)
573
574
if __name__ == '__main__':
    # build a core around the test memory pspec and dump it as RTLIL
    test_pspec = TestMemPspec(
        ldst_ifacetype='testpi',
        imem_ifacetype='',
        addr_wid=48,
        mask_wid=8,
        reg_wid=64,
    )
    core = NonProductionCore(test_pspec)
    il_text = rtlil.convert(core, ports=core.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(il_text)
583 with open("test_core.il", "w") as f:
584 f.write(vl)