add name to write pick on core
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 """
21
22 from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
23 from nmigen.cli import rtlil
24
25 from openpower.decoder.power_decoder2 import PowerDecodeSubset
26 from openpower.decoder.power_regspec_map import regspec_decode_read
27 from openpower.decoder.power_regspec_map import regspec_decode_write
28 from openpower.sv.svp64 import SVP64Rec
29
30 from nmutil.picker import PriorityPicker
31 from nmutil.util import treereduce
32
33 from soc.fu.compunits.compunits import AllFunctionUnits
34 from soc.regfile.regfiles import RegFiles
35 from openpower.decoder.decode2execute1 import Decode2ToExecute1Type
36 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
37 from openpower.decoder.power_decoder2 import get_rdflags
38 from openpower.decoder.decode2execute1 import Data
39 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
40 from soc.config.test.test_loadstore import TestMemPspec
41 from openpower.decoder.power_enums import MicrOp
42 from soc.config.state import CoreState
43
44 import operator
45
46 from nmutil.util import rising_edge
47
48
# OR together one attribute taken from every object in a list, using a
# parallel reduction tree rather than a serial chain.
def ortreereduce(tree, attr="o_data"):
    """Return the tree-reduced OR of ``getattr(x, attr)`` for each x in tree.

    tree: the list of objects to reduce.
    attr: name of the attribute to pick from each object (default "o_data").
    """
    def pick_attr(node):
        return getattr(node, attr)

    return treereduce(tree, operator.or_, pick_attr)
53
54
def ortreereduce_sig(tree):
    """Return the tree-reduced OR of the items in tree, used as-is.

    Same reduction as ortreereduce, except that each list element is
    itself the value to be ORed (no attribute lookup is performed).
    """
    def as_is(sig):
        return sig

    return treereduce(tree, operator.or_, as_is)
57
58
# helper function to place "full" register declarations first
def sort_fuspecs(fuspecs):
    """Return fuspecs as a list of (regname, fspec), "full*" names first.

    The relative order inside each of the two groups (names starting with
    "full", and all the others) is the dictionary's own iteration order.
    """
    def is_not_full(entry):
        return not entry[0].startswith("full")

    # sorted() is stable and False sorts before True, so "full" entries
    # move to the front while each group keeps its original ordering.
    return sorted(fuspecs.items(), key=is_not_full)
69
70
class NonProductionCore(Elaboratable):
    """Simple core: Function Units wired to Register Files via pickers.

    Each Function Unit's register read/write requirements are grouped by
    Register File and port name; every such port gets one PriorityPicker
    arbitrating a shared ("broadcast") bus between the Function Units that
    want it.  The Function Unit to run is selected from the decoded
    instruction, one at a time.
    """

    def __init__(self, pspec):
        """Create sub-blocks, interface signals and per-FU decoders.

        pspec: parameter specification object.  The optional attributes
        "svp64" and "regreduce" are tested here ("nocore" is tested in
        elaborate); pspec is also handed on to the L0 cache buffer, the
        Function Units and the Register Files.
        """
        self.pspec = pspec

        # test if SVP64 is to be enabled
        self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)

        # test to see if regfile ports should be reduced
        self.regreduce_en = (hasattr(pspec, "regreduce") and
                             (pspec.regreduce == True))

        # single LD/ST funnel for memory access
        self.l0 = l0 = TstL0CacheBuffer(pspec, n_units=1)
        pi = l0.l0.dports[0]

        # function units (only one each)
        # only include mmu if enabled in pspec
        self.fus = AllFunctionUnits(pspec, pilist=[pi])

        # link LoadStore1 into MMU
        mmu = self.fus.get_fu('mmu0')
        print("core pspec", pspec.ldst_ifacetype)
        print("core mmu", mmu)
        print("core lsmem.lsi", l0.cmpi.lsmem.lsi)
        if mmu is not None:
            mmu.alu.set_ldst_interface(l0.cmpi.lsmem.lsi)

        # register files (yes plural)
        self.regs = RegFiles(pspec)

        # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
        self.e = Decode2ToExecute1Type("core", opkls=IssuerDecode2ToOperand,
                                       regreduce_en=self.regreduce_en)

        # SVP64 RA_OR_ZERO needs to know if the relevant EXTRA2/3 field is zero
        self.sv_a_nz = Signal()

        # state and raw instruction (and SVP64 ReMap fields)
        self.state = CoreState("core")
        self.raw_insn_i = Signal(32)  # raw instruction
        self.bigendian_i = Signal()  # bigendian - TODO, set by MSR.BE
        if self.svp64_en:
            self.sv_rm = SVP64Rec(name="core_svp64_rm")  # SVP64 RM field
            self.is_svp64_mode = Signal()  # set if SVP64 mode is enabled
            self.use_svp64_ldst_dec = Signal()  # use alternative LDST decoder
            self.sv_pred_sm = Signal()  # TODO: SIMD width
            self.sv_pred_dm = Signal()  # TODO: SIMD width

        # issue/valid/busy signalling
        self.ivalid_i = Signal(reset_less=True)  # instruction is valid
        self.issue_i = Signal(reset_less=True)
        self.busy_o = Signal(name="corebusy_o", reset_less=True)

        # start/stop and terminated signalling
        self.core_terminate_o = Signal(reset=0)  # indicates stopped

        # create per-FU instruction decoders (subsetted)
        self.decoders = {}
        self.des = {}

        # name of the TRAP Function Unit.  stays None when there is no such
        # unit, so that elaborate() can test for it rather than failing
        # with an AttributeError.
        self.trapunit = None

        for funame, fu in self.fus.fus.items():
            f_name = fu.fnunit.name
            opkls = fu.opsubsetkls
            if f_name == 'TRAP':
                # TRAP decoder is the *main* decoder
                self.trapunit = funame
                continue
            self.decoders[funame] = PowerDecodeSubset(
                None, opkls, f_name,
                final=True,
                state=self.state,
                svp64_en=self.svp64_en,
                regreduce_en=self.regreduce_en)
            self.des[funame] = self.decoders[funame].do

        # the mmu0 decoder is given a reference to the spr0 decoder
        if "mmu0" in self.decoders:
            self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]

    def elaborate(self, platform):
        """Build the nmigen Module: decoders, FU issue, read/write ports."""
        m = Module()
        # for testing purposes, to cut down on build time in coriolis2
        if hasattr(self.pspec, "nocore") and self.pspec.nocore == True:
            x = Signal()  # dummy signal
            m.d.sync += x.eq(~x)
            return m
        comb = m.d.comb

        m.submodules.fus = self.fus
        m.submodules.l0 = self.l0
        self.regs.elaborate_into(m, platform)

        # connect decoders
        for k, v in self.decoders.items():
            # connect each satellite decoder and give it the instruction.
            # as subset decoders this massively reduces wire fanout given
            # the large number of ALUs
            setattr(m.submodules, "dec_%s" % v.fn_name, v)
            comb += v.dec.raw_opcode_in.eq(self.raw_insn_i)
            comb += v.dec.bigendian.eq(self.bigendian_i)
            # sigh due to SVP64 RA_OR_ZERO detection connect these too
            comb += v.sv_a_nz.eq(self.sv_a_nz)
            if self.svp64_en:
                comb += v.pred_sm.eq(self.sv_pred_sm)
                comb += v.pred_dm.eq(self.sv_pred_dm)
                if k != self.trapunit:
                    # pass through SVP64 ReMap
                    comb += v.sv_rm.eq(self.sv_rm)
                    comb += v.is_svp64_mode.eq(self.is_svp64_mode)
                    # only the LDST PowerDecodeSubset *actually* needs to
                    # know to use the alternative decoder. this is all
                    # a terrible hack
                    if k.lower().startswith("ldst"):
                        comb += v.use_svp64_ldst_dec.eq(
                            self.use_svp64_ldst_dec)

        # ssh, cheat: trap uses the main decoder because of the rewriting
        # (skipped when no TRAP Function Unit was found in __init__)
        if self.trapunit is not None:
            self.des[self.trapunit] = self.e.do

        # connect up Function Units, then read/write ports
        fu_bitdict = self.connect_instruction(m)
        self.connect_rdports(m, fu_bitdict)
        self.connect_wrports(m, fu_bitdict)

        return m

    def connect_instruction(self, m):
        """connect_instruction

        uses decoded (from PowerOp) function unit information from CSV files
        to ascertain which Function Unit should deal with the current
        instruction.

        some (such as OP_ATTN, OP_NOP) are dealt with here, including
        ignoring it and halting the processor. OP_NOP is a bit annoying
        because the issuer expects busy flag still to be raised then lowered.
        (this requires a fake counter to be set).

        returns a dictionary, keyed by Function Unit name, of the single-bit
        "this FU is enabled" signals.
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus

        # enable-signals for each FU, get one bit for each FU (by name)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_bitdict = {}
        for i, funame in enumerate(fus.keys()):
            fu_bitdict[funame] = fu_enable[i]

        # enable the required Function Unit based on the opcode decode
        # note: this *only* works correctly for simple core when one and
        # *only* one FU is allocated per instruction
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            enable = Signal(name="en_%s" % funame, reset_less=True)
            comb += enable.eq((self.e.do.fn_unit & fnunit).bool())
            comb += fu_bitdict[funame].eq(enable)

        # sigh - need a NOP counter: busy is held high while it counts down
        counter = Signal(2)
        with m.If(counter != 0):
            sync += counter.eq(counter - 1)
            comb += self.busy_o.eq(1)

        with m.If(self.ivalid_i):  # run only when valid
            with m.Switch(self.e.do.insn_type):
                # check for ATTN: halt if true
                with m.Case(MicrOp.OP_ATTN):
                    sync += self.core_terminate_o.eq(1)

                with m.Case(MicrOp.OP_NOP):
                    sync += counter.eq(2)
                    comb += self.busy_o.eq(1)

                with m.Default():
                    # connect up instructions. only one enabled at a time
                    for funame, fu in fus.items():
                        do = self.des[funame]
                        enable = fu_bitdict[funame]

                        # run this FunctionUnit if enabled
                        # route op, issue, busy, read flags and mask to FU
                        with m.If(enable):
                            # operand comes from the *local* decoder
                            comb += fu.oper_i.eq_from(do)
                            comb += fu.issue_i.eq(self.issue_i)
                            comb += self.busy_o.eq(fu.busy_o)
                            # rdmask, which is for registers, needs to come
                            # from the *main* decoder
                            rdmask = get_rdflags(self.e, fu)
                            comb += fu.rdmaskn.eq(~rdmask)

        return fu_bitdict

    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
        """Connect one regfile read port to every FU that wants to read it.

        m: the Module being built.
        fu_bitdict: per-FU enable bits, as returned by connect_instruction.
        rdpickers: dict-of-dicts (by regfile, by port) into which the
            PriorityPicker created here is stored.
        regfile, regname: regfile name and read port name.
        fspec: one (rdflag, read, write, wid, fuspec-list) tuple, or a list
            of them when several logical ports have been merged onto this
            one physical port.
        """
        comb, sync = m.d.comb, m.d.sync
        regs = self.regs

        rpidx = regname

        # select the required read port. these are pre-defined sizes
        rfile = regs.rf[regfile.lower()]
        rport = rfile.r_ports[rpidx]
        print("read regfile", rpidx, regfile, regs.rf.keys(),
              rfile, rfile.unary)

        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        rdflags = []
        pplen = 0
        reads = []
        ppoffs = []
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = fspec
            print("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)
            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name=name, reset_less=True)
            comb += rdflag.eq(rf)
            rdflags.append(rdflag)
            reads.append(read)

        print("pplen", pplen)

        # create a priority picker to manage this port
        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

        rens = []
        addrs = []
        for i, fspec in enumerate(fspecs):
            (rf, read, write, wid, fuspec) = fspec
            # connect up the FU req/go signals, and the reg-read to the FU
            # and create a Read Broadcast Bus
            for pi, (funame, fu, idx) in enumerate(fuspec):
                # picker inputs of merged specs are laid out back-to-back
                pi += ppoffs[i]

                # connect request-read to picker input, and output to go-rd
                fu_active = fu_bitdict[funame]
                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
                addr_en = Signal.like(reads[i], name="addr_en_"+name)
                pick = Signal(name="pick_"+name)  # picker input
                rp = Signal(name="rp_"+name)  # picker output
                # read-enable "underway"
                delay_pick = Signal(name="dp_"+name)

                # exclude any currently-enabled read-request (mask out active)
                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
                                ~delay_pick)
                comb += rdpick.i[pi].eq(pick)
                # pass in *delayed* pick
                comb += fu.go_rd_i[idx].eq(delay_pick)

                # if picked, select read-port "reg select" number to port
                comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
                sync += delay_pick.eq(rp)  # delayed "pick"
                comb += addr_en.eq(Mux(rp, reads[i], 0))

                # the read-enable happens combinatorially (see mux-bus below)
                # but it results in the data coming out on a one-cycle delay.
                if rfile.unary:
                    rens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    rens.append(rp)

                # use the *delayed* pick signal to put requested data onto bus
                with m.If(delay_pick):
                    # connect regfile port to input, creating fan-out Bus
                    src = fu.src_i[idx]
                    print("reg connect widths",
                          regfile, regname, pi, funame,
                          src.shape(), rport.o_data.shape())
                    # all FUs connect to same port
                    comb += src.eq(rport.o_data)

        # or-reduce the muxed read signals
        if rfile.unary:
            # for unary-addressed
            comb += rport.ren.eq(ortreereduce_sig(rens))
        else:
            # for binary-addressed
            comb += rport.addr.eq(ortreereduce_sig(addrs))
            comb += rport.ren.eq(Cat(*rens).bool())
            print("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    def connect_rdports(self, m, fu_bitdict):
        """connect read ports

        orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker.
        """
        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        rdpickers = {}
        for regfile in byregfiles_rd:
            fuspecs = byregfiles_rdspec[regfile]
            rdpickers[regfile] = {}

            # argh. an experiment to merge RA and RB in the INT regfile
            # (we have too many read/write ports)
            if self.regreduce_en:
                if regfile == 'INT':
                    fuspecs['rabc'] = [fuspecs.pop('rb')]
                    fuspecs['rabc'].append(fuspecs.pop('rc'))
                    fuspecs['rabc'].append(fuspecs.pop('ra'))
                if regfile == 'FAST':
                    fuspecs['fast1'] = [fuspecs.pop('fast1')]
                    if 'fast2' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast2'))
                    if 'fast3' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast3'))

            # for each named regfile port, connect up all FUs to that port
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect rd", regname, fspec)
                self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                    regname, fspec)

    def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
        """Connect one regfile write port to every FU that wants to write it.

        m: the Module being built.
        fu_bitdict: per-FU enable bits, as returned by connect_instruction.
        wrpickers: dict-of-dicts (by regfile, by port) into which the
            PriorityPicker created here is stored.
        regfile, regname: regfile name and write port name.
        fspec: one (rdflag, read, write, wid, fuspec-list) tuple, or a list
            of them when several logical ports have been merged onto this
            one physical port.
        """
        comb = m.d.comb
        regs = self.regs

        print("connect wr", regname, fspec)
        rpidx = regname

        # select the required write port. these are pre-defined sizes
        print(regfile, regs.rf.keys())
        rfile = regs.rf[regfile.lower()]
        wport = rfile.w_ports[rpidx]

        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        pplen = 0
        ppoffs = []
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = fspec
            print("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)

        # create a priority picker to manage this port
        wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
        setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

        wsigs = []
        wens = []
        addrs = []
        for i, fspec in enumerate(fspecs):
            # connect up the FU req/go signals and the reg-read to the FU
            # these are arbitrated by Data.ok signals
            (rf, read, write, wid, fuspec) = fspec
            for pi, (funame, fu, idx) in enumerate(fuspec):
                # picker inputs of merged specs are laid out back-to-back
                pi += ppoffs[i]

                # write-request comes from dest.ok
                dest = fu.get_out(idx)
                fu_dest_latch = fu.get_fu_out(idx)  # latched output
                name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                # wrflag is driven but is not (currently) part of the pick
                # expression below: see the commented-out "& wrflag"
                wrflag = Signal(name=name, reset_less=True)
                comb += wrflag.eq(dest.ok & fu.busy_o)

                # connect request-write to picker input, and output to go-wr
                fu_active = fu_bitdict[funame]
                pick = fu.wr.rel_o[idx] & fu_active  # & wrflag
                comb += wrpick.i[pi].eq(pick)
                # create a single-pulse go write from the picker output
                wr_pick = Signal(name="wpick_%s_%s_%d" %
                                 (funame, regname, idx))
                comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
                comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

                # connect the regspec write "reg select" number to this port
                # only if one FU actually requests (and is granted) the port
                # will the write-enable be activated.  both signals are
                # named, mirroring the read side.
                signame = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
                addr_en = Signal.like(write, name="waddr_en_"+signame)
                wp = Signal(name="wp_"+signame)
                # (wr_pick already includes wrpick.en_o)
                comb += wp.eq(wr_pick & wrpick.en_o)
                comb += addr_en.eq(Mux(wp, write, 0))
                if rfile.unary:
                    wens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    wens.append(wp)

                # connect regfile port to input
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      dest.shape(), wport.i_data.shape())
                wsigs.append(fu_dest_latch)

        # here is where we create the Write Broadcast Bus. simple, eh?
        comb += wport.i_data.eq(ortreereduce_sig(wsigs))
        if rfile.unary:
            # for unary-addressed
            comb += wport.wen.eq(ortreereduce_sig(wens))
        else:
            # for binary-addressed
            comb += wport.addr.eq(ortreereduce_sig(addrs))
            comb += wport.wen.eq(ortreereduce_sig(wens))

    def connect_wrports(self, m, fu_bitdict):
        """connect write ports

        orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker.

        note that the write-port wen, write-port data, and go_wr_i all need to
        be on the exact same clock cycle.

        NOTE(review): this docstring used to say that, because of a
        combinatorial loop bug, these all use sync.  connect_wrport as
        written drives wen, data and go_wr_i from comb - confirm which
        is intended.
        """
        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same for write ports.
        # BLECH! complex code-duplication! BLECH!
        wrpickers = {}
        for regfile in byregfiles_wr:
            fuspecs = byregfiles_wrspec[regfile]
            wrpickers[regfile] = {}

            if self.regreduce_en:
                # argh, more port-merging
                if regfile == 'INT':
                    fuspecs['o'] = [fuspecs.pop('o')]
                    fuspecs['o'].append(fuspecs.pop('o1'))
                if regfile == 'FAST':
                    fuspecs['fast1'] = [fuspecs.pop('fast1')]
                    if 'fast2' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast2'))
                    if 'fast3' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast3'))

            for (regname, fspec) in sort_fuspecs(fuspecs):
                self.connect_wrport(m, fu_bitdict, wrpickers,
                                    regfile, regname, fspec)

    def get_byregfiles(self, readmode):
        """Group every FU's register ports by regfile.

        readmode: True to collect source (read) ports, False to collect
            destination (write) ports.

        returns (byregfiles, byregfiles_spec):
        * byregfiles[regfile][idx] is a list of (funame, fu, idx) tuples
        * byregfiles_spec[regfile][regname] is a tuple
          (rdflag, read, write, wid, list-of-(funame, fu, idx)); rdflag and
          read are None in write mode, write is None in read mode.
        """
        mode = "read" if readmode else "write"
        fus = self.fus.fus
        e = self.e  # decoded instruction to execute

        # dictionary of lists of regfile ports
        byregfiles = {}
        byregfiles_spec = {}
        for (funame, fu) in fus.items():
            print("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                if readmode:
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
                if readmode:
                    rdflag, read = regspec_decode_read(e, regfile, regname)
                    write = None
                else:
                    rdflag, read = None, None
                    # first item of the decode result is not used here
                    _, write = regspec_decode_write(e, regfile, regname)
                if regfile not in byregfiles:
                    byregfiles[regfile] = {}
                    byregfiles_spec[regfile] = {}
                # the first FU seen for a given port name fixes that port's
                # rdflag/read/write/wid; later FUs are only appended
                if regname not in byregfiles_spec[regfile]:
                    byregfiles_spec[regfile][regname] = \
                        (rdflag, read, write, wid, [])
                # here we start to create "lanes"
                if idx not in byregfiles[regfile]:
                    byregfiles[regfile][idx] = []
                fuspec = (funame, fu, idx)
                byregfiles[regfile][idx].append(fuspec)
                byregfiles_spec[regfile][regname][4].append(fuspec)

        # ok just print that out, for convenience
        for regfile in byregfiles:
            print("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, read, write, wid, fuspec] = fspec
                print(" rf %s port %s lane: %s" % (mode, regfile, regname))
                print(" %s" % regname, wid, read, write, rdflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print(" ", funame, fu, idx, fusig)
                    print()

        return byregfiles, byregfiles_spec

    def __iter__(self):
        """Yield the ports of the Function Units, the decoder and the L0."""
        yield from self.fus.ports()
        yield from self.e.ports()
        yield from self.l0.ports()
        # TODO: regs

    def ports(self):
        """Return every port yielded by iterating this object, as a list."""
        return list(self)
585
586
if __name__ == '__main__':
    # stand-alone run: build the core from a test pspec, convert it to
    # RTLIL and write the result out to test_core.il
    core_pspec = TestMemPspec(ldst_ifacetype='testpi',
                              imem_ifacetype='',
                              addr_wid=48,
                              mask_wid=8,
                              reg_wid=64)
    core = NonProductionCore(core_pspec)
    il_text = rtlil.convert(core, ports=core.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(il_text)