6bca3bd8b02215bb0f61c8468cc8297582d0a699
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 (update: actually this is being added now:
21 https://bugs.libre-soc.org/show_bug.cgi?id=737)
22 """
23
24 from nmigen import (Elaboratable, Module, Signal, ResetSignal, Cat, Mux,
25 Const)
26 from nmigen.cli import rtlil
27
28 from openpower.decoder.power_decoder2 import PowerDecodeSubset
29 from openpower.decoder.power_regspec_map import regspec_decode
30 from openpower.sv.svp64 import SVP64Rec
31
32 from nmutil.picker import PriorityPicker
33 from nmutil.util import treereduce
34 from nmutil.singlepipe import ControlBase
35
36 from soc.fu.compunits.compunits import AllFunctionUnits, LDSTFunctionUnit
37 from soc.regfile.regfiles import RegFiles
38 from openpower.decoder.power_decoder2 import get_rdflags
39 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
40 from soc.config.test.test_loadstore import TestMemPspec
41 from openpower.decoder.power_enums import MicrOp, Function
42 from soc.simple.core_data import CoreInput, CoreOutput
43
44 from collections import defaultdict, namedtuple
45 import operator
46
47 from nmutil.util import rising_edge
48
49 FUSpec = namedtuple("FUSpec", ["funame", "fu", "idx"])
50 ByRegSpec = namedtuple("ByRegSpec", ["okflag", "regport", "wid", "specs"])
51
52 # helper function for reducing a list of signals down to a parallel
53 # ORed single signal.
def ortreereduce(tree, attr="o_data"):
    """OR together, as a parallel tree, the *attr* attribute of every item.

    tree: sequence of objects each carrying an attribute named *attr*
    attr: attribute name to extract from each item (default "o_data")

    Returns whatever treereduce produces when combining the extracted
    attributes with operator.or_.
    """
    def pick_attr(item):
        # extract the requested attribute from one leaf of the tree
        return getattr(item, attr)

    return treereduce(tree, operator.or_, pick_attr)
56
57
def ortreereduce_sig(tree):
    """OR together, as a parallel tree, the items of *tree* themselves.

    Same as ortreereduce except that each item is used directly rather
    than having an attribute extracted from it.
    """
    def identity(item):
        # leaves are already the values to be ORed
        return item

    return treereduce(tree, operator.or_, identity)
60
61
62 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the (regname, fspec) items of *fuspecs*, "full" ports first.

    fuspecs: dict mapping a regfile port name to its spec(s).

    Returns a list of (regname, fspec) tuples in which every entry whose
    name starts with "full" precedes every entry whose name does not.
    Within each of the two groups the dict's own iteration order is kept.
    """
    # sorted() is stable, and False sorts before True, so keying on
    # "is NOT a full-register port" moves the "full*" entries to the front
    # without disturbing the relative order inside either group.
    return sorted(fuspecs.items(),
                  key=lambda item: not item[0].startswith("full"))
72
73
74 # a hazard bitvector "remap" function which returns an AST expression
75 # that remaps read/write hazard regfile port numbers to either a full
76 # bitvector or a reduced subset one. SPR for example is reduced to a
77 # single bit.
78 # CRITICALLY-IMPORTANT NOTE: these bitvectors *have* to match up per
79 # regfile! therefore the remapping is per regfile, *NOT* per regfile
80 # port and certainly not based on whether it is a read port or write port.
81 # note that any reductions here will result in degraded performance due
82 # to conflicts, but at least it keeps the hazard matrix sizes down to "sane"
def bitvector_remap(regfile, rfile, port):
    """Remap a regfile port number to a (unary) hazard-bitvector expression.

    regfile: regfile name ('CR', 'XER', 'SVSTATE', 'FAST', 'SPR', 'INT')
    rfile:   the regfile object; only its ``unary`` attribute is read, and
             only for FAST, SPR and INT
    port:    the port number (or AST expression) to remap

    Returns *port* unchanged when the regfile is already unary-addressed,
    ``1 << port`` when it is binary-addressed, and None for a regfile
    name that is not recognised.
    """
    # CR, XER and SVSTATE are treated as already unary: no remap.
    if regfile in ('CR', 'XER', 'SVSTATE'):
        return port

    # FAST, SPR and INT may be either unary or binary addressed: a binary
    # port number has to be converted to a one-hot bit.
    # NOTE(review): the original comment on SPR said "reduce to one" but
    # no reduction was (or is) performed here - confirm what is intended.
    if regfile in ('FAST', 'SPR', 'INT'):
        if rfile.unary:
            return port
        return 1 << port

    # unknown regfile: no remapping expression available
    return None
113
114
115 # derive from ControlBase rather than have a separate Stage instance,
116 # this is simpler to do
117 class NonProductionCore(ControlBase):
def __init__(self, pspec):
    """Set up Function Units, regfiles and per-FU satellite decoders.

    pspec: processor specification.  The optional attributes svp64,
           regreduce, allow_overlap and core_type are examined here
           (a missing boolean option counts as disabled; a missing
           core_type defaults to "fsm"); ldst_ifacetype is printed.
    """
    self.pspec = pspec

    # optional feature switches: SVP64 support, regfile port reduction,
    # and overlapping of instructions (the latter is useful for checking
    # bitvector hazard detection before doing In-Order)
    self.svp64_en = getattr(pspec, "svp64", False) == True
    self.regreduce_en = getattr(pspec, "regreduce", False) == True
    self.allow_overlap = getattr(pspec, "allow_overlap", False) == True

    # hazard vectors are only created when overlap is permitted
    self.make_hazard_vecs = self.allow_overlap
    self.core_type = getattr(pspec, "core_type", "fsm")

    super().__init__(stage=self)

    # one single LD/ST funnel provides all memory access
    self.l0 = l0 = TstL0CacheBuffer(pspec, n_units=1)
    pi = l0.l0.dports[0]

    # the function units (only one of each); MMU only if pspec enables it
    self.fus = AllFunctionUnits(pspec, pilist=[pi])

    # hook LoadStore1 up to the MMU (when there is one)
    mmu = self.fus.get_fu('mmu0')
    ldst0 = self.fus.get_fu('ldst0')
    print ("core pspec", pspec.ldst_ifacetype)
    print ("core mmu", mmu)
    if mmu is not None:
        lsi = l0.cmpi.lsmem.lsi  # a LoadStore1 Interface object
        print ("core lsmem.lsi", lsi)
        mmu.alu.set_ldst_interface(lsi)
        # keep hold of the I-Cache here so that it is easy to reach
        self.icache = lsi.icache

    # the (plural) register files
    self.regs = RegFiles(pspec, make_hazard_vecs=self.make_hazard_vecs)

    # input and output data are set directly: an unusual requirement,
    # needed because the core lives in a different domain
    # (see TestIssuer.setup_peripherals)
    self.p.i_data, self.n.o_data = self.new_specs(None)
    self.i, self.o = self.p.i_data, self.n.o_data

    # captured copy of the input, which is what is used internally
    self.ireg = self.ispec()

    # per-FU (subsetted) "satellite" instruction decoders.  each FU
    # receives the 32-bit instruction and decodes what it needs locally,
    # instead of a 200+ wire fan-out from the main PowerDecoder2 to
    # twelve or more Function Units.
    # (these ought to be per-FU, i.e. shared - put into the ALU once -
    # for FunctionUnitBaseMulti)
    self.decoders = {}
    self.des = {}

    for funame, fu in self.fus.fus.items():
        f_name = fu.fnunit.name
        fnunit = fu.fnunit.value
        opkls = fu.opsubsetkls
        if f_name == 'TRAP':
            # the TRAP unit uses the *main* decoder: just note its name
            self.trapunit = funame
        else:
            assert funame not in self.decoders
            self.decoders[funame] = PowerDecodeSubset(
                None, opkls, f_name,
                final=True,
                state=self.ireg.state,
                svp64_en=self.svp64_en,
                regreduce_en=self.regreduce_en)
            self.des[funame] = self.decoders[funame].do

    # a write-after-write hazard signal for every Function Unit
    # (really this belongs in ReservationStations)
    for funame, fu in self.fus.fus.items():
        fu._waw_hazard = Signal(name="waw_%s" % funame)

    # when an MMU decoder exists it shares the SPR decoder
    if "mmu0" in self.decoders:
        self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]
211
212 # next 3 functions are Stage API Compliance
213 def setup(self, m, i):
214 pass
215
def ispec(self):
    """Stage API: return a new CoreInput for this core's configuration.

    The CoreInput is built from the pspec plus the svp64_en and
    regreduce_en flags stored on this instance.
    """
    return CoreInput(self.pspec, self.svp64_en, self.regreduce_en)
218
def ospec(self):
    """Stage API: return a new CoreOutput."""
    return CoreOutput()
221
222 # elaborate function to create HDL
def elaborate(self, platform):
    """Create the HDL for the core and return the Module.

    Adds the Function Units, the L0 cache buffer and the regfiles as
    submodules, wires up the satellite decoders, instruction issue and
    the regfile read/write ports, and ORs together the per-FU
    write-after-write hazards and exception flags.

    When pspec has a true "nocore" attribute only a dummy toggling
    signal is generated (cuts down on build time in coriolis2).
    """
    m = super().elaborate(platform)

    # for testing purposes, to cut down on build time in coriolis2
    if hasattr(self.pspec, "nocore") and self.pspec.nocore == True:
        x = Signal() # dummy signal
        m.d.sync += x.eq(~x)
        return m
    comb = m.d.comb

    m.submodules.fus = self.fus
    m.submodules.l0 = self.l0
    self.regs.elaborate_into(m, platform)

    # amalgamate write-hazards into a single top-level Signal
    self.waw_hazard = Signal()
    whaz = [fu._waw_hazard for fu in self.fus.fus.values()]
    comb += self.waw_hazard.eq(Cat(*whaz).bool())

    # connect decoders
    self.connect_satellite_decoders(m)

    # ssh, cheat: trap uses the main decoder because of the rewriting
    self.des[self.trapunit] = self.ireg.e.do

    # connect up Function Units, then read/write ports, and hazard conflict
    self.issue_conflict = Signal()
    fu_bitdict, fu_selected = self.connect_instruction(m)
    raw_hazard = self.connect_rdports(m, fu_bitdict, fu_selected)
    self.connect_wrports(m, fu_bitdict, fu_selected)
    if self.allow_overlap:
        comb += self.issue_conflict.eq(raw_hazard)

    # note if an exception happened.  in a pipelined or OoO design
    # this needs to be accompanied by "shadowing" (or stalling)
    el = [exc.happened for exc in self.fus.excs.values()]
    if el: # at least one exception
        comb += self.o.exc_happened.eq(Cat(*el).bool())

    return m
269
def connect_satellite_decoders(self, m):
    """Add every satellite decoder as a submodule and feed it the insn.

    Each per-FU subset decoder receives the raw instruction, endianness
    and sv_a_nz from the captured input (self.ireg).  With SVP64 enabled
    the predicate modes, RM field and svp64-mode flag are passed too,
    and LDST decoders additionally get use_svp64_ldst_dec.
    """
    comb = m.d.comb
    for fu_name, decoder in self.decoders.items():
        # subset decoders massively reduce wire fan-out, given the large
        # number of ALUs: each one only needs the instruction itself
        m.submodules["dec_%s" % fu_name] = decoder
        comb += decoder.dec.raw_opcode_in.eq(self.ireg.raw_insn_i)
        comb += decoder.dec.bigendian.eq(self.ireg.bigendian_i)
        # also needed, because of SVP64 RA_OR_ZERO detection
        comb += decoder.sv_a_nz.eq(self.ireg.sv_a_nz)

        if self.svp64_en:
            comb += decoder.pred_sm.eq(self.ireg.sv_pred_sm)
            comb += decoder.pred_dm.eq(self.ireg.sv_pred_dm)

            if fu_name != self.trapunit:
                # hand the SVP64 RM through
                comb += decoder.sv_rm.eq(self.ireg.sv_rm)
                comb += decoder.is_svp64_mode.eq(self.ireg.is_svp64_mode)

                # it is only the LDST PowerDecodeSubset that really has
                # to be told to use the alternative decoder (a hack)
                if fu_name.lower().startswith("ldst"):
                    comb += decoder.use_svp64_ldst_dec.eq(
                        self.ireg.use_svp64_ldst_dec)
295
def connect_instruction(self, m):
    """connect_instruction

    uses decoded (from PowerOp) function unit information from CSV files
    to ascertain which Function Unit should deal with the current
    instruction.

    some (such as OP_ATTN, OP_NOP) are dealt with here, including
    ignoring it and halting the processor.  OP_NOP is a bit annoying
    because the issuer expects busy flag still to be raised then lowered.
    (this requires a fake counter to be set).

    Arguments:

    * m: the Module being built (statements are added to m.d.comb and
      m.d.sync, pickers are added to m.submodules)

    Returns a tuple (fu_bitdict, fu_selected), both dicts keyed by
    Function Unit name:

    * fu_bitdict[funame] is the issue-picker output bit for that FU
    * fu_selected[funame] is that FU's busy_o ORed with its picker output
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus

    # indicate if core is busy
    busy_o = self.o.busy_o
    any_busy_o = self.o.any_busy_o

    # connect up temporary copy of incoming instruction. the FSM will
    # either blat the incoming instruction (if valid) into self.ireg
    # or if the instruction could not be delivered, keep dropping the
    # latched copy into ireg
    ilatch = self.ispec()
    self.instr_active = Signal()

    # enable/busy-signals for each FU, get one bit for each FU (by name)
    fu_enable = Signal(len(fus), reset_less=True)
    fu_busy = Signal(len(fus), reset_less=True)
    fu_bitdict = {}
    fu_selected = {}
    for i, funame in enumerate(fus.keys()):
        fu_bitdict[funame] = fu_enable[i]
        fu_selected[funame] = fu_busy[i]

    # identify function units and create a list by fnunit so that
    # PriorityPickers can be created for selecting one of them that
    # isn't busy at the time the incoming instruction needs passing on
    by_fnunit = defaultdict(list)
    for fname, member in Function.__members__.items():
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            if member.value & fnunit: # this FU handles this type of op
                by_fnunit[fname].append((funame, fu)) # add by Function

    # ok now just print out the list of FUs by Function, because we can
    for fname, fu_list in by_fnunit.items():
        print ("FUs by type", fname, fu_list)

    # now create a PriorityPicker per FU-type such that only one
    # non-busy FU will be picked
    issue_pps = {} # (not used further in this method)
    fu_found = Signal() # take a note if no Function Unit was available
    for fname, fu_list in by_fnunit.items():
        i_pp = PriorityPicker(len(fu_list))
        m.submodules['i_pp_%s' % fname] = i_pp
        i_l = []
        for i, (funame, fu) in enumerate(fu_list):
            # match the decoded instruction (e.do.fn_unit) against the
            # "capability" of this FU, gate that by whether that FU is
            # busy, and drop that into the PriorityPicker.
            # this will give us an output of the first available *non-busy*
            # Function Unit (Reservation Station) capable of handling this
            # instruction.
            fnunit = fu.fnunit.value
            en_req = Signal(name="issue_en_%s" % funame, reset_less=True)
            fnmatch = (self.ireg.e.do.fn_unit & fnunit).bool()
            comb += en_req.eq(fnmatch & ~fu.busy_o &
                                self.instr_active)
            i_l.append(en_req) # store in list for doing the Cat-trick
            # picker output, gated by enable: store in fu_bitdict
            po = Signal(name="o_issue_pick_"+funame) # picker output
            comb += po.eq(i_pp.o[i] & i_pp.en_o)
            comb += fu_bitdict[funame].eq(po)
            comb += fu_selected[funame].eq(fu.busy_o | po)
            # if we don't do this, then when there are no FUs available,
            # the "p.o_ready" signal will go back "ok we accepted this
            # instruction" which of course isn't true.
            with m.If(i_pp.en_o):
                comb += fu_found.eq(1)
        # for each input, Cat them together and drop them into the picker
        comb += i_pp.i.eq(Cat(*i_l))

    # rdmask, which is for registers needs to come from the *main* decoder
    for funame, fu in fus.items():
        rdmask = get_rdflags(m, self.ireg.e, fu)
        comb += fu.rdmaskn.eq(~rdmask)

    # sigh - need a NOP counter: whilst non-zero it counts down by one
    # per clock and holds busy_o high
    counter = Signal(2)
    with m.If(counter != 0):
        sync += counter.eq(counter - 1)
        comb += busy_o.eq(1)

    # default to reading from incoming instruction: may be overridden
    # by copy from latch when "waiting"
    comb += self.ireg.eq(self.i)
    # always say "ready" except if overridden
    comb += self.p.o_ready.eq(1)

    # two-state FSM: READY takes the incoming instruction directly,
    # WAITING replays the latched copy (ilatch) until it can be issued
    with m.FSM():
        with m.State("READY"):
            with m.If(self.p.i_valid): # run only when valid
                with m.Switch(self.ireg.e.do.insn_type):
                    # check for ATTN: halt if true
                    with m.Case(MicrOp.OP_ATTN):
                        m.d.sync += self.o.core_terminate_o.eq(1)

                    # fake NOP - this isn't really used (Issuer detects NOP)
                    with m.Case(MicrOp.OP_NOP):
                        sync += counter.eq(2)
                        comb += busy_o.eq(1)

                    with m.Default():
                        comb += self.instr_active.eq(1)
                        # not ready unless an FU below accepts (or, with
                        # overlap allowed, the instruction is latched)
                        comb += self.p.o_ready.eq(0)
                        # connect instructions. only one enabled at a time
                        for funame, fu in fus.items():
                            do = self.des[funame]
                            enable = fu_bitdict[funame]

                            # run this FunctionUnit if enabled route op,
                            # issue, busy, read flags and mask to FU
                            with m.If(enable):
                                # operand comes from the *local* decoder
                                # do not actually issue, though, if there
                                # is a waw hazard. decoder has to still
                                # be asserted in order to detect that, tho
                                comb += fu.oper_i.eq_from(do)
                                # issue when valid (and no write-hazard)
                                comb += fu.issue_i.eq(~self.waw_hazard)
                                # instruction ok, indicate ready
                                comb += self.p.o_ready.eq(1)

                        if self.allow_overlap:
                            # no FU was free, or there is a WaW hazard:
                            # accept the instruction anyway but hold it
                            # in the latch and go and wait
                            with m.If(~fu_found | self.waw_hazard):
                                # latch copy of instruction
                                sync += ilatch.eq(self.i)
                                comb += self.p.o_ready.eq(1) # accept
                                comb += busy_o.eq(1)
                                m.next = "WAITING"

        with m.State("WAITING"):
            comb += self.instr_active.eq(1)
            comb += self.p.o_ready.eq(0)
            comb += busy_o.eq(1)
            # using copy of instruction, keep waiting until an FU is free
            comb += self.ireg.eq(ilatch)
            with m.If(fu_found): # wait for conflict to clear
                # connect instructions. only one enabled at a time
                for funame, fu in fus.items():
                    do = self.des[funame]
                    enable = fu_bitdict[funame]

                    # run this FunctionUnit if enabled route op,
                    # issue, busy, read flags and mask to FU
                    with m.If(enable):
                        # operand comes from the *local* decoder,
                        # which is asserted even if not issued,
                        # so that WaW-detection can check for hazards.
                        # only if the waw hazard is clear does the
                        # instruction actually get issued
                        comb += fu.oper_i.eq_from(do)
                        # issue when valid
                        comb += fu.issue_i.eq(~self.waw_hazard)
                        with m.If(~self.waw_hazard):
                            comb += self.p.o_ready.eq(1)
                            comb += busy_o.eq(0)
                            m.next = "READY"

    print ("core: overlap allowed", self.allow_overlap)
    # true when any FU is busy (including the cycle where it is perhaps
    # to be issued - because that's what fu_busy is)
    comb += any_busy_o.eq(fu_busy.bool())
    if not self.allow_overlap:
        # for simple non-overlap, if any instruction is busy, set
        # busy output for core.
        comb += busy_o.eq(any_busy_o)
    else:
        # sigh deal with a fun situation that needs to be investigated
        # and resolved
        with m.If(self.issue_conflict):
            comb += busy_o.eq(1)
        # make sure that LDST, SPR, MMU, Branch and Trap all say "busy"
        # and do not allow overlap.  these are all the ones that
        # are non-forward-progressing: exceptions etc. that otherwise
        # change CoreState for some reason (MSR, PC, SVSTATE)
        for funame, fu in fus.items():
            if (funame.lower().startswith('ldst') or
                funame.lower().startswith('branch') or
                funame.lower().startswith('mmu') or
                funame.lower().startswith('spr') or
                funame.lower().startswith('trap')):
                with m.If(fu.busy_o):
                    comb += busy_o.eq(1)

    # return both the function unit "enable" dict as well as the "busy".
    # the "busy-or-issued" can be passed in to the Read/Write port
    # connecters to give them permission to request access to regfiles
    return fu_bitdict, fu_selected
496
def connect_rdport(self, m, fu_bitdict, fu_selected,
                           rdpickers, regfile, regname, fspec):
    """Connect one regfile read port to every FU that wants to read it.

    A PriorityPicker is created for the port (and stored in
    rdpickers[regfile][regname]); each FU read-request is an input to
    the picker, and the picked FU has the port data put on its src_i
    one cycle later.

    Arguments:

    * m: the Module being built
    * fu_bitdict: dict by FU name of "issue-enable" bits
    * fu_selected: dict by FU name of "busy-or-issued" bits
    * rdpickers: dict-of-dicts, by regfile then port name; updated here
    * regfile: regfile name (looked up lower-cased in regs.rf)
    * regname: name of the read port in rfile.r_ports
    * fspec: a ByRegSpec, or a list of them (merged ports)

    Returns Const(0) when hazard vectors are not enabled, otherwise a
    Signal which is set when any requested register on this port has
    its write-hazard bit set.
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus
    regs = self.regs

    rpidx = regname

    # select the required read port.  these are pre-defined sizes
    rfile = regs.rf[regfile.lower()]
    rport = rfile.r_ports[rpidx]
    print("read regfile", rpidx, regfile, regs.rf.keys(),
                          rfile, rfile.unary)

    # for checking if the read port has an outstanding write
    if self.make_hazard_vecs:
        wv = regs.wv[regfile.lower()]
        wvchk = wv.q_int # write-vec bit-level hazard check

    # if a hazard is detected on this read port, simply blithely block
    # every FU from reading on it.  this is complete overkill but very
    # simple for now.
    hazard_detected = Signal(name="raw_%s_%s" % (regfile, rpidx))

    # normalise to a list: merged ports pass in a list already
    fspecs = fspec
    if not isinstance(fspecs, list):
        fspecs = [fspecs]

    # first pass: count the total number of picker inputs needed,
    # recording where each fspec's inputs start, and capture its okflag
    rdflags = []
    pplen = 0
    ppoffs = []
    for i, fspec in enumerate(fspecs):
        # get the regfile specs for this regfile port
        print ("fpsec", i, fspec, len(fspec.specs))
        name = "%s_%s_%d" % (regfile, regname, i)
        ppoffs.append(pplen) # record offset for picker
        pplen += len(fspec.specs)
        rdflag = Signal(name="rdflag_"+name, reset_less=True)
        comb += rdflag.eq(fspec.okflag)
        rdflags.append(rdflag)

    print ("pplen", pplen)

    # create a priority picker to manage this port
    rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
    m.submodules["rdpick_%s_%s" % (regfile, rpidx)] = rdpick

    rens = []
    addrs = []
    wvens = []

    # second pass: wire up every (fspec, FU) pair
    for i, fspec in enumerate(fspecs):
        # (rf, wid and fuspecs are unpacked but not used below)
        (rf, _read, wid, fuspecs) = \
                (fspec.okflag, fspec.regport, fspec.wid, fspec.specs)
        # connect up the FU req/go signals, and the reg-read to the FU
        # and create a Read Broadcast Bus
        for pi, fuspec in enumerate(fspec.specs):
            (funame, fu, idx) = (fuspec.funame, fuspec.fu, fuspec.idx)
            pi += ppoffs[i] # index into the (shared) picker
            name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
            fu_active = fu_selected[funame]
            fu_issued = fu_bitdict[funame] # (not used below)

            # get (or set up) a latched copy of read register number
            # and (sigh) also the read-ok flag
            # TODO: use nmutil latchregister
            rhname = "%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name="rdflag_%s_%s" % (funame, rhname),
                            reset_less=True)
            if rhname not in fu.rf_latches:
                rfl = Signal(name="rdflag_latch_%s_%s" % (funame, rhname))
                fu.rf_latches[rhname] = rfl
                with m.If(fu.issue_i):
                    sync += rfl.eq(rdflags[i])
            else:
                rfl = fu.rf_latches[rhname]

            # now the register port
            rname = "%s_%s_%s_%d" % (funame, regfile, regname, pi)
            read = Signal.like(_read, name="read_"+rname)
            if rname not in fu.rd_latches:
                rdl = Signal.like(_read, name="rdlatch_"+rname)
                fu.rd_latches[rname] = rdl
                with m.If(fu.issue_i):
                    sync += rdl.eq(_read)
            else:
                rdl = fu.rd_latches[rname]

            # make the read immediately available on issue cycle
            # after the read cycle, otherwise use the latched copy.
            # this captures the regport and okflag on issue
            with m.If(fu.issue_i):
                comb += read.eq(_read)
                comb += rdflag.eq(rdflags[i])
            with m.Else():
                comb += read.eq(rdl)
                comb += rdflag.eq(rfl)

            # connect request-read to picker input, and output to go-rd
            addr_en = Signal.like(read, name="addr_en_"+name)
            pick = Signal(name="pick_"+name)     # picker input
            rp = Signal(name="rp_"+name)         # picker output
            delay_pick = Signal(name="dp_"+name) # read-enable "underway"
            rhazard = Signal(name="rhaz_"+name)

            # exclude any currently-enabled read-request (mask out active)
            # entirely block anything hazarded from being picked
            comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflag &
                            ~delay_pick & ~rhazard)
            comb += rdpick.i[pi].eq(pick)
            comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick

            # if picked, select read-port "reg select" number to port
            comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
            sync += delay_pick.eq(rp) # delayed "pick"
            comb += addr_en.eq(Mux(rp, read, 0))

            # the read-enable happens combinatorially (see mux-bus below)
            # but it results in the data coming out on a one-cycle delay.
            if rfile.unary:
                rens.append(addr_en)
            else:
                addrs.append(addr_en)
                rens.append(rp)

            # use the *delayed* pick signal to put requested data onto bus
            with m.If(delay_pick):
                # connect regfile port to input, creating fan-out Bus
                src = fu.src_i[idx]
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      src.shape(), rport.o_data.shape())
                # all FUs connect to same port
                comb += src.eq(rport.o_data)

            if not self.make_hazard_vecs:
                continue

            # read the write-hazard bitvector (wv) for any bit that is
            # set for the register this FU wants to read
            wvchk_en = Signal(len(wvchk), name="wv_chk_addr_en_"+name)
            issue_active = Signal(name="rd_iactive_"+name)
            # XXX combinatorial loop here
            comb += issue_active.eq(fu_active & rdflag)
            with m.If(issue_active):
                if rfile.unary:
                    comb += wvchk_en.eq(read)
                else:
                    comb += wvchk_en.eq(1<<read)
            # if FU is busy (which doesn't get set at the same time as
            # issue) and no hazard was detected, clear wvchk_en (i.e.
            # stop checking for hazards).  there is a loop here, but it's
            # via a DFF, so is ok. some linters may complain, but hey.
            with m.If(fu.busy_o & ~rhazard):
                    comb += wvchk_en.eq(0)

            # read-hazard is ANDed with (filtered by) what is actually
            # being requested.
            comb += rhazard.eq((wvchk & wvchk_en).bool())

            wvens.append(wvchk_en)

    # or-reduce the muxed read signals
    if rfile.unary:
        # for unary-addressed
        comb += rport.ren.eq(ortreereduce_sig(rens))
    else:
        # for binary-addressed
        comb += rport.addr.eq(ortreereduce_sig(addrs))
        comb += rport.ren.eq(Cat(*rens).bool())
        print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    if not self.make_hazard_vecs:
        return Const(0) # declare "no hazards"

    # enable the read bitvectors for this issued instruction
    # and return whether any write-hazard bit is set
    # (note: "name" here is whatever the loops above last set it to)
    wvchk_and = Signal(len(wvchk), name="wv_chk_"+name)
    comb += wvchk_and.eq(wvchk & ortreereduce_sig(wvens))
    comb += hazard_detected.eq(wvchk_and.bool())
    return hazard_detected
677
def connect_rdports(self, m, fu_bitdict, fu_selected):
    """connect read ports

    orders the read regspecs into a dict-of-dicts, by regfile, by
    regport name, then connects all FUs that want that regport by
    way of a PriorityPicker.

    Arguments:

    * m: the Module being built
    * fu_bitdict: dict by FU name of "issue-enable" bits
    * fu_selected: dict by FU name of "busy-or-issued" bits

    Returns an expression that is true when any of the read ports
    reported a hazard (the OR of every connect_rdport result).
    """
    rd_hazard = []

    # dictionary of lists of regfile read ports
    byregfiles_rdspec = self.get_byregfiles(m, True)

    # okaay, now we need a PriorityPicker per regfile per regfile port
    # loootta pickers... peter piper picked a pack of pickled peppers...
    rdpickers = {}
    for regfile, fuspecs in byregfiles_rdspec.items():
        rdpickers[regfile] = {}

        # argh.  an experiment to merge RA, RB and RC in the INT regfile
        # (we have too many read/write ports).  the merged entry is a
        # *list* of specs, which connect_rdport knows how to handle
        if self.regreduce_en:
            if regfile == 'INT':
                fuspecs['rabc'] = [fuspecs.pop('rb')]
                fuspecs['rabc'].append(fuspecs.pop('rc'))
                fuspecs['rabc'].append(fuspecs.pop('ra'))
            if regfile == 'FAST':
                fuspecs['fast1'] = [fuspecs.pop('fast1')]
                if 'fast2' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast2'))
                if 'fast3' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast3'))

        # for each named regfile port, connect up all FUs to that port
        # also return (and collate) hazard detection
        for (regname, fspec) in sort_fuspecs(fuspecs):
            print("connect rd", regname, fspec)
            rh = self.connect_rdport(m, fu_bitdict, fu_selected,
                                   rdpickers, regfile,
                                   regname, fspec)
            rd_hazard.append(rh)

    return Cat(*rd_hazard).bool()
723
def make_hazards(self, m, regfile, rfile, wvclr, wvset,
                funame, regname, idx,
                addr_en, wp, fu, fu_active, wrflag, write,
                fu_wrok):
    """make_hazards: a setter and a clearer for the regfile write ports

    setter is at issue time (using PowerDecoder2 regfile write numbers)
    clearer is at regfile write time (when FU has said what to write to)

    there is *one* unusual case here which has to be dealt with:
    when the Function Unit does *NOT* request a write to the regfile
    (has its data.ok bit CLEARED).  this is perfectly legitimate.
    and a royal pain.

    Arguments (as used in this function):

    * m: the Module being built
    * regfile: regfile name (only printed here)
    * rfile: the regfile; rfile.unary selects unary or binary addressing
    * wvclr, wvset: hazard-vector clear / set ports (only their lengths
      are used, to size the returned enables)
    * funame, regname, idx: used to build signal names
    * addr_en: write address (enable) as sent to the regfile port
    * wp: write-pick signal for this FU on this port
    * fu: the Function Unit
    * fu_active: "busy-or-issued" bit for this FU
    * wrflag: decoder "will write" flag for this register
    * write: write register number
    * fu_wrok: FU output data.ok qualified by busy

    Returns (wvaddr_en, wviaddr_en): the hazard-vector *clear* enable
    and the hazard-vector *set* enable respectively.
    """
    comb, sync = m.d.comb, m.d.sync
    name = "%s_%s_%d" % (funame, regname, idx)

    # connect up the bitvector write hazard.  unlike the
    # regfile writeports, a ONE must be written to the corresponding
    # bit of the hazard bitvector (to indicate the existence of
    # the hazard)

    # the detection of what shall be written to is based
    # on *issue*.  it is delayed by 1 cycle so that instructions
    # "addi 5,5,0x2" do not cause combinatorial loops due to
    # fake-dependency on *themselves*.  this will totally fail
    # spectacularly when doing multi-issue
    print ("write vector (for regread)", regfile, wvset)
    wviaddr_en = Signal(len(wvset), name="wv_issue_addr_en_"+name)
    issue_active = Signal(name="iactive_"+name)
    sync += issue_active.eq(fu.issue_i & fu_active & wrflag)
    with m.If(issue_active):
        if rfile.unary:
            comb += wviaddr_en.eq(write)
        else:
            comb += wviaddr_en.eq(1<<write)

    # deal with write vector clear: this kicks in when the regfile
    # is written to, and clears the corresponding bitvector entry
    print ("write vector", regfile, wvclr)
    wvaddr_en = Signal(len(wvclr), name="wvaddr_en_"+name)
    if rfile.unary:
        comb += wvaddr_en.eq(addr_en)
    else:
        with m.If(wp):
            comb += wvaddr_en.eq(1<<addr_en)

    # XXX ASSUME that LDSTFunctionUnit always sets the data it intends to
    # this may NOT be the case when an exception occurs
    if isinstance(fu, LDSTFunctionUnit):
        return wvaddr_en, wviaddr_en

    # okaaay, this is preparation for the awkward case.
    # * latch a copy of wrflag when issue goes high.
    # * when the fu_wrok (data.ok) flag is NOT set,
    #   but the FU is done, the FU is NEVER going to write
    #   so the bitvector has to be cleared.
    latch_wrflag = Signal(name="latch_wrflag_"+name)
    with m.If(~fu.busy_o):
        sync += latch_wrflag.eq(0)
    with m.If(fu.issue_i & fu_active):
        sync += latch_wrflag.eq(wrflag)
    # NOTE(review): the binary branch below uses addr_en, with a comment
    # stating that it is not gated.  connect_wrport builds its addr_en
    # as Mux(wp, write, 0): if that is the value passed in here, then
    # with wp low this clears bit 0 rather than bit "write" - confirm.
    with m.If(fu.alu_done_o & latch_wrflag & ~fu_wrok):
        if rfile.unary:
            comb += wvaddr_en.eq(write) # addr_en gated with wp, don't use
        else:
            comb += wvaddr_en.eq(1<<addr_en) # binary addr_en not gated

    return wvaddr_en, wviaddr_en
793
794 def connect_wrport(self, m, fu_bitdict, fu_selected,
795 wrpickers, regfile, regname, fspec):
796 comb, sync = m.d.comb, m.d.sync
797 fus = self.fus.fus
798 regs = self.regs
799
800 rpidx = regname
801
802 # select the required write port. these are pre-defined sizes
803 rfile = regs.rf[regfile.lower()]
804 wport = rfile.w_ports[rpidx]
805
806 print("connect wr", regname, "unary", rfile.unary, fspec)
807 print(regfile, regs.rf.keys())
808
809 # select the write-protection hazard vector. note that this still
810 # requires to WRITE to the hazard bitvector! read-requests need
811 # to RAISE the bitvector (set it to 1), which, duh, requires a WRITE
812 if self.make_hazard_vecs:
813 wv = regs.wv[regfile.lower()]
814 wvset = wv.s # write-vec bit-level hazard ctrl
815 wvclr = wv.r # write-vec bit-level hazard ctrl
816 wvchk = wv.q # write-after-write hazard check
817
818 fspecs = fspec
819 if not isinstance(fspecs, list):
820 fspecs = [fspecs]
821
822 pplen = 0
823 writes = []
824 ppoffs = []
825 wrflags = []
826 for i, fspec in enumerate(fspecs):
827 # get the regfile specs for this regfile port
828 (wf, _write, wid, fuspecs) = \
829 (fspec.okflag, fspec.regport, fspec.wid, fspec.specs)
830 print ("fpsec", i, "wrflag", wf, fspec, len(fuspecs))
831 ppoffs.append(pplen) # record offset for picker
832 pplen += len(fuspecs)
833
834 name = "%s_%s_%d" % (regfile, regname, i)
835 wrflag = Signal(name="wr_flag_"+name)
836 if wf is not None:
837 comb += wrflag.eq(wf)
838 else:
839 comb += wrflag.eq(0)
840 wrflags.append(wrflag)
841
842 # create a priority picker to manage this port
843 wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
844 m.submodules["wrpick_%s_%s" % (regfile, rpidx)] = wrpick
845
846 wsigs = []
847 wens = []
848 wvsets = []
849 wvseten = []
850 wvclren = []
851 #wvens = [] - not needed: reading of writevec is permanently held hi
852 addrs = []
853 for i, fspec in enumerate(fspecs):
854 # connect up the FU req/go signals and the reg-read to the FU
855 # these are arbitrated by Data.ok signals
856 (wf, _write, wid, fuspecs) = \
857 (fspec.okflag, fspec.regport, fspec.wid, fspec.specs)
858 for pi, fuspec in enumerate(fspec.specs):
859 (funame, fu, idx) = (fuspec.funame, fuspec.fu, fuspec.idx)
860 fu_requested = fu_bitdict[funame]
861 pi += ppoffs[i]
862 name = "%s_%s_%s_%d" % (funame, regfile, regname, idx)
863 # get (or set up) a write-latched copy of write register number
864 write = Signal.like(_write, name="write_"+name)
865 rname = "%s_%s_%s_%d" % (funame, regfile, regname, idx)
866 if rname not in fu.wr_latches:
867 wrl = Signal.like(_write, name="wrlatch_"+rname)
868 fu.wr_latches[rname] = write
869 # do not depend on fu.issue_i here, it creates a
870 # combinatorial loop on waw checking. using the FU
871 # "enable" bitdict entry for this FU is sufficient,
872 # because the PowerDecoder2 read/write nums are
873 # valid continuously when the instruction is valid
874 with m.If(fu_requested):
875 sync += wrl.eq(_write)
876 comb += write.eq(_write)
877 with m.Else():
878 comb += write.eq(wrl)
879 else:
880 write = fu.wr_latches[rname]
881
882 # write-request comes from dest.ok
883 dest = fu.get_out(idx)
884 fu_dest_latch = fu.get_fu_out(idx) # latched output
885 name = "%s_%s_%d" % (funame, regname, idx)
886 fu_wrok = Signal(name="fu_wrok_"+name, reset_less=True)
887 comb += fu_wrok.eq(dest.ok & fu.busy_o)
888
889 # connect request-write to picker input, and output to go-wr
890 fu_active = fu_selected[funame]
891 pick = fu.wr.rel_o[idx] & fu_active
892 comb += wrpick.i[pi].eq(pick)
893 # create a single-pulse go write from the picker output
894 wr_pick = Signal(name="wpick_%s_%s_%d" % (funame, regname, idx))
895 comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
896 comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))
897
898 # connect the regspec write "reg select" number to this port
899 # only if one FU actually requests (and is granted) the port
900 # will the write-enable be activated
901 wname = "waddr_en_%s_%s_%d" % (funame, regname, idx)
902 addr_en = Signal.like(write, name=wname)
903 wp = Signal()
904 comb += wp.eq(wr_pick & wrpick.en_o)
905 comb += addr_en.eq(Mux(wp, write, 0))
906 if rfile.unary:
907 wens.append(addr_en)
908 else:
909 addrs.append(addr_en)
910 wens.append(wp)
911
912 # connect regfile port to input
913 print("reg connect widths",
914 regfile, regname, pi, funame,
915 dest.shape(), wport.i_data.shape())
916 wsigs.append(fu_dest_latch)
917
918 # now connect up the bitvector write hazard
919 if not self.make_hazard_vecs:
920 continue
921 res = self.make_hazards(m, regfile, rfile, wvclr, wvset,
922 funame, regname, idx,
923 addr_en, wp, fu, fu_active,
924 wrflags[i], write, fu_wrok)
925 wvaddr_en, wv_issue_en = res
926 wvclren.append(wvaddr_en) # set only: no data => clear bit
927 wvseten.append(wv_issue_en) # set data same as enable
928
929 # read the write-hazard bitvector (wv) for any bit that is
930 fu_requested = fu_bitdict[funame]
931 wvchk_en = Signal(len(wvchk), name="waw_chk_addr_en_"+name)
932 issue_active = Signal(name="waw_iactive_"+name)
933 whazard = Signal(name="whaz_"+name)
934 if wf is None:
935 # XXX EEK! STATE regfile (branch) does not have an
936 # write-active indicator in regspec_decode_write()
937 print ("XXX FIXME waw_iactive", issue_active,
938 fu_requested, wf)
939 else:
940 # check bits from the incoming instruction. note (back
941 # in connect_instruction) that the decoder is held for
942 # us to be able to do this, here... *without* issue being
943 # held HI. we MUST NOT gate this with fu.issue_i or
944 # with fu_bitdict "enable": it would create a loop
945 comb += issue_active.eq(wf)
946 with m.If(issue_active):
947 if rfile.unary:
948 comb += wvchk_en.eq(write)
949 else:
950 comb += wvchk_en.eq(1<<write)
951 # if FU is busy (which doesn't get set at the same time as
952 # issue) and no hazard was detected, clear wvchk_en (i.e.
953 # stop checking for hazards). there is a loop here, but it's
954 # via a DFF, so is ok. some linters may complain, but hey.
955 with m.If(fu.busy_o & ~whazard):
956 comb += wvchk_en.eq(0)
957
958 # write-hazard is ANDed with (filtered by) what is actually
959 # being requested. the wvchk data is on a one-clock delay,
960 # and wvchk_en comes directly from the main decoder
961 comb += whazard.eq((wvchk & wvchk_en).bool())
962 with m.If(whazard):
963 comb += fu._waw_hazard.eq(1)
964
965 #wvens.append(wvchk_en)
966
967 # here is where we create the Write Broadcast Bus. simple, eh?
968 comb += wport.i_data.eq(ortreereduce_sig(wsigs))
969 if rfile.unary:
970 # for unary-addressed
971 comb += wport.wen.eq(ortreereduce_sig(wens))
972 else:
973 # for binary-addressed
974 comb += wport.addr.eq(ortreereduce_sig(addrs))
975 comb += wport.wen.eq(ortreereduce_sig(wens))
976
977 if not self.make_hazard_vecs:
978 return [], []
979
980 # return these here rather than set wvclr/wvset directly,
981 # because there may be more than one write-port to a given
982 # regfile. example: XER has a write-port for SO, CA, and OV
983 # and the *last one added* of those would overwrite the other
984 # two. solution: have connect_wrports collate all the
985 # or-tree-reduced bitvector set/clear requests and drop them
986 # in as a single "thing". this can only be done because the
987 # set/get is an unary bitvector.
988 print ("make write-vecs", regfile, regname, wvset, wvclr)
989 return (wvclren, # clear (regfile write)
990 wvseten) # set (issue time)
991
def connect_wrports(self, m, fu_bitdict, fu_selected):
    """connect every Function Unit result to its regfile write port

    the write regspecs are obtained from get_byregfiles(m, False),
    keyed first by regfile and then by regfile port name.  each port
    (or merged group of ports, when regreduce_en is set) is handed to
    connect_wrport, which gives back two lists: the write-hazard
    vector "clear" requests and "set" requests for that port.

    when make_hazard_vecs is set, those lists are collated per regfile
    and OR-reduced onto that regfile's write-vector "r" (clear) and
    "s" (set) inputs, exactly once per regfile.

    m           -- Module being built
    fu_bitdict  -- passed straight through to connect_wrport
    fu_selected -- passed straight through to connect_wrport

    returns None.

    NOTE(review): the previous docstring stated that write-port wen,
    write-port data and go_wr_i must be on the same clock cycle and
    "all use sync" because of a combinatorial loop bug.  every statement
    added by this method goes into the comb domain - confirm whether
    that remark still applies.
    """
    comb = m.d.comb
    hazard_regs = self.regs
    # regfile name => {regfile port name => write regspec}
    wrspecs_by_regfile = self.get_byregfiles(m, False)

    pickers = {}                       # filled in by connect_wrport
    clr_requests = defaultdict(list)   # lower-case regfile => clear enables
    set_requests = defaultdict(list)   # lower-case regfile => set enables

    for regfile, portspecs in wrspecs_by_regfile.items():
        pickers[regfile] = {}
        hazkey = regfile.lower()

        if self.regreduce_en:
            # port-merging: several named ports are folded into a single
            # list-valued entry (connect_wrport accepts either one spec
            # or a list of specs)
            if regfile == 'INT':
                merged = [portspecs.pop('o')]
                merged.append(portspecs.pop('o1'))
                portspecs['o'] = merged
            if regfile == 'FAST':
                merged = [portspecs.pop('fast1')]
                for extra in ('fast2', 'fast3'):
                    if extra in portspecs:
                        merged.append(portspecs.pop(extra))
                portspecs['fast1'] = merged

        # a regfile may have more than one write port: accumulate the
        # hazard requests of all of them under the one regfile key
        for regname, fspec in sort_fuspecs(portspecs):
            clr_ens, set_ens = self.connect_wrport(
                m, fu_bitdict, fu_selected, pickers,
                regfile, regname, fspec)
            clr_requests[hazkey].extend(clr_ens)
            set_requests[hazkey].extend(set_ens)

    if not self.make_hazard_vecs:
        return

    # drive each write-vector once, from the OR of all requests.  doing
    # a separate comb assignment per write port would leave only the
    # last port's assignment in effect (e.g. XER has write ports for
    # SO, CA and OV, all sharing one hazard vector)
    for hazkey, clr_ens in clr_requests.items():
        wv = hazard_regs.wv[hazkey]
        comb += wv.r.eq(ortreereduce_sig(clr_ens))               # clear
        comb += wv.s.eq(ortreereduce_sig(set_requests[hazkey]))  # set
1056
def get_byregfiles(self, m, readmode):
    """group Function Unit register ports by regfile and by port name

    m        -- Module being built (handed on to regspec_decode)
    readmode -- True: walk each FU's source operands (fu.n_src) and
                reset its rd_latches / rf_latches dictionaries.
                False: walk each FU's destinations (fu.n_dst) and
                reset its wr_latches dictionary.

    returns a defaultdict(dict): first key is the regfile name, second
    key the regfile port name, value a ByRegSpec(okflag, regport, wid,
    specs).  okflag, regport and wid are taken from the first FU found
    for that port; "specs" lists an FUSpec(funame, fu, idx) for every
    FU operand that uses the port.

    a summary of the result is printed to stdout as a side effect.
    """
    mode = "read" if readmode else "write"
    fu_table = self.fus.fus
    decoded = self.ireg.e  # decoded instruction to execute

    # regfile => {regfile port name => ByRegSpec}
    byregfiles_spec = defaultdict(dict)

    for funame, fu in fu_table.items():
        # give each FU fresh, empty latch dictionaries: these are filled
        # in later, when the read/write ports are connected
        if readmode:
            fu.rd_latches = {}  # read reg number latches
            fu.rf_latches = {}  # read flag latches
        else:
            fu.wr_latches = {}

        # read mode looks at the FU inputs, write mode at its outputs
        print("%s ports for %s" % (mode, funame))
        operand_count = fu.n_src if readmode else fu.n_dst
        for idx in range(operand_count):
            regfile, regname, wid = fu.get_io_spec(readmode, idx)
            print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))

            # decoded regfile port number (and its ok flag) come from
            # the main PowerDecoder2, not from the satellite decoders
            decinfo = regspec_decode(m, readmode, decoded, regfile, regname)
            okflag = decinfo.okflag
            regport = decinfo.regport

            # first FU to use this port creates the entry...
            lanes = byregfiles_spec[regfile]
            if regname not in lanes:
                lanes[regname] = ByRegSpec(okflag, regport, wid, [])

            # ...and every FU using it joins the "lane", so that a
            # PriorityPicker can later arbitrate access to the port
            lanes[regname].specs.append(FUSpec(funame, fu, idx))

    # dump the whole structure, purely for convenience
    for regfile, lanes in byregfiles_spec.items():
        print("regfile %s ports:" % mode, regfile)
        for regname, lane in lanes.items():
            okflag, regport, wid, users = lane
            print(" rf %s port %s lane: %s" % (mode, regfile, regname))
            print(" %s" % regname, wid, okflag, regport)
            for funame, fu, idx in users:
                fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                print(" ", funame, fu.__class__.__name__, idx, fusig)
                # NOTE(review): blank line assumed to follow each FU
                # entry - confirm the intended nesting of this print
                print()

    return byregfiles_spec
1114
def __iter__(self):
    """yield the core's ports one at a time

    walks, in this order, whatever self.fus.ports(), self.i.e.ports()
    and self.l0.ports() return.  each ports() call is only made once
    the previous group has been exhausted.  regfile ports are not
    included (see TODO below).
    """
    for port in self.fus.ports():
        yield port
    for port in self.i.e.ports():
        yield port
    for port in self.l0.ports():
        yield port
    # TODO: regs
1120
def ports(self):
    """return, as a new list, everything yielded by iterating over self"""
    return [*self]
1123
1124
if __name__ == '__main__':
    # stand-alone run: build a NonProductionCore from a TestMemPspec,
    # convert it to RTLIL and write the text to test_core.il
    pspec_kwargs = dict(ldst_ifacetype='testpi',
                        imem_ifacetype='',
                        addr_wid=48,
                        allow_overlap=True,
                        mask_wid=8,
                        reg_wid=64)
    core = NonProductionCore(TestMemPspec(**pspec_kwargs))
    # convert first, so the output file is only opened (and truncated)
    # once the conversion has succeeded
    rtlil_text = rtlil.convert(core, ports=core.ports())
    with open("test_core.il", "w") as outfile:
        outfile.write(rtlil_text)