start allocating more FUs (more ReservationStations)
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 (update: actually this is being added now:
21 https://bugs.libre-soc.org/show_bug.cgi?id=737)
22 """
23
24 from nmigen import (Elaboratable, Module, Signal, ResetSignal, Cat, Mux,
25 Const)
26 from nmigen.cli import rtlil
27
28 from openpower.decoder.power_decoder2 import PowerDecodeSubset
29 from openpower.decoder.power_regspec_map import regspec_decode_read
30 from openpower.decoder.power_regspec_map import regspec_decode_write
31 from openpower.sv.svp64 import SVP64Rec
32
33 from nmutil.picker import PriorityPicker
34 from nmutil.util import treereduce
35 from nmutil.singlepipe import ControlBase
36
37 from soc.fu.compunits.compunits import AllFunctionUnits, LDSTFunctionUnit
38 from soc.regfile.regfiles import RegFiles
39 from openpower.decoder.power_decoder2 import get_rdflags
40 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
41 from soc.config.test.test_loadstore import TestMemPspec
42 from openpower.decoder.power_enums import MicrOp, Function
43 from soc.simple.core_data import CoreInput, CoreOutput
44
45 from collections import defaultdict, namedtuple
46 import operator
47
48 from nmutil.util import rising_edge
49
50 FUSpec = namedtuple("FUSpec", ["funame", "fu", "idx"])
51 ByRegSpec = namedtuple("ByRegSpec", ["rdport", "wrport", "read",
52 "write", "wid", "specs"])
53
54 # helper function for reducing a list of signals down to a parallel
55 # ORed single signal.
def ortreereduce(tree, attr="o_data"):
    """OR-reduce one attribute of every item in ``tree``.

    Hands ``tree`` to ``treereduce`` with ``operator.or_`` as the
    combining operation.  Each item is first mapped to its attribute
    named ``attr`` (default ``"o_data"``).
    """
    def pick_attr(item):
        # map an item to the attribute that is to be ORed
        return getattr(item, attr)

    return treereduce(tree, operator.or_, pick_attr)
58
59
def ortreereduce_sig(tree):
    """OR-reduce the items of ``tree`` themselves.

    Same as ``ortreereduce`` except that no attribute lookup is done:
    the items are passed to ``treereduce`` unchanged and combined with
    ``operator.or_``.
    """
    def as_is(item):
        # identity mapping: the item itself is what gets ORed
        return item

    return treereduce(tree, operator.or_, as_is)
62
63
64 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the items of ``fuspecs`` with "full" register names first.

    ``fuspecs`` is a mapping of register name to spec.  The result is a
    list of ``(regname, fspec)`` tuples: first every entry whose name
    starts with ``"full"``, then all remaining entries.  Within each of
    the two groups the mapping's own iteration order is kept.
    """
    leading = [(regname, fspec) for (regname, fspec) in fuspecs.items()
               if regname.startswith("full")]
    trailing = [(regname, fspec) for (regname, fspec) in fuspecs.items()
                if not regname.startswith("full")]
    return leading + trailing
74
75
76 # derive from ControlBase rather than have a separate Stage instance,
77 # this is simpler to do
78 class NonProductionCore(ControlBase):
def __init__(self, pspec):
    """Create the core's components from the parameter spec ``pspec``.

    Optional attributes read from ``pspec``: ``svp64``, ``regreduce``,
    ``allow_overlap`` (each enabled only when present and ``== True``)
    and ``core_type`` (defaults to ``"fsm"``).  ``pspec.ldst_ifacetype``
    is also read (for printing).

    In order, this sets up: the LD/ST buffer, the Function Units, the
    MMU to LD/ST link (if an 'mmu0' unit exists), the register files,
    the input/output data specs, one subset decoder per Function Unit
    (the TRAP unit is only recorded in ``self.trapunit``) and one
    write-after-write hazard Signal per Function Unit.
    """
    self.pspec = pspec

    def option_on(optname):
        # an option is enabled only if pspec has it and it is == True
        return hasattr(pspec, optname) and (getattr(pspec, optname) == True)

    # test if SVP64 is to be enabled
    self.svp64_en = option_on("svp64")

    # test to see if regfile ports should be reduced
    self.regreduce_en = option_on("regreduce")

    # test to see if overlapping of instructions is allowed
    # (not normally enabled for TestIssuer FSM but useful for checking
    # the bitvector hazard detection, before doing In-Order)
    self.allow_overlap = option_on("allow_overlap")

    # hazard vectors are created exactly when overlap is allowed
    self.make_hazard_vecs = self.allow_overlap

    # core type: "fsm" unless pspec overrides it
    self.core_type = getattr(pspec, "core_type", "fsm")

    super().__init__(stage=self)

    # single LD/ST funnel for memory access
    self.l0 = TstL0CacheBuffer(pspec, n_units=1)
    ldst_port = self.l0.l0.dports[0]

    # function units (only one each)
    # only include mmu if enabled in pspec
    self.fus = AllFunctionUnits(pspec, pilist=[ldst_port])

    # link LoadStore1 into MMU
    mmu = self.fus.get_fu('mmu0')
    print ("core pspec", pspec.ldst_ifacetype)
    print ("core mmu", mmu)
    if mmu is not None:
        lsi = self.l0.cmpi.lsmem.lsi
        print ("core lsmem.lsi", lsi)
        mmu.alu.set_ldst_interface(lsi)

    # register files (yes plural)
    self.regs = RegFiles(pspec, make_hazard_vecs=self.make_hazard_vecs)

    # set up input and output: unusual requirement to set data directly
    # (due to the way that the core is set up in a different domain,
    # see TestIssuer.setup_peripherals)
    self.p.i_data, self.n.o_data = self.new_specs(None)
    self.i = self.p.i_data
    self.o = self.n.o_data

    # actual internal input data used (captured)
    self.ireg = self.ispec()

    # create per-FU instruction decoders (subsetted).  these "satellite"
    # decoders reduce wire fan-out from the one (main) PowerDecoder2
    # (used directly by the trap unit) to the *twelve* (or more)
    # Function Units.  we can either have 32 wires (the instruction)
    # to each, or we can have well over a 200 wire fan-out (to 12
    # ALUs).  it's an easy choice to make.
    self.decoders = {}
    self.des = {}

    # eep, these should be *per FU* i.e. for FunctionUnitBaseMulti
    # they should be shared (put into the ALU *once*).
    for funame, fu in self.fus.fus.items():
        f_name = fu.fnunit.name
        opkls = fu.opsubsetkls
        if f_name == 'TRAP':
            # TRAP decoder is the *main* decoder: just note which FU it is
            self.trapunit = funame
            continue
        assert funame not in self.decoders
        self.decoders[funame] = PowerDecodeSubset(
            None, opkls, f_name,
            final=True,
            state=self.ireg.state,
            svp64_en=self.svp64_en,
            regreduce_en=self.regreduce_en)
        self.des[funame] = self.decoders[funame].do

    # create per-Function Unit write-after-write hazard signals
    # yes, really, this should have been added in ReservationStations
    # but hey.
    for funame, fu in self.fus.fus.items():
        fu._waw_hazard = Signal(name="waw_%s" % funame)

    # share the SPR decoder with the MMU if it exists
    if "mmu0" in self.decoders:
        mmu_dec = self.decoders["mmu0"]
        mmu_dec.mmu0_spr_dec = self.decoders["spr0"]
168
169 # next 3 functions are Stage API Compliance
170 def setup(self, m, i):
171 pass
172
def ispec(self):
    """Stage API: return a new CoreInput (the core's input data spec).

    The CoreInput is built from this core's pspec and its SVP64 and
    regfile-port-reduction enable flags.
    """
    return CoreInput(self.pspec, self.svp64_en, self.regreduce_en)
175
def ospec(self):
    """Stage API: return a new CoreOutput (the core's output data spec)."""
    return CoreOutput()
178
179 # elaborate function to create HDL
def elaborate(self, platform):
    """Create the HDL for the core and return the Module.

    Starts from the Module produced by the parent class.  If the pspec
    has ``nocore == True`` only a dummy toggling Signal is added and the
    Module is returned immediately.  Otherwise the Function Units, the
    LD/ST buffer and the register files are added, the per-FU
    write-after-write hazards are combined into ``self.waw_hazard``,
    and the decoders, instruction issue, read ports and write ports
    are connected up (in that order).
    """
    m = super().elaborate(platform)

    # for testing purposes, to cut down on build time in coriolis2
    if hasattr(self.pspec, "nocore") and self.pspec.nocore == True:
        x = Signal() # dummy signal
        m.d.sync += x.eq(~x)
        return m

    m.submodules.fus = self.fus
    m.submodules.l0 = self.l0
    self.regs.elaborate_into(m, platform)

    # amalgamate write-hazards into a single top-level Signal
    self.waw_hazard = Signal()
    per_fu_waw = [fu._waw_hazard for fu in self.fus.fus.values()]
    m.d.comb += self.waw_hazard.eq(Cat(*per_fu_waw).bool())

    # connect decoders
    self.connect_satellite_decoders(m)

    # ssh, cheat: trap uses the main decoder because of the rewriting
    self.des[self.trapunit] = self.ireg.e.do

    # connect up Function Units, then read/write ports, and hazard conflict
    self.issue_conflict = Signal()
    fu_bitdict, fu_selected = self.connect_instruction(m)
    raw_hazard = self.connect_rdports(m, fu_bitdict, fu_selected)
    self.connect_wrports(m, fu_bitdict, fu_selected)
    if self.allow_overlap:
        m.d.comb += self.issue_conflict.eq(raw_hazard)

    # note if an exception happened.  in a pipelined or OoO design
    # this needs to be accompanied by "shadowing" (or stalling)
    exc_flags = [exc.happened for exc in self.fus.excs.values()]
    if exc_flags: # at least one exception
        m.d.comb += self.o.exc_happened.eq(Cat(*exc_flags).bool())

    return m
226
def connect_satellite_decoders(self, m):
    """Add each satellite decoder as a submodule and wire up its inputs.

    Every decoder in ``self.decoders`` is registered as submodule
    ``dec_<name>`` and receives the raw instruction, the endianness
    flag and ``sv_a_nz`` from ``self.ireg``.  With SVP64 enabled the
    predicate mode bits are connected as well and, for every decoder
    other than the trap unit's, the SVP64 RM and mode flag.  Decoders
    whose name starts with "ldst" additionally get the
    alternative-decoder select.
    """
    ireg = self.ireg
    for decname, subdec in self.decoders.items():
        # connect each satellite decoder and give it the instruction.
        # as subset decoders this massively reduces wire fanout given
        # the large number of ALUs
        m.submodules["dec_%s" % decname] = subdec
        links = [
            subdec.dec.raw_opcode_in.eq(ireg.raw_insn_i),
            subdec.dec.bigendian.eq(ireg.bigendian_i),
            # sigh due to SVP64 RA_OR_ZERO detection connect these too
            subdec.sv_a_nz.eq(ireg.sv_a_nz),
        ]
        if self.svp64_en:
            links.append(subdec.pred_sm.eq(ireg.sv_pred_sm))
            links.append(subdec.pred_dm.eq(ireg.sv_pred_dm))
            if decname != self.trapunit:
                # pass through SVP64 RM
                links.append(subdec.sv_rm.eq(ireg.sv_rm))
                links.append(subdec.is_svp64_mode.eq(ireg.is_svp64_mode))
                # only the LDST PowerDecodeSubset *actually* needs to
                # know to use the alternative decoder.  this is all
                # a terrible hack
                if decname.lower().startswith("ldst"):
                    links.append(subdec.use_svp64_ldst_dec.eq(
                        ireg.use_svp64_ldst_dec))
        # statements are added in the same order as they were collected
        m.d.comb += links
250
def connect_instruction(self, m):
    """connect_instruction

    uses decoded (from PowerOp) function unit information from CSV files
    to ascertain which Function Unit should deal with the current
    instruction.

    some (such as OP_ATTN, OP_NOP) are dealt with here, including
    ignoring it and halting the processor.  OP_NOP is a bit annoying
    because the issuer expects busy flag still to be raised then lowered.
    (this requires a fake counter to be set).

    also creates self.instr_active, and reads self.waw_hazard and
    self.issue_conflict, so those two must exist before this is called.

    returns a tuple (fu_bitdict, fu_selected), both dictionaries keyed
    by Function Unit name:

    * fu_bitdict[funame]  is the one-bit "enable" (picked for issue)
    * fu_selected[funame] is "busy or picked for issue"
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus

    # indicate if core is busy
    busy_o = self.o.busy_o
    any_busy_o = self.o.any_busy_o

    # connect up temporary copy of incoming instruction. the FSM will
    # either blat the incoming instruction (if valid) into self.ireg
    # or if the instruction could not be delivered, keep dropping the
    # latched copy into ireg
    ilatch = self.ispec()
    self.instr_active = Signal()

    # enable/busy-signals for each FU, get one bit for each FU (by name)
    fu_enable = Signal(len(fus), reset_less=True)
    fu_busy = Signal(len(fus), reset_less=True)
    fu_bitdict = {}
    fu_selected = {}
    for i, funame in enumerate(fus.keys()):
        fu_bitdict[funame] = fu_enable[i]
        fu_selected[funame] = fu_busy[i]

    # identify function units and create a list by fnunit so that
    # PriorityPickers can be created for selecting one of them that
    # isn't busy at the time the incoming instruction needs passing on
    by_fnunit = defaultdict(list)
    for fname, member in Function.__members__.items():
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            if member.value & fnunit: # this FU handles this type of op
                by_fnunit[fname].append((funame, fu)) # add by Function

    # ok now just print out the list of FUs by Function, because we can
    for fname, fu_list in by_fnunit.items():
        print ("FUs by type", fname, fu_list)

    # now create a PriorityPicker per FU-type such that only one
    # non-busy FU will be picked
    # NOTE(review): issue_pps is not referenced again in this method
    issue_pps = {}
    fu_found = Signal() # take a note if no Function Unit was available
    for fname, fu_list in by_fnunit.items():
        i_pp = PriorityPicker(len(fu_list))
        m.submodules['i_pp_%s' % fname] = i_pp
        i_l = []
        for i, (funame, fu) in enumerate(fu_list):
            # match the decoded instruction (e.do.fn_unit) against the
            # "capability" of this FU, gate that by whether that FU is
            # busy, and drop that into the PriorityPicker.
            # this will give us an output of the first available *non-busy*
            # Function Unit (Reservation Station) capable of handling this
            # instruction.
            fnunit = fu.fnunit.value
            en_req = Signal(name="issue_en_%s" % funame, reset_less=True)
            fnmatch = (self.ireg.e.do.fn_unit & fnunit).bool()
            comb += en_req.eq(fnmatch & ~fu.busy_o &
                              self.instr_active)
            i_l.append(en_req) # store in list for doing the Cat-trick
            # picker output, gated by enable: store in fu_bitdict
            po = Signal(name="o_issue_pick_"+funame) # picker output
            comb += po.eq(i_pp.o[i] & i_pp.en_o)
            comb += fu_bitdict[funame].eq(po)
            comb += fu_selected[funame].eq(fu.busy_o | po)
            # if we don't do this, then when there are no FUs available,
            # the "p.o_ready" signal will go back "ok we accepted this
            # instruction" which of course isn't true.
            with m.If(i_pp.en_o):
                comb += fu_found.eq(1)
        # for each input, Cat them together and drop them into the picker
        comb += i_pp.i.eq(Cat(*i_l))

    # rdmask, which is for registers needs to come from the *main* decoder
    for funame, fu in fus.items():
        rdmask = get_rdflags(self.ireg.e, fu)
        comb += fu.rdmaskn.eq(~rdmask)

    # sigh - need a NOP counter
    # (counts down to zero; busy_o is held high while it is non-zero)
    counter = Signal(2)
    with m.If(counter != 0):
        sync += counter.eq(counter - 1)
        comb += busy_o.eq(1)

    # default to reading from incoming instruction: may be overridden
    # by copy from latch when "waiting"
    comb += self.ireg.eq(self.i)
    # always say "ready" except if overridden
    comb += self.p.o_ready.eq(1)

    # two states: READY takes the incoming instruction directly,
    # WAITING replays the latched copy until it can be issued
    with m.FSM():
        with m.State("READY"):
            with m.If(self.p.i_valid): # run only when valid
                with m.Switch(self.ireg.e.do.insn_type):
                    # check for ATTN: halt if true
                    with m.Case(MicrOp.OP_ATTN):
                        m.d.sync += self.o.core_terminate_o.eq(1)

                    # fake NOP - this isn't really used (Issuer detects NOP)
                    with m.Case(MicrOp.OP_NOP):
                        sync += counter.eq(2)
                        comb += busy_o.eq(1)

                    with m.Default():
                        comb += self.instr_active.eq(1)
                        # not ready, unless an FU below is enabled
                        comb += self.p.o_ready.eq(0)
                        # connect instructions. only one enabled at a time
                        for funame, fu in fus.items():
                            do = self.des[funame]
                            enable = fu_bitdict[funame]

                            # run this FunctionUnit if enabled route op,
                            # issue, busy, read flags and mask to FU
                            with m.If(enable):
                                # operand comes from the *local* decoder
                                # do not actually issue, though, if there
                                # is a waw hazard. decoder has to still
                                # be asserted in order to detect that, tho
                                comb += fu.oper_i.eq_from(do)
                                # issue when valid (and no write-hazard)
                                comb += fu.issue_i.eq(~self.waw_hazard)
                                # instruction ok, indicate ready
                                comb += self.p.o_ready.eq(1)

                        if self.allow_overlap:
                            # no FU free, or a write-after-write hazard:
                            # accept the instruction but hold it in the
                            # latch and retry from the WAITING state
                            with m.If(~fu_found | self.waw_hazard):
                                # latch copy of instruction
                                sync += ilatch.eq(self.i)
                                comb += self.p.o_ready.eq(1) # accept
                                comb += busy_o.eq(1)
                                m.next = "WAITING"

        with m.State("WAITING"):
            comb += self.instr_active.eq(1)
            comb += self.p.o_ready.eq(0)
            comb += busy_o.eq(1)
            # using copy of instruction, keep waiting until an FU is free
            comb += self.ireg.eq(ilatch)
            with m.If(fu_found): # wait for conflict to clear
                # connect instructions. only one enabled at a time
                for funame, fu in fus.items():
                    do = self.des[funame]
                    enable = fu_bitdict[funame]

                    # run this FunctionUnit if enabled route op,
                    # issue, busy, read flags and mask to FU
                    with m.If(enable):
                        # operand comes from the *local* decoder,
                        # which is asserted even if not issued,
                        # so that WaW-detection can check for hazards.
                        # only if the waw hazard is clear does the
                        # instruction actually get issued
                        comb += fu.oper_i.eq_from(do)
                        # issue when valid
                        comb += fu.issue_i.eq(~self.waw_hazard)
                        with m.If(~self.waw_hazard):
                            comb += self.p.o_ready.eq(1)
                            comb += busy_o.eq(0)
                            m.next = "READY"

    print ("core: overlap allowed", self.allow_overlap)
    # true when any FU is busy (including the cycle where it is perhaps
    # to be issued - because that's what fu_busy is)
    comb += any_busy_o.eq(fu_busy.bool())
    if not self.allow_overlap:
        # for simple non-overlap, if any instruction is busy, set
        # busy output for core.
        comb += busy_o.eq(any_busy_o)
    else:
        # sigh deal with a fun situation that needs to be investigated
        # and resolved
        with m.If(self.issue_conflict):
            comb += busy_o.eq(1)
        # make sure that LDST, SPR, MMU, Branch and Trap all say "busy"
        # and do not allow overlap. these are all the ones that
        # are non-forward-progressing: exceptions etc. that otherwise
        # change CoreState for some reason (MSR, PC, SVSTATE)
        for funame, fu in fus.items():
            if (funame.lower().startswith('ldst') or
                funame.lower().startswith('branch') or
                funame.lower().startswith('mmu') or
                funame.lower().startswith('spr') or
                funame.lower().startswith('trap')):
                with m.If(fu.busy_o):
                    comb += busy_o.eq(1)

    # return both the function unit "enable" dict as well as the "busy".
    # the "busy-or-issued" can be passed in to the Read/Write port
    # connecters to give them permission to request access to regfiles
    return fu_bitdict, fu_selected
451
def connect_rdport(self, m, fu_bitdict, fu_selected,
                   rdpickers, regfile, regname, fspec):
    """connect one regfile read port to all Function Units that use it

    creates a PriorityPicker for the port (stored in
    rdpickers[regfile][regname] and added as a submodule), connects
    each FU's read-request to the picker, and routes the port's output
    data to whichever FU was picked one cycle earlier.

    * m:           the Module being built
    * fu_bitdict:  dict by FU name of the "enable" (issue) bit
    * fu_selected: dict by FU name of the "busy or issued" bit
    * rdpickers:   dict by regfile of dict by port name; the new picker
                   is written into it
    * regfile:     regfile name (lower-cased to look up the regfile)
    * regname:     name of the read port within that regfile
    * fspec:       one spec, or a list of specs, each providing rdport,
                   wrport, read, write, wid and specs attributes
                   (presumably ByRegSpec - confirm against the caller)

    returns Const(0) when hazard vectors are not enabled, otherwise a
    Signal that is set when a write-hazard bit matches a register that
    is being checked on this port.
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus
    regs = self.regs

    rpidx = regname

    # select the required read port. these are pre-defined sizes
    rfile = regs.rf[regfile.lower()]
    rport = rfile.r_ports[rpidx]
    print("read regfile", rpidx, regfile, regs.rf.keys(),
          rfile, rfile.unary)

    # for checking if the read port has an outstanding write
    if self.make_hazard_vecs:
        wv = regs.wv[regfile.lower()]
        wvchk = wv.q_int # write-vec bit-level hazard check

    # if a hazard is detected on this read port, simply blithely block
    # every FU from reading on it. this is complete overkill but very
    # simple for now.
    hazard_detected = Signal(name="raw_%s_%s" % (regfile, rpidx))

    # normalise to a list: a merged port is given a list of specs
    fspecs = fspec
    if not isinstance(fspecs, list):
        fspecs = [fspecs]

    # first pass: one read-flag per spec, and the picker offset at which
    # each spec's Function Units start
    rdflags = []
    pplen = 0
    ppoffs = []
    for i, fspec in enumerate(fspecs):
        # get the regfile specs for this regfile port
        (rf, wf, _read, _write, wid, fuspecs) = \
            (fspec.rdport, fspec.wrport, fspec.read, fspec.write,
             fspec.wid, fspec.specs)
        print ("fpsec", i, fspec, len(fuspecs))
        name = "%s_%s_%d" % (regfile, regname, i)
        ppoffs.append(pplen) # record offset for picker
        pplen += len(fspec.specs)
        rdflag = Signal(name="rdflag_"+name, reset_less=True)
        comb += rdflag.eq(fspec.rdport)
        rdflags.append(rdflag)

    print ("pplen", pplen)

    # create a priority picker to manage this port
    rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
    m.submodules["rdpick_%s_%s" % (regfile, rpidx)] = rdpick

    rens = []   # read-enables (unary: the gated register select itself)
    addrs = []  # gated register numbers (binary-addressed regfiles only)
    wvens = []  # per-FU write-hazard check enables

    for i, fspec in enumerate(fspecs):
        (rf, wf, _read, _write, wid, fuspecs) = \
            (fspec.rdport, fspec.wrport, fspec.read, fspec.write,
             fspec.wid, fspec.specs)
        # connect up the FU req/go signals, and the reg-read to the FU
        # and create a Read Broadcast Bus
        for pi, fuspec in enumerate(fspec.specs):
            (funame, fu, idx) = (fuspec.funame, fuspec.fu, fuspec.idx)
            pi += ppoffs[i] # position of this FU within the shared picker
            name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
            fu_active = fu_selected[funame]
            fu_issued = fu_bitdict[funame]

            # get (or set up) a latched copy of read register number
            # and (sigh) also the read-ok flag
            rname = "%s_%s_%s_%d" % (funame, regfile, regname, pi)
            rhname = "%s_%s_%d" % (regfile, regname, i)
            read = Signal.like(_read, name="read_"+name)
            rdflag = Signal(name="rdflag_%s_%s" % (funame, rhname),
                            reset_less=True)
            if rhname not in fu.rf_latches:
                rfl = Signal(name="rdflag_latch_"+rname)
                fu.rf_latches[rhname] = rfl
                with m.If(fu.issue_i):
                    sync += rfl.eq(rdflags[i])
            else:
                rfl = fu.rf_latches[rhname]
            if rname not in fu.rd_latches:
                rdl = Signal.like(_read, name="rdlatch_"+rname)
                fu.rd_latches[rname] = rdl
                with m.If(fu.issue_i):
                    sync += rdl.eq(_read)
            else:
                rdl = fu.rd_latches[rname]
            # latch to make the read immediately available on issue cycle
            # after the read cycle, use the latched copy
            with m.If(fu.issue_i):
                comb += read.eq(_read)
                comb += rdflag.eq(rdflags[i])
            with m.Else():
                comb += read.eq(rdl)
                comb += rdflag.eq(rfl)

            # connect request-read to picker input, and output to go-rd
            addr_en = Signal.like(read, name="addr_en_"+name)
            pick = Signal(name="pick_"+name) # picker input
            rp = Signal(name="rp_"+name) # picker output
            delay_pick = Signal(name="dp_"+name) # read-enable "underway"
            rhazard = Signal(name="rhaz_"+name)

            # exclude any currently-enabled read-request (mask out active)
            # entirely block anything hazarded from being picked
            comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflag &
                            ~delay_pick & ~rhazard)
            comb += rdpick.i[pi].eq(pick)
            comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick

            # if picked, select read-port "reg select" number to port
            comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
            sync += delay_pick.eq(rp) # delayed "pick"
            comb += addr_en.eq(Mux(rp, read, 0))

            # the read-enable happens combinatorially (see mux-bus below)
            # but it results in the data coming out on a one-cycle delay.
            if rfile.unary:
                rens.append(addr_en)
            else:
                addrs.append(addr_en)
                rens.append(rp)

            # use the *delayed* pick signal to put requested data onto bus
            with m.If(delay_pick):
                # connect regfile port to input, creating fan-out Bus
                src = fu.src_i[idx]
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      src.shape(), rport.o_data.shape())
                # all FUs connect to same port
                comb += src.eq(rport.o_data)

            if not self.make_hazard_vecs:
                continue

            # read the write-hazard bitvector (wv) for any bit that
            # matches the register this FU wants to read
            wvchk_en = Signal(len(wvchk), name="wv_chk_addr_en_"+name)
            issue_active = Signal(name="rd_iactive_"+name)
            # XXX combinatorial loop here
            comb += issue_active.eq(fu_active & rdflag)
            with m.If(issue_active):
                if rfile.unary:
                    comb += wvchk_en.eq(read)
                else:
                    comb += wvchk_en.eq(1<<read)
            # if FU is busy (which doesn't get set at the same time as
            # issue) and no hazard was detected, clear wvchk_en (i.e.
            # stop checking for hazards). there is a loop here, but it's
            # via a DFF, so is ok. some linters may complain, but hey.
            with m.If(fu.busy_o & ~rhazard):
                comb += wvchk_en.eq(0)

            # read-hazard is ANDed with (filtered by) what is actually
            # being requested.
            comb += rhazard.eq((wvchk & wvchk_en).bool())

            wvens.append(wvchk_en)

    # or-reduce the muxed read signals
    if rfile.unary:
        # for unary-addressed
        comb += rport.ren.eq(ortreereduce_sig(rens))
    else:
        # for binary-addressed
        comb += rport.addr.eq(ortreereduce_sig(addrs))
        comb += rport.ren.eq(Cat(*rens).bool())
        print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    if not self.make_hazard_vecs:
        return Const(0) # declare "no hazards"

    # enable the read bitvectors for this issued instruction
    # and return whether any write-hazard bit is set
    # NOTE(review): "name" here is whatever the loops above last set it
    # to, so this Signal is named after the last FU connected (or after
    # the last spec if no FU was connected at all)
    wvchk_and = Signal(len(wvchk), name="wv_chk_"+name)
    comb += wvchk_and.eq(wvchk & ortreereduce_sig(wvens))
    comb += hazard_detected.eq(wvchk_and.bool())
    return hazard_detected
631
def connect_rdports(self, m, fu_bitdict, fu_selected):
    """connect read ports

    takes the read regspecs grouped by regfile and by regfile port
    name (from get_byregfiles) and, for every named port, connects all
    FUs that want that port by way of a PriorityPicker (one call to
    connect_rdport per port).  with regreduce enabled, several INT and
    FAST port specs are first merged into a single list under one
    port name.

    returns the boolean OR of the hazard indications returned by the
    individual connect_rdport calls.
    """
    # dictionary of lists of regfile read ports
    byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

    # okaay, now we need a PriorityPicker per regfile per regfile port
    # loootta pickers... peter piper picked a pack of pickled peppers...
    rdpickers = {}
    hazards = []
    for regfile in byregfiles_rd:
        fuspecs = byregfiles_rdspec[regfile]
        rdpickers[regfile] = {}

        # argh. an experiment to merge RA and RB in the INT regfile
        # (we have too many read/write ports)
        if self.regreduce_en:
            if regfile == 'INT':
                merged = fuspecs['rabc'] = [fuspecs.pop('rb')]
                merged.append(fuspecs.pop('rc'))
                merged.append(fuspecs.pop('ra'))
            if regfile == 'FAST':
                merged = fuspecs['fast1'] = [fuspecs.pop('fast1')]
                # fast2 and fast3 are optional: fold in those present
                for extra in ('fast2', 'fast3'):
                    if extra in fuspecs:
                        merged.append(fuspecs.pop(extra))

        # for each named regfile port, connect up all FUs to that port
        # also return (and collate) hazard detection
        for (regname, fspec) in sort_fuspecs(fuspecs):
            print("connect rd", regname, fspec)
            hazards.append(self.connect_rdport(m, fu_bitdict, fu_selected,
                                               rdpickers, regfile,
                                               regname, fspec))

    return Cat(*hazards).bool()
678
def make_hazards(self, m, regfile, rfile, wvclr, wvset,
                 funame, regname, idx,
                 addr_en, wp, fu, fu_active, wrflag, write,
                 fu_wrok):
    """make_hazards: a setter and a clearer for the regfile write ports

    setter is at issue time (using PowerDecoder2 regfile write numbers)
    clearer is at regfile write time (when FU has said what to write to)

    there is *one* unusual case here which has to be dealt with:
    when the Function Unit does *NOT* request a write to the regfile
    (has its data.ok bit CLEARED). this is perfectly legitimate.
    and a royal pain.

    arguments used here:

    * m:              the Module being built
    * regfile:        regfile name (only printed)
    * rfile:          the regfile; rfile.unary selects unary or binary
                      addressing
    * wvclr, wvset:   hazard bitvector clear and set ports (only their
                      length is used, plus printing)
    * funame, regname, idx: used to build the Signal names
    * addr_en, wp:    the caller's gated write address and write-pick
    * fu:             the Function Unit (issue_i, busy_o, alu_done_o)
    * fu_active:      "busy or issued" bit for this FU
    * wrflag:         write-requested flag from the decoder
    * write:          register being written
    * fu_wrok:        the FU's "data ok" indication

    returns a tuple (wvaddr_en, wviaddr_en): the clear-enable and the
    set-enable for the hazard bitvector, each as wide as wvclr / wvset.
    """
    comb, sync = m.d.comb, m.d.sync
    name = "%s_%s_%d" % (funame, regname, idx)

    # connect up the bitvector write hazard. unlike the
    # regfile writeports, a ONE must be written to the corresponding
    # bit of the hazard bitvector (to indicate the existence of
    # the hazard)

    # the detection of what shall be written to is based
    # on *issue*. it is delayed by 1 cycle so that instructions
    # "addi 5,5,0x2" do not cause combinatorial loops due to
    # fake-dependency on *themselves*. this will totally fail
    # spectacularly when doing multi-issue
    print ("write vector (for regread)", regfile, wvset)
    wviaddr_en = Signal(len(wvset), name="wv_issue_addr_en_"+name)
    issue_active = Signal(name="iactive_"+name)
    sync += issue_active.eq(fu.issue_i & fu_active & wrflag)
    with m.If(issue_active):
        if rfile.unary:
            comb += wviaddr_en.eq(write)
        else:
            comb += wviaddr_en.eq(1<<write)

    # deal with write vector clear: this kicks in when the regfile
    # is written to, and clears the corresponding bitvector entry
    print ("write vector", regfile, wvclr)
    wvaddr_en = Signal(len(wvclr), name="wvaddr_en_"+name)
    if rfile.unary:
        comb += wvaddr_en.eq(addr_en)
    else:
        with m.If(wp):
            comb += wvaddr_en.eq(1<<addr_en)

    # XXX ASSUME that LDSTFunctionUnit always sets the data it intends to
    # this may NOT be the case when an exception occurs
    if isinstance(fu, LDSTFunctionUnit):
        return wvaddr_en, wviaddr_en

    # okaaay, this is preparation for the awkward case.
    # * latch a copy of wrflag when issue goes high.
    # * when the fu_wrok (data.ok) flag is NOT set,
    #   but the FU is done, the FU is NEVER going to write
    #   so the bitvector has to be cleared.
    latch_wrflag = Signal(name="latch_wrflag_"+name)
    with m.If(~fu.busy_o):
        sync += latch_wrflag.eq(0)
    # issue comes second so that it overrides the not-busy clear above
    with m.If(fu.issue_i & fu_active):
        sync += latch_wrflag.eq(wrflag)
    with m.If(fu.alu_done_o & latch_wrflag & ~fu_wrok):
        if rfile.unary:
            comb += wvaddr_en.eq(write) # addr_en gated with wp, don't use
        else:
            # NOTE(review): the caller visible in this file
            # (connect_wrport) builds addr_en as Mux(wp, write, 0) for
            # unary and binary regfiles alike, which does not match the
            # "not gated" remark below - confirm whether "write" was
            # intended here
            comb += wvaddr_en.eq(1<<addr_en) # binary addr_en not gated

    return wvaddr_en, wviaddr_en
748
749 def connect_wrport(self, m, fu_bitdict, fu_selected,
750 wrpickers, regfile, regname, fspec):
751 comb, sync = m.d.comb, m.d.sync
752 fus = self.fus.fus
753 regs = self.regs
754
755 rpidx = regname
756
757 # select the required write port. these are pre-defined sizes
758 rfile = regs.rf[regfile.lower()]
759 wport = rfile.w_ports[rpidx]
760
761 print("connect wr", regname, "unary", rfile.unary, fspec)
762 print(regfile, regs.rf.keys())
763
764 # select the write-protection hazard vector. note that this still
765 # requires to WRITE to the hazard bitvector! read-requests need
766 # to RAISE the bitvector (set it to 1), which, duh, requires a WRITE
767 if self.make_hazard_vecs:
768 wv = regs.wv[regfile.lower()]
769 wvset = wv.s # write-vec bit-level hazard ctrl
770 wvclr = wv.r # write-vec bit-level hazard ctrl
771 wvchk = wv.q # write-after-write hazard check
772 wvchk_qint = wv.q # write-after-write hazard check, NOT delayed
773
774 fspecs = fspec
775 if not isinstance(fspecs, list):
776 fspecs = [fspecs]
777
778 pplen = 0
779 writes = []
780 ppoffs = []
781 rdflags = []
782 wrflags = []
783 for i, fspec in enumerate(fspecs):
784 # get the regfile specs for this regfile port
785 (rf, wf, _read, _write, wid, fuspecs) = \
786 (fspec.rdport, fspec.wrport, fspec.read, fspec.write,
787 fspec.wid, fspec.specs)
788 print ("fpsec", i, "wrflag", wf, fspec, len(fuspecs))
789 ppoffs.append(pplen) # record offset for picker
790 pplen += len(fuspecs)
791
792 name = "%s_%s_%d" % (regfile, regname, i)
793 rdflag = Signal(name="rd_flag_"+name)
794 wrflag = Signal(name="wr_flag_"+name)
795 if rf is not None:
796 comb += rdflag.eq(rf)
797 else:
798 comb += rdflag.eq(0)
799 if wf is not None:
800 comb += wrflag.eq(wf)
801 else:
802 comb += wrflag.eq(0)
803 rdflags.append(rdflag)
804 wrflags.append(wrflag)
805
806 # create a priority picker to manage this port
807 wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
808 m.submodules["wrpick_%s_%s" % (regfile, rpidx)] = wrpick
809
810 wsigs = []
811 wens = []
812 wvsets = []
813 wvseten = []
814 wvclren = []
815 #wvens = [] - not needed: reading of writevec is permanently held hi
816 addrs = []
817 for i, fspec in enumerate(fspecs):
818 # connect up the FU req/go signals and the reg-read to the FU
819 # these are arbitrated by Data.ok signals
820 (rf, wf, _read, _write, wid, fuspecs) = \
821 (fspec.rdport, fspec.wrport, fspec.read, fspec.write,
822 fspec.wid, fspec.specs)
823 for pi, fuspec in enumerate(fspec.specs):
824 (funame, fu, idx) = (fuspec.funame, fuspec.fu, fuspec.idx)
825 fu_requested = fu_bitdict[funame]
826 pi += ppoffs[i]
827 name = "%s_%s_%s_%d" % (funame, regfile, regname, idx)
828 # get (or set up) a write-latched copy of write register number
829 write = Signal.like(_write, name="write_"+name)
830 rname = "%s_%s_%s_%d" % (funame, regfile, regname, idx)
831 if rname not in fu.wr_latches:
832 wrl = Signal.like(_write, name="wrlatch_"+rname)
833 fu.wr_latches[rname] = write
834 # do not depend on fu.issue_i here, it creates a
835 # combinatorial loop on waw checking. using the FU
836 # "enable" bitdict entry for this FU is sufficient,
837 # because the PowerDecoder2 read/write nums are
838 # valid continuously when the instruction is valid
839 with m.If(fu_requested):
840 sync += wrl.eq(_write)
841 comb += write.eq(_write)
842 with m.Else():
843 comb += write.eq(wrl)
844 else:
845 write = fu.wr_latches[rname]
846
847 # write-request comes from dest.ok
848 dest = fu.get_out(idx)
849 fu_dest_latch = fu.get_fu_out(idx) # latched output
850 name = "%s_%s_%d" % (funame, regname, idx)
851 fu_wrok = Signal(name="fu_wrok_"+name, reset_less=True)
852 comb += fu_wrok.eq(dest.ok & fu.busy_o)
853
854 # connect request-write to picker input, and output to go-wr
855 fu_active = fu_selected[funame]
856 pick = fu.wr.rel_o[idx] & fu_active
857 comb += wrpick.i[pi].eq(pick)
858 # create a single-pulse go write from the picker output
859 wr_pick = Signal(name="wpick_%s_%s_%d" % (funame, regname, idx))
860 comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
861 comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))
862
863 # connect the regspec write "reg select" number to this port
864 # only if one FU actually requests (and is granted) the port
865 # will the write-enable be activated
866 wname = "waddr_en_%s_%s_%d" % (funame, regname, idx)
867 addr_en = Signal.like(write, name=wname)
868 wp = Signal()
869 comb += wp.eq(wr_pick & wrpick.en_o)
870 comb += addr_en.eq(Mux(wp, write, 0))
871 if rfile.unary:
872 wens.append(addr_en)
873 else:
874 addrs.append(addr_en)
875 wens.append(wp)
876
877 # connect regfile port to input
878 print("reg connect widths",
879 regfile, regname, pi, funame,
880 dest.shape(), wport.i_data.shape())
881 wsigs.append(fu_dest_latch)
882
883 # now connect up the bitvector write hazard
884 if not self.make_hazard_vecs:
885 continue
886 res = self.make_hazards(m, regfile, rfile, wvclr, wvset,
887 funame, regname, idx,
888 addr_en, wp, fu, fu_active,
889 wrflags[i], write, fu_wrok)
890 wvaddr_en, wv_issue_en = res
891 wvclren.append(wvaddr_en) # set only: no data => clear bit
892 wvseten.append(wv_issue_en) # set data same as enable
893
894 # read the write-hazard bitvector (wv) for any bit that is
895 fu_requested = fu_bitdict[funame]
896 wvchk_en = Signal(len(wvchk), name="waw_chk_addr_en_"+name)
897 issue_active = Signal(name="waw_iactive_"+name)
898 whazard = Signal(name="whaz_"+name)
899 if wf is None:
900 # XXX EEK! STATE regfile (branch) does not have an
901 # write-active indicator in regspec_decode_write()
902 print ("XXX FIXME waw_iactive", issue_active,
903 fu_requested, wf)
904 else:
905 # check bits from the incoming instruction. note (back
906 # in connect_instruction) that the decoder is held for
907 # us to be able to do this, here... *without* issue being
908 # held HI. we MUST NOT gate this with fu.issue_i or
909 # with fu_bitdict "enable": it would create a loop
910 comb += issue_active.eq(wf)
911 with m.If(issue_active):
912 if rfile.unary:
913 comb += wvchk_en.eq(write)
914 else:
915 comb += wvchk_en.eq(1<<write)
916 # if FU is busy (which doesn't get set at the same time as
917 # issue) and no hazard was detected, clear wvchk_en (i.e.
918 # stop checking for hazards). there is a loop here, but it's
919 # via a DFF, so is ok. some linters may complain, but hey.
920 with m.If(fu.busy_o & ~whazard):
921 comb += wvchk_en.eq(0)
922
923 # write-hazard is ANDed with (filtered by) what is actually
924 # being requested. the wvchk data is on a one-clock delay,
925 # and wvchk_en comes directly from the main decoder
926 comb += whazard.eq((wvchk_qint & wvchk_en).bool())
927 with m.If(whazard):
928 comb += fu._waw_hazard.eq(1)
929
930 #wvens.append(wvchk_en)
931
932 # here is where we create the Write Broadcast Bus. simple, eh?
933 comb += wport.i_data.eq(ortreereduce_sig(wsigs))
934 if rfile.unary:
935 # for unary-addressed
936 comb += wport.wen.eq(ortreereduce_sig(wens))
937 else:
938 # for binary-addressed
939 comb += wport.addr.eq(ortreereduce_sig(addrs))
940 comb += wport.wen.eq(ortreereduce_sig(wens))
941
942 if not self.make_hazard_vecs:
943 return [], []
944
945 # return these here rather than set wvclr/wvset directly,
946 # because there may be more than one write-port to a given
947 # regfile. example: XER has a write-port for SO, CA, and OV
948 # and the *last one added* of those would overwrite the other
949 # two. solution: have connect_wrports collate all the
950 # or-tree-reduced bitvector set/clear requests and drop them
951 # in as a single "thing". this can only be done because the
952 # set/get is an unary bitvector.
953 print ("make write-vecs", regfile, regname, wvset, wvclr)
954 return (ortreereduce_sig(wvclren), # clear (regfile write)
955 ortreereduce_sig(wvseten)) # set (issue time)
956
def connect_wrports(self, m, fu_bitdict, fu_selected):
    """connect write ports

    orders the write regspecs into a dict-of-dicts, by regfile,
    by regport name, then hands each (regfile, regname) group to
    connect_wrport, which wires the FUs wanting that regport to a
    shared picker.

    note that the write-port wen, write-port data, and go_wr_i all need to
    be on the exact same clock cycle.
    NOTE(review): this docstring previously stated that "these all use
    sync" because of a combinatorial loop bug. every assignment made
    directly in this method is combinatorial: confirm whether the remark
    still applies to connect_wrport.

    arguments:

    * m           - module being built: statements are added to m.d.comb
    * fu_bitdict  - passed unmodified to connect_wrport (which indexes it
                    by Function Unit name)
    * fu_selected - passed unmodified to connect_wrport (which indexes it
                    by Function Unit name)

    returns None. side-effects: when self.regreduce_en is set, the
    per-regfile spec dictionaries obtained from get_byregfiles are
    modified in-place (ports merged). when self.make_hazard_vecs is set,
    the "s" and "r" inputs of each self.regs.wv[regfile] are driven.
    """
    comb = m.d.comb
    regs = self.regs
    # dictionary of lists of regfile write ports
    byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

    # same for write ports.
    # BLECH!  complex code-duplication! BLECH!
    wrpickers = {}
    wvclrers = defaultdict(list)
    wvseters = defaultdict(list)
    for regfile in byregfiles_wr:
        fuspecs = byregfiles_wrspec[regfile]
        wrpickers[regfile] = {}

        if self.regreduce_en:
            # argh, more port-merging: several named ports are folded
            # into a list stored under the first port's name
            if regfile == 'INT':
                fuspecs['o'] = [fuspecs.pop('o')]
                fuspecs['o'].append(fuspecs.pop('o1'))
            if regfile == 'FAST':
                fuspecs['fast1'] = [fuspecs.pop('fast1')]
                if 'fast2' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast2'))
                if 'fast3' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast3'))

        # collate these and record them by regfile because there
        # are sometimes more write-ports per regfile.  the hazard
        # vectors in regs.wv are looked up by lower-case regfile name
        wvname = regfile.lower()
        for (regname, fspec) in sort_fuspecs(fuspecs):
            wvclren, wvseten = self.connect_wrport(m,
                                                   fu_bitdict, fu_selected,
                                                   wrpickers,
                                                   regfile, regname, fspec)
            wvclrers[wvname].append(wvclren)
            wvseters[wvname].append(wvseten)

    if not self.make_hazard_vecs:
        return

    # for write-vectors: reduce the clr-ers and set-ers down to
    # a single set of bits.  otherwise if there are two write
    # ports (on some regfiles), the last one doing comb += on
    # the reg.wv[regfile] instance "wins" (and all others are ignored,
    # whoops).  if there was only one write-port per wv regfile this would
    # not be an issue.
    for regfile, wvclren in wvclrers.items():
        wv = regs.wv[regfile]  # write-vec bit-level hazard ctrl
        wvseten = wvseters[regfile]
        comb += wv.r.eq(ortreereduce_sig(wvclren))  # clear (regfile write)
        comb += wv.s.eq(ortreereduce_sig(wvseten))  # set (issue time)
1022
def get_byregfiles(self, readmode):
    """group Function Unit register-port requirements by regfile

    walks every Function Unit in self.fus.fus and, for each of its
    source operands (readmode True) or destination operands (readmode
    False), looks up the regfile/port it needs and the matching decoded
    register information from the main decoder (self.ireg.e).

    arguments:

    * readmode - True to analyse read ports (fu.get_in_spec, fu.n_src),
                 False to analyse write ports (fu.get_out_spec, fu.n_dst)

    returns a tuple (byregfiles, byregfiles_spec):

    * byregfiles      - dict-of-dicts-of-lists of FUSpec.  first key is
                        the regfile name, second key is the FU operand
                        index (idx).
    * byregfiles_spec - dict-of-dicts of ByRegSpec.  first key is the
                        regfile name, second key is the regfile port
                        name.  the ByRegSpec "specs" list collects one
                        FUSpec per Function Unit operand using that port.

    side-effects: each Function Unit has its latch dictionaries reset
    to empty (rd_latches and rf_latches in read mode, wr_latches in
    write mode), and a summary is printed to stdout.
    """
    mode = "read" if readmode else "write"
    fus = self.fus.fus
    e = self.ireg.e  # decoded instruction to execute

    # dictionary of dictionaries of lists/tuples of regfile ports.
    # first key: regfile.
    # NOTE(review): the second key of byregfiles is the FU operand
    # index (idx), whereas byregfiles_spec uses the regfile port name.
    # confirm against callers that keying byregfiles by idx is intended
    byregfiles = defaultdict(lambda: defaultdict(list))
    byregfiles_spec = defaultdict(dict)

    for (funame, fu) in fus.items():
        # create in each FU a receptacle for the read/write register
        # hazard numbers.  to be latched in connect_rd/write_ports
        # XXX better that this is moved into the actual FUs, but
        # the issue there is that this function is actually better
        # suited at the moment
        if readmode:
            fu.rd_latches = {}  # read reg number latches
            fu.rf_latches = {}  # read flag latches
        else:
            fu.wr_latches = {}

        print("%s ports for %s" % (mode, funame))
        for idx in range(fu.n_src if readmode else fu.n_dst):
            # construct regfile specs: read uses inspec, write outspec
            if readmode:
                (regfile, regname, wid) = fu.get_in_spec(idx)
            else:
                (regfile, regname, wid) = fu.get_out_spec(idx)
            print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))

            # the PowerDecoder2 (main one, not the satellites) contains
            # the decoded regfile numbers.  obtain these now
            if readmode:
                rdport, read = regspec_decode_read(e, regfile, regname)
                wrport, write = None, None
            else:
                rdport, read = None, None
                wrport, write = regspec_decode_write(e, regfile, regname)

            # construct the dictionary of regspec information by regfile.
            # only the first FU operand seen for a given port creates the
            # entry: later ones are just appended to its specs list
            if regname not in byregfiles_spec[regfile]:
                byregfiles_spec[regfile][regname] = \
                    ByRegSpec(rdport, wrport, read, write, wid, [])
            # here we start to create "lanes"
            fuspec = FUSpec(funame, fu, idx)
            byregfiles[regfile][idx].append(fuspec)
            byregfiles_spec[regfile][regname].specs.append(fuspec)
            # (the per-FU latch Signals are not created here: the latch
            # dictionaries set up above start out empty)

    # ok just print that all out, for convenience
    for regfile in byregfiles:
        print("regfile %s ports:" % mode, regfile)
        for regname, fspec in byregfiles_spec[regfile].items():
            [rdport, wrport, read, write, wid, specs] = fspec
            print(" rf %s port %s lane: %s" % (mode, regfile, regname))
            print(" %s" % regname, wid, read, write, rdport, wrport)
            for (funame, fu, idx) in specs:
                fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                print(" ", funame, fu.__class__.__name__, idx, fusig)
                print()

    return byregfiles, byregfiles_spec
1100
def __iter__(self):
    """iterate over the ports of the core, one source after another

    yields everything produced by self.fus.ports(), followed by
    self.i.e.ports(), followed by self.l0.ports(), in that order.
    each source is only looked up once the previous one has been
    fully consumed.
    """
    for port in self.fus.ports():
        yield port
    for port in self.i.e.ports():
        yield port
    for port in self.l0.ports():
        yield port
    # TODO: regs
1106
def ports(self):
    """return a new list holding everything that iterating over self yields"""
    return [*self]
1109
1110
if __name__ == '__main__':
    # settings handed to TestMemPspec for this stand-alone conversion run
    pspec_settings = dict(ldst_ifacetype='testpi',
                          imem_ifacetype='',
                          addr_wid=48,
                          allow_overlap=True,
                          mask_wid=8,
                          reg_wid=64)
    core = NonProductionCore(TestMemPspec(**pspec_settings))
    # convert the core with nmigen's rtlil back-end and save the result
    il_text = rtlil.convert(core, ports=core.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(il_text)