3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 (update: actually this is being added now:
21 https://bugs.libre-soc.org/show_bug.cgi?id=737)
24 from nmigen
import (Elaboratable
, Module
, Signal
, ResetSignal
, Cat
, Mux
,
26 from nmigen
.cli
import rtlil
28 from openpower
.decoder
.power_decoder2
import PowerDecodeSubset
29 from openpower
.decoder
.power_regspec_map
import regspec_decode
30 from openpower
.sv
.svp64
import SVP64Rec
32 from nmutil
.picker
import PriorityPicker
33 from nmutil
.util
import treereduce
34 from nmutil
.singlepipe
import ControlBase
36 from soc
.fu
.compunits
.compunits
import AllFunctionUnits
, LDSTFunctionUnit
37 from soc
.regfile
.regfiles
import RegFiles
38 from openpower
.decoder
.power_decoder2
import get_rdflags
39 from soc
.experiment
.l0_cache
import TstL0CacheBuffer
# test only
40 from soc
.config
.test
.test_loadstore
import TestMemPspec
41 from openpower
.decoder
.power_enums
import MicrOp
, Function
42 from soc
.simple
.core_data
import CoreInput
, CoreOutput
44 from collections
import defaultdict
, namedtuple
47 from nmutil
.util
import rising_edge
# FUSpec: one Function Unit's claim on a regfile port.
#   funame - name of the Function Unit (key into the FU dictionaries)
#   fu     - the Function Unit object itself
#   idx    - index of the FU operand/result that uses the port
FUSpec = namedtuple("FUSpec", "funame fu idx")

# ByRegSpec: everything needed to wire up one named regfile port.
#   okflag  - flag indicating that the register is actually read/written
#   regport - the register number (port select) to present to the regfile
#   wid     - width recorded for the port
#   specs   - list of FUSpec entries, one per FU operand sharing the port
ByRegSpec = namedtuple("ByRegSpec", "okflag regport wid specs")
# helper function for reducing a list of signals down to a single parallel-ORed signal
def ortreereduce(tree, attr="o_data"):
    """OR together one attribute taken from every item of `tree`.

    `tree` is handed to treereduce together with the bitwise-OR operator;
    each item is first mapped to `getattr(item, attr)`, which defaults to
    the item's `o_data` attribute.
    """
    def select(item):
        # pick out the named attribute of each leaf before it is ORed
        return getattr(item, attr)

    return treereduce(tree, operator.or_, select)
def ortreereduce_sig(tree):
    """OR together the items of `tree` themselves (no attribute lookup).

    Same reduction as ortreereduce, except that each item is used as-is.
    """
    def passthrough(item):
        # items are already the values to OR: use them unchanged
        return item

    return treereduce(tree, operator.or_, passthrough)
62 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the items of `fuspecs` with "full*" register names first.

    `fuspecs` is a dict mapping a register (port) name to its spec.  The
    result is a list of (regname, fspec) tuples: first every entry whose
    name starts with "full", then every other entry.  Within each group
    the dict's own iteration order is preserved.  An empty dict yields an
    empty list.
    """
    # the result list was previously appended to without ever being
    # created; build the two groups explicitly in one pass instead.
    full_first = []
    remainder = []
    for regname, fspec in fuspecs.items():
        if regname.startswith("full"):
            full_first.append((regname, fspec))
        else:
            remainder.append((regname, fspec))
    return full_first + remainder
74 # a hazard bitvector "remap" function which returns an AST expression
75 # that remaps read/write hazard regfile port numbers to either a full
76 # bitvector or a reduced subset one. SPR for example is reduced to a
78 # CRITICALLY-IMPORTANT NOTE: these bitvectors *have* to match up per
79 # regfile! therefore the remapping is per regfile, *NOT* per regfile
80 # port and certainly not based on whether it is a read port or write port.
81 # note that any reductions here will result in degraded performance due
82 # to conflicts, but at least it keeps the hazard matrix sizes down to "sane"
83 def bitvector_remap(regfile
, rfile
, port
):
84 # 8-bits (at the moment, no SVP64), CR is unary: no remap
# 3 bits, unary already: return the port
90 # 3 bits, unary: return the port
93 # 3 bits, unary: return the port
94 if regfile
== 'SVSTATE':
96 # 9 bits (9 entries), might be unary already
98 if rfile
.unary
: # FAST might be unary already
102 # 10 bits (!!) - reduce to one
104 if rfile
.unary
: # FAST might be unary already
109 if rfile
.unary
: # INT, check if unary/binary
115 # derive from ControlBase rather than have a separate Stage instance,
116 # this is simpler to do
117 class NonProductionCore(ControlBase
):
118 def __init__(self
, pspec
):
# test if SVP64 is to be enabled
122 self
.svp64_en
= hasattr(pspec
, "svp64") and (pspec
.svp64
== True)
124 # test to see if regfile ports should be reduced
125 self
.regreduce_en
= (hasattr(pspec
, "regreduce") and
126 (pspec
.regreduce
== True))
128 # test to see if overlapping of instructions is allowed
129 # (not normally enabled for TestIssuer FSM but useful for checking
130 # the bitvector hazard detection, before doing In-Order)
131 self
.allow_overlap
= (hasattr(pspec
, "allow_overlap") and
132 (pspec
.allow_overlap
== True))
135 self
.make_hazard_vecs
= self
.allow_overlap
136 self
.core_type
= "fsm"
137 if hasattr(pspec
, "core_type"):
138 self
.core_type
= pspec
.core_type
140 super().__init
__(stage
=self
)
142 # single LD/ST funnel for memory access
143 self
.l0
= l0
= TstL0CacheBuffer(pspec
, n_units
=1)
146 # function units (only one each)
147 # only include mmu if enabled in pspec
148 self
.fus
= AllFunctionUnits(pspec
, pilist
=[pi
])
150 # link LoadStore1 into MMU
151 mmu
= self
.fus
.get_fu('mmu0')
152 print ("core pspec", pspec
.ldst_ifacetype
)
153 print ("core mmu", mmu
)
155 print ("core lsmem.lsi", l0
.cmpi
.lsmem
.lsi
)
156 mmu
.alu
.set_ldst_interface(l0
.cmpi
.lsmem
.lsi
)
158 # register files (yes plural)
159 self
.regs
= RegFiles(pspec
, make_hazard_vecs
=self
.make_hazard_vecs
)
161 # set up input and output: unusual requirement to set data directly
162 # (due to the way that the core is set up in a different domain,
163 # see TestIssuer.setup_peripherals
164 self
.p
.i_data
, self
.n
.o_data
= self
.new_specs(None)
165 self
.i
, self
.o
= self
.p
.i_data
, self
.n
.o_data
167 # actual internal input data used (captured)
168 self
.ireg
= self
.ispec()
170 # create per-FU instruction decoders (subsetted). these "satellite"
171 # decoders reduce wire fan-out from the one (main) PowerDecoder2
172 # (used directly by the trap unit) to the *twelve* (or more)
173 # Function Units. we can either have 32 wires (the instruction)
174 # to each, or we can have well over a 200 wire fan-out (to 12
175 # ALUs). it's an easy choice to make.
179 # eep, these should be *per FU* i.e. for FunctionUnitBaseMulti
180 # they should be shared (put into the ALU *once*).
182 for funame
, fu
in self
.fus
.fus
.items():
183 f_name
= fu
.fnunit
.name
184 fnunit
= fu
.fnunit
.value
185 opkls
= fu
.opsubsetkls
187 # TRAP decoder is the *main* decoder
188 self
.trapunit
= funame
190 assert funame
not in self
.decoders
191 self
.decoders
[funame
] = PowerDecodeSubset(None, opkls
, f_name
,
193 state
=self
.ireg
.state
,
194 svp64_en
=self
.svp64_en
,
195 regreduce_en
=self
.regreduce_en
)
196 self
.des
[funame
] = self
.decoders
[funame
].do
198 # create per-Function Unit write-after-write hazard signals
199 # yes, really, this should have been added in ReservationStations
201 for funame
, fu
in self
.fus
.fus
.items():
202 fu
._waw
_hazard
= Signal(name
="waw_%s" % funame
)
204 # share the SPR decoder with the MMU if it exists
205 if "mmu0" in self
.decoders
:
206 self
.decoders
["mmu0"].mmu0_spr_dec
= self
.decoders
["spr0"]
208 # next 3 functions are Stage API Compliance
209 def setup(self
, m
, i
):
213 return CoreInput(self
.pspec
, self
.svp64_en
, self
.regreduce_en
)
218 # elaborate function to create HDL
219 def elaborate(self
, platform
):
220 m
= super().elaborate(platform
)
222 # for testing purposes, to cut down on build time in coriolis2
223 if hasattr(self
.pspec
, "nocore") and self
.pspec
.nocore
== True:
224 x
= Signal() # dummy signal
229 m
.submodules
.fus
= self
.fus
230 m
.submodules
.l0
= l0
= self
.l0
231 self
.regs
.elaborate_into(m
, platform
)
235 # amalgamate write-hazards into a single top-level Signal
236 self
.waw_hazard
= Signal()
238 for funame
, fu
in self
.fus
.fus
.items():
239 whaz
.append(fu
._waw
_hazard
)
240 comb
+= self
.waw_hazard
.eq(Cat(*whaz
).bool())
243 self
.connect_satellite_decoders(m
)
245 # ssh, cheat: trap uses the main decoder because of the rewriting
246 self
.des
[self
.trapunit
] = self
.ireg
.e
.do
248 # connect up Function Units, then read/write ports, and hazard conflict
249 self
.issue_conflict
= Signal()
250 fu_bitdict
, fu_selected
= self
.connect_instruction(m
)
251 raw_hazard
= self
.connect_rdports(m
, fu_bitdict
, fu_selected
)
252 self
.connect_wrports(m
, fu_bitdict
, fu_selected
)
253 if self
.allow_overlap
:
254 comb
+= self
.issue_conflict
.eq(raw_hazard
)
256 # note if an exception happened. in a pipelined or OoO design
257 # this needs to be accompanied by "shadowing" (or stalling)
259 for exc
in self
.fus
.excs
.values():
260 el
.append(exc
.happened
)
261 if len(el
) > 0: # at least one exception
262 comb
+= self
.o
.exc_happened
.eq(Cat(*el
).bool())
266 def connect_satellite_decoders(self
, m
):
268 for k
, v
in self
.decoders
.items():
269 # connect each satellite decoder and give it the instruction.
270 # as subset decoders this massively reduces wire fanout given
271 # the large number of ALUs
272 m
.submodules
["dec_%s" % k
] = v
273 comb
+= v
.dec
.raw_opcode_in
.eq(self
.ireg
.raw_insn_i
)
274 comb
+= v
.dec
.bigendian
.eq(self
.ireg
.bigendian_i
)
275 # sigh due to SVP64 RA_OR_ZERO detection connect these too
276 comb
+= v
.sv_a_nz
.eq(self
.ireg
.sv_a_nz
)
277 if not self
.svp64_en
:
279 comb
+= v
.pred_sm
.eq(self
.ireg
.sv_pred_sm
)
280 comb
+= v
.pred_dm
.eq(self
.ireg
.sv_pred_dm
)
281 if k
== self
.trapunit
:
283 comb
+= v
.sv_rm
.eq(self
.ireg
.sv_rm
) # pass through SVP64 RM
284 comb
+= v
.is_svp64_mode
.eq(self
.ireg
.is_svp64_mode
)
285 # only the LDST PowerDecodeSubset *actually* needs to
286 # know to use the alternative decoder. this is all
288 if not k
.lower().startswith("ldst"):
290 comb
+= v
.use_svp64_ldst_dec
.eq( self
.ireg
.use_svp64_ldst_dec
)
292 def connect_instruction(self
, m
):
293 """connect_instruction
295 uses decoded (from PowerOp) function unit information from CSV files
296 to ascertain which Function Unit should deal with the current
299 some (such as OP_ATTN, OP_NOP) are dealt with here, including
300 ignoring it and halting the processor. OP_NOP is a bit annoying
301 because the issuer expects busy flag still to be raised then lowered.
302 (this requires a fake counter to be set).
304 comb
, sync
= m
.d
.comb
, m
.d
.sync
307 # indicate if core is busy
308 busy_o
= self
.o
.busy_o
309 any_busy_o
= self
.o
.any_busy_o
311 # connect up temporary copy of incoming instruction. the FSM will
312 # either blat the incoming instruction (if valid) into self.ireg
313 # or if the instruction could not be delivered, keep dropping the
314 # latched copy into ireg
315 ilatch
= self
.ispec()
316 self
.instr_active
= Signal()
318 # enable/busy-signals for each FU, get one bit for each FU (by name)
319 fu_enable
= Signal(len(fus
), reset_less
=True)
320 fu_busy
= Signal(len(fus
), reset_less
=True)
323 for i
, funame
in enumerate(fus
.keys()):
324 fu_bitdict
[funame
] = fu_enable
[i
]
325 fu_selected
[funame
] = fu_busy
[i
]
327 # identify function units and create a list by fnunit so that
328 # PriorityPickers can be created for selecting one of them that
329 # isn't busy at the time the incoming instruction needs passing on
330 by_fnunit
= defaultdict(list)
331 for fname
, member
in Function
.__members
__.items():
332 for funame
, fu
in fus
.items():
333 fnunit
= fu
.fnunit
.value
334 if member
.value
& fnunit
: # this FU handles this type of op
335 by_fnunit
[fname
].append((funame
, fu
)) # add by Function
337 # ok now just print out the list of FUs by Function, because we can
338 for fname
, fu_list
in by_fnunit
.items():
339 print ("FUs by type", fname
, fu_list
)
341 # now create a PriorityPicker per FU-type such that only one
342 # non-busy FU will be picked
344 fu_found
= Signal() # take a note if no Function Unit was available
345 for fname
, fu_list
in by_fnunit
.items():
346 i_pp
= PriorityPicker(len(fu_list
))
347 m
.submodules
['i_pp_%s' % fname
] = i_pp
349 for i
, (funame
, fu
) in enumerate(fu_list
):
350 # match the decoded instruction (e.do.fn_unit) against the
351 # "capability" of this FU, gate that by whether that FU is
352 # busy, and drop that into the PriorityPicker.
353 # this will give us an output of the first available *non-busy*
# Function Unit (Reservation Station) capable of handling this
356 fnunit
= fu
.fnunit
.value
357 en_req
= Signal(name
="issue_en_%s" % funame
, reset_less
=True)
358 fnmatch
= (self
.ireg
.e
.do
.fn_unit
& fnunit
).bool()
359 comb
+= en_req
.eq(fnmatch
& ~fu
.busy_o
&
361 i_l
.append(en_req
) # store in list for doing the Cat-trick
362 # picker output, gated by enable: store in fu_bitdict
363 po
= Signal(name
="o_issue_pick_"+funame
) # picker output
364 comb
+= po
.eq(i_pp
.o
[i
] & i_pp
.en_o
)
365 comb
+= fu_bitdict
[funame
].eq(po
)
366 comb
+= fu_selected
[funame
].eq(fu
.busy_o | po
)
367 # if we don't do this, then when there are no FUs available,
368 # the "p.o_ready" signal will go back "ok we accepted this
369 # instruction" which of course isn't true.
370 with m
.If(i_pp
.en_o
):
371 comb
+= fu_found
.eq(1)
372 # for each input, Cat them together and drop them into the picker
373 comb
+= i_pp
.i
.eq(Cat(*i_l
))
375 # rdmask, which is for registers needs to come from the *main* decoder
376 for funame
, fu
in fus
.items():
377 rdmask
= get_rdflags(m
, self
.ireg
.e
, fu
)
378 comb
+= fu
.rdmaskn
.eq(~rdmask
)
380 # sigh - need a NOP counter
382 with m
.If(counter
!= 0):
383 sync
+= counter
.eq(counter
- 1)
386 # default to reading from incoming instruction: may be overridden
387 # by copy from latch when "waiting"
388 comb
+= self
.ireg
.eq(self
.i
)
389 # always say "ready" except if overridden
390 comb
+= self
.p
.o_ready
.eq(1)
393 with m
.State("READY"):
394 with m
.If(self
.p
.i_valid
): # run only when valid
395 with m
.Switch(self
.ireg
.e
.do
.insn_type
):
396 # check for ATTN: halt if true
397 with m
.Case(MicrOp
.OP_ATTN
):
398 m
.d
.sync
+= self
.o
.core_terminate_o
.eq(1)
400 # fake NOP - this isn't really used (Issuer detects NOP)
401 with m
.Case(MicrOp
.OP_NOP
):
402 sync
+= counter
.eq(2)
406 comb
+= self
.instr_active
.eq(1)
407 comb
+= self
.p
.o_ready
.eq(0)
408 # connect instructions. only one enabled at a time
409 for funame
, fu
in fus
.items():
410 do
= self
.des
[funame
]
411 enable
= fu_bitdict
[funame
]
413 # run this FunctionUnit if enabled route op,
414 # issue, busy, read flags and mask to FU
416 # operand comes from the *local* decoder
417 # do not actually issue, though, if there
418 # is a waw hazard. decoder has to still
419 # be asserted in order to detect that, tho
420 comb
+= fu
.oper_i
.eq_from(do
)
421 # issue when valid (and no write-hazard)
422 comb
+= fu
.issue_i
.eq(~self
.waw_hazard
)
423 # instruction ok, indicate ready
424 comb
+= self
.p
.o_ready
.eq(1)
426 if self
.allow_overlap
:
427 with m
.If(~fu_found | self
.waw_hazard
):
428 # latch copy of instruction
429 sync
+= ilatch
.eq(self
.i
)
430 comb
+= self
.p
.o_ready
.eq(1) # accept
434 with m
.State("WAITING"):
435 comb
+= self
.instr_active
.eq(1)
436 comb
+= self
.p
.o_ready
.eq(0)
438 # using copy of instruction, keep waiting until an FU is free
439 comb
+= self
.ireg
.eq(ilatch
)
440 with m
.If(fu_found
): # wait for conflict to clear
441 # connect instructions. only one enabled at a time
442 for funame
, fu
in fus
.items():
443 do
= self
.des
[funame
]
444 enable
= fu_bitdict
[funame
]
446 # run this FunctionUnit if enabled route op,
447 # issue, busy, read flags and mask to FU
449 # operand comes from the *local* decoder,
450 # which is asserted even if not issued,
451 # so that WaW-detection can check for hazards.
452 # only if the waw hazard is clear does the
453 # instruction actually get issued
454 comb
+= fu
.oper_i
.eq_from(do
)
456 comb
+= fu
.issue_i
.eq(~self
.waw_hazard
)
457 with m
.If(~self
.waw_hazard
):
458 comb
+= self
.p
.o_ready
.eq(1)
462 print ("core: overlap allowed", self
.allow_overlap
)
463 # true when any FU is busy (including the cycle where it is perhaps
464 # to be issued - because that's what fu_busy is)
465 comb
+= any_busy_o
.eq(fu_busy
.bool())
466 if not self
.allow_overlap
:
467 # for simple non-overlap, if any instruction is busy, set
468 # busy output for core.
469 comb
+= busy_o
.eq(any_busy_o
)
471 # sigh deal with a fun situation that needs to be investigated
473 with m
.If(self
.issue_conflict
):
475 # make sure that LDST, SPR, MMU, Branch and Trap all say "busy"
476 # and do not allow overlap. these are all the ones that
477 # are non-forward-progressing: exceptions etc. that otherwise
478 # change CoreState for some reason (MSR, PC, SVSTATE)
479 for funame
, fu
in fus
.items():
480 if (funame
.lower().startswith('ldst') or
481 funame
.lower().startswith('branch') or
482 funame
.lower().startswith('mmu') or
483 funame
.lower().startswith('spr') or
484 funame
.lower().startswith('trap')):
485 with m
.If(fu
.busy_o
):
488 # return both the function unit "enable" dict as well as the "busy".
489 # the "busy-or-issued" can be passed in to the Read/Write port
490 # connecters to give them permission to request access to regfiles
491 return fu_bitdict
, fu_selected
493 def connect_rdport(self
, m
, fu_bitdict
, fu_selected
,
494 rdpickers
, regfile
, regname
, fspec
):
495 comb
, sync
= m
.d
.comb
, m
.d
.sync
501 # select the required read port. these are pre-defined sizes
502 rfile
= regs
.rf
[regfile
.lower()]
503 rport
= rfile
.r_ports
[rpidx
]
504 print("read regfile", rpidx
, regfile
, regs
.rf
.keys(),
507 # for checking if the read port has an outstanding write
508 if self
.make_hazard_vecs
:
509 wv
= regs
.wv
[regfile
.lower()]
510 wvchk
= wv
.q_int
# write-vec bit-level hazard check
512 # if a hazard is detected on this read port, simply blithely block
513 # every FU from reading on it. this is complete overkill but very
515 hazard_detected
= Signal(name
="raw_%s_%s" % (regfile
, rpidx
))
518 if not isinstance(fspecs
, list):
524 for i
, fspec
in enumerate(fspecs
):
525 # get the regfile specs for this regfile port
526 print ("fpsec", i
, fspec
, len(fspec
.specs
))
527 name
= "%s_%s_%d" % (regfile
, regname
, i
)
528 ppoffs
.append(pplen
) # record offset for picker
529 pplen
+= len(fspec
.specs
)
530 rdflag
= Signal(name
="rdflag_"+name
, reset_less
=True)
531 comb
+= rdflag
.eq(fspec
.okflag
)
532 rdflags
.append(rdflag
)
534 print ("pplen", pplen
)
536 # create a priority picker to manage this port
537 rdpickers
[regfile
][rpidx
] = rdpick
= PriorityPicker(pplen
)
538 m
.submodules
["rdpick_%s_%s" % (regfile
, rpidx
)] = rdpick
544 for i
, fspec
in enumerate(fspecs
):
545 (rf
, _read
, wid
, fuspecs
) = \
546 (fspec
.okflag
, fspec
.regport
, fspec
.wid
, fspec
.specs
)
547 # connect up the FU req/go signals, and the reg-read to the FU
548 # and create a Read Broadcast Bus
549 for pi
, fuspec
in enumerate(fspec
.specs
):
550 (funame
, fu
, idx
) = (fuspec
.funame
, fuspec
.fu
, fuspec
.idx
)
552 name
= "%s_%s_%s_%i" % (regfile
, rpidx
, funame
, pi
)
553 fu_active
= fu_selected
[funame
]
554 fu_issued
= fu_bitdict
[funame
]
556 # get (or set up) a latched copy of read register number
557 # and (sigh) also the read-ok flag
558 # TODO: use nmutil latchregister
559 rhname
= "%s_%s_%d" % (regfile
, regname
, i
)
560 rdflag
= Signal(name
="rdflag_%s_%s" % (funame
, rhname
),
562 if rhname
not in fu
.rf_latches
:
563 rfl
= Signal(name
="rdflag_latch_%s_%s" % (funame
, rhname
))
564 fu
.rf_latches
[rhname
] = rfl
565 with m
.If(fu
.issue_i
):
566 sync
+= rfl
.eq(rdflags
[i
])
568 rfl
= fu
.rf_latches
[rhname
]
570 # now the register port
571 rname
= "%s_%s_%s_%d" % (funame
, regfile
, regname
, pi
)
572 read
= Signal
.like(_read
, name
="read_"+rname
)
573 if rname
not in fu
.rd_latches
:
574 rdl
= Signal
.like(_read
, name
="rdlatch_"+rname
)
575 fu
.rd_latches
[rname
] = rdl
576 with m
.If(fu
.issue_i
):
577 sync
+= rdl
.eq(_read
)
579 rdl
= fu
.rd_latches
[rname
]
581 # make the read immediately available on issue cycle
# after the read cycle, otherwise use the latched copy.
583 # this captures the regport and okflag on issue
584 with m
.If(fu
.issue_i
):
585 comb
+= read
.eq(_read
)
586 comb
+= rdflag
.eq(rdflags
[i
])
589 comb
+= rdflag
.eq(rfl
)
591 # connect request-read to picker input, and output to go-rd
592 addr_en
= Signal
.like(read
, name
="addr_en_"+name
)
593 pick
= Signal(name
="pick_"+name
) # picker input
594 rp
= Signal(name
="rp_"+name
) # picker output
595 delay_pick
= Signal(name
="dp_"+name
) # read-enable "underway"
596 rhazard
= Signal(name
="rhaz_"+name
)
598 # exclude any currently-enabled read-request (mask out active)
599 # entirely block anything hazarded from being picked
600 comb
+= pick
.eq(fu
.rd_rel_o
[idx
] & fu_active
& rdflag
&
601 ~delay_pick
& ~rhazard
)
602 comb
+= rdpick
.i
[pi
].eq(pick
)
603 comb
+= fu
.go_rd_i
[idx
].eq(delay_pick
) # pass in *delayed* pick
605 # if picked, select read-port "reg select" number to port
606 comb
+= rp
.eq(rdpick
.o
[pi
] & rdpick
.en_o
)
607 sync
+= delay_pick
.eq(rp
) # delayed "pick"
608 comb
+= addr_en
.eq(Mux(rp
, read
, 0))
610 # the read-enable happens combinatorially (see mux-bus below)
611 # but it results in the data coming out on a one-cycle delay.
615 addrs
.append(addr_en
)
618 # use the *delayed* pick signal to put requested data onto bus
619 with m
.If(delay_pick
):
620 # connect regfile port to input, creating fan-out Bus
622 print("reg connect widths",
623 regfile
, regname
, pi
, funame
,
624 src
.shape(), rport
.o_data
.shape())
625 # all FUs connect to same port
626 comb
+= src
.eq(rport
.o_data
)
628 if not self
.make_hazard_vecs
:
631 # read the write-hazard bitvector (wv) for any bit that is
632 wvchk_en
= Signal(len(wvchk
), name
="wv_chk_addr_en_"+name
)
633 issue_active
= Signal(name
="rd_iactive_"+name
)
634 # XXX combinatorial loop here
635 comb
+= issue_active
.eq(fu_active
& rdflag
)
636 with m
.If(issue_active
):
638 comb
+= wvchk_en
.eq(read
)
640 comb
+= wvchk_en
.eq(1<<read
)
641 # if FU is busy (which doesn't get set at the same time as
642 # issue) and no hazard was detected, clear wvchk_en (i.e.
643 # stop checking for hazards). there is a loop here, but it's
644 # via a DFF, so is ok. some linters may complain, but hey.
645 with m
.If(fu
.busy_o
& ~rhazard
):
646 comb
+= wvchk_en
.eq(0)
648 # read-hazard is ANDed with (filtered by) what is actually
650 comb
+= rhazard
.eq((wvchk
& wvchk_en
).bool())
652 wvens
.append(wvchk_en
)
654 # or-reduce the muxed read signals
656 # for unary-addressed
657 comb
+= rport
.ren
.eq(ortreereduce_sig(rens
))
659 # for binary-addressed
660 comb
+= rport
.addr
.eq(ortreereduce_sig(addrs
))
661 comb
+= rport
.ren
.eq(Cat(*rens
).bool())
662 print ("binary", regfile
, rpidx
, rport
, rport
.ren
, rens
, addrs
)
664 if not self
.make_hazard_vecs
:
665 return Const(0) # declare "no hazards"
667 # enable the read bitvectors for this issued instruction
668 # and return whether any write-hazard bit is set
669 wvchk_and
= Signal(len(wvchk
), name
="wv_chk_"+name
)
670 comb
+= wvchk_and
.eq(wvchk
& ortreereduce_sig(wvens
))
671 comb
+= hazard_detected
.eq(wvchk_and
.bool())
672 return hazard_detected
674 def connect_rdports(self
, m
, fu_bitdict
, fu_selected
):
675 """connect read ports
677 orders the read regspecs into a dict-of-dicts, by regfile, by
678 regport name, then connects all FUs that want that regport by
679 way of a PriorityPicker.
681 comb
, sync
= m
.d
.comb
, m
.d
.sync
686 # dictionary of lists of regfile read ports
687 byregfiles_rdspec
= self
.get_byregfiles(m
, True)
689 # okaay, now we need a PriorityPicker per regfile per regfile port
690 # loootta pickers... peter piper picked a pack of pickled peppers...
692 for regfile
, fuspecs
in byregfiles_rdspec
.items():
693 rdpickers
[regfile
] = {}
695 # argh. an experiment to merge RA and RB in the INT regfile
696 # (we have too many read/write ports)
697 if self
.regreduce_en
:
699 fuspecs
['rabc'] = [fuspecs
.pop('rb')]
700 fuspecs
['rabc'].append(fuspecs
.pop('rc'))
701 fuspecs
['rabc'].append(fuspecs
.pop('ra'))
702 if regfile
== 'FAST':
703 fuspecs
['fast1'] = [fuspecs
.pop('fast1')]
704 if 'fast2' in fuspecs
:
705 fuspecs
['fast1'].append(fuspecs
.pop('fast2'))
706 if 'fast3' in fuspecs
:
707 fuspecs
['fast1'].append(fuspecs
.pop('fast3'))
709 # for each named regfile port, connect up all FUs to that port
710 # also return (and collate) hazard detection)
711 for (regname
, fspec
) in sort_fuspecs(fuspecs
):
712 print("connect rd", regname
, fspec
)
713 rh
= self
.connect_rdport(m
, fu_bitdict
, fu_selected
,
718 return Cat(*rd_hazard
).bool()
720 def make_hazards(self
, m
, regfile
, rfile
, wvclr
, wvset
,
721 funame
, regname
, idx
,
722 addr_en
, wp
, fu
, fu_active
, wrflag
, write
,
724 """make_hazards: a setter and a clearer for the regfile write ports
726 setter is at issue time (using PowerDecoder2 regfile write numbers)
727 clearer is at regfile write time (when FU has said what to write to)
729 there is *one* unusual case here which has to be dealt with:
730 when the Function Unit does *NOT* request a write to the regfile
731 (has its data.ok bit CLEARED). this is perfectly legitimate.
734 comb
, sync
= m
.d
.comb
, m
.d
.sync
735 name
= "%s_%s_%d" % (funame
, regname
, idx
)
737 # connect up the bitvector write hazard. unlike the
738 # regfile writeports, a ONE must be written to the corresponding
739 # bit of the hazard bitvector (to indicate the existence of
742 # the detection of what shall be written to is based
743 # on *issue*. it is delayed by 1 cycle so that instructions
744 # "addi 5,5,0x2" do not cause combinatorial loops due to
745 # fake-dependency on *themselves*. this will totally fail
746 # spectacularly when doing multi-issue
747 print ("write vector (for regread)", regfile
, wvset
)
748 wviaddr_en
= Signal(len(wvset
), name
="wv_issue_addr_en_"+name
)
749 issue_active
= Signal(name
="iactive_"+name
)
750 sync
+= issue_active
.eq(fu
.issue_i
& fu_active
& wrflag
)
751 with m
.If(issue_active
):
753 comb
+= wviaddr_en
.eq(write
)
755 comb
+= wviaddr_en
.eq(1<<write
)
757 # deal with write vector clear: this kicks in when the regfile
758 # is written to, and clears the corresponding bitvector entry
759 print ("write vector", regfile
, wvclr
)
760 wvaddr_en
= Signal(len(wvclr
), name
="wvaddr_en_"+name
)
762 comb
+= wvaddr_en
.eq(addr_en
)
765 comb
+= wvaddr_en
.eq(1<<addr_en
)
767 # XXX ASSUME that LDSTFunctionUnit always sets the data it intends to
768 # this may NOT be the case when an exception occurs
769 if isinstance(fu
, LDSTFunctionUnit
):
770 return wvaddr_en
, wviaddr_en
772 # okaaay, this is preparation for the awkward case.
773 # * latch a copy of wrflag when issue goes high.
774 # * when the fu_wrok (data.ok) flag is NOT set,
775 # but the FU is done, the FU is NEVER going to write
776 # so the bitvector has to be cleared.
777 latch_wrflag
= Signal(name
="latch_wrflag_"+name
)
778 with m
.If(~fu
.busy_o
):
779 sync
+= latch_wrflag
.eq(0)
780 with m
.If(fu
.issue_i
& fu_active
):
781 sync
+= latch_wrflag
.eq(wrflag
)
782 with m
.If(fu
.alu_done_o
& latch_wrflag
& ~fu_wrok
):
784 comb
+= wvaddr_en
.eq(write
) # addr_en gated with wp, don't use
786 comb
+= wvaddr_en
.eq(1<<addr_en
) # binary addr_en not gated
788 return wvaddr_en
, wviaddr_en
790 def connect_wrport(self
, m
, fu_bitdict
, fu_selected
,
791 wrpickers
, regfile
, regname
, fspec
):
792 comb
, sync
= m
.d
.comb
, m
.d
.sync
798 # select the required write port. these are pre-defined sizes
799 rfile
= regs
.rf
[regfile
.lower()]
800 wport
= rfile
.w_ports
[rpidx
]
802 print("connect wr", regname
, "unary", rfile
.unary
, fspec
)
803 print(regfile
, regs
.rf
.keys())
805 # select the write-protection hazard vector. note that this still
806 # requires to WRITE to the hazard bitvector! read-requests need
807 # to RAISE the bitvector (set it to 1), which, duh, requires a WRITE
808 if self
.make_hazard_vecs
:
809 wv
= regs
.wv
[regfile
.lower()]
810 wvset
= wv
.s
# write-vec bit-level hazard ctrl
811 wvclr
= wv
.r
# write-vec bit-level hazard ctrl
812 wvchk
= wv
.q
# write-after-write hazard check
815 if not isinstance(fspecs
, list):
822 for i
, fspec
in enumerate(fspecs
):
823 # get the regfile specs for this regfile port
824 (wf
, _write
, wid
, fuspecs
) = \
825 (fspec
.okflag
, fspec
.regport
, fspec
.wid
, fspec
.specs
)
826 print ("fpsec", i
, "wrflag", wf
, fspec
, len(fuspecs
))
827 ppoffs
.append(pplen
) # record offset for picker
828 pplen
+= len(fuspecs
)
830 name
= "%s_%s_%d" % (regfile
, regname
, i
)
831 wrflag
= Signal(name
="wr_flag_"+name
)
833 comb
+= wrflag
.eq(wf
)
836 wrflags
.append(wrflag
)
838 # create a priority picker to manage this port
839 wrpickers
[regfile
][rpidx
] = wrpick
= PriorityPicker(pplen
)
840 m
.submodules
["wrpick_%s_%s" % (regfile
, rpidx
)] = wrpick
847 #wvens = [] - not needed: reading of writevec is permanently held hi
849 for i
, fspec
in enumerate(fspecs
):
850 # connect up the FU req/go signals and the reg-read to the FU
851 # these are arbitrated by Data.ok signals
852 (wf
, _write
, wid
, fuspecs
) = \
853 (fspec
.okflag
, fspec
.regport
, fspec
.wid
, fspec
.specs
)
854 for pi
, fuspec
in enumerate(fspec
.specs
):
855 (funame
, fu
, idx
) = (fuspec
.funame
, fuspec
.fu
, fuspec
.idx
)
856 fu_requested
= fu_bitdict
[funame
]
858 name
= "%s_%s_%s_%d" % (funame
, regfile
, regname
, idx
)
859 # get (or set up) a write-latched copy of write register number
860 write
= Signal
.like(_write
, name
="write_"+name
)
861 rname
= "%s_%s_%s_%d" % (funame
, regfile
, regname
, idx
)
862 if rname
not in fu
.wr_latches
:
863 wrl
= Signal
.like(_write
, name
="wrlatch_"+rname
)
864 fu
.wr_latches
[rname
] = write
865 # do not depend on fu.issue_i here, it creates a
866 # combinatorial loop on waw checking. using the FU
867 # "enable" bitdict entry for this FU is sufficient,
868 # because the PowerDecoder2 read/write nums are
869 # valid continuously when the instruction is valid
870 with m
.If(fu_requested
):
871 sync
+= wrl
.eq(_write
)
872 comb
+= write
.eq(_write
)
874 comb
+= write
.eq(wrl
)
876 write
= fu
.wr_latches
[rname
]
878 # write-request comes from dest.ok
879 dest
= fu
.get_out(idx
)
880 fu_dest_latch
= fu
.get_fu_out(idx
) # latched output
881 name
= "%s_%s_%d" % (funame
, regname
, idx
)
882 fu_wrok
= Signal(name
="fu_wrok_"+name
, reset_less
=True)
883 comb
+= fu_wrok
.eq(dest
.ok
& fu
.busy_o
)
885 # connect request-write to picker input, and output to go-wr
886 fu_active
= fu_selected
[funame
]
887 pick
= fu
.wr
.rel_o
[idx
] & fu_active
888 comb
+= wrpick
.i
[pi
].eq(pick
)
889 # create a single-pulse go write from the picker output
890 wr_pick
= Signal(name
="wpick_%s_%s_%d" % (funame
, regname
, idx
))
891 comb
+= wr_pick
.eq(wrpick
.o
[pi
] & wrpick
.en_o
)
892 comb
+= fu
.go_wr_i
[idx
].eq(rising_edge(m
, wr_pick
))
894 # connect the regspec write "reg select" number to this port
895 # only if one FU actually requests (and is granted) the port
896 # will the write-enable be activated
897 wname
= "waddr_en_%s_%s_%d" % (funame
, regname
, idx
)
898 addr_en
= Signal
.like(write
, name
=wname
)
900 comb
+= wp
.eq(wr_pick
& wrpick
.en_o
)
901 comb
+= addr_en
.eq(Mux(wp
, write
, 0))
905 addrs
.append(addr_en
)
908 # connect regfile port to input
909 print("reg connect widths",
910 regfile
, regname
, pi
, funame
,
911 dest
.shape(), wport
.i_data
.shape())
912 wsigs
.append(fu_dest_latch
)
914 # now connect up the bitvector write hazard
915 if not self
.make_hazard_vecs
:
917 res
= self
.make_hazards(m
, regfile
, rfile
, wvclr
, wvset
,
918 funame
, regname
, idx
,
919 addr_en
, wp
, fu
, fu_active
,
920 wrflags
[i
], write
, fu_wrok
)
921 wvaddr_en
, wv_issue_en
= res
922 wvclren
.append(wvaddr_en
) # set only: no data => clear bit
923 wvseten
.append(wv_issue_en
) # set data same as enable
925 # read the write-hazard bitvector (wv) for any bit that is
926 fu_requested
= fu_bitdict
[funame
]
927 wvchk_en
= Signal(len(wvchk
), name
="waw_chk_addr_en_"+name
)
928 issue_active
= Signal(name
="waw_iactive_"+name
)
929 whazard
= Signal(name
="whaz_"+name
)
931 # XXX EEK! STATE regfile (branch) does not have an
932 # write-active indicator in regspec_decode_write()
933 print ("XXX FIXME waw_iactive", issue_active
,
936 # check bits from the incoming instruction. note (back
937 # in connect_instruction) that the decoder is held for
938 # us to be able to do this, here... *without* issue being
939 # held HI. we MUST NOT gate this with fu.issue_i or
940 # with fu_bitdict "enable": it would create a loop
941 comb
+= issue_active
.eq(wf
)
942 with m
.If(issue_active
):
944 comb
+= wvchk_en
.eq(write
)
946 comb
+= wvchk_en
.eq(1<<write
)
947 # if FU is busy (which doesn't get set at the same time as
948 # issue) and no hazard was detected, clear wvchk_en (i.e.
949 # stop checking for hazards). there is a loop here, but it's
950 # via a DFF, so is ok. some linters may complain, but hey.
951 with m
.If(fu
.busy_o
& ~whazard
):
952 comb
+= wvchk_en
.eq(0)
954 # write-hazard is ANDed with (filtered by) what is actually
955 # being requested. the wvchk data is on a one-clock delay,
956 # and wvchk_en comes directly from the main decoder
957 comb
+= whazard
.eq((wvchk
& wvchk_en
).bool())
959 comb
+= fu
._waw
_hazard
.eq(1)
961 #wvens.append(wvchk_en)
963 # here is where we create the Write Broadcast Bus. simple, eh?
964 comb
+= wport
.i_data
.eq(ortreereduce_sig(wsigs
))
966 # for unary-addressed
967 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
969 # for binary-addressed
970 comb
+= wport
.addr
.eq(ortreereduce_sig(addrs
))
971 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
973 if not self
.make_hazard_vecs
:
976 # return these here rather than set wvclr/wvset directly,
977 # because there may be more than one write-port to a given
978 # regfile. example: XER has a write-port for SO, CA, and OV
979 # and the *last one added* of those would overwrite the other
980 # two. solution: have connect_wrports collate all the
981 # or-tree-reduced bitvector set/clear requests and drop them
982 # in as a single "thing". this can only be done because the
983 # set/get is an unary bitvector.
984 print ("make write-vecs", regfile
, regname
, wvset
, wvclr
)
985 return (wvclren
, # clear (regfile write)
986 wvseten
) # set (issue time)
def connect_wrports(self, m, fu_bitdict, fu_selected):
    """connect write ports

    orders the write regspecs into a dict-of-dicts, by regfile, by
    regport name (see get_byregfiles), then hands each regfile port,
    together with the Function Units that want it, to connect_wrport.

    connect_wrport returns two lists per port: the write-hazard "clear"
    enables and the write-hazard "set" enables.  these are collated
    here under the lower-cased regfile name.  if make_hazard_vecs is
    set, each collated list is OR-reduced and drives the "r" (clear)
    and "s" (set) inputs of that regfile's write-vector.

    m           -- module being built: statements are added to m.d.comb
    fu_bitdict  -- passed through unchanged to connect_wrport
    fu_selected -- passed through unchanged to connect_wrport

    returns None.
    """
    comb = m.d.comb

    # NOTE(review): this binding was not present in the text this
    # method was restored from: `regs` is read further down, so it is
    # assumed to be the core's regfile collection.  confirm.
    regs = self.regs

    # dictionary of lists of regfile write ports
    byregfiles_wrspec = self.get_byregfiles(m, False)

    # NOTE(review): likewise restored.  one (initially empty) dict per
    # regfile is created below and is assumed to be filled in by
    # connect_wrport.
    wrpickers = {}

    # port-merging table used when regreduce_en is set:
    # regfile -> (port that is kept, ports folded into it if present)
    merge_plan = {
        'INT': ('o', ('o1',)),
        'FAST': ('fast1', ('fast2', 'fast3')),
    }

    wvclrers = defaultdict(list)
    wvseters = defaultdict(list)
    for regfile, fuspecs in byregfiles_wrspec.items():
        wrpickers[regfile] = {}

        if self.regreduce_en and regfile in merge_plan:
            # argh, more port-merging.  the kept port must exist (a
            # KeyError is raised if it does not), the others are
            # optional.  'o1' used to be popped unconditionally, which
            # raised KeyError when no FU had that port.
            keep, extras = merge_plan[regfile]
            merged = [fuspecs.pop(keep)]
            merged.extend(fuspecs.pop(extra)
                          for extra in extras if extra in fuspecs)
            fuspecs[keep] = merged

        # collate these and record them by regfile because there
        # are sometimes more write-ports per regfile
        rfkey = regfile.lower()
        for (regname, fspec) in sort_fuspecs(fuspecs):
            # NOTE(review): the `wrpickers` argument is restored (there
            # was a gap in the argument list at exactly this position).
            # check against the signature of connect_wrport.
            wvclren, wvseten = self.connect_wrport(m,
                                                   fu_bitdict, fu_selected,
                                                   wrpickers,
                                                   regfile, regname, fspec)
            wvclrers[rfkey] += wvclren
            wvseters[rfkey] += wvseten

    # NOTE(review): the body of this guard was missing; an early return
    # is assumed, as everything below only concerns the write-vectors.
    if not self.make_hazard_vecs:
        return

    # for write-vectors: reduce the clr-ers and set-ers down to
    # a single set of bits.  otherwise if there are two write
    # ports (on some regfiles), the last one doing comb += on
    # the reg.wv[regfile] instance "wins" (and all others are ignored,
    # whoops).
    for regfile, wvclren in wvclrers.items():
        wv = regs.wv[regfile]
        wvseten = wvseters[regfile]
        # write-vec bit-level hazard ctrl
        comb += wv.r.eq(ortreereduce_sig(wvclren))  # clear (regfile write)
        comb += wv.s.eq(ortreereduce_sig(wvseten))  # set (issue time)
def get_byregfiles(self, m, readmode):
    """group the Function Units' register ports by regfile and port name

    for every Function Unit, each source operand (readmode True) or
    destination operand (readmode False) is looked up with
    fu.get_io_spec and decoded with regspec_decode against the decoded
    instruction, self.ireg.e.  the results are collected into a
    dict-of-dicts: first key regfile, second key regfile port name,
    value a ByRegSpec(okflag, regport, wid, specs), where specs is the
    list of FUSpec(funame, fu, idx) entries - one per Function Unit
    operand that needs that port.

    as a side-effect the per-FU latch dictionaries are (re)created
    empty, and the whole structure is printed for debug purposes.

    m        -- module being built, passed on to regspec_decode
    readmode -- True for read ports, False for write ports

    returns the dict-of-dicts described above (a defaultdict(dict)).
    """
    mode = "read" if readmode else "write"

    # NOTE(review): this binding was not present in the text this
    # method was restored from.  `fus` must be a dict of name -> FU
    # (it is iterated with .items()): confirm where it comes from.
    fus = self.fus.fus
    e = self.ireg.e  # decoded instruction to execute

    # dictionary of dictionaries of lists/tuples of regfile ports.
    # first key: regfile.  second key: regfile port name
    byregfiles_spec = defaultdict(dict)

    for (funame, fu) in fus.items():
        # create in each FU a receptacle for the read/write register
        # hazard numbers (and okflags for read).  to be latched in
        # connect_rd/write_ports
        # NOTE(review): the read/write split and wr_latches are restored
        # (only the two read-side assignments were present, but
        # fu.wr_latches is read when connecting write ports).  confirm.
        if readmode:
            fu.rd_latches = {}  # read reg number latches
            fu.rf_latches = {}  # read flag latches
        else:
            fu.wr_latches = {}  # write reg number latches

        # construct regfile specs: read uses inspec, write outspec
        print("%s ports for %s" % (mode, funame))
        for idx in range(fu.n_src if readmode else fu.n_dst):
            (regfile, regname, wid) = fu.get_io_spec(readmode, idx)
            print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))

            # the PowerDecoder2 (main one, not the satellites) contains
            # the decoded regfile numbers. obtain these now
            decinfo = regspec_decode(m, readmode, e, regfile, regname)

            # construct the dictionary of regspec information by regfile
            lanes = byregfiles_spec[regfile]
            if regname not in lanes:
                lanes[regname] = ByRegSpec(decinfo.okflag, decinfo.regport,
                                           wid, [])

            # here we start to create "lanes" where each Function Unit
            # requiring access to a given [single-contended resource]
            # regfile port is appended to a list, so that PriorityPickers
            # can be created to give uncontested access to it
            lanes[regname].specs.append(FUSpec(funame, fu, idx))

    # ok just print that all out, for convenience
    for regfile, fuspecs in byregfiles_spec.items():
        print("regfile %s ports:" % mode, regfile)
        for regname, fspec in fuspecs.items():
            # unpack into "specs", not "fuspecs": the latter is the
            # dict that is being iterated over right now
            okflag, regport, wid, specs = fspec
            print(" rf %s port %s lane: %s" % (mode, regfile, regname))
            print(" %s" % regname, wid, okflag, regport)
            for (funame, fu, idx) in specs:
                fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                print(" ", funame, fu.__class__.__name__, idx, fusig)

    return byregfiles_spec
1112 yield from self
.fus
.ports()
1113 yield from self
.i
.e
.ports()
1114 yield from self
.l0
.ports()
1121 if __name__
== '__main__':
1122 pspec
= TestMemPspec(ldst_ifacetype
='testpi',
1128 dut
= NonProductionCore(pspec
)
1129 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
1130 with
open("test_core.il", "w") as f
: