3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 (update: actually this is being added now:
21 https://bugs.libre-soc.org/show_bug.cgi?id=737)
24 from nmigen
import (Elaboratable
, Module
, Signal
, ResetSignal
, Cat
, Mux
,
26 from nmigen
.cli
import rtlil
28 from openpower
.decoder
.power_decoder2
import PowerDecodeSubset
29 from openpower
.decoder
.power_regspec_map
import regspec_decode
30 from openpower
.sv
.svp64
import SVP64Rec
32 from nmutil
.picker
import PriorityPicker
33 from nmutil
.util
import treereduce
34 from nmutil
.singlepipe
import ControlBase
36 from soc
.fu
.compunits
.compunits
import AllFunctionUnits
, LDSTFunctionUnit
37 from soc
.regfile
.regfiles
import RegFiles
38 from openpower
.decoder
.power_decoder2
import get_rdflags
39 from soc
.experiment
.l0_cache
import TstL0CacheBuffer
# test only
40 from soc
.config
.test
.test_loadstore
import TestMemPspec
41 from openpower
.decoder
.power_enums
import MicrOp
, Function
42 from soc
.simple
.core_data
import CoreInput
, CoreOutput
44 from collections
import defaultdict
, namedtuple
47 from nmutil
.util
import rising_edge
49 FUSpec
= namedtuple("FUSpec", ["funame", "fu", "idx"])
50 ByRegSpec
= namedtuple("ByRegSpec", ["okflag", "regport", "wid", "specs"])
52 # helper function for reducing a list of signals down to a parallel
def ortreereduce(tree, attr="o_data"):
    """OR-reduce one attribute taken from every item of `tree`.

    For each element of `tree` the attribute named by `attr` (default
    "o_data") is fetched with getattr; the fetched values are combined by
    treereduce using operator.or_.  The value returned is whatever
    treereduce returns.
    """
    def fetch(item):
        # same lookup the reduction applies to every element
        return getattr(item, attr)

    return treereduce(tree, operator.or_, fetch)
def ortreereduce_sig(tree):
    """OR-reduce the items of `tree` themselves.

    Unlike ortreereduce no attribute is looked up: every element is passed
    through unchanged and the elements are combined by treereduce using
    operator.or_.
    """
    def passthrough(sig):
        # identity: the element itself is the value to be ORed
        return sig

    return treereduce(tree, operator.or_, passthrough)
62 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the (regname, fspec) pairs of `fuspecs`, "full" names first.

    `fuspecs` is a mapping of register-port name to its spec.  Entries whose
    name starts with "full" are placed ahead of all other entries; within
    each of the two groups the mapping's own iteration order is kept.

    Returns a new list of (regname, fspec) tuples; `fuspecs` is not modified.
    An empty mapping gives an empty list.
    """
    full_first = []   # names starting with "full"
    remainder = []    # everything else, in original order
    for regname, fspec in fuspecs.items():
        if regname.startswith("full"):
            full_first.append((regname, fspec))
        else:
            remainder.append((regname, fspec))
    return full_first + remainder
74 # a hazard bitvector "remap" function which returns an AST expression
75 # that remaps read/write hazard regfile port numbers to either a full
76 # bitvector or a reduced subset one. SPR for example is reduced to one bit.
78 # CRITICALLY-IMPORTANT NOTE: these bitvectors *have* to match up per
79 # regfile! therefore the remapping is per regfile, *NOT* per regfile
80 # port and certainly not based on whether it is a read port or write port.
81 # note that any reductions here will result in degraded performance due
82 # to conflicts, but at least it keeps the hazard matrix sizes down to "sane"
83 def bitvector_remap(regfile
, rfile
, port
):
84 # 8-bits (at the moment, no SVP64), CR is unary: no remap
87 # 3 bits, unary already: return the port
90 # 3 bits, unary: return the port
93 # 3 bits, unary: return the port
94 if regfile
== 'SVSTATE':
96 # 9 bits (9 entries), might be unary already
98 if rfile
.unary
: # FAST might be unary already
102 # 10 bits (!!) - reduce to one
104 if rfile
.unary
: # FAST might be unary already
109 if rfile
.unary
: # INT, check if unary/binary
115 # derive from ControlBase rather than have a separate Stage instance,
116 # this is simpler to do
117 class NonProductionCore(ControlBase
):
118 def __init__(self
, pspec
):
121 # test if SVP64 is to be enabled
122 self
.svp64_en
= hasattr(pspec
, "svp64") and (pspec
.svp64
== True)
124 # test to see if regfile ports should be reduced
125 self
.regreduce_en
= (hasattr(pspec
, "regreduce") and
126 (pspec
.regreduce
== True))
128 # test to see if overlapping of instructions is allowed
129 # (not normally enabled for TestIssuer FSM but useful for checking
130 # the bitvector hazard detection, before doing In-Order)
131 self
.allow_overlap
= (hasattr(pspec
, "allow_overlap") and
132 (pspec
.allow_overlap
== True))
135 self
.make_hazard_vecs
= self
.allow_overlap
136 self
.core_type
= "fsm"
137 if hasattr(pspec
, "core_type"):
138 self
.core_type
= pspec
.core_type
140 super().__init
__(stage
=self
)
142 # single LD/ST funnel for memory access
143 self
.l0
= l0
= TstL0CacheBuffer(pspec
, n_units
=1)
146 # function units (only one each)
147 # only include mmu if enabled in pspec
148 self
.fus
= AllFunctionUnits(pspec
, pilist
=[pi
])
150 # link LoadStore1 into MMU and make L1 I-Cache easy to get at
151 mmu
= self
.fus
.get_fu('mmu0')
152 ldst0
= self
.fus
.get_fu('ldst0')
153 print ("core pspec", pspec
.ldst_ifacetype
)
154 print ("core mmu", mmu
)
156 lsi
= l0
.cmpi
.lsmem
.lsi
# a LoadStore1 Interface object
157 print ("core lsmem.lsi", lsi
)
158 mmu
.alu
.set_ldst_interface(lsi
)
159 self
.icache
= lsi
.icache
161 # register files (yes plural)
162 self
.regs
= RegFiles(pspec
, make_hazard_vecs
=self
.make_hazard_vecs
)
164 # set up input and output: unusual requirement to set data directly
165 # (due to the way that the core is set up in a different domain,
166 # see TestIssuer.setup_peripherals
167 self
.p
.i_data
, self
.n
.o_data
= self
.new_specs(None)
168 self
.i
, self
.o
= self
.p
.i_data
, self
.n
.o_data
170 # actual internal input data used (captured)
171 self
.ireg
= self
.ispec()
173 # create per-FU instruction decoders (subsetted). these "satellite"
174 # decoders reduce wire fan-out from the one (main) PowerDecoder2
175 # (used directly by the trap unit) to the *twelve* (or more)
176 # Function Units. we can either have 32 wires (the instruction)
177 # to each, or we can have well over a 200 wire fan-out (to 12
178 # ALUs). it's an easy choice to make.
182 # eep, these should be *per FU* i.e. for FunctionUnitBaseMulti
183 # they should be shared (put into the ALU *once*).
185 for funame
, fu
in self
.fus
.fus
.items():
186 f_name
= fu
.fnunit
.name
187 fnunit
= fu
.fnunit
.value
188 opkls
= fu
.opsubsetkls
190 # TRAP decoder is the *main* decoder
191 self
.trapunit
= funame
193 assert funame
not in self
.decoders
194 self
.decoders
[funame
] = PowerDecodeSubset(None, opkls
, f_name
,
196 state
=self
.ireg
.state
,
197 svp64_en
=self
.svp64_en
,
198 regreduce_en
=self
.regreduce_en
)
199 self
.des
[funame
] = self
.decoders
[funame
].do
201 # create per-Function Unit write-after-write hazard signals
202 # yes, really, this should have been added in ReservationStations
204 for funame
, fu
in self
.fus
.fus
.items():
205 fu
._waw
_hazard
= Signal(name
="waw_%s" % funame
)
207 # share the SPR decoder with the MMU if it exists
208 if "mmu0" in self
.decoders
:
209 self
.decoders
["mmu0"].mmu0_spr_dec
= self
.decoders
["spr0"]
211 # next 3 functions are Stage API Compliance
212 def setup(self
, m
, i
):
216 return CoreInput(self
.pspec
, self
.svp64_en
, self
.regreduce_en
)
221 # elaborate function to create HDL
222 def elaborate(self
, platform
):
223 m
= super().elaborate(platform
)
225 # for testing purposes, to cut down on build time in coriolis2
226 if hasattr(self
.pspec
, "nocore") and self
.pspec
.nocore
== True:
227 x
= Signal() # dummy signal
232 m
.submodules
.fus
= self
.fus
233 m
.submodules
.l0
= l0
= self
.l0
234 self
.regs
.elaborate_into(m
, platform
)
238 # amalgamate write-hazards into a single top-level Signal
239 self
.waw_hazard
= Signal()
241 for funame
, fu
in self
.fus
.fus
.items():
242 whaz
.append(fu
._waw
_hazard
)
243 comb
+= self
.waw_hazard
.eq(Cat(*whaz
).bool())
246 self
.connect_satellite_decoders(m
)
248 # ssh, cheat: trap uses the main decoder because of the rewriting
249 self
.des
[self
.trapunit
] = self
.ireg
.e
.do
251 # connect up Function Units, then read/write ports, and hazard conflict
252 self
.issue_conflict
= Signal()
253 fu_bitdict
, fu_selected
= self
.connect_instruction(m
)
254 raw_hazard
= self
.connect_rdports(m
, fu_bitdict
, fu_selected
)
255 self
.connect_wrports(m
, fu_bitdict
, fu_selected
)
256 if self
.allow_overlap
:
257 comb
+= self
.issue_conflict
.eq(raw_hazard
)
259 # note if an exception happened. in a pipelined or OoO design
260 # this needs to be accompanied by "shadowing" (or stalling)
262 for exc
in self
.fus
.excs
.values():
263 el
.append(exc
.happened
)
264 if len(el
) > 0: # at least one exception
265 comb
+= self
.o
.exc_happened
.eq(Cat(*el
).bool())
def connect_satellite_decoders(self, m):
    """Attach every per-FU ("satellite") decoder and feed it the instruction.

    For each (name, decoder) pair in self.decoders the decoder is added to
    `m` as submodule "dec_<name>" and combinatorially wired to fields of
    self.ireg:

    * always: raw opcode, endianness and sv_a_nz
    * only when self.svp64_en is set: the predicate source/dest modes
    * additionally, for every decoder except the one named by
      self.trapunit: sv_rm and is_svp64_mode
    * additionally, only for decoders whose name starts with "ldst"
      (case-insensitive): use_svp64_ldst_dec

    :param m: the module being elaborated; statements are added to m.d.comb
    """
    comb = m.d.comb
    for k, v in self.decoders.items():
        # connect each satellite decoder and give it the instruction.
        # as subset decoders this massively reduces wire fanout given
        # the large number of ALUs
        m.submodules["dec_%s" % k] = v
        comb += v.dec.raw_opcode_in.eq(self.ireg.raw_insn_i)
        comb += v.dec.bigendian.eq(self.ireg.bigendian_i)
        # sigh due to SVP64 RA_OR_ZERO detection connect these too
        comb += v.sv_a_nz.eq(self.ireg.sv_a_nz)
        # everything below this point is SVP64-only wiring
        if not self.svp64_en:
            continue
        comb += v.pred_sm.eq(self.ireg.sv_pred_sm)
        comb += v.pred_dm.eq(self.ireg.sv_pred_dm)
        # the trap unit's entry gets none of the remaining connections
        if k == self.trapunit:
            continue
        comb += v.sv_rm.eq(self.ireg.sv_rm)  # pass through SVP64 RM
        comb += v.is_svp64_mode.eq(self.ireg.is_svp64_mode)
        # only the LDST PowerDecodeSubset *actually* needs to
        # know to use the alternative decoder.
        if not k.lower().startswith("ldst"):
            continue
        comb += v.use_svp64_ldst_dec.eq(self.ireg.use_svp64_ldst_dec)
295 def connect_instruction(self
, m
):
296 """connect_instruction
298 uses decoded (from PowerOp) function unit information from CSV files
299 to ascertain which Function Unit should deal with the current
302 some (such as OP_ATTN, OP_NOP) are dealt with here, including
303 ignoring it and halting the processor. OP_NOP is a bit annoying
304 because the issuer expects busy flag still to be raised then lowered.
305 (this requires a fake counter to be set).
307 comb
, sync
= m
.d
.comb
, m
.d
.sync
310 # indicate if core is busy
311 busy_o
= self
.o
.busy_o
312 any_busy_o
= self
.o
.any_busy_o
314 # connect up temporary copy of incoming instruction. the FSM will
315 # either blat the incoming instruction (if valid) into self.ireg
316 # or if the instruction could not be delivered, keep dropping the
317 # latched copy into ireg
318 ilatch
= self
.ispec()
319 self
.instr_active
= Signal()
321 # enable/busy-signals for each FU, get one bit for each FU (by name)
322 fu_enable
= Signal(len(fus
), reset_less
=True)
323 fu_busy
= Signal(len(fus
), reset_less
=True)
326 for i
, funame
in enumerate(fus
.keys()):
327 fu_bitdict
[funame
] = fu_enable
[i
]
328 fu_selected
[funame
] = fu_busy
[i
]
330 # identify function units and create a list by fnunit so that
331 # PriorityPickers can be created for selecting one of them that
332 # isn't busy at the time the incoming instruction needs passing on
333 by_fnunit
= defaultdict(list)
334 for fname
, member
in Function
.__members
__.items():
335 for funame
, fu
in fus
.items():
336 fnunit
= fu
.fnunit
.value
337 if member
.value
& fnunit
: # this FU handles this type of op
338 by_fnunit
[fname
].append((funame
, fu
)) # add by Function
340 # ok now just print out the list of FUs by Function, because we can
341 for fname
, fu_list
in by_fnunit
.items():
342 print ("FUs by type", fname
, fu_list
)
344 # now create a PriorityPicker per FU-type such that only one
345 # non-busy FU will be picked
347 fu_found
= Signal() # take a note if no Function Unit was available
348 for fname
, fu_list
in by_fnunit
.items():
349 i_pp
= PriorityPicker(len(fu_list
))
350 m
.submodules
['i_pp_%s' % fname
] = i_pp
352 for i
, (funame
, fu
) in enumerate(fu_list
):
353 # match the decoded instruction (e.do.fn_unit) against the
354 # "capability" of this FU, gate that by whether that FU is
355 # busy, and drop that into the PriorityPicker.
356 # this will give us an output of the first available *non-busy*
357 # Function Unit (Reservation Station) capable of handling this
359 fnunit
= fu
.fnunit
.value
360 en_req
= Signal(name
="issue_en_%s" % funame
, reset_less
=True)
361 fnmatch
= (self
.ireg
.e
.do
.fn_unit
& fnunit
).bool()
362 comb
+= en_req
.eq(fnmatch
& ~fu
.busy_o
&
364 i_l
.append(en_req
) # store in list for doing the Cat-trick
365 # picker output, gated by enable: store in fu_bitdict
366 po
= Signal(name
="o_issue_pick_"+funame
) # picker output
367 comb
+= po
.eq(i_pp
.o
[i
] & i_pp
.en_o
)
368 comb
+= fu_bitdict
[funame
].eq(po
)
369 comb
+= fu_selected
[funame
].eq(fu
.busy_o | po
)
370 # if we don't do this, then when there are no FUs available,
371 # the "p.o_ready" signal will go back "ok we accepted this
372 # instruction" which of course isn't true.
373 with m
.If(i_pp
.en_o
):
374 comb
+= fu_found
.eq(1)
375 # for each input, Cat them together and drop them into the picker
376 comb
+= i_pp
.i
.eq(Cat(*i_l
))
378 # rdmask, which is for registers needs to come from the *main* decoder
379 for funame
, fu
in fus
.items():
380 rdmask
= get_rdflags(m
, self
.ireg
.e
, fu
)
381 comb
+= fu
.rdmaskn
.eq(~rdmask
)
383 # sigh - need a NOP counter
385 with m
.If(counter
!= 0):
386 sync
+= counter
.eq(counter
- 1)
389 # default to reading from incoming instruction: may be overridden
390 # by copy from latch when "waiting"
391 comb
+= self
.ireg
.eq(self
.i
)
392 # always say "ready" except if overridden
393 comb
+= self
.p
.o_ready
.eq(1)
396 with m
.State("READY"):
397 with m
.If(self
.p
.i_valid
): # run only when valid
398 with m
.Switch(self
.ireg
.e
.do
.insn_type
):
399 # check for ATTN: halt if true
400 with m
.Case(MicrOp
.OP_ATTN
):
401 m
.d
.sync
+= self
.o
.core_terminate_o
.eq(1)
403 # fake NOP - this isn't really used (Issuer detects NOP)
404 with m
.Case(MicrOp
.OP_NOP
):
405 sync
+= counter
.eq(2)
409 comb
+= self
.instr_active
.eq(1)
410 comb
+= self
.p
.o_ready
.eq(0)
411 # connect instructions. only one enabled at a time
412 for funame
, fu
in fus
.items():
413 do
= self
.des
[funame
]
414 enable
= fu_bitdict
[funame
]
416 # run this FunctionUnit if enabled route op,
417 # issue, busy, read flags and mask to FU
419 # operand comes from the *local* decoder
420 # do not actually issue, though, if there
421 # is a waw hazard. decoder has to still
422 # be asserted in order to detect that, tho
423 comb
+= fu
.oper_i
.eq_from(do
)
424 # issue when valid (and no write-hazard)
425 comb
+= fu
.issue_i
.eq(~self
.waw_hazard
)
426 # instruction ok, indicate ready
427 comb
+= self
.p
.o_ready
.eq(1)
429 if self
.allow_overlap
:
430 with m
.If(~fu_found | self
.waw_hazard
):
431 # latch copy of instruction
432 sync
+= ilatch
.eq(self
.i
)
433 comb
+= self
.p
.o_ready
.eq(1) # accept
437 with m
.State("WAITING"):
438 comb
+= self
.instr_active
.eq(1)
439 comb
+= self
.p
.o_ready
.eq(0)
441 # using copy of instruction, keep waiting until an FU is free
442 comb
+= self
.ireg
.eq(ilatch
)
443 with m
.If(fu_found
): # wait for conflict to clear
444 # connect instructions. only one enabled at a time
445 for funame
, fu
in fus
.items():
446 do
= self
.des
[funame
]
447 enable
= fu_bitdict
[funame
]
449 # run this FunctionUnit if enabled route op,
450 # issue, busy, read flags and mask to FU
452 # operand comes from the *local* decoder,
453 # which is asserted even if not issued,
454 # so that WaW-detection can check for hazards.
455 # only if the waw hazard is clear does the
456 # instruction actually get issued
457 comb
+= fu
.oper_i
.eq_from(do
)
459 comb
+= fu
.issue_i
.eq(~self
.waw_hazard
)
460 with m
.If(~self
.waw_hazard
):
461 comb
+= self
.p
.o_ready
.eq(1)
465 print ("core: overlap allowed", self
.allow_overlap
)
466 # true when any FU is busy (including the cycle where it is perhaps
467 # to be issued - because that's what fu_busy is)
468 comb
+= any_busy_o
.eq(fu_busy
.bool())
469 if not self
.allow_overlap
:
470 # for simple non-overlap, if any instruction is busy, set
471 # busy output for core.
472 comb
+= busy_o
.eq(any_busy_o
)
474 # sigh deal with a fun situation that needs to be investigated
476 with m
.If(self
.issue_conflict
):
478 # make sure that LDST, SPR, MMU, Branch and Trap all say "busy"
479 # and do not allow overlap. these are all the ones that
480 # are non-forward-progressing: exceptions etc. that otherwise
481 # change CoreState for some reason (MSR, PC, SVSTATE)
482 for funame
, fu
in fus
.items():
483 if (funame
.lower().startswith('ldst') or
484 funame
.lower().startswith('branch') or
485 funame
.lower().startswith('mmu') or
486 funame
.lower().startswith('spr') or
487 funame
.lower().startswith('trap')):
488 with m
.If(fu
.busy_o
):
491 # return both the function unit "enable" dict as well as the "busy".
492 # the "busy-or-issued" can be passed in to the Read/Write port
493 # connecters to give them permission to request access to regfiles
494 return fu_bitdict
, fu_selected
496 def connect_rdport(self
, m
, fu_bitdict
, fu_selected
,
497 rdpickers
, regfile
, regname
, fspec
):
498 comb
, sync
= m
.d
.comb
, m
.d
.sync
504 # select the required read port. these are pre-defined sizes
505 rfile
= regs
.rf
[regfile
.lower()]
506 rport
= rfile
.r_ports
[rpidx
]
507 print("read regfile", rpidx
, regfile
, regs
.rf
.keys(),
510 # for checking if the read port has an outstanding write
511 if self
.make_hazard_vecs
:
512 wv
= regs
.wv
[regfile
.lower()]
513 wvchk
= wv
.q_int
# write-vec bit-level hazard check
515 # if a hazard is detected on this read port, simply blithely block
516 # every FU from reading on it. this is complete overkill but very
518 hazard_detected
= Signal(name
="raw_%s_%s" % (regfile
, rpidx
))
521 if not isinstance(fspecs
, list):
527 for i
, fspec
in enumerate(fspecs
):
528 # get the regfile specs for this regfile port
529 print ("fpsec", i
, fspec
, len(fspec
.specs
))
530 name
= "%s_%s_%d" % (regfile
, regname
, i
)
531 ppoffs
.append(pplen
) # record offset for picker
532 pplen
+= len(fspec
.specs
)
533 rdflag
= Signal(name
="rdflag_"+name
, reset_less
=True)
534 comb
+= rdflag
.eq(fspec
.okflag
)
535 rdflags
.append(rdflag
)
537 print ("pplen", pplen
)
539 # create a priority picker to manage this port
540 rdpickers
[regfile
][rpidx
] = rdpick
= PriorityPicker(pplen
)
541 m
.submodules
["rdpick_%s_%s" % (regfile
, rpidx
)] = rdpick
547 for i
, fspec
in enumerate(fspecs
):
548 (rf
, _read
, wid
, fuspecs
) = \
549 (fspec
.okflag
, fspec
.regport
, fspec
.wid
, fspec
.specs
)
550 # connect up the FU req/go signals, and the reg-read to the FU
551 # and create a Read Broadcast Bus
552 for pi
, fuspec
in enumerate(fspec
.specs
):
553 (funame
, fu
, idx
) = (fuspec
.funame
, fuspec
.fu
, fuspec
.idx
)
555 name
= "%s_%s_%s_%i" % (regfile
, rpidx
, funame
, pi
)
556 fu_active
= fu_selected
[funame
]
557 fu_issued
= fu_bitdict
[funame
]
559 # get (or set up) a latched copy of read register number
560 # and (sigh) also the read-ok flag
561 # TODO: use nmutil latchregister
562 rhname
= "%s_%s_%d" % (regfile
, regname
, i
)
563 rdflag
= Signal(name
="rdflag_%s_%s" % (funame
, rhname
),
565 if rhname
not in fu
.rf_latches
:
566 rfl
= Signal(name
="rdflag_latch_%s_%s" % (funame
, rhname
))
567 fu
.rf_latches
[rhname
] = rfl
568 with m
.If(fu
.issue_i
):
569 sync
+= rfl
.eq(rdflags
[i
])
571 rfl
= fu
.rf_latches
[rhname
]
573 # now the register port
574 rname
= "%s_%s_%s_%d" % (funame
, regfile
, regname
, pi
)
575 read
= Signal
.like(_read
, name
="read_"+rname
)
576 if rname
not in fu
.rd_latches
:
577 rdl
= Signal
.like(_read
, name
="rdlatch_"+rname
)
578 fu
.rd_latches
[rname
] = rdl
579 with m
.If(fu
.issue_i
):
580 sync
+= rdl
.eq(_read
)
582 rdl
= fu
.rd_latches
[rname
]
584 # make the read immediately available on issue cycle
585 # after the read cycle, otherwise use the latched copy.
586 # this captures the regport and okflag on issue
587 with m
.If(fu
.issue_i
):
588 comb
+= read
.eq(_read
)
589 comb
+= rdflag
.eq(rdflags
[i
])
592 comb
+= rdflag
.eq(rfl
)
594 # connect request-read to picker input, and output to go-rd
595 addr_en
= Signal
.like(read
, name
="addr_en_"+name
)
596 pick
= Signal(name
="pick_"+name
) # picker input
597 rp
= Signal(name
="rp_"+name
) # picker output
598 delay_pick
= Signal(name
="dp_"+name
) # read-enable "underway"
599 rhazard
= Signal(name
="rhaz_"+name
)
601 # exclude any currently-enabled read-request (mask out active)
602 # entirely block anything hazarded from being picked
603 comb
+= pick
.eq(fu
.rd_rel_o
[idx
] & fu_active
& rdflag
&
604 ~delay_pick
& ~rhazard
)
605 comb
+= rdpick
.i
[pi
].eq(pick
)
606 comb
+= fu
.go_rd_i
[idx
].eq(delay_pick
) # pass in *delayed* pick
608 # if picked, select read-port "reg select" number to port
609 comb
+= rp
.eq(rdpick
.o
[pi
] & rdpick
.en_o
)
610 sync
+= delay_pick
.eq(rp
) # delayed "pick"
611 comb
+= addr_en
.eq(Mux(rp
, read
, 0))
613 # the read-enable happens combinatorially (see mux-bus below)
614 # but it results in the data coming out on a one-cycle delay.
618 addrs
.append(addr_en
)
621 # use the *delayed* pick signal to put requested data onto bus
622 with m
.If(delay_pick
):
623 # connect regfile port to input, creating fan-out Bus
625 print("reg connect widths",
626 regfile
, regname
, pi
, funame
,
627 src
.shape(), rport
.o_data
.shape())
628 # all FUs connect to same port
629 comb
+= src
.eq(rport
.o_data
)
631 if not self
.make_hazard_vecs
:
634 # read the write-hazard bitvector (wv) for any bit that is
635 wvchk_en
= Signal(len(wvchk
), name
="wv_chk_addr_en_"+name
)
636 issue_active
= Signal(name
="rd_iactive_"+name
)
637 # XXX combinatorial loop here
638 comb
+= issue_active
.eq(fu_active
& rdflag
)
639 with m
.If(issue_active
):
641 comb
+= wvchk_en
.eq(read
)
643 comb
+= wvchk_en
.eq(1<<read
)
644 # if FU is busy (which doesn't get set at the same time as
645 # issue) and no hazard was detected, clear wvchk_en (i.e.
646 # stop checking for hazards). there is a loop here, but it's
647 # via a DFF, so is ok. some linters may complain, but hey.
648 with m
.If(fu
.busy_o
& ~rhazard
):
649 comb
+= wvchk_en
.eq(0)
651 # read-hazard is ANDed with (filtered by) what is actually
653 comb
+= rhazard
.eq((wvchk
& wvchk_en
).bool())
655 wvens
.append(wvchk_en
)
657 # or-reduce the muxed read signals
659 # for unary-addressed
660 comb
+= rport
.ren
.eq(ortreereduce_sig(rens
))
662 # for binary-addressed
663 comb
+= rport
.addr
.eq(ortreereduce_sig(addrs
))
664 comb
+= rport
.ren
.eq(Cat(*rens
).bool())
665 print ("binary", regfile
, rpidx
, rport
, rport
.ren
, rens
, addrs
)
667 if not self
.make_hazard_vecs
:
668 return Const(0) # declare "no hazards"
670 # enable the read bitvectors for this issued instruction
671 # and return whether any write-hazard bit is set
672 wvchk_and
= Signal(len(wvchk
), name
="wv_chk_"+name
)
673 comb
+= wvchk_and
.eq(wvchk
& ortreereduce_sig(wvens
))
674 comb
+= hazard_detected
.eq(wvchk_and
.bool())
675 return hazard_detected
677 def connect_rdports(self
, m
, fu_bitdict
, fu_selected
):
678 """connect read ports
680 orders the read regspecs into a dict-of-dicts, by regfile, by
681 regport name, then connects all FUs that want that regport by
682 way of a PriorityPicker.
684 comb
, sync
= m
.d
.comb
, m
.d
.sync
689 # dictionary of lists of regfile read ports
690 byregfiles_rdspec
= self
.get_byregfiles(m
, True)
692 # okaay, now we need a PriorityPicker per regfile per regfile port
693 # loootta pickers... peter piper picked a pack of pickled peppers...
695 for regfile
, fuspecs
in byregfiles_rdspec
.items():
696 rdpickers
[regfile
] = {}
698 # argh. an experiment to merge RA and RB in the INT regfile
699 # (we have too many read/write ports)
700 if self
.regreduce_en
:
702 fuspecs
['rabc'] = [fuspecs
.pop('rb')]
703 fuspecs
['rabc'].append(fuspecs
.pop('rc'))
704 fuspecs
['rabc'].append(fuspecs
.pop('ra'))
705 if regfile
== 'FAST':
706 fuspecs
['fast1'] = [fuspecs
.pop('fast1')]
707 if 'fast2' in fuspecs
:
708 fuspecs
['fast1'].append(fuspecs
.pop('fast2'))
709 if 'fast3' in fuspecs
:
710 fuspecs
['fast1'].append(fuspecs
.pop('fast3'))
712 # for each named regfile port, connect up all FUs to that port
713 # also return (and collate) hazard detection)
714 for (regname
, fspec
) in sort_fuspecs(fuspecs
):
715 print("connect rd", regname
, fspec
)
716 rh
= self
.connect_rdport(m
, fu_bitdict
, fu_selected
,
721 return Cat(*rd_hazard
).bool()
723 def make_hazards(self
, m
, regfile
, rfile
, wvclr
, wvset
,
724 funame
, regname
, idx
,
725 addr_en
, wp
, fu
, fu_active
, wrflag
, write
,
727 """make_hazards: a setter and a clearer for the regfile write ports
729 setter is at issue time (using PowerDecoder2 regfile write numbers)
730 clearer is at regfile write time (when FU has said what to write to)
732 there is *one* unusual case here which has to be dealt with:
733 when the Function Unit does *NOT* request a write to the regfile
734 (has its data.ok bit CLEARED). this is perfectly legitimate.
737 comb
, sync
= m
.d
.comb
, m
.d
.sync
738 name
= "%s_%s_%d" % (funame
, regname
, idx
)
740 # connect up the bitvector write hazard. unlike the
741 # regfile writeports, a ONE must be written to the corresponding
742 # bit of the hazard bitvector (to indicate the existence of
745 # the detection of what shall be written to is based
746 # on *issue*. it is delayed by 1 cycle so that instructions
747 # "addi 5,5,0x2" do not cause combinatorial loops due to
748 # fake-dependency on *themselves*. this will totally fail
749 # spectacularly when doing multi-issue
750 print ("write vector (for regread)", regfile
, wvset
)
751 wviaddr_en
= Signal(len(wvset
), name
="wv_issue_addr_en_"+name
)
752 issue_active
= Signal(name
="iactive_"+name
)
753 sync
+= issue_active
.eq(fu
.issue_i
& fu_active
& wrflag
)
754 with m
.If(issue_active
):
756 comb
+= wviaddr_en
.eq(write
)
758 comb
+= wviaddr_en
.eq(1<<write
)
760 # deal with write vector clear: this kicks in when the regfile
761 # is written to, and clears the corresponding bitvector entry
762 print ("write vector", regfile
, wvclr
)
763 wvaddr_en
= Signal(len(wvclr
), name
="wvaddr_en_"+name
)
765 comb
+= wvaddr_en
.eq(addr_en
)
768 comb
+= wvaddr_en
.eq(1<<addr_en
)
770 # XXX ASSUME that LDSTFunctionUnit always sets the data it intends to
771 # this may NOT be the case when an exception occurs
772 if isinstance(fu
, LDSTFunctionUnit
):
773 return wvaddr_en
, wviaddr_en
775 # okaaay, this is preparation for the awkward case.
776 # * latch a copy of wrflag when issue goes high.
777 # * when the fu_wrok (data.ok) flag is NOT set,
778 # but the FU is done, the FU is NEVER going to write
779 # so the bitvector has to be cleared.
780 latch_wrflag
= Signal(name
="latch_wrflag_"+name
)
781 with m
.If(~fu
.busy_o
):
782 sync
+= latch_wrflag
.eq(0)
783 with m
.If(fu
.issue_i
& fu_active
):
784 sync
+= latch_wrflag
.eq(wrflag
)
785 with m
.If(fu
.alu_done_o
& latch_wrflag
& ~fu_wrok
):
787 comb
+= wvaddr_en
.eq(write
) # addr_en gated with wp, don't use
789 comb
+= wvaddr_en
.eq(1<<addr_en
) # binary addr_en not gated
791 return wvaddr_en
, wviaddr_en
793 def connect_wrport(self
, m
, fu_bitdict
, fu_selected
,
794 wrpickers
, regfile
, regname
, fspec
):
795 comb
, sync
= m
.d
.comb
, m
.d
.sync
801 # select the required write port. these are pre-defined sizes
802 rfile
= regs
.rf
[regfile
.lower()]
803 wport
= rfile
.w_ports
[rpidx
]
805 print("connect wr", regname
, "unary", rfile
.unary
, fspec
)
806 print(regfile
, regs
.rf
.keys())
808 # select the write-protection hazard vector. note that this still
809 # requires to WRITE to the hazard bitvector! read-requests need
810 # to RAISE the bitvector (set it to 1), which, duh, requires a WRITE
811 if self
.make_hazard_vecs
:
812 wv
= regs
.wv
[regfile
.lower()]
813 wvset
= wv
.s
# write-vec bit-level hazard ctrl
814 wvclr
= wv
.r
# write-vec bit-level hazard ctrl
815 wvchk
= wv
.q
# write-after-write hazard check
818 if not isinstance(fspecs
, list):
825 for i
, fspec
in enumerate(fspecs
):
826 # get the regfile specs for this regfile port
827 (wf
, _write
, wid
, fuspecs
) = \
828 (fspec
.okflag
, fspec
.regport
, fspec
.wid
, fspec
.specs
)
829 print ("fpsec", i
, "wrflag", wf
, fspec
, len(fuspecs
))
830 ppoffs
.append(pplen
) # record offset for picker
831 pplen
+= len(fuspecs
)
833 name
= "%s_%s_%d" % (regfile
, regname
, i
)
834 wrflag
= Signal(name
="wr_flag_"+name
)
836 comb
+= wrflag
.eq(wf
)
839 wrflags
.append(wrflag
)
841 # create a priority picker to manage this port
842 wrpickers
[regfile
][rpidx
] = wrpick
= PriorityPicker(pplen
)
843 m
.submodules
["wrpick_%s_%s" % (regfile
, rpidx
)] = wrpick
850 #wvens = [] - not needed: reading of writevec is permanently held hi
852 for i
, fspec
in enumerate(fspecs
):
853 # connect up the FU req/go signals and the reg-read to the FU
854 # these are arbitrated by Data.ok signals
855 (wf
, _write
, wid
, fuspecs
) = \
856 (fspec
.okflag
, fspec
.regport
, fspec
.wid
, fspec
.specs
)
857 for pi
, fuspec
in enumerate(fspec
.specs
):
858 (funame
, fu
, idx
) = (fuspec
.funame
, fuspec
.fu
, fuspec
.idx
)
859 fu_requested
= fu_bitdict
[funame
]
861 name
= "%s_%s_%s_%d" % (funame
, regfile
, regname
, idx
)
862 # get (or set up) a write-latched copy of write register number
863 write
= Signal
.like(_write
, name
="write_"+name
)
864 rname
= "%s_%s_%s_%d" % (funame
, regfile
, regname
, idx
)
865 if rname
not in fu
.wr_latches
:
866 wrl
= Signal
.like(_write
, name
="wrlatch_"+rname
)
867 fu
.wr_latches
[rname
] = write
868 # do not depend on fu.issue_i here, it creates a
869 # combinatorial loop on waw checking. using the FU
870 # "enable" bitdict entry for this FU is sufficient,
871 # because the PowerDecoder2 read/write nums are
872 # valid continuously when the instruction is valid
873 with m
.If(fu_requested
):
874 sync
+= wrl
.eq(_write
)
875 comb
+= write
.eq(_write
)
877 comb
+= write
.eq(wrl
)
879 write
= fu
.wr_latches
[rname
]
881 # write-request comes from dest.ok
882 dest
= fu
.get_out(idx
)
883 fu_dest_latch
= fu
.get_fu_out(idx
) # latched output
884 name
= "%s_%s_%d" % (funame
, regname
, idx
)
885 fu_wrok
= Signal(name
="fu_wrok_"+name
, reset_less
=True)
886 comb
+= fu_wrok
.eq(dest
.ok
& fu
.busy_o
)
888 # connect request-write to picker input, and output to go-wr
889 fu_active
= fu_selected
[funame
]
890 pick
= fu
.wr
.rel_o
[idx
] & fu_active
891 comb
+= wrpick
.i
[pi
].eq(pick
)
892 # create a single-pulse go write from the picker output
893 wr_pick
= Signal(name
="wpick_%s_%s_%d" % (funame
, regname
, idx
))
894 comb
+= wr_pick
.eq(wrpick
.o
[pi
] & wrpick
.en_o
)
895 comb
+= fu
.go_wr_i
[idx
].eq(rising_edge(m
, wr_pick
))
897 # connect the regspec write "reg select" number to this port
898 # only if one FU actually requests (and is granted) the port
899 # will the write-enable be activated
900 wname
= "waddr_en_%s_%s_%d" % (funame
, regname
, idx
)
901 addr_en
= Signal
.like(write
, name
=wname
)
903 comb
+= wp
.eq(wr_pick
& wrpick
.en_o
)
904 comb
+= addr_en
.eq(Mux(wp
, write
, 0))
908 addrs
.append(addr_en
)
911 # connect regfile port to input
912 print("reg connect widths",
913 regfile
, regname
, pi
, funame
,
914 dest
.shape(), wport
.i_data
.shape())
915 wsigs
.append(fu_dest_latch
)
917 # now connect up the bitvector write hazard
918 if not self
.make_hazard_vecs
:
920 res
= self
.make_hazards(m
, regfile
, rfile
, wvclr
, wvset
,
921 funame
, regname
, idx
,
922 addr_en
, wp
, fu
, fu_active
,
923 wrflags
[i
], write
, fu_wrok
)
924 wvaddr_en
, wv_issue_en
= res
925 wvclren
.append(wvaddr_en
) # set only: no data => clear bit
926 wvseten
.append(wv_issue_en
) # set data same as enable
928 # read the write-hazard bitvector (wv) for any bit that is
929 fu_requested
= fu_bitdict
[funame
]
930 wvchk_en
= Signal(len(wvchk
), name
="waw_chk_addr_en_"+name
)
931 issue_active
= Signal(name
="waw_iactive_"+name
)
932 whazard
= Signal(name
="whaz_"+name
)
934 # XXX EEK! STATE regfile (branch) does not have an
935 # write-active indicator in regspec_decode_write()
936 print ("XXX FIXME waw_iactive", issue_active
,
939 # check bits from the incoming instruction. note (back
940 # in connect_instruction) that the decoder is held for
941 # us to be able to do this, here... *without* issue being
942 # held HI. we MUST NOT gate this with fu.issue_i or
943 # with fu_bitdict "enable": it would create a loop
944 comb
+= issue_active
.eq(wf
)
945 with m
.If(issue_active
):
947 comb
+= wvchk_en
.eq(write
)
949 comb
+= wvchk_en
.eq(1<<write
)
950 # if FU is busy (which doesn't get set at the same time as
951 # issue) and no hazard was detected, clear wvchk_en (i.e.
952 # stop checking for hazards). there is a loop here, but it's
953 # via a DFF, so is ok. some linters may complain, but hey.
954 with m
.If(fu
.busy_o
& ~whazard
):
955 comb
+= wvchk_en
.eq(0)
957 # write-hazard is ANDed with (filtered by) what is actually
958 # being requested. the wvchk data is on a one-clock delay,
959 # and wvchk_en comes directly from the main decoder
960 comb
+= whazard
.eq((wvchk
& wvchk_en
).bool())
962 comb
+= fu
._waw
_hazard
.eq(1)
964 #wvens.append(wvchk_en)
966 # here is where we create the Write Broadcast Bus. simple, eh?
967 comb
+= wport
.i_data
.eq(ortreereduce_sig(wsigs
))
969 # for unary-addressed
970 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
972 # for binary-addressed
973 comb
+= wport
.addr
.eq(ortreereduce_sig(addrs
))
974 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
976 if not self
.make_hazard_vecs
:
979 # return these here rather than set wvclr/wvset directly,
980 # because there may be more than one write-port to a given
981 # regfile. example: XER has a write-port for SO, CA, and OV
982 # and the *last one added* of those would overwrite the other
983 # two. solution: have connect_wrports collate all the
984 # or-tree-reduced bitvector set/clear requests and drop them
985 # in as a single "thing". this can only be done because the
986 # set/get is an unary bitvector.
987 print ("make write-vecs", regfile
, regname
, wvset
, wvclr
)
988 return (wvclren
, # clear (regfile write)
989 wvseten
) # set (issue time)
def connect_wrports(self, m, fu_bitdict, fu_selected):
    """connect write ports

    orders the write regspecs into a dict-of-dicts, by regfile,
    by regport name, then connects all FUs that want that regport
    by way of a PriorityPicker.

    note that the write-port wen, write-port data, and go_wr_i all need to
    be on the exact same clock cycle.  as there is a combinatorial loop bug
    at the moment, these all use sync.

    Arguments:
    * m           - the Module being elaborated (statements are added
                    to its comb domain)
    * fu_bitdict  - passed straight through to connect_wrport
    * fu_selected - passed straight through to connect_wrport

    Returns None.  When self.make_hazard_vecs is false only the write
    ports are connected; otherwise the per-regfile write-hazard
    set/clear vectors are driven as well.
    """
    comb = m.d.comb

    # dictionary of lists of regfile write ports
    byregfiles_wrspec = self.get_byregfiles(m, False)

    # same for write ports.
    # BLECH!  complex code-duplication! BLECH!
    wrpickers = {}                  # per-regfile dict of write-port pickers
    wvclrers = defaultdict(list)    # hazard-clear enables, keyed by regfile
    wvseters = defaultdict(list)    # hazard-set enables, keyed by regfile
    for regfile, fuspecs in byregfiles_wrspec.items():
        wrpickers[regfile] = {}

        if self.regreduce_en:
            # argh, more port-merging
            if regfile == 'INT':
                fuspecs['o'] = [fuspecs.pop('o')]
                fuspecs['o'].append(fuspecs.pop('o1'))
            if regfile == 'FAST':
                fuspecs['fast1'] = [fuspecs.pop('fast1')]
                if 'fast2' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast2'))
                if 'fast3' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast3'))

        # collate these and record them by regfile because there
        # are sometimes more write-ports per regfile
        for (regname, fspec) in sort_fuspecs(fuspecs):
            # NOTE(review): wrpickers is handed to connect_wrport so that
            # it can register the picker it creates for this port -
            # confirm against the connect_wrport signature.
            wvclren, wvseten = self.connect_wrport(m,
                                                   fu_bitdict, fu_selected,
                                                   wrpickers,
                                                   regfile, regname, fspec)
            wvclrers[regfile.lower()] += wvclren
            wvseters[regfile.lower()] += wvseten

    # without hazard vectors there is nothing further to connect
    if not self.make_hazard_vecs:
        return

    # for write-vectors: reduce the clr-ers and set-ers down to
    # a single set of bits.  otherwise if there are two write
    # ports (on some regfiles), the last one doing comb += on
    # the reg.wv[regfile] instance "wins" (and all others are ignored,
    # whoops).
    for regfile, wvclren in wvclrers.items():
        wv = self.regs.wv[regfile]
        wvset = wv.s  # write-vec bit-level hazard ctrl
        wvclr = wv.r  # write-vec bit-level hazard ctrl
        wvseten = wvseters[regfile]
        comb += wvclr.eq(ortreereduce_sig(wvclren))  # clear (regfile write)
        comb += wvset.eq(ortreereduce_sig(wvseten))  # set (issue time)
def get_byregfiles(self, m, readmode):
    """Group Function Unit register ports by regfile and by port name.

    Arguments:
    * m        - the Module being elaborated (handed to regspec_decode)
    * readmode - True to collect read (source) ports, False to collect
                 write (destination) ports

    Returns a dict-of-dicts: first key is the regfile, second key is the
    regfile port name, and each value is a ByRegSpec holding the okflag,
    regport and width from the first FU seen for that port, plus the
    list of FUSpec(funame, fu, idx) entries contending for it.

    Side-effects: (re)creates the latch dictionaries on every FU for the
    requested direction only, and prints a summary of the result.
    """
    mode = "read" if readmode else "write"
    # NOTE(review): assumes self.fus.fus is the name -> Function Unit dict
    fus = self.fus.fus
    e = self.ireg.e  # decoded instruction to execute

    # dictionary of dictionaries of lists/tuples of regfile ports.
    # first key: regfile.  second key: regfile port name
    byregfiles_spec = defaultdict(dict)

    for (funame, fu) in fus.items():
        # create in each FU a receptacle for the read/write register
        # hazard numbers (and okflags for read).  to be latched in
        # connect_rd/write_ports.  only the receptacles for the requested
        # direction are created, so that a write-mode call neither wipes
        # latches set up in read mode nor leaves wr_latches missing.
        if readmode:
            fu.rd_latches = {}  # read reg number latches
            fu.rf_latches = {}  # read flag latches
        else:
            fu.wr_latches = {}  # write reg number latches

        # construct regfile specs: read uses inspec, write outspec
        print("%s ports for %s" % (mode, funame))
        n_ports = fu.n_src if readmode else fu.n_dst
        for idx in range(n_ports):
            (regfile, regname, wid) = fu.get_io_spec(readmode, idx)
            print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))

            # the PowerDecoder2 (main one, not the satellites) contains
            # the decoded regfile numbers. obtain these now
            decinfo = regspec_decode(m, readmode, e, regfile, regname)
            okflag, regport = decinfo.okflag, decinfo.regport

            # construct the dictionary of regspec information by regfile
            if regname not in byregfiles_spec[regfile]:
                byregfiles_spec[regfile][regname] = \
                    ByRegSpec(okflag, regport, wid, [])

            # here we start to create "lanes" where each Function Unit
            # requiring access to a given [single-contended resource]
            # regfile port is appended to a list, so that PriorityPickers
            # can be created to give uncontested access to it
            fuspec = FUSpec(funame, fu, idx)
            byregfiles_spec[regfile][regname].specs.append(fuspec)

    # ok just print that all out, for convenience
    for regfile, fuspecs in byregfiles_spec.items():
        print("regfile %s ports:" % mode, regfile)
        for regname, fspec in fuspecs.items():
            # unpack into "specs" rather than rebinding "fuspecs", which
            # is the dictionary currently being iterated over
            [okflag, regport, wid, specs] = fspec
            print(" rf %s port %s lane: %s" % (mode, regfile, regname))
            print(" %s" % regname, wid, okflag, regport)
            for (funame, fu, idx) in specs:
                fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                print(" ", funame, fu.__class__.__name__, idx, fusig)

    return byregfiles_spec
1115 yield from self
.fus
.ports()
1116 yield from self
.i
.e
.ports()
1117 yield from self
.l0
.ports()
1124 if __name__
== '__main__':
1125 pspec
= TestMemPspec(ldst_ifacetype
='testpi',
1131 dut
= NonProductionCore(pspec
)
1132 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
1133 with
open("test_core.il", "w") as f
: