3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 (update: actually this is being added now:
21 https://bugs.libre-soc.org/show_bug.cgi?id=737)
24 from nmigen
import (Elaboratable
, Module
, Signal
, ResetSignal
, Cat
, Mux
,
26 from nmigen
.cli
import rtlil
28 from openpower
.decoder
.power_decoder2
import PowerDecodeSubset
29 from openpower
.decoder
.power_regspec_map
import regspec_decode
30 from openpower
.sv
.svp64
import SVP64Rec
32 from nmutil
.picker
import PriorityPicker
33 from nmutil
.util
import treereduce
34 from nmutil
.singlepipe
import ControlBase
36 from soc
.fu
.compunits
.compunits
import AllFunctionUnits
, LDSTFunctionUnit
37 from soc
.regfile
.regfiles
import RegFiles
38 from openpower
.decoder
.power_decoder2
import get_rdflags
39 from soc
.experiment
.l0_cache
import TstL0CacheBuffer
# test only
40 from soc
.config
.test
.test_loadstore
import TestMemPspec
41 from openpower
.decoder
.power_enums
import MicrOp
, Function
42 from soc
.simple
.core_data
import CoreInput
, CoreOutput
44 from collections
import defaultdict
, namedtuple
47 from nmutil
.util
import rising_edge
# FUSpec: one Function Unit's claim on a regfile port.
#   funame - name of the Function Unit (key into the FU dictionaries)
#   fu     - the Function Unit object itself
#   idx    - index of the operand/result on that Function Unit
#            (used to index its per-port request/go signals)
FUSpec = namedtuple("FUSpec", ["funame", "fu", "idx"])

# ByRegSpec: everything grouped under one regfile port.
#   okflag  - flag saying whether the port is actually used by the insn
#   regport - the register-number (port select) expression
#   wid     - width field of the regspec
#   specs   - list of FUSpec entries that want access to this port
ByRegSpec = namedtuple("ByRegSpec", ["okflag", "regport", "wid", "specs"])
52 # helper function for reducing a list of signals down to a parallel
def ortreereduce(tree, attr="o_data"):
    """OR together one attribute taken from every item of ``tree``.

    Each item is mapped to ``getattr(item, attr)`` and the results are
    combined pairwise with ``operator.or_`` by ``treereduce``.

    :param tree: sequence of objects that each carry the attribute ``attr``
    :param attr: name of the attribute to extract (default ``"o_data"``)
    :return: whatever ``treereduce`` yields for the OR-combination
    """
    def pick_attr(item):
        return getattr(item, attr)

    return treereduce(tree, operator.or_, pick_attr)
def ortreereduce_sig(tree):
    """OR together the items of ``tree`` themselves.

    Same as ``ortreereduce`` except that the items are used directly
    (identity mapping) rather than having an attribute extracted first.

    :param tree: sequence of values to be combined with ``operator.or_``
    :return: whatever ``treereduce`` yields for the OR-combination
    """
    def identity(item):
        return item

    return treereduce(tree, operator.or_, identity)
62 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the items of ``fuspecs`` with "full*" register names first.

    The result is a list of ``(regname, fspec)`` tuples.  Entries whose
    name starts with ``"full"`` come first, followed by all the others;
    within each group the dictionary's own iteration order is preserved.

    :param fuspecs: dict mapping regfile port name to its spec
    :return: list of ``(regname, fspec)`` tuples, "full" entries leading
    """
    entries = list(fuspecs.items())
    full_first = [item for item in entries if item[0].startswith("full")]
    the_rest = [item for item in entries if not item[0].startswith("full")]
    return full_first + the_rest
74 # a hazard bitvector "remap" function which returns an AST expression
75 # that remaps read/write hazard regfile port numbers to either a full
76 # bitvector or a reduced subset one. SPR for example is reduced to a
78 # CRITICALLY-IMPORTANT NOTE: these bitvectors *have* to match up per
79 # regfile! therefore the remapping is per regfile, *NOT* per regfile
80 # port and certainly not based on whether it is a read port or write port.
81 # note that any reductions here will result in degraded performance due
82 # to conflicts, but at least it keeps the hazard matrix sizes down to "sane"
83 def bitvector_remap(regfile
, rfile
, port
):
84 # 8-bits (at the moment, no SVP64), CR is unary: no remap
87 # 3 bits, unary already: return the port
90 # 3 bits, unary: return the port
93 # 3 bits, unary: return the port
94 if regfile
== 'SVSTATE':
96 # 9 bits (9 entries), might be unary already
98 if rfile
.unary
: # FAST might be unary already
102 # 10 bits (!!) - reduce to one
104 if rfile
.unary
: # FAST might be unary already
109 if rfile
.unary
: # INT, check if unary/binary
115 # derive from ControlBase rather than have a separate Stage instance,
116 # this is simpler to do
117 class NonProductionCore(ControlBase
):
118 def __init__(self
, pspec
):
121 # test if SVP64 is to be enabled
122 self
.svp64_en
= hasattr(pspec
, "svp64") and (pspec
.svp64
== True)
124 # test to see if regfile ports should be reduced
125 self
.regreduce_en
= (hasattr(pspec
, "regreduce") and
126 (pspec
.regreduce
== True))
128 # test to see if overlapping of instructions is allowed
129 # (not normally enabled for TestIssuer FSM but useful for checking
130 # the bitvector hazard detection, before doing In-Order)
131 self
.allow_overlap
= (hasattr(pspec
, "allow_overlap") and
132 (pspec
.allow_overlap
== True))
135 self
.make_hazard_vecs
= self
.allow_overlap
136 self
.core_type
= "fsm"
137 if hasattr(pspec
, "core_type"):
138 self
.core_type
= pspec
.core_type
140 super().__init
__(stage
=self
)
142 # single LD/ST funnel for memory access
143 self
.l0
= l0
= TstL0CacheBuffer(pspec
, n_units
=1)
146 # function units (only one each)
147 # only include mmu if enabled in pspec
148 self
.fus
= AllFunctionUnits(pspec
, pilist
=[pi
])
150 # link LoadStore1 into MMU
151 mmu
= self
.fus
.get_fu('mmu0')
152 ldst0
= self
.fus
.get_fu('ldst0')
153 print ("core pspec", pspec
.ldst_ifacetype
)
154 print ("core mmu", mmu
)
156 lsi
= l0
.cmpi
.lsmem
.lsi
# a LoadStore1 Interface object
157 print ("core lsmem.lsi", lsi
)
158 mmu
.alu
.set_ldst_interface(lsi
)
160 # register files (yes plural)
161 self
.regs
= RegFiles(pspec
, make_hazard_vecs
=self
.make_hazard_vecs
)
163 # set up input and output: unusual requirement to set data directly
164 # (due to the way that the core is set up in a different domain,
165 # see TestIssuer.setup_peripherals
166 self
.p
.i_data
, self
.n
.o_data
= self
.new_specs(None)
167 self
.i
, self
.o
= self
.p
.i_data
, self
.n
.o_data
169 # actual internal input data used (captured)
170 self
.ireg
= self
.ispec()
172 # create per-FU instruction decoders (subsetted). these "satellite"
173 # decoders reduce wire fan-out from the one (main) PowerDecoder2
174 # (used directly by the trap unit) to the *twelve* (or more)
175 # Function Units. we can either have 32 wires (the instruction)
176 # to each, or we can have well over a 200 wire fan-out (to 12
177 # ALUs). it's an easy choice to make.
181 # eep, these should be *per FU* i.e. for FunctionUnitBaseMulti
182 # they should be shared (put into the ALU *once*).
184 for funame
, fu
in self
.fus
.fus
.items():
185 f_name
= fu
.fnunit
.name
186 fnunit
= fu
.fnunit
.value
187 opkls
= fu
.opsubsetkls
189 # TRAP decoder is the *main* decoder
190 self
.trapunit
= funame
192 assert funame
not in self
.decoders
193 self
.decoders
[funame
] = PowerDecodeSubset(None, opkls
, f_name
,
195 state
=self
.ireg
.state
,
196 svp64_en
=self
.svp64_en
,
197 regreduce_en
=self
.regreduce_en
)
198 self
.des
[funame
] = self
.decoders
[funame
].do
200 # create per-Function Unit write-after-write hazard signals
201 # yes, really, this should have been added in ReservationStations
203 for funame
, fu
in self
.fus
.fus
.items():
204 fu
._waw
_hazard
= Signal(name
="waw_%s" % funame
)
206 # share the SPR decoder with the MMU if it exists
207 if "mmu0" in self
.decoders
:
208 self
.decoders
["mmu0"].mmu0_spr_dec
= self
.decoders
["spr0"]
210 # next 3 functions are Stage API Compliance
211 def setup(self
, m
, i
):
215 return CoreInput(self
.pspec
, self
.svp64_en
, self
.regreduce_en
)
220 # elaborate function to create HDL
221 def elaborate(self
, platform
):
222 m
= super().elaborate(platform
)
224 # for testing purposes, to cut down on build time in coriolis2
225 if hasattr(self
.pspec
, "nocore") and self
.pspec
.nocore
== True:
226 x
= Signal() # dummy signal
231 m
.submodules
.fus
= self
.fus
232 m
.submodules
.l0
= l0
= self
.l0
233 self
.regs
.elaborate_into(m
, platform
)
237 # amalgamate write-hazards into a single top-level Signal
238 self
.waw_hazard
= Signal()
240 for funame
, fu
in self
.fus
.fus
.items():
241 whaz
.append(fu
._waw
_hazard
)
242 comb
+= self
.waw_hazard
.eq(Cat(*whaz
).bool())
245 self
.connect_satellite_decoders(m
)
247 # ssh, cheat: trap uses the main decoder because of the rewriting
248 self
.des
[self
.trapunit
] = self
.ireg
.e
.do
250 # connect up Function Units, then read/write ports, and hazard conflict
251 self
.issue_conflict
= Signal()
252 fu_bitdict
, fu_selected
= self
.connect_instruction(m
)
253 raw_hazard
= self
.connect_rdports(m
, fu_bitdict
, fu_selected
)
254 self
.connect_wrports(m
, fu_bitdict
, fu_selected
)
255 if self
.allow_overlap
:
256 comb
+= self
.issue_conflict
.eq(raw_hazard
)
258 # note if an exception happened. in a pipelined or OoO design
259 # this needs to be accompanied by "shadowing" (or stalling)
261 for exc
in self
.fus
.excs
.values():
262 el
.append(exc
.happened
)
263 if len(el
) > 0: # at least one exception
264 comb
+= self
.o
.exc_happened
.eq(Cat(*el
).bool())
268 def connect_satellite_decoders(self
, m
):
270 for k
, v
in self
.decoders
.items():
271 # connect each satellite decoder and give it the instruction.
272 # as subset decoders this massively reduces wire fanout given
273 # the large number of ALUs
274 m
.submodules
["dec_%s" % k
] = v
275 comb
+= v
.dec
.raw_opcode_in
.eq(self
.ireg
.raw_insn_i
)
276 comb
+= v
.dec
.bigendian
.eq(self
.ireg
.bigendian_i
)
277 # sigh due to SVP64 RA_OR_ZERO detection connect these too
278 comb
+= v
.sv_a_nz
.eq(self
.ireg
.sv_a_nz
)
279 if not self
.svp64_en
:
281 comb
+= v
.pred_sm
.eq(self
.ireg
.sv_pred_sm
)
282 comb
+= v
.pred_dm
.eq(self
.ireg
.sv_pred_dm
)
283 if k
== self
.trapunit
:
285 comb
+= v
.sv_rm
.eq(self
.ireg
.sv_rm
) # pass through SVP64 RM
286 comb
+= v
.is_svp64_mode
.eq(self
.ireg
.is_svp64_mode
)
287 # only the LDST PowerDecodeSubset *actually* needs to
288 # know to use the alternative decoder. this is all
290 if not k
.lower().startswith("ldst"):
292 comb
+= v
.use_svp64_ldst_dec
.eq( self
.ireg
.use_svp64_ldst_dec
)
294 def connect_instruction(self
, m
):
295 """connect_instruction
297 uses decoded (from PowerOp) function unit information from CSV files
298 to ascertain which Function Unit should deal with the current
301 some (such as OP_ATTN, OP_NOP) are dealt with here, including
302 ignoring it and halting the processor. OP_NOP is a bit annoying
303 because the issuer expects busy flag still to be raised then lowered.
304 (this requires a fake counter to be set).
306 comb
, sync
= m
.d
.comb
, m
.d
.sync
309 # indicate if core is busy
310 busy_o
= self
.o
.busy_o
311 any_busy_o
= self
.o
.any_busy_o
313 # connect up temporary copy of incoming instruction. the FSM will
314 # either blat the incoming instruction (if valid) into self.ireg
315 # or if the instruction could not be delivered, keep dropping the
316 # latched copy into ireg
317 ilatch
= self
.ispec()
318 self
.instr_active
= Signal()
320 # enable/busy-signals for each FU, get one bit for each FU (by name)
321 fu_enable
= Signal(len(fus
), reset_less
=True)
322 fu_busy
= Signal(len(fus
), reset_less
=True)
325 for i
, funame
in enumerate(fus
.keys()):
326 fu_bitdict
[funame
] = fu_enable
[i
]
327 fu_selected
[funame
] = fu_busy
[i
]
329 # identify function units and create a list by fnunit so that
330 # PriorityPickers can be created for selecting one of them that
331 # isn't busy at the time the incoming instruction needs passing on
332 by_fnunit
= defaultdict(list)
333 for fname
, member
in Function
.__members
__.items():
334 for funame
, fu
in fus
.items():
335 fnunit
= fu
.fnunit
.value
336 if member
.value
& fnunit
: # this FU handles this type of op
337 by_fnunit
[fname
].append((funame
, fu
)) # add by Function
339 # ok now just print out the list of FUs by Function, because we can
340 for fname
, fu_list
in by_fnunit
.items():
341 print ("FUs by type", fname
, fu_list
)
343 # now create a PriorityPicker per FU-type such that only one
344 # non-busy FU will be picked
346 fu_found
= Signal() # take a note if no Function Unit was available
347 for fname
, fu_list
in by_fnunit
.items():
348 i_pp
= PriorityPicker(len(fu_list
))
349 m
.submodules
['i_pp_%s' % fname
] = i_pp
351 for i
, (funame
, fu
) in enumerate(fu_list
):
352 # match the decoded instruction (e.do.fn_unit) against the
353 # "capability" of this FU, gate that by whether that FU is
354 # busy, and drop that into the PriorityPicker.
355 # this will give us an output of the first available *non-busy*
356 # Function Unit (Reservation Station) capable of handling this
358 fnunit
= fu
.fnunit
.value
359 en_req
= Signal(name
="issue_en_%s" % funame
, reset_less
=True)
360 fnmatch
= (self
.ireg
.e
.do
.fn_unit
& fnunit
).bool()
361 comb
+= en_req
.eq(fnmatch
& ~fu
.busy_o
&
363 i_l
.append(en_req
) # store in list for doing the Cat-trick
364 # picker output, gated by enable: store in fu_bitdict
365 po
= Signal(name
="o_issue_pick_"+funame
) # picker output
366 comb
+= po
.eq(i_pp
.o
[i
] & i_pp
.en_o
)
367 comb
+= fu_bitdict
[funame
].eq(po
)
368 comb
+= fu_selected
[funame
].eq(fu
.busy_o | po
)
369 # if we don't do this, then when there are no FUs available,
370 # the "p.o_ready" signal will go back "ok we accepted this
371 # instruction" which of course isn't true.
372 with m
.If(i_pp
.en_o
):
373 comb
+= fu_found
.eq(1)
374 # for each input, Cat them together and drop them into the picker
375 comb
+= i_pp
.i
.eq(Cat(*i_l
))
377 # rdmask, which is for registers needs to come from the *main* decoder
378 for funame
, fu
in fus
.items():
379 rdmask
= get_rdflags(m
, self
.ireg
.e
, fu
)
380 comb
+= fu
.rdmaskn
.eq(~rdmask
)
382 # sigh - need a NOP counter
384 with m
.If(counter
!= 0):
385 sync
+= counter
.eq(counter
- 1)
388 # default to reading from incoming instruction: may be overridden
389 # by copy from latch when "waiting"
390 comb
+= self
.ireg
.eq(self
.i
)
391 # always say "ready" except if overridden
392 comb
+= self
.p
.o_ready
.eq(1)
395 with m
.State("READY"):
396 with m
.If(self
.p
.i_valid
): # run only when valid
397 with m
.Switch(self
.ireg
.e
.do
.insn_type
):
398 # check for ATTN: halt if true
399 with m
.Case(MicrOp
.OP_ATTN
):
400 m
.d
.sync
+= self
.o
.core_terminate_o
.eq(1)
402 # fake NOP - this isn't really used (Issuer detects NOP)
403 with m
.Case(MicrOp
.OP_NOP
):
404 sync
+= counter
.eq(2)
408 comb
+= self
.instr_active
.eq(1)
409 comb
+= self
.p
.o_ready
.eq(0)
410 # connect instructions. only one enabled at a time
411 for funame
, fu
in fus
.items():
412 do
= self
.des
[funame
]
413 enable
= fu_bitdict
[funame
]
415 # run this FunctionUnit if enabled route op,
416 # issue, busy, read flags and mask to FU
418 # operand comes from the *local* decoder
419 # do not actually issue, though, if there
420 # is a waw hazard. decoder has to still
421 # be asserted in order to detect that, tho
422 comb
+= fu
.oper_i
.eq_from(do
)
423 # issue when valid (and no write-hazard)
424 comb
+= fu
.issue_i
.eq(~self
.waw_hazard
)
425 # instruction ok, indicate ready
426 comb
+= self
.p
.o_ready
.eq(1)
428 if self
.allow_overlap
:
429 with m
.If(~fu_found | self
.waw_hazard
):
430 # latch copy of instruction
431 sync
+= ilatch
.eq(self
.i
)
432 comb
+= self
.p
.o_ready
.eq(1) # accept
436 with m
.State("WAITING"):
437 comb
+= self
.instr_active
.eq(1)
438 comb
+= self
.p
.o_ready
.eq(0)
440 # using copy of instruction, keep waiting until an FU is free
441 comb
+= self
.ireg
.eq(ilatch
)
442 with m
.If(fu_found
): # wait for conflict to clear
443 # connect instructions. only one enabled at a time
444 for funame
, fu
in fus
.items():
445 do
= self
.des
[funame
]
446 enable
= fu_bitdict
[funame
]
448 # run this FunctionUnit if enabled route op,
449 # issue, busy, read flags and mask to FU
451 # operand comes from the *local* decoder,
452 # which is asserted even if not issued,
453 # so that WaW-detection can check for hazards.
454 # only if the waw hazard is clear does the
455 # instruction actually get issued
456 comb
+= fu
.oper_i
.eq_from(do
)
458 comb
+= fu
.issue_i
.eq(~self
.waw_hazard
)
459 with m
.If(~self
.waw_hazard
):
460 comb
+= self
.p
.o_ready
.eq(1)
464 print ("core: overlap allowed", self
.allow_overlap
)
465 # true when any FU is busy (including the cycle where it is perhaps
466 # to be issued - because that's what fu_busy is)
467 comb
+= any_busy_o
.eq(fu_busy
.bool())
468 if not self
.allow_overlap
:
469 # for simple non-overlap, if any instruction is busy, set
470 # busy output for core.
471 comb
+= busy_o
.eq(any_busy_o
)
473 # sigh deal with a fun situation that needs to be investigated
475 with m
.If(self
.issue_conflict
):
477 # make sure that LDST, SPR, MMU, Branch and Trap all say "busy"
478 # and do not allow overlap. these are all the ones that
479 # are non-forward-progressing: exceptions etc. that otherwise
480 # change CoreState for some reason (MSR, PC, SVSTATE)
481 for funame
, fu
in fus
.items():
482 if (funame
.lower().startswith('ldst') or
483 funame
.lower().startswith('branch') or
484 funame
.lower().startswith('mmu') or
485 funame
.lower().startswith('spr') or
486 funame
.lower().startswith('trap')):
487 with m
.If(fu
.busy_o
):
490 # return both the function unit "enable" dict as well as the "busy".
491 # the "busy-or-issued" can be passed in to the Read/Write port
492 # connecters to give them permission to request access to regfiles
493 return fu_bitdict
, fu_selected
495 def connect_rdport(self
, m
, fu_bitdict
, fu_selected
,
496 rdpickers
, regfile
, regname
, fspec
):
497 comb
, sync
= m
.d
.comb
, m
.d
.sync
503 # select the required read port. these are pre-defined sizes
504 rfile
= regs
.rf
[regfile
.lower()]
505 rport
= rfile
.r_ports
[rpidx
]
506 print("read regfile", rpidx
, regfile
, regs
.rf
.keys(),
509 # for checking if the read port has an outstanding write
510 if self
.make_hazard_vecs
:
511 wv
= regs
.wv
[regfile
.lower()]
512 wvchk
= wv
.q_int
# write-vec bit-level hazard check
514 # if a hazard is detected on this read port, simply blithely block
515 # every FU from reading on it. this is complete overkill but very
517 hazard_detected
= Signal(name
="raw_%s_%s" % (regfile
, rpidx
))
520 if not isinstance(fspecs
, list):
526 for i
, fspec
in enumerate(fspecs
):
527 # get the regfile specs for this regfile port
528 print ("fpsec", i
, fspec
, len(fspec
.specs
))
529 name
= "%s_%s_%d" % (regfile
, regname
, i
)
530 ppoffs
.append(pplen
) # record offset for picker
531 pplen
+= len(fspec
.specs
)
532 rdflag
= Signal(name
="rdflag_"+name
, reset_less
=True)
533 comb
+= rdflag
.eq(fspec
.okflag
)
534 rdflags
.append(rdflag
)
536 print ("pplen", pplen
)
538 # create a priority picker to manage this port
539 rdpickers
[regfile
][rpidx
] = rdpick
= PriorityPicker(pplen
)
540 m
.submodules
["rdpick_%s_%s" % (regfile
, rpidx
)] = rdpick
546 for i
, fspec
in enumerate(fspecs
):
547 (rf
, _read
, wid
, fuspecs
) = \
548 (fspec
.okflag
, fspec
.regport
, fspec
.wid
, fspec
.specs
)
549 # connect up the FU req/go signals, and the reg-read to the FU
550 # and create a Read Broadcast Bus
551 for pi
, fuspec
in enumerate(fspec
.specs
):
552 (funame
, fu
, idx
) = (fuspec
.funame
, fuspec
.fu
, fuspec
.idx
)
554 name
= "%s_%s_%s_%i" % (regfile
, rpidx
, funame
, pi
)
555 fu_active
= fu_selected
[funame
]
556 fu_issued
= fu_bitdict
[funame
]
558 # get (or set up) a latched copy of read register number
559 # and (sigh) also the read-ok flag
560 # TODO: use nmutil latchregister
561 rhname
= "%s_%s_%d" % (regfile
, regname
, i
)
562 rdflag
= Signal(name
="rdflag_%s_%s" % (funame
, rhname
),
564 if rhname
not in fu
.rf_latches
:
565 rfl
= Signal(name
="rdflag_latch_%s_%s" % (funame
, rhname
))
566 fu
.rf_latches
[rhname
] = rfl
567 with m
.If(fu
.issue_i
):
568 sync
+= rfl
.eq(rdflags
[i
])
570 rfl
= fu
.rf_latches
[rhname
]
572 # now the register port
573 rname
= "%s_%s_%s_%d" % (funame
, regfile
, regname
, pi
)
574 read
= Signal
.like(_read
, name
="read_"+rname
)
575 if rname
not in fu
.rd_latches
:
576 rdl
= Signal
.like(_read
, name
="rdlatch_"+rname
)
577 fu
.rd_latches
[rname
] = rdl
578 with m
.If(fu
.issue_i
):
579 sync
+= rdl
.eq(_read
)
581 rdl
= fu
.rd_latches
[rname
]
583 # make the read immediately available on issue cycle
584 # after the read cycle, otherwise use the latched copy.
585 # this captures the regport and okflag on issue
586 with m
.If(fu
.issue_i
):
587 comb
+= read
.eq(_read
)
588 comb
+= rdflag
.eq(rdflags
[i
])
591 comb
+= rdflag
.eq(rfl
)
593 # connect request-read to picker input, and output to go-rd
594 addr_en
= Signal
.like(read
, name
="addr_en_"+name
)
595 pick
= Signal(name
="pick_"+name
) # picker input
596 rp
= Signal(name
="rp_"+name
) # picker output
597 delay_pick
= Signal(name
="dp_"+name
) # read-enable "underway"
598 rhazard
= Signal(name
="rhaz_"+name
)
600 # exclude any currently-enabled read-request (mask out active)
601 # entirely block anything hazarded from being picked
602 comb
+= pick
.eq(fu
.rd_rel_o
[idx
] & fu_active
& rdflag
&
603 ~delay_pick
& ~rhazard
)
604 comb
+= rdpick
.i
[pi
].eq(pick
)
605 comb
+= fu
.go_rd_i
[idx
].eq(delay_pick
) # pass in *delayed* pick
607 # if picked, select read-port "reg select" number to port
608 comb
+= rp
.eq(rdpick
.o
[pi
] & rdpick
.en_o
)
609 sync
+= delay_pick
.eq(rp
) # delayed "pick"
610 comb
+= addr_en
.eq(Mux(rp
, read
, 0))
612 # the read-enable happens combinatorially (see mux-bus below)
613 # but it results in the data coming out on a one-cycle delay.
617 addrs
.append(addr_en
)
620 # use the *delayed* pick signal to put requested data onto bus
621 with m
.If(delay_pick
):
622 # connect regfile port to input, creating fan-out Bus
624 print("reg connect widths",
625 regfile
, regname
, pi
, funame
,
626 src
.shape(), rport
.o_data
.shape())
627 # all FUs connect to same port
628 comb
+= src
.eq(rport
.o_data
)
630 if not self
.make_hazard_vecs
:
633 # read the write-hazard bitvector (wv) for any bit that is
634 wvchk_en
= Signal(len(wvchk
), name
="wv_chk_addr_en_"+name
)
635 issue_active
= Signal(name
="rd_iactive_"+name
)
636 # XXX combinatorial loop here
637 comb
+= issue_active
.eq(fu_active
& rdflag
)
638 with m
.If(issue_active
):
640 comb
+= wvchk_en
.eq(read
)
642 comb
+= wvchk_en
.eq(1<<read
)
643 # if FU is busy (which doesn't get set at the same time as
644 # issue) and no hazard was detected, clear wvchk_en (i.e.
645 # stop checking for hazards). there is a loop here, but it's
646 # via a DFF, so is ok. some linters may complain, but hey.
647 with m
.If(fu
.busy_o
& ~rhazard
):
648 comb
+= wvchk_en
.eq(0)
650 # read-hazard is ANDed with (filtered by) what is actually
652 comb
+= rhazard
.eq((wvchk
& wvchk_en
).bool())
654 wvens
.append(wvchk_en
)
656 # or-reduce the muxed read signals
658 # for unary-addressed
659 comb
+= rport
.ren
.eq(ortreereduce_sig(rens
))
661 # for binary-addressed
662 comb
+= rport
.addr
.eq(ortreereduce_sig(addrs
))
663 comb
+= rport
.ren
.eq(Cat(*rens
).bool())
664 print ("binary", regfile
, rpidx
, rport
, rport
.ren
, rens
, addrs
)
666 if not self
.make_hazard_vecs
:
667 return Const(0) # declare "no hazards"
669 # enable the read bitvectors for this issued instruction
670 # and return whether any write-hazard bit is set
671 wvchk_and
= Signal(len(wvchk
), name
="wv_chk_"+name
)
672 comb
+= wvchk_and
.eq(wvchk
& ortreereduce_sig(wvens
))
673 comb
+= hazard_detected
.eq(wvchk_and
.bool())
674 return hazard_detected
676 def connect_rdports(self
, m
, fu_bitdict
, fu_selected
):
677 """connect read ports
679 orders the read regspecs into a dict-of-dicts, by regfile, by
680 regport name, then connects all FUs that want that regport by
681 way of a PriorityPicker.
683 comb
, sync
= m
.d
.comb
, m
.d
.sync
688 # dictionary of lists of regfile read ports
689 byregfiles_rdspec
= self
.get_byregfiles(m
, True)
691 # okaay, now we need a PriorityPicker per regfile per regfile port
692 # loootta pickers... peter piper picked a pack of pickled peppers...
694 for regfile
, fuspecs
in byregfiles_rdspec
.items():
695 rdpickers
[regfile
] = {}
697 # argh. an experiment to merge RA and RB in the INT regfile
698 # (we have too many read/write ports)
699 if self
.regreduce_en
:
701 fuspecs
['rabc'] = [fuspecs
.pop('rb')]
702 fuspecs
['rabc'].append(fuspecs
.pop('rc'))
703 fuspecs
['rabc'].append(fuspecs
.pop('ra'))
704 if regfile
== 'FAST':
705 fuspecs
['fast1'] = [fuspecs
.pop('fast1')]
706 if 'fast2' in fuspecs
:
707 fuspecs
['fast1'].append(fuspecs
.pop('fast2'))
708 if 'fast3' in fuspecs
:
709 fuspecs
['fast1'].append(fuspecs
.pop('fast3'))
711 # for each named regfile port, connect up all FUs to that port
712 # also return (and collate) hazard detection
713 for (regname
, fspec
) in sort_fuspecs(fuspecs
):
714 print("connect rd", regname
, fspec
)
715 rh
= self
.connect_rdport(m
, fu_bitdict
, fu_selected
,
720 return Cat(*rd_hazard
).bool()
722 def make_hazards(self
, m
, regfile
, rfile
, wvclr
, wvset
,
723 funame
, regname
, idx
,
724 addr_en
, wp
, fu
, fu_active
, wrflag
, write
,
726 """make_hazards: a setter and a clearer for the regfile write ports
728 setter is at issue time (using PowerDecoder2 regfile write numbers)
729 clearer is at regfile write time (when FU has said what to write to)
731 there is *one* unusual case here which has to be dealt with:
732 when the Function Unit does *NOT* request a write to the regfile
733 (has its data.ok bit CLEARED). this is perfectly legitimate.
736 comb
, sync
= m
.d
.comb
, m
.d
.sync
737 name
= "%s_%s_%d" % (funame
, regname
, idx
)
739 # connect up the bitvector write hazard. unlike the
740 # regfile writeports, a ONE must be written to the corresponding
741 # bit of the hazard bitvector (to indicate the existence of
744 # the detection of what shall be written to is based
745 # on *issue*. it is delayed by 1 cycle so that instructions
746 # "addi 5,5,0x2" do not cause combinatorial loops due to
747 # fake-dependency on *themselves*. this will totally fail
748 # spectacularly when doing multi-issue
749 print ("write vector (for regread)", regfile
, wvset
)
750 wviaddr_en
= Signal(len(wvset
), name
="wv_issue_addr_en_"+name
)
751 issue_active
= Signal(name
="iactive_"+name
)
752 sync
+= issue_active
.eq(fu
.issue_i
& fu_active
& wrflag
)
753 with m
.If(issue_active
):
755 comb
+= wviaddr_en
.eq(write
)
757 comb
+= wviaddr_en
.eq(1<<write
)
759 # deal with write vector clear: this kicks in when the regfile
760 # is written to, and clears the corresponding bitvector entry
761 print ("write vector", regfile
, wvclr
)
762 wvaddr_en
= Signal(len(wvclr
), name
="wvaddr_en_"+name
)
764 comb
+= wvaddr_en
.eq(addr_en
)
767 comb
+= wvaddr_en
.eq(1<<addr_en
)
769 # XXX ASSUME that LDSTFunctionUnit always sets the data it intends to
770 # this may NOT be the case when an exception occurs
771 if isinstance(fu
, LDSTFunctionUnit
):
772 return wvaddr_en
, wviaddr_en
774 # okaaay, this is preparation for the awkward case.
775 # * latch a copy of wrflag when issue goes high.
776 # * when the fu_wrok (data.ok) flag is NOT set,
777 # but the FU is done, the FU is NEVER going to write
778 # so the bitvector has to be cleared.
779 latch_wrflag
= Signal(name
="latch_wrflag_"+name
)
780 with m
.If(~fu
.busy_o
):
781 sync
+= latch_wrflag
.eq(0)
782 with m
.If(fu
.issue_i
& fu_active
):
783 sync
+= latch_wrflag
.eq(wrflag
)
784 with m
.If(fu
.alu_done_o
& latch_wrflag
& ~fu_wrok
):
786 comb
+= wvaddr_en
.eq(write
) # addr_en gated with wp, don't use
788 comb
+= wvaddr_en
.eq(1<<addr_en
) # binary addr_en not gated
790 return wvaddr_en
, wviaddr_en
792 def connect_wrport(self
, m
, fu_bitdict
, fu_selected
,
793 wrpickers
, regfile
, regname
, fspec
):
794 comb
, sync
= m
.d
.comb
, m
.d
.sync
800 # select the required write port. these are pre-defined sizes
801 rfile
= regs
.rf
[regfile
.lower()]
802 wport
= rfile
.w_ports
[rpidx
]
804 print("connect wr", regname
, "unary", rfile
.unary
, fspec
)
805 print(regfile
, regs
.rf
.keys())
807 # select the write-protection hazard vector. note that this still
808 # requires to WRITE to the hazard bitvector! read-requests need
809 # to RAISE the bitvector (set it to 1), which, duh, requires a WRITE
810 if self
.make_hazard_vecs
:
811 wv
= regs
.wv
[regfile
.lower()]
812 wvset
= wv
.s
# write-vec bit-level hazard ctrl
813 wvclr
= wv
.r
# write-vec bit-level hazard ctrl
814 wvchk
= wv
.q
# write-after-write hazard check
817 if not isinstance(fspecs
, list):
824 for i
, fspec
in enumerate(fspecs
):
825 # get the regfile specs for this regfile port
826 (wf
, _write
, wid
, fuspecs
) = \
827 (fspec
.okflag
, fspec
.regport
, fspec
.wid
, fspec
.specs
)
828 print ("fpsec", i
, "wrflag", wf
, fspec
, len(fuspecs
))
829 ppoffs
.append(pplen
) # record offset for picker
830 pplen
+= len(fuspecs
)
832 name
= "%s_%s_%d" % (regfile
, regname
, i
)
833 wrflag
= Signal(name
="wr_flag_"+name
)
835 comb
+= wrflag
.eq(wf
)
838 wrflags
.append(wrflag
)
840 # create a priority picker to manage this port
841 wrpickers
[regfile
][rpidx
] = wrpick
= PriorityPicker(pplen
)
842 m
.submodules
["wrpick_%s_%s" % (regfile
, rpidx
)] = wrpick
849 #wvens = [] - not needed: reading of writevec is permanently held hi
851 for i
, fspec
in enumerate(fspecs
):
852 # connect up the FU req/go signals and the reg-read to the FU
853 # these are arbitrated by Data.ok signals
854 (wf
, _write
, wid
, fuspecs
) = \
855 (fspec
.okflag
, fspec
.regport
, fspec
.wid
, fspec
.specs
)
856 for pi
, fuspec
in enumerate(fspec
.specs
):
857 (funame
, fu
, idx
) = (fuspec
.funame
, fuspec
.fu
, fuspec
.idx
)
858 fu_requested
= fu_bitdict
[funame
]
860 name
= "%s_%s_%s_%d" % (funame
, regfile
, regname
, idx
)
861 # get (or set up) a write-latched copy of write register number
862 write
= Signal
.like(_write
, name
="write_"+name
)
863 rname
= "%s_%s_%s_%d" % (funame
, regfile
, regname
, idx
)
864 if rname
not in fu
.wr_latches
:
865 wrl
= Signal
.like(_write
, name
="wrlatch_"+rname
)
866 fu
.wr_latches
[rname
] = write
867 # do not depend on fu.issue_i here, it creates a
868 # combinatorial loop on waw checking. using the FU
869 # "enable" bitdict entry for this FU is sufficient,
870 # because the PowerDecoder2 read/write nums are
871 # valid continuously when the instruction is valid
872 with m
.If(fu_requested
):
873 sync
+= wrl
.eq(_write
)
874 comb
+= write
.eq(_write
)
876 comb
+= write
.eq(wrl
)
878 write
= fu
.wr_latches
[rname
]
880 # write-request comes from dest.ok
881 dest
= fu
.get_out(idx
)
882 fu_dest_latch
= fu
.get_fu_out(idx
) # latched output
883 name
= "%s_%s_%d" % (funame
, regname
, idx
)
884 fu_wrok
= Signal(name
="fu_wrok_"+name
, reset_less
=True)
885 comb
+= fu_wrok
.eq(dest
.ok
& fu
.busy_o
)
887 # connect request-write to picker input, and output to go-wr
888 fu_active
= fu_selected
[funame
]
889 pick
= fu
.wr
.rel_o
[idx
] & fu_active
890 comb
+= wrpick
.i
[pi
].eq(pick
)
891 # create a single-pulse go write from the picker output
892 wr_pick
= Signal(name
="wpick_%s_%s_%d" % (funame
, regname
, idx
))
893 comb
+= wr_pick
.eq(wrpick
.o
[pi
] & wrpick
.en_o
)
894 comb
+= fu
.go_wr_i
[idx
].eq(rising_edge(m
, wr_pick
))
896 # connect the regspec write "reg select" number to this port
897 # only if one FU actually requests (and is granted) the port
898 # will the write-enable be activated
899 wname
= "waddr_en_%s_%s_%d" % (funame
, regname
, idx
)
900 addr_en
= Signal
.like(write
, name
=wname
)
902 comb
+= wp
.eq(wr_pick
& wrpick
.en_o
)
903 comb
+= addr_en
.eq(Mux(wp
, write
, 0))
907 addrs
.append(addr_en
)
910 # connect regfile port to input
911 print("reg connect widths",
912 regfile
, regname
, pi
, funame
,
913 dest
.shape(), wport
.i_data
.shape())
914 wsigs
.append(fu_dest_latch
)
916 # now connect up the bitvector write hazard
917 if not self
.make_hazard_vecs
:
919 res
= self
.make_hazards(m
, regfile
, rfile
, wvclr
, wvset
,
920 funame
, regname
, idx
,
921 addr_en
, wp
, fu
, fu_active
,
922 wrflags
[i
], write
, fu_wrok
)
923 wvaddr_en
, wv_issue_en
= res
924 wvclren
.append(wvaddr_en
) # set only: no data => clear bit
925 wvseten
.append(wv_issue_en
) # set data same as enable
927 # read the write-hazard bitvector (wv) for any bit that is
928 fu_requested
= fu_bitdict
[funame
]
929 wvchk_en
= Signal(len(wvchk
), name
="waw_chk_addr_en_"+name
)
930 issue_active
= Signal(name
="waw_iactive_"+name
)
931 whazard
= Signal(name
="whaz_"+name
)
933 # XXX EEK! STATE regfile (branch) does not have an
934 # write-active indicator in regspec_decode_write()
935 print ("XXX FIXME waw_iactive", issue_active
,
938 # check bits from the incoming instruction. note (back
939 # in connect_instruction) that the decoder is held for
940 # us to be able to do this, here... *without* issue being
941 # held HI. we MUST NOT gate this with fu.issue_i or
942 # with fu_bitdict "enable": it would create a loop
943 comb
+= issue_active
.eq(wf
)
944 with m
.If(issue_active
):
946 comb
+= wvchk_en
.eq(write
)
948 comb
+= wvchk_en
.eq(1<<write
)
949 # if FU is busy (which doesn't get set at the same time as
950 # issue) and no hazard was detected, clear wvchk_en (i.e.
951 # stop checking for hazards). there is a loop here, but it's
952 # via a DFF, so is ok. some linters may complain, but hey.
953 with m
.If(fu
.busy_o
& ~whazard
):
954 comb
+= wvchk_en
.eq(0)
956 # write-hazard is ANDed with (filtered by) what is actually
957 # being requested. the wvchk data is on a one-clock delay,
958 # and wvchk_en comes directly from the main decoder
959 comb
+= whazard
.eq((wvchk
& wvchk_en
).bool())
961 comb
+= fu
._waw
_hazard
.eq(1)
963 #wvens.append(wvchk_en)
965 # here is where we create the Write Broadcast Bus. simple, eh?
966 comb
+= wport
.i_data
.eq(ortreereduce_sig(wsigs
))
968 # for unary-addressed
969 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
971 # for binary-addressed
972 comb
+= wport
.addr
.eq(ortreereduce_sig(addrs
))
973 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
975 if not self
.make_hazard_vecs
:
978 # return these here rather than set wvclr/wvset directly,
979 # because there may be more than one write-port to a given
980 # regfile. example: XER has a write-port for SO, CA, and OV
981 # and the *last one added* of those would overwrite the other
982 # two. solution: have connect_wrports collate all the
983 # or-tree-reduced bitvector set/clear requests and drop them
984 # in as a single "thing". this can only be done because the
985 # set/get is an unary bitvector.
986 print ("make write-vecs", regfile
, regname
, wvset
, wvclr
)
987 return (wvclren
, # clear (regfile write)
988 wvseten
) # set (issue time)
def connect_wrports(self, m, fu_bitdict, fu_selected):
    """Connect every Function Unit write port to its regfile write port.

    Orders the write regspecs into a dict-of-dicts, by regfile and then
    by regfile port name, and hands each (regfile, port) lane to
    ``connect_wrport``, which connects all FUs that want that port by
    way of a PriorityPicker.

    Note that the write-port wen, write-port data, and go_wr_i all need
    to be on the exact same clock cycle.  As there is a combinatorial
    loop bug at the moment, these all use sync.

    Arguments:
    m           -- the Module being built; statements are added to m.d.comb
    fu_bitdict  -- forwarded unchanged to ``connect_wrport``
    fu_selected -- forwarded unchanged to ``connect_wrport``

    Returns None.  When ``self.make_hazard_vecs`` is false the function
    stops after wiring the ports and never touches the write-vectors.
    """
    comb = m.d.comb

    # dictionary (by regfile) of dictionaries (by port name) of write specs
    byregfiles_wrspec = self.get_byregfiles(m, False)

    # same for write ports.
    # BLECH!  complex code-duplication! BLECH!
    # NOTE(review): `wrpickers` is indexed below but was never bound in the
    # text under review; restored as a fresh dict and passed on to
    # connect_wrport -- confirm against connect_wrport's signature.
    wrpickers = {}
    wvclrers = defaultdict(list)
    wvseters = defaultdict(list)
    for regfile, fuspecs in byregfiles_wrspec.items():
        wrpickers[regfile] = {}

        if self.regreduce_en:
            # argh, more port-merging: the specs of several ports are
            # collected into one list stored under the first port's name
            if regfile == 'INT':
                fuspecs['o'] = [fuspecs.pop('o')]
                fuspecs['o'].append(fuspecs.pop('o1'))
            if regfile == 'FAST':
                fuspecs['fast1'] = [fuspecs.pop('fast1')]
                if 'fast2' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast2'))
                if 'fast3' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast3'))

        # collate these and record them by regfile because there
        # are sometimes more write-ports per regfile
        vec_name = regfile.lower()
        for regname, fspec in sort_fuspecs(fuspecs):
            wvclren, wvseten = self.connect_wrport(m,
                                                   fu_bitdict, fu_selected,
                                                   wrpickers,
                                                   regfile, regname, fspec)
            wvclrers[vec_name] += wvclren
            wvseters[vec_name] += wvseten

    # no hazard vectors requested: nothing further to connect
    if not self.make_hazard_vecs:
        return

    # for write-vectors: reduce the clr-ers and set-ers down to
    # a single set of bits.  otherwise if there are two write
    # ports (on some regfiles), the last one doing comb += on
    # the reg.wv[regfile] instance "wins" (and all others are ignored,
    # whoops).
    # NOTE(review): `regs` was unbound in the text under review; assumed to
    # be the core's regfile collection (self.regs) -- confirm.
    regs = self.regs
    for vec_name, wvclren in wvclrers.items():
        wv = regs.wv[vec_name]
        wvseten = wvseters[vec_name]  # same keys as wvclrers (see loop above)
        # wv.r / wv.s: write-vec bit-level hazard ctrl
        comb += wv.r.eq(ortreereduce_sig(wvclren))  # clear (regfile write)
        comb += wv.s.eq(ortreereduce_sig(wvseten))  # set (issue time)
def get_byregfiles(self, m, readmode):
    """Group Function Unit register ports by regfile and by port name.

    Walks every Function Unit, asks it for the regspec of each source
    (read mode) or destination (write mode) operand, and files the
    operand under [regfile][regname].  Each entry is a
    ByRegSpec(okflag, regport, wid, specs), where okflag and regport come
    from ``regspec_decode`` on the decoded instruction and ``specs`` is
    the list of FUSpec(funame, fu, idx) lanes wanting that port.

    As a side effect each FU gets fresh, empty latch dictionaries:
    ``rd_latches`` and ``rf_latches`` in read mode, ``wr_latches`` in
    write mode.  The result is also printed, for convenience.

    Arguments:
    m        -- the Module being built (handed to ``regspec_decode``)
    readmode -- True to collect read (source) ports, False for write
                (destination) ports

    Returns the defaultdict(dict) described above.
    """
    mode = "read" if readmode else "write"
    # NOTE(review): `fus` was unbound in the text under review; assumed to
    # be the name->FunctionUnit dictionary held by self.fus -- confirm.
    fus = self.fus.fus
    e = self.ireg.e  # decoded instruction to execute

    # dictionary of dictionaries of lists/tuples of regfile ports.
    # first key: regfile.  second key: regfile port name
    byregfiles_spec = defaultdict(dict)

    for funame, fu in fus.items():
        # create in each FU a receptacle for the read/write register
        # hazard numbers (and okflags for read).  to be latched in
        # connect_rd/write_ports
        if readmode:
            fu.rd_latches = {}  # read reg number latches
            fu.rf_latches = {}  # read flag latches
        else:
            # NOTE(review): restored; the write-port wiring reads
            # fu.wr_latches[...] and nothing else visible creates it
            fu.wr_latches = {}  # write reg number latches

        # construct regfile specs: read uses inspec, write outspec
        print("%s ports for %s" % (mode, funame))
        n_ports = fu.n_src if readmode else fu.n_dst
        for idx in range(n_ports):
            regfile, regname, wid = fu.get_io_spec(readmode, idx)
            print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))

            # the PowerDecoder2 (main one, not the satellites) contains
            # the decoded regfile numbers. obtain these now
            decinfo = regspec_decode(m, readmode, e, regfile, regname)
            okflag, regport = decinfo.okflag, decinfo.regport

            # construct the dictionary of regspec information by regfile
            ports = byregfiles_spec[regfile]
            if regname not in ports:
                ports[regname] = ByRegSpec(okflag, regport, wid, [])

            # here we start to create "lanes" where each Function Unit
            # requiring access to a given [single-contended resource]
            # regfile port is appended to a list, so that PriorityPickers
            # can be created to give uncontested access to it
            ports[regname].specs.append(FUSpec(funame, fu, idx))

    # ok just print that all out, for convenience
    for regfile, fuspecs in byregfiles_spec.items():
        print("regfile %s ports:" % mode, regfile)
        for regname, fspec in fuspecs.items():
            # unpack into `lanes`, not `fuspecs`: the outer dictionary is
            # still being iterated and its name must not be rebound
            okflag, regport, wid, lanes = fspec
            print(" rf %s port %s lane: %s" % (mode, regfile, regname))
            print(" %s" % regname, wid, okflag, regport)
            for funame, fu, idx in lanes:
                fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                print(" ", funame, fu.__class__.__name__, idx, fusig)

    return byregfiles_spec
1114 yield from self
.fus
.ports()
1115 yield from self
.i
.e
.ports()
1116 yield from self
.l0
.ports()
1123 if __name__
== '__main__':
1124 pspec
= TestMemPspec(ldst_ifacetype
='testpi',
1130 dut
= NonProductionCore(pspec
)
1131 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
1132 with
open("test_core.il", "w") as f
: