3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 (update: actually this is being added now:
21 https://bugs.libre-soc.org/show_bug.cgi?id=737)
24 from nmigen
import (Elaboratable
, Module
, Signal
, ResetSignal
, Cat
, Mux
,
26 from nmigen
.cli
import rtlil
28 from openpower
.decoder
.power_decoder2
import PowerDecodeSubset
29 from openpower
.decoder
.power_regspec_map
import regspec_decode
30 from openpower
.sv
.svp64
import SVP64Rec
32 from nmutil
.picker
import PriorityPicker
33 from nmutil
.util
import treereduce
34 from nmutil
.singlepipe
import ControlBase
36 from soc
.fu
.compunits
.compunits
import AllFunctionUnits
, LDSTFunctionUnit
37 from soc
.regfile
.regfiles
import RegFiles
38 from openpower
.decoder
.power_decoder2
import get_rdflags
39 from soc
.experiment
.l0_cache
import TstL0CacheBuffer
# test only
40 from soc
.config
.test
.test_loadstore
import TestMemPspec
41 from openpower
.decoder
.power_enums
import MicrOp
, Function
42 from soc
.simple
.core_data
import CoreInput
, CoreOutput
44 from collections
import defaultdict
, namedtuple
47 from nmutil
.util
import rising_edge
# FUSpec: one Function Unit's claim on a register-file port.
#   funame - name of the Function Unit (key into the FU dictionaries)
#   fu     - the Function Unit object itself
#   idx    - index of the FU operand/result that uses the port
FUSpec = namedtuple(
    "FUSpec",
    ("funame", "fu", "idx"),
)

# ByRegSpec: everything needed to wire one regfile port grouping.
#   okflag  - flag saying whether the port is actually used
#   regport - the register-select (port number) expression
#   wid     - width recorded for the port
#   specs   - list of FUSpec entries contending for the port
ByRegSpec = namedtuple(
    "ByRegSpec",
    ("okflag", "regport", "wid", "specs"),
)
52 # helper functions for OR-reducing a list of signals in parallel (as a tree)
def ortreereduce(tree, attr="o_data"):
    """OR together one attribute taken from every item of ``tree``.

    The attribute named by ``attr`` (default ``"o_data"``) is fetched from
    each item and the results are combined with ``operator.or_`` by
    ``treereduce``.
    """
    def fetch(item):
        # pull the requested attribute off each item
        return getattr(item, attr)

    return treereduce(tree, operator.or_, fetch)
def ortreereduce_sig(tree):
    """OR together the items of ``tree`` themselves.

    Same reduction as ``ortreereduce`` (``treereduce`` with
    ``operator.or_``) but each item is used as-is, with no attribute lookup.
    """
    def as_is(item):
        # items are already the values to be OR-ed
        return item

    return treereduce(tree, operator.or_, as_is)
62 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the items of ``fuspecs`` with "full" register names first.

    ``fuspecs`` is a mapping of register-port name to its spec.  The result
    is a list of ``(regname, fspec)`` tuples: first every entry whose name
    starts with ``"full"``, then every other entry.  Within each group the
    mapping's own iteration order is preserved.  An empty mapping gives an
    empty list.
    """
    # build the result locally: the accumulator was previously used
    # without ever being initialised
    entries = list(fuspecs.items())
    full_first = [(regname, fspec) for regname, fspec in entries
                  if regname.startswith("full")]
    remainder = [(regname, fspec) for regname, fspec in entries
                 if not regname.startswith("full")]
    return full_first + remainder
74 # a hazard bitvector "remap" function which returns an AST expression
75 # that remaps read/write hazard regfile port numbers to either a full
76 # bitvector or a reduced subset one. SPR for example is reduced to a single bit.
78 # CRITICALLY-IMPORTANT NOTE: these bitvectors *have* to match up per
79 # regfile! therefore the remapping is per regfile, *NOT* per regfile
80 # port and certainly not based on whether it is a read port or write port.
81 # note that any reductions here will result in degraded performance due
82 # to conflicts, but at least it keeps the hazard matrix sizes down to "sane"
83 def bitvector_remap(regfile
, rfile
, port
):
84 # 8-bits (at the moment, no SVP64), CR is unary: no remap
87 # 3 bits, unary already: return the port
90 # 3 bits, unary: return the port
93 # 3 bits, unary: return the port
94 if regfile
== 'SVSTATE':
96 # 9 bits (9 entries), might be unary already
98 if rfile
.unary
: # FAST might be unary already
102 # 10 bits (!!) - reduce to one
104 if rfile
.unary
: # FAST might be unary already
109 if rfile
.unary
: # INT, check if unary/binary
115 # derive from ControlBase rather than have a separate Stage instance,
116 # this is simpler to do
117 class NonProductionCore(ControlBase
):
118 def __init__(self
, pspec
):
121 # test if SVP64 is to be enabled
122 self
.svp64_en
= hasattr(pspec
, "svp64") and (pspec
.svp64
== True)
124 # test to see if regfile ports should be reduced
125 self
.regreduce_en
= (hasattr(pspec
, "regreduce") and
126 (pspec
.regreduce
== True))
128 # test to see if overlapping of instructions is allowed
129 # (not normally enabled for TestIssuer FSM but useful for checking
130 # the bitvector hazard detection, before doing In-Order)
131 self
.allow_overlap
= (hasattr(pspec
, "allow_overlap") and
132 (pspec
.allow_overlap
== True))
135 self
.make_hazard_vecs
= self
.allow_overlap
136 self
.core_type
= "fsm"
137 if hasattr(pspec
, "core_type"):
138 self
.core_type
= pspec
.core_type
140 super().__init
__(stage
=self
)
142 # single LD/ST funnel for memory access
143 self
.l0
= l0
= TstL0CacheBuffer(pspec
, n_units
=1)
146 # function units (only one each)
147 # only include mmu if enabled in pspec
148 self
.fus
= AllFunctionUnits(pspec
, pilist
=[pi
])
150 # link LoadStore1 into MMU
151 mmu
= self
.fus
.get_fu('mmu0')
152 ldst0
= self
.fus
.get_fu('ldst0')
153 print ("core pspec", pspec
.ldst_ifacetype
)
154 print ("core mmu", mmu
)
156 lsi
= l0
.cmpi
.lsmem
.lsi
# a LoadStore1 Interface object
157 print ("core lsmem.lsi", lsi
)
158 mmu
.alu
.set_ldst_interface(lsi
)
159 # urr store I-Cache in core so it is easier to get at
160 self
.icache
= lsi
.icache
162 # alternative reset values for STATE regs
163 self
.msr_at_reset
= 0x0
164 if hasattr(pspec
, "msr_reset") and isinstance(pspec
.msr_reset
, int):
165 self
.msr_at_reset
= pspec
.msr_reset
166 state_resets
= [0x0, # PC at reset
167 self
.msr_at_reset
, # MSR at reset
168 0x0, # SVSTATE at reset
172 # register files (yes plural)
173 self
.regs
= RegFiles(pspec
, make_hazard_vecs
=self
.make_hazard_vecs
,
174 state_resets
=state_resets
)
176 # set up input and output: unusual requirement to set data directly
177 # (due to the way that the core is set up in a different domain,
178 # see TestIssuer.setup_peripherals
179 self
.p
.i_data
, self
.n
.o_data
= self
.new_specs(None)
180 self
.i
, self
.o
= self
.p
.i_data
, self
.n
.o_data
182 # actual internal input data used (captured)
183 self
.ireg
= self
.ispec()
185 # create per-FU instruction decoders (subsetted). these "satellite"
186 # decoders reduce wire fan-out from the one (main) PowerDecoder2
187 # (used directly by the trap unit) to the *twelve* (or more)
188 # Function Units. we can either have 32 wires (the instruction)
189 # to each, or we can have well over a 200 wire fan-out (to 12
190 # ALUs). it's an easy choice to make.
194 # eep, these should be *per FU* i.e. for FunctionUnitBaseMulti
195 # they should be shared (put into the ALU *once*).
197 for funame
, fu
in self
.fus
.fus
.items():
198 f_name
= fu
.fnunit
.name
199 fnunit
= fu
.fnunit
.value
200 opkls
= fu
.opsubsetkls
202 # TRAP decoder is the *main* decoder
203 self
.trapunit
= funame
205 assert funame
not in self
.decoders
206 self
.decoders
[funame
] = PowerDecodeSubset(None, opkls
, f_name
,
208 state
=self
.ireg
.state
,
209 svp64_en
=self
.svp64_en
,
210 regreduce_en
=self
.regreduce_en
)
211 self
.des
[funame
] = self
.decoders
[funame
].do
212 print ("create decoder subset", funame
, opkls
, self
.des
[funame
])
214 # create per-Function Unit write-after-write hazard signals
215 # yes, really, this should have been added in ReservationStations
217 for funame
, fu
in self
.fus
.fus
.items():
218 fu
._waw
_hazard
= Signal(name
="waw_%s" % funame
)
220 # share the SPR decoder with the MMU if it exists
221 if "mmu0" in self
.decoders
:
222 self
.decoders
["mmu0"].mmu0_spr_dec
= self
.decoders
["spr0"]
224 # allow pausing of the DEC/TB FSM back in Issuer, by spotting
225 # if there is an MTSPR instruction
226 self
.pause_dec_tb
= Signal()
228 # next 3 functions are Stage API Compliance
229 def setup(self
, m
, i
):
233 return CoreInput(self
.pspec
, self
.svp64_en
, self
.regreduce_en
)
238 # elaborate function to create HDL
239 def elaborate(self
, platform
):
240 m
= super().elaborate(platform
)
242 # for testing purposes, to cut down on build time in coriolis2
243 if hasattr(self
.pspec
, "nocore") and self
.pspec
.nocore
== True:
244 x
= Signal() # dummy signal
249 m
.submodules
.fus
= self
.fus
250 m
.submodules
.l0
= l0
= self
.l0
251 self
.regs
.elaborate_into(m
, platform
)
255 # amalgamate write-hazards into a single top-level Signal
256 self
.waw_hazard
= Signal()
258 for funame
, fu
in self
.fus
.fus
.items():
259 whaz
.append(fu
._waw
_hazard
)
260 comb
+= self
.waw_hazard
.eq(Cat(*whaz
).bool())
263 self
.connect_satellite_decoders(m
)
265 # ssh, cheat: trap uses the main decoder because of the rewriting
266 self
.des
[self
.trapunit
] = self
.ireg
.e
.do
268 # connect up Function Units, then read/write ports, and hazard conflict
269 self
.issue_conflict
= Signal()
270 fu_bitdict
, fu_selected
= self
.connect_instruction(m
)
271 raw_hazard
= self
.connect_rdports(m
, fu_bitdict
, fu_selected
)
272 self
.connect_wrports(m
, fu_bitdict
, fu_selected
)
273 if self
.allow_overlap
:
274 comb
+= self
.issue_conflict
.eq(raw_hazard
)
276 # note if an exception happened. in a pipelined or OoO design
277 # this needs to be accompanied by "shadowing" (or stalling)
279 for exc
in self
.fus
.excs
.values():
280 el
.append(exc
.happened
)
281 if len(el
) > 0: # at least one exception
282 comb
+= self
.o
.exc_happened
.eq(Cat(*el
).bool())
286 def connect_satellite_decoders(self
, m
):
288 for k
, v
in self
.decoders
.items():
289 # connect each satellite decoder and give it the instruction.
290 # as subset decoders this massively reduces wire fanout given
291 # the large number of ALUs
292 m
.submodules
["dec_%s" % k
] = v
293 comb
+= v
.dec
.raw_opcode_in
.eq(self
.ireg
.raw_insn_i
)
294 comb
+= v
.dec
.bigendian
.eq(self
.ireg
.bigendian_i
)
295 # sigh due to SVP64 RA_OR_ZERO detection connect these too
296 comb
+= v
.sv_a_nz
.eq(self
.ireg
.sv_a_nz
)
297 if not self
.svp64_en
:
299 comb
+= v
.pred_sm
.eq(self
.ireg
.sv_pred_sm
)
300 comb
+= v
.pred_dm
.eq(self
.ireg
.sv_pred_dm
)
301 if k
== self
.trapunit
:
303 comb
+= v
.sv_rm
.eq(self
.ireg
.sv_rm
) # pass through SVP64 RM
304 comb
+= v
.is_svp64_mode
.eq(self
.ireg
.is_svp64_mode
)
305 # only the LDST PowerDecodeSubset *actually* needs to
306 # know to use the alternative decoder. this is all
308 if not k
.lower().startswith("ldst"):
310 comb
+= v
.use_svp64_ldst_dec
.eq( self
.ireg
.use_svp64_ldst_dec
)
312 def connect_instruction(self
, m
):
313 """connect_instruction
315 uses decoded (from PowerOp) function unit information from CSV files
316 to ascertain which Function Unit should deal with the current
319 some (such as OP_ATTN, OP_NOP) are dealt with here, including
320 ignoring it and halting the processor. OP_NOP is a bit annoying
321 because the issuer expects busy flag still to be raised then lowered.
322 (this requires a fake counter to be set).
324 comb
, sync
= m
.d
.comb
, m
.d
.sync
327 # indicate if core is busy
328 busy_o
= self
.o
.busy_o
329 any_busy_o
= self
.o
.any_busy_o
331 # connect up temporary copy of incoming instruction. the FSM will
332 # either blat the incoming instruction (if valid) into self.ireg
333 # or if the instruction could not be delivered, keep dropping the
334 # latched copy into ireg
335 ilatch
= self
.ispec()
336 self
.instr_active
= Signal()
338 # enable/busy-signals for each FU, get one bit for each FU (by name)
339 fu_enable
= Signal(len(fus
), reset_less
=True)
340 fu_busy
= Signal(len(fus
), reset_less
=True)
343 for i
, funame
in enumerate(fus
.keys()):
344 fu_bitdict
[funame
] = fu_enable
[i
]
345 fu_selected
[funame
] = fu_busy
[i
]
347 # identify function units and create a list by fnunit so that
348 # PriorityPickers can be created for selecting one of them that
349 # isn't busy at the time the incoming instruction needs passing on
350 by_fnunit
= defaultdict(list)
351 for fname
, member
in Function
.__members
__.items():
352 for funame
, fu
in fus
.items():
353 fnunit
= fu
.fnunit
.value
354 if member
.value
& fnunit
: # this FU handles this type of op
355 by_fnunit
[fname
].append((funame
, fu
)) # add by Function
357 # ok now just print out the list of FUs by Function, because we can
358 for fname
, fu_list
in by_fnunit
.items():
359 print ("FUs by type", fname
, fu_list
)
361 # now create a PriorityPicker per FU-type such that only one
362 # non-busy FU will be picked
364 fu_found
= Signal() # take a note if no Function Unit was available
365 for fname
, fu_list
in by_fnunit
.items():
366 i_pp
= PriorityPicker(len(fu_list
))
367 m
.submodules
['i_pp_%s' % fname
] = i_pp
369 for i
, (funame
, fu
) in enumerate(fu_list
):
370 # match the decoded instruction (e.do.fn_unit) against the
371 # "capability" of this FU, gate that by whether that FU is
372 # busy, and drop that into the PriorityPicker.
373 # this will give us an output of the first available *non-busy*
374 # Function Unit (Reservation Station) capable of handling this
376 fnunit
= fu
.fnunit
.value
377 en_req
= Signal(name
="issue_en_%s" % funame
, reset_less
=True)
378 fnmatch
= (self
.ireg
.e
.do
.fn_unit
& fnunit
).bool()
379 comb
+= en_req
.eq(fnmatch
& ~fu
.busy_o
&
381 i_l
.append(en_req
) # store in list for doing the Cat-trick
382 # picker output, gated by enable: store in fu_bitdict
383 po
= Signal(name
="o_issue_pick_"+funame
) # picker output
384 comb
+= po
.eq(i_pp
.o
[i
] & i_pp
.en_o
)
385 comb
+= fu_bitdict
[funame
].eq(po
)
386 comb
+= fu_selected
[funame
].eq(fu
.busy_o | po
)
387 # if we don't do this, then when there are no FUs available,
388 # the "p.o_ready" signal will go back "ok we accepted this
389 # instruction" which of course isn't true.
390 with m
.If(i_pp
.en_o
):
391 comb
+= fu_found
.eq(1)
392 # for each input, Cat them together and drop them into the picker
393 comb
+= i_pp
.i
.eq(Cat(*i_l
))
395 # rdmask, which is for registers needs to come from the *main* decoder
396 for funame
, fu
in fus
.items():
397 rdmask
= get_rdflags(m
, self
.ireg
.e
, fu
)
398 comb
+= fu
.rdmaskn
.eq(~rdmask
)
400 # sigh - need a NOP counter
402 with m
.If(counter
!= 0):
403 sync
+= counter
.eq(counter
- 1)
406 # default to reading from incoming instruction: may be overridden
407 # by copy from latch when "waiting"
408 comb
+= self
.ireg
.eq(self
.i
)
409 # always say "ready" except if overridden
410 comb
+= self
.p
.o_ready
.eq(1)
413 with m
.State("READY"):
414 with m
.If(self
.p
.i_valid
): # run only when valid
415 with m
.Switch(self
.ireg
.e
.do
.insn_type
):
416 # check for ATTN: halt if true
417 with m
.Case(MicrOp
.OP_ATTN
):
418 m
.d
.sync
+= self
.o
.core_terminate_o
.eq(1)
420 # fake NOP - this isn't really used (Issuer detects NOP)
421 with m
.Case(MicrOp
.OP_NOP
):
422 sync
+= counter
.eq(2)
426 comb
+= self
.instr_active
.eq(1)
427 comb
+= self
.p
.o_ready
.eq(0)
428 # connect instructions. only one enabled at a time
429 for funame
, fu
in fus
.items():
430 do
= self
.des
[funame
]
431 enable
= fu_bitdict
[funame
]
433 # run this FunctionUnit if enabled route op,
434 # issue, busy, read flags and mask to FU
436 # operand comes from the *local* decoder
437 # do not actually issue, though, if there
438 # is a waw hazard. decoder has to still
439 # be asserted in order to detect that, tho
440 comb
+= fu
.oper_i
.eq_from(do
)
442 # URRR this is truly dreadful.
443 # OP_FETCH_FAILED is a "fake" op.
444 # no instruction creates it. OP_TRAP
445 # uses the *main* decoder: this is
446 # a *Satellite* decoder that reacts
447 # on *insn_in*... not fake ops. gaah.
448 main_op
= self
.ireg
.e
.do
449 with m
.If(main_op
.insn_type
==
450 MicrOp
.OP_FETCH_FAILED
):
451 comb
+= fu
.oper_i
.insn_type
.eq(
452 MicrOp
.OP_FETCH_FAILED
)
453 comb
+= fu
.oper_i
.fn_unit
.eq(
455 # issue when valid (and no write-hazard)
456 comb
+= fu
.issue_i
.eq(~self
.waw_hazard
)
457 # instruction ok, indicate ready
458 comb
+= self
.p
.o_ready
.eq(1)
460 if self
.allow_overlap
:
461 with m
.If(~fu_found | self
.waw_hazard
):
462 # latch copy of instruction
463 sync
+= ilatch
.eq(self
.i
)
464 comb
+= self
.p
.o_ready
.eq(1) # accept
468 with m
.State("WAITING"):
469 comb
+= self
.instr_active
.eq(1)
470 comb
+= self
.p
.o_ready
.eq(0)
472 # using copy of instruction, keep waiting until an FU is free
473 comb
+= self
.ireg
.eq(ilatch
)
474 with m
.If(fu_found
): # wait for conflict to clear
475 # connect instructions. only one enabled at a time
476 for funame
, fu
in fus
.items():
477 do
= self
.des
[funame
]
478 enable
= fu_bitdict
[funame
]
480 # run this FunctionUnit if enabled route op,
481 # issue, busy, read flags and mask to FU
483 # operand comes from the *local* decoder,
484 # which is asserted even if not issued,
485 # so that WaW-detection can check for hazards.
486 # only if the waw hazard is clear does the
487 # instruction actually get issued
488 comb
+= fu
.oper_i
.eq_from(do
)
490 comb
+= fu
.issue_i
.eq(~self
.waw_hazard
)
491 with m
.If(~self
.waw_hazard
):
492 comb
+= self
.p
.o_ready
.eq(1)
496 print ("core: overlap allowed", self
.allow_overlap
)
497 # true when any FU is busy (including the cycle where it is perhaps
498 # to be issued - because that's what fu_busy is)
499 comb
+= any_busy_o
.eq(fu_busy
.bool())
500 if not self
.allow_overlap
:
501 # for simple non-overlap, if any instruction is busy, set
502 # busy output for core.
503 comb
+= busy_o
.eq(any_busy_o
)
505 # sigh deal with a fun situation that needs to be investigated
507 with m
.If(self
.issue_conflict
):
509 # make sure that LDST, SPR, MMU, Branch and Trap all say "busy"
510 # and do not allow overlap. these are all the ones that
511 # are non-forward-progressing: exceptions etc. that otherwise
512 # change CoreState for some reason (MSR, PC, SVSTATE)
513 for funame
, fu
in fus
.items():
514 if (funame
.lower().startswith('ldst') or
515 funame
.lower().startswith('branch') or
516 funame
.lower().startswith('mmu') or
517 funame
.lower().startswith('spr') or
518 funame
.lower().startswith('trap')):
519 with m
.If(fu
.busy_o
):
521 # for SPR pipeline pause dec/tb FSM to avoid race condition
522 # TODO: really this should be much more sophisticated,
523 # spot MTSPR, spot that DEC/TB is what is to be updated.
524 # a job for PowerDecoder2, there
525 if funame
.lower().startswith('spr'):
526 with m
.If(fu
.busy_o
):
527 comb
+= self
.pause_dec_tb
.eq(1)
529 # return both the function unit "enable" dict as well as the "busy".
530 # the "busy-or-issued" can be passed in to the Read/Write port
531 # connectors to give them permission to request access to regfiles
532 return fu_bitdict
, fu_selected
534 def connect_rdport(self
, m
, fu_bitdict
, fu_selected
,
535 rdpickers
, regfile
, regname
, fspec
):
536 comb
, sync
= m
.d
.comb
, m
.d
.sync
542 # select the required read port. these are pre-defined sizes
543 rfile
= regs
.rf
[regfile
.lower()]
544 rport
= rfile
.r_ports
[rpidx
]
545 print("read regfile", rpidx
, regfile
, regs
.rf
.keys(),
548 # for checking if the read port has an outstanding write
549 if self
.make_hazard_vecs
:
550 wv
= regs
.wv
[regfile
.lower()]
551 wvchk
= wv
.q_int
# write-vec bit-level hazard check
553 # if a hazard is detected on this read port, simply blithely block
554 # every FU from reading on it. this is complete overkill but very
556 hazard_detected
= Signal(name
="raw_%s_%s" % (regfile
, rpidx
))
559 if not isinstance(fspecs
, list):
565 for i
, fspec
in enumerate(fspecs
):
566 # get the regfile specs for this regfile port
567 print ("fpsec", i
, fspec
, len(fspec
.specs
))
568 name
= "%s_%s_%d" % (regfile
, regname
, i
)
569 ppoffs
.append(pplen
) # record offset for picker
570 pplen
+= len(fspec
.specs
)
571 rdflag
= Signal(name
="rdflag_"+name
, reset_less
=True)
572 comb
+= rdflag
.eq(fspec
.okflag
)
573 rdflags
.append(rdflag
)
575 print ("pplen", pplen
)
577 # create a priority picker to manage this port
578 rdpickers
[regfile
][rpidx
] = rdpick
= PriorityPicker(pplen
)
579 m
.submodules
["rdpick_%s_%s" % (regfile
, rpidx
)] = rdpick
585 for i
, fspec
in enumerate(fspecs
):
586 (rf
, _read
, wid
, fuspecs
) = \
587 (fspec
.okflag
, fspec
.regport
, fspec
.wid
, fspec
.specs
)
588 # connect up the FU req/go signals, and the reg-read to the FU
589 # and create a Read Broadcast Bus
590 for pi
, fuspec
in enumerate(fspec
.specs
):
591 (funame
, fu
, idx
) = (fuspec
.funame
, fuspec
.fu
, fuspec
.idx
)
593 name
= "%s_%s_%s_%i" % (regfile
, rpidx
, funame
, pi
)
594 fu_active
= fu_selected
[funame
]
595 fu_issued
= fu_bitdict
[funame
]
597 # get (or set up) a latched copy of read register number
598 # and (sigh) also the read-ok flag
599 # TODO: use nmutil latchregister
600 rhname
= "%s_%s_%d" % (regfile
, regname
, i
)
601 rdflag
= Signal(name
="rdflag_%s_%s" % (funame
, rhname
),
603 if rhname
not in fu
.rf_latches
:
604 rfl
= Signal(name
="rdflag_latch_%s_%s" % (funame
, rhname
))
605 fu
.rf_latches
[rhname
] = rfl
606 with m
.If(fu
.issue_i
):
607 sync
+= rfl
.eq(rdflags
[i
])
609 rfl
= fu
.rf_latches
[rhname
]
611 # now the register port
612 rname
= "%s_%s_%s_%d" % (funame
, regfile
, regname
, pi
)
613 read
= Signal
.like(_read
, name
="read_"+rname
)
614 if rname
not in fu
.rd_latches
:
615 rdl
= Signal
.like(_read
, name
="rdlatch_"+rname
)
616 fu
.rd_latches
[rname
] = rdl
617 with m
.If(fu
.issue_i
):
618 sync
+= rdl
.eq(_read
)
620 rdl
= fu
.rd_latches
[rname
]
622 # make the read immediately available on issue cycle
623 # after the read cycle, otherwise use the latched copy.
624 # this captures the regport and okflag on issue
625 with m
.If(fu
.issue_i
):
626 comb
+= read
.eq(_read
)
627 comb
+= rdflag
.eq(rdflags
[i
])
630 comb
+= rdflag
.eq(rfl
)
632 # connect request-read to picker input, and output to go-rd
633 addr_en
= Signal
.like(read
, name
="addr_en_"+name
)
634 pick
= Signal(name
="pick_"+name
) # picker input
635 rp
= Signal(name
="rp_"+name
) # picker output
636 delay_pick
= Signal(name
="dp_"+name
) # read-enable "underway"
637 rhazard
= Signal(name
="rhaz_"+name
)
639 # exclude any currently-enabled read-request (mask out active)
640 # entirely block anything hazarded from being picked
641 comb
+= pick
.eq(fu
.rd_rel_o
[idx
] & fu_active
& rdflag
&
642 ~delay_pick
& ~rhazard
)
643 comb
+= rdpick
.i
[pi
].eq(pick
)
644 comb
+= fu
.go_rd_i
[idx
].eq(delay_pick
) # pass in *delayed* pick
646 # if picked, select read-port "reg select" number to port
647 comb
+= rp
.eq(rdpick
.o
[pi
] & rdpick
.en_o
)
648 sync
+= delay_pick
.eq(rp
) # delayed "pick"
649 comb
+= addr_en
.eq(Mux(rp
, read
, 0))
651 # the read-enable happens combinatorially (see mux-bus below)
652 # but it results in the data coming out on a one-cycle delay.
656 addrs
.append(addr_en
)
659 # use the *delayed* pick signal to put requested data onto bus
660 with m
.If(delay_pick
):
661 # connect regfile port to input, creating fan-out Bus
663 print("reg connect widths",
664 regfile
, regname
, pi
, funame
,
665 src
.shape(), rport
.o_data
.shape())
666 # all FUs connect to same port
667 comb
+= src
.eq(rport
.o_data
)
669 if not self
.make_hazard_vecs
:
672 # read the write-hazard bitvector (wv) for any bit that is
673 wvchk_en
= Signal(len(wvchk
), name
="wv_chk_addr_en_"+name
)
674 issue_active
= Signal(name
="rd_iactive_"+name
)
675 # XXX combinatorial loop here
676 comb
+= issue_active
.eq(fu_active
& rdflag
)
677 with m
.If(issue_active
):
679 comb
+= wvchk_en
.eq(read
)
681 comb
+= wvchk_en
.eq(1<<read
)
682 # if FU is busy (which doesn't get set at the same time as
683 # issue) and no hazard was detected, clear wvchk_en (i.e.
684 # stop checking for hazards). there is a loop here, but it's
685 # via a DFF, so is ok. some linters may complain, but hey.
686 with m
.If(fu
.busy_o
& ~rhazard
):
687 comb
+= wvchk_en
.eq(0)
689 # read-hazard is ANDed with (filtered by) what is actually
691 comb
+= rhazard
.eq((wvchk
& wvchk_en
).bool())
693 wvens
.append(wvchk_en
)
695 # or-reduce the muxed read signals
697 # for unary-addressed
698 comb
+= rport
.ren
.eq(ortreereduce_sig(rens
))
700 # for binary-addressed
701 comb
+= rport
.addr
.eq(ortreereduce_sig(addrs
))
702 comb
+= rport
.ren
.eq(Cat(*rens
).bool())
703 print ("binary", regfile
, rpidx
, rport
, rport
.ren
, rens
, addrs
)
705 if not self
.make_hazard_vecs
:
706 return Const(0) # declare "no hazards"
708 # enable the read bitvectors for this issued instruction
709 # and return whether any write-hazard bit is set
710 wvchk_and
= Signal(len(wvchk
), name
="wv_chk_"+name
)
711 comb
+= wvchk_and
.eq(wvchk
& ortreereduce_sig(wvens
))
712 comb
+= hazard_detected
.eq(wvchk_and
.bool())
713 return hazard_detected
715 def connect_rdports(self
, m
, fu_bitdict
, fu_selected
):
716 """connect read ports
718 orders the read regspecs into a dict-of-dicts, by regfile, by
719 regport name, then connects all FUs that want that regport by
720 way of a PriorityPicker.
722 comb
, sync
= m
.d
.comb
, m
.d
.sync
727 # dictionary of lists of regfile read ports
728 byregfiles_rdspec
= self
.get_byregfiles(m
, True)
730 # okaay, now we need a PriorityPicker per regfile per regfile port
731 # loootta pickers... peter piper picked a pack of pickled peppers...
733 for regfile
, fuspecs
in byregfiles_rdspec
.items():
734 rdpickers
[regfile
] = {}
736 # argh. an experiment to merge RA and RB in the INT regfile
737 # (we have too many read/write ports)
738 if self
.regreduce_en
:
740 fuspecs
['rabc'] = [fuspecs
.pop('rb')]
741 fuspecs
['rabc'].append(fuspecs
.pop('rc'))
742 fuspecs
['rabc'].append(fuspecs
.pop('ra'))
743 if regfile
== 'FAST':
744 fuspecs
['fast1'] = [fuspecs
.pop('fast1')]
745 if 'fast2' in fuspecs
:
746 fuspecs
['fast1'].append(fuspecs
.pop('fast2'))
747 if 'fast3' in fuspecs
:
748 fuspecs
['fast1'].append(fuspecs
.pop('fast3'))
750 # for each named regfile port, connect up all FUs to that port
751 # also return (and collate) hazard detection)
752 for (regname
, fspec
) in sort_fuspecs(fuspecs
):
753 print("connect rd", regname
, fspec
)
754 rh
= self
.connect_rdport(m
, fu_bitdict
, fu_selected
,
759 return Cat(*rd_hazard
).bool()
761 def make_hazards(self
, m
, regfile
, rfile
, wvclr
, wvset
,
762 funame
, regname
, idx
,
763 addr_en
, wp
, fu
, fu_active
, wrflag
, write
,
765 """make_hazards: a setter and a clearer for the regfile write ports
767 setter is at issue time (using PowerDecoder2 regfile write numbers)
768 clearer is at regfile write time (when FU has said what to write to)
770 there is *one* unusual case here which has to be dealt with:
771 when the Function Unit does *NOT* request a write to the regfile
772 (has its data.ok bit CLEARED). this is perfectly legitimate.
775 comb
, sync
= m
.d
.comb
, m
.d
.sync
776 name
= "%s_%s_%d" % (funame
, regname
, idx
)
778 # connect up the bitvector write hazard. unlike the
779 # regfile writeports, a ONE must be written to the corresponding
780 # bit of the hazard bitvector (to indicate the existence of
783 # the detection of what shall be written to is based
784 # on *issue*. it is delayed by 1 cycle so that instructions
785 # "addi 5,5,0x2" do not cause combinatorial loops due to
786 # fake-dependency on *themselves*. this will totally fail
787 # spectacularly when doing multi-issue
788 print ("write vector (for regread)", regfile
, wvset
)
789 wviaddr_en
= Signal(len(wvset
), name
="wv_issue_addr_en_"+name
)
790 issue_active
= Signal(name
="iactive_"+name
)
791 sync
+= issue_active
.eq(fu
.issue_i
& fu_active
& wrflag
)
792 with m
.If(issue_active
):
794 comb
+= wviaddr_en
.eq(write
)
796 comb
+= wviaddr_en
.eq(1<<write
)
798 # deal with write vector clear: this kicks in when the regfile
799 # is written to, and clears the corresponding bitvector entry
800 print ("write vector", regfile
, wvclr
)
801 wvaddr_en
= Signal(len(wvclr
), name
="wvaddr_en_"+name
)
803 comb
+= wvaddr_en
.eq(addr_en
)
806 comb
+= wvaddr_en
.eq(1<<addr_en
)
808 # XXX ASSUME that LDSTFunctionUnit always sets the data it intends to
809 # this may NOT be the case when an exception occurs
810 if isinstance(fu
, LDSTFunctionUnit
):
811 return wvaddr_en
, wviaddr_en
813 # okaaay, this is preparation for the awkward case.
814 # * latch a copy of wrflag when issue goes high.
815 # * when the fu_wrok (data.ok) flag is NOT set,
816 # but the FU is done, the FU is NEVER going to write
817 # so the bitvector has to be cleared.
818 latch_wrflag
= Signal(name
="latch_wrflag_"+name
)
819 with m
.If(~fu
.busy_o
):
820 sync
+= latch_wrflag
.eq(0)
821 with m
.If(fu
.issue_i
& fu_active
):
822 sync
+= latch_wrflag
.eq(wrflag
)
823 with m
.If(fu
.alu_done_o
& latch_wrflag
& ~fu_wrok
):
825 comb
+= wvaddr_en
.eq(write
) # addr_en gated with wp, don't use
827 comb
+= wvaddr_en
.eq(1<<addr_en
) # binary addr_en not gated
829 return wvaddr_en
, wviaddr_en
831 def connect_wrport(self
, m
, fu_bitdict
, fu_selected
,
832 wrpickers
, regfile
, regname
, fspec
):
833 comb
, sync
= m
.d
.comb
, m
.d
.sync
839 # select the required write port. these are pre-defined sizes
840 rfile
= regs
.rf
[regfile
.lower()]
841 wport
= rfile
.w_ports
[rpidx
]
843 print("connect wr", regname
, "unary", rfile
.unary
, fspec
)
844 print(regfile
, regs
.rf
.keys())
846 # select the write-protection hazard vector. note that this still
847 # requires to WRITE to the hazard bitvector! read-requests need
848 # to RAISE the bitvector (set it to 1), which, duh, requires a WRITE
849 if self
.make_hazard_vecs
:
850 wv
= regs
.wv
[regfile
.lower()]
851 wvset
= wv
.s
# write-vec bit-level hazard ctrl
852 wvclr
= wv
.r
# write-vec bit-level hazard ctrl
853 wvchk
= wv
.q
# write-after-write hazard check
856 if not isinstance(fspecs
, list):
863 for i
, fspec
in enumerate(fspecs
):
864 # get the regfile specs for this regfile port
865 (wf
, _write
, wid
, fuspecs
) = \
866 (fspec
.okflag
, fspec
.regport
, fspec
.wid
, fspec
.specs
)
867 print ("fpsec", i
, "wrflag", wf
, fspec
, len(fuspecs
))
868 ppoffs
.append(pplen
) # record offset for picker
869 pplen
+= len(fuspecs
)
871 name
= "%s_%s_%d" % (regfile
, regname
, i
)
872 wrflag
= Signal(name
="wr_flag_"+name
)
874 comb
+= wrflag
.eq(wf
)
877 wrflags
.append(wrflag
)
879 # create a priority picker to manage this port
880 wrpickers
[regfile
][rpidx
] = wrpick
= PriorityPicker(pplen
)
881 m
.submodules
["wrpick_%s_%s" % (regfile
, rpidx
)] = wrpick
888 #wvens = [] - not needed: reading of writevec is permanently held hi
890 for i
, fspec
in enumerate(fspecs
):
891 # connect up the FU req/go signals and the reg-read to the FU
892 # these are arbitrated by Data.ok signals
893 (wf
, _write
, wid
, fuspecs
) = \
894 (fspec
.okflag
, fspec
.regport
, fspec
.wid
, fspec
.specs
)
895 for pi
, fuspec
in enumerate(fspec
.specs
):
896 (funame
, fu
, idx
) = (fuspec
.funame
, fuspec
.fu
, fuspec
.idx
)
897 fu_requested
= fu_bitdict
[funame
]
899 name
= "%s_%s_%s_%d" % (funame
, regfile
, regname
, idx
)
900 # get (or set up) a write-latched copy of write register number
901 write
= Signal
.like(_write
, name
="write_"+name
)
902 rname
= "%s_%s_%s_%d" % (funame
, regfile
, regname
, idx
)
903 if rname
not in fu
.wr_latches
:
904 wrl
= Signal
.like(_write
, name
="wrlatch_"+rname
)
905 fu
.wr_latches
[rname
] = write
906 # do not depend on fu.issue_i here, it creates a
907 # combinatorial loop on waw checking. using the FU
908 # "enable" bitdict entry for this FU is sufficient,
909 # because the PowerDecoder2 read/write nums are
910 # valid continuously when the instruction is valid
911 with m
.If(fu_requested
):
912 sync
+= wrl
.eq(_write
)
913 comb
+= write
.eq(_write
)
915 comb
+= write
.eq(wrl
)
917 write
= fu
.wr_latches
[rname
]
919 # write-request comes from dest.ok
920 dest
= fu
.get_out(idx
)
921 fu_dest_latch
= fu
.get_fu_out(idx
) # latched output
922 name
= "%s_%s_%d" % (funame
, regname
, idx
)
923 fu_wrok
= Signal(name
="fu_wrok_"+name
, reset_less
=True)
924 comb
+= fu_wrok
.eq(dest
.ok
& fu
.busy_o
)
926 # connect request-write to picker input, and output to go-wr
927 fu_active
= fu_selected
[funame
]
928 pick
= fu
.wr
.rel_o
[idx
] & fu_active
929 comb
+= wrpick
.i
[pi
].eq(pick
)
930 # create a single-pulse go write from the picker output
931 wr_pick
= Signal(name
="wpick_%s_%s_%d" % (funame
, regname
, idx
))
932 comb
+= wr_pick
.eq(wrpick
.o
[pi
] & wrpick
.en_o
)
933 comb
+= fu
.go_wr_i
[idx
].eq(rising_edge(m
, wr_pick
))
935 # connect the regspec write "reg select" number to this port
936 # only if one FU actually requests (and is granted) the port
937 # will the write-enable be activated
938 wname
= "waddr_en_%s_%s_%d" % (funame
, regname
, idx
)
939 addr_en
= Signal
.like(write
, name
=wname
)
941 comb
+= wp
.eq(wr_pick
& wrpick
.en_o
)
942 comb
+= addr_en
.eq(Mux(wp
, write
, 0))
946 addrs
.append(addr_en
)
949 # connect regfile port to input
950 print("reg connect widths",
951 regfile
, regname
, pi
, funame
,
952 dest
.shape(), wport
.i_data
.shape())
953 wsigs
.append(fu_dest_latch
)
955 # now connect up the bitvector write hazard
956 if not self
.make_hazard_vecs
:
958 res
= self
.make_hazards(m
, regfile
, rfile
, wvclr
, wvset
,
959 funame
, regname
, idx
,
960 addr_en
, wp
, fu
, fu_active
,
961 wrflags
[i
], write
, fu_wrok
)
962 wvaddr_en
, wv_issue_en
= res
963 wvclren
.append(wvaddr_en
) # set only: no data => clear bit
964 wvseten
.append(wv_issue_en
) # set data same as enable
966 # read the write-hazard bitvector (wv) for any bit that is
967 fu_requested
= fu_bitdict
[funame
]
968 wvchk_en
= Signal(len(wvchk
), name
="waw_chk_addr_en_"+name
)
969 issue_active
= Signal(name
="waw_iactive_"+name
)
970 whazard
= Signal(name
="whaz_"+name
)
972 # XXX EEK! STATE regfile (branch) does not have an
973 # write-active indicator in regspec_decode_write()
974 print ("XXX FIXME waw_iactive", issue_active
,
977 # check bits from the incoming instruction. note (back
978 # in connect_instruction) that the decoder is held for
979 # us to be able to do this, here... *without* issue being
980 # held HI. we MUST NOT gate this with fu.issue_i or
981 # with fu_bitdict "enable": it would create a loop
982 comb
+= issue_active
.eq(wf
)
983 with m
.If(issue_active
):
985 comb
+= wvchk_en
.eq(write
)
987 comb
+= wvchk_en
.eq(1<<write
)
988 # if FU is busy (which doesn't get set at the same time as
989 # issue) and no hazard was detected, clear wvchk_en (i.e.
990 # stop checking for hazards). there is a loop here, but it's
991 # via a DFF, so is ok. some linters may complain, but hey.
992 with m
.If(fu
.busy_o
& ~whazard
):
993 comb
+= wvchk_en
.eq(0)
995 # write-hazard is ANDed with (filtered by) what is actually
996 # being requested. the wvchk data is on a one-clock delay,
997 # and wvchk_en comes directly from the main decoder
998 comb
+= whazard
.eq((wvchk
& wvchk_en
).bool())
1000 comb
+= fu
._waw
_hazard
.eq(1)
1002 #wvens.append(wvchk_en)
1004 # here is where we create the Write Broadcast Bus. simple, eh?
1005 comb
+= wport
.i_data
.eq(ortreereduce_sig(wsigs
))
1007 # for unary-addressed
1008 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
1010 # for binary-addressed
1011 comb
+= wport
.addr
.eq(ortreereduce_sig(addrs
))
1012 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
1014 if not self
.make_hazard_vecs
:
1017 # return these here rather than set wvclr/wvset directly,
1018 # because there may be more than one write-port to a given
1019 # regfile. example: XER has a write-port for SO, CA, and OV
1020 # and the *last one added* of those would overwrite the other
1021 # two. solution: have connect_wrports collate all the
1022 # or-tree-reduced bitvector set/clear requests and drop them
1023 # in as a single "thing". this can only be done because the
1024 # set/get is an unary bitvector.
1025 print ("make write-vecs", regfile
, regname
, wvset
, wvclr
)
1026 return (wvclren
, # clear (regfile write)
1027 wvseten
) # set (issue time)
def connect_wrports(self, m, fu_bitdict, fu_selected):
    """connect write ports

    orders the write regspecs into a dict-of-dicts, by regfile,
    by regport name, then connects all FUs that want that regport
    by way of a PriorityPicker.

    note that the write-port wen, write-port data, and go_wr_i all need to
    be on the exact same clock cycle.  as there is a combinatorial loop bug
    at the moment, these all use sync.

    arguments:

    * m           - module being built; its comb domain receives the
                    write-hazard vector set/clear assignments
    * fu_bitdict  - handed unchanged to connect_wrport
    * fu_selected - handed unchanged to connect_wrport

    returns None.  all results are side-effects on m (and on whatever
    connect_wrport itself wires up).
    """
    comb = m.d.comb

    # dictionary (by regfile) of dictionaries (by port name) of write specs
    byregfiles_wrspec = self.get_byregfiles(m, False)

    # same for write ports.
    # BLECH!  complex code-duplication! BLECH!
    # wrpickers[regfile][port] is filled in by connect_wrport, so the
    # per-regfile sub-dict must exist before that call is made.
    wrpickers = {}
    wvclrers = defaultdict(list)
    wvseters = defaultdict(list)
    for regfile, fuspecs in byregfiles_wrspec.items():
        wrpickers[regfile] = {}

        if self.regreduce_en:
            # argh, more port-merging: fold several named ports into one
            # list under a single port name.  note 'o'/'o1' (INT) and
            # 'fast1' (FAST) are popped unconditionally, i.e. they are
            # expected to be present whenever regreduce_en is set.
            if regfile == 'INT':
                fuspecs['o'] = [fuspecs.pop('o')]
                fuspecs['o'].append(fuspecs.pop('o1'))
            if regfile == 'FAST':
                fuspecs['fast1'] = [fuspecs.pop('fast1')]
                if 'fast2' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast2'))
                if 'fast3' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast3'))

        # collate these and record them by regfile because there
        # are sometimes more write-ports per regfile
        for (regname, fspec) in sort_fuspecs(fuspecs):
            # NOTE(review): wrpickers is passed in the slot between
            # fu_selected and regfile - confirm against the
            # connect_wrport signature.
            wvclren, wvseten = self.connect_wrport(m,
                                                   fu_bitdict, fu_selected,
                                                   wrpickers,
                                                   regfile, regname, fspec)
            wvclrers[regfile.lower()] += wvclren
            wvseters[regfile.lower()] += wvseten

    # without hazard vectors there is nothing further to wire up
    if not self.make_hazard_vecs:
        return

    # NOTE(review): the regfile collection is assumed to be self.regs
    # (the object providing the .wv write-vector table) - confirm.
    regs = self.regs

    # for write-vectors: reduce the clr-ers and set-ers down to
    # a single set of bits.  otherwise if there are two write
    # ports (on some regfiles), the last one doing comb += on
    # the reg.wv[regfile] instance "wins" (and all others are ignored,
    # whoops).
    for regfile, wvclren in wvclrers.items():
        wv = regs.wv[regfile]
        wvset = wv.s  # write-vec bit-level hazard ctrl
        wvclr = wv.r  # write-vec bit-level hazard ctrl
        # wvseters is appended to in lock-step with wvclrers, so it has
        # exactly the same keys
        wvseten = wvseters[regfile]
        comb += wvclr.eq(ortreereduce_sig(wvclren))  # clear (regfile write)
        comb += wvset.eq(ortreereduce_sig(wvseten))  # set (issue time)
def get_byregfiles(self, m, readmode):
    """group Function Unit register ports by regfile and by port name

    arguments:

    * m        - module being built, handed on to regspec_decode
    * readmode - True to analyse FU source (read) operands, False to
                 analyse FU destination (write) operands

    returns a dict-of-dicts: first key is the regfile name, second key
    the regfile port name.  each entry is a ByRegSpec(okflag, regport,
    wid, specs) where specs is the list of FUSpec(funame, fu, idx)
    "lanes" that need access to that port.

    side-effects: (re)creates the latch dictionaries on every Function
    Unit for the requested mode, and prints the resulting table.
    """
    mode = "read" if readmode else "write"
    # NOTE(review): the name->FU dictionary is assumed to be self.fus.fus
    # (self.fus itself is the object exposing .ports()) - confirm.
    fus = self.fus.fus
    e = self.ireg.e  # decoded instruction to execute

    # dictionary of dictionaries of lists/tuples of regfile ports.
    # first key: regfile.  second key: regfile port name
    byregfiles_spec = defaultdict(dict)

    for (funame, fu) in fus.items():
        # create in each FU a receptacle for the read/write register
        # hazard numbers (and okflags for read).  to be latched in
        # connect_rd/write_ports.  only the dictionaries belonging to the
        # requested mode are reset, so that a write-mode call does not
        # wipe latches recorded by an earlier read-mode call.
        if readmode:
            fu.rd_latches = {}  # read reg number latches
            fu.rf_latches = {}  # read flag latches
        else:
            # NOTE(review): connect_wrport looks up fu.wr_latches; it is
            # assumed to be initialised here - confirm no other owner.
            fu.wr_latches = {}  # write reg number latches

        # construct regfile specs: read uses inspec, write outspec
        print("%s ports for %s" % (mode, funame))
        for idx in range(fu.n_src if readmode else fu.n_dst):
            (regfile, regname, wid) = fu.get_io_spec(readmode, idx)
            print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))

            # the PowerDecoder2 (main one, not the satellites) contains
            # the decoded regfile numbers. obtain these now
            decinfo = regspec_decode(m, readmode, e, regfile, regname)
            okflag, regport = decinfo.okflag, decinfo.regport

            # construct the dictionary of regspec information by regfile
            if regname not in byregfiles_spec[regfile]:
                byregfiles_spec[regfile][regname] = \
                    ByRegSpec(okflag, regport, wid, [])

            # here we start to create "lanes" where each Function Unit
            # requiring access to a given [single-contended resource]
            # regfile port is appended to a list, so that PriorityPickers
            # can be created to give uncontested access to it
            fuspec = FUSpec(funame, fu, idx)
            byregfiles_spec[regfile][regname].specs.append(fuspec)

    # ok just print that all out, for convenience
    for regfile, fuspecs in byregfiles_spec.items():
        print("regfile %s ports:" % mode, regfile)
        for regname, fspec in fuspecs.items():
            # unpack into a fresh name: rebinding "fuspecs" here would
            # shadow the dictionary currently being iterated over
            [okflag, regport, wid, lanes] = fspec
            print(" rf %s port %s lane: %s" % (mode, regfile, regname))
            print(" %s" % regname, wid, okflag, regport)
            for (funame, fu, idx) in lanes:
                fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                print(" ", funame, fu.__class__.__name__, idx, fusig)

    return byregfiles_spec
1153 yield from self
.fus
.ports()
1154 yield from self
.i
.e
.ports()
1155 yield from self
.l0
.ports()
1162 if __name__
== '__main__':
1163 pspec
= TestMemPspec(ldst_ifacetype
='testpi',
1169 dut
= NonProductionCore(pspec
)
1170 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
1171 with
open("test_core.il", "w") as f
: