3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 (update: actually this is being added now:
21 https://bugs.libre-soc.org/show_bug.cgi?id=737)
24 from nmigen
import (Elaboratable
, Module
, Signal
, ResetSignal
, Cat
, Mux
,
26 from nmigen
.cli
import rtlil
28 from openpower
.decoder
.power_decoder2
import PowerDecodeSubset
29 from openpower
.decoder
.power_regspec_map
import regspec_decode_read
30 from openpower
.decoder
.power_regspec_map
import regspec_decode_write
31 from openpower
.sv
.svp64
import SVP64Rec
33 from nmutil
.picker
import PriorityPicker
34 from nmutil
.util
import treereduce
35 from nmutil
.singlepipe
import ControlBase
37 from soc
.fu
.compunits
.compunits
import AllFunctionUnits
, LDSTFunctionUnit
38 from soc
.regfile
.regfiles
import RegFiles
39 from openpower
.decoder
.power_decoder2
import get_rdflags
40 from soc
.experiment
.l0_cache
import TstL0CacheBuffer
# test only
41 from soc
.config
.test
.test_loadstore
import TestMemPspec
42 from openpower
.decoder
.power_enums
import MicrOp
, Function
43 from soc
.simple
.core_data
import CoreInput
, CoreOutput
45 from collections
import defaultdict
, namedtuple
48 from nmutil
.util
import rising_edge
# One Function Unit's claim on a regfile port:
#   funame - name of the Function Unit (key into the FU dictionaries)
#   fu     - the Function Unit object itself
#   idx    - index of the operand on that FU (used to index its
#            per-operand request/go signals)
FUSpec = namedtuple(
    "FUSpec",
    ("funame", "fu", "idx"),
)

# Everything known about one regfile port, grouped by regspec:
#   rdport / wrport - read / write flags for the port
#   read / write    - read / write register-select values
#   wid             - presumably the port width -- TODO confirm
#   specs           - list of FUSpec entries wanting this port
ByRegSpec = namedtuple(
    "ByRegSpec",
    ("rdport", "wrport", "read", "write", "wid", "specs"),
)
# helper function for reducing a list of signals down to a single
# ORed signal
def ortreereduce(tree, attr="o_data"):
    """OR-reduce one attribute of every item in `tree`.

    `tree` is handed to treereduce() together with operator.or_ as the
    combining function; each item contributes getattr(item, attr).

    tree: iterable of objects that each carry an attribute named `attr`
    attr: name of the attribute to pick from each item (default "o_data")

    returns whatever treereduce() produces for the ORed attributes.
    """
    def pick(item):
        # select the requested attribute from one element of the tree
        return getattr(item, attr)

    return treereduce(tree, operator.or_, pick)
def ortreereduce_sig(tree):
    """OR-reduce the items of `tree` themselves (no attribute lookup).

    Same as ortreereduce() except that each element is used directly:
    treereduce() is called with operator.or_ and an identity selector.

    tree: iterable of values supporting the `|` operator

    returns whatever treereduce() produces for the ORed items.
    """
    def identity(sig):
        # items are already the things to be ORed together
        return sig

    return treereduce(tree, operator.or_, identity)
# helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the items of `fuspecs` with "full*" register names first.

    fuspecs: dict mapping a regfile port name to its spec(s)

    returns a list of (regname, fspec) tuples: every entry whose name
    starts with "full" comes first, followed by all the others.  within
    each of the two groups the dictionary's own iteration order is kept.
    an empty dictionary gives an empty list.
    """
    # the result list must exist before either pass appends to it
    res = []
    # two passes over the same items: "full" names, then everything else
    for want_full in (True, False):
        for regname, fspec in fuspecs.items():
            if regname.startswith("full") == want_full:
                res.append((regname, fspec))
    return res  # enumerate(res)
76 # derive from ControlBase rather than have a separate Stage instance,
77 # this is simpler to do
78 class NonProductionCore(ControlBase
):
79 def __init__(self
, pspec
):
82 # test if SVP64 is to be enabled
83 self
.svp64_en
= hasattr(pspec
, "svp64") and (pspec
.svp64
== True)
85 # test to see if regfile ports should be reduced
86 self
.regreduce_en
= (hasattr(pspec
, "regreduce") and
87 (pspec
.regreduce
== True))
89 # test to see if overlapping of instructions is allowed
90 # (not normally enabled for TestIssuer FSM but useful for checking
91 # the bitvector hazard detection, before doing In-Order)
92 self
.allow_overlap
= (hasattr(pspec
, "allow_overlap") and
93 (pspec
.allow_overlap
== True))
96 self
.make_hazard_vecs
= self
.allow_overlap
97 self
.core_type
= "fsm"
98 if hasattr(pspec
, "core_type"):
99 self
.core_type
= pspec
.core_type
101 super().__init
__(stage
=self
)
103 # single LD/ST funnel for memory access
104 self
.l0
= l0
= TstL0CacheBuffer(pspec
, n_units
=1)
107 # function units (only one each)
108 # only include mmu if enabled in pspec
109 self
.fus
= AllFunctionUnits(pspec
, pilist
=[pi
])
111 # link LoadStore1 into MMU
112 mmu
= self
.fus
.get_fu('mmu0')
113 print ("core pspec", pspec
.ldst_ifacetype
)
114 print ("core mmu", mmu
)
116 print ("core lsmem.lsi", l0
.cmpi
.lsmem
.lsi
)
117 mmu
.alu
.set_ldst_interface(l0
.cmpi
.lsmem
.lsi
)
119 # register files (yes plural)
120 self
.regs
= RegFiles(pspec
, make_hazard_vecs
=self
.make_hazard_vecs
)
122 # set up input and output: unusual requirement to set data directly
123 # (due to the way that the core is set up in a different domain,
124 # see TestIssuer.setup_peripherals)
125 self
.p
.i_data
, self
.n
.o_data
= self
.new_specs(None)
126 self
.i
, self
.o
= self
.p
.i_data
, self
.n
.o_data
128 # actual internal input data used (captured)
129 self
.ireg
= self
.ispec()
131 # create per-FU instruction decoders (subsetted). these "satellite"
132 # decoders reduce wire fan-out from the one (main) PowerDecoder2
133 # (used directly by the trap unit) to the *twelve* (or more)
134 # Function Units. we can either have 32 wires (the instruction)
135 # to each, or we can have well over a 200 wire fan-out (to 12
136 # ALUs). it's an easy choice to make.
140 # eep, these should be *per FU* i.e. for FunctionUnitBaseMulti
141 # they should be shared (put into the ALU *once*).
143 for funame
, fu
in self
.fus
.fus
.items():
144 f_name
= fu
.fnunit
.name
145 fnunit
= fu
.fnunit
.value
146 opkls
= fu
.opsubsetkls
148 # TRAP decoder is the *main* decoder
149 self
.trapunit
= funame
151 assert funame
not in self
.decoders
152 self
.decoders
[funame
] = PowerDecodeSubset(None, opkls
, f_name
,
154 state
=self
.ireg
.state
,
155 svp64_en
=self
.svp64_en
,
156 regreduce_en
=self
.regreduce_en
)
157 self
.des
[funame
] = self
.decoders
[funame
].do
159 # create per-Function Unit write-after-write hazard signals
160 # yes, really, this should have been added in ReservationStations
162 for funame
, fu
in self
.fus
.fus
.items():
163 fu
._waw
_hazard
= Signal(name
="waw_%s" % funame
)
165 # share the SPR decoder with the MMU if it exists
166 if "mmu0" in self
.decoders
:
167 self
.decoders
["mmu0"].mmu0_spr_dec
= self
.decoders
["spr0"]
169 # next 3 functions are Stage API Compliance
170 def setup(self
, m
, i
):
174 return CoreInput(self
.pspec
, self
.svp64_en
, self
.regreduce_en
)
179 # elaborate function to create HDL
180 def elaborate(self
, platform
):
181 m
= super().elaborate(platform
)
183 # for testing purposes, to cut down on build time in coriolis2
184 if hasattr(self
.pspec
, "nocore") and self
.pspec
.nocore
== True:
185 x
= Signal() # dummy signal
190 m
.submodules
.fus
= self
.fus
191 m
.submodules
.l0
= l0
= self
.l0
192 self
.regs
.elaborate_into(m
, platform
)
196 # amalgamate write-hazards into a single top-level Signal
197 self
.waw_hazard
= Signal()
199 for funame
, fu
in self
.fus
.fus
.items():
200 whaz
.append(fu
._waw
_hazard
)
201 comb
+= self
.waw_hazard
.eq(Cat(*whaz
).bool())
204 self
.connect_satellite_decoders(m
)
206 # ssh, cheat: trap uses the main decoder because of the rewriting
207 self
.des
[self
.trapunit
] = self
.ireg
.e
.do
209 # connect up Function Units, then read/write ports, and hazard conflict
210 self
.issue_conflict
= Signal()
211 fu_bitdict
, fu_selected
= self
.connect_instruction(m
)
212 raw_hazard
= self
.connect_rdports(m
, fu_bitdict
, fu_selected
)
213 self
.connect_wrports(m
, fu_bitdict
, fu_selected
)
214 if self
.allow_overlap
:
215 comb
+= self
.issue_conflict
.eq(raw_hazard
)
217 # note if an exception happened. in a pipelined or OoO design
218 # this needs to be accompanied by "shadowing" (or stalling)
220 for exc
in self
.fus
.excs
.values():
221 el
.append(exc
.happened
)
222 if len(el
) > 0: # at least one exception
223 comb
+= self
.o
.exc_happened
.eq(Cat(*el
).bool())
227 def connect_satellite_decoders(self
, m
):
229 for k
, v
in self
.decoders
.items():
230 # connect each satellite decoder and give it the instruction.
231 # as subset decoders this massively reduces wire fanout given
232 # the large number of ALUs
233 m
.submodules
["dec_%s" % k
] = v
234 comb
+= v
.dec
.raw_opcode_in
.eq(self
.ireg
.raw_insn_i
)
235 comb
+= v
.dec
.bigendian
.eq(self
.ireg
.bigendian_i
)
236 # sigh due to SVP64 RA_OR_ZERO detection connect these too
237 comb
+= v
.sv_a_nz
.eq(self
.ireg
.sv_a_nz
)
239 comb
+= v
.pred_sm
.eq(self
.ireg
.sv_pred_sm
)
240 comb
+= v
.pred_dm
.eq(self
.ireg
.sv_pred_dm
)
241 if k
!= self
.trapunit
:
242 comb
+= v
.sv_rm
.eq(self
.ireg
.sv_rm
) # pass through SVP64 RM
243 comb
+= v
.is_svp64_mode
.eq(self
.ireg
.is_svp64_mode
)
244 # only the LDST PowerDecodeSubset *actually* needs to
245 # know to use the alternative decoder. this is all
247 if k
.lower().startswith("ldst"):
248 comb
+= v
.use_svp64_ldst_dec
.eq(
249 self
.ireg
.use_svp64_ldst_dec
)
251 def connect_instruction(self
, m
):
252 """connect_instruction
254 uses decoded (from PowerOp) function unit information from CSV files
255 to ascertain which Function Unit should deal with the current
258 some (such as OP_ATTN, OP_NOP) are dealt with here, including
259 ignoring it and halting the processor. OP_NOP is a bit annoying
260 because the issuer expects busy flag still to be raised then lowered.
261 (this requires a fake counter to be set).
263 comb
, sync
= m
.d
.comb
, m
.d
.sync
266 # indicate if core is busy
267 busy_o
= self
.o
.busy_o
268 any_busy_o
= self
.o
.any_busy_o
270 # connect up temporary copy of incoming instruction. the FSM will
271 # either blat the incoming instruction (if valid) into self.ireg
272 # or if the instruction could not be delivered, keep dropping the
273 # latched copy into ireg
274 ilatch
= self
.ispec()
275 self
.instr_active
= Signal()
277 # enable/busy-signals for each FU, get one bit for each FU (by name)
278 fu_enable
= Signal(len(fus
), reset_less
=True)
279 fu_busy
= Signal(len(fus
), reset_less
=True)
282 for i
, funame
in enumerate(fus
.keys()):
283 fu_bitdict
[funame
] = fu_enable
[i
]
284 fu_selected
[funame
] = fu_busy
[i
]
286 # identify function units and create a list by fnunit so that
287 # PriorityPickers can be created for selecting one of them that
288 # isn't busy at the time the incoming instruction needs passing on
289 by_fnunit
= defaultdict(list)
290 for fname
, member
in Function
.__members
__.items():
291 for funame
, fu
in fus
.items():
292 fnunit
= fu
.fnunit
.value
293 if member
.value
& fnunit
: # this FU handles this type of op
294 by_fnunit
[fname
].append((funame
, fu
)) # add by Function
296 # ok now just print out the list of FUs by Function, because we can
297 for fname
, fu_list
in by_fnunit
.items():
298 print ("FUs by type", fname
, fu_list
)
300 # now create a PriorityPicker per FU-type such that only one
301 # non-busy FU will be picked
303 fu_found
= Signal() # take a note if no Function Unit was available
304 for fname
, fu_list
in by_fnunit
.items():
305 i_pp
= PriorityPicker(len(fu_list
))
306 m
.submodules
['i_pp_%s' % fname
] = i_pp
308 for i
, (funame
, fu
) in enumerate(fu_list
):
309 # match the decoded instruction (e.do.fn_unit) against the
310 # "capability" of this FU, gate that by whether that FU is
311 # busy, and drop that into the PriorityPicker.
312 # this will give us an output of the first available *non-busy*
313 # Function Unit (Reservation Station) capable of handling this
315 fnunit
= fu
.fnunit
.value
316 en_req
= Signal(name
="issue_en_%s" % funame
, reset_less
=True)
317 fnmatch
= (self
.ireg
.e
.do
.fn_unit
& fnunit
).bool()
318 comb
+= en_req
.eq(fnmatch
& ~fu
.busy_o
&
320 i_l
.append(en_req
) # store in list for doing the Cat-trick
321 # picker output, gated by enable: store in fu_bitdict
322 po
= Signal(name
="o_issue_pick_"+funame
) # picker output
323 comb
+= po
.eq(i_pp
.o
[i
] & i_pp
.en_o
)
324 comb
+= fu_bitdict
[funame
].eq(po
)
325 comb
+= fu_selected
[funame
].eq(fu
.busy_o | po
)
326 # if we don't do this, then when there are no FUs available,
327 # the "p.o_ready" signal will go back "ok we accepted this
328 # instruction" which of course isn't true.
329 with m
.If(i_pp
.en_o
):
330 comb
+= fu_found
.eq(1)
331 # for each input, Cat them together and drop them into the picker
332 comb
+= i_pp
.i
.eq(Cat(*i_l
))
334 # rdmask, which is for registers needs to come from the *main* decoder
335 for funame
, fu
in fus
.items():
336 rdmask
= get_rdflags(self
.ireg
.e
, fu
)
337 comb
+= fu
.rdmaskn
.eq(~rdmask
)
339 # sigh - need a NOP counter
341 with m
.If(counter
!= 0):
342 sync
+= counter
.eq(counter
- 1)
345 # default to reading from incoming instruction: may be overridden
346 # by copy from latch when "waiting"
347 comb
+= self
.ireg
.eq(self
.i
)
348 # always say "ready" except if overridden
349 comb
+= self
.p
.o_ready
.eq(1)
352 with m
.State("READY"):
353 with m
.If(self
.p
.i_valid
): # run only when valid
354 with m
.Switch(self
.ireg
.e
.do
.insn_type
):
355 # check for ATTN: halt if true
356 with m
.Case(MicrOp
.OP_ATTN
):
357 m
.d
.sync
+= self
.o
.core_terminate_o
.eq(1)
359 # fake NOP - this isn't really used (Issuer detects NOP)
360 with m
.Case(MicrOp
.OP_NOP
):
361 sync
+= counter
.eq(2)
365 comb
+= self
.instr_active
.eq(1)
366 comb
+= self
.p
.o_ready
.eq(0)
367 # connect instructions. only one enabled at a time
368 for funame
, fu
in fus
.items():
369 do
= self
.des
[funame
]
370 enable
= fu_bitdict
[funame
]
372 # run this FunctionUnit if enabled route op,
373 # issue, busy, read flags and mask to FU
375 # operand comes from the *local* decoder
376 # do not actually issue, though, if there
377 # is a waw hazard. decoder has to still
378 # be asserted in order to detect that, tho
379 comb
+= fu
.oper_i
.eq_from(do
)
380 # issue when valid (and no write-hazard)
381 comb
+= fu
.issue_i
.eq(~self
.waw_hazard
)
382 # instruction ok, indicate ready
383 comb
+= self
.p
.o_ready
.eq(1)
385 if self
.allow_overlap
:
386 with m
.If(~fu_found | self
.waw_hazard
):
387 # latch copy of instruction
388 sync
+= ilatch
.eq(self
.i
)
389 comb
+= self
.p
.o_ready
.eq(1) # accept
393 with m
.State("WAITING"):
394 comb
+= self
.instr_active
.eq(1)
395 comb
+= self
.p
.o_ready
.eq(0)
397 # using copy of instruction, keep waiting until an FU is free
398 comb
+= self
.ireg
.eq(ilatch
)
399 with m
.If(fu_found
): # wait for conflict to clear
400 # connect instructions. only one enabled at a time
401 for funame
, fu
in fus
.items():
402 do
= self
.des
[funame
]
403 enable
= fu_bitdict
[funame
]
405 # run this FunctionUnit if enabled route op,
406 # issue, busy, read flags and mask to FU
408 # operand comes from the *local* decoder,
409 # which is asserted even if not issued,
410 # so that WaW-detection can check for hazards.
411 # only if the waw hazard is clear does the
412 # instruction actually get issued
413 comb
+= fu
.oper_i
.eq_from(do
)
415 comb
+= fu
.issue_i
.eq(~self
.waw_hazard
)
416 with m
.If(~self
.waw_hazard
):
417 comb
+= self
.p
.o_ready
.eq(1)
421 print ("core: overlap allowed", self
.allow_overlap
)
422 # true when any FU is busy (including the cycle where it is perhaps
423 # to be issued - because that's what fu_busy is)
424 comb
+= any_busy_o
.eq(fu_busy
.bool())
425 if not self
.allow_overlap
:
426 # for simple non-overlap, if any instruction is busy, set
427 # busy output for core.
428 comb
+= busy_o
.eq(any_busy_o
)
430 # sigh deal with a fun situation that needs to be investigated
432 with m
.If(self
.issue_conflict
):
434 # make sure that LDST, SPR, MMU, Branch and Trap all say "busy"
435 # and do not allow overlap. these are all the ones that
436 # are non-forward-progressing: exceptions etc. that otherwise
437 # change CoreState for some reason (MSR, PC, SVSTATE)
438 for funame
, fu
in fus
.items():
439 if (funame
.lower().startswith('ldst') or
440 funame
.lower().startswith('branch') or
441 funame
.lower().startswith('mmu') or
442 funame
.lower().startswith('spr') or
443 funame
.lower().startswith('trap')):
444 with m
.If(fu
.busy_o
):
447 # return both the function unit "enable" dict as well as the "busy".
448 # the "busy-or-issued" can be passed in to the Read/Write port
449 # connecters to give them permission to request access to regfiles
450 return fu_bitdict
, fu_selected
452 def connect_rdport(self
, m
, fu_bitdict
, fu_selected
,
453 rdpickers
, regfile
, regname
, fspec
):
454 comb
, sync
= m
.d
.comb
, m
.d
.sync
460 # select the required read port. these are pre-defined sizes
461 rfile
= regs
.rf
[regfile
.lower()]
462 rport
= rfile
.r_ports
[rpidx
]
463 print("read regfile", rpidx
, regfile
, regs
.rf
.keys(),
466 # for checking if the read port has an outstanding write
467 if self
.make_hazard_vecs
:
468 wv
= regs
.wv
[regfile
.lower()]
469 wvchk
= wv
.q_int
# write-vec bit-level hazard check
471 # if a hazard is detected on this read port, simply blithely block
472 # every FU from reading on it. this is complete overkill but very
474 hazard_detected
= Signal(name
="raw_%s_%s" % (regfile
, rpidx
))
477 if not isinstance(fspecs
, list):
483 for i
, fspec
in enumerate(fspecs
):
484 # get the regfile specs for this regfile port
485 (rf
, wf
, _read
, _write
, wid
, fuspecs
) = \
486 (fspec
.rdport
, fspec
.wrport
, fspec
.read
, fspec
.write
,
487 fspec
.wid
, fspec
.specs
)
488 print ("fpsec", i
, fspec
, len(fuspecs
))
489 name
= "%s_%s_%d" % (regfile
, regname
, i
)
490 ppoffs
.append(pplen
) # record offset for picker
491 pplen
+= len(fspec
.specs
)
492 rdflag
= Signal(name
="rdflag_"+name
, reset_less
=True)
493 comb
+= rdflag
.eq(fspec
.rdport
)
494 rdflags
.append(rdflag
)
496 print ("pplen", pplen
)
498 # create a priority picker to manage this port
499 rdpickers
[regfile
][rpidx
] = rdpick
= PriorityPicker(pplen
)
500 m
.submodules
["rdpick_%s_%s" % (regfile
, rpidx
)] = rdpick
506 for i
, fspec
in enumerate(fspecs
):
507 (rf
, wf
, _read
, _write
, wid
, fuspecs
) = \
508 (fspec
.rdport
, fspec
.wrport
, fspec
.read
, fspec
.write
,
509 fspec
.wid
, fspec
.specs
)
510 # connect up the FU req/go signals, and the reg-read to the FU
511 # and create a Read Broadcast Bus
512 for pi
, fuspec
in enumerate(fspec
.specs
):
513 (funame
, fu
, idx
) = (fuspec
.funame
, fuspec
.fu
, fuspec
.idx
)
515 name
= "%s_%s_%s_%i" % (regfile
, rpidx
, funame
, pi
)
516 fu_active
= fu_selected
[funame
]
517 fu_issued
= fu_bitdict
[funame
]
519 # get (or set up) a latched copy of read register number
520 # and (sigh) also the read-ok flag
521 rname
= "%s_%s_%s_%d" % (funame
, regfile
, regname
, pi
)
522 rhname
= "%s_%s_%d" % (regfile
, regname
, i
)
523 read
= Signal
.like(_read
, name
="read_"+name
)
524 rdflag
= Signal(name
="rdflag_%s_%s" % (funame
, rhname
),
526 if rhname
not in fu
.rf_latches
:
527 rfl
= Signal(name
="rdflag_latch_"+rname
)
528 fu
.rf_latches
[rhname
] = rfl
529 with m
.If(fu
.issue_i
):
530 sync
+= rfl
.eq(rdflags
[i
])
532 rfl
= fu
.rf_latches
[rhname
]
533 if rname
not in fu
.rd_latches
:
534 rdl
= Signal
.like(_read
, name
="rdlatch_"+rname
)
535 fu
.rd_latches
[rname
] = rdl
536 with m
.If(fu
.issue_i
):
537 sync
+= rdl
.eq(_read
)
539 rdl
= fu
.rd_latches
[rname
]
540 # latch to make the read immediately available on issue cycle
541 # after the read cycle, use the latched copy
542 with m
.If(fu
.issue_i
):
543 comb
+= read
.eq(_read
)
544 comb
+= rdflag
.eq(rdflags
[i
])
547 comb
+= rdflag
.eq(rfl
)
549 # connect request-read to picker input, and output to go-rd
550 addr_en
= Signal
.like(read
, name
="addr_en_"+name
)
551 pick
= Signal(name
="pick_"+name
) # picker input
552 rp
= Signal(name
="rp_"+name
) # picker output
553 delay_pick
= Signal(name
="dp_"+name
) # read-enable "underway"
554 rhazard
= Signal(name
="rhaz_"+name
)
556 # exclude any currently-enabled read-request (mask out active)
557 # entirely block anything hazarded from being picked
558 comb
+= pick
.eq(fu
.rd_rel_o
[idx
] & fu_active
& rdflag
&
559 ~delay_pick
& ~rhazard
)
560 comb
+= rdpick
.i
[pi
].eq(pick
)
561 comb
+= fu
.go_rd_i
[idx
].eq(delay_pick
) # pass in *delayed* pick
563 # if picked, select read-port "reg select" number to port
564 comb
+= rp
.eq(rdpick
.o
[pi
] & rdpick
.en_o
)
565 sync
+= delay_pick
.eq(rp
) # delayed "pick"
566 comb
+= addr_en
.eq(Mux(rp
, read
, 0))
568 # the read-enable happens combinatorially (see mux-bus below)
569 # but it results in the data coming out on a one-cycle delay.
573 addrs
.append(addr_en
)
576 # use the *delayed* pick signal to put requested data onto bus
577 with m
.If(delay_pick
):
578 # connect regfile port to input, creating fan-out Bus
580 print("reg connect widths",
581 regfile
, regname
, pi
, funame
,
582 src
.shape(), rport
.o_data
.shape())
583 # all FUs connect to same port
584 comb
+= src
.eq(rport
.o_data
)
586 if not self
.make_hazard_vecs
:
589 # read the write-hazard bitvector (wv) for any bit that is
590 wvchk_en
= Signal(len(wvchk
), name
="wv_chk_addr_en_"+name
)
591 issue_active
= Signal(name
="rd_iactive_"+name
)
592 # XXX combinatorial loop here
593 comb
+= issue_active
.eq(fu_active
& rdflag
)
594 with m
.If(issue_active
):
596 comb
+= wvchk_en
.eq(read
)
598 comb
+= wvchk_en
.eq(1<<read
)
599 # if FU is busy (which doesn't get set at the same time as
600 # issue) and no hazard was detected, clear wvchk_en (i.e.
601 # stop checking for hazards). there is a loop here, but it's
602 # via a DFF, so is ok. some linters may complain, but hey.
603 with m
.If(fu
.busy_o
& ~rhazard
):
604 comb
+= wvchk_en
.eq(0)
606 # read-hazard is ANDed with (filtered by) what is actually
608 comb
+= rhazard
.eq((wvchk
& wvchk_en
).bool())
610 wvens
.append(wvchk_en
)
612 # or-reduce the muxed read signals
614 # for unary-addressed
615 comb
+= rport
.ren
.eq(ortreereduce_sig(rens
))
617 # for binary-addressed
618 comb
+= rport
.addr
.eq(ortreereduce_sig(addrs
))
619 comb
+= rport
.ren
.eq(Cat(*rens
).bool())
620 print ("binary", regfile
, rpidx
, rport
, rport
.ren
, rens
, addrs
)
622 if not self
.make_hazard_vecs
:
623 return Const(0) # declare "no hazards"
625 # enable the read bitvectors for this issued instruction
626 # and return whether any write-hazard bit is set
627 wvchk_and
= Signal(len(wvchk
), name
="wv_chk_"+name
)
628 comb
+= wvchk_and
.eq(wvchk
& ortreereduce_sig(wvens
))
629 comb
+= hazard_detected
.eq(wvchk_and
.bool())
630 return hazard_detected
632 def connect_rdports(self
, m
, fu_bitdict
, fu_selected
):
633 """connect read ports
635 orders the read regspecs into a dict-of-dicts, by regfile, by
636 regport name, then connects all FUs that want that regport by
637 way of a PriorityPicker.
639 comb
, sync
= m
.d
.comb
, m
.d
.sync
644 # dictionary of lists of regfile read ports
645 byregfiles_rd
, byregfiles_rdspec
= self
.get_byregfiles(True)
647 # okaay, now we need a PriorityPicker per regfile per regfile port
648 # loootta pickers... peter piper picked a pack of pickled peppers...
650 for regfile
, spec
in byregfiles_rd
.items():
651 fuspecs
= byregfiles_rdspec
[regfile
]
652 rdpickers
[regfile
] = {}
654 # argh. an experiment to merge RA and RB in the INT regfile
655 # (we have too many read/write ports)
656 if self
.regreduce_en
:
658 fuspecs
['rabc'] = [fuspecs
.pop('rb')]
659 fuspecs
['rabc'].append(fuspecs
.pop('rc'))
660 fuspecs
['rabc'].append(fuspecs
.pop('ra'))
661 if regfile
== 'FAST':
662 fuspecs
['fast1'] = [fuspecs
.pop('fast1')]
663 if 'fast2' in fuspecs
:
664 fuspecs
['fast1'].append(fuspecs
.pop('fast2'))
665 if 'fast3' in fuspecs
:
666 fuspecs
['fast1'].append(fuspecs
.pop('fast3'))
668 # for each named regfile port, connect up all FUs to that port
669 # also return (and collate) hazard detection
670 for (regname
, fspec
) in sort_fuspecs(fuspecs
):
671 print("connect rd", regname
, fspec
)
672 rh
= self
.connect_rdport(m
, fu_bitdict
, fu_selected
,
677 return Cat(*rd_hazard
).bool()
679 def make_hazards(self
, m
, regfile
, rfile
, wvclr
, wvset
,
680 funame
, regname
, idx
,
681 addr_en
, wp
, fu
, fu_active
, wrflag
, write
,
683 """make_hazards: a setter and a clearer for the regfile write ports
685 setter is at issue time (using PowerDecoder2 regfile write numbers)
686 clearer is at regfile write time (when FU has said what to write to)
688 there is *one* unusual case here which has to be dealt with:
689 when the Function Unit does *NOT* request a write to the regfile
690 (has its data.ok bit CLEARED). this is perfectly legitimate.
693 comb
, sync
= m
.d
.comb
, m
.d
.sync
694 name
= "%s_%s_%d" % (funame
, regname
, idx
)
696 # connect up the bitvector write hazard. unlike the
697 # regfile writeports, a ONE must be written to the corresponding
698 # bit of the hazard bitvector (to indicate the existence of
701 # the detection of what shall be written to is based
702 # on *issue*. it is delayed by 1 cycle so that instructions
703 # "addi 5,5,0x2" do not cause combinatorial loops due to
704 # fake-dependency on *themselves*. this will totally fail
705 # spectacularly when doing multi-issue
706 print ("write vector (for regread)", regfile
, wvset
)
707 wviaddr_en
= Signal(len(wvset
), name
="wv_issue_addr_en_"+name
)
708 issue_active
= Signal(name
="iactive_"+name
)
709 sync
+= issue_active
.eq(fu
.issue_i
& fu_active
& wrflag
)
710 with m
.If(issue_active
):
712 comb
+= wviaddr_en
.eq(write
)
714 comb
+= wviaddr_en
.eq(1<<write
)
716 # deal with write vector clear: this kicks in when the regfile
717 # is written to, and clears the corresponding bitvector entry
718 print ("write vector", regfile
, wvclr
)
719 wvaddr_en
= Signal(len(wvclr
), name
="wvaddr_en_"+name
)
721 comb
+= wvaddr_en
.eq(addr_en
)
724 comb
+= wvaddr_en
.eq(1<<addr_en
)
726 # XXX ASSUME that LDSTFunctionUnit always sets the data it intends to
727 # this may NOT be the case when an exception occurs
728 if isinstance(fu
, LDSTFunctionUnit
):
729 return wvaddr_en
, wviaddr_en
731 # okaaay, this is preparation for the awkward case.
732 # * latch a copy of wrflag when issue goes high.
733 # * when the fu_wrok (data.ok) flag is NOT set,
734 # but the FU is done, the FU is NEVER going to write
735 # so the bitvector has to be cleared.
736 latch_wrflag
= Signal(name
="latch_wrflag_"+name
)
737 with m
.If(~fu
.busy_o
):
738 sync
+= latch_wrflag
.eq(0)
739 with m
.If(fu
.issue_i
& fu_active
):
740 sync
+= latch_wrflag
.eq(wrflag
)
741 with m
.If(fu
.alu_done_o
& latch_wrflag
& ~fu_wrok
):
743 comb
+= wvaddr_en
.eq(write
) # addr_en gated with wp, don't use
745 comb
+= wvaddr_en
.eq(1<<addr_en
) # binary addr_en not gated
747 return wvaddr_en
, wviaddr_en
749 def connect_wrport(self
, m
, fu_bitdict
, fu_selected
,
750 wrpickers
, regfile
, regname
, fspec
):
751 comb
, sync
= m
.d
.comb
, m
.d
.sync
757 # select the required write port. these are pre-defined sizes
758 rfile
= regs
.rf
[regfile
.lower()]
759 wport
= rfile
.w_ports
[rpidx
]
761 print("connect wr", regname
, "unary", rfile
.unary
, fspec
)
762 print(regfile
, regs
.rf
.keys())
764 # select the write-protection hazard vector. note that this still
765 # requires to WRITE to the hazard bitvector! read-requests need
766 # to RAISE the bitvector (set it to 1), which, duh, requires a WRITE
767 if self
.make_hazard_vecs
:
768 wv
= regs
.wv
[regfile
.lower()]
769 wvset
= wv
.s
# write-vec bit-level hazard ctrl
770 wvclr
= wv
.r
# write-vec bit-level hazard ctrl
771 wvchk
= wv
.q
# write-after-write hazard check
772 wvchk_qint
= wv
.q
# write-after-write hazard check, NOT delayed
775 if not isinstance(fspecs
, list):
783 for i
, fspec
in enumerate(fspecs
):
784 # get the regfile specs for this regfile port
785 (rf
, wf
, _read
, _write
, wid
, fuspecs
) = \
786 (fspec
.rdport
, fspec
.wrport
, fspec
.read
, fspec
.write
,
787 fspec
.wid
, fspec
.specs
)
788 print ("fpsec", i
, "wrflag", wf
, fspec
, len(fuspecs
))
789 ppoffs
.append(pplen
) # record offset for picker
790 pplen
+= len(fuspecs
)
792 name
= "%s_%s_%d" % (regfile
, regname
, i
)
793 rdflag
= Signal(name
="rd_flag_"+name
)
794 wrflag
= Signal(name
="wr_flag_"+name
)
796 comb
+= rdflag
.eq(rf
)
800 comb
+= wrflag
.eq(wf
)
803 rdflags
.append(rdflag
)
804 wrflags
.append(wrflag
)
806 # create a priority picker to manage this port
807 wrpickers
[regfile
][rpidx
] = wrpick
= PriorityPicker(pplen
)
808 m
.submodules
["wrpick_%s_%s" % (regfile
, rpidx
)] = wrpick
815 #wvens = [] - not needed: reading of writevec is permanently held hi
817 for i
, fspec
in enumerate(fspecs
):
818 # connect up the FU req/go signals and the reg-read to the FU
819 # these are arbitrated by Data.ok signals
820 (rf
, wf
, _read
, _write
, wid
, fuspecs
) = \
821 (fspec
.rdport
, fspec
.wrport
, fspec
.read
, fspec
.write
,
822 fspec
.wid
, fspec
.specs
)
823 for pi
, fuspec
in enumerate(fspec
.specs
):
824 (funame
, fu
, idx
) = (fuspec
.funame
, fuspec
.fu
, fuspec
.idx
)
825 fu_requested
= fu_bitdict
[funame
]
827 name
= "%s_%s_%s_%d" % (funame
, regfile
, regname
, idx
)
828 # get (or set up) a write-latched copy of write register number
829 write
= Signal
.like(_write
, name
="write_"+name
)
830 rname
= "%s_%s_%s_%d" % (funame
, regfile
, regname
, idx
)
831 if rname
not in fu
.wr_latches
:
832 wrl
= Signal
.like(_write
, name
="wrlatch_"+rname
)
833 fu
.wr_latches
[rname
] = write
834 # do not depend on fu.issue_i here, it creates a
835 # combinatorial loop on waw checking. using the FU
836 # "enable" bitdict entry for this FU is sufficient,
837 # because the PowerDecoder2 read/write nums are
838 # valid continuously when the instruction is valid
839 with m
.If(fu_requested
):
840 sync
+= wrl
.eq(_write
)
841 comb
+= write
.eq(_write
)
843 comb
+= write
.eq(wrl
)
845 write
= fu
.wr_latches
[rname
]
847 # write-request comes from dest.ok
848 dest
= fu
.get_out(idx
)
849 fu_dest_latch
= fu
.get_fu_out(idx
) # latched output
850 name
= "%s_%s_%d" % (funame
, regname
, idx
)
851 fu_wrok
= Signal(name
="fu_wrok_"+name
, reset_less
=True)
852 comb
+= fu_wrok
.eq(dest
.ok
& fu
.busy_o
)
854 # connect request-write to picker input, and output to go-wr
855 fu_active
= fu_selected
[funame
]
856 pick
= fu
.wr
.rel_o
[idx
] & fu_active
857 comb
+= wrpick
.i
[pi
].eq(pick
)
858 # create a single-pulse go write from the picker output
859 wr_pick
= Signal(name
="wpick_%s_%s_%d" % (funame
, regname
, idx
))
860 comb
+= wr_pick
.eq(wrpick
.o
[pi
] & wrpick
.en_o
)
861 comb
+= fu
.go_wr_i
[idx
].eq(rising_edge(m
, wr_pick
))
863 # connect the regspec write "reg select" number to this port
864 # only if one FU actually requests (and is granted) the port
865 # will the write-enable be activated
866 wname
= "waddr_en_%s_%s_%d" % (funame
, regname
, idx
)
867 addr_en
= Signal
.like(write
, name
=wname
)
869 comb
+= wp
.eq(wr_pick
& wrpick
.en_o
)
870 comb
+= addr_en
.eq(Mux(wp
, write
, 0))
874 addrs
.append(addr_en
)
877 # connect regfile port to input
878 print("reg connect widths",
879 regfile
, regname
, pi
, funame
,
880 dest
.shape(), wport
.i_data
.shape())
881 wsigs
.append(fu_dest_latch
)
883 # now connect up the bitvector write hazard
884 if not self
.make_hazard_vecs
:
886 res
= self
.make_hazards(m
, regfile
, rfile
, wvclr
, wvset
,
887 funame
, regname
, idx
,
888 addr_en
, wp
, fu
, fu_active
,
889 wrflags
[i
], write
, fu_wrok
)
890 wvaddr_en
, wv_issue_en
= res
891 wvclren
.append(wvaddr_en
) # set only: no data => clear bit
892 wvseten
.append(wv_issue_en
) # set data same as enable
894 # read the write-hazard bitvector (wv) for any bit that is
895 fu_requested
= fu_bitdict
[funame
]
896 wvchk_en
= Signal(len(wvchk
), name
="waw_chk_addr_en_"+name
)
897 issue_active
= Signal(name
="waw_iactive_"+name
)
898 whazard
= Signal(name
="whaz_"+name
)
900 # XXX EEK! STATE regfile (branch) does not have an
901 # write-active indicator in regspec_decode_write()
902 print ("XXX FIXME waw_iactive", issue_active
,
905 # check bits from the incoming instruction. note (back
906 # in connect_instruction) that the decoder is held for
907 # us to be able to do this, here... *without* issue being
908 # held HI. we MUST NOT gate this with fu.issue_i or
909 # with fu_bitdict "enable": it would create a loop
910 comb
+= issue_active
.eq(wf
)
911 with m
.If(issue_active
):
913 comb
+= wvchk_en
.eq(write
)
915 comb
+= wvchk_en
.eq(1<<write
)
916 # if FU is busy (which doesn't get set at the same time as
917 # issue) and no hazard was detected, clear wvchk_en (i.e.
918 # stop checking for hazards). there is a loop here, but it's
919 # via a DFF, so is ok. some linters may complain, but hey.
920 with m
.If(fu
.busy_o
& ~whazard
):
921 comb
+= wvchk_en
.eq(0)
923 # write-hazard is ANDed with (filtered by) what is actually
924 # being requested. the wvchk data is on a one-clock delay,
925 # and wvchk_en comes directly from the main decoder
926 comb
+= whazard
.eq((wvchk_qint
& wvchk_en
).bool())
928 comb
+= fu
._waw
_hazard
.eq(1)
930 #wvens.append(wvchk_en)
932 # here is where we create the Write Broadcast Bus. simple, eh?
933 comb
+= wport
.i_data
.eq(ortreereduce_sig(wsigs
))
935 # for unary-addressed
936 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
938 # for binary-addressed
939 comb
+= wport
.addr
.eq(ortreereduce_sig(addrs
))
940 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
942 if not self
.make_hazard_vecs
:
945 # return these here rather than set wvclr/wvset directly,
946 # because there may be more than one write-port to a given
947 # regfile. example: XER has a write-port for SO, CA, and OV
948 # and the *last one added* of those would overwrite the other
949 # two. solution: have connect_wrports collate all the
950 # or-tree-reduced bitvector set/clear requests and drop them
951 # in as a single "thing". this can only be done because the
952 # set/get is an unary bitvector.
953 print ("make write-vecs", regfile
, regname
, wvset
, wvclr
)
954 return (ortreereduce_sig(wvclren
), # clear (regfile write)
955 ortreereduce_sig(wvseten
)) # set (issue time)
def connect_wrports(self, m, fu_bitdict, fu_selected):
    """Connect the Function Units' write requests to the regfile write ports.

    The write regspecs returned by ``get_byregfiles(False)`` are walked
    regfile by regfile and port by port, and each (regfile, port) group is
    handed to ``connect_wrport``.  The per-port write-hazard "clear" and
    "set" vectors it returns are collected per regfile and OR-reduced into
    one value each, because a regfile may have several write ports and a
    second ``comb +=`` on the same hazard vector would override the first.

    Author's note kept from the original docstring: the write-port wen,
    write-port data and go_wr_i must all land on the same clock cycle.

    :param m: the nmigen Module under construction
    :param fu_bitdict: passed through unchanged to ``connect_wrport``
    :param fu_selected: passed through unchanged to ``connect_wrport``
    :return: None
    """
    comb = m.d.comb

    # per-regfile dictionaries of write ports / write regspecs
    byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

    # NOTE(review): the initialisation of ``wrpickers`` was missing from the
    # extracted source (it was indexed without ever being created); an empty
    # dict is assumed -- confirm against the repository.
    wrpickers = {}
    wvclrers = defaultdict(list)
    wvseters = defaultdict(list)
    for regfile in byregfiles_wr:
        fuspecs = byregfiles_wrspec[regfile]
        wrpickers[regfile] = {}

        if self.regreduce_en:
            # port-merging: fold several named ports onto the first one
            # NOTE(review): the guard for the 'o'/'o1' merge was lost in
            # extraction; 'INT' is assumed by analogy with the 'FAST'
            # guard below -- confirm.
            if regfile == 'INT':
                fuspecs['o'] = [fuspecs.pop('o')]
                fuspecs['o'].append(fuspecs.pop('o1'))
            if regfile == 'FAST':
                fuspecs['fast1'] = [fuspecs.pop('fast1')]
                for extra in ('fast2', 'fast3'):
                    if extra in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop(extra))

        # collate these and record them by regfile because there
        # are sometimes more write-ports per regfile
        for regname, fspec in sort_fuspecs(fuspecs):
            # NOTE(review): one argument line of this call was lost in
            # extraction; ``wrpickers`` is assumed, being the only local
            # built here that is otherwise unused -- confirm against the
            # signature of connect_wrport.
            wvclren, wvseten = self.connect_wrport(m,
                                                   fu_bitdict, fu_selected,
                                                   wrpickers,
                                                   regfile, regname, fspec)
            wvclrers[regfile.lower()].append(wvclren)
            wvseters[regfile.lower()].append(wvseten)

    if not self.make_hazard_vecs:
        # NOTE(review): the body of this test was lost in extraction; an
        # early return is assumed since everything below only concerns
        # the write-hazard vectors.
        return

    # NOTE(review): the binding of ``regs`` was lost in extraction;
    # ``self.regs`` is assumed -- confirm.
    regs = self.regs

    # for write-vectors: reduce the clr-ers and set-ers down to a single
    # set of bits.  otherwise if there are two write ports (on some
    # regfiles), the last one doing comb += on the regs.wv[regfile]
    # instance "wins" and all others are ignored.
    for regfile, wvclren in wvclrers.items():
        wv = regs.wv[regfile]
        wvseten = wvseters[regfile]
        # wv.r / wv.s: write-vec bit-level hazard ctrl
        comb += wv.r.eq(ortreereduce_sig(wvclren))  # clear (regfile write)
        comb += wv.s.eq(ortreereduce_sig(wvseten))  # set (issue time)
def get_byregfiles(self, readmode):
    """Group the Function Units' register ports by register file.

    For every Function Unit and every one of its source (read mode) or
    destination (write mode) operands, the operand's regspec is looked up,
    decoded against the current instruction, and recorded in two tables.
    A summary of both tables is printed for debugging.

    :param readmode: True to collect read ports (``n_src`` /
                     ``get_in_spec`` / ``regspec_decode_read``), False to
                     collect write ports (``n_dst`` / ``get_out_spec`` /
                     ``regspec_decode_write``)
    :return: tuple ``(byregfiles, byregfiles_spec)``:

             * ``byregfiles[regfile][idx]`` is a list of ``FUSpec`` (note
               the second key is the operand index, not the port name)
             * ``byregfiles_spec[regfile][regname]`` is a ``ByRegSpec``
               whose ``specs`` list holds every ``FUSpec`` using that port
    """
    mode = "read" if readmode else "write"
    # NOTE(review): the binding of ``fus`` was lost in extraction; the dict
    # of name -> Function Unit is assumed to live at self.fus.fus -- confirm.
    fus = self.fus.fus
    e = self.ireg.e  # decoded instruction to execute

    # dictionary of dictionaries of lists of FUSpec.
    # first key: regfile.  second key: operand index
    byregfiles = defaultdict(lambda: defaultdict(list))
    byregfiles_spec = defaultdict(dict)

    for funame, fu in fus.items():
        # create in each FU a receptacle for the read/write register
        # hazard numbers.
        # NOTE(review): the if/else around these initialisers was lost in
        # extraction.  They are made mode-specific here so that a
        # write-mode call does not wipe the read latches; wr_latches is
        # required by the code below, wf_latches is assumed by symmetry
        # with rf_latches -- confirm.
        if readmode:
            fu.rd_latches = {}  # read reg number latches
            fu.rf_latches = {}  # read flag latches
        else:
            fu.wr_latches = {}  # write reg number latches
            fu.wf_latches = {}  # write flag latches

        print("%s ports for %s" % (mode, funame))
        n_ports = fu.n_src if readmode else fu.n_dst
        for idx in range(n_ports):
            # construct regfile specs: read uses inspec, write outspec
            if readmode:
                regfile, regname, wid = fu.get_in_spec(idx)
            else:
                regfile, regname, wid = fu.get_out_spec(idx)
            print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))

            # the decoded instruction contains the decoded regfile
            # numbers.  obtain these now; the unused direction is None
            if readmode:
                rdport, read = regspec_decode_read(e, regfile, regname)
                wrport, write = None, None
            else:
                rdport, read = None, None
                wrport, write = regspec_decode_write(e, regfile, regname)

            # construct the dictionary of regspec information by regfile:
            # only the first FU seen for a port creates the entry
            if regname not in byregfiles_spec[regfile]:
                byregfiles_spec[regfile][regname] = ByRegSpec(
                    rdport, wrport, read, write, wid, [])
            # here we start to create "lanes"
            fuspec = FUSpec(funame, fu, idx)
            byregfiles[regfile][idx].append(fuspec)
            byregfiles_spec[regfile][regname].specs.append(fuspec)

            # append a latch Signal to the FU's dict of latches
            # NOTE(review): two source lines just before this section were
            # lost in extraction; if the original skipped latch creation
            # at this point, restore that -- confirm.
            rname = "%s_%s" % (regfile, regname)
            if readmode:
                if rname not in fu.rd_latches:
                    fu.rd_latches[rname] = Signal.like(
                        read, name="rdlatch_"+rname)
            else:
                if rname not in fu.wr_latches:
                    fu.wr_latches[rname] = Signal.like(
                        write, name="wrlatch_"+rname)

    # ok just print that all out, for convenience
    for regfile in byregfiles:
        print("regfile %s ports:" % mode, regfile)
        for regname, fspec in byregfiles_spec[regfile].items():
            # unpack under a fresh name: the dict being iterated must not
            # be rebound inside its own loop
            rdport, wrport, read, write, wid, specs = fspec
            print(" rf %s port %s lane: %s" % (mode, regfile, regname))
            print(" %s" % regname, wid, read, write, rdport, wrport)
            for funame, fu, idx in specs:
                fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                print(" ", funame, fu.__class__.__name__, idx, fusig)

    return byregfiles, byregfiles_spec
def ports(self):
    """Yield, in order, the ports of self.fus, self.i.e and self.l0.

    NOTE(review): the ``def`` line of this method was lost in extraction,
    leaving the three ``yield from`` statements stranded after the
    ``return`` of the previous method.  The name ``ports`` is taken from
    the ``dut.ports()`` call in the ``__main__`` section -- confirm
    against the repository.
    """
    yield from self.fus.ports()
    yield from self.i.e.ports()
    yield from self.l0.ports()
1111 if __name__
== '__main__':
1112 pspec
= TestMemPspec(ldst_ifacetype
='testpi',
1118 dut
= NonProductionCore(pspec
)
1119 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
1120 with
open("test_core.il", "w") as f
: