4b6611fce358a42886b5e62183bc2481592771dc
"""simple (non-production) core

not in any way intended for production use.  connects up FunctionUnits to
Register Files in a brain-dead fashion that only permits one and only one
Function Unit to be operational.

the principle here is to take the Function Units, analyse their regspecs,
and turn their requirements for access to register file read/write ports
into groupings by Register File and Register File Port name.

under each grouping - by regfile/port - a list of Function Units that
need to connect to that port is created.  as these are a contended
resource, a "Broadcast Bus" per read/write port is then also created,
with access to it managed by a PriorityPicker.

the brain-dead part of this module is that even though there is no
conflict of access, regfile read/write hazards are *not* analysed,
and consequently it is safer to wait for the Function Unit to complete
before allowing a new instruction to proceed.
(update: actually this is being added now:
https://bugs.libre-soc.org/show_bug.cgi?id=737)
"""
from collections import defaultdict, namedtuple
import operator

from nmigen import (Elaboratable, Module, Signal, ResetSignal, Cat, Mux,
                    Const)
from nmigen.cli import rtlil

from nmutil.picker import PriorityPicker
from nmutil.util import treereduce
from nmutil.util import rising_edge
from nmutil.singlepipe import ControlBase

from openpower.decoder.power_decoder2 import PowerDecodeSubset
from openpower.decoder.power_decoder2 import get_rdflags
from openpower.decoder.power_regspec_map import regspec_decode
from openpower.decoder.power_enums import MicrOp, Function
from openpower.sv.svp64 import SVP64Rec

from soc.fu.compunits.compunits import AllFunctionUnits, LDSTFunctionUnit
from soc.regfile.regfiles import RegFiles
from soc.experiment.l0_cache import TstL0CacheBuffer  # test only
from soc.config.test.test_loadstore import TestMemPspec
from soc.simple.core_data import CoreInput, CoreOutput

# one Function Unit's use of a regfile port: the FU's name (funame), the
# FU object itself (fu) and the index (idx) of the FU operand/result that
# is wired to the port (used below as fu.rd_rel_o[idx], fu.get_out(idx)...)
FUSpec = namedtuple("FUSpec", ["funame", "fu", "idx"])
# everything that wants one regfile port: the decoder's "ok" flag (okflag),
# the decoder's register-select value (regport), a width field (wid -
# not read anywhere in this section, TODO confirm meaning) and the list
# of FUSpec entries (specs) contending for the port
ByRegSpec = namedtuple("ByRegSpec", ["okflag", "regport", "wid", "specs"])
# helper functions for reducing a list of signals down to a single signal,
# by ORing all of them together (via treereduce)
def ortreereduce(tree, attr="o_data"):
    """OR-reduce one named attribute taken from every item of a list.

    :param tree: the items to reduce (handed unchanged to treereduce)
    :param attr: name of the attribute fetched from each item before
                 ORing.  defaults to "o_data"
    :returns: whatever treereduce produces when combining the fetched
              attributes with operator.or_
    """
    def fetch(item):
        # pick out the attribute that is to take part in the OR
        return getattr(item, attr)

    return treereduce(tree, operator.or_, fetch)
def ortreereduce_sig(tree):
    """OR-reduce a list of signals (the items themselves, no attribute).

    :param tree: the signals to reduce (handed unchanged to treereduce)
    :returns: whatever treereduce produces when combining the items
              with operator.or_
    """
    def identity(sig):
        # the items are used directly: nothing to extract
        return sig

    return treereduce(tree, operator.or_, identity)
62 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Order regfile-port specs so that "full" register ports come first.

    :param fuspecs: dict mapping regfile port name to its spec
    :returns: a new list of (regname, fspec) tuples: first every entry
              whose name starts with "full", then all the others.  within
              each group the dict's own iteration order is preserved.
              an empty dict gives an empty list.
    """
    entries = list(fuspecs.items())
    # stable two-way partition: "full*" declarations first, the rest after
    full = [(regname, fspec) for (regname, fspec) in entries
            if regname.startswith("full")]
    rest = [(regname, fspec) for (regname, fspec) in entries
            if not regname.startswith("full")]
    return full + rest
74 # derive from ControlBase rather than have a separate Stage instance,
75 # this is simpler to do
76 class NonProductionCore(ControlBase
):
77 def __init__(self
, pspec
):
80 # test is SVP64 is to be enabled
81 self
.svp64_en
= hasattr(pspec
, "svp64") and (pspec
.svp64
== True)
83 # test to see if regfile ports should be reduced
84 self
.regreduce_en
= (hasattr(pspec
, "regreduce") and
85 (pspec
.regreduce
== True))
87 # test to see if overlapping of instructions is allowed
88 # (not normally enabled for TestIssuer FSM but useful for checking
89 # the bitvector hazard detection, before doing In-Order)
90 self
.allow_overlap
= (hasattr(pspec
, "allow_overlap") and
91 (pspec
.allow_overlap
== True))
94 self
.make_hazard_vecs
= self
.allow_overlap
95 self
.core_type
= "fsm"
96 if hasattr(pspec
, "core_type"):
97 self
.core_type
= pspec
.core_type
99 super().__init
__(stage
=self
)
101 # single LD/ST funnel for memory access
102 self
.l0
= l0
= TstL0CacheBuffer(pspec
, n_units
=1)
105 # function units (only one each)
106 # only include mmu if enabled in pspec
107 self
.fus
= AllFunctionUnits(pspec
, pilist
=[pi
])
109 # link LoadStore1 into MMU
110 mmu
= self
.fus
.get_fu('mmu0')
111 print ("core pspec", pspec
.ldst_ifacetype
)
112 print ("core mmu", mmu
)
114 print ("core lsmem.lsi", l0
.cmpi
.lsmem
.lsi
)
115 mmu
.alu
.set_ldst_interface(l0
.cmpi
.lsmem
.lsi
)
117 # register files (yes plural)
118 self
.regs
= RegFiles(pspec
, make_hazard_vecs
=self
.make_hazard_vecs
)
120 # set up input and output: unusual requirement to set data directly
121 # (due to the way that the core is set up in a different domain,
122 # see TestIssuer.setup_peripherals
123 self
.p
.i_data
, self
.n
.o_data
= self
.new_specs(None)
124 self
.i
, self
.o
= self
.p
.i_data
, self
.n
.o_data
126 # actual internal input data used (captured)
127 self
.ireg
= self
.ispec()
129 # create per-FU instruction decoders (subsetted). these "satellite"
130 # decoders reduce wire fan-out from the one (main) PowerDecoder2
131 # (used directly by the trap unit) to the *twelve* (or more)
132 # Function Units. we can either have 32 wires (the instruction)
133 # to each, or we can have well over a 200 wire fan-out (to 12
134 # ALUs). it's an easy choice to make.
138 # eep, these should be *per FU* i.e. for FunctionUnitBaseMulti
139 # they should be shared (put into the ALU *once*).
141 for funame
, fu
in self
.fus
.fus
.items():
142 f_name
= fu
.fnunit
.name
143 fnunit
= fu
.fnunit
.value
144 opkls
= fu
.opsubsetkls
146 # TRAP decoder is the *main* decoder
147 self
.trapunit
= funame
149 assert funame
not in self
.decoders
150 self
.decoders
[funame
] = PowerDecodeSubset(None, opkls
, f_name
,
152 state
=self
.ireg
.state
,
153 svp64_en
=self
.svp64_en
,
154 regreduce_en
=self
.regreduce_en
)
155 self
.des
[funame
] = self
.decoders
[funame
].do
157 # create per-Function Unit write-after-write hazard signals
158 # yes, really, this should have been added in ReservationStations
160 for funame
, fu
in self
.fus
.fus
.items():
161 fu
._waw
_hazard
= Signal(name
="waw_%s" % funame
)
163 # share the SPR decoder with the MMU if it exists
164 if "mmu0" in self
.decoders
:
165 self
.decoders
["mmu0"].mmu0_spr_dec
= self
.decoders
["spr0"]
167 # next 3 functions are Stage API Compliance
168 def setup(self
, m
, i
):
172 return CoreInput(self
.pspec
, self
.svp64_en
, self
.regreduce_en
)
177 # elaborate function to create HDL
178 def elaborate(self
, platform
):
179 m
= super().elaborate(platform
)
181 # for testing purposes, to cut down on build time in coriolis2
182 if hasattr(self
.pspec
, "nocore") and self
.pspec
.nocore
== True:
183 x
= Signal() # dummy signal
188 m
.submodules
.fus
= self
.fus
189 m
.submodules
.l0
= l0
= self
.l0
190 self
.regs
.elaborate_into(m
, platform
)
194 # amalgamate write-hazards into a single top-level Signal
195 self
.waw_hazard
= Signal()
197 for funame
, fu
in self
.fus
.fus
.items():
198 whaz
.append(fu
._waw
_hazard
)
199 comb
+= self
.waw_hazard
.eq(Cat(*whaz
).bool())
202 self
.connect_satellite_decoders(m
)
204 # ssh, cheat: trap uses the main decoder because of the rewriting
205 self
.des
[self
.trapunit
] = self
.ireg
.e
.do
207 # connect up Function Units, then read/write ports, and hazard conflict
208 self
.issue_conflict
= Signal()
209 fu_bitdict
, fu_selected
= self
.connect_instruction(m
)
210 raw_hazard
= self
.connect_rdports(m
, fu_bitdict
, fu_selected
)
211 self
.connect_wrports(m
, fu_bitdict
, fu_selected
)
212 if self
.allow_overlap
:
213 comb
+= self
.issue_conflict
.eq(raw_hazard
)
215 # note if an exception happened. in a pipelined or OoO design
216 # this needs to be accompanied by "shadowing" (or stalling)
218 for exc
in self
.fus
.excs
.values():
219 el
.append(exc
.happened
)
220 if len(el
) > 0: # at least one exception
221 comb
+= self
.o
.exc_happened
.eq(Cat(*el
).bool())
225 def connect_satellite_decoders(self
, m
):
227 for k
, v
in self
.decoders
.items():
228 # connect each satellite decoder and give it the instruction.
229 # as subset decoders this massively reduces wire fanout given
230 # the large number of ALUs
231 m
.submodules
["dec_%s" % k
] = v
232 comb
+= v
.dec
.raw_opcode_in
.eq(self
.ireg
.raw_insn_i
)
233 comb
+= v
.dec
.bigendian
.eq(self
.ireg
.bigendian_i
)
234 # sigh due to SVP64 RA_OR_ZERO detection connect these too
235 comb
+= v
.sv_a_nz
.eq(self
.ireg
.sv_a_nz
)
237 comb
+= v
.pred_sm
.eq(self
.ireg
.sv_pred_sm
)
238 comb
+= v
.pred_dm
.eq(self
.ireg
.sv_pred_dm
)
239 if k
!= self
.trapunit
:
240 comb
+= v
.sv_rm
.eq(self
.ireg
.sv_rm
) # pass through SVP64 RM
241 comb
+= v
.is_svp64_mode
.eq(self
.ireg
.is_svp64_mode
)
242 # only the LDST PowerDecodeSubset *actually* needs to
243 # know to use the alternative decoder. this is all
245 if k
.lower().startswith("ldst"):
246 comb
+= v
.use_svp64_ldst_dec
.eq(
247 self
.ireg
.use_svp64_ldst_dec
)
249 def connect_instruction(self
, m
):
250 """connect_instruction
252 uses decoded (from PowerOp) function unit information from CSV files
253 to ascertain which Function Unit should deal with the current
256 some (such as OP_ATTN, OP_NOP) are dealt with here, including
257 ignoring it and halting the processor. OP_NOP is a bit annoying
258 because the issuer expects busy flag still to be raised then lowered.
259 (this requires a fake counter to be set).
261 comb
, sync
= m
.d
.comb
, m
.d
.sync
264 # indicate if core is busy
265 busy_o
= self
.o
.busy_o
266 any_busy_o
= self
.o
.any_busy_o
268 # connect up temporary copy of incoming instruction. the FSM will
269 # either blat the incoming instruction (if valid) into self.ireg
270 # or if the instruction could not be delivered, keep dropping the
271 # latched copy into ireg
272 ilatch
= self
.ispec()
273 self
.instr_active
= Signal()
275 # enable/busy-signals for each FU, get one bit for each FU (by name)
276 fu_enable
= Signal(len(fus
), reset_less
=True)
277 fu_busy
= Signal(len(fus
), reset_less
=True)
280 for i
, funame
in enumerate(fus
.keys()):
281 fu_bitdict
[funame
] = fu_enable
[i
]
282 fu_selected
[funame
] = fu_busy
[i
]
284 # identify function units and create a list by fnunit so that
285 # PriorityPickers can be created for selecting one of them that
286 # isn't busy at the time the incoming instruction needs passing on
287 by_fnunit
= defaultdict(list)
288 for fname
, member
in Function
.__members
__.items():
289 for funame
, fu
in fus
.items():
290 fnunit
= fu
.fnunit
.value
291 if member
.value
& fnunit
: # this FU handles this type of op
292 by_fnunit
[fname
].append((funame
, fu
)) # add by Function
294 # ok now just print out the list of FUs by Function, because we can
295 for fname
, fu_list
in by_fnunit
.items():
296 print ("FUs by type", fname
, fu_list
)
298 # now create a PriorityPicker per FU-type such that only one
299 # non-busy FU will be picked
301 fu_found
= Signal() # take a note if no Function Unit was available
302 for fname
, fu_list
in by_fnunit
.items():
303 i_pp
= PriorityPicker(len(fu_list
))
304 m
.submodules
['i_pp_%s' % fname
] = i_pp
306 for i
, (funame
, fu
) in enumerate(fu_list
):
307 # match the decoded instruction (e.do.fn_unit) against the
308 # "capability" of this FU, gate that by whether that FU is
309 # busy, and drop that into the PriorityPicker.
310 # this will give us an output of the first available *non-busy*
311 # Function Unit (Reservation Statio) capable of handling this
313 fnunit
= fu
.fnunit
.value
314 en_req
= Signal(name
="issue_en_%s" % funame
, reset_less
=True)
315 fnmatch
= (self
.ireg
.e
.do
.fn_unit
& fnunit
).bool()
316 comb
+= en_req
.eq(fnmatch
& ~fu
.busy_o
&
318 i_l
.append(en_req
) # store in list for doing the Cat-trick
319 # picker output, gated by enable: store in fu_bitdict
320 po
= Signal(name
="o_issue_pick_"+funame
) # picker output
321 comb
+= po
.eq(i_pp
.o
[i
] & i_pp
.en_o
)
322 comb
+= fu_bitdict
[funame
].eq(po
)
323 comb
+= fu_selected
[funame
].eq(fu
.busy_o | po
)
324 # if we don't do this, then when there are no FUs available,
325 # the "p.o_ready" signal will go back "ok we accepted this
326 # instruction" which of course isn't true.
327 with m
.If(i_pp
.en_o
):
328 comb
+= fu_found
.eq(1)
329 # for each input, Cat them together and drop them into the picker
330 comb
+= i_pp
.i
.eq(Cat(*i_l
))
332 # rdmask, which is for registers needs to come from the *main* decoder
333 for funame
, fu
in fus
.items():
334 rdmask
= get_rdflags(self
.ireg
.e
, fu
)
335 comb
+= fu
.rdmaskn
.eq(~rdmask
)
337 # sigh - need a NOP counter
339 with m
.If(counter
!= 0):
340 sync
+= counter
.eq(counter
- 1)
343 # default to reading from incoming instruction: may be overridden
344 # by copy from latch when "waiting"
345 comb
+= self
.ireg
.eq(self
.i
)
346 # always say "ready" except if overridden
347 comb
+= self
.p
.o_ready
.eq(1)
350 with m
.State("READY"):
351 with m
.If(self
.p
.i_valid
): # run only when valid
352 with m
.Switch(self
.ireg
.e
.do
.insn_type
):
353 # check for ATTN: halt if true
354 with m
.Case(MicrOp
.OP_ATTN
):
355 m
.d
.sync
+= self
.o
.core_terminate_o
.eq(1)
357 # fake NOP - this isn't really used (Issuer detects NOP)
358 with m
.Case(MicrOp
.OP_NOP
):
359 sync
+= counter
.eq(2)
363 comb
+= self
.instr_active
.eq(1)
364 comb
+= self
.p
.o_ready
.eq(0)
365 # connect instructions. only one enabled at a time
366 for funame
, fu
in fus
.items():
367 do
= self
.des
[funame
]
368 enable
= fu_bitdict
[funame
]
370 # run this FunctionUnit if enabled route op,
371 # issue, busy, read flags and mask to FU
373 # operand comes from the *local* decoder
374 # do not actually issue, though, if there
375 # is a waw hazard. decoder has to still
376 # be asserted in order to detect that, tho
377 comb
+= fu
.oper_i
.eq_from(do
)
378 # issue when valid (and no write-hazard)
379 comb
+= fu
.issue_i
.eq(~self
.waw_hazard
)
380 # instruction ok, indicate ready
381 comb
+= self
.p
.o_ready
.eq(1)
383 if self
.allow_overlap
:
384 with m
.If(~fu_found | self
.waw_hazard
):
385 # latch copy of instruction
386 sync
+= ilatch
.eq(self
.i
)
387 comb
+= self
.p
.o_ready
.eq(1) # accept
391 with m
.State("WAITING"):
392 comb
+= self
.instr_active
.eq(1)
393 comb
+= self
.p
.o_ready
.eq(0)
395 # using copy of instruction, keep waiting until an FU is free
396 comb
+= self
.ireg
.eq(ilatch
)
397 with m
.If(fu_found
): # wait for conflict to clear
398 # connect instructions. only one enabled at a time
399 for funame
, fu
in fus
.items():
400 do
= self
.des
[funame
]
401 enable
= fu_bitdict
[funame
]
403 # run this FunctionUnit if enabled route op,
404 # issue, busy, read flags and mask to FU
406 # operand comes from the *local* decoder,
407 # which is asserted even if not issued,
408 # so that WaW-detection can check for hazards.
409 # only if the waw hazard is clear does the
410 # instruction actually get issued
411 comb
+= fu
.oper_i
.eq_from(do
)
413 comb
+= fu
.issue_i
.eq(~self
.waw_hazard
)
414 with m
.If(~self
.waw_hazard
):
415 comb
+= self
.p
.o_ready
.eq(1)
419 print ("core: overlap allowed", self
.allow_overlap
)
420 # true when any FU is busy (including the cycle where it is perhaps
421 # to be issued - because that's what fu_busy is)
422 comb
+= any_busy_o
.eq(fu_busy
.bool())
423 if not self
.allow_overlap
:
424 # for simple non-overlap, if any instruction is busy, set
425 # busy output for core.
426 comb
+= busy_o
.eq(any_busy_o
)
428 # sigh deal with a fun situation that needs to be investigated
430 with m
.If(self
.issue_conflict
):
432 # make sure that LDST, SPR, MMU, Branch and Trap all say "busy"
433 # and do not allow overlap. these are all the ones that
434 # are non-forward-progressing: exceptions etc. that otherwise
435 # change CoreState for some reason (MSR, PC, SVSTATE)
436 for funame
, fu
in fus
.items():
437 if (funame
.lower().startswith('ldst') or
438 funame
.lower().startswith('branch') or
439 funame
.lower().startswith('mmu') or
440 funame
.lower().startswith('spr') or
441 funame
.lower().startswith('trap')):
442 with m
.If(fu
.busy_o
):
445 # return both the function unit "enable" dict as well as the "busy".
446 # the "busy-or-issued" can be passed in to the Read/Write port
447 # connecters to give them permission to request access to regfiles
448 return fu_bitdict
, fu_selected
450 def connect_rdport(self
, m
, fu_bitdict
, fu_selected
,
451 rdpickers
, regfile
, regname
, fspec
):
452 comb
, sync
= m
.d
.comb
, m
.d
.sync
458 # select the required read port. these are pre-defined sizes
459 rfile
= regs
.rf
[regfile
.lower()]
460 rport
= rfile
.r_ports
[rpidx
]
461 print("read regfile", rpidx
, regfile
, regs
.rf
.keys(),
464 # for checking if the read port has an outstanding write
465 if self
.make_hazard_vecs
:
466 wv
= regs
.wv
[regfile
.lower()]
467 wvchk
= wv
.q_int
# write-vec bit-level hazard check
469 # if a hazard is detected on this read port, simply blithely block
470 # every FU from reading on it. this is complete overkill but very
472 hazard_detected
= Signal(name
="raw_%s_%s" % (regfile
, rpidx
))
475 if not isinstance(fspecs
, list):
481 for i
, fspec
in enumerate(fspecs
):
482 # get the regfile specs for this regfile port
483 print ("fpsec", i
, fspec
, len(fspec
.specs
))
484 name
= "%s_%s_%d" % (regfile
, regname
, i
)
485 ppoffs
.append(pplen
) # record offset for picker
486 pplen
+= len(fspec
.specs
)
487 rdflag
= Signal(name
="rdflag_"+name
, reset_less
=True)
488 comb
+= rdflag
.eq(fspec
.okflag
)
489 rdflags
.append(rdflag
)
491 print ("pplen", pplen
)
493 # create a priority picker to manage this port
494 rdpickers
[regfile
][rpidx
] = rdpick
= PriorityPicker(pplen
)
495 m
.submodules
["rdpick_%s_%s" % (regfile
, rpidx
)] = rdpick
501 for i
, fspec
in enumerate(fspecs
):
502 (rf
, _read
, wid
, fuspecs
) = \
503 (fspec
.okflag
, fspec
.regport
, fspec
.wid
, fspec
.specs
)
504 # connect up the FU req/go signals, and the reg-read to the FU
505 # and create a Read Broadcast Bus
506 for pi
, fuspec
in enumerate(fspec
.specs
):
507 (funame
, fu
, idx
) = (fuspec
.funame
, fuspec
.fu
, fuspec
.idx
)
509 name
= "%s_%s_%s_%i" % (regfile
, rpidx
, funame
, pi
)
510 fu_active
= fu_selected
[funame
]
511 fu_issued
= fu_bitdict
[funame
]
513 # get (or set up) a latched copy of read register number
514 # and (sigh) also the read-ok flag
515 # TODO: use nmutil latchregister
516 rhname
= "%s_%s_%d" % (regfile
, regname
, i
)
517 rdflag
= Signal(name
="rdflag_%s_%s" % (funame
, rhname
),
519 if rhname
not in fu
.rf_latches
:
520 rfl
= Signal(name
="rdflag_latch_"+rhname
)
521 fu
.rf_latches
[rhname
] = rfl
522 with m
.If(fu
.issue_i
):
523 sync
+= rfl
.eq(rdflags
[i
])
525 rfl
= fu
.rf_latches
[rhname
]
527 # now the register port
528 rname
= "%s_%s_%s_%d" % (funame
, regfile
, regname
, pi
)
529 read
= Signal
.like(_read
, name
="read_"+rname
)
530 if rname
not in fu
.rd_latches
:
531 rdl
= Signal
.like(_read
, name
="rdlatch_"+rname
)
532 fu
.rd_latches
[rname
] = rdl
533 with m
.If(fu
.issue_i
):
534 sync
+= rdl
.eq(_read
)
536 rdl
= fu
.rd_latches
[rname
]
538 # make the read immediately available on issue cycle
539 # after the read cycle, otherwies use the latched copy.
540 # this captures the regport and okflag on issue
541 with m
.If(fu
.issue_i
):
542 comb
+= read
.eq(_read
)
543 comb
+= rdflag
.eq(rdflags
[i
])
546 comb
+= rdflag
.eq(rfl
)
548 # connect request-read to picker input, and output to go-rd
549 addr_en
= Signal
.like(read
, name
="addr_en_"+name
)
550 pick
= Signal(name
="pick_"+name
) # picker input
551 rp
= Signal(name
="rp_"+name
) # picker output
552 delay_pick
= Signal(name
="dp_"+name
) # read-enable "underway"
553 rhazard
= Signal(name
="rhaz_"+name
)
555 # exclude any currently-enabled read-request (mask out active)
556 # entirely block anything hazarded from being picked
557 comb
+= pick
.eq(fu
.rd_rel_o
[idx
] & fu_active
& rdflag
&
558 ~delay_pick
& ~rhazard
)
559 comb
+= rdpick
.i
[pi
].eq(pick
)
560 comb
+= fu
.go_rd_i
[idx
].eq(delay_pick
) # pass in *delayed* pick
562 # if picked, select read-port "reg select" number to port
563 comb
+= rp
.eq(rdpick
.o
[pi
] & rdpick
.en_o
)
564 sync
+= delay_pick
.eq(rp
) # delayed "pick"
565 comb
+= addr_en
.eq(Mux(rp
, read
, 0))
567 # the read-enable happens combinatorially (see mux-bus below)
568 # but it results in the data coming out on a one-cycle delay.
572 addrs
.append(addr_en
)
575 # use the *delayed* pick signal to put requested data onto bus
576 with m
.If(delay_pick
):
577 # connect regfile port to input, creating fan-out Bus
579 print("reg connect widths",
580 regfile
, regname
, pi
, funame
,
581 src
.shape(), rport
.o_data
.shape())
582 # all FUs connect to same port
583 comb
+= src
.eq(rport
.o_data
)
585 if not self
.make_hazard_vecs
:
588 # read the write-hazard bitvector (wv) for any bit that is
589 wvchk_en
= Signal(len(wvchk
), name
="wv_chk_addr_en_"+name
)
590 issue_active
= Signal(name
="rd_iactive_"+name
)
591 # XXX combinatorial loop here
592 comb
+= issue_active
.eq(fu_active
& rdflag
)
593 with m
.If(issue_active
):
595 comb
+= wvchk_en
.eq(read
)
597 comb
+= wvchk_en
.eq(1<<read
)
598 # if FU is busy (which doesn't get set at the same time as
599 # issue) and no hazard was detected, clear wvchk_en (i.e.
600 # stop checking for hazards). there is a loop here, but it's
601 # via a DFF, so is ok. some linters may complain, but hey.
602 with m
.If(fu
.busy_o
& ~rhazard
):
603 comb
+= wvchk_en
.eq(0)
605 # read-hazard is ANDed with (filtered by) what is actually
607 comb
+= rhazard
.eq((wvchk
& wvchk_en
).bool())
609 wvens
.append(wvchk_en
)
611 # or-reduce the muxed read signals
613 # for unary-addressed
614 comb
+= rport
.ren
.eq(ortreereduce_sig(rens
))
616 # for binary-addressed
617 comb
+= rport
.addr
.eq(ortreereduce_sig(addrs
))
618 comb
+= rport
.ren
.eq(Cat(*rens
).bool())
619 print ("binary", regfile
, rpidx
, rport
, rport
.ren
, rens
, addrs
)
621 if not self
.make_hazard_vecs
:
622 return Const(0) # declare "no hazards"
624 # enable the read bitvectors for this issued instruction
625 # and return whether any write-hazard bit is set
626 wvchk_and
= Signal(len(wvchk
), name
="wv_chk_"+name
)
627 comb
+= wvchk_and
.eq(wvchk
& ortreereduce_sig(wvens
))
628 comb
+= hazard_detected
.eq(wvchk_and
.bool())
629 return hazard_detected
631 def connect_rdports(self
, m
, fu_bitdict
, fu_selected
):
632 """connect read ports
634 orders the read regspecs into a dict-of-dicts, by regfile, by
635 regport name, then connects all FUs that want that regport by
636 way of a PriorityPicker.
638 comb
, sync
= m
.d
.comb
, m
.d
.sync
643 # dictionary of lists of regfile read ports
644 byregfiles_rd
, byregfiles_rdspec
= self
.get_byregfiles(True)
646 # okaay, now we need a PriorityPicker per regfile per regfile port
647 # loootta pickers... peter piper picked a pack of pickled peppers...
649 for regfile
, spec
in byregfiles_rd
.items():
650 fuspecs
= byregfiles_rdspec
[regfile
]
651 rdpickers
[regfile
] = {}
653 # argh. an experiment to merge RA and RB in the INT regfile
654 # (we have too many read/write ports)
655 if self
.regreduce_en
:
657 fuspecs
['rabc'] = [fuspecs
.pop('rb')]
658 fuspecs
['rabc'].append(fuspecs
.pop('rc'))
659 fuspecs
['rabc'].append(fuspecs
.pop('ra'))
660 if regfile
== 'FAST':
661 fuspecs
['fast1'] = [fuspecs
.pop('fast1')]
662 if 'fast2' in fuspecs
:
663 fuspecs
['fast1'].append(fuspecs
.pop('fast2'))
664 if 'fast3' in fuspecs
:
665 fuspecs
['fast1'].append(fuspecs
.pop('fast3'))
667 # for each named regfile port, connect up all FUs to that port
668 # also return (and collate) hazard detection)
669 for (regname
, fspec
) in sort_fuspecs(fuspecs
):
670 print("connect rd", regname
, fspec
)
671 rh
= self
.connect_rdport(m
, fu_bitdict
, fu_selected
,
676 return Cat(*rd_hazard
).bool()
678 def make_hazards(self
, m
, regfile
, rfile
, wvclr
, wvset
,
679 funame
, regname
, idx
,
680 addr_en
, wp
, fu
, fu_active
, wrflag
, write
,
682 """make_hazards: a setter and a clearer for the regfile write ports
684 setter is at issue time (using PowerDecoder2 regfile write numbers)
685 clearer is at regfile write time (when FU has said what to write to)
687 there is *one* unusual case here which has to be dealt with:
688 when the Function Unit does *NOT* request a write to the regfile
689 (has its data.ok bit CLEARED). this is perfectly legitimate.
692 comb
, sync
= m
.d
.comb
, m
.d
.sync
693 name
= "%s_%s_%d" % (funame
, regname
, idx
)
695 # connect up the bitvector write hazard. unlike the
696 # regfile writeports, a ONE must be written to the corresponding
697 # bit of the hazard bitvector (to indicate the existence of
700 # the detection of what shall be written to is based
701 # on *issue*. it is delayed by 1 cycle so that instructions
702 # "addi 5,5,0x2" do not cause combinatorial loops due to
703 # fake-dependency on *themselves*. this will totally fail
704 # spectacularly when doing multi-issue
705 print ("write vector (for regread)", regfile
, wvset
)
706 wviaddr_en
= Signal(len(wvset
), name
="wv_issue_addr_en_"+name
)
707 issue_active
= Signal(name
="iactive_"+name
)
708 sync
+= issue_active
.eq(fu
.issue_i
& fu_active
& wrflag
)
709 with m
.If(issue_active
):
711 comb
+= wviaddr_en
.eq(write
)
713 comb
+= wviaddr_en
.eq(1<<write
)
715 # deal with write vector clear: this kicks in when the regfile
716 # is written to, and clears the corresponding bitvector entry
717 print ("write vector", regfile
, wvclr
)
718 wvaddr_en
= Signal(len(wvclr
), name
="wvaddr_en_"+name
)
720 comb
+= wvaddr_en
.eq(addr_en
)
723 comb
+= wvaddr_en
.eq(1<<addr_en
)
725 # XXX ASSUME that LDSTFunctionUnit always sets the data it intends to
726 # this may NOT be the case when an exception occurs
727 if isinstance(fu
, LDSTFunctionUnit
):
728 return wvaddr_en
, wviaddr_en
730 # okaaay, this is preparation for the awkward case.
731 # * latch a copy of wrflag when issue goes high.
732 # * when the fu_wrok (data.ok) flag is NOT set,
733 # but the FU is done, the FU is NEVER going to write
734 # so the bitvector has to be cleared.
735 latch_wrflag
= Signal(name
="latch_wrflag_"+name
)
736 with m
.If(~fu
.busy_o
):
737 sync
+= latch_wrflag
.eq(0)
738 with m
.If(fu
.issue_i
& fu_active
):
739 sync
+= latch_wrflag
.eq(wrflag
)
740 with m
.If(fu
.alu_done_o
& latch_wrflag
& ~fu_wrok
):
742 comb
+= wvaddr_en
.eq(write
) # addr_en gated with wp, don't use
744 comb
+= wvaddr_en
.eq(1<<addr_en
) # binary addr_en not gated
746 return wvaddr_en
, wviaddr_en
748 def connect_wrport(self
, m
, fu_bitdict
, fu_selected
,
749 wrpickers
, regfile
, regname
, fspec
):
750 comb
, sync
= m
.d
.comb
, m
.d
.sync
756 # select the required write port. these are pre-defined sizes
757 rfile
= regs
.rf
[regfile
.lower()]
758 wport
= rfile
.w_ports
[rpidx
]
760 print("connect wr", regname
, "unary", rfile
.unary
, fspec
)
761 print(regfile
, regs
.rf
.keys())
763 # select the write-protection hazard vector. note that this still
764 # requires to WRITE to the hazard bitvector! read-requests need
765 # to RAISE the bitvector (set it to 1), which, duh, requires a WRITE
766 if self
.make_hazard_vecs
:
767 wv
= regs
.wv
[regfile
.lower()]
768 wvset
= wv
.s
# write-vec bit-level hazard ctrl
769 wvclr
= wv
.r
# write-vec bit-level hazard ctrl
770 wvchk
= wv
.q
# write-after-write hazard check
771 wvchk_qint
= wv
.q
# write-after-write hazard check, NOT delayed
774 if not isinstance(fspecs
, list):
781 for i
, fspec
in enumerate(fspecs
):
782 # get the regfile specs for this regfile port
783 (wf
, _write
, wid
, fuspecs
) = \
784 (fspec
.okflag
, fspec
.regport
, fspec
.wid
, fspec
.specs
)
785 print ("fpsec", i
, "wrflag", wf
, fspec
, len(fuspecs
))
786 ppoffs
.append(pplen
) # record offset for picker
787 pplen
+= len(fuspecs
)
789 name
= "%s_%s_%d" % (regfile
, regname
, i
)
790 wrflag
= Signal(name
="wr_flag_"+name
)
792 comb
+= wrflag
.eq(wf
)
795 wrflags
.append(wrflag
)
797 # create a priority picker to manage this port
798 wrpickers
[regfile
][rpidx
] = wrpick
= PriorityPicker(pplen
)
799 m
.submodules
["wrpick_%s_%s" % (regfile
, rpidx
)] = wrpick
806 #wvens = [] - not needed: reading of writevec is permanently held hi
808 for i
, fspec
in enumerate(fspecs
):
809 # connect up the FU req/go signals and the reg-read to the FU
810 # these are arbitrated by Data.ok signals
811 (wf
, _write
, wid
, fuspecs
) = \
812 (fspec
.okflag
, fspec
.regport
, fspec
.wid
, fspec
.specs
)
813 for pi
, fuspec
in enumerate(fspec
.specs
):
814 (funame
, fu
, idx
) = (fuspec
.funame
, fuspec
.fu
, fuspec
.idx
)
815 fu_requested
= fu_bitdict
[funame
]
817 name
= "%s_%s_%s_%d" % (funame
, regfile
, regname
, idx
)
818 # get (or set up) a write-latched copy of write register number
819 write
= Signal
.like(_write
, name
="write_"+name
)
820 rname
= "%s_%s_%s_%d" % (funame
, regfile
, regname
, idx
)
821 if rname
not in fu
.wr_latches
:
822 wrl
= Signal
.like(_write
, name
="wrlatch_"+rname
)
823 fu
.wr_latches
[rname
] = write
824 # do not depend on fu.issue_i here, it creates a
825 # combinatorial loop on waw checking. using the FU
826 # "enable" bitdict entry for this FU is sufficient,
827 # because the PowerDecoder2 read/write nums are
828 # valid continuously when the instruction is valid
829 with m
.If(fu_requested
):
830 sync
+= wrl
.eq(_write
)
831 comb
+= write
.eq(_write
)
833 comb
+= write
.eq(wrl
)
835 write
= fu
.wr_latches
[rname
]
837 # write-request comes from dest.ok
838 dest
= fu
.get_out(idx
)
839 fu_dest_latch
= fu
.get_fu_out(idx
) # latched output
840 name
= "%s_%s_%d" % (funame
, regname
, idx
)
841 fu_wrok
= Signal(name
="fu_wrok_"+name
, reset_less
=True)
842 comb
+= fu_wrok
.eq(dest
.ok
& fu
.busy_o
)
844 # connect request-write to picker input, and output to go-wr
845 fu_active
= fu_selected
[funame
]
846 pick
= fu
.wr
.rel_o
[idx
] & fu_active
847 comb
+= wrpick
.i
[pi
].eq(pick
)
848 # create a single-pulse go write from the picker output
849 wr_pick
= Signal(name
="wpick_%s_%s_%d" % (funame
, regname
, idx
))
850 comb
+= wr_pick
.eq(wrpick
.o
[pi
] & wrpick
.en_o
)
851 comb
+= fu
.go_wr_i
[idx
].eq(rising_edge(m
, wr_pick
))
853 # connect the regspec write "reg select" number to this port
854 # only if one FU actually requests (and is granted) the port
855 # will the write-enable be activated
856 wname
= "waddr_en_%s_%s_%d" % (funame
, regname
, idx
)
857 addr_en
= Signal
.like(write
, name
=wname
)
859 comb
+= wp
.eq(wr_pick
& wrpick
.en_o
)
860 comb
+= addr_en
.eq(Mux(wp
, write
, 0))
864 addrs
.append(addr_en
)
867 # connect regfile port to input
868 print("reg connect widths",
869 regfile
, regname
, pi
, funame
,
870 dest
.shape(), wport
.i_data
.shape())
871 wsigs
.append(fu_dest_latch
)
873 # now connect up the bitvector write hazard
874 if not self
.make_hazard_vecs
:
876 res
= self
.make_hazards(m
, regfile
, rfile
, wvclr
, wvset
,
877 funame
, regname
, idx
,
878 addr_en
, wp
, fu
, fu_active
,
879 wrflags
[i
], write
, fu_wrok
)
880 wvaddr_en
, wv_issue_en
= res
881 wvclren
.append(wvaddr_en
) # set only: no data => clear bit
882 wvseten
.append(wv_issue_en
) # set data same as enable
884 # read the write-hazard bitvector (wv) for any bit that is
885 fu_requested
= fu_bitdict
[funame
]
886 wvchk_en
= Signal(len(wvchk
), name
="waw_chk_addr_en_"+name
)
887 issue_active
= Signal(name
="waw_iactive_"+name
)
888 whazard
= Signal(name
="whaz_"+name
)
890 # XXX EEK! STATE regfile (branch) does not have an
891 # write-active indicator in regspec_decode_write()
892 print ("XXX FIXME waw_iactive", issue_active
,
895 # check bits from the incoming instruction. note (back
896 # in connect_instruction) that the decoder is held for
897 # us to be able to do this, here... *without* issue being
898 # held HI. we MUST NOT gate this with fu.issue_i or
899 # with fu_bitdict "enable": it would create a loop
900 comb
+= issue_active
.eq(wf
)
901 with m
.If(issue_active
):
903 comb
+= wvchk_en
.eq(write
)
905 comb
+= wvchk_en
.eq(1<<write
)
906 # if FU is busy (which doesn't get set at the same time as
907 # issue) and no hazard was detected, clear wvchk_en (i.e.
908 # stop checking for hazards). there is a loop here, but it's
909 # via a DFF, so is ok. some linters may complain, but hey.
910 with m
.If(fu
.busy_o
& ~whazard
):
911 comb
+= wvchk_en
.eq(0)
913 # write-hazard is ANDed with (filtered by) what is actually
914 # being requested. the wvchk data is on a one-clock delay,
915 # and wvchk_en comes directly from the main decoder
916 comb
+= whazard
.eq((wvchk_qint
& wvchk_en
).bool())
918 comb
+= fu
._waw
_hazard
.eq(1)
920 #wvens.append(wvchk_en)
922 # here is where we create the Write Broadcast Bus. simple, eh?
923 comb
+= wport
.i_data
.eq(ortreereduce_sig(wsigs
))
925 # for unary-addressed
926 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
928 # for binary-addressed
929 comb
+= wport
.addr
.eq(ortreereduce_sig(addrs
))
930 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
932 if not self
.make_hazard_vecs
:
935 # return these here rather than set wvclr/wvset directly,
936 # because there may be more than one write-port to a given
937 # regfile. example: XER has a write-port for SO, CA, and OV
938 # and the *last one added* of those would overwrite the other
939 # two. solution: have connect_wrports collate all the
940 # or-tree-reduced bitvector set/clear requests and drop them
941 # in as a single "thing". this can only be done because the
942 # set/get is an unary bitvector.
943 print ("make write-vecs", regfile
, regname
, wvset
, wvclr
)
944 return (ortreereduce_sig(wvclren
), # clear (regfile write)
945 ortreereduce_sig(wvseten
)) # set (issue time)
def connect_wrports(self, m, fu_bitdict, fu_selected):
    """connect write ports

    orders the write regspecs into a dict-of-dicts, by regfile,
    by regport name, then connects all FUs that want that regport
    by way of a PriorityPicker (one connect_wrport call per port).

    note that the write-port wen, write-port data, and go_wr_i all need to
    be on the exact same clock cycle.  as there is a combinatorial loop bug
    at the moment, these all use sync.
    NOTE(review): nothing in this function itself uses the sync domain,
    only comb - confirm the sentence above against connect_wrport.

    arguments:

    * m           - module being built (only m.d.comb is used here)
    * fu_bitdict  - passed through unmodified to connect_wrport
    * fu_selected - passed through unmodified to connect_wrport

    returns nothing.  when self.make_hazard_vecs is set, the set (s) and
    clear (r) inputs of each write-hazard vector regs.wv[regfile] are
    driven, as a side-effect, with the OR-reduction of the requests
    coming from every write port of that regfile.
    """
    comb = m.d.comb

    # dictionary of lists of regfile write ports
    byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

    # same for write ports.
    # BLECH!  complex code-duplication! BLECH!
    # NOTE(review): the creation of wrpickers (and its hand-over to
    # connect_wrport below) is absent from the text under review; it is
    # assumed that connect_wrport fills in the per-regfile pickers -
    # confirm against its signature.
    wrpickers = {}
    wvclrers = defaultdict(list)
    wvseters = defaultdict(list)
    for regfile in byregfiles_wr:
        fuspecs = byregfiles_wrspec[regfile]
        wrpickers[regfile] = {}

        if self.regreduce_en:
            # argh, more port-merging
            # NOTE(review): the INT guard is inferred (it mirrors the
            # FAST one); without it the pop('o') raises KeyError on any
            # regfile that has no 'o' port - confirm.
            if regfile == 'INT':
                fuspecs['o'] = [fuspecs.pop('o')]
                fuspecs['o'].append(fuspecs.pop('o1'))
            if regfile == 'FAST':
                fuspecs['fast1'] = [fuspecs.pop('fast1')]
                if 'fast2' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast2'))
                if 'fast3' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast3'))

        # collate these and record them by regfile because there
        # are sometimes more write-ports per regfile
        for (regname, fspec) in sort_fuspecs(fuspecs):
            wvclren, wvseten = self.connect_wrport(m,
                                                   fu_bitdict, fu_selected,
                                                   wrpickers,
                                                   regfile, regname, fspec)
            # the hazard vectors are looked up by lower-case regfile name
            wvclrers[regfile.lower()].append(wvclren)
            wvseters[regfile.lower()].append(wvseten)

    # no hazard bitvectors requested: nothing further to wire up
    if not self.make_hazard_vecs:
        return

    # for write-vectors: reduce the clr-ers and set-ers down to
    # a single set of bits.  otherwise if there are two write
    # ports (on some regfiles), the last one doing comb += on
    # the reg.wv[regfile] instance "wins" (and all others are ignored,
    # whoops).
    # NOTE(review): self.regs as the home of the wv vectors is inferred,
    # the binding is absent from the text under review - confirm.
    regs = self.regs
    for regfile, wvclren in wvclrers.items():
        wv = regs.wv[regfile]
        wvset = wv.s  # write-vec bit-level hazard ctrl
        wvclr = wv.r  # write-vec bit-level hazard ctrl
        # wvclrers and wvseters are always appended to as a pair, so
        # they share exactly the same keys
        wvseten = wvseters[regfile]
        comb += wvclr.eq(ortreereduce_sig(wvclren))  # clear (regfile write)
        comb += wvset.eq(ortreereduce_sig(wvseten))  # set (issue time)
def get_byregfiles(self, readmode):
    """group the register ports of every Function Unit by regfile

    readmode selects which side is analysed: True walks the fu.n_src
    source operands of each FU, False walks its fu.n_dst destinations.

    returns a tuple (byregfiles, byregfiles_spec):

    * byregfiles[regfile][idx] is the list of FUSpec(funame, fu, idx)
      using operand number idx on that regfile.  (second key is the
      operand index, not the port name)
    * byregfiles_spec[regfile][regname] is one
      ByRegSpec(okflag, regport, wid, specs) per regfile port name, where
      okflag and regport come from regspec_decode on the main decoder and
      specs lists the FUSpecs of every FU wanting that port.  wid is
      taken from the first FU found using the port.

    side-effects: the latch dictionaries on every FU are replaced with
    empty ones, and a summary of the groupings is printed.
    """
    mode = "read" if readmode else "write"
    # NOTE(review): the binding of fus is absent from the text under
    # review; self.fus.fus (a dict of name -> Function Unit) is assumed -
    # confirm.
    fus = self.fus.fus
    e = self.ireg.e  # decoded instruction to execute

    # dictionary of dictionaries of lists/tuples of regfile ports.
    # first key: regfile.  second key: operand index (byregfiles) or
    # regfile port name (byregfiles_spec)
    byregfiles = defaultdict(lambda: defaultdict(list))
    byregfiles_spec = defaultdict(dict)

    for (funame, fu) in fus.items():
        # create in each FU a receptacle for the read/write register
        # hazard numbers.  to be latched in connect_rd/write_ports
        # XXX better that this is moved into the actual FUs, but
        # the issue there is that this function is actually better
        # suited at the moment
        # NOTE(review): the readmode guard and the write-side latch are
        # inferred from gaps in the text under review - confirm.  without
        # the guard a write-mode call would wipe the read latches.
        if readmode:
            fu.rd_latches = {}  # read reg number latches
            fu.rf_latches = {}  # read flag latches
        else:
            fu.wr_latches = {}

        print("%s ports for %s" % (mode, funame))
        for idx in range(fu.n_src if readmode else fu.n_dst):
            # construct regfile specs: read uses inspec, write outspec
            (regfile, regname, wid) = fu.get_io_spec(readmode, idx)
            print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))

            # the PowerDecoder2 (main one, not the satellites) contains
            # the decoded regfile numbers. obtain these now
            okflag, regport = regspec_decode(readmode, e, regfile, regname)

            # construct the dictionary of regspec information by regfile
            if regname not in byregfiles_spec[regfile]:
                byregfiles_spec[regfile][regname] = \
                    ByRegSpec(okflag, regport, wid, [])
            # here we start to create "lanes"
            fuspec = FUSpec(funame, fu, idx)
            byregfiles[regfile][idx].append(fuspec)
            byregfiles_spec[regfile][regname].specs.append(fuspec)

    # ok just print that all out, for convenience
    for regfile in byregfiles:
        print("regfile %s ports:" % mode, regfile)
        fuspecs = byregfiles_spec[regfile]
        for regname, fspec in fuspecs.items():
            # unpack under a fresh name: rebinding "fuspecs" here would
            # shadow the dict whose items() is being iterated
            okflag, regport, wid, lanes = fspec
            print(" rf %s port %s lane: %s" % (mode, regfile, regname))
            print(" %s" % regname, wid, okflag, regport)
            for (funame, fu, idx) in lanes:
                fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                print(" ", funame, fu.__class__.__name__, idx, fusig)

    return byregfiles, byregfiles_spec
1072 yield from self
.fus
.ports()
1073 yield from self
.i
.e
.ports()
1074 yield from self
.l0
.ports()
1081 if __name__
== '__main__':
1082 pspec
= TestMemPspec(ldst_ifacetype
='testpi',
1088 dut
= NonProductionCore(pspec
)
1089 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
1090 with
open("test_core.il", "w") as f
: