6bca3bd8b02215bb0f61c8468cc8297582d0a699
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 (update: actually this is being added now:
21 https://bugs.libre-soc.org/show_bug.cgi?id=737)
24 from nmigen
import (Elaboratable
, Module
, Signal
, ResetSignal
, Cat
, Mux
,
26 from nmigen
.cli
import rtlil
28 from openpower
.decoder
.power_decoder2
import PowerDecodeSubset
29 from openpower
.decoder
.power_regspec_map
import regspec_decode
30 from openpower
.sv
.svp64
import SVP64Rec
32 from nmutil
.picker
import PriorityPicker
33 from nmutil
.util
import treereduce
34 from nmutil
.singlepipe
import ControlBase
36 from soc
.fu
.compunits
.compunits
import AllFunctionUnits
, LDSTFunctionUnit
37 from soc
.regfile
.regfiles
import RegFiles
38 from openpower
.decoder
.power_decoder2
import get_rdflags
39 from soc
.experiment
.l0_cache
import TstL0CacheBuffer
# test only
40 from soc
.config
.test
.test_loadstore
import TestMemPspec
41 from openpower
.decoder
.power_enums
import MicrOp
, Function
42 from soc
.simple
.core_data
import CoreInput
, CoreOutput
44 from collections
import defaultdict
, namedtuple
47 from nmutil
.util
import rising_edge
# FUSpec: one Function Unit's connection to a regfile port.
#   funame - name of the Function Unit (used as the key into the
#            per-FU enable/selected dictionaries)
#   fu     - the Function Unit object itself
#   idx    - index into that FU's per-operand signals
#            (rd_rel_o / go_rd_i / go_wr_i / get_out)
FUSpec = namedtuple("FUSpec", ("funame", "fu", "idx"))

# ByRegSpec: everything attached to one regfile port.
#   okflag  - flag that drives the per-port read/write flag signal
#   regport - register number signal that is latched and sent to the port
#   wid     - presumably the port data width - TODO confirm, it is only
#             unpacked (never used) in the code visible here
#   specs   - list of FUSpec entries wanting access to this port
ByRegSpec = namedtuple("ByRegSpec", ("okflag", "regport", "wid", "specs"))
52 # helper function for OR-reducing a list of signals down to a single signal
def ortreereduce(tree, attr="o_data"):
    """OR together one named attribute taken from every item of ``tree``.

    tree: the collection of objects to reduce (handed straight to
          nmutil's ``treereduce``)
    attr: name of the attribute fetched from each item before ORing
          (defaults to "o_data")

    returns whatever ``treereduce`` produces when combining the fetched
    attributes with ``operator.or_``.
    """
    def fetch(item):
        # per-item accessor: pull out the requested attribute
        return getattr(item, attr)

    return treereduce(tree, operator.or_, fetch)
def ortreereduce_sig(tree):
    """OR together the items of ``tree`` themselves (no attribute lookup).

    tree: the collection of values to reduce (handed straight to
          nmutil's ``treereduce``)

    returns whatever ``treereduce`` produces when combining the items
    with ``operator.or_``.
    """
    def as_is(item):
        # identity accessor: each item is used directly
        return item

    return treereduce(tree, operator.or_, as_is)
62 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the entries of ``fuspecs`` with "full*" register names first.

    fuspecs: dictionary mapping a register (port) name to its spec

    returns a new list of (regname, fspec) tuples: every entry whose name
    starts with "full" comes first, followed by all the others.  Within
    each of the two groups the dictionary's own iteration order is kept.
    The input dictionary is not modified.
    """
    # sorted() is stable and False orders before True, so keying on
    # "name does NOT start with full" floats the full-register entries
    # to the front without disturbing the relative order in either group.
    # no accumulator list is needed, so there is nothing to forget to
    # initialise.
    return sorted(fuspecs.items(),
                  key=lambda entry: not entry[0].startswith("full"))
74 # a hazard bitvector "remap" function which returns an AST expression
75 # that remaps read/write hazard regfile port numbers to either a full
76 # bitvector or a reduced subset one. SPR for example is reduced to a
78 # CRITICALLY-IMPORTANT NOTE: these bitvectors *have* to match up per
79 # regfile! therefore the remapping is per regfile, *NOT* per regfile
80 # port and certainly not based on whether it is a read port or write port.
81 # note that any reductions here will result in degraded performance due
82 # to conflicts, but at least it keeps the hazard matrix sizes down to "sane"
83 def bitvector_remap(regfile
, rfile
, port
):
84 # 8-bits (at the moment, no SVP64), CR is unary: no remap
87 # 3 bits, unary already: return the port
90 # 3 bits, unary: return the port
93 # 3 bits, unary: return the port
94 if regfile
== 'SVSTATE':
96 # 9 bits (9 entries), might be unary already
98 if rfile
.unary
: # FAST might be unary already
102 # 10 bits (!!) - reduce to one
104 if rfile
.unary
: # FAST might be unary already
109 if rfile
.unary
: # INT, check if unary/binary
115 # derive from ControlBase rather than have a separate Stage instance,
116 # this is simpler to do
117 class NonProductionCore(ControlBase
):
118 def __init__(self
, pspec
):
121 # test if SVP64 is to be enabled
122 self
.svp64_en
= hasattr(pspec
, "svp64") and (pspec
.svp64
== True)
124 # test to see if regfile ports should be reduced
125 self
.regreduce_en
= (hasattr(pspec
, "regreduce") and
126 (pspec
.regreduce
== True))
128 # test to see if overlapping of instructions is allowed
129 # (not normally enabled for TestIssuer FSM but useful for checking
130 # the bitvector hazard detection, before doing In-Order)
131 self
.allow_overlap
= (hasattr(pspec
, "allow_overlap") and
132 (pspec
.allow_overlap
== True))
135 self
.make_hazard_vecs
= self
.allow_overlap
136 self
.core_type
= "fsm"
137 if hasattr(pspec
, "core_type"):
138 self
.core_type
= pspec
.core_type
140 super().__init
__(stage
=self
)
142 # single LD/ST funnel for memory access
143 self
.l0
= l0
= TstL0CacheBuffer(pspec
, n_units
=1)
146 # function units (only one each)
147 # only include mmu if enabled in pspec
148 self
.fus
= AllFunctionUnits(pspec
, pilist
=[pi
])
150 # link LoadStore1 into MMU
151 mmu
= self
.fus
.get_fu('mmu0')
152 ldst0
= self
.fus
.get_fu('ldst0')
153 print ("core pspec", pspec
.ldst_ifacetype
)
154 print ("core mmu", mmu
)
156 lsi
= l0
.cmpi
.lsmem
.lsi
# a LoadStore1 Interface object
157 print ("core lsmem.lsi", lsi
)
158 mmu
.alu
.set_ldst_interface(lsi
)
159 # urr store I-Cache in core so it is easier to get at
160 self
.icache
= lsi
.icache
162 # register files (yes plural)
163 self
.regs
= RegFiles(pspec
, make_hazard_vecs
=self
.make_hazard_vecs
)
165 # set up input and output: unusual requirement to set data directly
166 # (due to the way that the core is set up in a different domain,
167 # see TestIssuer.setup_peripherals
168 self
.p
.i_data
, self
.n
.o_data
= self
.new_specs(None)
169 self
.i
, self
.o
= self
.p
.i_data
, self
.n
.o_data
171 # actual internal input data used (captured)
172 self
.ireg
= self
.ispec()
174 # create per-FU instruction decoders (subsetted). these "satellite"
175 # decoders reduce wire fan-out from the one (main) PowerDecoder2
176 # (used directly by the trap unit) to the *twelve* (or more)
177 # Function Units. we can either have 32 wires (the instruction)
178 # to each, or we can have well over a 200 wire fan-out (to 12
179 # ALUs). it's an easy choice to make.
183 # eep, these should be *per FU* i.e. for FunctionUnitBaseMulti
184 # they should be shared (put into the ALU *once*).
186 for funame
, fu
in self
.fus
.fus
.items():
187 f_name
= fu
.fnunit
.name
188 fnunit
= fu
.fnunit
.value
189 opkls
= fu
.opsubsetkls
191 # TRAP decoder is the *main* decoder
192 self
.trapunit
= funame
194 assert funame
not in self
.decoders
195 self
.decoders
[funame
] = PowerDecodeSubset(None, opkls
, f_name
,
197 state
=self
.ireg
.state
,
198 svp64_en
=self
.svp64_en
,
199 regreduce_en
=self
.regreduce_en
)
200 self
.des
[funame
] = self
.decoders
[funame
].do
202 # create per-Function Unit write-after-write hazard signals
203 # yes, really, this should have been added in ReservationStations
205 for funame
, fu
in self
.fus
.fus
.items():
206 fu
._waw
_hazard
= Signal(name
="waw_%s" % funame
)
208 # share the SPR decoder with the MMU if it exists
209 if "mmu0" in self
.decoders
:
210 self
.decoders
["mmu0"].mmu0_spr_dec
= self
.decoders
["spr0"]
212 # next 3 functions are Stage API Compliance
213 def setup(self
, m
, i
):
217 return CoreInput(self
.pspec
, self
.svp64_en
, self
.regreduce_en
)
222 # elaborate function to create HDL
223 def elaborate(self
, platform
):
224 m
= super().elaborate(platform
)
226 # for testing purposes, to cut down on build time in coriolis2
227 if hasattr(self
.pspec
, "nocore") and self
.pspec
.nocore
== True:
228 x
= Signal() # dummy signal
233 m
.submodules
.fus
= self
.fus
234 m
.submodules
.l0
= l0
= self
.l0
235 self
.regs
.elaborate_into(m
, platform
)
239 # amalgamate write-hazards into a single top-level Signal
240 self
.waw_hazard
= Signal()
242 for funame
, fu
in self
.fus
.fus
.items():
243 whaz
.append(fu
._waw
_hazard
)
244 comb
+= self
.waw_hazard
.eq(Cat(*whaz
).bool())
247 self
.connect_satellite_decoders(m
)
249 # ssh, cheat: trap uses the main decoder because of the rewriting
250 self
.des
[self
.trapunit
] = self
.ireg
.e
.do
252 # connect up Function Units, then read/write ports, and hazard conflict
253 self
.issue_conflict
= Signal()
254 fu_bitdict
, fu_selected
= self
.connect_instruction(m
)
255 raw_hazard
= self
.connect_rdports(m
, fu_bitdict
, fu_selected
)
256 self
.connect_wrports(m
, fu_bitdict
, fu_selected
)
257 if self
.allow_overlap
:
258 comb
+= self
.issue_conflict
.eq(raw_hazard
)
260 # note if an exception happened. in a pipelined or OoO design
261 # this needs to be accompanied by "shadowing" (or stalling)
263 for exc
in self
.fus
.excs
.values():
264 el
.append(exc
.happened
)
265 if len(el
) > 0: # at least one exception
266 comb
+= self
.o
.exc_happened
.eq(Cat(*el
).bool())
270 def connect_satellite_decoders(self
, m
):
272 for k
, v
in self
.decoders
.items():
273 # connect each satellite decoder and give it the instruction.
274 # as subset decoders this massively reduces wire fanout given
275 # the large number of ALUs
276 m
.submodules
["dec_%s" % k
] = v
277 comb
+= v
.dec
.raw_opcode_in
.eq(self
.ireg
.raw_insn_i
)
278 comb
+= v
.dec
.bigendian
.eq(self
.ireg
.bigendian_i
)
279 # sigh due to SVP64 RA_OR_ZERO detection connect these too
280 comb
+= v
.sv_a_nz
.eq(self
.ireg
.sv_a_nz
)
281 if not self
.svp64_en
:
283 comb
+= v
.pred_sm
.eq(self
.ireg
.sv_pred_sm
)
284 comb
+= v
.pred_dm
.eq(self
.ireg
.sv_pred_dm
)
285 if k
== self
.trapunit
:
287 comb
+= v
.sv_rm
.eq(self
.ireg
.sv_rm
) # pass through SVP64 RM
288 comb
+= v
.is_svp64_mode
.eq(self
.ireg
.is_svp64_mode
)
289 # only the LDST PowerDecodeSubset *actually* needs to
290 # know to use the alternative decoder. this is all
292 if not k
.lower().startswith("ldst"):
294 comb
+= v
.use_svp64_ldst_dec
.eq( self
.ireg
.use_svp64_ldst_dec
)
296 def connect_instruction(self
, m
):
297 """connect_instruction
299 uses decoded (from PowerOp) function unit information from CSV files
300 to ascertain which Function Unit should deal with the current
303 some (such as OP_ATTN, OP_NOP) are dealt with here, including
304 ignoring it and halting the processor. OP_NOP is a bit annoying
305 because the issuer expects busy flag still to be raised then lowered.
306 (this requires a fake counter to be set).
308 comb
, sync
= m
.d
.comb
, m
.d
.sync
311 # indicate if core is busy
312 busy_o
= self
.o
.busy_o
313 any_busy_o
= self
.o
.any_busy_o
315 # connect up temporary copy of incoming instruction. the FSM will
316 # either blat the incoming instruction (if valid) into self.ireg
317 # or if the instruction could not be delivered, keep dropping the
318 # latched copy into ireg
319 ilatch
= self
.ispec()
320 self
.instr_active
= Signal()
322 # enable/busy-signals for each FU, get one bit for each FU (by name)
323 fu_enable
= Signal(len(fus
), reset_less
=True)
324 fu_busy
= Signal(len(fus
), reset_less
=True)
327 for i
, funame
in enumerate(fus
.keys()):
328 fu_bitdict
[funame
] = fu_enable
[i
]
329 fu_selected
[funame
] = fu_busy
[i
]
331 # identify function units and create a list by fnunit so that
332 # PriorityPickers can be created for selecting one of them that
333 # isn't busy at the time the incoming instruction needs passing on
334 by_fnunit
= defaultdict(list)
335 for fname
, member
in Function
.__members
__.items():
336 for funame
, fu
in fus
.items():
337 fnunit
= fu
.fnunit
.value
338 if member
.value
& fnunit
: # this FU handles this type of op
339 by_fnunit
[fname
].append((funame
, fu
)) # add by Function
341 # ok now just print out the list of FUs by Function, because we can
342 for fname
, fu_list
in by_fnunit
.items():
343 print ("FUs by type", fname
, fu_list
)
345 # now create a PriorityPicker per FU-type such that only one
346 # non-busy FU will be picked
348 fu_found
= Signal() # take a note if no Function Unit was available
349 for fname
, fu_list
in by_fnunit
.items():
350 i_pp
= PriorityPicker(len(fu_list
))
351 m
.submodules
['i_pp_%s' % fname
] = i_pp
353 for i
, (funame
, fu
) in enumerate(fu_list
):
354 # match the decoded instruction (e.do.fn_unit) against the
355 # "capability" of this FU, gate that by whether that FU is
356 # busy, and drop that into the PriorityPicker.
357 # this will give us an output of the first available *non-busy*
358 # Function Unit (Reservation Station) capable of handling this
360 fnunit
= fu
.fnunit
.value
361 en_req
= Signal(name
="issue_en_%s" % funame
, reset_less
=True)
362 fnmatch
= (self
.ireg
.e
.do
.fn_unit
& fnunit
).bool()
363 comb
+= en_req
.eq(fnmatch
& ~fu
.busy_o
&
365 i_l
.append(en_req
) # store in list for doing the Cat-trick
366 # picker output, gated by enable: store in fu_bitdict
367 po
= Signal(name
="o_issue_pick_"+funame
) # picker output
368 comb
+= po
.eq(i_pp
.o
[i
] & i_pp
.en_o
)
369 comb
+= fu_bitdict
[funame
].eq(po
)
370 comb
+= fu_selected
[funame
].eq(fu
.busy_o | po
)
371 # if we don't do this, then when there are no FUs available,
372 # the "p.o_ready" signal will go back "ok we accepted this
373 # instruction" which of course isn't true.
374 with m
.If(i_pp
.en_o
):
375 comb
+= fu_found
.eq(1)
376 # for each input, Cat them together and drop them into the picker
377 comb
+= i_pp
.i
.eq(Cat(*i_l
))
379 # rdmask, which is for registers needs to come from the *main* decoder
380 for funame
, fu
in fus
.items():
381 rdmask
= get_rdflags(m
, self
.ireg
.e
, fu
)
382 comb
+= fu
.rdmaskn
.eq(~rdmask
)
384 # sigh - need a NOP counter
386 with m
.If(counter
!= 0):
387 sync
+= counter
.eq(counter
- 1)
390 # default to reading from incoming instruction: may be overridden
391 # by copy from latch when "waiting"
392 comb
+= self
.ireg
.eq(self
.i
)
393 # always say "ready" except if overridden
394 comb
+= self
.p
.o_ready
.eq(1)
397 with m
.State("READY"):
398 with m
.If(self
.p
.i_valid
): # run only when valid
399 with m
.Switch(self
.ireg
.e
.do
.insn_type
):
400 # check for ATTN: halt if true
401 with m
.Case(MicrOp
.OP_ATTN
):
402 m
.d
.sync
+= self
.o
.core_terminate_o
.eq(1)
404 # fake NOP - this isn't really used (Issuer detects NOP)
405 with m
.Case(MicrOp
.OP_NOP
):
406 sync
+= counter
.eq(2)
410 comb
+= self
.instr_active
.eq(1)
411 comb
+= self
.p
.o_ready
.eq(0)
412 # connect instructions. only one enabled at a time
413 for funame
, fu
in fus
.items():
414 do
= self
.des
[funame
]
415 enable
= fu_bitdict
[funame
]
417 # run this FunctionUnit if enabled route op,
418 # issue, busy, read flags and mask to FU
420 # operand comes from the *local* decoder
421 # do not actually issue, though, if there
422 # is a waw hazard. decoder has to still
423 # be asserted in order to detect that, tho
424 comb
+= fu
.oper_i
.eq_from(do
)
425 # issue when valid (and no write-hazard)
426 comb
+= fu
.issue_i
.eq(~self
.waw_hazard
)
427 # instruction ok, indicate ready
428 comb
+= self
.p
.o_ready
.eq(1)
430 if self
.allow_overlap
:
431 with m
.If(~fu_found | self
.waw_hazard
):
432 # latch copy of instruction
433 sync
+= ilatch
.eq(self
.i
)
434 comb
+= self
.p
.o_ready
.eq(1) # accept
438 with m
.State("WAITING"):
439 comb
+= self
.instr_active
.eq(1)
440 comb
+= self
.p
.o_ready
.eq(0)
442 # using copy of instruction, keep waiting until an FU is free
443 comb
+= self
.ireg
.eq(ilatch
)
444 with m
.If(fu_found
): # wait for conflict to clear
445 # connect instructions. only one enabled at a time
446 for funame
, fu
in fus
.items():
447 do
= self
.des
[funame
]
448 enable
= fu_bitdict
[funame
]
450 # run this FunctionUnit if enabled route op,
451 # issue, busy, read flags and mask to FU
453 # operand comes from the *local* decoder,
454 # which is asserted even if not issued,
455 # so that WaW-detection can check for hazards.
456 # only if the waw hazard is clear does the
457 # instruction actually get issued
458 comb
+= fu
.oper_i
.eq_from(do
)
460 comb
+= fu
.issue_i
.eq(~self
.waw_hazard
)
461 with m
.If(~self
.waw_hazard
):
462 comb
+= self
.p
.o_ready
.eq(1)
466 print ("core: overlap allowed", self
.allow_overlap
)
467 # true when any FU is busy (including the cycle where it is perhaps
468 # to be issued - because that's what fu_busy is)
469 comb
+= any_busy_o
.eq(fu_busy
.bool())
470 if not self
.allow_overlap
:
471 # for simple non-overlap, if any instruction is busy, set
472 # busy output for core.
473 comb
+= busy_o
.eq(any_busy_o
)
475 # sigh deal with a fun situation that needs to be investigated
477 with m
.If(self
.issue_conflict
):
479 # make sure that LDST, SPR, MMU, Branch and Trap all say "busy"
480 # and do not allow overlap. these are all the ones that
481 # are non-forward-progressing: exceptions etc. that otherwise
482 # change CoreState for some reason (MSR, PC, SVSTATE)
483 for funame
, fu
in fus
.items():
484 if (funame
.lower().startswith('ldst') or
485 funame
.lower().startswith('branch') or
486 funame
.lower().startswith('mmu') or
487 funame
.lower().startswith('spr') or
488 funame
.lower().startswith('trap')):
489 with m
.If(fu
.busy_o
):
492 # return both the function unit "enable" dict as well as the "busy".
493 # the "busy-or-issued" can be passed in to the Read/Write port
494 # connecters to give them permission to request access to regfiles
495 return fu_bitdict
, fu_selected
497 def connect_rdport(self
, m
, fu_bitdict
, fu_selected
,
498 rdpickers
, regfile
, regname
, fspec
):
499 comb
, sync
= m
.d
.comb
, m
.d
.sync
505 # select the required read port. these are pre-defined sizes
506 rfile
= regs
.rf
[regfile
.lower()]
507 rport
= rfile
.r_ports
[rpidx
]
508 print("read regfile", rpidx
, regfile
, regs
.rf
.keys(),
511 # for checking if the read port has an outstanding write
512 if self
.make_hazard_vecs
:
513 wv
= regs
.wv
[regfile
.lower()]
514 wvchk
= wv
.q_int
# write-vec bit-level hazard check
516 # if a hazard is detected on this read port, simply blithely block
517 # every FU from reading on it. this is complete overkill but very
519 hazard_detected
= Signal(name
="raw_%s_%s" % (regfile
, rpidx
))
522 if not isinstance(fspecs
, list):
528 for i
, fspec
in enumerate(fspecs
):
529 # get the regfile specs for this regfile port
530 print ("fpsec", i
, fspec
, len(fspec
.specs
))
531 name
= "%s_%s_%d" % (regfile
, regname
, i
)
532 ppoffs
.append(pplen
) # record offset for picker
533 pplen
+= len(fspec
.specs
)
534 rdflag
= Signal(name
="rdflag_"+name
, reset_less
=True)
535 comb
+= rdflag
.eq(fspec
.okflag
)
536 rdflags
.append(rdflag
)
538 print ("pplen", pplen
)
540 # create a priority picker to manage this port
541 rdpickers
[regfile
][rpidx
] = rdpick
= PriorityPicker(pplen
)
542 m
.submodules
["rdpick_%s_%s" % (regfile
, rpidx
)] = rdpick
548 for i
, fspec
in enumerate(fspecs
):
549 (rf
, _read
, wid
, fuspecs
) = \
550 (fspec
.okflag
, fspec
.regport
, fspec
.wid
, fspec
.specs
)
551 # connect up the FU req/go signals, and the reg-read to the FU
552 # and create a Read Broadcast Bus
553 for pi
, fuspec
in enumerate(fspec
.specs
):
554 (funame
, fu
, idx
) = (fuspec
.funame
, fuspec
.fu
, fuspec
.idx
)
556 name
= "%s_%s_%s_%i" % (regfile
, rpidx
, funame
, pi
)
557 fu_active
= fu_selected
[funame
]
558 fu_issued
= fu_bitdict
[funame
]
560 # get (or set up) a latched copy of read register number
561 # and (sigh) also the read-ok flag
562 # TODO: use nmutil latchregister
563 rhname
= "%s_%s_%d" % (regfile
, regname
, i
)
564 rdflag
= Signal(name
="rdflag_%s_%s" % (funame
, rhname
),
566 if rhname
not in fu
.rf_latches
:
567 rfl
= Signal(name
="rdflag_latch_%s_%s" % (funame
, rhname
))
568 fu
.rf_latches
[rhname
] = rfl
569 with m
.If(fu
.issue_i
):
570 sync
+= rfl
.eq(rdflags
[i
])
572 rfl
= fu
.rf_latches
[rhname
]
574 # now the register port
575 rname
= "%s_%s_%s_%d" % (funame
, regfile
, regname
, pi
)
576 read
= Signal
.like(_read
, name
="read_"+rname
)
577 if rname
not in fu
.rd_latches
:
578 rdl
= Signal
.like(_read
, name
="rdlatch_"+rname
)
579 fu
.rd_latches
[rname
] = rdl
580 with m
.If(fu
.issue_i
):
581 sync
+= rdl
.eq(_read
)
583 rdl
= fu
.rd_latches
[rname
]
585 # make the read immediately available on issue cycle
586 # after the read cycle, otherwise use the latched copy.
587 # this captures the regport and okflag on issue
588 with m
.If(fu
.issue_i
):
589 comb
+= read
.eq(_read
)
590 comb
+= rdflag
.eq(rdflags
[i
])
593 comb
+= rdflag
.eq(rfl
)
595 # connect request-read to picker input, and output to go-rd
596 addr_en
= Signal
.like(read
, name
="addr_en_"+name
)
597 pick
= Signal(name
="pick_"+name
) # picker input
598 rp
= Signal(name
="rp_"+name
) # picker output
599 delay_pick
= Signal(name
="dp_"+name
) # read-enable "underway"
600 rhazard
= Signal(name
="rhaz_"+name
)
602 # exclude any currently-enabled read-request (mask out active)
603 # entirely block anything hazarded from being picked
604 comb
+= pick
.eq(fu
.rd_rel_o
[idx
] & fu_active
& rdflag
&
605 ~delay_pick
& ~rhazard
)
606 comb
+= rdpick
.i
[pi
].eq(pick
)
607 comb
+= fu
.go_rd_i
[idx
].eq(delay_pick
) # pass in *delayed* pick
609 # if picked, select read-port "reg select" number to port
610 comb
+= rp
.eq(rdpick
.o
[pi
] & rdpick
.en_o
)
611 sync
+= delay_pick
.eq(rp
) # delayed "pick"
612 comb
+= addr_en
.eq(Mux(rp
, read
, 0))
614 # the read-enable happens combinatorially (see mux-bus below)
615 # but it results in the data coming out on a one-cycle delay.
619 addrs
.append(addr_en
)
622 # use the *delayed* pick signal to put requested data onto bus
623 with m
.If(delay_pick
):
624 # connect regfile port to input, creating fan-out Bus
626 print("reg connect widths",
627 regfile
, regname
, pi
, funame
,
628 src
.shape(), rport
.o_data
.shape())
629 # all FUs connect to same port
630 comb
+= src
.eq(rport
.o_data
)
632 if not self
.make_hazard_vecs
:
635 # read the write-hazard bitvector (wv) for any bit that is
636 wvchk_en
= Signal(len(wvchk
), name
="wv_chk_addr_en_"+name
)
637 issue_active
= Signal(name
="rd_iactive_"+name
)
638 # XXX combinatorial loop here
639 comb
+= issue_active
.eq(fu_active
& rdflag
)
640 with m
.If(issue_active
):
642 comb
+= wvchk_en
.eq(read
)
644 comb
+= wvchk_en
.eq(1<<read
)
645 # if FU is busy (which doesn't get set at the same time as
646 # issue) and no hazard was detected, clear wvchk_en (i.e.
647 # stop checking for hazards). there is a loop here, but it's
648 # via a DFF, so is ok. some linters may complain, but hey.
649 with m
.If(fu
.busy_o
& ~rhazard
):
650 comb
+= wvchk_en
.eq(0)
652 # read-hazard is ANDed with (filtered by) what is actually
654 comb
+= rhazard
.eq((wvchk
& wvchk_en
).bool())
656 wvens
.append(wvchk_en
)
658 # or-reduce the muxed read signals
660 # for unary-addressed
661 comb
+= rport
.ren
.eq(ortreereduce_sig(rens
))
663 # for binary-addressed
664 comb
+= rport
.addr
.eq(ortreereduce_sig(addrs
))
665 comb
+= rport
.ren
.eq(Cat(*rens
).bool())
666 print ("binary", regfile
, rpidx
, rport
, rport
.ren
, rens
, addrs
)
668 if not self
.make_hazard_vecs
:
669 return Const(0) # declare "no hazards"
671 # enable the read bitvectors for this issued instruction
672 # and return whether any write-hazard bit is set
673 wvchk_and
= Signal(len(wvchk
), name
="wv_chk_"+name
)
674 comb
+= wvchk_and
.eq(wvchk
& ortreereduce_sig(wvens
))
675 comb
+= hazard_detected
.eq(wvchk_and
.bool())
676 return hazard_detected
678 def connect_rdports(self
, m
, fu_bitdict
, fu_selected
):
679 """connect read ports
681 orders the read regspecs into a dict-of-dicts, by regfile, by
682 regport name, then connects all FUs that want that regport by
683 way of a PriorityPicker.
685 comb
, sync
= m
.d
.comb
, m
.d
.sync
690 # dictionary of lists of regfile read ports
691 byregfiles_rdspec
= self
.get_byregfiles(m
, True)
693 # okaay, now we need a PriorityPicker per regfile per regfile port
694 # loootta pickers... peter piper picked a pack of pickled peppers...
696 for regfile
, fuspecs
in byregfiles_rdspec
.items():
697 rdpickers
[regfile
] = {}
699 # argh. an experiment to merge RA and RB in the INT regfile
700 # (we have too many read/write ports)
701 if self
.regreduce_en
:
703 fuspecs
['rabc'] = [fuspecs
.pop('rb')]
704 fuspecs
['rabc'].append(fuspecs
.pop('rc'))
705 fuspecs
['rabc'].append(fuspecs
.pop('ra'))
706 if regfile
== 'FAST':
707 fuspecs
['fast1'] = [fuspecs
.pop('fast1')]
708 if 'fast2' in fuspecs
:
709 fuspecs
['fast1'].append(fuspecs
.pop('fast2'))
710 if 'fast3' in fuspecs
:
711 fuspecs
['fast1'].append(fuspecs
.pop('fast3'))
713 # for each named regfile port, connect up all FUs to that port
714 # also return (and collate) hazard detection)
715 for (regname
, fspec
) in sort_fuspecs(fuspecs
):
716 print("connect rd", regname
, fspec
)
717 rh
= self
.connect_rdport(m
, fu_bitdict
, fu_selected
,
722 return Cat(*rd_hazard
).bool()
724 def make_hazards(self
, m
, regfile
, rfile
, wvclr
, wvset
,
725 funame
, regname
, idx
,
726 addr_en
, wp
, fu
, fu_active
, wrflag
, write
,
728 """make_hazards: a setter and a clearer for the regfile write ports
730 setter is at issue time (using PowerDecoder2 regfile write numbers)
731 clearer is at regfile write time (when FU has said what to write to)
733 there is *one* unusual case here which has to be dealt with:
734 when the Function Unit does *NOT* request a write to the regfile
735 (has its data.ok bit CLEARED). this is perfectly legitimate.
738 comb
, sync
= m
.d
.comb
, m
.d
.sync
739 name
= "%s_%s_%d" % (funame
, regname
, idx
)
741 # connect up the bitvector write hazard. unlike the
742 # regfile writeports, a ONE must be written to the corresponding
743 # bit of the hazard bitvector (to indicate the existence of
746 # the detection of what shall be written to is based
747 # on *issue*. it is delayed by 1 cycle so that instructions
748 # "addi 5,5,0x2" do not cause combinatorial loops due to
749 # fake-dependency on *themselves*. this will totally fail
750 # spectacularly when doing multi-issue
751 print ("write vector (for regread)", regfile
, wvset
)
752 wviaddr_en
= Signal(len(wvset
), name
="wv_issue_addr_en_"+name
)
753 issue_active
= Signal(name
="iactive_"+name
)
754 sync
+= issue_active
.eq(fu
.issue_i
& fu_active
& wrflag
)
755 with m
.If(issue_active
):
757 comb
+= wviaddr_en
.eq(write
)
759 comb
+= wviaddr_en
.eq(1<<write
)
761 # deal with write vector clear: this kicks in when the regfile
762 # is written to, and clears the corresponding bitvector entry
763 print ("write vector", regfile
, wvclr
)
764 wvaddr_en
= Signal(len(wvclr
), name
="wvaddr_en_"+name
)
766 comb
+= wvaddr_en
.eq(addr_en
)
769 comb
+= wvaddr_en
.eq(1<<addr_en
)
771 # XXX ASSUME that LDSTFunctionUnit always sets the data it intends to
772 # this may NOT be the case when an exception occurs
773 if isinstance(fu
, LDSTFunctionUnit
):
774 return wvaddr_en
, wviaddr_en
776 # okaaay, this is preparation for the awkward case.
777 # * latch a copy of wrflag when issue goes high.
778 # * when the fu_wrok (data.ok) flag is NOT set,
779 # but the FU is done, the FU is NEVER going to write
780 # so the bitvector has to be cleared.
781 latch_wrflag
= Signal(name
="latch_wrflag_"+name
)
782 with m
.If(~fu
.busy_o
):
783 sync
+= latch_wrflag
.eq(0)
784 with m
.If(fu
.issue_i
& fu_active
):
785 sync
+= latch_wrflag
.eq(wrflag
)
786 with m
.If(fu
.alu_done_o
& latch_wrflag
& ~fu_wrok
):
788 comb
+= wvaddr_en
.eq(write
) # addr_en gated with wp, don't use
790 comb
+= wvaddr_en
.eq(1<<addr_en
) # binary addr_en not gated
792 return wvaddr_en
, wviaddr_en
794 def connect_wrport(self
, m
, fu_bitdict
, fu_selected
,
795 wrpickers
, regfile
, regname
, fspec
):
796 comb
, sync
= m
.d
.comb
, m
.d
.sync
802 # select the required write port. these are pre-defined sizes
803 rfile
= regs
.rf
[regfile
.lower()]
804 wport
= rfile
.w_ports
[rpidx
]
806 print("connect wr", regname
, "unary", rfile
.unary
, fspec
)
807 print(regfile
, regs
.rf
.keys())
809 # select the write-protection hazard vector. note that this still
810 # requires to WRITE to the hazard bitvector! read-requests need
811 # to RAISE the bitvector (set it to 1), which, duh, requires a WRITE
812 if self
.make_hazard_vecs
:
813 wv
= regs
.wv
[regfile
.lower()]
814 wvset
= wv
.s
# write-vec bit-level hazard ctrl
815 wvclr
= wv
.r
# write-vec bit-level hazard ctrl
816 wvchk
= wv
.q
# write-after-write hazard check
819 if not isinstance(fspecs
, list):
826 for i
, fspec
in enumerate(fspecs
):
827 # get the regfile specs for this regfile port
828 (wf
, _write
, wid
, fuspecs
) = \
829 (fspec
.okflag
, fspec
.regport
, fspec
.wid
, fspec
.specs
)
830 print ("fpsec", i
, "wrflag", wf
, fspec
, len(fuspecs
))
831 ppoffs
.append(pplen
) # record offset for picker
832 pplen
+= len(fuspecs
)
834 name
= "%s_%s_%d" % (regfile
, regname
, i
)
835 wrflag
= Signal(name
="wr_flag_"+name
)
837 comb
+= wrflag
.eq(wf
)
840 wrflags
.append(wrflag
)
842 # create a priority picker to manage this port
843 wrpickers
[regfile
][rpidx
] = wrpick
= PriorityPicker(pplen
)
844 m
.submodules
["wrpick_%s_%s" % (regfile
, rpidx
)] = wrpick
851 #wvens = [] - not needed: reading of writevec is permanently held hi
853 for i
, fspec
in enumerate(fspecs
):
854 # connect up the FU req/go signals and the reg-read to the FU
855 # these are arbitrated by Data.ok signals
856 (wf
, _write
, wid
, fuspecs
) = \
857 (fspec
.okflag
, fspec
.regport
, fspec
.wid
, fspec
.specs
)
858 for pi
, fuspec
in enumerate(fspec
.specs
):
859 (funame
, fu
, idx
) = (fuspec
.funame
, fuspec
.fu
, fuspec
.idx
)
860 fu_requested
= fu_bitdict
[funame
]
862 name
= "%s_%s_%s_%d" % (funame
, regfile
, regname
, idx
)
863 # get (or set up) a write-latched copy of write register number
864 write
= Signal
.like(_write
, name
="write_"+name
)
865 rname
= "%s_%s_%s_%d" % (funame
, regfile
, regname
, idx
)
866 if rname
not in fu
.wr_latches
:
867 wrl
= Signal
.like(_write
, name
="wrlatch_"+rname
)
868 fu
.wr_latches
[rname
] = write
869 # do not depend on fu.issue_i here, it creates a
870 # combinatorial loop on waw checking. using the FU
871 # "enable" bitdict entry for this FU is sufficient,
872 # because the PowerDecoder2 read/write nums are
873 # valid continuously when the instruction is valid
874 with m
.If(fu_requested
):
875 sync
+= wrl
.eq(_write
)
876 comb
+= write
.eq(_write
)
878 comb
+= write
.eq(wrl
)
880 write
= fu
.wr_latches
[rname
]
882 # write-request comes from dest.ok
883 dest
= fu
.get_out(idx
)
884 fu_dest_latch
= fu
.get_fu_out(idx
) # latched output
885 name
= "%s_%s_%d" % (funame
, regname
, idx
)
886 fu_wrok
= Signal(name
="fu_wrok_"+name
, reset_less
=True)
887 comb
+= fu_wrok
.eq(dest
.ok
& fu
.busy_o
)
889 # connect request-write to picker input, and output to go-wr
890 fu_active
= fu_selected
[funame
]
891 pick
= fu
.wr
.rel_o
[idx
] & fu_active
892 comb
+= wrpick
.i
[pi
].eq(pick
)
893 # create a single-pulse go write from the picker output
894 wr_pick
= Signal(name
="wpick_%s_%s_%d" % (funame
, regname
, idx
))
895 comb
+= wr_pick
.eq(wrpick
.o
[pi
] & wrpick
.en_o
)
896 comb
+= fu
.go_wr_i
[idx
].eq(rising_edge(m
, wr_pick
))
898 # connect the regspec write "reg select" number to this port
899 # only if one FU actually requests (and is granted) the port
900 # will the write-enable be activated
901 wname
= "waddr_en_%s_%s_%d" % (funame
, regname
, idx
)
902 addr_en
= Signal
.like(write
, name
=wname
)
904 comb
+= wp
.eq(wr_pick
& wrpick
.en_o
)
905 comb
+= addr_en
.eq(Mux(wp
, write
, 0))
909 addrs
.append(addr_en
)
912 # connect regfile port to input
913 print("reg connect widths",
914 regfile
, regname
, pi
, funame
,
915 dest
.shape(), wport
.i_data
.shape())
916 wsigs
.append(fu_dest_latch
)
918 # now connect up the bitvector write hazard
919 if not self
.make_hazard_vecs
:
921 res
= self
.make_hazards(m
, regfile
, rfile
, wvclr
, wvset
,
922 funame
, regname
, idx
,
923 addr_en
, wp
, fu
, fu_active
,
924 wrflags
[i
], write
, fu_wrok
)
925 wvaddr_en
, wv_issue_en
= res
926 wvclren
.append(wvaddr_en
) # set only: no data => clear bit
927 wvseten
.append(wv_issue_en
) # set data same as enable
929 # read the write-hazard bitvector (wv) for any bit that is
930 fu_requested
= fu_bitdict
[funame
]
931 wvchk_en
= Signal(len(wvchk
), name
="waw_chk_addr_en_"+name
)
932 issue_active
= Signal(name
="waw_iactive_"+name
)
933 whazard
= Signal(name
="whaz_"+name
)
935 # XXX EEK! STATE regfile (branch) does not have an
936 # write-active indicator in regspec_decode_write()
937 print ("XXX FIXME waw_iactive", issue_active
,
940 # check bits from the incoming instruction. note (back
941 # in connect_instruction) that the decoder is held for
942 # us to be able to do this, here... *without* issue being
943 # held HI. we MUST NOT gate this with fu.issue_i or
944 # with fu_bitdict "enable": it would create a loop
945 comb
+= issue_active
.eq(wf
)
946 with m
.If(issue_active
):
948 comb
+= wvchk_en
.eq(write
)
950 comb
+= wvchk_en
.eq(1<<write
)
951 # if FU is busy (which doesn't get set at the same time as
952 # issue) and no hazard was detected, clear wvchk_en (i.e.
953 # stop checking for hazards). there is a loop here, but it's
954 # via a DFF, so is ok. some linters may complain, but hey.
955 with m
.If(fu
.busy_o
& ~whazard
):
956 comb
+= wvchk_en
.eq(0)
958 # write-hazard is ANDed with (filtered by) what is actually
959 # being requested. the wvchk data is on a one-clock delay,
960 # and wvchk_en comes directly from the main decoder
961 comb
+= whazard
.eq((wvchk
& wvchk_en
).bool())
963 comb
+= fu
._waw
_hazard
.eq(1)
965 #wvens.append(wvchk_en)
967 # here is where we create the Write Broadcast Bus. simple, eh?
968 comb
+= wport
.i_data
.eq(ortreereduce_sig(wsigs
))
970 # for unary-addressed
971 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
973 # for binary-addressed
974 comb
+= wport
.addr
.eq(ortreereduce_sig(addrs
))
975 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
977 if not self
.make_hazard_vecs
:
980 # return these here rather than set wvclr/wvset directly,
981 # because there may be more than one write-port to a given
982 # regfile. example: XER has a write-port for SO, CA, and OV
983 # and the *last one added* of those would overwrite the other
984 # two. solution: have connect_wrports collate all the
985 # or-tree-reduced bitvector set/clear requests and drop them
986 # in as a single "thing". this can only be done because the
987 # set/get is an unary bitvector.
988 print ("make write-vecs", regfile
, regname
, wvset
, wvclr
)
989 return (wvclren
, # clear (regfile write)
990 wvseten
) # set (issue time)
def connect_wrports(self, m, fu_bitdict, fu_selected):
    """connect write ports

    orders the write regspecs into a dict-of-dicts, by regfile,
    by regport name, then connects all FUs that want that regport
    by way of a PriorityPicker.

    note that the write-port wen, write-port data, and go_wr_i all need to
    be on the exact same clock cycle.  as there is a combinatorial loop bug
    at the moment, these all use sync.

    arguments:

    * m           - module under construction.  the write-vector set/clear
                    assignments made here go into its comb domain
    * fu_bitdict  - handed unmodified to connect_wrport
    * fu_selected - handed unmodified to connect_wrport

    returns None.  when self.make_hazard_vecs is false the write ports are
    still connected, but no write-hazard vector set/clear is wired up.
    """
    comb = m.d.comb
    # NOTE(review): binding reconstructed - the block uses regs.wv[...] but
    # the line creating "regs" was lost; presumably self.regs.  confirm.
    regs = self.regs

    # dictionary of lists of regfile write ports
    byregfiles_wrspec = self.get_byregfiles(m, False)

    # same for write ports.
    # BLECH!  complex code-duplication! BLECH!
    wrpickers = {}                   # per-regfile dict, filled per port
    wvclrers = defaultdict(list)     # regfile -> write-vector clear requests
    wvseters = defaultdict(list)     # regfile -> write-vector set requests
    for regfile, fuspecs in byregfiles_wrspec.items():
        wrpickers[regfile] = {}

        if self.regreduce_en:
            # argh, more port-merging
            if regfile == 'INT':
                fuspecs['o'] = [fuspecs.pop('o')]
                fuspecs['o'].append(fuspecs.pop('o1'))
            if regfile == 'FAST':
                fuspecs['fast1'] = [fuspecs.pop('fast1')]
                if 'fast2' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast2'))
                if 'fast3' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast3'))

        # collate these and record them by regfile because there
        # are sometimes more write-ports per regfile
        for (regname, fspec) in sort_fuspecs(fuspecs):
            # NOTE(review): the wrpickers[regfile] argument is reconstructed
            # (one line of the argument list was lost); verify its position
            # against the connect_wrport signature.
            wvclren, wvseten = self.connect_wrport(m,
                                                   fu_bitdict, fu_selected,
                                                   wrpickers[regfile],
                                                   regfile, regname, fspec)
            wvclrers[regfile.lower()] += wvclren
            wvseters[regfile.lower()] += wvseten

    # no hazard vectors requested: nothing further to wire up
    if not self.make_hazard_vecs:
        return

    # for write-vectors: reduce the clr-ers and set-ers down to
    # a single set of bits.  otherwise if there are two write
    # ports (on some regfiles), the last one doing comb += on
    # the reg.wv[regfile] instance "wins" (and all others are ignored,
    # whoops).  if there was only one write-port per wv regfile this would
    # not be needed.
    for regfile, wvclren in wvclrers.items():
        wv = regs.wv[regfile]
        wvset = wv.s  # write-vec bit-level hazard ctrl
        wvclr = wv.r  # write-vec bit-level hazard ctrl
        wvseten = wvseters[regfile]
        comb += wvclr.eq(ortreereduce_sig(wvclren))  # clear (regfile write)
        comb += wvset.eq(ortreereduce_sig(wvseten))  # set (issue time)
def get_byregfiles(self, m, readmode):
    """group Function Unit register-port requirements by regfile and port

    walks every Function Unit, asks it (fu.get_io_spec) which regfile and
    regfile port each of its sources (readmode True) or destinations
    (readmode False) needs, and collects the answers.

    arguments:

    * m        - module under construction, passed on to regspec_decode
    * readmode - True to analyse read (source) ports, False for write
                 (destination) ports

    returns a dict-of-dicts: first key regfile, second key regfile port
    name, value a ByRegSpec(okflag, regport, wid, specs) where specs is
    the list of FUSpec(funame, fu, idx) that want that port.

    side-effect: (re)creates the empty latch dictionaries on each FU:
    rd_latches and rf_latches in read mode, wr_latches in write mode.
    """
    mode = "read" if readmode else "write"
    # NOTE(review): binding reconstructed - the block iterates fus.items()
    # but the line creating "fus" was lost; presumably the dict of Function
    # Units held by self.fus.  confirm.
    fus = self.fus.fus
    e = self.ireg.e  # decoded instruction to execute

    # dictionary of dictionaries of lists/tuples of regfile ports.
    # first key: regfile.  second key: regfile port name
    byregfiles_spec = defaultdict(dict)

    for (funame, fu) in fus.items():
        # create in each FU a receptacle for the read/write register
        # hazard numbers (and okflags for read).  to be latched in
        # connect_rd/write_ports.  only the latches for the current mode
        # are created, so that the write pass does not wipe the read ones
        if readmode:
            fu.rd_latches = {}  # read reg number latches
            fu.rf_latches = {}  # read flag latches
        else:
            fu.wr_latches = {}  # write reg number latches

        # construct regfile specs: read uses inspec, write outspec
        print("%s ports for %s" % (mode, funame))
        for idx in range(fu.n_src if readmode else fu.n_dst):
            (regfile, regname, wid) = fu.get_io_spec(readmode, idx)
            print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))

            # the PowerDecoder2 (main one, not the satellites) contains
            # the decoded regfile numbers. obtain these now
            decinfo = regspec_decode(m, readmode, e, regfile, regname)
            okflag, regport = decinfo.okflag, decinfo.regport

            # construct the dictionary of regspec information by regfile
            if regname not in byregfiles_spec[regfile]:
                byregfiles_spec[regfile][regname] = \
                    ByRegSpec(okflag, regport, wid, [])

            # here we start to create "lanes" where each Function Unit
            # requiring access to a given [single-contended resource]
            # regfile port is appended to a list, so that PriorityPickers
            # can be created to give uncontested access to it
            fuspec = FUSpec(funame, fu, idx)
            byregfiles_spec[regfile][regname].specs.append(fuspec)

    # ok just print that all out, for convenience
    for regfile, fuspecs in byregfiles_spec.items():
        print("regfile %s ports:" % mode, regfile)
        for regname, fspec in fuspecs.items():
            # unpack into "specs", not "fuspecs": the latter is the dict
            # currently being iterated and must not be shadowed
            okflag, regport, wid, specs = fspec
            print(" rf %s port %s lane: %s" % (mode, regfile, regname))
            print(" %s" % regname, wid, okflag, regport)
            for (funame, fu, idx) in specs:
                fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                print(" ", funame, fu.__class__.__name__, idx, fusig)

    return byregfiles_spec
1116 yield from self
.fus
.ports()
1117 yield from self
.i
.e
.ports()
1118 yield from self
.l0
.ports()
1125 if __name__
== '__main__':
1126 pspec
= TestMemPspec(ldst_ifacetype
='testpi',
1132 dut
= NonProductionCore(pspec
)
1133 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
1134 with
open("test_core.il", "w") as f
: