"""Simple (non-production) core.

Not in any way intended for production use.  Connects up FunctionUnits to
Register Files in a brain-dead fashion that only permits one and only one
Function Unit to be operational.

The principle here is to take the Function Units, analyse their regspecs,
and turn their requirements for access to register file read/write ports
into groupings by Register File and Register File Port name.

Under each grouping - by regfile/port - a list of Function Units that
need to connect to that port is created.  As these are a contended
resource, a "Broadcast Bus" per read/write port is then also created,
with access to it managed by a PriorityPicker.

The brain-dead part of this module is that even though there is no
conflict of access, regfile read/write hazards are *not* analysed,
and consequently it is safer to wait for the Function Unit to complete
before allowing a new instruction to proceed.
"""
import operator
from collections import defaultdict

from nmigen import (Elaboratable, Module, Signal, ResetSignal, Cat, Mux,
                    Const)
from nmigen.cli import rtlil

from openpower.decoder.power_decoder2 import PowerDecodeSubset
from openpower.decoder.power_regspec_map import regspec_decode_read
from openpower.decoder.power_regspec_map import regspec_decode_write
from openpower.sv.svp64 import SVP64Rec

from nmutil.picker import PriorityPicker
from nmutil.util import treereduce
from nmutil.singlepipe import ControlBase

from soc.fu.compunits.compunits import AllFunctionUnits, LDSTFunctionUnit
from soc.regfile.regfiles import RegFiles
from openpower.decoder.power_decoder2 import get_rdflags
from soc.experiment.l0_cache import TstL0CacheBuffer  # test only
from soc.config.test.test_loadstore import TestMemPspec
from openpower.decoder.power_enums import MicrOp, Function
from soc.simple.core_data import CoreInput, CoreOutput

from nmutil.util import rising_edge
# helper function for reducing a list of objects down to a single value
# by ORing together one named attribute of each of them
def ortreereduce(tree, attr="o_data"):
    """OR-reduce attribute ``attr`` of every element of ``tree``.

    tree: sequence of objects, each of which has an attribute named ``attr``
    attr: name of the attribute to extract from each element before ORing

    Returns whatever ``treereduce`` produces when combining the extracted
    attributes pairwise with ``operator.or_``.
    """
    return treereduce(tree, operator.or_, lambda x: getattr(x, attr))
def ortreereduce_sig(tree):
    """OR-reduce the elements of ``tree`` themselves (no attribute lookup).

    tree: sequence of values that support the ``|`` operator

    Returns whatever ``treereduce`` produces when combining the elements
    pairwise with ``operator.or_``.
    """
    return treereduce(tree, operator.or_, lambda x: x)
# helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the items of ``fuspecs`` with "full*" register names first.

    fuspecs: dict mapping register (port) name to its fspec

    Returns a list of ``(regname, fspec)`` tuples: every entry whose name
    starts with "full" comes first, then all remaining entries.  Within each
    group the dictionary's own iteration order is kept (``sorted`` is
    stable, and the key is False for "full" names, which sorts first).
    """
    return sorted(fuspecs.items(),
                  key=lambda item: not item[0].startswith("full"))
# derive from ControlBase rather than have a separate Stage instance,
# this is simpler to do
class NonProductionCore(ControlBase):
    """Simple core: Function Units wired to regfiles through PriorityPickers.

    NOTE(review): the copy of this file that was reviewed had lost a number
    of short lines (list/dict initialisers, ``else:`` / ``continue`` lines,
    FSM transitions, some ``def`` headers).  Initialisers that are forced by
    an otherwise-undefined name were simply restored.  Anything less obvious
    carries a "NOTE(review)" comment and must be confirmed against the
    project history before relying on it.
    """

    def __init__(self, pspec):
        """Create the FUs, LD/ST buffer, regfiles and satellite decoders.

        pspec: core parameter specification.  The attributes ``svp64``,
               ``regreduce``, ``allow_overlap`` and ``core_type`` are
               optional (tested with hasattr); ``ldst_ifacetype`` is read
               unconditionally.
        """
        # ispec() and elaborate() both read self.pspec
        self.pspec = pspec

        # test is SVP64 is to be enabled
        self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)

        # test to see if regfile ports should be reduced
        self.regreduce_en = (hasattr(pspec, "regreduce") and
                             (pspec.regreduce == True))

        # test to see if overlapping of instructions is allowed
        # (not normally enabled for TestIssuer FSM but useful for checking
        # the bitvector hazard detection, before doing In-Order)
        self.allow_overlap = (hasattr(pspec, "allow_overlap") and
                              (pspec.allow_overlap == True))

        self.make_hazard_vecs = True
        self.core_type = "fsm"
        if hasattr(pspec, "core_type"):
            self.core_type = pspec.core_type

        super().__init__(stage=self)

        # single LD/ST funnel for memory access
        self.l0 = l0 = TstL0CacheBuffer(pspec, n_units=1)
        # NOTE(review): inferred - "pi" is used below but its assignment
        # was missing; presumably the L0 buffer's port interface.
        pi = l0.pi

        # function units (only one each)
        # only include mmu if enabled in pspec
        self.fus = AllFunctionUnits(pspec, pilist=[pi])

        # link LoadStore1 into MMU
        mmu = self.fus.get_fu('mmu0')
        print ("core pspec", pspec.ldst_ifacetype)
        print ("core mmu", mmu)
        # NOTE(review): inferred guard (mmu is optional per comment above)
        if mmu is not None:
            print ("core lsmem.lsi", l0.cmpi.lsmem.lsi)
            mmu.alu.set_ldst_interface(l0.cmpi.lsmem.lsi)

        # register files (yes plural)
        self.regs = RegFiles(pspec, make_hazard_vecs=self.make_hazard_vecs)

        # set up input and output: unusual requirement to set data directly
        # (due to the way that the core is set up in a different domain,
        # see TestIssuer.setup_peripherals
        self.p.i_data, self.n.o_data = self.new_specs(None)
        self.i, self.o = self.p.i_data, self.n.o_data

        # actual internal input data used (captured)
        self.ireg = self.ispec()

        # create per-FU instruction decoders (subsetted).  these "satellite"
        # decoders reduce wire fan-out from the one (main) PowerDecoder2
        # (used directly by the trap unit) to the *twelve* (or more)
        # Function Units.  we can either have 32 wires (the instruction)
        # to each, or we can have well over a 200 wire fan-out (to 12
        # ALUs). it's an easy choice to make.
        self.decoders = {}
        self.des = {}

        for funame, fu in self.fus.fus.items():
            f_name = fu.fnunit.name
            opkls = fu.opsubsetkls
            # NOTE(review): inferred condition + "continue": the TRAP unit
            # gets no satellite decoder (elaborate() points it at the main
            # decoder instead).
            if f_name == 'TRAP':
                # TRAP decoder is the *main* decoder
                self.trapunit = funame
                continue
            # NOTE(review): one keyword-argument line was missing between
            # f_name and state; "final=True" is an inference - confirm.
            self.decoders[funame] = PowerDecodeSubset(
                None, opkls, f_name,
                final=True,
                state=self.ireg.state,
                svp64_en=self.svp64_en,
                regreduce_en=self.regreduce_en)
            self.des[funame] = self.decoders[funame].do

        # share the SPR decoder with the MMU if it exists
        if "mmu0" in self.decoders:
            self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]

    # next 3 functions are Stage API Compliance
    def setup(self, m, i):
        """Stage API hook; nothing to set up here."""
        # NOTE(review): body was missing from the reviewed copy
        pass

    def ispec(self):
        """Return a fresh CoreInput record for this core's configuration."""
        return CoreInput(self.pspec, self.svp64_en, self.regreduce_en)

    def ospec(self):
        """Return a fresh CoreOutput record."""
        # NOTE(review): method was missing from the reviewed copy;
        # CoreOutput is imported and self.o is read as a core output.
        return CoreOutput()

    # elaborate function to create HDL
    def elaborate(self, platform):
        """Build the HDL: submodules, decoders, issue logic, regfile ports.

        Returns the nmigen Module produced by ControlBase.elaborate with
        all of the core's logic added to it.
        """
        m = super().elaborate(platform)

        # for testing purposes, to cut down on build time in coriolis2
        if hasattr(self.pspec, "nocore") and self.pspec.nocore == True:
            x = Signal()  # dummy signal
            # NOTE(review): inferred - keep the dummy alive and stop early
            m.d.sync += x.eq(~x)
            return m
        comb = m.d.comb

        m.submodules.fus = self.fus
        m.submodules.l0 = self.l0
        self.regs.elaborate_into(m, platform)

        # connect decoders
        self.connect_satellite_decoders(m)

        # ssh, cheat: trap uses the main decoder because of the rewriting
        self.des[self.trapunit] = self.ireg.e.do

        # connect up Function Units, then read/write ports, and hazard conflict
        self.issue_conflict = Signal()
        fu_bitdict, fu_selected = self.connect_instruction(m)
        raw_hazard = self.connect_rdports(m, fu_bitdict, fu_selected)
        self.connect_wrports(m, fu_bitdict, fu_selected)
        if self.allow_overlap:
            comb += self.issue_conflict.eq(raw_hazard)

        # note if an exception happened.  in a pipelined or OoO design
        # this needs to be accompanied by "shadowing" (or stalling)
        el = [exc.happened for exc in self.fus.excs.values()]
        if len(el) > 0:  # at least one exception
            comb += self.o.exc_happened.eq(Cat(*el).bool())

        return m

    def connect_satellite_decoders(self, m):
        """Add every satellite decoder as a submodule and feed it from ireg."""
        comb = m.d.comb
        for k, v in self.decoders.items():
            # connect each satellite decoder and give it the instruction.
            # as subset decoders this massively reduces wire fanout given
            # the large number of ALUs
            setattr(m.submodules, "dec_%s" % v.fn_name, v)
            comb += v.dec.raw_opcode_in.eq(self.ireg.raw_insn_i)
            comb += v.dec.bigendian.eq(self.ireg.bigendian_i)
            # sigh due to SVP64 RA_OR_ZERO detection connect these too
            comb += v.sv_a_nz.eq(self.ireg.sv_a_nz)
            # NOTE(review): inferred guard - the line before pred_sm was
            # missing; the remaining connections are all SVP64-specific.
            if self.svp64_en:
                comb += v.pred_sm.eq(self.ireg.sv_pred_sm)
                comb += v.pred_dm.eq(self.ireg.sv_pred_dm)
                if k != self.trapunit:
                    # pass through SVP64 RM
                    comb += v.sv_rm.eq(self.ireg.sv_rm)
                    comb += v.is_svp64_mode.eq(self.ireg.is_svp64_mode)
                    # only the LDST PowerDecodeSubset *actually* needs to
                    # know to use the alternative decoder.
                    if k.lower().startswith("ldst"):
                        comb += v.use_svp64_ldst_dec.eq(
                            self.ireg.use_svp64_ldst_dec)

    def connect_instruction(self, m):
        """connect_instruction

        uses decoded (from PowerOp) function unit information from CSV files
        to ascertain which Function Unit should deal with the current
        instruction.

        some (such as OP_ATTN, OP_NOP) are dealt with here, including
        ignoring it and halting the processor.  OP_NOP is a bit annoying
        because the issuer expects busy flag still to be raised then lowered.
        (this requires a fake counter to be set).

        Returns (fu_bitdict, fu_selected): two dicts keyed by FU name.
        fu_bitdict holds the per-FU "issued this cycle" bit, fu_selected
        the per-FU "busy or issued" bit.
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus

        # indicate if core is busy
        busy_o = self.o.busy_o
        any_busy_o = self.o.any_busy_o

        # connect up temporary copy of incoming instruction. the FSM will
        # either blat the incoming instruction (if valid) into self.ireg
        # or if the instruction could not be delivered, keep dropping the
        # latched copy into ireg
        ilatch = self.ispec()
        self.instr_active = Signal()

        # enable/busy-signals for each FU, get one bit for each FU (by name)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_busy = Signal(len(fus), reset_less=True)
        fu_bitdict = {}
        fu_selected = {}
        for i, funame in enumerate(fus.keys()):
            fu_bitdict[funame] = fu_enable[i]
            fu_selected[funame] = fu_busy[i]

        # identify function units and create a list by fnunit so that
        # PriorityPickers can be created for selecting one of them that
        # isn't busy at the time the incoming instruction needs passing on
        by_fnunit = defaultdict(list)
        for fname, member in Function.__members__.items():
            for funame, fu in fus.items():
                fnunit = fu.fnunit.value
                if member.value & fnunit:  # this FU handles this type of op
                    by_fnunit[fname].append((funame, fu))  # add by Function

        # ok now just print out the list of FUs by Function, because we can
        for fname, fu_list in by_fnunit.items():
            print ("FUs by type", fname, fu_list)

        # now create a PriorityPicker per FU-type such that only one
        # non-busy FU will be picked
        fu_found = Signal()  # take a note if no Function Unit was available
        for fname, fu_list in by_fnunit.items():
            i_pp = PriorityPicker(len(fu_list))
            m.submodules['i_pp_%s' % fname] = i_pp
            i_l = []
            for i, (funame, fu) in enumerate(fu_list):
                # match the decoded instruction (e.do.fn_unit) against the
                # "capability" of this FU, gate that by whether that FU is
                # busy, and drop that into the PriorityPicker.
                # this will give us an output of the first available *non-busy*
                # Function Unit (Reservation Station) capable of handling this
                # instruction.
                fnunit = fu.fnunit.value
                en_req = Signal(name="issue_en_%s" % funame, reset_less=True)
                fnmatch = (self.ireg.e.do.fn_unit & fnunit).bool()
                # NOTE(review): the final operand of this AND was on a
                # missing line; self.instr_active is an inference - confirm.
                comb += en_req.eq(fnmatch & ~fu.busy_o &
                                  self.instr_active)
                i_l.append(en_req)  # store in list for doing the Cat-trick
                # picker output, gated by enable: store in fu_bitdict
                po = Signal(name="o_issue_pick_"+funame)  # picker output
                comb += po.eq(i_pp.o[i] & i_pp.en_o)
                comb += fu_bitdict[funame].eq(po)
                comb += fu_selected[funame].eq(fu.busy_o | po)
                # if we don't do this, then when there are no FUs available,
                # the "p.o_ready" signal will go back "ok we accepted this
                # instruction" which of course isn't true.
                with m.If(i_pp.en_o):
                    comb += fu_found.eq(1)
            # for each input, Cat them together and drop them into the picker
            comb += i_pp.i.eq(Cat(*i_l))

        # rdmask, which is for registers needs to come from the *main* decoder
        for funame, fu in fus.items():
            rdmask = get_rdflags(self.ireg.e, fu)
            comb += fu.rdmaskn.eq(~rdmask)

        # sigh - need a NOP counter
        # NOTE(review): counter declaration and the busy_o line were
        # missing; a 2-bit counter suffices for the value 2 loaded below.
        counter = Signal(2)
        with m.If(counter != 0):
            sync += counter.eq(counter - 1)
            comb += busy_o.eq(1)

        # default to reading from incoming instruction: may be overridden
        # by copy from latch when "waiting"
        comb += self.ireg.eq(self.i)
        # always say "ready" except if overridden
        comb += self.p.o_ready.eq(1)

        with m.FSM():
            with m.State("READY"):
                with m.If(self.p.i_valid):  # run only when valid
                    with m.Switch(self.ireg.e.do.insn_type):
                        # check for ATTN: halt if true
                        with m.Case(MicrOp.OP_ATTN):
                            m.d.sync += self.o.core_terminate_o.eq(1)

                        # fake NOP - this isn't really used (Issuer detects NOP)
                        with m.Case(MicrOp.OP_NOP):
                            sync += counter.eq(2)

                        # NOTE(review): inferred "Default" case header
                        with m.Default():
                            comb += self.instr_active.eq(1)
                            comb += self.p.o_ready.eq(0)
                            # connect instructions. only one enabled at a time
                            for funame, fu in fus.items():
                                do = self.des[funame]
                                enable = fu_bitdict[funame]

                                # run this FunctionUnit if enabled route op,
                                # issue, busy, read flags and mask to FU
                                with m.If(enable):
                                    # operand comes from the *local* decoder
                                    comb += fu.oper_i.eq_from(do)
                                    # issue when valid
                                    comb += fu.issue_i.eq(1)
                                    # instruction ok, indicate ready
                                    comb += self.p.o_ready.eq(1)

                            if self.allow_overlap:
                                with m.If(~fu_found):
                                    # latch copy of instruction
                                    sync += ilatch.eq(self.i)
                                    comb += self.p.o_ready.eq(1)  # accept
                                    # NOTE(review): next two lines inferred
                                    comb += busy_o.eq(1)
                                    m.next = "WAITING"

            with m.State("WAITING"):
                comb += self.instr_active.eq(1)
                comb += self.p.o_ready.eq(0)
                # NOTE(review): inferred - one line was missing here
                comb += busy_o.eq(1)
                # using copy of instruction, keep waiting until an FU is free
                comb += self.ireg.eq(ilatch)
                with m.If(fu_found):  # wait for conflict to clear
                    # connect instructions. only one enabled at a time
                    for funame, fu in fus.items():
                        do = self.des[funame]
                        enable = fu_bitdict[funame]

                        # run this FunctionUnit if enabled route op,
                        # issue, busy, read flags and mask to FU
                        with m.If(enable):
                            # operand comes from the *local* decoder
                            comb += fu.oper_i.eq_from(do)
                            comb += fu.issue_i.eq(1)  # issue when valid
                            comb += self.p.o_ready.eq(1)
                            # NOTE(review): next two lines inferred
                            comb += busy_o.eq(0)
                            m.next = "READY"

        print ("core: overlap allowed", self.allow_overlap)
        busys = [fu.busy_o for fu in fus.values()]
        comb += any_busy_o.eq(Cat(*busys).bool())
        if not self.allow_overlap:
            # for simple non-overlap, if any instruction is busy, set
            # busy output for core.
            comb += busy_o.eq(any_busy_o)
        else:
            # sigh deal with a fun situation that needs to be investigated
            with m.If(self.issue_conflict):
                # NOTE(review): body inferred (the If had no visible body)
                comb += busy_o.eq(1)

        # return both the function unit "enable" dict as well as the "busy".
        # the "busy-or-issued" can be passed in to the Read/Write port
        # connecters to give them permission to request access to regfiles
        return fu_bitdict, fu_selected

    def connect_rdport(self, m, fu_bitdict, fu_selected,
                       rdpickers, regfile, regname, fspec):
        """Connect every FU that reads regfile port ``regname`` to that port.

        m:           Module being built
        fu_bitdict:  per-FU "issued" bits (from connect_instruction)
        fu_selected: per-FU "busy or issued" bits
        rdpickers:   dict-of-dicts; the PriorityPicker created here is
                     stored at rdpickers[regfile][regname]
        regfile:     regfile name (lower-cased to index self.regs)
        regname:     read-port name within that regfile
        fspec:       one fspec tuple, or a list of them when ports have
                     been merged

        Returns a Signal that is raised when the write-hazard vector reports
        a hazard on this port, or Const(0) when hazard vectors are disabled.
        """
        comb, sync = m.d.comb, m.d.sync
        regs = self.regs

        rpidx = regname

        # select the required read port.  these are pre-defined sizes
        rfile = regs.rf[regfile.lower()]
        rport = rfile.r_ports[rpidx]
        # NOTE(review): trailing print arguments were on a missing line
        print("read regfile", rpidx, regfile, regs.rf.keys(),
              rfile, rfile.unary)

        # for checking if the read port has an outstanding write
        if self.make_hazard_vecs:
            wv = regs.wv[regfile.lower()]
            wvchk = wv.r_ports["issue"]  # write-vec bit-level hazard check

        # if a hazard is detected on this read port, simply blithely block
        # every FU from reading on it.  this is complete overkill but very
        # simple.
        hazard_detected = Signal(name="raw_%s_%s" % (regfile, rpidx))

        # normalise to a list: merged ports arrive as a list of fspecs
        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        rdflags = []
        pplen = 0
        ppoffs = []
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, wf, read, write, wid, fuspec) = fspec
            print ("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)  # one picker input per FU on this fspec
            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name=name, reset_less=True)
            comb += rdflag.eq(rf)
            rdflags.append(rdflag)

        print ("pplen", pplen)

        # create a priority picker to manage this port
        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

        rens = []
        addrs = []
        wvens = []

        for i, fspec in enumerate(fspecs):
            (rf, wf, _read, _write, wid, fuspec) = fspec
            # connect up the FU req/go signals, and the reg-read to the FU
            # and create a Read Broadcast Bus
            for pi, (funame, fu, idx) in enumerate(fuspec):
                pi += ppoffs[i]  # position within the shared picker
                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
                fu_active = fu_selected[funame]

                # get (or set up) a latched copy of read register number
                rname = "%s_%s_%s_%d" % (funame, regfile, regname, pi)
                read = Signal.like(_read, name="read_"+name)
                if rname not in fu.rd_latches:
                    rdl = Signal.like(_read, name="rdlatch_"+rname)
                    fu.rd_latches[rname] = rdl
                    with m.If(fu.issue_i):
                        sync += rdl.eq(_read)
                else:
                    rdl = fu.rd_latches[rname]
                # latch to make the read immediately available on issue cycle
                # after the read cycle, use the latched copy
                with m.If(fu.issue_i):
                    comb += read.eq(_read)
                with m.Else():
                    comb += read.eq(rdl)

                # connect request-read to picker input, and output to go-rd
                addr_en = Signal.like(read, name="addr_en_"+name)
                pick = Signal(name="pick_"+name)      # picker input
                rp = Signal(name="rp_"+name)          # picker output
                delay_pick = Signal(name="dp_"+name)  # read-enable "underway"
                rhazard = Signal(name="rhaz_"+name)

                # exclude any currently-enabled read-request (mask out active)
                # entirely block anything hazarded from being picked
                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
                                ~delay_pick & ~rhazard)
                comb += rdpick.i[pi].eq(pick)
                # pass in *delayed* pick
                comb += fu.go_rd_i[idx].eq(delay_pick)

                # if picked, select read-port "reg select" number to port
                comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
                sync += delay_pick.eq(rp)  # delayed "pick"
                comb += addr_en.eq(Mux(rp, read, 0))

                # the read-enable happens combinatorially (see mux-bus below)
                # but it results in the data coming out on a one-cycle delay.
                if rfile.unary:
                    rens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    rens.append(rp)

                # use the *delayed* pick signal to put requested data onto bus
                with m.If(delay_pick):
                    # connect regfile port to input, creating fan-out Bus
                    src = fu.src_i[idx]
                    print("reg connect widths",
                          regfile, regname, pi, funame,
                          src.shape(), rport.o_data.shape())
                    # all FUs connect to same port
                    comb += src.eq(rport.o_data)

                if not self.make_hazard_vecs:
                    continue

                # read the write-hazard bitvector (wv) for any bit that is
                # set for the register this FU wants to read
                wvchk_en = Signal(len(wvchk.ren), name="wv_chk_addr_en_"+name)
                issue_active = Signal(name="rd_iactive_"+name)
                # XXX combinatorial loop here
                comb += issue_active.eq(fu_active & rf)
                with m.If(issue_active):
                    if rfile.unary:
                        comb += wvchk_en.eq(read)
                    else:
                        comb += wvchk_en.eq(1<<read)
                # if FU is busy (which doesn't get set at the same time as
                # issue) and no hazard was detected, clear wvchk_en (i.e.
                # stop checking for hazards)
                with m.If(fu.busy_o & ~rhazard):
                    comb += wvchk_en.eq(0)

                comb += rhazard.eq((wvchk.o_data & wvchk_en).bool())

                wvens.append(wvchk_en)

        # or-reduce the muxed read signals
        if rfile.unary:
            # for unary-addressed
            comb += rport.ren.eq(ortreereduce_sig(rens))
        else:
            # for binary-addressed
            comb += rport.addr.eq(ortreereduce_sig(addrs))
            comb += rport.ren.eq(Cat(*rens).bool())
            print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

        if not self.make_hazard_vecs:
            return Const(0)  # declare "no hazards"

        # enable the read bitvectors for this issued instruction
        # and return whether any write-hazard bit is set
        comb += wvchk.ren.eq(ortreereduce_sig(wvens))
        comb += hazard_detected.eq(wvchk.o_data.bool())
        return hazard_detected

    def connect_rdports(self, m, fu_bitdict, fu_selected):
        """connect read ports

        orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker.

        Returns a single-bit expression: the OR of the hazard signals
        returned by connect_rdport for every port.
        """
        rd_hazard = []

        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        rdpickers = {}
        for regfile, spec in byregfiles_rd.items():
            fuspecs = byregfiles_rdspec[regfile]
            rdpickers[regfile] = {}

            # argh.  an experiment to merge RA and RB in the INT regfile
            # (we have too many read/write ports)
            if self.regreduce_en:
                # NOTE(review): inferred guard - the line before the
                # 'rabc' merge was missing; 'FAST' below is guarded alike.
                if regfile == 'INT':
                    fuspecs['rabc'] = [fuspecs.pop('rb')]
                    fuspecs['rabc'].append(fuspecs.pop('rc'))
                    fuspecs['rabc'].append(fuspecs.pop('ra'))
                if regfile == 'FAST':
                    fuspecs['fast1'] = [fuspecs.pop('fast1')]
                    if 'fast2' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast2'))
                    if 'fast3' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast3'))

            # for each named regfile port, connect up all FUs to that port
            # also return (and collate) hazard detection)
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect rd", regname, fspec)
                rh = self.connect_rdport(m, fu_bitdict, fu_selected,
                                         rdpickers, regfile,
                                         regname, fspec)
                rd_hazard.append(rh)

        return Cat(*rd_hazard).bool()

    def make_hazards(self, m, regfile, rfile, wvclr, wvset,
                     funame, regname, idx,
                     addr_en, wp, fu, fu_active, wrflag, write,
                     fu_wrok):
        """make_hazards: a setter and a clearer for the regfile write ports

        setter is at issue time (using PowerDecoder2 regfile write numbers)
        clearer is at regfile write time (when FU has said what to write to)

        there is *one* unusual case here which has to be dealt with:
        when the Function Unit does *NOT* request a write to the regfile
        (has its data.ok bit CLEARED).  this is perfectly legitimate.

        Returns (wvaddr_en, wviaddr_en): the clear-enable and the
        issue-time set-enable for the write-hazard bitvector.
        """
        comb, sync = m.d.comb, m.d.sync
        name = "%s_%s_%d" % (funame, regname, idx)

        # connect up the bitvector write hazard.  unlike the
        # regfile writeports, a ONE must be written to the corresponding
        # bit of the hazard bitvector (to indicate the existence of
        # the hazard)

        # the detection of what shall be written to is based
        # on issue (see issue_active below)
        print ("write vector (for regread)", regfile, wvset)
        wviaddr_en = Signal(len(wvset.wen), name="wv_issue_addr_en_"+name)
        issue_active = Signal(name="iactive_"+name)
        comb += issue_active.eq(fu.issue_i & fu_active & wrflag)
        with m.If(issue_active):
            if rfile.unary:
                comb += wviaddr_en.eq(write)
            else:
                comb += wviaddr_en.eq(1<<write)

        # deal with write vector clear: this kicks in when the regfile
        # is written to, and clears the corresponding bitvector entry
        print ("write vector", regfile, wvclr)
        wvaddr_en = Signal(len(wvclr.wen), name="wvaddr_en_"+name)
        if rfile.unary:
            comb += wvaddr_en.eq(addr_en)
        else:
            # NOTE(review): inferred gate - two lines were missing between
            # the unary and binary assignments; wp is otherwise unused here.
            with m.If(wp):
                comb += wvaddr_en.eq(1<<addr_en)

        # XXX ASSUME that LDSTFunctionUnit always sets the data it intends to
        # this may NOT be the case when an exception occurs
        if isinstance(fu, LDSTFunctionUnit):
            return wvaddr_en, wviaddr_en

        # okaaay, this is preparation for the awkward case.
        # * latch a copy of wrflag when issue goes high.
        # * when the fu_wrok (data.ok) flag is NOT set,
        #   but the FU is done, the FU is NEVER going to write
        #   so the bitvector has to be cleared.
        latch_wrflag = Signal(name="latch_wrflag_"+name)
        with m.If(~fu.busy_o):
            sync += latch_wrflag.eq(0)
        with m.If(fu.issue_i & fu_active):
            sync += latch_wrflag.eq(wrflag)
        with m.If(fu.alu_done_o & latch_wrflag & ~fu_wrok):
            if rfile.unary:
                # addr_en gated with wp, don't use
                comb += wvaddr_en.eq(write)
            else:
                # binary addr_en not gated
                comb += wvaddr_en.eq(1<<addr_en)

        return wvaddr_en, wviaddr_en

    def connect_wrport(self, m, fu_bitdict, fu_selected,
                       wrpickers, regfile, regname, fspec):
        """Connect every FU that writes regfile port ``regname`` to that port.

        Arguments mirror connect_rdport; the PriorityPicker created here is
        stored at wrpickers[regfile][regname].  When hazard vectors are
        enabled the write-hazard set/clear ports are driven as well.
        Returns None.
        """
        comb, sync = m.d.comb, m.d.sync
        regs = self.regs

        rpidx = regname

        # select the required write port.  these are pre-defined sizes
        rfile = regs.rf[regfile.lower()]
        wport = rfile.w_ports[rpidx]

        print("connect wr", regname, "unary", rfile.unary, fspec)
        print(regfile, regs.rf.keys())

        # select the write-protection hazard vector.  note that this still
        # requires to WRITE to the hazard bitvector!  read-requests need
        # to RAISE the bitvector (set it to 1), which, duh, requires a WRITE
        if self.make_hazard_vecs:
            wv = regs.wv[regfile.lower()]
            wvset = wv.w_ports["set"]  # write-vec bit-level hazard ctrl
            wvclr = wv.w_ports["clr"]  # write-vec bit-level hazard ctrl

        # normalise to a list: merged ports arrive as a list of fspecs
        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        pplen = 0
        ppoffs = []
        rdflags = []
        wrflags = []
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, wf, read, write, wid, fuspec) = fspec
            print ("fpsec", i, "wrflag", wf, fspec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)  # one picker input per FU on this fspec

            name = "%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name="rd_flag_"+name)
            wrflag = Signal(name="wr_flag_"+name)
            # NOTE(review): inferred None-guards - get_byregfiles(False)
            # stores rdflag as None, and lines around both assignments
            # were missing.
            if rf is not None:
                comb += rdflag.eq(rf)
            else:
                comb += rdflag.eq(0)
            if wf is not None:
                comb += wrflag.eq(wf)
            else:
                comb += wrflag.eq(0)
            rdflags.append(rdflag)
            wrflags.append(wrflag)

        # create a priority picker to manage this port
        wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
        setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

        wsigs = []
        wens = []
        wvsets = []
        wvseten = []
        wvclren = []
        addrs = []
        for i, fspec in enumerate(fspecs):
            # connect up the FU req/go signals and the reg-read to the FU
            # these are arbitrated by Data.ok signals
            (rf, wf, read, _write, wid, fuspec) = fspec
            for pi, (funame, fu, idx) in enumerate(fuspec):
                pi += ppoffs[i]  # position within the shared picker
                name = "%s_%s_%s_%d" % (funame, regfile, regname, idx)
                # get (or set up) a write-latched copy of write register number
                write = Signal.like(_write, name="write_"+name)
                rname = "%s_%s_%s" % (funame, regfile, regname)
                if rname not in fu.wr_latches:
                    wrl = Signal.like(_write, name="wrlatch_"+rname)
                    fu.wr_latches[rname] = write
                    with m.If(fu.issue_i):
                        sync += wrl.eq(_write)
                        comb += write.eq(_write)
                    with m.Else():
                        comb += write.eq(wrl)
                else:
                    write = fu.wr_latches[rname]

                # write-request comes from dest.ok
                dest = fu.get_out(idx)
                fu_dest_latch = fu.get_fu_out(idx)  # latched output
                name = "fu_wrok_%s_%s_%d" % (funame, regname, idx)
                fu_wrok = Signal(name=name, reset_less=True)
                comb += fu_wrok.eq(dest.ok & fu.busy_o)

                # connect request-write to picker input, and output to go-wr
                fu_active = fu_selected[funame]
                pick = fu.wr.rel_o[idx] & fu_active
                comb += wrpick.i[pi].eq(pick)
                # create a single-pulse go write from the picker output
                wr_pick = Signal(name="wpick_%s_%s_%d" %
                                 (funame, regname, idx))
                comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
                comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

                # connect the regspec write "reg select" number to this port
                # only if one FU actually requests (and is granted) the port
                # will the write-enable be activated
                wname = "waddr_en_%s_%s_%d" % (funame, regname, idx)
                addr_en = Signal.like(write, name=wname)
                # NOTE(review): declaration of wp was missing; a plain
                # single-bit Signal is assumed.
                wp = Signal()
                comb += wp.eq(wr_pick & wrpick.en_o)
                comb += addr_en.eq(Mux(wp, write, 0))
                if rfile.unary:
                    wens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    wens.append(wp)

                # connect regfile port to input
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      dest.shape(), wport.i_data.shape())
                wsigs.append(fu_dest_latch)

                # now connect up the bitvector write hazard
                if not self.make_hazard_vecs:
                    continue
                res = self.make_hazards(m, regfile, rfile, wvclr, wvset,
                                        funame, regname, idx,
                                        addr_en, wp, fu, fu_active,
                                        wrflags[i], write, fu_wrok)
                wvaddr_en, wv_issue_en = res
                wvclren.append(wvaddr_en)    # set only: no data => clear bit
                wvseten.append(wv_issue_en)  # set data same as enable
                wvsets.append(wv_issue_en)   # because enable needs a 1

        # here is where we create the Write Broadcast Bus. simple, eh?
        comb += wport.i_data.eq(ortreereduce_sig(wsigs))
        if rfile.unary:
            # for unary-addressed
            comb += wport.wen.eq(ortreereduce_sig(wens))
        else:
            # for binary-addressed
            comb += wport.addr.eq(ortreereduce_sig(addrs))
            comb += wport.wen.eq(ortreereduce_sig(wens))

        if not self.make_hazard_vecs:
            return

        # for write-vectors
        # clear (regfile write)
        comb += wvclr.wen.eq(ortreereduce_sig(wvclren))
        # set (issue time)
        comb += wvset.wen.eq(ortreereduce_sig(wvseten))
        comb += wvset.i_data.eq(ortreereduce_sig(wvsets))

    def connect_wrports(self, m, fu_bitdict, fu_selected):
        """connect write ports

        orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker.

        note that the write-port wen, write-port data, and go_wr_i all need to
        be on the exact same clock cycle.  as there is a combinatorial loop bug
        at the moment, these all use sync.
        """
        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same for write ports.
        # BLECH!  complex code-duplication! BLECH!
        wrpickers = {}
        for regfile, spec in byregfiles_wr.items():
            fuspecs = byregfiles_wrspec[regfile]
            wrpickers[regfile] = {}

            if self.regreduce_en:
                # argh, more port-merging
                # NOTE(review): inferred guard - the line before the 'o'
                # merge was missing; 'FAST' below is guarded alike.
                if regfile == 'INT':
                    fuspecs['o'] = [fuspecs.pop('o')]
                    fuspecs['o'].append(fuspecs.pop('o1'))
                if regfile == 'FAST':
                    fuspecs['fast1'] = [fuspecs.pop('fast1')]
                    if 'fast2' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast2'))
                    if 'fast3' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast3'))

            for (regname, fspec) in sort_fuspecs(fuspecs):
                self.connect_wrport(m, fu_bitdict, fu_selected, wrpickers,
                                    regfile, regname, fspec)

    def get_byregfiles(self, readmode):
        """Group the FUs' register ports by regfile.

        readmode: True to collect read (source) ports, False for write
                  (destination) ports.

        Returns (byregfiles, byregfiles_spec):
        * byregfiles[regfile][idx] is a list of (funame, fu, idx) tuples
        * byregfiles_spec[regfile][regname] is the tuple
          (rdflag, wrport, read, write, wid, [fuspec, ...])

        As a side effect each FU is given an empty rd_latches (readmode) or
        wr_latches (write mode) dictionary, which is then populated below.
        """
        mode = "read" if readmode else "write"
        fus = self.fus.fus
        e = self.ireg.e  # decoded instruction to execute

        # dictionary of dictionaries of lists of regfile ports.
        # first key: regfile.  second key: regfile port name
        byregfiles = defaultdict(dict)
        byregfiles_spec = defaultdict(dict)

        for (funame, fu) in fus.items():
            # create in each FU a receptacle for the read/write register
            # hazard numbers.  to be latched in connect_rd/write_ports
            # XXX better that this is moved into the actual FUs, but
            # the issue there is that this function is actually better
            # suited at the moment
            # NOTE(review): inferred - the lines creating these dicts were
            # missing, but both dicts are tested with "in" further down.
            if readmode:
                fu.rd_latches = {}
            else:
                fu.wr_latches = {}

            print("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                # construct regfile specs: read uses inspec, write outspec
                if readmode:
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))

                # the PowerDecoder2 (main one, not the satellites) contains
                # the decoded regfile numbers. obtain these now
                if readmode:
                    rdflag, read = regspec_decode_read(e, regfile, regname)
                    wrport, write = None, None
                else:
                    rdflag, read = None, None
                    wrport, write = regspec_decode_write(e, regfile, regname)

                # construct the dictionary of regspec information by regfile
                if regname not in byregfiles_spec[regfile]:
                    byregfiles_spec[regfile][regname] = \
                        (rdflag, wrport, read, write, wid, [])
                # here we start to create "lanes"
                if idx not in byregfiles[regfile]:
                    byregfiles[regfile][idx] = []
                fuspec = (funame, fu, idx)
                byregfiles[regfile][idx].append(fuspec)
                byregfiles_spec[regfile][regname][5].append(fuspec)

                # append a latch Signal to the FU's list of latches
                # NOTE(review): two lines were missing just above this
                # point; it is not known whether this section was still
                # reached in the original - confirm.
                rname = "%s_%s" % (regfile, regname)
                if readmode:
                    if rname not in fu.rd_latches:
                        rdl = Signal.like(read, name="rdlatch_"+rname)
                        fu.rd_latches[rname] = rdl
                else:
                    if rname not in fu.wr_latches:
                        wrl = Signal.like(write, name="wrlatch_"+rname)
                        fu.wr_latches[rname] = wrl

        # ok just print that all out, for convenience
        for regfile, spec in byregfiles.items():
            print("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, wrflag, read, write, wid, fuspec] = fspec
                print(" rf %s port %s lane: %s" % (mode, regfile, regname))
                print(" %s" % regname, wid, read, write, rdflag, wrflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print(" ", funame, fu.__class__.__name__, idx, fusig)

        return byregfiles, byregfiles_spec

    def __iter__(self):
        """Yield the ports of the FUs, the decoded input and the L0 buffer."""
        # NOTE(review): the "def" line above these yields was missing;
        # ports() must exist because the __main__ script calls dut.ports().
        yield from self.fus.ports()
        yield from self.i.e.ports()
        yield from self.l0.ports()

    def ports(self):
        """Return all ports as a list (see __iter__)."""
        return list(self)
if __name__ == '__main__':
    # Build a core with a test memory configuration and dump it as RTLIL.
    # NOTE(review): only ldst_ifacetype survived in the reviewed copy; the
    # four keyword arguments after it are inferred and must be confirmed.
    pspec = TestMemPspec(ldst_ifacetype='testpi',
                         imem_ifacetype='',
                         addr_wid=48,
                         mask_wid=8,
                         reg_wid=64)
    dut = NonProductionCore(pspec)
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_core.il", "w") as f:
        # NOTE(review): body of the with-block was missing; writing the
        # converted RTLIL is the only use of "vl" and "f".
        f.write(vl)