3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
22 from nmigen
import (Elaboratable
, Module
, Signal
, ResetSignal
, Cat
, Mux
,
24 from nmigen
.cli
import rtlil
26 from openpower
.decoder
.power_decoder2
import PowerDecodeSubset
27 from openpower
.decoder
.power_regspec_map
import regspec_decode_read
28 from openpower
.decoder
.power_regspec_map
import regspec_decode_write
29 from openpower
.sv
.svp64
import SVP64Rec
31 from nmutil
.picker
import PriorityPicker
32 from nmutil
.util
import treereduce
33 from nmutil
.singlepipe
import ControlBase
35 from soc
.fu
.compunits
.compunits
import AllFunctionUnits
, LDSTFunctionUnit
36 from soc
.regfile
.regfiles
import RegFiles
37 from openpower
.decoder
.power_decoder2
import get_rdflags
38 from soc
.experiment
.l0_cache
import TstL0CacheBuffer
# test only
39 from soc
.config
.test
.test_loadstore
import TestMemPspec
40 from openpower
.decoder
.power_enums
import MicrOp
, Function
41 from soc
.simple
.core_data
import CoreInput
, CoreOutput
43 from collections
import defaultdict
46 from nmutil
.util
import rising_edge
49 # helper function for reducing a list of signals down to a parallel
def ortreereduce(tree, attr="o_data"):
    """OR together one attribute taken from every item of `tree`.

    Each item is mapped to ``getattr(item, attr)`` and the mapped values
    are combined with ``operator.or_`` by ``treereduce``.

    tree -- the collection of objects to reduce (passed to treereduce
            unchanged)
    attr -- name of the attribute to read from each item; defaults to
            "o_data"

    Returns whatever ``treereduce`` produces for the OR-reduction.
    """
    def pick(item):
        # accessor handed to treereduce: selects the field being ORed
        return getattr(item, attr)

    return treereduce(tree, operator.or_, pick)
def ortreereduce_sig(tree):
    """OR together the items of `tree` directly.

    Same reduction as ortreereduce, but the items themselves are combined
    (identity accessor) rather than one of their attributes.

    tree -- the collection of values to reduce (passed to treereduce
            unchanged)

    Returns whatever ``treereduce`` produces for the OR-reduction.
    """
    def same(item):
        # identity accessor: the item is already the value to OR
        return item

    return treereduce(tree, operator.or_, same)
59 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the (regname, fspec) pairs of `fuspecs`, "full" ports first.

    Entries whose register-port name starts with "full" are placed ahead
    of all other entries.  Within each of the two groups the iteration
    order of `fuspecs` is preserved.

    fuspecs -- mapping of register-port name to its spec

    Returns a new list of (regname, fspec) tuples; `fuspecs` itself is
    not modified.
    """
    # the result list must start out empty on every call: the entries are
    # partitioned into two groups and then concatenated
    full_first = []
    others = []
    for regname, fspec in fuspecs.items():
        if regname.startswith("full"):
            full_first.append((regname, fspec))
        else:
            others.append((regname, fspec))
    return full_first + others
71 # derive from ControlBase rather than have a separate Stage instance,
72 # this is simpler to do
73 class NonProductionCore(ControlBase
):
74 def __init__(self
, pspec
):
77 # test if SVP64 is to be enabled
78 self
.svp64_en
= hasattr(pspec
, "svp64") and (pspec
.svp64
== True)
80 # test to see if regfile ports should be reduced
81 self
.regreduce_en
= (hasattr(pspec
, "regreduce") and
82 (pspec
.regreduce
== True))
84 # test to see if overlapping of instructions is allowed
85 # (not normally enabled for TestIssuer FSM but useful for checking
86 # the bitvector hazard detection, before doing In-Order)
87 self
.allow_overlap
= (hasattr(pspec
, "allow_overlap") and
88 (pspec
.allow_overlap
== True))
91 self
.make_hazard_vecs
= True
92 self
.core_type
= "fsm"
93 if hasattr(pspec
, "core_type"):
94 self
.core_type
= pspec
.core_type
96 super().__init
__(stage
=self
)
98 # single LD/ST funnel for memory access
99 self
.l0
= l0
= TstL0CacheBuffer(pspec
, n_units
=1)
102 # function units (only one each)
103 # only include mmu if enabled in pspec
104 self
.fus
= AllFunctionUnits(pspec
, pilist
=[pi
])
106 # link LoadStore1 into MMU
107 mmu
= self
.fus
.get_fu('mmu0')
108 print ("core pspec", pspec
.ldst_ifacetype
)
109 print ("core mmu", mmu
)
111 print ("core lsmem.lsi", l0
.cmpi
.lsmem
.lsi
)
112 mmu
.alu
.set_ldst_interface(l0
.cmpi
.lsmem
.lsi
)
114 # register files (yes plural)
115 self
.regs
= RegFiles(pspec
, make_hazard_vecs
=self
.make_hazard_vecs
)
117 # set up input and output: unusual requirement to set data directly
118 # (due to the way that the core is set up in a different domain,
119 # see TestIssuer.setup_peripherals
120 self
.p
.i_data
, self
.n
.o_data
= self
.new_specs(None)
121 self
.i
, self
.o
= self
.p
.i_data
, self
.n
.o_data
123 # actual internal input data used (captured)
124 self
.ireg
= self
.ispec()
126 # create per-FU instruction decoders (subsetted). these "satellite"
127 # decoders reduce wire fan-out from the one (main) PowerDecoder2
128 # (used directly by the trap unit) to the *twelve* (or more)
129 # Function Units. we can either have 32 wires (the instruction)
130 # to each, or we can have well over a 200 wire fan-out (to 12
131 # ALUs). it's an easy choice to make.
135 for funame
, fu
in self
.fus
.fus
.items():
136 f_name
= fu
.fnunit
.name
137 fnunit
= fu
.fnunit
.value
138 opkls
= fu
.opsubsetkls
140 # TRAP decoder is the *main* decoder
141 self
.trapunit
= funame
143 self
.decoders
[funame
] = PowerDecodeSubset(None, opkls
, f_name
,
145 state
=self
.ireg
.state
,
146 svp64_en
=self
.svp64_en
,
147 regreduce_en
=self
.regreduce_en
)
148 self
.des
[funame
] = self
.decoders
[funame
].do
150 # share the SPR decoder with the MMU if it exists
151 if "mmu0" in self
.decoders
:
152 self
.decoders
["mmu0"].mmu0_spr_dec
= self
.decoders
["spr0"]
154 # next 3 functions are Stage API Compliance
155 def setup(self
, m
, i
):
159 return CoreInput(self
.pspec
, self
.svp64_en
, self
.regreduce_en
)
164 # elaborate function to create HDL
165 def elaborate(self
, platform
):
166 m
= super().elaborate(platform
)
168 # for testing purposes, to cut down on build time in coriolis2
169 if hasattr(self
.pspec
, "nocore") and self
.pspec
.nocore
== True:
170 x
= Signal() # dummy signal
175 m
.submodules
.fus
= self
.fus
176 m
.submodules
.l0
= l0
= self
.l0
177 self
.regs
.elaborate_into(m
, platform
)
182 self
.connect_satellite_decoders(m
)
184 # ssh, cheat: trap uses the main decoder because of the rewriting
185 self
.des
[self
.trapunit
] = self
.ireg
.e
.do
187 # connect up Function Units, then read/write ports, and hazard conflict
188 self
.issue_conflict
= Signal()
189 fu_bitdict
, fu_selected
= self
.connect_instruction(m
)
190 raw_hazard
= self
.connect_rdports(m
, fu_bitdict
, fu_selected
)
191 self
.connect_wrports(m
, fu_bitdict
, fu_selected
)
192 if self
.allow_overlap
:
193 comb
+= self
.issue_conflict
.eq(raw_hazard
)
195 # note if an exception happened. in a pipelined or OoO design
196 # this needs to be accompanied by "shadowing" (or stalling)
198 for exc
in self
.fus
.excs
.values():
199 el
.append(exc
.happened
)
200 if len(el
) > 0: # at least one exception
201 comb
+= self
.o
.exc_happened
.eq(Cat(*el
).bool())
205 def connect_satellite_decoders(self
, m
):
207 for k
, v
in self
.decoders
.items():
208 # connect each satellite decoder and give it the instruction.
209 # as subset decoders this massively reduces wire fanout given
210 # the large number of ALUs
211 setattr(m
.submodules
, "dec_%s" % v
.fn_name
, v
)
212 comb
+= v
.dec
.raw_opcode_in
.eq(self
.ireg
.raw_insn_i
)
213 comb
+= v
.dec
.bigendian
.eq(self
.ireg
.bigendian_i
)
214 # sigh due to SVP64 RA_OR_ZERO detection connect these too
215 comb
+= v
.sv_a_nz
.eq(self
.ireg
.sv_a_nz
)
217 comb
+= v
.pred_sm
.eq(self
.ireg
.sv_pred_sm
)
218 comb
+= v
.pred_dm
.eq(self
.ireg
.sv_pred_dm
)
219 if k
!= self
.trapunit
:
220 comb
+= v
.sv_rm
.eq(self
.ireg
.sv_rm
) # pass through SVP64 RM
221 comb
+= v
.is_svp64_mode
.eq(self
.ireg
.is_svp64_mode
)
222 # only the LDST PowerDecodeSubset *actually* needs to
223 # know to use the alternative decoder. this is all
225 if k
.lower().startswith("ldst"):
226 comb
+= v
.use_svp64_ldst_dec
.eq(
227 self
.ireg
.use_svp64_ldst_dec
)
229 def connect_instruction(self
, m
):
230 """connect_instruction
232 uses decoded (from PowerOp) function unit information from CSV files
233 to ascertain which Function Unit should deal with the current
236 some (such as OP_ATTN, OP_NOP) are dealt with here, including
237 ignoring it and halting the processor. OP_NOP is a bit annoying
238 because the issuer expects busy flag still to be raised then lowered.
239 (this requires a fake counter to be set).
241 comb
, sync
= m
.d
.comb
, m
.d
.sync
244 # indicate if core is busy
245 busy_o
= self
.o
.busy_o
246 any_busy_o
= self
.o
.any_busy_o
248 # connect up temporary copy of incoming instruction. the FSM will
249 # either blat the incoming instruction (if valid) into self.ireg
250 # or if the instruction could not be delivered, keep dropping the
251 # latched copy into ireg
252 ilatch
= self
.ispec()
253 self
.instr_active
= Signal()
255 # enable/busy-signals for each FU, get one bit for each FU (by name)
256 fu_enable
= Signal(len(fus
), reset_less
=True)
257 fu_busy
= Signal(len(fus
), reset_less
=True)
260 for i
, funame
in enumerate(fus
.keys()):
261 fu_bitdict
[funame
] = fu_enable
[i
]
262 fu_selected
[funame
] = fu_busy
[i
]
264 # identify function units and create a list by fnunit so that
265 # PriorityPickers can be created for selecting one of them that
266 # isn't busy at the time the incoming instruction needs passing on
267 by_fnunit
= defaultdict(list)
268 for fname
, member
in Function
.__members
__.items():
269 for funame
, fu
in fus
.items():
270 fnunit
= fu
.fnunit
.value
271 if member
.value
& fnunit
: # this FU handles this type of op
272 by_fnunit
[fname
].append((funame
, fu
)) # add by Function
274 # ok now just print out the list of FUs by Function, because we can
275 for fname
, fu_list
in by_fnunit
.items():
276 print ("FUs by type", fname
, fu_list
)
278 # now create a PriorityPicker per FU-type such that only one
279 # non-busy FU will be picked
281 fu_found
= Signal() # take a note if no Function Unit was available
282 for fname
, fu_list
in by_fnunit
.items():
283 i_pp
= PriorityPicker(len(fu_list
))
284 m
.submodules
['i_pp_%s' % fname
] = i_pp
286 for i
, (funame
, fu
) in enumerate(fu_list
):
287 # match the decoded instruction (e.do.fn_unit) against the
288 # "capability" of this FU, gate that by whether that FU is
289 # busy, and drop that into the PriorityPicker.
290 # this will give us an output of the first available *non-busy*
291 # Function Unit (Reservation Station) capable of handling this
293 fnunit
= fu
.fnunit
.value
294 en_req
= Signal(name
="issue_en_%s" % funame
, reset_less
=True)
295 fnmatch
= (self
.ireg
.e
.do
.fn_unit
& fnunit
).bool()
296 comb
+= en_req
.eq(fnmatch
& ~fu
.busy_o
&
298 i_l
.append(en_req
) # store in list for doing the Cat-trick
299 # picker output, gated by enable: store in fu_bitdict
300 po
= Signal(name
="o_issue_pick_"+funame
) # picker output
301 comb
+= po
.eq(i_pp
.o
[i
] & i_pp
.en_o
)
302 comb
+= fu_bitdict
[funame
].eq(po
)
303 comb
+= fu_selected
[funame
].eq(fu
.busy_o | po
)
304 # if we don't do this, then when there are no FUs available,
305 # the "p.o_ready" signal will go back "ok we accepted this
306 # instruction" which of course isn't true.
307 with m
.If(i_pp
.en_o
):
308 comb
+= fu_found
.eq(1)
309 # for each input, Cat them together and drop them into the picker
310 comb
+= i_pp
.i
.eq(Cat(*i_l
))
312 # rdmask, which is for registers needs to come from the *main* decoder
313 for funame
, fu
in fus
.items():
314 rdmask
= get_rdflags(self
.ireg
.e
, fu
)
315 comb
+= fu
.rdmaskn
.eq(~rdmask
)
317 # sigh - need a NOP counter
319 with m
.If(counter
!= 0):
320 sync
+= counter
.eq(counter
- 1)
323 # default to reading from incoming instruction: may be overridden
324 # by copy from latch when "waiting"
325 comb
+= self
.ireg
.eq(self
.i
)
326 # always say "ready" except if overridden
327 comb
+= self
.p
.o_ready
.eq(1)
330 with m
.State("READY"):
331 with m
.If(self
.p
.i_valid
): # run only when valid
332 with m
.Switch(self
.ireg
.e
.do
.insn_type
):
333 # check for ATTN: halt if true
334 with m
.Case(MicrOp
.OP_ATTN
):
335 m
.d
.sync
+= self
.o
.core_terminate_o
.eq(1)
337 # fake NOP - this isn't really used (Issuer detects NOP)
338 with m
.Case(MicrOp
.OP_NOP
):
339 sync
+= counter
.eq(2)
343 comb
+= self
.instr_active
.eq(1)
344 comb
+= self
.p
.o_ready
.eq(0)
345 # connect instructions. only one enabled at a time
346 for funame
, fu
in fus
.items():
347 do
= self
.des
[funame
]
348 enable
= fu_bitdict
[funame
]
350 # run this FunctionUnit if enabled: route op,
351 # issue, busy, read flags and mask to the FU
353 # operand comes from the *local* decoder
354 comb
+= fu
.oper_i
.eq_from(do
)
355 comb
+= fu
.issue_i
.eq(1) # issue when valid
356 # instruction ok, indicate ready
357 comb
+= self
.p
.o_ready
.eq(1)
359 if self
.allow_overlap
:
360 with m
.If(~fu_found
):
361 # latch copy of instruction
362 sync
+= ilatch
.eq(self
.i
)
363 comb
+= self
.p
.o_ready
.eq(1) # accept
367 with m
.State("WAITING"):
368 comb
+= self
.instr_active
.eq(1)
369 comb
+= self
.p
.o_ready
.eq(0)
371 # using copy of instruction, keep waiting until an FU is free
372 comb
+= self
.ireg
.eq(ilatch
)
373 with m
.If(fu_found
): # wait for conflict to clear
374 # connect instructions. only one enabled at a time
375 for funame
, fu
in fus
.items():
376 do
= self
.des
[funame
]
377 enable
= fu_bitdict
[funame
]
379 # run this FunctionUnit if enabled: route op,
380 # issue, busy, read flags and mask to the FU
382 # operand comes from the *local* decoder
383 comb
+= fu
.oper_i
.eq_from(do
)
384 comb
+= fu
.issue_i
.eq(1) # issue when valid
385 comb
+= self
.p
.o_ready
.eq(1)
389 print ("core: overlap allowed", self
.allow_overlap
)
390 busys
= map(lambda fu
: fu
.busy_o
, fus
.values())
391 comb
+= any_busy_o
.eq(Cat(*busys
).bool())
392 if not self
.allow_overlap
:
393 # for simple non-overlap, if any instruction is busy, set
394 # busy output for core.
395 comb
+= busy_o
.eq(any_busy_o
)
397 # sigh deal with a fun situation that needs to be investigated
399 with m
.If(self
.issue_conflict
):
402 # return both the function unit "enable" dict as well as the "busy".
403 # the "busy-or-issued" can be passed in to the Read/Write port
404 # connecters to give them permission to request access to regfiles
405 return fu_bitdict
, fu_selected
407 def connect_rdport(self
, m
, fu_bitdict
, fu_selected
,
408 rdpickers
, regfile
, regname
, fspec
):
409 comb
, sync
= m
.d
.comb
, m
.d
.sync
415 # select the required read port. these are pre-defined sizes
416 rfile
= regs
.rf
[regfile
.lower()]
417 rport
= rfile
.r_ports
[rpidx
]
418 print("read regfile", rpidx
, regfile
, regs
.rf
.keys(),
421 # for checking if the read port has an outstanding write
422 if self
.make_hazard_vecs
:
423 wv
= regs
.wv
[regfile
.lower()]
424 wvchk
= wv
.r_ports
["issue"] # write-vec bit-level hazard check
426 # if a hazard is detected on this read port, simply blithely block
427 # every FU from reading on it. this is complete overkill but very
429 hazard_detected
= Signal(name
="raw_%s_%s" % (regfile
, rpidx
))
432 if not isinstance(fspecs
, list):
438 for i
, fspec
in enumerate(fspecs
):
439 # get the regfile specs for this regfile port
440 (rf
, wf
, read
, write
, wid
, fuspec
) = fspec
441 print ("fpsec", i
, fspec
, len(fuspec
))
442 ppoffs
.append(pplen
) # record offset for picker
444 name
= "rdflag_%s_%s_%d" % (regfile
, regname
, i
)
445 rdflag
= Signal(name
=name
, reset_less
=True)
446 comb
+= rdflag
.eq(rf
)
447 rdflags
.append(rdflag
)
449 print ("pplen", pplen
)
451 # create a priority picker to manage this port
452 rdpickers
[regfile
][rpidx
] = rdpick
= PriorityPicker(pplen
)
453 setattr(m
.submodules
, "rdpick_%s_%s" % (regfile
, rpidx
), rdpick
)
459 for i
, fspec
in enumerate(fspecs
):
460 (rf
, wf
, _read
, _write
, wid
, fuspec
) = fspec
461 # connect up the FU req/go signals, and the reg-read to the FU
462 # and create a Read Broadcast Bus
463 for pi
, (funame
, fu
, idx
) in enumerate(fuspec
):
465 name
= "%s_%s_%s_%i" % (regfile
, rpidx
, funame
, pi
)
466 fu_active
= fu_selected
[funame
]
467 fu_issued
= fu_bitdict
[funame
]
469 # get (or set up) a latched copy of read register number
470 rname
= "%s_%s_%s_%d" % (funame
, regfile
, regname
, pi
)
471 read
= Signal
.like(_read
, name
="read_"+name
)
472 if rname
not in fu
.rd_latches
:
473 rdl
= Signal
.like(_read
, name
="rdlatch_"+rname
)
474 fu
.rd_latches
[rname
] = rdl
475 with m
.If(fu
.issue_i
):
476 sync
+= rdl
.eq(_read
)
478 rdl
= fu
.rd_latches
[rname
]
479 # latch to make the read immediately available on issue cycle
480 # after the read cycle, use the latched copy
481 with m
.If(fu
.issue_i
):
482 comb
+= read
.eq(_read
)
486 # connect request-read to picker input, and output to go-rd
487 addr_en
= Signal
.like(read
, name
="addr_en_"+name
)
488 pick
= Signal(name
="pick_"+name
) # picker input
489 rp
= Signal(name
="rp_"+name
) # picker output
490 delay_pick
= Signal(name
="dp_"+name
) # read-enable "underway"
492 # exclude any currently-enabled read-request (mask out active)
493 comb
+= pick
.eq(fu
.rd_rel_o
[idx
] & fu_active
& rdflags
[i
] &
494 ~delay_pick
& ~hazard_detected
)
495 # entirely block anything hazarded from being picked
496 comb
+= rdpick
.i
[pi
].eq(pick
)
497 comb
+= fu
.go_rd_i
[idx
].eq(delay_pick
) # pass in *delayed* pick
499 # if picked, select read-port "reg select" number to port
500 comb
+= rp
.eq(rdpick
.o
[pi
] & rdpick
.en_o
)
501 sync
+= delay_pick
.eq(rp
) # delayed "pick"
502 comb
+= addr_en
.eq(Mux(rp
, read
, 0))
504 # the read-enable happens combinatorially (see mux-bus below)
505 # but it results in the data coming out on a one-cycle delay.
509 addrs
.append(addr_en
)
512 # use the *delayed* pick signal to put requested data onto bus
513 with m
.If(delay_pick
):
514 # connect regfile port to input, creating fan-out Bus
516 print("reg connect widths",
517 regfile
, regname
, pi
, funame
,
518 src
.shape(), rport
.o_data
.shape())
519 # all FUs connect to same port
520 comb
+= src
.eq(rport
.o_data
)
522 if not self
.make_hazard_vecs
:
525 # read the write-hazard bitvector (wv) for any bit that is
526 wvchk_en
= Signal(len(wvchk
.ren
), name
="wv_chk_addr_en_"+name
)
527 issue_active
= Signal(name
="rd_iactive_"+name
)
528 # XXX combinatorial loop here
529 comb
+= issue_active
.eq(fu_active
& rf
)
530 with m
.If(issue_active
):
532 comb
+= wvchk_en
.eq(read
)
534 comb
+= wvchk_en
.eq(1<<read
)
535 # if FU is busy (which doesn't get set at the same time as
536 # issue) and no hazard was detected, clear wvchk_en (i.e.
537 # stop checking for hazards)
538 with m
.If(fu
.busy_o
& ~hazard_detected
):
539 comb
+= wvchk_en
.eq(0)
541 wvens
.append(wvchk_en
)
543 # or-reduce the muxed read signals
545 # for unary-addressed
546 comb
+= rport
.ren
.eq(ortreereduce_sig(rens
))
548 # for binary-addressed
549 comb
+= rport
.addr
.eq(ortreereduce_sig(addrs
))
550 comb
+= rport
.ren
.eq(Cat(*rens
).bool())
551 print ("binary", regfile
, rpidx
, rport
, rport
.ren
, rens
, addrs
)
553 if not self
.make_hazard_vecs
:
554 return Const(0) # declare "no hazards"
556 # enable the read bitvectors for this issued instruction
557 # and return whether any write-hazard bit is set
558 comb
+= wvchk
.ren
.eq(ortreereduce_sig(wvens
))
559 comb
+= hazard_detected
.eq(wvchk
.o_data
.bool())
560 return hazard_detected
562 def connect_rdports(self
, m
, fu_bitdict
, fu_selected
):
563 """connect read ports
565 orders the read regspecs into a dict-of-dicts, by regfile, by
566 regport name, then connects all FUs that want that regport by
567 way of a PriorityPicker.
569 comb
, sync
= m
.d
.comb
, m
.d
.sync
574 # dictionary of lists of regfile read ports
575 byregfiles_rd
, byregfiles_rdspec
= self
.get_byregfiles(True)
577 # okaay, now we need a PriorityPicker per regfile per regfile port
578 # loootta pickers... peter piper picked a pack of pickled peppers...
580 for regfile
, spec
in byregfiles_rd
.items():
581 fuspecs
= byregfiles_rdspec
[regfile
]
582 rdpickers
[regfile
] = {}
584 # argh. an experiment to merge RA and RB in the INT regfile
585 # (we have too many read/write ports)
586 if self
.regreduce_en
:
588 fuspecs
['rabc'] = [fuspecs
.pop('rb')]
589 fuspecs
['rabc'].append(fuspecs
.pop('rc'))
590 fuspecs
['rabc'].append(fuspecs
.pop('ra'))
591 if regfile
== 'FAST':
592 fuspecs
['fast1'] = [fuspecs
.pop('fast1')]
593 if 'fast2' in fuspecs
:
594 fuspecs
['fast1'].append(fuspecs
.pop('fast2'))
595 if 'fast3' in fuspecs
:
596 fuspecs
['fast1'].append(fuspecs
.pop('fast3'))
598 # for each named regfile port, connect up all FUs to that port
599 # also return (and collate) hazard detection
600 for (regname
, fspec
) in sort_fuspecs(fuspecs
):
601 print("connect rd", regname
, fspec
)
602 rh
= self
.connect_rdport(m
, fu_bitdict
, fu_selected
,
607 return Cat(*rd_hazard
).bool()
609 def make_hazards(self
, m
, regfile
, rfile
, wvclr
, wvset
,
610 funame
, regname
, idx
,
611 addr_en
, wp
, fu
, fu_active
, wrflag
, write
,
613 """make_hazards: a setter and a clearer for the regfile write ports
615 setter is at issue time (using PowerDecoder2 regfile write numbers)
616 clearer is at regfile write time (when FU has said what to write to)
618 there is *one* unusual case here which has to be dealt with:
619 when the Function Unit does *NOT* request a write to the regfile
620 (has its data.ok bit CLEARED). this is perfectly legitimate.
623 comb
, sync
= m
.d
.comb
, m
.d
.sync
624 name
= "%s_%s_%d" % (funame
, regname
, idx
)
626 # connect up the bitvector write hazard. unlike the
627 # regfile writeports, a ONE must be written to the corresponding
628 # bit of the hazard bitvector (to indicate the existence of
631 # the detection of what shall be written to is based
633 print ("write vector (for regread)", regfile
, wvset
)
634 wviaddr_en
= Signal(len(wvset
.wen
), name
="wv_issue_addr_en_"+name
)
635 issue_active
= Signal(name
="iactive_"+name
)
636 comb
+= issue_active
.eq(fu
.issue_i
& fu_active
& wrflag
)
637 with m
.If(issue_active
):
639 comb
+= wviaddr_en
.eq(write
)
641 comb
+= wviaddr_en
.eq(1<<write
)
643 # deal with write vector clear: this kicks in when the regfile
644 # is written to, and clears the corresponding bitvector entry
645 print ("write vector", regfile
, wvclr
)
646 wvaddr_en
= Signal(len(wvclr
.wen
), name
="wvaddr_en_"+name
)
648 comb
+= wvaddr_en
.eq(addr_en
)
651 comb
+= wvaddr_en
.eq(1<<addr_en
)
653 # XXX ASSUME that LDSTFunctionUnit always sets the data it intends to
654 # this may NOT be the case when an exception occurs
655 if isinstance(fu
, LDSTFunctionUnit
):
656 return wvaddr_en
, wviaddr_en
658 # okaaay, this is preparation for the awkward case.
659 # * latch a copy of wrflag when issue goes high.
660 # * when the fu_wrok (data.ok) flag is NOT set,
661 # but the FU is done, the FU is NEVER going to write
662 # so the bitvector has to be cleared.
663 latch_wrflag
= Signal(name
="latch_wrflag_"+name
)
664 with m
.If(~fu
.busy_o
):
665 sync
+= latch_wrflag
.eq(0)
666 with m
.If(fu
.issue_i
& fu_active
):
667 sync
+= latch_wrflag
.eq(wrflag
)
668 with m
.If(fu
.alu_done_o
& latch_wrflag
& ~fu_wrok
):
670 comb
+= wvaddr_en
.eq(write
) # addr_en gated with wp, don't use
672 comb
+= wvaddr_en
.eq(1<<addr_en
) # binary addr_en not gated
674 return wvaddr_en
, wviaddr_en
676 def connect_wrport(self
, m
, fu_bitdict
, fu_selected
,
677 wrpickers
, regfile
, regname
, fspec
):
678 comb
, sync
= m
.d
.comb
, m
.d
.sync
684 # select the required write port. these are pre-defined sizes
685 rfile
= regs
.rf
[regfile
.lower()]
686 wport
= rfile
.w_ports
[rpidx
]
688 print("connect wr", regname
, "unary", rfile
.unary
, fspec
)
689 print(regfile
, regs
.rf
.keys())
691 # select the write-protection hazard vector. note that this still
692 # requires to WRITE to the hazard bitvector! read-requests need
693 # to RAISE the bitvector (set it to 1), which, duh, requires a WRITE
694 if self
.make_hazard_vecs
:
695 wv
= regs
.wv
[regfile
.lower()]
696 wvset
= wv
.w_ports
["set"] # write-vec bit-level hazard ctrl
697 wvclr
= wv
.w_ports
["clr"] # write-vec bit-level hazard ctrl
700 if not isinstance(fspecs
, list):
708 for i
, fspec
in enumerate(fspecs
):
709 # get the regfile specs for this regfile port
710 (rf
, wf
, read
, write
, wid
, fuspec
) = fspec
711 print ("fpsec", i
, "wrflag", wf
, fspec
, len(fuspec
))
712 ppoffs
.append(pplen
) # record offset for picker
715 name
= "%s_%s_%d" % (regfile
, regname
, i
)
716 rdflag
= Signal(name
="rd_flag_"+name
)
717 wrflag
= Signal(name
="wr_flag_"+name
)
719 comb
+= rdflag
.eq(rf
)
723 comb
+= wrflag
.eq(wf
)
726 rdflags
.append(rdflag
)
727 wrflags
.append(wrflag
)
729 # create a priority picker to manage this port
730 wrpickers
[regfile
][rpidx
] = wrpick
= PriorityPicker(pplen
)
731 setattr(m
.submodules
, "wrpick_%s_%s" % (regfile
, rpidx
), wrpick
)
739 for i
, fspec
in enumerate(fspecs
):
740 # connect up the FU req/go signals and the reg-read to the FU
741 # these are arbitrated by Data.ok signals
742 (rf
, wf
, read
, _write
, wid
, fuspec
) = fspec
743 for pi
, (funame
, fu
, idx
) in enumerate(fuspec
):
745 name
= "%s_%s_%s_%d" % (funame
, regfile
, regname
, idx
)
746 # get (or set up) a write-latched copy of write register number
747 write
= Signal
.like(_write
, name
="write_"+name
)
748 rname
= "%s_%s_%s" % (funame
, regfile
, regname
)
749 if rname
not in fu
.wr_latches
:
750 wrl
= Signal
.like(_write
, name
="wrlatch_"+rname
)
751 fu
.wr_latches
[rname
] = write
752 with m
.If(fu
.issue_i
):
753 sync
+= wrl
.eq(_write
)
754 comb
+= write
.eq(_write
)
756 comb
+= write
.eq(wrl
)
758 write
= fu
.wr_latches
[rname
]
760 # write-request comes from dest.ok
761 dest
= fu
.get_out(idx
)
762 fu_dest_latch
= fu
.get_fu_out(idx
) # latched output
763 name
= "fu_wrok_%s_%s_%d" % (funame
, regname
, idx
)
764 fu_wrok
= Signal(name
=name
, reset_less
=True)
765 comb
+= fu_wrok
.eq(dest
.ok
& fu
.busy_o
)
767 # connect request-write to picker input, and output to go-wr
768 fu_active
= fu_selected
[funame
]
769 pick
= fu
.wr
.rel_o
[idx
] & fu_active
770 comb
+= wrpick
.i
[pi
].eq(pick
)
771 # create a single-pulse go write from the picker output
772 wr_pick
= Signal(name
="wpick_%s_%s_%d" % (funame
, regname
, idx
))
773 comb
+= wr_pick
.eq(wrpick
.o
[pi
] & wrpick
.en_o
)
774 comb
+= fu
.go_wr_i
[idx
].eq(rising_edge(m
, wr_pick
))
776 # connect the regspec write "reg select" number to this port
777 # only if one FU actually requests (and is granted) the port
778 # will the write-enable be activated
779 wname
= "waddr_en_%s_%s_%d" % (funame
, regname
, idx
)
780 addr_en
= Signal
.like(write
, name
=wname
)
782 comb
+= wp
.eq(wr_pick
& wrpick
.en_o
)
783 comb
+= addr_en
.eq(Mux(wp
, write
, 0))
787 addrs
.append(addr_en
)
790 # connect regfile port to input
791 print("reg connect widths",
792 regfile
, regname
, pi
, funame
,
793 dest
.shape(), wport
.i_data
.shape())
794 wsigs
.append(fu_dest_latch
)
796 # now connect up the bitvector write hazard
797 if not self
.make_hazard_vecs
:
799 res
= self
.make_hazards(m
, regfile
, rfile
, wvclr
, wvset
,
800 funame
, regname
, idx
,
801 addr_en
, wp
, fu
, fu_active
,
802 wrflags
[i
], write
, fu_wrok
)
803 wvaddr_en
, wv_issue_en
= res
804 wvclren
.append(wvaddr_en
) # set only: no data => clear bit
805 wvseten
.append(wv_issue_en
) # set data same as enable
806 wvsets
.append(wv_issue_en
) # because enable needs a 1
808 # here is where we create the Write Broadcast Bus. simple, eh?
809 comb
+= wport
.i_data
.eq(ortreereduce_sig(wsigs
))
811 # for unary-addressed
812 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
814 # for binary-addressed
815 comb
+= wport
.addr
.eq(ortreereduce_sig(addrs
))
816 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
818 if not self
.make_hazard_vecs
:
822 comb
+= wvclr
.wen
.eq(ortreereduce_sig(wvclren
)) # clear (regfile write)
823 comb
+= wvset
.wen
.eq(ortreereduce_sig(wvseten
)) # set (issue time)
824 comb
+= wvset
.i_data
.eq(ortreereduce_sig(wvsets
))
826 def connect_wrports(self
, m
, fu_bitdict
, fu_selected
):
827 """connect write ports
829 orders the write regspecs into a dict-of-dicts, by regfile,
830 by regport name, then connects all FUs that want that regport
831 by way of a PriorityPicker.
833 note that the write-port wen, write-port data, and go_wr_i all need to
834 be on the exact same clock cycle. as there is a combinatorial loop bug
835 at the moment, these all use sync.
837 comb
, sync
= m
.d
.comb
, m
.d
.sync
840 # dictionary of lists of regfile write ports
841 byregfiles_wr
, byregfiles_wrspec
= self
.get_byregfiles(False)
843 # same for write ports.
844 # BLECH! complex code-duplication! BLECH!
846 for regfile
, spec
in byregfiles_wr
.items():
847 fuspecs
= byregfiles_wrspec
[regfile
]
848 wrpickers
[regfile
] = {}
850 if self
.regreduce_en
:
851 # argh, more port-merging
853 fuspecs
['o'] = [fuspecs
.pop('o')]
854 fuspecs
['o'].append(fuspecs
.pop('o1'))
855 if regfile
== 'FAST':
856 fuspecs
['fast1'] = [fuspecs
.pop('fast1')]
857 if 'fast2' in fuspecs
:
858 fuspecs
['fast1'].append(fuspecs
.pop('fast2'))
859 if 'fast3' in fuspecs
:
860 fuspecs
['fast1'].append(fuspecs
.pop('fast3'))
862 for (regname
, fspec
) in sort_fuspecs(fuspecs
):
863 self
.connect_wrport(m
, fu_bitdict
, fu_selected
, wrpickers
,
864 regfile
, regname
, fspec
)
866 def get_byregfiles(self
, readmode
):
868 mode
= "read" if readmode
else "write"
871 e
= self
.ireg
.e
# decoded instruction to execute
873 # dictionary of dictionaries of lists of regfile ports.
874 # first key: regfile. second key: regfile port name
875 byregfiles
= defaultdict(dict)
876 byregfiles_spec
= defaultdict(dict)
878 for (funame
, fu
) in fus
.items():
879 # create in each FU a receptacle for the read/write register
880 # hazard numbers. to be latched in connect_rd/write_ports
881 # XXX better that this is moved into the actual FUs, but
882 # the issue there is that this function is actually better
883 # suited at the moment
889 print("%s ports for %s" % (mode
, funame
))
890 for idx
in range(fu
.n_src
if readmode
else fu
.n_dst
):
891 # construct regfile specs: read uses inspec, write outspec
893 (regfile
, regname
, wid
) = fu
.get_in_spec(idx
)
895 (regfile
, regname
, wid
) = fu
.get_out_spec(idx
)
896 print(" %d %s %s %s" % (idx
, regfile
, regname
, str(wid
)))
898 # the PowerDecoder2 (main one, not the satellites) contains
899 # the decoded regfile numbers. obtain these now
901 rdflag
, read
= regspec_decode_read(e
, regfile
, regname
)
902 wrport
, write
= None, None
904 rdflag
, read
= None, None
905 wrport
, write
= regspec_decode_write(e
, regfile
, regname
)
907 # construct the dictionary of regspec information by regfile
908 if regname
not in byregfiles_spec
[regfile
]:
909 byregfiles_spec
[regfile
][regname
] = \
910 (rdflag
, wrport
, read
, write
, wid
, [])
911 # here we start to create "lanes"
912 if idx
not in byregfiles
[regfile
]:
913 byregfiles
[regfile
][idx
] = []
914 fuspec
= (funame
, fu
, idx
)
915 byregfiles
[regfile
][idx
].append(fuspec
)
916 byregfiles_spec
[regfile
][regname
][5].append(fuspec
)
919 # append a latch Signal to the FU's list of latches
920 rname
= "%s_%s" % (regfile
, regname
)
922 if rname
not in fu
.rd_latches
:
923 rdl
= Signal
.like(read
, name
="rdlatch_"+rname
)
924 fu
.rd_latches
[rname
] = rdl
926 if rname
not in fu
.wr_latches
:
927 wrl
= Signal
.like(write
, name
="wrlatch_"+rname
)
928 fu
.wr_latches
[rname
] = wrl
930 # ok just print that all out, for convenience
931 for regfile
, spec
in byregfiles
.items():
932 print("regfile %s ports:" % mode
, regfile
)
933 fuspecs
= byregfiles_spec
[regfile
]
934 for regname
, fspec
in fuspecs
.items():
935 [rdflag
, wrflag
, read
, write
, wid
, fuspec
] = fspec
936 print(" rf %s port %s lane: %s" % (mode
, regfile
, regname
))
937 print(" %s" % regname
, wid
, read
, write
, rdflag
, wrflag
)
938 for (funame
, fu
, idx
) in fuspec
:
939 fusig
= fu
.src_i
[idx
] if readmode
else fu
.dest
[idx
]
940 print(" ", funame
, fu
.__class
__.__name
__, idx
, fusig
)
943 return byregfiles
, byregfiles_spec
946 yield from self
.fus
.ports()
947 yield from self
.i
.e
.ports()
948 yield from self
.l0
.ports()
955 if __name__
== '__main__':
956 pspec
= TestMemPspec(ldst_ifacetype
='testpi',
961 dut
= NonProductionCore(pspec
)
962 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
963 with
open("test_core.il", "w") as f
: