"""
not in any way intended for production use.  connects up FunctionUnits to
Register Files in a brain-dead fashion that only permits one and only one
Function Unit to be operational.

the principle here is to take the Function Units, analyse their regspecs,
and turn their requirements for access to register file read/write ports
into groupings by Register File and Register File Port name.

under each grouping - by regfile/port - a list of Function Units that
need to connect to that port is created.  as these are a contended
resource a "Broadcast Bus" per read/write port is then also created,
with access to it managed by a PriorityPicker.

the brain-dead part of this module is that even though there is no
conflict of access, regfile read/write hazards are *not* analysed,
and consequently it is safer to wait for the Function Unit to complete
before allowing a new instruction to proceed.
"""

import operator

from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
from nmigen.cli import rtlil

from openpower.decoder.power_decoder2 import PowerDecodeSubset
from openpower.decoder.power_regspec_map import regspec_decode_read
from openpower.decoder.power_regspec_map import regspec_decode_write
from openpower.sv.svp64 import SVP64Rec

from nmutil.picker import PriorityPicker
from nmutil.util import treereduce

from soc.fu.compunits.compunits import AllFunctionUnits
from soc.regfile.regfiles import RegFiles
from openpower.decoder.decode2execute1 import Decode2ToExecute1Type
from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
from openpower.decoder.power_decoder2 import get_rdflags
from openpower.decoder.decode2execute1 import Data
from soc.experiment.l0_cache import TstL0CacheBuffer  # test only
from soc.config.test.test_loadstore import TestMemPspec
from openpower.decoder.power_enums import MicrOp
from soc.config.state import CoreState

from nmutil.util import rising_edge


# helper function for reducing a list of objects down to a single value by
# ORing together one named attribute of each of them.
def ortreereduce(tree, attr="data_o"):
    """OR together the ``attr`` attribute of every item in ``tree``.

    tree: the items to combine; handed unchanged to ``treereduce``.
    attr: name of the attribute read from each item (default "data_o").

    Returns whatever ``treereduce`` returns when given ``operator.or_`` as
    the combining operator and an accessor that reads ``attr`` from an item.
    """
    def read_attr(item):
        return getattr(item, attr)

    return treereduce(tree, operator.or_, read_attr)


def ortreereduce_sig(tree):
    """OR together the items of ``tree`` themselves.

    Same as ``ortreereduce`` except that each item is used directly rather
    than one of its attributes: ``treereduce`` is given ``operator.or_`` and
    an identity accessor.
    """
    def as_is(item):
        return item

    return treereduce(tree, operator.or_, as_is)


# helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the items of ``fuspecs`` with the "full*" register names first.

    fuspecs: dict mapping a register port name to its spec.

    Returns a list of ``(regname, fspec)`` tuples: every entry whose name
    starts with "full", followed by every other entry.  Within each group
    the dict's own iteration order is kept.  The input is not modified.
    """
    full_regs = []
    other_regs = []
    for regname, fspec in fuspecs.items():
        bucket = full_regs if regname.startswith("full") else other_regs
        bucket.append((regname, fspec))
    return full_regs + other_regs


class NonProductionCore(Elaboratable):
    """Simple test core wiring Function Units to the Register Files.

    Each regfile read/write port is shared between all the Function Units
    that need it, with access arbitrated by one PriorityPicker per port.
    Instruction issue assumes that one and only one Function Unit deals
    with each instruction.

    NOTE(review): this copy of the file had lost a number of statements
    (locals that are used without ever being assigned, ``if``/``else``
    headers, the tail of multi-line expressions, a method header).  Every
    statement that had to be reinstated is tagged ``# restored`` below and
    should be confirmed against the upstream file.
    """

    def __init__(self, pspec):
        """Create the function units, regfiles, decoders and core signals.

        pspec: parameter specification object.  The optional ``svp64`` and
               ``regreduce`` attributes are honoured when present;
               ``ldst_ifacetype`` is read unconditionally (it is printed).
        """
        self.pspec = pspec  # restored: elaborate() reads self.pspec

        # test is SVP64 is to be enabled
        self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)

        # test to see if regfile ports should be reduced
        self.regreduce_en = (hasattr(pspec, "regreduce") and
                             (pspec.regreduce == True))

        # single LD/ST funnel for memory access
        self.l0 = l0 = TstL0CacheBuffer(pspec, n_units=1)
        # NOTE(review): `pi` was used below without being assigned; taking
        # it from the L0 buffer is an assumption - confirm
        pi = l0.pi  # restored

        # function units (only one each)
        # only include mmu if enabled in pspec
        self.fus = AllFunctionUnits(pspec, pilist=[pi])

        # link LoadStore1 into MMU
        mmu = self.fus.get_fu('mmu0')
        print("core pspec", pspec.ldst_ifacetype)
        print("core mmu", mmu)
        print("core lsmem.lsi", l0.cmpi.lsmem.lsi)
        # the mmu is only included if enabled in pspec, so there may be
        # nothing to link
        if mmu is not None:  # restored
            mmu.alu.set_ldst_interface(l0.cmpi.lsmem.lsi)

        # register files (yes plural)
        self.regs = RegFiles(pspec)

        # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
        self.e = Decode2ToExecute1Type("core", opkls=IssuerDecode2ToOperand,
                                       regreduce_en=self.regreduce_en)

        # SVP64 RA_OR_ZERO needs to know if the relevant EXTRA2/3 field is zero
        self.sv_a_nz = Signal()

        # state and raw instruction (and SVP64 ReMap fields)
        self.state = CoreState("core")
        self.raw_insn_i = Signal(32)  # raw instruction
        self.bigendian_i = Signal()  # bigendian - TODO, set by MSR.BE
        # NOTE(review): guarding the SVP64 fields on svp64_en is an
        # assumption (one line is missing just above them) - confirm
        if self.svp64_en:  # restored
            self.sv_rm = SVP64Rec(name="core_svp64_rm")  # SVP64 RM field
            self.sv_pred_sm = Signal()  # TODO: SIMD width
            self.sv_pred_dm = Signal()  # TODO: SIMD width

        # issue/valid/busy signalling
        self.ivalid_i = Signal(reset_less=True)  # instruction is valid
        self.issue_i = Signal(reset_less=True)
        self.busy_o = Signal(name="corebusy_o", reset_less=True)

        # start/stop and terminated signalling
        self.core_terminate_o = Signal(reset=0)  # indicates stopped

        # create per-FU instruction decoders (subsetted)
        self.decoders = {}  # restored
        self.des = {}  # restored

        for funame, fu in self.fus.fus.items():
            f_name = fu.fnunit.name
            fnunit = fu.fnunit.value
            opkls = fu.opsubsetkls
            # the trap unit gets no subset decoder of its own: elaborate()
            # points it at the main decoder instead.
            # NOTE(review): the 'TRAP' literal is an assumption - confirm
            if f_name == 'TRAP':  # restored
                self.trapunit = funame
                continue  # restored
            # NOTE(review): two keyword arguments were lost between f_name
            # and svp64_en; final/state are an assumption - confirm
            self.decoders[funame] = PowerDecodeSubset(
                None, opkls, f_name,
                final=True,  # restored
                state=self.state,  # restored
                svp64_en=self.svp64_en,
                regreduce_en=self.regreduce_en)
            self.des[funame] = self.decoders[funame].do

        if "mmu0" in self.decoders:
            self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]

    def elaborate(self, platform):
        """Build the core: submodules, decoders, FU issue and regfile ports.

        Returns the nMigen Module.  When ``pspec.nocore`` is set, a module
        containing only a dummy signal is returned instead.
        """
        m = Module()  # restored
        # for testing purposes, to cut down on build time in coriolis2
        if hasattr(self.pspec, "nocore") and self.pspec.nocore == True:
            x = Signal()  # dummy signal
            # NOTE(review): what was done with the dummy signal is not
            # visible; toggling it every cycle is an assumption - confirm
            m.d.sync += x.eq(~x)  # restored
            return m  # restored
        comb = m.d.comb  # restored

        m.submodules.fus = self.fus
        m.submodules.l0 = l0 = self.l0
        self.regs.elaborate_into(m, platform)

        # connect decoders: each one sees the same raw instruction
        for k, v in self.decoders.items():
            setattr(m.submodules, "dec_%s" % v.fn_name, v)
            comb += v.dec.raw_opcode_in.eq(self.raw_insn_i)
            comb += v.dec.bigendian.eq(self.bigendian_i)
            # sigh due to SVP64 RA_OR_ZERO detection connect these too
            comb += v.sv_a_nz.eq(self.sv_a_nz)
            # NOTE(review): matches the svp64_en guard in __init__ - confirm
            if self.svp64_en:  # restored
                comb += v.pred_sm.eq(self.sv_pred_sm)
                comb += v.pred_dm.eq(self.sv_pred_dm)
                if k != self.trapunit:
                    # pass through SVP64 ReMap
                    comb += v.sv_rm.eq(self.sv_rm)

        # ssh, cheat: trap uses the main decoder because of the rewriting
        self.des[self.trapunit] = self.e.do

        # connect up Function Units, then read/write ports
        fu_bitdict = self.connect_instruction(m)
        self.connect_rdports(m, fu_bitdict)
        self.connect_wrports(m, fu_bitdict)

        return m  # restored

    def connect_instruction(self, m):
        """connect_instruction

        uses decoded (from PowerOp) function unit information from CSV files
        to ascertain which Function Unit should deal with the current
        instruction.

        some (such as OP_ATTN, OP_NOP) are dealt with here, including
        ignoring it and halting the processor.  OP_NOP is a bit annoying
        because the issuer expects busy flag still to be raised then lowered.
        (this requires a fake counter to be set).

        Returns a dict mapping each Function Unit name to its enable bit.
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus  # restored

        # enable-signals for each FU, get one bit for each FU (by name)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_bitdict = {}  # restored
        for i, funame in enumerate(fus.keys()):
            fu_bitdict[funame] = fu_enable[i]

        # enable the required Function Unit based on the opcode decode
        # note: this *only* works correctly for simple core when one and
        # *only* one FU is allocated per instruction
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            enable = Signal(name="en_%s" % funame, reset_less=True)
            comb += enable.eq((self.e.do.fn_unit & fnunit).bool())
            comb += fu_bitdict[funame].eq(enable)

        # sigh - need a NOP counter
        # NOTE(review): the width is an assumption (two bits are the
        # minimum able to hold the value 2 loaded below) - confirm
        counter = Signal(2)  # restored
        with m.If(counter != 0):
            sync += counter.eq(counter - 1)
            comb += self.busy_o.eq(1)

        with m.If(self.ivalid_i):  # run only when valid
            with m.Switch(self.e.do.insn_type):
                # check for ATTN: halt if true
                with m.Case(MicrOp.OP_ATTN):
                    m.d.sync += self.core_terminate_o.eq(1)

                with m.Case(MicrOp.OP_NOP):
                    sync += counter.eq(2)
                    comb += self.busy_o.eq(1)

                # NOTE(review): placing the FU hookup in the Switch default
                # (so that ATTN and NOP never reach an FU) is an assumption
                with m.Default():  # restored
                    # connect up instructions.  only one enabled at a time
                    for funame, fu in fus.items():
                        do = self.des[funame]
                        enable = fu_bitdict[funame]

                        # run this FunctionUnit if enabled
                        # route op, issue, busy, read flags and mask to FU
                        with m.If(enable):  # restored
                            # operand comes from the *local* decoder
                            comb += fu.oper_i.eq_from(do)
                            #comb += fu.oper_i.eq_from_execute1(e)
                            comb += fu.issue_i.eq(self.issue_i)
                            comb += self.busy_o.eq(fu.busy_o)
                            # rdmask, which is for registers, needs to come
                            # from the *main* decoder
                            rdmask = get_rdflags(self.e, fu)
                            comb += fu.rdmaskn.eq(~rdmask)

        return fu_bitdict  # restored: elaborate() uses the result

    def connect_rdport(self, m, fu_bitdict, rdpickers,
                       regfile, regname, fspec):
        """Connect every Function Unit wanting one regfile read port.

        m:          Module being built
        fu_bitdict: FU name -> enable bit (from connect_instruction)
        rdpickers:  dict-of-dicts receiving the picker created here, stored
                    as rdpickers[regfile][regname]
        regfile:    regfile name; lower-cased to look up the regfile
        regname:    name of the read port within that regfile
        fspec:      one (rdflag, read, write, wid, fuspec) tuple or a list
                    of them when several named ports share this one
        """
        comb, sync = m.d.comb, m.d.sync
        regs = self.regs  # restored

        rpidx = regname  # restored

        # select the required read port.  these are pre-defined sizes
        rfile = regs.rf[regfile.lower()]
        rport = rfile.r_ports[rpidx]
        # NOTE(review): the tail of this print was lost; its last two
        # arguments are an assumption
        print("read regfile", rpidx, regfile, regs.rf.keys(),
              rfile, rfile.unary)

        fspecs = fspec  # restored
        if not isinstance(fspecs, list):
            fspecs = [fspecs]  # restored

        rdflags = []  # restored
        pplen = 0  # restored: total number of picker inputs
        reads = []  # restored
        ppoffs = []  # restored
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = fspec
            print("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)  # restored
            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name=name, reset_less=True)
            comb += rdflag.eq(rf)
            rdflags.append(rdflag)
            reads.append(read)  # restored: reads[i] is used below

        print("pplen", pplen)

        # create a priority picker to manage this port
        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

        rens = []  # restored
        addrs = []  # restored
        for i, fspec in enumerate(fspecs):
            (rf, read, write, wid, fuspec) = fspec
            # connect up the FU req/go signals, and the reg-read to the FU
            # and create a Read Broadcast Bus
            for pi, (funame, fu, idx) in enumerate(fuspec):
                pi += ppoffs[i]  # restored: apply the recorded offset

                # connect request-read to picker input, and output to go-rd
                fu_active = fu_bitdict[funame]
                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
                addr_en = Signal.like(reads[i], name="addr_en_"+name)
                pick = Signal(name="pick_"+name)  # picker input
                rp = Signal(name="rp_"+name)  # picker output
                # read-enable "underway"
                delay_pick = Signal(name="dp_"+name)

                # exclude any currently-enabled read-request (mask out active)
                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
                                ~delay_pick)  # restored: final operand
                comb += rdpick.i[pi].eq(pick)
                # pass in *delayed* pick
                comb += fu.go_rd_i[idx].eq(delay_pick)

                # if picked, select read-port "reg select" number to port
                comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
                sync += delay_pick.eq(rp)  # delayed "pick"
                comb += addr_en.eq(Mux(rp, reads[i], 0))

                # the read-enable happens combinatorially (see mux-bus below)
                # but it results in the data coming out on a one-cycle delay.
                # NOTE(review): the unary/binary split mirrors the two
                # or-reduce variants at the end of this method - confirm
                if rfile.unary:  # restored
                    rens.append(addr_en)  # restored
                else:  # restored
                    addrs.append(addr_en)
                    rens.append(rp)  # restored

                # use the *delayed* pick signal to put requested data onto bus
                with m.If(delay_pick):
                    # connect regfile port to input, creating fan-out Bus
                    src = fu.src_i[idx]  # restored
                    print("reg connect widths",
                          regfile, regname, pi, funame,
                          src.shape(), rport.data_o.shape())
                    # all FUs connect to same port
                    comb += src.eq(rport.data_o)

        # or-reduce the muxed read signals
        if rfile.unary:  # restored
            # for unary-addressed
            comb += rport.ren.eq(ortreereduce_sig(rens))
        else:  # restored
            # for binary-addressed
            comb += rport.addr.eq(ortreereduce_sig(addrs))
            comb += rport.ren.eq(Cat(*rens).bool())
            print("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    def connect_rdports(self, m, fu_bitdict):
        """connect read ports

        orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker.
        """
        comb, sync = m.d.comb, m.d.sync

        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        rdpickers = {}  # restored
        for regfile, spec in byregfiles_rd.items():
            fuspecs = byregfiles_rdspec[regfile]
            rdpickers[regfile] = {}

            # argh.  an experiment to merge RA and RB in the INT regfile
            # (we have too many read/write ports)
            if self.regreduce_en:
                # NOTE(review): guard inferred from the comment above and
                # from the parallel 'FAST' branch - confirm
                if regfile == 'INT':  # restored
                    fuspecs['rabc'] = [fuspecs.pop('rb')]
                    fuspecs['rabc'].append(fuspecs.pop('rc'))
                    fuspecs['rabc'].append(fuspecs.pop('ra'))
                if regfile == 'FAST':
                    fuspecs['fast1'] = [fuspecs.pop('fast1')]
                    if 'fast2' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast2'))
                    if 'fast3' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast3'))

            # for each named regfile port, connect up all FUs to that port
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect rd", regname, fspec)
                self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                    regname, fspec)  # restored: last 2 args

    def connect_wrport(self, m, fu_bitdict, wrpickers,
                       regfile, regname, fspec):
        """Connect every Function Unit wanting one regfile write port.

        Arguments are as for connect_rdport, except that the picker created
        here is stored in wrpickers[regfile][regname].
        """
        comb, sync = m.d.comb, m.d.sync
        regs = self.regs  # restored

        print("connect wr", regname, fspec)
        rpidx = regname  # restored

        # select the required write port.  these are pre-defined sizes
        print(regfile, regs.rf.keys())
        rfile = regs.rf[regfile.lower()]
        wport = rfile.w_ports[rpidx]

        fspecs = fspec  # restored
        if not isinstance(fspecs, list):
            fspecs = [fspecs]  # restored

        pplen = 0  # restored: total number of picker inputs
        ppoffs = []  # restored
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = fspec
            print("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)  # restored

        # create a priority picker to manage this port
        wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
        setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

        wsigs = []  # restored
        wens = []  # restored
        addrs = []  # restored
        for i, fspec in enumerate(fspecs):
            # connect up the FU req/go signals and the reg-read to the FU
            # these are arbitrated by Data.ok signals
            (rf, read, write, wid, fuspec) = fspec
            for pi, (funame, fu, idx) in enumerate(fuspec):
                pi += ppoffs[i]  # restored: apply the recorded offset

                # write-request comes from dest.ok
                dest = fu.get_out(idx)
                fu_dest_latch = fu.get_fu_out(idx)  # latched output
                name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                wrflag = Signal(name=name, reset_less=True)
                comb += wrflag.eq(dest.ok & fu.busy_o)

                # connect request-write to picker input, and output to go-wr
                fu_active = fu_bitdict[funame]
                pick = fu.wr.rel_o[idx] & fu_active  # & wrflag
                comb += wrpick.i[pi].eq(pick)
                # create a single-pulse go write from the picker output
                # NOTE(review): any explicit name= given to this Signal
                # upstream is not visible here
                wr_pick = Signal()  # restored
                comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
                comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

                # connect the regspec write "reg select" number to this port
                # only if one FU actually requests (and is granted) the port
                # will the write-enable be activated
                addr_en = Signal.like(write)
                wp = Signal()  # restored
                comb += wp.eq(wr_pick & wrpick.en_o)
                comb += addr_en.eq(Mux(wp, write, 0))
                # NOTE(review): the unary/binary split mirrors the two
                # or-reduce variants at the end of this method - confirm
                if rfile.unary:  # restored
                    wens.append(addr_en)  # restored
                else:  # restored
                    addrs.append(addr_en)
                    wens.append(wp)  # restored

                # connect regfile port to input
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      dest.shape(), wport.data_i.shape())
                wsigs.append(fu_dest_latch)

        # here is where we create the Write Broadcast Bus. simple, eh?
        comb += wport.data_i.eq(ortreereduce_sig(wsigs))
        if rfile.unary:  # restored
            # for unary-addressed
            comb += wport.wen.eq(ortreereduce_sig(wens))
        else:  # restored
            # for binary-addressed
            comb += wport.addr.eq(ortreereduce_sig(addrs))
            comb += wport.wen.eq(ortreereduce_sig(wens))

    def connect_wrports(self, m, fu_bitdict):
        """connect write ports

        orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker.

        note that the write-port wen, write-port data, and go_wr_i all need to
        be on the exact same clock cycle.  as there is a combinatorial loop bug
        at the moment, these all use sync.
        """
        comb, sync = m.d.comb, m.d.sync

        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same for write ports.
        # BLECH!  complex code-duplication! BLECH!
        wrpickers = {}  # restored
        for regfile, spec in byregfiles_wr.items():
            fuspecs = byregfiles_wrspec[regfile]
            wrpickers[regfile] = {}

            if self.regreduce_en:
                # argh, more port-merging
                # NOTE(review): guard inferred from the parallel 'FAST'
                # branch and from connect_rdports - confirm
                if regfile == 'INT':  # restored
                    fuspecs['o'] = [fuspecs.pop('o')]
                    fuspecs['o'].append(fuspecs.pop('o1'))
                if regfile == 'FAST':
                    fuspecs['fast1'] = [fuspecs.pop('fast1')]
                    if 'fast2' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast2'))
                    if 'fast3' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast3'))

            for (regname, fspec) in sort_fuspecs(fuspecs):
                self.connect_wrport(m, fu_bitdict, wrpickers,
                                    regfile, regname, fspec)

    def get_byregfiles(self, readmode):
        """Group the Function Units' register ports by regfile.

        readmode: True to collect source (read) ports, False to collect
                  destination (write) ports.

        Returns ``(byregfiles, byregfiles_spec)``:

        * byregfiles[regfile][idx] is a list of (funame, fu, idx) tuples
        * byregfiles_spec[regfile][regname] is a tuple
          (rdflag, read, write, wid, fuspec_list), where fuspec_list holds
          the (funame, fu, idx) tuples of every FU using that named port.
          rdflag and read are None in write mode; write is None in read
          mode.
        """
        mode = "read" if readmode else "write"
        fus = self.fus.fus  # restored
        e = self.e  # decoded instruction to execute

        # dictionary of lists of regfile ports
        byregfiles = {}  # restored
        byregfiles_spec = {}  # restored
        for (funame, fu) in fus.items():
            print("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                if readmode:  # restored
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:  # restored
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
                if readmode:  # restored
                    rdflag, read = regspec_decode_read(e, regfile, regname)
                    write = None  # restored: `write` is stored below
                else:  # restored
                    rdflag, read = None, None
                    wrport, write = regspec_decode_write(e, regfile, regname)
                if regfile not in byregfiles:
                    byregfiles[regfile] = {}
                    byregfiles_spec[regfile] = {}
                if regname not in byregfiles_spec[regfile]:
                    byregfiles_spec[regfile][regname] = \
                        (rdflag, read, write, wid, [])
                # here we start to create "lanes"
                if idx not in byregfiles[regfile]:
                    byregfiles[regfile][idx] = []
                fuspec = (funame, fu, idx)
                byregfiles[regfile][idx].append(fuspec)
                byregfiles_spec[regfile][regname][4].append(fuspec)

        # ok just print that out, for convenience
        for regfile, spec in byregfiles.items():
            print("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, read, write, wid, fuspec] = fspec
                print(" rf %s port %s lane: %s" % (mode, regfile, regname))
                print(" %s" % regname, wid, read, write, rdflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print(" ", funame, fu, idx, fusig)

        return byregfiles, byregfiles_spec

    # NOTE(review): the header of the method holding the three `yield from`
    # lines was lost.  The __main__ section calls dut.ports(), so a ports()
    # method must exist; splitting it into __iter__ + ports() is an
    # assumption - confirm
    def __iter__(self):  # restored
        """Yield the ports of the function units, decoder record and L0."""
        yield from self.fus.ports()
        yield from self.e.ports()
        yield from self.l0.ports()

    def ports(self):  # restored
        """Return the core's ports as a list."""
        return list(self)


575 if __name__
== '__main__':
576 pspec
= TestMemPspec(ldst_ifacetype
='testpi',
581 dut
= NonProductionCore(pspec
)
582 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
583 with
open("test_core.il", "w") as f
: