"""Simple core.

Not in any way intended for production use.  Connects up FunctionUnits to
Register Files in a brain-dead fashion that permits one and only one
Function Unit to be operational at a time.

The principle here is to take the Function Units, analyse their regspecs,
and turn their requirements for access to register file read/write ports
into groupings by Register File and Register File Port name.

Under each grouping - by regfile/port - a list of Function Units that
need to connect to that port is created.  As these ports are a contended
resource, a "Broadcast Bus" per read/write port is then also created,
with access to it managed by a PriorityPicker.

The brain-dead part of this module is that, even though there is no
conflict of access, regfile read/write hazards are *not* analysed,
and consequently it is safer to wait for the Function Unit to complete
before allowing a new instruction to proceed.
"""
import operator

from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
from nmigen.cli import rtlil

from openpower.decoder.power_decoder2 import PowerDecodeSubset
from openpower.decoder.power_regspec_map import regspec_decode_read
from openpower.decoder.power_regspec_map import regspec_decode_write
from openpower.sv.svp64 import SVP64Rec

from nmutil.picker import PriorityPicker
from nmutil.util import treereduce

from soc.fu.compunits.compunits import AllFunctionUnits
from soc.regfile.regfiles import RegFiles
from openpower.decoder.decode2execute1 import Decode2ToExecute1Type
from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
from openpower.decoder.power_decoder2 import get_rdflags
from openpower.decoder.decode2execute1 import Data
from soc.experiment.l0_cache import TstL0CacheBuffer  # test only
from soc.config.test.test_loadstore import TestMemPspec
from openpower.decoder.power_enums import MicrOp
from soc.config.state import CoreState

from nmutil.util import rising_edge
# helper functions for reducing a list of signals down to a single
# signal by ORing them together in a parallel tree
def ortreereduce(tree, attr="data_o"):
    """OR together one attribute taken from every item of ``tree``.

    tree: the items to reduce; handed to ``treereduce`` unchanged.
    attr: name of the attribute read (with ``getattr``) from each item
          before it is ORed in.  Defaults to "data_o".

    Returns whatever ``treereduce`` returns for ``operator.or_``.
    """
    def pick_attr(item):
        return getattr(item, attr)

    return treereduce(tree, operator.or_, pick_attr)
def ortreereduce_sig(tree):
    """OR together the items of ``tree`` themselves (no attribute lookup).

    tree: the items to reduce; handed to ``treereduce`` unchanged.

    Returns whatever ``treereduce`` returns for ``operator.or_``.
    """
    def identity(item):
        return item

    return treereduce(tree, operator.or_, identity)
59 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the (regname, fspec) pairs of ``fuspecs``, "full" names first.

    fuspecs: dict mapping a regfile port name to its spec.

    Pairs whose name starts with "full" come first, followed by all the
    others; within each group the dict's own iteration order is kept.
    Returns a new list; ``fuspecs`` is not modified.
    """
    # sorted() is stable and False sorts before True, so keying on
    # "does NOT start with full" moves the full* entries to the front
    # without disturbing the relative order inside either group.
    return sorted(fuspecs.items(),
                  key=lambda item: not item[0].startswith("full"))
class NonProductionCore(Elaboratable):
    """Simple, non-production core joining Function Units to Register Files.

    Every regfile read/write port is shared by all Function Units whose
    regspecs name it, with one PriorityPicker per port arbitrating access.
    The Function Unit to run is selected from the decoded ``fn_unit``
    field of the main decoder record ``self.e``.

    NOTE(review): a number of short statements (local aliases, list/dict
    initialisers, ``if``/``else`` heads, ``return`` lines) were absent from
    the text under review and have been restored from the way the
    surrounding code uses them.  Restored statements that usage alone does
    not fully determine carry their own NOTE(review); confirm those against
    version control.
    """

    def __init__(self, pspec):
        """Create the L0 buffer, FUs, regfiles, decoders and core signals.

        pspec: parameter-specification object.  Read here: the optional
               flags ``svp64`` and ``regreduce`` and ``ldst_ifacetype``
               (printed only).  It is also passed on to TstL0CacheBuffer,
               AllFunctionUnits and RegFiles.
        """
        self.pspec = pspec  # elaborate() looks at pspec.nocore

        # test if SVP64 is to be enabled
        self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)

        # test to see if regfile ports should be reduced
        self.regreduce_en = (hasattr(pspec, "regreduce") and
                             (pspec.regreduce == True))

        # single LD/ST funnel for memory access
        self.l0 = l0 = TstL0CacheBuffer(pspec, n_units=1)
        # NOTE(review): the binding of `pi` was not visible; presumably it
        # is the L0 buffer's port interface - confirm.
        pi = l0.pi

        # function units (only one each)
        # only include mmu if enabled in pspec
        self.fus = AllFunctionUnits(pspec, pilist=[pi])

        # link LoadStore1 into MMU
        mmu = self.fus.get_fu('mmu0')
        print ("core pspec", pspec.ldst_ifacetype)
        print ("core mmu", mmu)
        print ("core lsmem.lsi", l0.cmpi.lsmem.lsi)
        # the mmu is optional (see above).
        # NOTE(review): assumes get_fu() yields None when absent - confirm.
        if mmu is not None:
            mmu.alu.set_ldst_interface(l0.cmpi.lsmem.lsi)

        # register files (yes plural)
        self.regs = RegFiles(pspec)

        # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
        self.e = Decode2ToExecute1Type("core", opkls=IssuerDecode2ToOperand,
                                       regreduce_en=self.regreduce_en)

        # SVP64 RA_OR_ZERO needs to know if the relevant EXTRA2/3 field is zero
        self.sv_a_nz = Signal()

        # state and raw instruction (and SVP64 ReMap fields)
        self.state = CoreState("core")
        self.raw_insn_i = Signal(32)  # raw instruction
        self.bigendian_i = Signal()  # bigendian - TODO, set by MSR.BE
        self.sv_rm = SVP64Rec(name="core_svp64_rm")  # SVP64 RM field

        # issue/valid/busy signalling
        self.ivalid_i = Signal(reset_less=True)  # instruction is valid
        self.issue_i = Signal(reset_less=True)
        self.busy_o = Signal(name="corebusy_o", reset_less=True)

        # start/stop and terminated signalling
        self.core_terminate_o = Signal(reset=0)  # indicates stopped

        # create per-FU instruction decoders (subsetted)
        self.decoders = {}  # FU name -> PowerDecodeSubset
        self.des = {}       # FU name -> that decoder's "do" record

        for funame, fu in self.fus.fus.items():
            f_name = fu.fnunit.name
            opkls = fu.opsubsetkls
            # the trap unit gets no decoder of its own: elaborate() points
            # it at the main decoder record instead.
            # NOTE(review): the guarding condition was not visible; it is
            # restored as a test on the unit name - confirm.
            if f_name == 'TRAP':
                self.trapunit = funame
                continue
            # NOTE(review): two short keyword-argument lines of this call
            # were not visible; `final` and `state` are presumed - confirm
            # against the PowerDecodeSubset signature.
            self.decoders[funame] = PowerDecodeSubset(
                None, opkls, f_name,
                final=True,
                state=self.state,
                svp64_en=self.svp64_en,
                regreduce_en=self.regreduce_en)
            self.des[funame] = self.decoders[funame].do

        if "mmu0" in self.decoders:
            self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]

    def elaborate(self, platform):
        """Build the Module: submodules, decoders, FU and regfile wiring."""
        m = Module()

        # for testing purposes, to cut down on build time in coriolis2
        if hasattr(self.pspec, "nocore") and self.pspec.nocore == True:
            x = Signal()  # dummy signal
            # NOTE(review): the statement using the dummy signal was not
            # visible; a toggle is presumed so that it is not unused.
            m.d.sync += x.eq(~x)
            return m
        comb = m.d.comb

        m.submodules.fus = self.fus
        m.submodules.l0 = self.l0
        self.regs.elaborate_into(m, platform)

        # connect decoders
        for k, v in self.decoders.items():
            setattr(m.submodules, "dec_%s" % v.fn_name, v)
            comb += v.dec.raw_opcode_in.eq(self.raw_insn_i)
            comb += v.dec.bigendian.eq(self.bigendian_i)
            # sigh due to SVP64 RA_OR_ZERO detection connect these too
            comb += v.sv_a_nz.eq(self.sv_a_nz)
            if self.svp64_en and k != self.trapunit:
                comb += v.sv_rm.eq(self.sv_rm)  # pass through SVP64 ReMap

        # ssh, cheat: trap uses the main decoder because of the rewriting
        self.des[self.trapunit] = self.e.do

        # connect up Function Units, then read/write ports
        fu_bitdict = self.connect_instruction(m)
        self.connect_rdports(m, fu_bitdict)
        self.connect_wrports(m, fu_bitdict)

        return m

    def connect_instruction(self, m):
        """connect_instruction

        uses decoded (from PowerOp) function unit information from CSV files
        to ascertain which Function Unit should deal with the current
        instruction.

        some (such as OP_ATTN, OP_NOP) are dealt with here, including
        ignoring it and halting the processor.  OP_NOP is a bit annoying
        because the issuer expects busy flag still to be raised then lowered.
        (this requires a fake counter to be set).

        Returns a dict mapping each FU name to its one-bit enable signal.
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus

        # enable-signals for each FU, get one bit for each FU (by name)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_bitdict = {}
        for i, funame in enumerate(fus.keys()):
            fu_bitdict[funame] = fu_enable[i]

        # enable the required Function Unit based on the opcode decode
        # note: this *only* works correctly for simple core when one and
        # *only* one FU is allocated per instruction
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            enable = Signal(name="en_%s" % funame, reset_less=True)
            comb += enable.eq((self.e.do.fn_unit & fnunit).bool())
            comb += fu_bitdict[funame].eq(enable)

        # sigh - need a NOP counter
        # NOTE(review): the counter declaration was not visible; it must
        # hold the value 2 loaded below, so two bits are presumed.
        counter = Signal(2)
        with m.If(counter != 0):
            sync += counter.eq(counter - 1)
            comb += self.busy_o.eq(1)

        with m.If(self.ivalid_i):  # run only when valid
            with m.Switch(self.e.do.insn_type):
                # check for ATTN: halt if true
                with m.Case(MicrOp.OP_ATTN):
                    m.d.sync += self.core_terminate_o.eq(1)

                with m.Case(MicrOp.OP_NOP):
                    sync += counter.eq(2)
                    comb += self.busy_o.eq(1)

                # NOTE(review): the head of this branch was not visible;
                # a Default case is presumed, as ATTN and NOP are said
                # above to be dealt with without involving an FU.
                with m.Default():
                    # connect up instructions.  only one enabled at a time
                    for funame, fu in fus.items():
                        do = self.des[funame]
                        enable = fu_bitdict[funame]

                        # run this FunctionUnit if enabled
                        # route op, issue, busy, read flags and mask to FU
                        with m.If(enable):
                            # operand comes from the *local* decoder
                            comb += fu.oper_i.eq_from(do)
                            #comb += fu.oper_i.eq_from_execute1(e)
                            comb += fu.issue_i.eq(self.issue_i)
                            comb += self.busy_o.eq(fu.busy_o)
                            # rdmask, which is for registers, needs to come
                            # from the *main* decoder
                            rdmask = get_rdflags(self.e, fu)
                            comb += fu.rdmaskn.eq(~rdmask)

        return fu_bitdict

    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
        """Connect every FU wanting one regfile read port to that port.

        m:          the Module being built
        fu_bitdict: FU name -> enable signal (from connect_instruction)
        rdpickers:  dict-of-dicts; the PriorityPicker created here is
                    stored at rdpickers[regfile][regname]
        regfile:    regfile name (lower-cased to index self.regs.rf)
        regname:    read-port name within that regfile
        fspec:      one (rdflag, read, write, wid, fuspec) tuple, or a list
                    of them when several regspecs share this port
        """
        comb, sync = m.d.comb, m.d.sync
        regs = self.regs

        rpidx = regname

        # select the required read port.  these are pre-defined sizes
        rfile = regs.rf[regfile.lower()]
        rport = rfile.r_ports[rpidx]
        print("read regfile", rpidx, regfile, regs.rf.keys())

        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        rdflags = []  # one read-flag signal per fspec
        pplen = 0     # total number of picker inputs needed
        reads = []    # the "read" register-select of each fspec
        ppoffs = []   # first picker index belonging to each fspec
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = fspec
            print ("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)
            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name=name, reset_less=True)
            comb += rdflag.eq(rf)
            rdflags.append(rdflag)
            reads.append(read)

        print ("pplen", pplen)

        # create a priority picker to manage this port
        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

        rens = []
        addrs = []
        for i, fspec in enumerate(fspecs):
            (rf, read, write, wid, fuspec) = fspec
            # connect up the FU req/go signals, and the reg-read to the FU
            # and create a Read Broadcast Bus
            for pi, (funame, fu, idx) in enumerate(fuspec):
                pi += ppoffs[i]  # index into the shared picker

                # connect request-read to picker input, and output to go-rd
                fu_active = fu_bitdict[funame]
                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
                addr_en = Signal.like(reads[i], name="addr_en_"+name)
                pick = Signal(name="pick_"+name)      # picker input
                rp = Signal(name="rp_"+name)          # picker output
                delay_pick = Signal(name="dp_"+name)  # read-enable "underway"

                # exclude any currently-enabled read-request (mask out active)
                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
                                ~delay_pick)
                comb += rdpick.i[pi].eq(pick)
                comb += fu.go_rd_i[idx].eq(delay_pick)  # pass in *delayed* pick

                # if picked, select read-port "reg select" number to port
                comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
                sync += delay_pick.eq(rp)  # delayed "pick"
                comb += addr_en.eq(Mux(rp, reads[i], 0))

                # the read-enable happens combinatorially (see mux-bus below)
                # but it results in the data coming out on a one-cycle delay.
                # NOTE(review): the unary/binary test was not visible;
                # `rfile.unary` is presumed - confirm the attribute name.
                if rfile.unary:
                    rens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    rens.append(rp)

                # use the *delayed* pick signal to put requested data onto bus
                with m.If(delay_pick):
                    # connect regfile port to input, creating fan-out Bus
                    src = fu.src_i[idx]
                    print("reg connect widths",
                          regfile, regname, pi, funame,
                          src.shape(), rport.data_o.shape())
                    # all FUs connect to same port
                    comb += src.eq(rport.data_o)

        # or-reduce the muxed read signals
        if rfile.unary:
            # for unary-addressed
            comb += rport.ren.eq(ortreereduce_sig(rens))
        else:
            # for binary-addressed
            comb += rport.addr.eq(ortreereduce_sig(addrs))
            comb += rport.ren.eq(Cat(*rens).bool())
            print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    def connect_rdports(self, m, fu_bitdict):
        """connect read ports

        orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker.
        """
        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        rdpickers = {}
        for regfile in byregfiles_rd:
            fuspecs = byregfiles_rdspec[regfile]
            rdpickers[regfile] = {}

            # argh.  an experiment to merge RA and RB in the INT regfile
            # (we have too many read/write ports)
            if self.regreduce_en:
                # NOTE(review): the regfile test guarding the rb/rc/ra
                # merge was not visible; 'INT' is presumed from the
                # comment above - confirm.
                if regfile == 'INT':
                    fuspecs['rabc'] = [fuspecs.pop('rb')]
                    fuspecs['rabc'].append(fuspecs.pop('rc'))
                    fuspecs['rabc'].append(fuspecs.pop('ra'))
                if regfile == 'FAST':
                    fuspecs['fast1'] = [fuspecs.pop('fast1')]
                    if 'fast2' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast2'))
                    if 'fast3' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast3'))

            # for each named regfile port, connect up all FUs to that port
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect rd", regname, fspec)
                self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                    regname, fspec)

    def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
        """Connect every FU wanting one regfile write port to that port.

        m:          the Module being built
        fu_bitdict: FU name -> enable signal (from connect_instruction)
        wrpickers:  dict-of-dicts; the PriorityPicker created here is
                    stored at wrpickers[regfile][regname]
        regfile:    regfile name (lower-cased to index self.regs.rf)
        regname:    write-port name within that regfile
        fspec:      one (rdflag, read, write, wid, fuspec) tuple, or a list
                    of them when several regspecs share this port

        The port's wen, addr and data_i, and each FU's go_wr_i, are all
        driven combinatorially here.
        """
        comb = m.d.comb
        regs = self.regs

        print("connect wr", regname, fspec)
        rpidx = regname

        # select the required write port.  these are pre-defined sizes
        print(regfile, regs.rf.keys())
        rfile = regs.rf[regfile.lower()]
        wport = rfile.w_ports[rpidx]

        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        pplen = 0    # total number of picker inputs needed
        ppoffs = []  # first picker index belonging to each fspec
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = fspec
            print ("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)

        # create a priority picker to manage this port
        wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
        setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

        wsigs = []
        wens = []
        addrs = []
        for i, fspec in enumerate(fspecs):
            # connect up the FU req/go signals and the reg-read to the FU
            # these are arbitrated by Data.ok signals
            (rf, read, write, wid, fuspec) = fspec
            for pi, (funame, fu, idx) in enumerate(fuspec):
                pi += ppoffs[i]  # index into the shared picker

                # write-request comes from dest.ok
                dest = fu.get_out(idx)
                fu_dest_latch = fu.get_fu_out(idx)  # latched output
                name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                wrflag = Signal(name=name, reset_less=True)
                comb += wrflag.eq(dest.ok & fu.busy_o)

                # connect request-write to picker input, and output to go-wr
                fu_active = fu_bitdict[funame]
                pick = fu.wr.rel_o[idx] & fu_active  # & wrflag
                comb += wrpick.i[pi].eq(pick)
                # create a single-pulse go write from the picker output
                # NOTE(review): the declarations of `wr_pick` and (below)
                # `wp` were not visible; plain one-bit signals are presumed.
                wr_pick = Signal()
                comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
                comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

                # connect the regspec write "reg select" number to this port
                # only if one FU actually requests (and is granted) the port
                # will the write-enable be activated
                addr_en = Signal.like(write)
                wp = Signal()
                comb += wp.eq(wr_pick & wrpick.en_o)
                comb += addr_en.eq(Mux(wp, write, 0))
                # NOTE(review): the unary/binary test was not visible;
                # `rfile.unary` is presumed - confirm the attribute name.
                if rfile.unary:
                    wens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    wens.append(wp)

                # connect regfile port to input
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      dest.shape(), wport.data_i.shape())
                wsigs.append(fu_dest_latch)

        # here is where we create the Write Broadcast Bus. simple, eh?
        comb += wport.data_i.eq(ortreereduce_sig(wsigs))
        if rfile.unary:
            # for unary-addressed
            comb += wport.wen.eq(ortreereduce_sig(wens))
        else:
            # for binary-addressed
            comb += wport.addr.eq(ortreereduce_sig(addrs))
            comb += wport.wen.eq(ortreereduce_sig(wens))

    def connect_wrports(self, m, fu_bitdict):
        """connect write ports

        orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker.

        note that the write-port wen, write-port data, and go_wr_i all need to
        be on the exact same clock cycle.
        """
        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same for write ports.
        # BLECH!  complex code-duplication! BLECH!
        wrpickers = {}
        for regfile in byregfiles_wr:
            fuspecs = byregfiles_wrspec[regfile]
            wrpickers[regfile] = {}

            if self.regreduce_en:
                # argh, more port-merging
                # NOTE(review): the regfile test guarding the o/o1 merge
                # was not visible; 'INT' is presumed - confirm.
                if regfile == 'INT':
                    fuspecs['o'] = [fuspecs.pop('o')]
                    fuspecs['o'].append(fuspecs.pop('o1'))
                if regfile == 'FAST':
                    fuspecs['fast1'] = [fuspecs.pop('fast1')]
                    if 'fast2' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast2'))
                    if 'fast3' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast3'))

            for (regname, fspec) in sort_fuspecs(fuspecs):
                self.connect_wrport(m, fu_bitdict, wrpickers,
                                    regfile, regname, fspec)

    def get_byregfiles(self, readmode):
        """Group the FUs' register ports by regfile.

        readmode: True to gather source (read) ports, False to gather
                  destination (write) ports.

        Returns a pair ``(byregfiles, byregfiles_spec)``:

        * ``byregfiles[regfile][idx]`` is a list of (funame, fu, idx)
          tuples, one per FU whose operand number ``idx`` uses that regfile
        * ``byregfiles_spec[regfile][regname]`` is a tuple
          (rdflag, read, write, wid, fuspec-list) where the last item
          lists every (funame, fu, idx) using that named port.  The first
          three items come from the first FU seen for that port; ``write``
          is None in read mode, ``rdflag`` and ``read`` are None in write
          mode.
        """
        mode = "read" if readmode else "write"
        fus = self.fus.fus
        e = self.e  # decoded instruction to execute

        # dictionary of lists of regfile ports
        byregfiles = {}
        byregfiles_spec = {}
        for (funame, fu) in fus.items():
            print("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                if readmode:
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
                if readmode:
                    rdflag, read = regspec_decode_read(e, regfile, regname)
                    write = None
                else:
                    rdflag, read = None, None
                    wrport, write = regspec_decode_write(e, regfile, regname)
                if regfile not in byregfiles:
                    byregfiles[regfile] = {}
                    byregfiles_spec[regfile] = {}
                if regname not in byregfiles_spec[regfile]:
                    byregfiles_spec[regfile][regname] = \
                        (rdflag, read, write, wid, [])
                # here we start to create "lanes"
                if idx not in byregfiles[regfile]:
                    byregfiles[regfile][idx] = []
                fuspec = (funame, fu, idx)
                byregfiles[regfile][idx].append(fuspec)
                byregfiles_spec[regfile][regname][4].append(fuspec)

        # ok just print that out, for convenience
        for regfile in byregfiles:
            print("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, read, write, wid, fuspec] = fspec
                print(" rf %s port %s lane: %s" % (mode, regfile, regname))
                print(" %s" % regname, wid, read, write, rdflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print(" ", funame, fu, idx, fusig)

        return byregfiles, byregfiles_spec

    # NOTE(review): the `def` lines enclosing the three `yield from`
    # statements, and the body of the ports() method that the __main__
    # script calls, were not visible.  An iterator plus a list-returning
    # ports() is presumed - confirm.
    def __iter__(self):
        """Yield the ports of the FUs, the main decoder record and the L0."""
        yield from self.fus.ports()
        yield from self.e.ports()
        yield from self.l0.ports()

    def ports(self):
        """Return every port yielded by iterating this core, as a list."""
        return list(self)
570 if __name__
== '__main__':
571 pspec
= TestMemPspec(ldst_ifacetype
='testpi',
576 dut
= NonProductionCore(pspec
)
577 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
578 with
open("test_core.il", "w") as f
: