not in any way intended for production use.  connects up Function Units to
Register Files in a brain-dead fashion that permits one, and only one,
Function Unit to be operational at a time.

the principle here is to take the Function Units, analyse their regspecs,
and turn their requirements for access to register file read/write ports
into groupings by Register File and Register File Port name.

under each grouping - by regfile/port - a list of Function Units that
need to connect to that port is created.  as these ports are a contended
resource, a "Broadcast Bus" per read/write port is then also created,
with access to it managed by a PriorityPicker.

the brain-dead part of this module is that, even though there is no
conflict of access, regfile read/write hazards are *not* analysed,
and consequently it is safer to wait for the Function Unit to complete
before allowing a new instruction to proceed.
22 from nmigen
import Elaboratable
, Module
, Signal
, ResetSignal
, Cat
, Mux
23 from nmigen
.cli
import rtlil
25 from openpower
.decoder
.power_decoder2
import PowerDecodeSubset
26 from openpower
.decoder
.power_regspec_map
import regspec_decode_read
27 from openpower
.decoder
.power_regspec_map
import regspec_decode_write
29 from nmutil
.picker
import PriorityPicker
30 from nmutil
.util
import treereduce
32 from soc
.fu
.compunits
.compunits
import AllFunctionUnits
33 from soc
.regfile
.regfiles
import RegFiles
34 from openpower
.decoder
.decode2execute1
import Decode2ToExecute1Type
35 from openpower
.decoder
.decode2execute1
import IssuerDecode2ToOperand
36 from openpower
.decoder
.power_decoder2
import get_rdflags
37 from openpower
.decoder
.decode2execute1
import Data
38 from soc
.experiment
.l0_cache
import TstL0CacheBuffer
# test only
39 from soc
.config
.test
.test_loadstore
import TestMemPspec
40 from openpower
.decoder
.power_enums
import MicrOp
41 from soc
.config
.state
import CoreState
45 from nmutil
.util
import rising_edge
# helper function for reducing a list of signals down to a single
# signal by ORing them together
def ortreereduce(tree, attr="data_o"):
    """OR-reduce the `attr` attribute of every entry in `tree`.

    tree: the items to combine; each one must expose the attribute
          named by `attr`
    attr: name of the attribute read from each item (default "data_o")

    returns whatever treereduce() produces when it combines those
    attributes using operator.or_
    """
    def fetch(item):
        # pick out the attribute that is to be ORed
        return getattr(item, attr)

    return treereduce(tree, operator.or_, fetch)
def ortreereduce_sig(tree):
    """OR-reduce the entries of `tree` themselves.

    same as ortreereduce, except that each entry is used directly
    rather than one of its attributes.

    returns whatever treereduce() produces when it combines the
    entries using operator.or_
    """
    def identity(sig):
        # the entry itself is the value to be ORed
        return sig

    return treereduce(tree, operator.or_, identity)
# helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the (regname, fspec) pairs of `fuspecs`, "full" ports first.

    fuspecs: dict mapping a regfile port name to its spec

    returns a list of (regname, fspec) tuples: every entry whose name
    starts with "full" comes first, followed by all the others.  within
    each group the dict's own iteration order is kept.
    """
    # sorted() is stable and False sorts before True, so keying on
    # "is NOT a full port" moves the full ports to the front without
    # disturbing the relative order inside either group.
    return sorted(fuspecs.items(),
                  key=lambda item: not item[0].startswith("full"))
70 class NonProductionCore(Elaboratable
):
71 def __init__(self
, pspec
):
# test if SVP64 is to be enabled
75 self
.svp64_en
= hasattr(pspec
, "svp64") and (pspec
.svp64
== True)
77 # test to see if regfile ports should be reduced
78 self
.regreduce_en
= (hasattr(pspec
, "regreduce") and
79 (pspec
.regreduce
== True))
81 # single LD/ST funnel for memory access
82 self
.l0
= l0
= TstL0CacheBuffer(pspec
, n_units
=1)
85 # function units (only one each)
86 # only include mmu if enabled in pspec
87 self
.fus
= AllFunctionUnits(pspec
, pilist
=[pi
])
89 # link LoadStore1 into MMU
90 if hasattr(self
.fus
, 'mmu') and hasattr(l0
.cmpi
, "ldst"):
91 self
.fus
.mmu
.set_ldst_interface(l0
.cmpi
.ldst
)
93 # register files (yes plural)
94 self
.regs
= RegFiles(pspec
)
96 # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
97 self
.e
= Decode2ToExecute1Type("core", opkls
=IssuerDecode2ToOperand
,
98 regreduce_en
=self
.regreduce_en
)
100 # SVP64 RA_OR_ZERO needs to know if the relevant EXTRA2/3 field is zero
101 self
.sv_a_nz
= Signal()
103 # state and raw instruction
104 self
.state
= CoreState("core")
105 self
.raw_insn_i
= Signal(32) # raw instruction
106 self
.bigendian_i
= Signal() # bigendian - TODO, set by MSR.BE
108 # issue/valid/busy signalling
109 self
.ivalid_i
= Signal(reset_less
=True) # instruction is valid
110 self
.issue_i
= Signal(reset_less
=True)
111 self
.busy_o
= Signal(name
="corebusy_o", reset_less
=True)
113 # start/stop and terminated signalling
114 self
.core_terminate_o
= Signal(reset
=0) # indicates stopped
116 # create per-FU instruction decoders (subsetted)
120 for funame
, fu
in self
.fus
.fus
.items():
121 f_name
= fu
.fnunit
.name
122 fnunit
= fu
.fnunit
.value
123 opkls
= fu
.opsubsetkls
125 self
.trapunit
= funame
127 self
.decoders
[funame
] = PowerDecodeSubset(None, opkls
, f_name
,
130 svp64_en
=self
.svp64_en
,
131 regreduce_en
=self
.regreduce_en
)
132 self
.des
[funame
] = self
.decoders
[funame
].do
134 if "mmu0" in self
.decoders
:
135 self
.decoders
["mmu0"].mmu0_spr_dec
= self
.decoders
["spr0"]
137 def elaborate(self
, platform
):
139 # for testing purposes, to cut down on build time in coriolis2
140 if hasattr(self
.pspec
, "nocore") and self
.pspec
.nocore
== True:
141 x
= Signal() # dummy signal
146 m
.submodules
.fus
= self
.fus
147 m
.submodules
.l0
= l0
= self
.l0
148 self
.regs
.elaborate_into(m
, platform
)
153 for k
, v
in self
.decoders
.items():
154 setattr(m
.submodules
, "dec_%s" % v
.fn_name
, v
)
155 comb
+= v
.dec
.raw_opcode_in
.eq(self
.raw_insn_i
)
156 comb
+= v
.dec
.bigendian
.eq(self
.bigendian_i
)
157 # sigh due to SVP64 RA_OR_ZERO detection connect these too
158 comb
+= v
.sv_a_nz
.eq(self
.sv_a_nz
)
160 # ssh, cheat: trap uses the main decoder because of the rewriting
161 self
.des
[self
.trapunit
] = self
.e
.do
163 # connect up Function Units, then read/write ports
164 fu_bitdict
= self
.connect_instruction(m
)
165 self
.connect_rdports(m
, fu_bitdict
)
166 self
.connect_wrports(m
, fu_bitdict
)
170 def connect_instruction(self
, m
):
171 """connect_instruction
uses decoded (from PowerOp) function unit information from CSV files
to ascertain which Function Unit should deal with the current instruction.
177 some (such as OP_ATTN, OP_NOP) are dealt with here, including
178 ignoring it and halting the processor. OP_NOP is a bit annoying
179 because the issuer expects busy flag still to be raised then lowered.
180 (this requires a fake counter to be set).
182 comb
, sync
= m
.d
.comb
, m
.d
.sync
185 # enable-signals for each FU, get one bit for each FU (by name)
186 fu_enable
= Signal(len(fus
), reset_less
=True)
188 for i
, funame
in enumerate(fus
.keys()):
189 fu_bitdict
[funame
] = fu_enable
[i
]
191 # enable the required Function Unit based on the opcode decode
192 # note: this *only* works correctly for simple core when one and
193 # *only* one FU is allocated per instruction
194 for funame
, fu
in fus
.items():
195 fnunit
= fu
.fnunit
.value
196 enable
= Signal(name
="en_%s" % funame
, reset_less
=True)
197 comb
+= enable
.eq((self
.e
.do
.fn_unit
& fnunit
).bool())
198 comb
+= fu_bitdict
[funame
].eq(enable
)
200 # sigh - need a NOP counter
202 with m
.If(counter
!= 0):
203 sync
+= counter
.eq(counter
- 1)
204 comb
+= self
.busy_o
.eq(1)
206 with m
.If(self
.ivalid_i
): # run only when valid
207 with m
.Switch(self
.e
.do
.insn_type
):
208 # check for ATTN: halt if true
209 with m
.Case(MicrOp
.OP_ATTN
):
210 m
.d
.sync
+= self
.core_terminate_o
.eq(1)
212 with m
.Case(MicrOp
.OP_NOP
):
213 sync
+= counter
.eq(2)
214 comb
+= self
.busy_o
.eq(1)
217 # connect up instructions. only one enabled at a time
218 for funame
, fu
in fus
.items():
219 do
= self
.des
[funame
]
220 enable
= fu_bitdict
[funame
]
222 # run this FunctionUnit if enabled
223 # route op, issue, busy, read flags and mask to FU
225 # operand comes from the *local* decoder
226 comb
+= fu
.oper_i
.eq_from(do
)
227 #comb += fu.oper_i.eq_from_execute1(e)
228 comb
+= fu
.issue_i
.eq(self
.issue_i
)
229 comb
+= self
.busy_o
.eq(fu
.busy_o
)
230 # rdmask, which is for registers, needs to come
231 # from the *main* decoder
232 rdmask
= get_rdflags(self
.e
, fu
)
233 comb
+= fu
.rdmaskn
.eq(~rdmask
)
237 def connect_rdport(self
, m
, fu_bitdict
, rdpickers
, regfile
, regname
, fspec
):
238 comb
, sync
= m
.d
.comb
, m
.d
.sync
244 # select the required read port. these are pre-defined sizes
245 rfile
= regs
.rf
[regfile
.lower()]
246 rport
= rfile
.r_ports
[rpidx
]
247 print("read regfile", rpidx
, regfile
, regs
.rf
.keys(),
251 if not isinstance(fspecs
, list):
258 for i
, fspec
in enumerate(fspecs
):
259 # get the regfile specs for this regfile port
260 (rf
, read
, write
, wid
, fuspec
) = fspec
261 print ("fpsec", i
, fspec
, len(fuspec
))
262 ppoffs
.append(pplen
) # record offset for picker
264 name
= "rdflag_%s_%s_%d" % (regfile
, regname
, i
)
265 rdflag
= Signal(name
=name
, reset_less
=True)
266 comb
+= rdflag
.eq(rf
)
267 rdflags
.append(rdflag
)
270 print ("pplen", pplen
)
272 # create a priority picker to manage this port
273 rdpickers
[regfile
][rpidx
] = rdpick
= PriorityPicker(pplen
)
274 setattr(m
.submodules
, "rdpick_%s_%s" % (regfile
, rpidx
), rdpick
)
278 for i
, fspec
in enumerate(fspecs
):
279 (rf
, read
, write
, wid
, fuspec
) = fspec
280 # connect up the FU req/go signals, and the reg-read to the FU
281 # and create a Read Broadcast Bus
282 for pi
, (funame
, fu
, idx
) in enumerate(fuspec
):
285 # connect request-read to picker input, and output to go-rd
286 fu_active
= fu_bitdict
[funame
]
287 name
= "%s_%s_%s_%i" % (regfile
, rpidx
, funame
, pi
)
288 addr_en
= Signal
.like(reads
[i
], name
="addr_en_"+name
)
289 pick
= Signal(name
="pick_"+name
) # picker input
290 rp
= Signal(name
="rp_"+name
) # picker output
291 delay_pick
= Signal(name
="dp_"+name
) # read-enable "underway"
293 # exclude any currently-enabled read-request (mask out active)
294 comb
+= pick
.eq(fu
.rd_rel_o
[idx
] & fu_active
& rdflags
[i
] &
296 comb
+= rdpick
.i
[pi
].eq(pick
)
297 comb
+= fu
.go_rd_i
[idx
].eq(delay_pick
) # pass in *delayed* pick
299 # if picked, select read-port "reg select" number to port
300 comb
+= rp
.eq(rdpick
.o
[pi
] & rdpick
.en_o
)
301 sync
+= delay_pick
.eq(rp
) # delayed "pick"
302 comb
+= addr_en
.eq(Mux(rp
, reads
[i
], 0))
304 # the read-enable happens combinatorially (see mux-bus below)
305 # but it results in the data coming out on a one-cycle delay.
309 addrs
.append(addr_en
)
312 # use the *delayed* pick signal to put requested data onto bus
313 with m
.If(delay_pick
):
314 # connect regfile port to input, creating fan-out Bus
316 print("reg connect widths",
317 regfile
, regname
, pi
, funame
,
318 src
.shape(), rport
.data_o
.shape())
319 # all FUs connect to same port
320 comb
+= src
.eq(rport
.data_o
)
322 # or-reduce the muxed read signals
324 # for unary-addressed
325 comb
+= rport
.ren
.eq(ortreereduce_sig(rens
))
327 # for binary-addressed
328 comb
+= rport
.addr
.eq(ortreereduce_sig(addrs
))
329 comb
+= rport
.ren
.eq(Cat(*rens
).bool())
330 print ("binary", regfile
, rpidx
, rport
, rport
.ren
, rens
, addrs
)
332 def connect_rdports(self
, m
, fu_bitdict
):
333 """connect read ports
335 orders the read regspecs into a dict-of-dicts, by regfile, by
336 regport name, then connects all FUs that want that regport by
337 way of a PriorityPicker.
339 comb
, sync
= m
.d
.comb
, m
.d
.sync
343 # dictionary of lists of regfile read ports
344 byregfiles_rd
, byregfiles_rdspec
= self
.get_byregfiles(True)
346 # okaay, now we need a PriorityPicker per regfile per regfile port
347 # loootta pickers... peter piper picked a pack of pickled peppers...
349 for regfile
, spec
in byregfiles_rd
.items():
350 fuspecs
= byregfiles_rdspec
[regfile
]
351 rdpickers
[regfile
] = {}
353 # argh. an experiment to merge RA and RB in the INT regfile
354 # (we have too many read/write ports)
355 if self
.regreduce_en
:
357 fuspecs
['rabc'] = [fuspecs
.pop('rb')]
358 fuspecs
['rabc'].append(fuspecs
.pop('rc'))
359 fuspecs
['rabc'].append(fuspecs
.pop('ra'))
360 if regfile
== 'FAST':
361 fuspecs
['fast1'] = [fuspecs
.pop('fast1')]
362 if 'fast2' in fuspecs
:
363 fuspecs
['fast1'].append(fuspecs
.pop('fast2'))
365 # for each named regfile port, connect up all FUs to that port
366 for (regname
, fspec
) in sort_fuspecs(fuspecs
):
367 print("connect rd", regname
, fspec
)
368 self
.connect_rdport(m
, fu_bitdict
, rdpickers
, regfile
,
371 def connect_wrport(self
, m
, fu_bitdict
, wrpickers
, regfile
, regname
, fspec
):
372 comb
, sync
= m
.d
.comb
, m
.d
.sync
376 print("connect wr", regname
, fspec
)
379 # select the required write port. these are pre-defined sizes
380 print(regfile
, regs
.rf
.keys())
381 rfile
= regs
.rf
[regfile
.lower()]
382 wport
= rfile
.w_ports
[rpidx
]
385 if not isinstance(fspecs
, list):
391 for i
, fspec
in enumerate(fspecs
):
392 # get the regfile specs for this regfile port
393 (rf
, read
, write
, wid
, fuspec
) = fspec
394 print ("fpsec", i
, fspec
, len(fuspec
))
395 ppoffs
.append(pplen
) # record offset for picker
398 # create a priority picker to manage this port
399 wrpickers
[regfile
][rpidx
] = wrpick
= PriorityPicker(pplen
)
400 setattr(m
.submodules
, "wrpick_%s_%s" % (regfile
, rpidx
), wrpick
)
405 for i
, fspec
in enumerate(fspecs
):
406 # connect up the FU req/go signals and the reg-read to the FU
407 # these are arbitrated by Data.ok signals
408 (rf
, read
, write
, wid
, fuspec
) = fspec
409 for pi
, (funame
, fu
, idx
) in enumerate(fuspec
):
412 # write-request comes from dest.ok
413 dest
= fu
.get_out(idx
)
414 fu_dest_latch
= fu
.get_fu_out(idx
) # latched output
415 name
= "wrflag_%s_%s_%d" % (funame
, regname
, idx
)
416 wrflag
= Signal(name
=name
, reset_less
=True)
417 comb
+= wrflag
.eq(dest
.ok
& fu
.busy_o
)
419 # connect request-write to picker input, and output to go-wr
420 fu_active
= fu_bitdict
[funame
]
421 pick
= fu
.wr
.rel_o
[idx
] & fu_active
# & wrflag
422 comb
+= wrpick
.i
[pi
].eq(pick
)
423 # create a single-pulse go write from the picker output
425 comb
+= wr_pick
.eq(wrpick
.o
[pi
] & wrpick
.en_o
)
426 comb
+= fu
.go_wr_i
[idx
].eq(rising_edge(m
, wr_pick
))
428 # connect the regspec write "reg select" number to this port
429 # only if one FU actually requests (and is granted) the port
430 # will the write-enable be activated
431 addr_en
= Signal
.like(write
)
433 comb
+= wp
.eq(wr_pick
& wrpick
.en_o
)
434 comb
+= addr_en
.eq(Mux(wp
, write
, 0))
438 addrs
.append(addr_en
)
441 # connect regfile port to input
442 print("reg connect widths",
443 regfile
, regname
, pi
, funame
,
444 dest
.shape(), wport
.data_i
.shape())
445 wsigs
.append(fu_dest_latch
)
447 # here is where we create the Write Broadcast Bus. simple, eh?
448 comb
+= wport
.data_i
.eq(ortreereduce_sig(wsigs
))
450 # for unary-addressed
451 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
453 # for binary-addressed
454 comb
+= wport
.addr
.eq(ortreereduce_sig(addrs
))
455 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
457 def connect_wrports(self
, m
, fu_bitdict
):
458 """connect write ports
460 orders the write regspecs into a dict-of-dicts, by regfile,
461 by regport name, then connects all FUs that want that regport
462 by way of a PriorityPicker.
464 note that the write-port wen, write-port data, and go_wr_i all need to
465 be on the exact same clock cycle. as there is a combinatorial loop bug
466 at the moment, these all use sync.
468 comb
, sync
= m
.d
.comb
, m
.d
.sync
471 # dictionary of lists of regfile write ports
472 byregfiles_wr
, byregfiles_wrspec
= self
.get_byregfiles(False)
474 # same for write ports.
475 # BLECH! complex code-duplication! BLECH!
477 for regfile
, spec
in byregfiles_wr
.items():
478 fuspecs
= byregfiles_wrspec
[regfile
]
479 wrpickers
[regfile
] = {}
481 if self
.regreduce_en
:
482 # argh, more port-merging
484 fuspecs
['o'] = [fuspecs
.pop('o')]
485 fuspecs
['o'].append(fuspecs
.pop('o1'))
486 if regfile
== 'FAST':
487 fuspecs
['fast1'] = [fuspecs
.pop('fast1')]
488 if 'fast2' in fuspecs
:
489 fuspecs
['fast1'].append(fuspecs
.pop('fast2'))
491 for (regname
, fspec
) in sort_fuspecs(fuspecs
):
492 self
.connect_wrport(m
, fu_bitdict
, wrpickers
,
493 regfile
, regname
, fspec
)
495 def get_byregfiles(self
, readmode
):
497 mode
= "read" if readmode
else "write"
500 e
= self
.e
# decoded instruction to execute
502 # dictionary of lists of regfile ports
505 for (funame
, fu
) in fus
.items():
506 print("%s ports for %s" % (mode
, funame
))
507 for idx
in range(fu
.n_src
if readmode
else fu
.n_dst
):
509 (regfile
, regname
, wid
) = fu
.get_in_spec(idx
)
511 (regfile
, regname
, wid
) = fu
.get_out_spec(idx
)
512 print(" %d %s %s %s" % (idx
, regfile
, regname
, str(wid
)))
514 rdflag
, read
= regspec_decode_read(e
, regfile
, regname
)
517 rdflag
, read
= None, None
518 wrport
, write
= regspec_decode_write(e
, regfile
, regname
)
519 if regfile
not in byregfiles
:
520 byregfiles
[regfile
] = {}
521 byregfiles_spec
[regfile
] = {}
522 if regname
not in byregfiles_spec
[regfile
]:
523 byregfiles_spec
[regfile
][regname
] = \
524 (rdflag
, read
, write
, wid
, [])
525 # here we start to create "lanes"
526 if idx
not in byregfiles
[regfile
]:
527 byregfiles
[regfile
][idx
] = []
528 fuspec
= (funame
, fu
, idx
)
529 byregfiles
[regfile
][idx
].append(fuspec
)
530 byregfiles_spec
[regfile
][regname
][4].append(fuspec
)
532 # ok just print that out, for convenience
533 for regfile
, spec
in byregfiles
.items():
534 print("regfile %s ports:" % mode
, regfile
)
535 fuspecs
= byregfiles_spec
[regfile
]
536 for regname
, fspec
in fuspecs
.items():
537 [rdflag
, read
, write
, wid
, fuspec
] = fspec
538 print(" rf %s port %s lane: %s" % (mode
, regfile
, regname
))
539 print(" %s" % regname
, wid
, read
, write
, rdflag
)
540 for (funame
, fu
, idx
) in fuspec
:
541 fusig
= fu
.src_i
[idx
] if readmode
else fu
.dest
[idx
]
542 print(" ", funame
, fu
, idx
, fusig
)
545 return byregfiles
, byregfiles_spec
548 yield from self
.fus
.ports()
549 yield from self
.e
.ports()
550 yield from self
.l0
.ports()
557 if __name__
== '__main__':
558 pspec
= TestMemPspec(ldst_ifacetype
='testpi',
563 dut
= NonProductionCore(pspec
)
564 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
565 with
open("test_core.il", "w") as f
: