"""
This core is not in any way intended for production use.  It connects up
FunctionUnits to Register Files in a brain-dead fashion that only permits
one and only one Function Unit to be operational.

the principle here is to take the Function Units, analyse their regspecs,
and turn their requirements for access to register file read/write ports
into groupings by Register File and Register File Port name.

under each grouping - by regfile/port - a list of Function Units that
need to connect to that port is created.  as these are a contended
resource a "Broadcast Bus" per read/write port is then also created,
with access to it managed by a PriorityPicker.

the brain-dead part of this module is that even though there is no
conflict of access, regfile read/write hazards are *not* analysed,
and consequently it is safer to wait for the Function Unit to complete
before allowing a new instruction to proceed.
"""

import operator

from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
from nmigen.cli import rtlil

from nmutil.picker import PriorityPicker
from nmutil.util import treereduce
from nmutil.util import rising_edge

from soc.decoder.power_decoder2 import PowerDecodeSubset
from soc.decoder.power_regspec_map import regspec_decode_read
from soc.decoder.power_regspec_map import regspec_decode_write
from soc.fu.compunits.compunits import AllFunctionUnits
from soc.regfile.regfiles import RegFiles
from soc.decoder.decode2execute1 import Decode2ToExecute1Type
from soc.decoder.decode2execute1 import IssuerDecode2ToOperand
from soc.decoder.power_decoder2 import get_rdflags
from soc.decoder.decode2execute1 import Data
from soc.experiment.l0_cache import TstL0CacheBuffer  # test only
from soc.config.test.test_loadstore import TestMemPspec
from soc.decoder.power_enums import MicrOp
from soc.config.state import CoreState


# helper functions for reducing a list of signals down to a single signal
# by ORing them all together in parallel (as a tree).
def ortreereduce(tree, attr="data_o"):
    """OR-reduce one attribute taken from every item of `tree`.

    tree: the items to combine; handed unchanged to `treereduce`.
    attr: name of the attribute read from each item (default "data_o").

    Returns whatever `treereduce` returns when combining the selected
    attributes with `operator.or_`.
    """
    def select(item):
        # the per-item accessor that treereduce applies before ORing
        return getattr(item, attr)

    return treereduce(tree, operator.or_, select)
def ortreereduce_sig(tree):
    """OR-reduce the items of `tree` themselves (no attribute lookup).

    Same as `ortreereduce`, except that each item is used as-is rather
    than having an attribute extracted from it first.
    """
    def as_is(item):
        return item

    return treereduce(tree, operator.or_, as_is)
# helper function to place "full" register declarations first
def sort_fuspecs(fuspecs):
    """Return the entries of `fuspecs` with the "full" ports placed first.

    fuspecs: dict mapping a regfile port name to its spec.

    Returns a list of (regname, fspec) tuples: every entry whose name
    starts with "full" comes first, followed by all remaining entries.
    Within each group the dict's own iteration order is preserved.
    An empty dict gives an empty list.
    """
    full_first = []
    others = []
    for regname, fspec in fuspecs.items():
        # stable two-way partition on the "full" name prefix
        bucket = full_first if regname.startswith("full") else others
        bucket.append((regname, fspec))
    return full_first + others
class NonProductionCore(Elaboratable):
    """Core connecting Function Units to Register Files, one FU at a time.

    Each Function Unit gets its own (subsetted) instruction decoder.  For
    every regfile read port and write port that Function Units ask for,
    a PriorityPicker is created to arbitrate access to that port, and a
    broadcast bus connects the port to all Function Units that use it.
    """

    def __init__(self, pspec):
        """Create the sub-blocks and the top-level signals of the core.

        pspec: parameter specification, passed on to TstL0CacheBuffer and
               AllFunctionUnits.  elaborate() also checks it for an
               optional "nocore" attribute.
        """
        self.pspec = pspec

        # single LD/ST funnel for memory access
        self.l0 = TstL0CacheBuffer(pspec, n_units=1)
        pi = self.l0.l0.dports[0]

        # function units (only one each)
        # only include mmu if enabled in pspec
        self.fus = AllFunctionUnits(pspec, pilist=[pi])

        # register files (yes plural)
        self.regs = RegFiles()

        # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
        self.e = Decode2ToExecute1Type("core", opkls=IssuerDecode2ToOperand)

        # SVP64 RA_OR_ZERO needs to know if the relevant EXTRA2/3 field is zero
        self.sv_a_nz = Signal()

        # state and raw instruction
        self.state = CoreState("core")
        self.raw_insn_i = Signal(32)  # raw instruction
        self.bigendian_i = Signal()   # bigendian - TODO, set by MSR.BE

        # issue/valid/busy signalling
        self.ivalid_i = Signal(reset_less=True)  # instruction is valid
        self.issue_i = Signal(reset_less=True)
        self.busy_o = Signal(name="corebusy_o", reset_less=True)

        # start/stop and terminated signalling
        self.core_stopped_i = Signal(reset_less=True)
        self.core_terminate_o = Signal(reset=0)  # indicates stopped

        # create per-FU instruction decoders (subsetted)
        self.decoders = {}  # funame -> PowerDecodeSubset
        self.des = {}       # funame -> decoded operand record (".do")

        # name of the TRAP Function Unit; stays None if there is none, so
        # that elaborate() does not fail on a missing attribute
        self.trapunit = None

        for funame, fu in self.fus.fus.items():
            f_name = fu.fnunit.name
            opkls = fu.opsubsetkls
            if f_name == 'TRAP':
                # no local decoder for TRAP: elaborate() routes the main
                # decoder's output (self.e.do) to it instead
                self.trapunit = funame
                continue
            # NOTE(review): the source is truncated after "f_name," here;
            # the final/state keyword arguments are reconstructed - confirm
            self.decoders[funame] = PowerDecodeSubset(None, opkls, f_name,
                                                      final=True,
                                                      state=self.state)
            self.des[funame] = self.decoders[funame].do

        if "mmu0" in self.decoders:
            self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]

    def elaborate(self, platform):
        """Build the nmigen Module for the core and return it.

        If pspec has a "nocore" attribute equal to True, a module that
        contains only a dummy toggling signal is returned instead.
        """
        m = Module()
        # for testing purposes, to cut down on build time in coriolis2
        if hasattr(self.pspec, "nocore") and self.pspec.nocore == True:
            x = Signal()  # dummy signal
            m.d.sync += x.eq(~x)
            return m
        comb = m.d.comb

        m.submodules.fus = self.fus
        m.submodules.l0 = self.l0
        self.regs.elaborate_into(m, platform)

        # connect decoders
        for v in self.decoders.values():
            setattr(m.submodules, "dec_%s" % v.fn_name, v)
            comb += v.dec.raw_opcode_in.eq(self.raw_insn_i)
            comb += v.dec.bigendian.eq(self.bigendian_i)
            # sigh due to SVP64 RA_OR_ZERO detection connect these too
            comb += v.sv_a_nz.eq(self.sv_a_nz)

        # ssh, cheat: trap uses the main decoder because of the rewriting
        if self.trapunit is not None:
            self.des[self.trapunit] = self.e.do

        # connect up Function Units, then read/write ports
        fu_bitdict = self.connect_instruction(m)
        self.connect_rdports(m, fu_bitdict)
        self.connect_wrports(m, fu_bitdict)

        return m

    def connect_instruction(self, m):
        """connect_instruction

        uses decoded (from PowerOp) function unit information from CSV files
        to ascertain which Function Unit should deal with the current
        instruction.

        some (such as OP_ATTN, OP_NOP) are dealt with here, including
        ignoring it and halting the processor.  OP_NOP is a bit annoying
        because the issuer expects busy flag still to be raised then lowered.
        (this requires a fake counter to be set).

        Returns a dict mapping each Function Unit name to its one-bit
        enable signal.
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus

        # enable-signals for each FU, get one bit for each FU (by name)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_bitdict = {}
        for i, funame in enumerate(fus.keys()):
            fu_bitdict[funame] = fu_enable[i]

        # enable the required Function Unit based on the opcode decode
        # note: this *only* works correctly for simple core when one and
        # *only* one FU is allocated per instruction
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            enable = Signal(name="en_%s" % funame, reset_less=True)
            comb += enable.eq((self.e.do.fn_unit & fnunit).bool())
            comb += fu_bitdict[funame].eq(enable)

        # sigh - need a NOP counter
        counter = Signal(2)
        with m.If(counter != 0):
            sync += counter.eq(counter - 1)
            comb += self.busy_o.eq(1)

        with m.If(self.ivalid_i):  # run only when valid
            with m.Switch(self.e.do.insn_type):
                # check for ATTN: halt if true
                with m.Case(MicrOp.OP_ATTN):
                    m.d.sync += self.core_terminate_o.eq(1)

                with m.Case(MicrOp.OP_NOP):
                    sync += counter.eq(2)
                    comb += self.busy_o.eq(1)

                with m.Default():
                    # connect up instructions.  only one enabled at a time
                    for funame, fu in fus.items():
                        do = self.des[funame]
                        enable = fu_bitdict[funame]

                        # run this FunctionUnit if enabled
                        # route op, issue, busy, read flags and mask to FU
                        with m.If(enable):
                            # operand comes from the *local* decoder
                            comb += fu.oper_i.eq_from(do)
                            #comb += fu.oper_i.eq_from_execute1(e)
                            comb += fu.issue_i.eq(self.issue_i)
                            comb += self.busy_o.eq(fu.busy_o)
                            # rdmask, which is for registers, needs to come
                            # from the *main* decoder
                            rdmask = get_rdflags(self.e, fu)
                            comb += fu.rdmaskn.eq(~rdmask)

        return fu_bitdict

    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
        """Connect one regfile read port to every Function Unit that uses it.

        m:          the Module being built
        fu_bitdict: FU name -> enable signal (from connect_instruction)
        rdpickers:  dict-of-dicts; the PriorityPicker created here is
                    stored in rdpickers[regfile][regname]
        regfile:    regfile name; lower-cased to index self.regs.rf
        regname:    name of the read port within that regfile
        fspec:      one (rdflag, read, write, wid, fuspec) tuple, or a
                    list of them when several specs share this port
        """
        comb, sync = m.d.comb, m.d.sync
        regs = self.regs

        rpidx = regname

        # select the required read port.  these are pre-defined sizes
        rfile = regs.rf[regfile.lower()]
        rport = rfile.r_ports[rpidx]
        # NOTE(review): the tail of this debug print is truncated in the
        # source; the last two arguments are reconstructed - confirm
        print("read regfile", rpidx, regfile, regs.rf.keys(),
              rfile, rfile.unary)

        # normalise to a list of specs
        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        rdflags = []  # one read-flag signal per spec
        pplen = 0     # total number of picker inputs needed
        reads = []    # the "read" entry of each spec
        ppoffs = []   # offset of each spec's first picker input
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = fspec
            print("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)
            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name=name, reset_less=True)
            comb += rdflag.eq(rf)
            rdflags.append(rdflag)
            reads.append(read)

        print("pplen", pplen)

        # create a priority picker to manage this port
        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

        rens = []
        addrs = []
        for i, fspec in enumerate(fspecs):
            (rf, read, write, wid, fuspec) = fspec
            # connect up the FU req/go signals, and the reg-read to the FU
            # and create a Read Broadcast Bus
            for pi, (funame, fu, idx) in enumerate(fuspec):
                pi += ppoffs[i]  # position within the shared picker

                # connect request-read to picker input, and output to go-rd
                fu_active = fu_bitdict[funame]
                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
                addr_en = Signal.like(reads[i], name="addr_en_"+name)
                pick = Signal(name="pick_"+name)      # picker input
                rp = Signal(name="rp_"+name)          # picker output
                delay_pick = Signal(name="dp_"+name)  # read-enable "underway"

                # exclude any currently-enabled read-request (mask out active)
                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
                                ~delay_pick)
                comb += rdpick.i[pi].eq(pick)
                comb += fu.go_rd_i[idx].eq(delay_pick)  # pass in *delayed* pick

                # if picked, select read-port "reg select" number to port
                comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
                sync += delay_pick.eq(rp)  # delayed "pick"
                comb += addr_en.eq(Mux(rp, reads[i], 0))

                # the read-enable happens combinatorially (see mux-bus below)
                # but it results in the data coming out on a one-cycle delay.
                if rfile.unary:
                    rens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    rens.append(rp)

                # use the *delayed* pick signal to put requested data onto bus
                with m.If(delay_pick):
                    # connect regfile port to input, creating fan-out Bus
                    src = fu.src_i[idx]
                    print("reg connect widths",
                          regfile, regname, pi, funame,
                          src.shape(), rport.data_o.shape())
                    # all FUs connect to same port
                    comb += src.eq(rport.data_o)

        # or-reduce the muxed read signals
        if rfile.unary:
            # for unary-addressed
            comb += rport.ren.eq(ortreereduce_sig(rens))
        else:
            # for binary-addressed
            comb += rport.addr.eq(ortreereduce_sig(addrs))
            comb += rport.ren.eq(Cat(*rens).bool())
            print("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    def connect_rdports(self, m, fu_bitdict):
        """connect read ports

        orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker.
        """
        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        rdpickers = {}
        for regfile in byregfiles_rd:
            fuspecs = byregfiles_rdspec[regfile]
            rdpickers[regfile] = {}

            # argh.  an experiment to merge RA and RB in the INT regfile
            # (we have too many read/write ports)
            #if regfile == 'INT':
                #fuspecs['rabc'] = [fuspecs.pop('rb')]
                #fuspecs['rabc'].append(fuspecs.pop('rc'))
                #fuspecs['rabc'].append(fuspecs.pop('ra'))
            #if regfile == 'FAST':
            #    fuspecs['fast1'] = [fuspecs.pop('fast1')]
            #    if 'fast2' in fuspecs:
            #        fuspecs['fast1'].append(fuspecs.pop('fast2'))

            # for each named regfile port, connect up all FUs to that port
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect rd", regname, fspec)
                self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                    regname, fspec)

    def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
        """Connect one regfile write port to every Function Unit that uses it.

        m:          the Module being built
        fu_bitdict: FU name -> enable signal (from connect_instruction)
        wrpickers:  dict-of-dicts; the PriorityPicker created here is
                    stored in wrpickers[regfile][regname]
        regfile:    regfile name; lower-cased to index self.regs.rf
        regname:    name of the write port within that regfile
        fspec:      one (rdflag, read, write, wid, fuspec) tuple, or a
                    list of them when several specs share this port
        """
        comb = m.d.comb
        regs = self.regs

        print("connect wr", regname, fspec)
        rpidx = regname

        # select the required write port.  these are pre-defined sizes
        print(regfile, regs.rf.keys())
        rfile = regs.rf[regfile.lower()]
        wport = rfile.w_ports[rpidx]

        # normalise to a list of specs
        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        pplen = 0    # total number of picker inputs needed
        ppoffs = []  # offset of each spec's first picker input
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = fspec
            print("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)

        # create a priority picker to manage this port
        wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
        setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

        wsigs = []
        wens = []
        addrs = []
        for i, fspec in enumerate(fspecs):
            # connect up the FU req/go signals and the reg-read to the FU
            # these are arbitrated by Data.ok signals
            (rf, read, write, wid, fuspec) = fspec
            for pi, (funame, fu, idx) in enumerate(fuspec):
                pi += ppoffs[i]  # position within the shared picker

                # write-request comes from dest.ok
                dest = fu.get_out(idx)
                fu_dest_latch = fu.get_fu_out(idx)  # latched output
                name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                wrflag = Signal(name=name, reset_less=True)
                comb += wrflag.eq(dest.ok & fu.busy_o)

                # connect request-write to picker input, and output to go-wr
                fu_active = fu_bitdict[funame]
                pick = fu.wr.rel_o[idx] & fu_active  # & wrflag
                comb += wrpick.i[pi].eq(pick)
                # create a single-pulse go write from the picker output
                wr_pick = Signal()
                comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
                comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

                # connect the regspec write "reg select" number to this port
                # only if one FU actually requests (and is granted) the port
                # will the write-enable be activated
                addr_en = Signal.like(write)
                wp = Signal()
                comb += wp.eq(wr_pick & wrpick.en_o)
                comb += addr_en.eq(Mux(wp, write, 0))
                if rfile.unary:
                    wens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    wens.append(wp)

                # connect regfile port to input
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      dest.shape(), wport.data_i.shape())
                wsigs.append(fu_dest_latch)

        # here is where we create the Write Broadcast Bus. simple, eh?
        comb += wport.data_i.eq(ortreereduce_sig(wsigs))
        if rfile.unary:
            # for unary-addressed
            comb += wport.wen.eq(ortreereduce_sig(wens))
        else:
            # for binary-addressed
            comb += wport.addr.eq(ortreereduce_sig(addrs))
            comb += wport.wen.eq(ortreereduce_sig(wens))

    def connect_wrports(self, m, fu_bitdict):
        """connect write ports

        orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker.

        note that the write-port wen, write-port data, and go_wr_i all need to
        be on the exact same clock cycle.

        NOTE(review): this docstring used to say that, due to a
        combinatorial loop bug, "these all use sync" - but connect_wrport
        drives all three with comb.  confirm which one is intended.
        """
        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same for write ports.
        # BLECH!  complex code-duplication! BLECH!
        wrpickers = {}
        for regfile in byregfiles_wr:
            fuspecs = byregfiles_wrspec[regfile]
            wrpickers[regfile] = {}

            # argh, more port-merging
            if regfile == 'INT':
                fuspecs['o'] = [fuspecs.pop('o')]
                fuspecs['o'].append(fuspecs.pop('o1'))
            if regfile == 'FAST':
                fuspecs['fast1'] = [fuspecs.pop('fast1')]
                if 'fast2' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast2'))

            for (regname, fspec) in sort_fuspecs(fuspecs):
                self.connect_wrport(m, fu_bitdict, wrpickers,
                                    regfile, regname, fspec)

    def get_byregfiles(self, readmode):
        """Group the Function Units' register ports by regfile.

        readmode: True to collect source (read) ports, False to collect
                  destination (write) ports.

        Returns a tuple (byregfiles, byregfiles_spec):

        * byregfiles[regfile][idx] is a list of (funame, fu, idx) tuples
        * byregfiles_spec[regfile][regname] is a tuple
          (rdflag, read, write, wid, fuspecs), where fuspecs is the list
          of (funame, fu, idx) tuples that use that port.  rdflag and
          read are None in write mode; write is None in read mode.
        """
        mode = "read" if readmode else "write"
        fus = self.fus.fus
        e = self.e  # decoded instruction to execute

        # dictionary of lists of regfile ports
        byregfiles = {}
        byregfiles_spec = {}
        for (funame, fu) in fus.items():
            print("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                if readmode:
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
                if readmode:
                    rdflag, read = regspec_decode_read(e, regfile, regname)
                    write = None
                else:
                    rdflag, read = None, None
                    # first returned item is not used here
                    _, write = regspec_decode_write(e, regfile, regname)
                if regfile not in byregfiles:
                    byregfiles[regfile] = {}
                    byregfiles_spec[regfile] = {}
                if regname not in byregfiles_spec[regfile]:
                    byregfiles_spec[regfile][regname] = \
                        (rdflag, read, write, wid, [])
                # here we start to create "lanes"
                if idx not in byregfiles[regfile]:
                    byregfiles[regfile][idx] = []
                fuspec = (funame, fu, idx)
                byregfiles[regfile][idx].append(fuspec)
                byregfiles_spec[regfile][regname][4].append(fuspec)

        # ok just print that out, for convenience
        for regfile in byregfiles:
            print("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, read, write, wid, fuspec] = fspec
                print(" rf %s port %s lane: %s" % (mode, regfile, regname))
                print(" %s" % regname, wid, read, write, rdflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print(" ", funame, fu, idx, fusig)

        return byregfiles, byregfiles_spec

    def __iter__(self):
        """Yield the ports of the Function Units, decoder record and L0."""
        yield from self.fus.ports()
        yield from self.e.ports()
        yield from self.l0.ports()
        # TODO: regs

    def ports(self):
        """Return all ports (see __iter__) as a list."""
        return list(self)
# when run as a script: build the core and write it out as RTLIL
if __name__ == '__main__':
    # NOTE(review): the source is truncated after ldst_ifacetype; the
    # remaining keyword arguments are reconstructed - confirm the values
    pspec = TestMemPspec(ldst_ifacetype='testpi',
                         imem_ifacetype='',
                         addr_wid=48,
                         mask_wid=8,
                         reg_wid=64)
    dut = NonProductionCore(pspec)
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_core.il", "w") as f:
        f.write(vl)