Not in any way intended for production use.  Connects up FunctionUnits to
Register Files in a brain-dead fashion that permits one and only one
Function Unit to be operational at a time.
The principle here is to take the Function Units, analyse their regspecs,
and turn their requirements for access to register file read/write ports
into groupings by Register File and Register File Port name.
Under each grouping - by regfile/port - a list of Function Units that
need to connect to that port is created.  As these ports are a contended
resource, a "Broadcast Bus" per read/write port is then also created,
with access to it managed by a PriorityPicker.
The brain-dead part of this module is that, even though there is no
conflict of access, regfile read/write hazards are *not* analysed,
and consequently it is safer to wait for the Function Unit to complete
before allowing a new instruction to proceed.
22 from nmigen
import Elaboratable
, Module
, Signal
, ResetSignal
, Cat
, Mux
23 from nmigen
.cli
import rtlil
25 from soc
.decoder
.power_decoder2
import PowerDecodeSubset
26 from soc
.decoder
.power_regspec_map
import regspec_decode_read
27 from soc
.decoder
.power_regspec_map
import regspec_decode_write
29 from nmutil
.picker
import PriorityPicker
30 from nmutil
.util
import treereduce
32 from soc
.fu
.compunits
.compunits
import AllFunctionUnits
33 from soc
.regfile
.regfiles
import RegFiles
34 from soc
.decoder
.decode2execute1
import Decode2ToExecute1Type
35 from soc
.decoder
.decode2execute1
import IssuerDecode2ToOperand
36 from soc
.decoder
.power_decoder2
import get_rdflags
37 from soc
.decoder
.decode2execute1
import Data
38 from soc
.experiment
.l0_cache
import TstL0CacheBuffer
# test only
39 from soc
.config
.test
.test_loadstore
import TestMemPspec
40 from soc
.decoder
.power_enums
import MicrOp
41 from soc
.config
.state
import CoreState
45 from nmutil
.util
import rising_edge
# helper function for reducing a list of signals down to a single
# parallel ORed signal
def ortreereduce(tree, attr="data_o"):
    """OR together one attribute taken from every element of ``tree``.

    The attribute named by ``attr`` (``data_o`` by default) is fetched
    from each element with ``getattr`` and the fetched values are
    combined with ``operator.or_`` through ``treereduce``.
    """
    def fetch(item):
        # pull the requested attribute off a single element
        return getattr(item, attr)

    return treereduce(tree, operator.or_, fetch)
def ortreereduce_sig(tree):
    """OR together the elements of ``tree`` themselves.

    Each element is passed through unchanged and the elements are
    combined with ``operator.or_`` through ``treereduce``.
    """
    def identity(sig):
        # elements are used as-is: no attribute lookup is needed
        return sig

    return treereduce(tree, operator.or_, identity)
# helper function to place "full" regs declarations first
def sort_fuspecs(fuspecs):
    """Order regspec entries so that "full" declarations come first.

    Arguments:
        fuspecs: dict mapping a register port name to its spec.

    Returns:
        A list of ``(regname, fspec)`` tuples: first every entry whose
        name starts with ``"full"``, then every other entry.  Within
        each group the dict's own iteration order is kept.
    """
    # the result is built from two freshly-created lists, so it can never
    # be appended to before it exists
    entries = list(fuspecs.items())
    full_first = [(regname, fspec) for regname, fspec in entries
                  if regname.startswith("full")]
    remainder = [(regname, fspec) for regname, fspec in entries
                 if not regname.startswith("full")]
    return full_first + remainder
70 class NonProductionCore(Elaboratable
):
71 def __init__(self
, pspec
):
74 # single LD/ST funnel for memory access
75 self
.l0
= TstL0CacheBuffer(pspec
, n_units
=1)
76 pi
= self
.l0
.l0
.dports
[0]
81 self
.dcache
= DCache()
83 # function units (only one each)
84 self
.fus
= AllFunctionUnits(pspec
, pilist
=[pi
])
86 # register files (yes plural)
87 self
.regs
= RegFiles()
89 # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
90 self
.e
= Decode2ToExecute1Type("core", opkls
=IssuerDecode2ToOperand
)
92 self
.state
= CoreState("core")
93 self
.raw_insn_i
= Signal(32) # raw instruction
94 self
.bigendian_i
= Signal() # bigendian
96 # issue/valid/busy signalling
97 self
.ivalid_i
= Signal(reset_less
=True) # instruction is valid
98 self
.issue_i
= Signal(reset_less
=True)
99 self
.busy_o
= Signal(name
="corebusy_o", reset_less
=True)
101 # start/stop and terminated signalling
102 self
.core_stopped_i
= Signal(reset_less
=True)
103 self
.core_terminate_o
= Signal(reset
=0) # indicates stopped
105 # create per-FU instruction decoders (subsetted)
109 for funame
, fu
in self
.fus
.fus
.items():
110 f_name
= fu
.fnunit
.name
111 fnunit
= fu
.fnunit
.value
112 opkls
= fu
.opsubsetkls
114 self
.trapunit
= funame
116 self
.decoders
[funame
] = PowerDecodeSubset(None, opkls
, f_name
,
119 self
.des
[funame
] = self
.decoders
[funame
].do
121 def elaborate(self
, platform
):
123 # for testing purposes, to cut down on build time in coriolis2
124 if hasattr(self
.pspec
, "nocore") and self
.pspec
.nocore
== True:
125 x
= Signal() # dummy signal
130 m
.submodules
.fus
= self
.fus
131 m
.submodules
.l0
= l0
= self
.l0
132 self
.regs
.elaborate_into(m
, platform
)
137 for k
, v
in self
.decoders
.items():
138 setattr(m
.submodules
, "dec_%s" % v
.fn_name
, v
)
139 comb
+= v
.dec
.raw_opcode_in
.eq(self
.raw_insn_i
)
140 comb
+= v
.dec
.bigendian
.eq(self
.bigendian_i
)
142 # ssh, cheat: trap uses the main decoder because of the rewriting
143 self
.des
[self
.trapunit
] = self
.e
.do
145 # connect up Function Units, then read/write ports
146 fu_bitdict
= self
.connect_instruction(m
)
147 self
.connect_rdports(m
, fu_bitdict
)
148 self
.connect_wrports(m
, fu_bitdict
)
152 def connect_instruction(self
, m
):
153 """connect_instruction
155 uses decoded (from PowerOp) function unit information from CSV files
156 to ascertain which Function Unit should deal with the current
159 some (such as OP_ATTN, OP_NOP) are dealt with here, including
160 ignoring it and halting the processor. OP_NOP is a bit annoying
161 because the issuer expects busy flag still to be raised then lowered.
162 (this requires a fake counter to be set).
164 comb
, sync
= m
.d
.comb
, m
.d
.sync
167 # enable-signals for each FU, get one bit for each FU (by name)
168 fu_enable
= Signal(len(fus
), reset_less
=True)
170 for i
, funame
in enumerate(fus
.keys()):
171 fu_bitdict
[funame
] = fu_enable
[i
]
173 # enable the required Function Unit based on the opcode decode
174 # note: this *only* works correctly for simple core when one and
175 # *only* one FU is allocated per instruction
176 for funame
, fu
in fus
.items():
177 fnunit
= fu
.fnunit
.value
178 enable
= Signal(name
="en_%s" % funame
, reset_less
=True)
179 comb
+= enable
.eq((self
.e
.do
.fn_unit
& fnunit
).bool())
180 comb
+= fu_bitdict
[funame
].eq(enable
)
182 # sigh - need a NOP counter
184 with m
.If(counter
!= 0):
185 sync
+= counter
.eq(counter
- 1)
186 comb
+= self
.busy_o
.eq(1)
188 with m
.If(self
.ivalid_i
): # run only when valid
189 with m
.Switch(self
.e
.do
.insn_type
):
190 # check for ATTN: halt if true
191 with m
.Case(MicrOp
.OP_ATTN
):
192 m
.d
.sync
+= self
.core_terminate_o
.eq(1)
194 with m
.Case(MicrOp
.OP_NOP
):
195 sync
+= counter
.eq(2)
196 comb
+= self
.busy_o
.eq(1)
199 # connect up instructions. only one enabled at a time
200 for funame
, fu
in fus
.items():
201 do
= self
.des
[funame
]
202 enable
= fu_bitdict
[funame
]
204 # run this FunctionUnit if enabled
205 # route op, issue, busy, read flags and mask to FU
207 # operand comes from the *local* decoder
208 comb
+= fu
.oper_i
.eq_from(do
)
209 #comb += fu.oper_i.eq_from_execute1(e)
210 comb
+= fu
.issue_i
.eq(self
.issue_i
)
211 comb
+= self
.busy_o
.eq(fu
.busy_o
)
212 # rdmask, which is for registers, needs to come
213 # from the *main* decoder
214 rdmask
= get_rdflags(self
.e
, fu
)
215 comb
+= fu
.rdmaskn
.eq(~rdmask
)
219 def connect_rdport(self
, m
, fu_bitdict
, rdpickers
, regfile
, regname
, fspec
):
220 comb
, sync
= m
.d
.comb
, m
.d
.sync
226 # select the required read port. these are pre-defined sizes
227 rfile
= regs
.rf
[regfile
.lower()]
228 rport
= rfile
.r_ports
[rpidx
]
229 print("read regfile", rpidx
, regfile
, regs
.rf
.keys(),
233 if not isinstance(fspecs
, list):
240 for i
, fspec
in enumerate(fspecs
):
241 # get the regfile specs for this regfile port
242 (rf
, read
, write
, wid
, fuspec
) = fspec
243 print ("fpsec", i
, fspec
, len(fuspec
))
244 ppoffs
.append(pplen
) # record offset for picker
246 name
= "rdflag_%s_%s_%d" % (regfile
, regname
, i
)
247 rdflag
= Signal(name
=name
, reset_less
=True)
248 comb
+= rdflag
.eq(rf
)
249 rdflags
.append(rdflag
)
252 print ("pplen", pplen
)
254 # create a priority picker to manage this port
255 rdpickers
[regfile
][rpidx
] = rdpick
= PriorityPicker(pplen
)
256 setattr(m
.submodules
, "rdpick_%s_%s" % (regfile
, rpidx
), rdpick
)
260 for i
, fspec
in enumerate(fspecs
):
261 (rf
, read
, write
, wid
, fuspec
) = fspec
262 # connect up the FU req/go signals, and the reg-read to the FU
263 # and create a Read Broadcast Bus
264 for pi
, (funame
, fu
, idx
) in enumerate(fuspec
):
267 # connect request-read to picker input, and output to go-rd
268 fu_active
= fu_bitdict
[funame
]
269 name
= "%s_%s_%s_%i" % (regfile
, rpidx
, funame
, pi
)
270 addr_en
= Signal
.like(reads
[i
], name
="addr_en_"+name
)
271 pick
= Signal(name
="pick_"+name
) # picker input
272 rp
= Signal(name
="rp_"+name
) # picker output
273 delay_pick
= Signal(name
="dp_"+name
) # read-enable "underway"
275 # exclude any currently-enabled read-request (mask out active)
276 comb
+= pick
.eq(fu
.rd_rel_o
[idx
] & fu_active
& rdflags
[i
] &
278 comb
+= rdpick
.i
[pi
].eq(pick
)
279 comb
+= fu
.go_rd_i
[idx
].eq(delay_pick
) # pass in *delayed* pick
281 # if picked, select read-port "reg select" number to port
282 comb
+= rp
.eq(rdpick
.o
[pi
] & rdpick
.en_o
)
283 sync
+= delay_pick
.eq(rp
) # delayed "pick"
284 comb
+= addr_en
.eq(Mux(rp
, reads
[i
], 0))
286 # the read-enable happens combinatorially (see mux-bus below)
287 # but it results in the data coming out on a one-cycle delay.
291 addrs
.append(addr_en
)
294 # use the *delayed* pick signal to put requested data onto bus
295 with m
.If(delay_pick
):
296 # connect regfile port to input, creating fan-out Bus
298 print("reg connect widths",
299 regfile
, regname
, pi
, funame
,
300 src
.shape(), rport
.data_o
.shape())
301 # all FUs connect to same port
302 comb
+= src
.eq(rport
.data_o
)
304 # or-reduce the muxed read signals
306 # for unary-addressed
307 comb
+= rport
.ren
.eq(ortreereduce_sig(rens
))
309 # for binary-addressed
310 comb
+= rport
.addr
.eq(ortreereduce_sig(addrs
))
311 comb
+= rport
.ren
.eq(Cat(*rens
).bool())
312 print ("binary", regfile
, rpidx
, rport
, rport
.ren
, rens
, addrs
)
314 def connect_rdports(self
, m
, fu_bitdict
):
315 """connect read ports
317 orders the read regspecs into a dict-of-dicts, by regfile, by
318 regport name, then connects all FUs that want that regport by
319 way of a PriorityPicker.
321 comb
, sync
= m
.d
.comb
, m
.d
.sync
325 # dictionary of lists of regfile read ports
326 byregfiles_rd
, byregfiles_rdspec
= self
.get_byregfiles(True)
328 # okaay, now we need a PriorityPicker per regfile per regfile port
329 # loootta pickers... peter piper picked a pack of pickled peppers...
331 for regfile
, spec
in byregfiles_rd
.items():
332 fuspecs
= byregfiles_rdspec
[regfile
]
333 rdpickers
[regfile
] = {}
335 # argh. an experiment to merge RA and RB in the INT regfile
336 # (we have too many read/write ports)
337 #if regfile == 'INT':
338 #fuspecs['rabc'] = [fuspecs.pop('rb')]
339 #fuspecs['rabc'].append(fuspecs.pop('rc'))
340 #fuspecs['rabc'].append(fuspecs.pop('ra'))
341 #if regfile == 'FAST':
342 # fuspecs['fast1'] = [fuspecs.pop('fast1')]
343 # if 'fast2' in fuspecs:
344 # fuspecs['fast1'].append(fuspecs.pop('fast2'))
346 # for each named regfile port, connect up all FUs to that port
347 for (regname
, fspec
) in sort_fuspecs(fuspecs
):
348 print("connect rd", regname
, fspec
)
349 self
.connect_rdport(m
, fu_bitdict
, rdpickers
, regfile
,
352 def connect_wrport(self
, m
, fu_bitdict
, wrpickers
, regfile
, regname
, fspec
):
353 comb
, sync
= m
.d
.comb
, m
.d
.sync
357 print("connect wr", regname
, fspec
)
360 # select the required write port. these are pre-defined sizes
361 print(regfile
, regs
.rf
.keys())
362 rfile
= regs
.rf
[regfile
.lower()]
363 wport
= rfile
.w_ports
[rpidx
]
366 if not isinstance(fspecs
, list):
372 for i
, fspec
in enumerate(fspecs
):
373 # get the regfile specs for this regfile port
374 (rf
, read
, write
, wid
, fuspec
) = fspec
375 print ("fpsec", i
, fspec
, len(fuspec
))
376 ppoffs
.append(pplen
) # record offset for picker
379 # create a priority picker to manage this port
380 wrpickers
[regfile
][rpidx
] = wrpick
= PriorityPicker(pplen
)
381 setattr(m
.submodules
, "wrpick_%s_%s" % (regfile
, rpidx
), wrpick
)
386 for i
, fspec
in enumerate(fspecs
):
387 # connect up the FU req/go signals and the reg-read to the FU
388 # these are arbitrated by Data.ok signals
389 (rf
, read
, write
, wid
, fuspec
) = fspec
390 for pi
, (funame
, fu
, idx
) in enumerate(fuspec
):
393 # write-request comes from dest.ok
394 dest
= fu
.get_out(idx
)
395 fu_dest_latch
= fu
.get_fu_out(idx
) # latched output
396 name
= "wrflag_%s_%s_%d" % (funame
, regname
, idx
)
397 wrflag
= Signal(name
=name
, reset_less
=True)
398 comb
+= wrflag
.eq(dest
.ok
& fu
.busy_o
)
400 # connect request-write to picker input, and output to go-wr
401 fu_active
= fu_bitdict
[funame
]
402 pick
= fu
.wr
.rel_o
[idx
] & fu_active
# & wrflag
403 comb
+= wrpick
.i
[pi
].eq(pick
)
404 # create a single-pulse go write from the picker output
406 comb
+= wr_pick
.eq(wrpick
.o
[pi
] & wrpick
.en_o
)
407 comb
+= fu
.go_wr_i
[idx
].eq(rising_edge(m
, wr_pick
))
409 # connect the regspec write "reg select" number to this port
410 # only if one FU actually requests (and is granted) the port
411 # will the write-enable be activated
412 addr_en
= Signal
.like(write
)
414 comb
+= wp
.eq(wr_pick
& wrpick
.en_o
)
415 comb
+= addr_en
.eq(Mux(wp
, write
, 0))
419 addrs
.append(addr_en
)
422 # connect regfile port to input
423 print("reg connect widths",
424 regfile
, regname
, pi
, funame
,
425 dest
.shape(), wport
.data_i
.shape())
426 wsigs
.append(fu_dest_latch
)
428 # here is where we create the Write Broadcast Bus. simple, eh?
429 comb
+= wport
.data_i
.eq(ortreereduce_sig(wsigs
))
431 # for unary-addressed
432 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
434 # for binary-addressed
435 comb
+= wport
.addr
.eq(ortreereduce_sig(addrs
))
436 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
438 def connect_wrports(self
, m
, fu_bitdict
):
439 """connect write ports
441 orders the write regspecs into a dict-of-dicts, by regfile,
442 by regport name, then connects all FUs that want that regport
443 by way of a PriorityPicker.
445 note that the write-port wen, write-port data, and go_wr_i all need to
446 be on the exact same clock cycle. as there is a combinatorial loop bug
447 at the moment, these all use sync.
449 comb
, sync
= m
.d
.comb
, m
.d
.sync
452 # dictionary of lists of regfile write ports
453 byregfiles_wr
, byregfiles_wrspec
= self
.get_byregfiles(False)
455 # same for write ports.
456 # BLECH! complex code-duplication! BLECH!
458 for regfile
, spec
in byregfiles_wr
.items():
459 fuspecs
= byregfiles_wrspec
[regfile
]
460 wrpickers
[regfile
] = {}
462 # argh, more port-merging
464 fuspecs
['o'] = [fuspecs
.pop('o')]
465 fuspecs
['o'].append(fuspecs
.pop('o1'))
466 if regfile
== 'FAST':
467 fuspecs
['fast1'] = [fuspecs
.pop('fast1')]
468 if 'fast2' in fuspecs
:
469 fuspecs
['fast1'].append(fuspecs
.pop('fast2'))
471 for (regname
, fspec
) in sort_fuspecs(fuspecs
):
472 self
.connect_wrport(m
, fu_bitdict
, wrpickers
,
473 regfile
, regname
, fspec
)
475 def get_byregfiles(self
, readmode
):
477 mode
= "read" if readmode
else "write"
480 e
= self
.e
# decoded instruction to execute
482 # dictionary of lists of regfile ports
485 for (funame
, fu
) in fus
.items():
486 print("%s ports for %s" % (mode
, funame
))
487 for idx
in range(fu
.n_src
if readmode
else fu
.n_dst
):
489 (regfile
, regname
, wid
) = fu
.get_in_spec(idx
)
491 (regfile
, regname
, wid
) = fu
.get_out_spec(idx
)
492 print(" %d %s %s %s" % (idx
, regfile
, regname
, str(wid
)))
494 rdflag
, read
= regspec_decode_read(e
, regfile
, regname
)
497 rdflag
, read
= None, None
498 wrport
, write
= regspec_decode_write(e
, regfile
, regname
)
499 if regfile
not in byregfiles
:
500 byregfiles
[regfile
] = {}
501 byregfiles_spec
[regfile
] = {}
502 if regname
not in byregfiles_spec
[regfile
]:
503 byregfiles_spec
[regfile
][regname
] = \
504 (rdflag
, read
, write
, wid
, [])
505 # here we start to create "lanes"
506 if idx
not in byregfiles
[regfile
]:
507 byregfiles
[regfile
][idx
] = []
508 fuspec
= (funame
, fu
, idx
)
509 byregfiles
[regfile
][idx
].append(fuspec
)
510 byregfiles_spec
[regfile
][regname
][4].append(fuspec
)
512 # ok just print that out, for convenience
513 for regfile
, spec
in byregfiles
.items():
514 print("regfile %s ports:" % mode
, regfile
)
515 fuspecs
= byregfiles_spec
[regfile
]
516 for regname
, fspec
in fuspecs
.items():
517 [rdflag
, read
, write
, wid
, fuspec
] = fspec
518 print(" rf %s port %s lane: %s" % (mode
, regfile
, regname
))
519 print(" %s" % regname
, wid
, read
, write
, rdflag
)
520 for (funame
, fu
, idx
) in fuspec
:
521 fusig
= fu
.src_i
[idx
] if readmode
else fu
.dest
[idx
]
522 print(" ", funame
, fu
, idx
, fusig
)
525 return byregfiles
, byregfiles_spec
528 yield from self
.fus
.ports()
529 yield from self
.e
.ports()
530 yield from self
.l0
.ports()
537 if __name__
== '__main__':
538 pspec
= TestMemPspec(ldst_ifacetype
='testpi',
543 dut
= NonProductionCore(pspec
)
544 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
545 with
open("test_core.il", "w") as f
: