not in any way intended for production use.  connects up FunctionUnits to
Register Files in a brain-dead fashion that only permits one and only one
Function Unit to be operational.

the principle here is to take the Function Units, analyse their regspecs,
and turn their requirements for access to register file read/write ports
into groupings by Register File and Register File Port name.

under each grouping - by regfile/port - a list of Function Units that
need to connect to that port is created.  as these are a contended
resource a "Broadcast Bus" per read/write port is then also created,
with access to it managed by a PriorityPicker.

the brain-dead part of this module is that even though there is no
conflict of access, regfile read/write hazards are *not* analysed,
and consequently it is safer to wait for the Function Unit to complete
before allowing a new instruction to proceed.
22 from nmigen
import Elaboratable
, Module
, Signal
, ResetSignal
, Cat
, Mux
23 from nmigen
.cli
import rtlil
25 from soc
.decoder
.power_regspec_map
import regspec_decode_read
26 from soc
.decoder
.power_regspec_map
import regspec_decode_write
28 from nmutil
.picker
import PriorityPicker
29 from nmutil
.util
import treereduce
31 from soc
.fu
.compunits
.compunits
import AllFunctionUnits
32 from soc
.regfile
.regfiles
import RegFiles
33 from soc
.decoder
.decode2execute1
import Decode2ToExecute1Type
34 from soc
.decoder
.power_decoder2
import get_rdflags
35 from soc
.decoder
.decode2execute1
import Data
36 from soc
.experiment
.l0_cache
import TstL0CacheBuffer
# test only
37 from soc
.config
.test
.test_loadstore
import TestMemPspec
38 from soc
.decoder
.power_enums
import MicrOp
41 from nmutil
.util
import rising_edge
# helper functions for reducing a list of signals down to a single signal
# by ORing them all together
def ortreereduce(tree, attr="data_o"):
    """OR together one named attribute taken from every item of ``tree``.

    tree: the items to reduce; each must carry an attribute called ``attr``
    attr: name of the attribute fetched from each item (default "data_o")

    returns: the result of ``treereduce`` combining the fetched values
    with ``operator.or_``
    """
    def fetch(item):
        # the value each item contributes to the OR
        return getattr(item, attr)

    return treereduce(tree, operator.or_, fetch)
def ortreereduce_sig(tree):
    """OR together the items of ``tree`` themselves.

    Same reduction as ``ortreereduce`` but with no attribute lookup: each
    item is used directly as the value to be ORed.

    tree: the items (signals/expressions) to reduce

    returns: the result of ``treereduce`` combining the items with
    ``operator.or_``
    """
    def unchanged(item):
        # identity: the item is already the value to OR
        return item

    return treereduce(tree, operator.or_, unchanged)
54 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the entries of ``fuspecs`` with the "full" ports placed first.

    fuspecs: dict mapping a register port name to its spec

    returns: a new list of ``(regname, fspec)`` tuples.  every entry whose
    name starts with "full" comes before every entry whose name does not;
    inside each of the two groups the dict's own iteration order is kept.
    ``fuspecs`` itself is not modified.
    """
    # sorted() is stable and False sorts before True, so keying on
    # "does NOT start with full" moves the full-register entries to the
    # front without disturbing the relative order inside either group.
    return sorted(fuspecs.items(),
                  key=lambda entry: not entry[0].startswith("full"))
66 class NonProductionCore(Elaboratable
):
def __init__(self, pspec):
    """Create the sub-blocks and the control signals of the core.

    pspec: parameter specification; passed on unchanged to the L0 cache
           buffer and to the function units.
    """
    # memory access goes through one L0 cache/buffer built with a single
    # unit (TstL0CacheBuffer is imported as "test only"); its first data
    # port is the one LD/ST funnel handed to the function units
    self.l0 = TstL0CacheBuffer(pspec, n_units=1)
    ldst_port = self.l0.l0.dports[0]

    # the set of function units (only one of each)
    self.fus = AllFunctionUnits(pspec, pilist=[ldst_port])

    # the register files - plural
    self.regs = RegFiles()

    # the decoded instruction that is to be executed
    self.e = Decode2ToExecute1Type()

    # issue / valid / busy handshake
    self.ivalid_i = Signal(reset_less=True)  # instruction is valid
    self.issue_i = Signal(reset_less=True)
    self.busy_o = Signal(name="corebusy_o", reset_less=True)

    # start / stop / terminated control
    self.core_stopped_i = Signal(reset_less=True)
    self.core_reset_i = Signal()
    self.core_terminate_o = Signal(reset=0)  # indicates stopped
def elaborate(self, platform):
    """Build and return the nmigen Module describing the core.

    Registers the function units and the L0 cache/buffer as submodules,
    has the register files add themselves to the module, wires up
    instruction issue and the regfile read/write ports, and drives the
    reset signal from ``core_reset_i``.

    platform: passed through to ``self.regs.elaborate_into``

    returns: the populated Module
    """
    # every statement below adds to this module, so it has to exist first
    m = Module()

    m.submodules.fus = self.fus
    m.submodules.l0 = self.l0
    self.regs.elaborate_into(m, platform)

    # connect up Function Units, then read/write ports.  the per-FU
    # enable bits produced by connect_instruction are needed by both
    # port-connection passes.
    fu_bitdict = self.connect_instruction(m)
    self.connect_rdports(m, fu_bitdict)
    self.connect_wrports(m, fu_bitdict)

    # connect up reset
    m.d.comb += ResetSignal().eq(self.core_reset_i)

    # the module built here is the result of elaboration
    return m
111 def connect_instruction(self
, m
):
112 """connect_instruction
114 uses decoded (from PowerOp) function unit information from CSV files
115 to ascertain which Function Unit should deal with the current
118 some (such as OP_ATTN, OP_NOP) are dealt with here, including
119 ignoring it and halting the processor. OP_NOP is a bit annoying
120 because the issuer expects busy flag still to be raised then lowered.
121 (this requires a fake counter to be set).
123 comb
, sync
= m
.d
.comb
, m
.d
.sync
125 e
= self
.e
# to execute
127 # enable-signals for each FU, get one bit for each FU (by name)
128 fu_enable
= Signal(len(fus
), reset_less
=True)
130 for i
, funame
in enumerate(fus
.keys()):
131 fu_bitdict
[funame
] = fu_enable
[i
]
133 # enable the required Function Unit based on the opcode decode
134 # note: this *only* works correctly for simple core when one and
135 # *only* one FU is allocated per instruction
136 for funame
, fu
in fus
.items():
137 fnunit
= fu
.fnunit
.value
138 enable
= Signal(name
="en_%s" % funame
, reset_less
=True)
139 comb
+= enable
.eq((e
.do
.fn_unit
& fnunit
).bool())
140 comb
+= fu_bitdict
[funame
].eq(enable
)
142 # sigh - need a NOP counter
144 with m
.If(counter
!= 0):
145 sync
+= counter
.eq(counter
- 1)
146 comb
+= self
.busy_o
.eq(1)
148 with m
.If(self
.ivalid_i
): # run only when valid
149 with m
.Switch(e
.do
.insn_type
):
150 # check for ATTN: halt if true
151 with m
.Case(MicrOp
.OP_ATTN
):
152 m
.d
.sync
+= self
.core_terminate_o
.eq(1)
154 with m
.Case(MicrOp
.OP_NOP
):
155 sync
+= counter
.eq(2)
156 comb
+= self
.busy_o
.eq(1)
159 # connect up instructions. only one enabled at a time
160 for funame
, fu
in fus
.items():
161 enable
= fu_bitdict
[funame
]
163 # run this FunctionUnit if enabled
165 # route op, issue, busy, read flags and mask to FU
166 comb
+= fu
.oper_i
.eq_from_execute1(e
)
167 comb
+= fu
.issue_i
.eq(self
.issue_i
)
168 comb
+= self
.busy_o
.eq(fu
.busy_o
)
169 rdmask
= get_rdflags(e
, fu
)
170 comb
+= fu
.rdmaskn
.eq(~rdmask
)
174 def connect_rdport(self
, m
, fu_bitdict
, rdpickers
, regfile
, regname
, fspec
):
175 comb
, sync
= m
.d
.comb
, m
.d
.sync
181 # select the required read port. these are pre-defined sizes
182 rfile
= regs
.rf
[regfile
.lower()]
183 rport
= rfile
.r_ports
[rpidx
]
184 print("read regfile", rpidx
, regfile
, regs
.rf
.keys(),
188 if not isinstance(fspecs
, list):
195 for i
, fspec
in enumerate(fspecs
):
196 # get the regfile specs for this regfile port
197 (rf
, read
, write
, wid
, fuspec
) = fspec
198 print ("fpsec", i
, fspec
, len(fuspec
))
199 ppoffs
.append(pplen
) # record offset for picker
201 name
= "rdflag_%s_%s_%d" % (regfile
, regname
, i
)
202 rdflag
= Signal(name
=name
, reset_less
=True)
203 comb
+= rdflag
.eq(rf
)
204 rdflags
.append(rdflag
)
207 print ("pplen", pplen
)
209 # create a priority picker to manage this port
210 rdpickers
[regfile
][rpidx
] = rdpick
= PriorityPicker(pplen
)
211 setattr(m
.submodules
, "rdpick_%s_%s" % (regfile
, rpidx
), rdpick
)
215 for i
, fspec
in enumerate(fspecs
):
216 (rf
, read
, write
, wid
, fuspec
) = fspec
217 # connect up the FU req/go signals, and the reg-read to the FU
218 # and create a Read Broadcast Bus
219 for pi
, (funame
, fu
, idx
) in enumerate(fuspec
):
222 # connect request-read to picker input, and output to go-rd
223 fu_active
= fu_bitdict
[funame
]
224 name
= "%s_%s_%s_%i" % (regfile
, rpidx
, funame
, pi
)
225 addr_en
= Signal
.like(reads
[i
], name
="addr_en_"+name
)
226 pick
= Signal(name
="pick_"+name
) # picker input
227 rp
= Signal(name
="rp_"+name
) # picker output
228 delay_pick
= Signal(name
="dp_"+name
) # read-enable "underway"
230 # exclude any currently-enabled read-request (mask out active)
231 comb
+= pick
.eq(fu
.rd_rel_o
[idx
] & fu_active
& rdflags
[i
] &
233 comb
+= rdpick
.i
[pi
].eq(pick
)
234 comb
+= fu
.go_rd_i
[idx
].eq(delay_pick
) # pass in *delayed* pick
236 # if picked, select read-port "reg select" number to port
237 comb
+= rp
.eq(rdpick
.o
[pi
] & rdpick
.en_o
)
238 sync
+= delay_pick
.eq(rp
) # delayed "pick"
239 comb
+= addr_en
.eq(Mux(rp
, reads
[i
], 0))
241 # the read-enable happens combinatorially (see mux-bus below)
242 # but it results in the data coming out on a one-cycle delay.
246 addrs
.append(addr_en
)
249 # use the *delayed* pick signal to put requested data onto bus
250 with m
.If(delay_pick
):
251 # connect regfile port to input, creating fan-out Bus
253 print("reg connect widths",
254 regfile
, regname
, pi
, funame
,
255 src
.shape(), rport
.data_o
.shape())
256 # all FUs connect to same port
257 comb
+= src
.eq(rport
.data_o
)
259 # or-reduce the muxed read signals
261 # for unary-addressed
262 comb
+= rport
.ren
.eq(ortreereduce_sig(rens
))
264 # for binary-addressed
265 comb
+= rport
.addr
.eq(ortreereduce_sig(addrs
))
266 comb
+= rport
.ren
.eq(Cat(*rens
).bool())
267 print ("binary", regfile
, rpidx
, rport
, rport
.ren
, rens
, addrs
)
def connect_rdports(self, m, fu_bitdict):
    """connect read ports

    orders the read regspecs into a dict-of-dicts, by regfile, by
    regport name, then connects all FUs that want that regport by
    way of a PriorityPicker.

    m: the Module being built
    fu_bitdict: per-Function-Unit enable bits keyed by FU name (the value
                returned by connect_instruction)
    """
    # dictionary of lists of regfile read ports
    byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

    # okaay, now we need a PriorityPicker per regfile per regfile port.
    # connect_rdport records each picker it creates in
    # rdpickers[regfile][port], so the outer dict must exist up front.
    rdpickers = {}
    for regfile in byregfiles_rd:
        fuspecs = byregfiles_rdspec[regfile]
        rdpickers[regfile] = {}

        # argh.  an experiment to merge RA and RB in the INT regfile
        # (we have too many read/write ports).  left disabled:
        #if regfile == 'INT':
            #fuspecs['rabc'] = [fuspecs.pop('rb')]
            #fuspecs['rabc'].append(fuspecs.pop('rc'))
            #fuspecs['rabc'].append(fuspecs.pop('ra'))
        #if regfile == 'FAST':
        #    fuspecs['fast1'] = [fuspecs.pop('fast1')]
        #    if 'fast2' in fuspecs:
        #        fuspecs['fast1'].append(fuspecs.pop('fast2'))

        # for each named regfile port, connect up all FUs to that port
        # ("full" register ports are handled first: see sort_fuspecs)
        for regname, fspec in sort_fuspecs(fuspecs):
            print("connect rd", regname, fspec)
            self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                regname, fspec)
307 def connect_wrport(self
, m
, fu_bitdict
, wrpickers
, regfile
, regname
, fspec
):
308 comb
, sync
= m
.d
.comb
, m
.d
.sync
312 print("connect wr", regname
, fspec
)
315 # select the required write port. these are pre-defined sizes
316 print(regfile
, regs
.rf
.keys())
317 rfile
= regs
.rf
[regfile
.lower()]
318 wport
= rfile
.w_ports
[rpidx
]
321 if not isinstance(fspecs
, list):
327 for i
, fspec
in enumerate(fspecs
):
328 # get the regfile specs for this regfile port
329 (rf
, read
, write
, wid
, fuspec
) = fspec
330 print ("fpsec", i
, fspec
, len(fuspec
))
331 ppoffs
.append(pplen
) # record offset for picker
334 # create a priority picker to manage this port
335 wrpickers
[regfile
][rpidx
] = wrpick
= PriorityPicker(pplen
)
336 setattr(m
.submodules
, "wrpick_%s_%s" % (regfile
, rpidx
), wrpick
)
341 for i
, fspec
in enumerate(fspecs
):
342 # connect up the FU req/go signals and the reg-read to the FU
343 # these are arbitrated by Data.ok signals
344 (rf
, read
, write
, wid
, fuspec
) = fspec
345 for pi
, (funame
, fu
, idx
) in enumerate(fuspec
):
348 # write-request comes from dest.ok
349 dest
= fu
.get_out(idx
)
350 fu_dest_latch
= fu
.get_fu_out(idx
) # latched output
351 name
= "wrflag_%s_%s_%d" % (funame
, regname
, idx
)
352 wrflag
= Signal(name
=name
, reset_less
=True)
353 comb
+= wrflag
.eq(dest
.ok
& fu
.busy_o
)
355 # connect request-write to picker input, and output to go-wr
356 fu_active
= fu_bitdict
[funame
]
357 pick
= fu
.wr
.rel_o
[idx
] & fu_active
# & wrflag
358 comb
+= wrpick
.i
[pi
].eq(pick
)
359 # create a single-pulse go write from the picker output
361 comb
+= wr_pick
.eq(wrpick
.o
[pi
] & wrpick
.en_o
)
362 comb
+= fu
.go_wr_i
[idx
].eq(rising_edge(m
, wr_pick
))
364 # connect the regspec write "reg select" number to this port
365 # only if one FU actually requests (and is granted) the port
366 # will the write-enable be activated
367 addr_en
= Signal
.like(write
)
369 comb
+= wp
.eq(wr_pick
& wrpick
.en_o
)
370 comb
+= addr_en
.eq(Mux(wp
, write
, 0))
374 addrs
.append(addr_en
)
377 # connect regfile port to input
378 print("reg connect widths",
379 regfile
, regname
, pi
, funame
,
380 dest
.shape(), wport
.data_i
.shape())
381 wsigs
.append(fu_dest_latch
)
383 # here is where we create the Write Broadcast Bus. simple, eh?
384 comb
+= wport
.data_i
.eq(ortreereduce_sig(wsigs
))
386 # for unary-addressed
387 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
389 # for binary-addressed
390 comb
+= wport
.addr
.eq(ortreereduce_sig(addrs
))
391 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
def connect_wrports(self, m, fu_bitdict):
    """connect write ports

    orders the write regspecs into a dict-of-dicts, by regfile,
    by regport name, then connects all FUs that want that regport
    by way of a PriorityPicker.

    note that the write-port wen, write-port data, and go_wr_i all need to
    be on the exact same clock cycle.  as there is a combinatorial loop bug
    at the moment, these all use sync.

    NOTE(review): connect_wrport in this file drives wen, data_i and
    go_wr_i through comb, not sync - confirm whether the note above is
    still accurate.

    m: the Module being built
    fu_bitdict: per-Function-Unit enable bits keyed by FU name (the value
                returned by connect_instruction)
    """
    # dictionary of lists of regfile write ports
    byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

    # same for write ports.
    # BLECH!  complex code-duplication! BLECH!
    # connect_wrport records each picker it creates in
    # wrpickers[regfile][port], so the outer dict must exist up front.
    wrpickers = {}
    for regfile in byregfiles_wr:
        fuspecs = byregfiles_wrspec[regfile]
        wrpickers[regfile] = {}

        # argh, more port-merging: fold several named ports into one
        # physical write port by turning that port's spec into a list
        # of specs (connect_wrport checks for a list).  the merges are
        # per-regfile: only the regfile that owns the named ports may
        # pop them, otherwise pop() raises KeyError.
        if regfile == 'INT':
            fuspecs['o'] = [fuspecs.pop('o')]
            fuspecs['o'].append(fuspecs.pop('o1'))
        if regfile == 'FAST':
            fuspecs['fast1'] = [fuspecs.pop('fast1')]
            if 'fast2' in fuspecs:
                fuspecs['fast1'].append(fuspecs.pop('fast2'))

        # for each named regfile port, connect up all FUs to that port
        for regname, fspec in sort_fuspecs(fuspecs):
            self.connect_wrport(m, fu_bitdict, wrpickers,
                                regfile, regname, fspec)
430 def get_byregfiles(self
, readmode
):
432 mode
= "read" if readmode
else "write"
435 e
= self
.e
# decoded instruction to execute
437 # dictionary of lists of regfile ports
440 for (funame
, fu
) in fus
.items():
441 print("%s ports for %s" % (mode
, funame
))
442 for idx
in range(fu
.n_src
if readmode
else fu
.n_dst
):
444 (regfile
, regname
, wid
) = fu
.get_in_spec(idx
)
446 (regfile
, regname
, wid
) = fu
.get_out_spec(idx
)
447 print(" %d %s %s %s" % (idx
, regfile
, regname
, str(wid
)))
449 rdflag
, read
= regspec_decode_read(e
, regfile
, regname
)
452 rdflag
, read
= None, None
453 wrport
, write
= regspec_decode_write(e
, regfile
, regname
)
454 if regfile
not in byregfiles
:
455 byregfiles
[regfile
] = {}
456 byregfiles_spec
[regfile
] = {}
457 if regname
not in byregfiles_spec
[regfile
]:
458 byregfiles_spec
[regfile
][regname
] = \
459 (rdflag
, read
, write
, wid
, [])
460 # here we start to create "lanes"
461 if idx
not in byregfiles
[regfile
]:
462 byregfiles
[regfile
][idx
] = []
463 fuspec
= (funame
, fu
, idx
)
464 byregfiles
[regfile
][idx
].append(fuspec
)
465 byregfiles_spec
[regfile
][regname
][4].append(fuspec
)
467 # ok just print that out, for convenience
468 for regfile
, spec
in byregfiles
.items():
469 print("regfile %s ports:" % mode
, regfile
)
470 fuspecs
= byregfiles_spec
[regfile
]
471 for regname
, fspec
in fuspecs
.items():
472 [rdflag
, read
, write
, wid
, fuspec
] = fspec
473 print(" rf %s port %s lane: %s" % (mode
, regfile
, regname
))
474 print(" %s" % regname
, wid
, read
, write
, rdflag
)
475 for (funame
, fu
, idx
) in fuspec
:
476 fusig
= fu
.src_i
[idx
] if readmode
else fu
.dest
[idx
]
477 print(" ", funame
, fu
, idx
, fusig
)
480 return byregfiles
, byregfiles_spec
483 yield from self
.fus
.ports()
484 yield from self
.e
.ports()
485 yield from self
.l0
.ports()
492 if __name__
== '__main__':
493 pspec
= TestMemPspec(ldst_ifacetype
='testpi',
498 dut
= NonProductionCore(pspec
)
499 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
500 with
open("test_core.il", "w") as f
: