"""Simple (non-production) core.

Not in any way intended for production use.  Connects up FunctionUnits to
Register Files in a brain-dead fashion that only permits one and only one
Function Unit to be operational.

The principle here is to take the Function Units, analyse their regspecs,
and turn their requirements for access to register file read/write ports
into groupings by Register File and Register File Port name.

Under each grouping - by regfile/port - a list of Function Units that
need to connect to that port is created.  As these are a contended
resource, a "Broadcast Bus" per read/write port is then also created,
with access to it managed by a PriorityPicker.

The brain-dead part of this module is that even though there is no
conflict of access, regfile read/write hazards are *not* analysed,
and consequently it is safer to wait for the Function Unit to complete
before allowing a new instruction to proceed.
"""
import operator

from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
from nmigen.cli import rtlil

from nmutil.picker import PriorityPicker
from nmutil.util import treereduce
from nmutil.util import rising_edge

from soc.decoder.power_regspec_map import regspec_decode_read
from soc.decoder.power_regspec_map import regspec_decode_write
from soc.fu.compunits.compunits import AllFunctionUnits
from soc.regfile.regfiles import RegFiles
from soc.decoder.decode2execute1 import Decode2ToExecute1Type
from soc.decoder.power_decoder2 import get_rdflags
from soc.decoder.decode2execute1 import Data
from soc.experiment.l0_cache import TstL0CacheBuffer  # test only
from soc.config.test.test_loadstore import TestMemPspec
from soc.decoder.power_enums import MicrOp
def ortreereduce(tree, attr="data_o"):
    """OR-reduce one attribute taken from every element of *tree*.

    Each element contributes ``getattr(element, attr)``; the values are
    combined with bitwise OR through ``treereduce``.

    :param tree: collection of objects (presumably a list of ports or
                 records -- verify against callers) that each carry *attr*
    :param attr: name of the attribute to read from each element
    :returns: whatever ``treereduce`` produces for the OR of the attributes
    """
    return treereduce(tree, operator.or_, lambda item: getattr(item, attr))
def ortreereduce_sig(tree):
    """OR-reduce the elements of *tree* themselves.

    Same as ``ortreereduce`` but the elements are used directly rather
    than having an attribute extracted from them.

    :param tree: collection of values supporting ``|`` (signals, in the
                 callers visible in this file)
    :returns: whatever ``treereduce`` produces for the OR of the elements
    """
    return treereduce(tree, operator.or_, lambda item: item)
def sort_fuspecs(fuspecs):
    """Return the items of *fuspecs* with "full" register names first.

    :param fuspecs: mapping of register-port name to its spec
    :returns: list of ``(regname, fspec)`` tuples; every entry whose name
              starts with ``"full"`` precedes all the others, and within
              each group the mapping's own iteration order is kept
    """
    items = list(fuspecs.items())
    full_first = [(regname, fspec) for regname, fspec in items
                  if regname.startswith("full")]
    remainder = [(regname, fspec) for regname, fspec in items
                 if not regname.startswith("full")]
    return full_first + remainder
class NonProductionCore(Elaboratable):
    """Core that wires Function Units to Register File ports.

    Every regfile read/write port gets a PriorityPicker that arbitrates
    between the Function Units wanting that port; the picked unit's
    address/data are OR-reduced onto the port ("Broadcast Bus").

    NOTE(review): several short statements in this class were not
    recoverable verbatim and have been reconstructed from the way their
    results are used (they are marked individually below) - please
    confirm against the upstream file.
    """

    def __init__(self, pspec):
        """Create the sub-units and the core's control signals.

        :param pspec: parameter spec handed through to the L0 cache
                      buffer and to the Function Units
        """
        # single LD/ST funnel for memory access
        self.l0 = TstL0CacheBuffer(pspec, n_units=1)
        pi = self.l0.l0.dports[0]

        # function units (only one each)
        self.fus = AllFunctionUnits(pspec, pilist=[pi])

        # register files (yes plural)
        self.regs = RegFiles()

        # instruction input
        self.e = Decode2ToExecute1Type()  # decoded instruction

        # issue/valid/busy signalling
        self.ivalid_i = Signal(reset_less=True)  # instruction is valid
        self.issue_i = Signal(reset_less=True)
        self.busy_o = Signal(name="corebusy_o", reset_less=True)

        # start/stop and terminated signalling
        self.core_stopped_i = Signal(reset_less=True)
        self.core_reset_i = Signal()
        self.core_terminate_o = Signal(reset=0)  # indicates stopped

    def elaborate(self, platform):
        """Build the module: submodules, FU enables, regfile port wiring."""
        m = Module()

        m.submodules.fus = self.fus
        m.submodules.l0 = self.l0
        self.regs.elaborate_into(m, platform)

        # connect up Function Units, then read/write ports
        fu_bitdict = self.connect_instruction(m)
        self.connect_rdports(m, fu_bitdict)
        self.connect_wrports(m, fu_bitdict)

        # connect up reset
        m.d.comb += ResetSignal().eq(self.core_reset_i)

        return m

    def connect_instruction(self, m):
        """connect_instruction

        uses decoded (from PowerOp) function unit information from CSV files
        to ascertain which Function Unit should deal with the current
        instruction.

        some (such as OP_ATTN, OP_NOP) are dealt with here, including
        ignoring it and halting the processor.  OP_NOP is a bit annoying
        because the issuer expects busy flag still to be raised then lowered.
        (this requires a fake counter to be set).

        :param m: the Module being built
        :returns: dict mapping each Function Unit name to its enable bit
        """
        comb, sync = m.d.comb, m.d.sync
        # NOTE(review): assumes AllFunctionUnits exposes its name->FU
        # dictionary as ".fus" - confirm
        fus = self.fus.fus
        e = self.e  # to execute

        # enable-signals for each FU, get one bit for each FU (by name)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_bitdict = {}
        for i, funame in enumerate(fus.keys()):
            fu_bitdict[funame] = fu_enable[i]

        # enable the required Function Unit based on the opcode decode
        # note: this *only* works correctly for simple core when one and
        # *only* one FU is allocated per instruction
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            enable = Signal(name="en_%s" % funame, reset_less=True)
            comb += enable.eq((e.do.fn_unit & fnunit).bool())
            comb += fu_bitdict[funame].eq(enable)

        # sigh - need a NOP counter
        # NOTE(review): width reconstructed; it must hold the value 2
        counter = Signal(2)
        with m.If(counter != 0):
            sync += counter.eq(counter - 1)
            comb += self.busy_o.eq(1)

        with m.If(self.ivalid_i):  # run only when valid
            with m.Switch(e.do.insn_type):
                # check for ATTN: halt if true
                with m.Case(MicrOp.OP_ATTN):
                    m.d.sync += self.core_terminate_o.eq(1)

                with m.Case(MicrOp.OP_NOP):
                    sync += counter.eq(2)
                    comb += self.busy_o.eq(1)

                with m.Default():
                    # connect up instructions.  only one enabled at a time
                    for funame, fu in fus.items():
                        enable = fu_bitdict[funame]

                        # run this FunctionUnit if enabled
                        with m.If(enable):
                            # route op, issue, busy, read flags and mask to FU
                            comb += fu.oper_i.eq_from_execute1(e)
                            comb += fu.issue_i.eq(self.issue_i)
                            comb += self.busy_o.eq(fu.busy_o)
                            rdmask = get_rdflags(e, fu)
                            comb += fu.rdmaskn.eq(~rdmask)

        return fu_bitdict

    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
        """Connect every FU that reads one regfile port to that port.

        :param m: the Module being built
        :param fu_bitdict: dict of FU name -> FU enable bit
        :param rdpickers: dict-of-dicts (regfile -> port -> picker); the
                          picker created here is stored into it
        :param regfile: register file name (lower-cased for lookup)
        :param regname: name of the read port within that register file
        :param fspec: one ``(rdflag, read, write, wid, fuspec)`` tuple, or
                      a list of them when several specs share the port
        """
        comb, sync = m.d.comb, m.d.sync
        regs = self.regs

        rpidx = regname

        # select the required read port.  these are pre-defined sizes
        rfile = regs.rf[regfile.lower()]
        rport = rfile.r_ports[rpidx]
        # NOTE(review): tail of this debug print was reconstructed
        print("read regfile", rpidx, regfile, regs.rf.keys(),
              rfile, rfile.unary)

        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        rdflags = []
        pplen = 0
        reads = []
        ppoffs = []
        for i, spec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = spec
            print("fpsec", i, spec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)
            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name=name, reset_less=True)
            comb += rdflag.eq(rf)
            rdflags.append(rdflag)
            reads.append(read)

        print("pplen", pplen)

        # create a priority picker to manage this port
        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

        rens = []
        addrs = []
        for i, spec in enumerate(fspecs):
            (rf, read, write, wid, fuspec) = spec
            # connect up the FU req/go signals, and the reg-read to the FU
            # and create a Read Broadcast Bus
            for pi, (funame, fu, idx) in enumerate(fuspec):
                # each spec owns a contiguous slice of the picker inputs
                pi += ppoffs[i]

                # connect request-read to picker input, and output to go-rd
                fu_active = fu_bitdict[funame]
                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
                addr_en = Signal.like(reads[i], name="addr_en_"+name)
                pick = Signal(name="pick_"+name)      # picker input
                rp = Signal(name="rp_"+name)          # picker output
                delay_pick = Signal(name="dp_"+name)  # read-enable "underway"

                # exclude any currently-enabled read-request (mask out active)
                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
                                ~delay_pick)
                comb += rdpick.i[pi].eq(pick)
                comb += fu.go_rd_i[idx].eq(delay_pick)  # pass in *delayed* pick

                # if picked, select read-port "reg select" number to port
                comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
                sync += delay_pick.eq(rp)  # delayed "pick"
                comb += addr_en.eq(Mux(rp, reads[i], 0))

                # the read-enable happens combinatorially (see mux-bus below)
                # but it results in the data coming out on a one-cycle delay.
                # NOTE(review): the unary/binary split was reconstructed from
                # the "for unary-addressed"/"for binary-addressed" wiring
                if rfile.unary:
                    rens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    rens.append(rp)

                # use the *delayed* pick signal to put requested data onto bus
                with m.If(delay_pick):
                    # connect regfile port to input, creating fan-out Bus
                    src = fu.src_i[idx]
                    print("reg connect widths",
                          regfile, regname, pi, funame,
                          src.shape(), rport.data_o.shape())
                    # all FUs connect to same port
                    comb += src.eq(rport.data_o)

        # or-reduce the muxed read signals
        if rfile.unary:
            # for unary-addressed
            comb += rport.ren.eq(ortreereduce_sig(rens))
        else:
            # for binary-addressed
            comb += rport.addr.eq(ortreereduce_sig(addrs))
            comb += rport.ren.eq(Cat(*rens).bool())
            print("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    def connect_rdports(self, m, fu_bitdict):
        """connect read ports

        orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker.
        """
        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        rdpickers = {}
        for regfile, spec in byregfiles_rd.items():
            fuspecs = byregfiles_rdspec[regfile]
            rdpickers[regfile] = {}

            # argh.  an experiment to merge RA and RB in the INT regfile
            # (we have too many read/write ports)
            #if regfile == 'INT':
                #fuspecs['rabc'] = [fuspecs.pop('rb')]
                #fuspecs['rabc'].append(fuspecs.pop('rc'))
                #fuspecs['rabc'].append(fuspecs.pop('ra'))
            #if regfile == 'FAST':
            #    fuspecs['fast1'] = [fuspecs.pop('fast1')]
            #    if 'fast2' in fuspecs:
            #        fuspecs['fast1'].append(fuspecs.pop('fast2'))

            # for each named regfile port, connect up all FUs to that port
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect rd", regname, fspec)
                self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                    regname, fspec)

    def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
        """Connect every FU that writes one regfile port to that port.

        :param m: the Module being built
        :param fu_bitdict: dict of FU name -> FU enable bit
        :param wrpickers: dict-of-dicts (regfile -> port -> picker); the
                          picker created here is stored into it
        :param regfile: register file name (lower-cased for lookup)
        :param regname: name of the write port within that register file
        :param fspec: one ``(rdflag, read, write, wid, fuspec)`` tuple, or
                      a list of them when several specs share the port
        """
        comb = m.d.comb
        regs = self.regs

        print("connect wr", regname, fspec)
        rpidx = regname

        # select the required write port.  these are pre-defined sizes
        print(regfile, regs.rf.keys())
        rfile = regs.rf[regfile.lower()]
        wport = rfile.w_ports[rpidx]

        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        pplen = 0
        ppoffs = []
        for i, spec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = spec
            print("fpsec", i, spec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)

        # create a priority picker to manage this port
        wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
        setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

        wsigs = []
        wens = []
        addrs = []
        for i, spec in enumerate(fspecs):
            # connect up the FU req/go signals and the reg-read to the FU
            # these are arbitrated by Data.ok signals
            (rf, read, write, wid, fuspec) = spec
            for pi, (funame, fu, idx) in enumerate(fuspec):
                # each spec owns a contiguous slice of the picker inputs
                pi += ppoffs[i]

                # write-request comes from dest.ok
                dest = fu.get_out(idx)
                fu_dest_latch = fu.get_fu_out(idx)  # latched output
                name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                wrflag = Signal(name=name, reset_less=True)
                comb += wrflag.eq(dest.ok & fu.busy_o)

                # connect request-write to picker input, and output to go-wr
                fu_active = fu_bitdict[funame]
                pick = fu.wr.rel_o[idx] & fu_active  # & wrflag
                comb += wrpick.i[pi].eq(pick)
                # create a single-pulse go write from the picker output
                wr_pick = Signal()
                comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
                comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

                # connect the regspec write "reg select" number to this port
                # only if one FU actually requests (and is granted) the port
                # will the write-enable be activated
                addr_en = Signal.like(write)
                wp = Signal()
                comb += wp.eq(wr_pick & wrpick.en_o)
                comb += addr_en.eq(Mux(wp, write, 0))
                # NOTE(review): the unary/binary split was reconstructed from
                # the "for unary-addressed"/"for binary-addressed" wiring
                if rfile.unary:
                    wens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    wens.append(wp)

                # connect regfile port to input
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      dest.shape(), wport.data_i.shape())
                wsigs.append(fu_dest_latch)

        # here is where we create the Write Broadcast Bus. simple, eh?
        comb += wport.data_i.eq(ortreereduce_sig(wsigs))
        if rfile.unary:
            # for unary-addressed
            comb += wport.wen.eq(ortreereduce_sig(wens))
        else:
            # for binary-addressed
            comb += wport.addr.eq(ortreereduce_sig(addrs))
            comb += wport.wen.eq(ortreereduce_sig(wens))

    def connect_wrports(self, m, fu_bitdict):
        """connect write ports

        orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker.

        note that the write-port wen, write-port data, and go_wr_i all need to
        be on the exact same clock cycle.  as there is a combinatorial loop bug
        at the moment, these all use sync.
        """
        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same for write ports.
        # BLECH!  complex code-duplication! BLECH!
        wrpickers = {}
        for regfile, spec in byregfiles_wr.items():
            fuspecs = byregfiles_wrspec[regfile]
            wrpickers[regfile] = {}

            # argh, more port-merging
            # NOTE(review): the 'INT' guard was reconstructed (parallel to
            # the 'FAST' guard below) - confirm
            if regfile == 'INT':
                fuspecs['o'] = [fuspecs.pop('o')]
                fuspecs['o'].append(fuspecs.pop('o1'))
            if regfile == 'FAST':
                fuspecs['fast1'] = [fuspecs.pop('fast1')]
                if 'fast2' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast2'))

            for (regname, fspec) in sort_fuspecs(fuspecs):
                self.connect_wrport(m, fu_bitdict, wrpickers,
                                    regfile, regname, fspec)

    def get_byregfiles(self, readmode):
        """Group Function Unit port requirements by register file.

        :param readmode: True to collect FU source (read) ports, False to
                         collect FU destination (write) ports
        :returns: ``(byregfiles, byregfiles_spec)`` where ``byregfiles``
                  maps regfile -> FU port index -> list of
                  ``(funame, fu, idx)`` and ``byregfiles_spec`` maps
                  regfile -> regname ->
                  ``(rdflag, read, write, wid, [(funame, fu, idx), ...])``
        """
        mode = "read" if readmode else "write"
        # NOTE(review): assumes AllFunctionUnits exposes its name->FU
        # dictionary as ".fus" - confirm
        fus = self.fus.fus
        e = self.e  # decoded instruction to execute

        # dictionary of lists of regfile ports
        byregfiles = {}
        byregfiles_spec = {}
        for (funame, fu) in fus.items():
            print("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                if readmode:
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
                if readmode:
                    rdflag, read = regspec_decode_read(e, regfile, regname)
                    write = None
                else:
                    rdflag, read = None, None
                    _, write = regspec_decode_write(e, regfile, regname)
                if regfile not in byregfiles:
                    byregfiles[regfile] = {}
                    byregfiles_spec[regfile] = {}
                if regname not in byregfiles_spec[regfile]:
                    byregfiles_spec[regfile][regname] = \
                        (rdflag, read, write, wid, [])
                # here we start to create "lanes"
                if idx not in byregfiles[regfile]:
                    byregfiles[regfile][idx] = []
                fuspec = (funame, fu, idx)
                byregfiles[regfile][idx].append(fuspec)
                byregfiles_spec[regfile][regname][4].append(fuspec)

        # ok just print that out, for convenience
        for regfile, spec in byregfiles.items():
            print("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, read, write, wid, fuspec] = fspec
                print(" rf %s port %s lane: %s" % (mode, regfile, regname))
                print(" %s" % regname, wid, read, write, rdflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print(" ", funame, fu, idx, fusig)

        return byregfiles, byregfiles_spec

    # NOTE(review): the "def" lines for the port enumeration were not
    # recoverable; __iter__/ports is a reconstruction - confirm
    def __iter__(self):
        """Yield the ports of the FUs, the decoded instruction and the L0."""
        yield from self.fus.ports()
        yield from self.e.ports()
        yield from self.l0.ports()

    def ports(self):
        """Return all ports as a list (used by the RTLIL conversion)."""
        return list(self)
491 if __name__
== '__main__':
492 pspec
= TestMemPspec(ldst_ifacetype
='testpi',
497 dut
= NonProductionCore(pspec
)
498 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
499 with
open("test_core.il", "w") as f
: