9d115a3dc297e450b5fc02e854a90b9c8b0d28bc
"""
Not in any way intended for production use.  Connects up FunctionUnits to
Register Files in a brain-dead fashion that only permits one and only one
Function Unit to be operational at a time.

The principle here is to take the Function Units, analyse their regspecs,
and turn their requirements for access to register file read/write ports
into groupings by Register File and Register File Port name.

Under each grouping - by regfile/port - a list of Function Units that
need to connect to that port is created.  As these ports are a contended
resource, a "Broadcast Bus" per read/write port is then also created,
with access to it managed by a PriorityPicker.

The brain-dead part of this module is that, even though there is no
conflict of access, regfile read/write hazards are *not* analysed,
and consequently it is safer to wait for the Function Unit to complete
before allowing a new instruction to proceed.
"""
import operator

from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
from nmigen.cli import rtlil

from nmutil.picker import PriorityPicker
from nmutil.util import treereduce
from nmutil.util import rising_edge

from soc.decoder.power_decoder2 import PowerDecodeSubset
from soc.decoder.power_regspec_map import regspec_decode_read
from soc.decoder.power_regspec_map import regspec_decode_write
from soc.fu.compunits.compunits import AllFunctionUnits
from soc.regfile.regfiles import RegFiles
from soc.decoder.decode2execute1 import Decode2ToExecute1Type
from soc.decoder.decode2execute1 import IssuerDecode2ToOperand
from soc.decoder.power_decoder2 import get_rdflags
from soc.decoder.decode2execute1 import Data
from soc.experiment.l0_cache import TstL0CacheBuffer  # test only
from soc.config.test.test_loadstore import TestMemPspec
from soc.decoder.power_enums import MicrOp
from soc.config.state import CoreState
# helper functions for reducing a list of signals down to a single
# value by ORing them together in parallel (tree reduction)
def ortreereduce(tree, attr="data_o"):
    """OR together one named attribute taken from every item of ``tree``.

    tree: the collection of objects handed to ``treereduce``.
    attr: name of the attribute fetched from each object (default "data_o").

    returns whatever ``treereduce`` produces when the fetched attributes
    are combined with ``operator.or_``.
    """
    def fetch(item):
        # pull the requested attribute off each item before it is ORed
        return getattr(item, attr)

    return treereduce(tree, operator.or_, fetch)
def ortreereduce_sig(tree):
    """OR together the items of ``tree`` themselves.

    same as ``ortreereduce`` except that each item is used directly
    rather than having an attribute looked up on it.

    returns whatever ``treereduce`` produces when the items are combined
    with ``operator.or_``.
    """
    def identity(item):
        # items are already the values to be ORed: pass them straight through
        return item

    return treereduce(tree, operator.or_, identity)
58 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Order regfile port specs so that "full" declarations come first.

    fuspecs: dict mapping a port name (regname) to its spec (fspec).

    returns a new list of (regname, fspec) tuples: every entry whose name
    starts with "full", followed by all of the remaining entries.  within
    each of the two groups the dict's own iteration order is kept.
    """
    # build each group explicitly, so the result list always exists
    # (even for an empty dict) before anything is added to it
    full_first = [(regname, fspec) for (regname, fspec) in fuspecs.items()
                  if regname.startswith("full")]
    others = [(regname, fspec) for (regname, fspec) in fuspecs.items()
              if not regname.startswith("full")]
    return full_first + others
70 class NonProductionCore(Elaboratable
):
71 def __init__(self
, pspec
):
74 # single LD/ST funnel for memory access
75 self
.l0
= TstL0CacheBuffer(pspec
, n_units
=1)
76 pi
= self
.l0
.l0
.dports
[0]
78 # function units (only one each)
79 # only include mmu if enabled in pspec
80 self
.fus
= AllFunctionUnits(pspec
, pilist
=[pi
])
82 # register files (yes plural)
83 self
.regs
= RegFiles()
85 # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
86 self
.e
= Decode2ToExecute1Type("core", opkls
=IssuerDecode2ToOperand
)
88 self
.state
= CoreState("core")
89 self
.raw_insn_i
= Signal(32) # raw instruction
90 self
.bigendian_i
= Signal() # bigendian
92 # issue/valid/busy signalling
93 self
.ivalid_i
= Signal(reset_less
=True) # instruction is valid
94 self
.issue_i
= Signal(reset_less
=True)
95 self
.busy_o
= Signal(name
="corebusy_o", reset_less
=True)
97 # start/stop and terminated signalling
98 self
.core_stopped_i
= Signal(reset_less
=True)
99 self
.core_terminate_o
= Signal(reset
=0) # indicates stopped
101 # create per-FU instruction decoders (subsetted)
105 for funame
, fu
in self
.fus
.fus
.items():
106 f_name
= fu
.fnunit
.name
107 fnunit
= fu
.fnunit
.value
108 opkls
= fu
.opsubsetkls
110 self
.trapunit
= funame
112 self
.decoders
[funame
] = PowerDecodeSubset(None, opkls
, f_name
,
115 self
.des
[funame
] = self
.decoders
[funame
].do
117 def elaborate(self
, platform
):
119 # for testing purposes, to cut down on build time in coriolis2
120 if hasattr(self
.pspec
, "nocore") and self
.pspec
.nocore
== True:
121 x
= Signal() # dummy signal
126 m
.submodules
.fus
= self
.fus
127 m
.submodules
.l0
= l0
= self
.l0
128 self
.regs
.elaborate_into(m
, platform
)
133 for k
, v
in self
.decoders
.items():
134 setattr(m
.submodules
, "dec_%s" % v
.fn_name
, v
)
135 comb
+= v
.dec
.raw_opcode_in
.eq(self
.raw_insn_i
)
136 comb
+= v
.dec
.bigendian
.eq(self
.bigendian_i
)
138 # ssh, cheat: trap uses the main decoder because of the rewriting
139 self
.des
[self
.trapunit
] = self
.e
.do
141 # connect up Function Units, then read/write ports
142 fu_bitdict
= self
.connect_instruction(m
)
143 self
.connect_rdports(m
, fu_bitdict
)
144 self
.connect_wrports(m
, fu_bitdict
)
148 def connect_instruction(self
, m
):
149 """connect_instruction
151 uses decoded (from PowerOp) function unit information from CSV files
152 to ascertain which Function Unit should deal with the current
155 some (such as OP_ATTN, OP_NOP) are dealt with here, including
156 ignoring it and halting the processor. OP_NOP is a bit annoying
157 because the issuer expects busy flag still to be raised then lowered.
158 (this requires a fake counter to be set).
160 comb
, sync
= m
.d
.comb
, m
.d
.sync
163 # enable-signals for each FU, get one bit for each FU (by name)
164 fu_enable
= Signal(len(fus
), reset_less
=True)
166 for i
, funame
in enumerate(fus
.keys()):
167 fu_bitdict
[funame
] = fu_enable
[i
]
169 # enable the required Function Unit based on the opcode decode
170 # note: this *only* works correctly for simple core when one and
171 # *only* one FU is allocated per instruction
172 for funame
, fu
in fus
.items():
173 fnunit
= fu
.fnunit
.value
174 enable
= Signal(name
="en_%s" % funame
, reset_less
=True)
175 comb
+= enable
.eq((self
.e
.do
.fn_unit
& fnunit
).bool())
176 comb
+= fu_bitdict
[funame
].eq(enable
)
178 # sigh - need a NOP counter
180 with m
.If(counter
!= 0):
181 sync
+= counter
.eq(counter
- 1)
182 comb
+= self
.busy_o
.eq(1)
184 with m
.If(self
.ivalid_i
): # run only when valid
185 with m
.Switch(self
.e
.do
.insn_type
):
186 # check for ATTN: halt if true
187 with m
.Case(MicrOp
.OP_ATTN
):
188 m
.d
.sync
+= self
.core_terminate_o
.eq(1)
190 with m
.Case(MicrOp
.OP_NOP
):
191 sync
+= counter
.eq(2)
192 comb
+= self
.busy_o
.eq(1)
195 # connect up instructions. only one enabled at a time
196 for funame
, fu
in fus
.items():
197 do
= self
.des
[funame
]
198 enable
= fu_bitdict
[funame
]
200 # run this FunctionUnit if enabled
201 # route op, issue, busy, read flags and mask to FU
203 # operand comes from the *local* decoder
204 comb
+= fu
.oper_i
.eq_from(do
)
205 #comb += fu.oper_i.eq_from_execute1(e)
206 comb
+= fu
.issue_i
.eq(self
.issue_i
)
207 comb
+= self
.busy_o
.eq(fu
.busy_o
)
208 # rdmask, which is for registers, needs to come
209 # from the *main* decoder
210 rdmask
= get_rdflags(self
.e
, fu
)
211 comb
+= fu
.rdmaskn
.eq(~rdmask
)
215 def connect_rdport(self
, m
, fu_bitdict
, rdpickers
, regfile
, regname
, fspec
):
216 comb
, sync
= m
.d
.comb
, m
.d
.sync
222 # select the required read port. these are pre-defined sizes
223 rfile
= regs
.rf
[regfile
.lower()]
224 rport
= rfile
.r_ports
[rpidx
]
225 print("read regfile", rpidx
, regfile
, regs
.rf
.keys(),
229 if not isinstance(fspecs
, list):
236 for i
, fspec
in enumerate(fspecs
):
237 # get the regfile specs for this regfile port
238 (rf
, read
, write
, wid
, fuspec
) = fspec
239 print ("fpsec", i
, fspec
, len(fuspec
))
240 ppoffs
.append(pplen
) # record offset for picker
242 name
= "rdflag_%s_%s_%d" % (regfile
, regname
, i
)
243 rdflag
= Signal(name
=name
, reset_less
=True)
244 comb
+= rdflag
.eq(rf
)
245 rdflags
.append(rdflag
)
248 print ("pplen", pplen
)
250 # create a priority picker to manage this port
251 rdpickers
[regfile
][rpidx
] = rdpick
= PriorityPicker(pplen
)
252 setattr(m
.submodules
, "rdpick_%s_%s" % (regfile
, rpidx
), rdpick
)
256 for i
, fspec
in enumerate(fspecs
):
257 (rf
, read
, write
, wid
, fuspec
) = fspec
258 # connect up the FU req/go signals, and the reg-read to the FU
259 # and create a Read Broadcast Bus
260 for pi
, (funame
, fu
, idx
) in enumerate(fuspec
):
263 # connect request-read to picker input, and output to go-rd
264 fu_active
= fu_bitdict
[funame
]
265 name
= "%s_%s_%s_%i" % (regfile
, rpidx
, funame
, pi
)
266 addr_en
= Signal
.like(reads
[i
], name
="addr_en_"+name
)
267 pick
= Signal(name
="pick_"+name
) # picker input
268 rp
= Signal(name
="rp_"+name
) # picker output
269 delay_pick
= Signal(name
="dp_"+name
) # read-enable "underway"
271 # exclude any currently-enabled read-request (mask out active)
272 comb
+= pick
.eq(fu
.rd_rel_o
[idx
] & fu_active
& rdflags
[i
] &
274 comb
+= rdpick
.i
[pi
].eq(pick
)
275 comb
+= fu
.go_rd_i
[idx
].eq(delay_pick
) # pass in *delayed* pick
277 # if picked, select read-port "reg select" number to port
278 comb
+= rp
.eq(rdpick
.o
[pi
] & rdpick
.en_o
)
279 sync
+= delay_pick
.eq(rp
) # delayed "pick"
280 comb
+= addr_en
.eq(Mux(rp
, reads
[i
], 0))
282 # the read-enable happens combinatorially (see mux-bus below)
283 # but it results in the data coming out on a one-cycle delay.
287 addrs
.append(addr_en
)
290 # use the *delayed* pick signal to put requested data onto bus
291 with m
.If(delay_pick
):
292 # connect regfile port to input, creating fan-out Bus
294 print("reg connect widths",
295 regfile
, regname
, pi
, funame
,
296 src
.shape(), rport
.data_o
.shape())
297 # all FUs connect to same port
298 comb
+= src
.eq(rport
.data_o
)
300 # or-reduce the muxed read signals
302 # for unary-addressed
303 comb
+= rport
.ren
.eq(ortreereduce_sig(rens
))
305 # for binary-addressed
306 comb
+= rport
.addr
.eq(ortreereduce_sig(addrs
))
307 comb
+= rport
.ren
.eq(Cat(*rens
).bool())
308 print ("binary", regfile
, rpidx
, rport
, rport
.ren
, rens
, addrs
)
310 def connect_rdports(self
, m
, fu_bitdict
):
311 """connect read ports
313 orders the read regspecs into a dict-of-dicts, by regfile, by
314 regport name, then connects all FUs that want that regport by
315 way of a PriorityPicker.
317 comb
, sync
= m
.d
.comb
, m
.d
.sync
321 # dictionary of lists of regfile read ports
322 byregfiles_rd
, byregfiles_rdspec
= self
.get_byregfiles(True)
324 # okaay, now we need a PriorityPicker per regfile per regfile port
325 # loootta pickers... peter piper picked a pack of pickled peppers...
327 for regfile
, spec
in byregfiles_rd
.items():
328 fuspecs
= byregfiles_rdspec
[regfile
]
329 rdpickers
[regfile
] = {}
331 # argh. an experiment to merge RA and RB in the INT regfile
332 # (we have too many read/write ports)
333 #if regfile == 'INT':
334 #fuspecs['rabc'] = [fuspecs.pop('rb')]
335 #fuspecs['rabc'].append(fuspecs.pop('rc'))
336 #fuspecs['rabc'].append(fuspecs.pop('ra'))
337 #if regfile == 'FAST':
338 # fuspecs['fast1'] = [fuspecs.pop('fast1')]
339 # if 'fast2' in fuspecs:
340 # fuspecs['fast1'].append(fuspecs.pop('fast2'))
342 # for each named regfile port, connect up all FUs to that port
343 for (regname
, fspec
) in sort_fuspecs(fuspecs
):
344 print("connect rd", regname
, fspec
)
345 self
.connect_rdport(m
, fu_bitdict
, rdpickers
, regfile
,
348 def connect_wrport(self
, m
, fu_bitdict
, wrpickers
, regfile
, regname
, fspec
):
349 comb
, sync
= m
.d
.comb
, m
.d
.sync
353 print("connect wr", regname
, fspec
)
356 # select the required write port. these are pre-defined sizes
357 print(regfile
, regs
.rf
.keys())
358 rfile
= regs
.rf
[regfile
.lower()]
359 wport
= rfile
.w_ports
[rpidx
]
362 if not isinstance(fspecs
, list):
368 for i
, fspec
in enumerate(fspecs
):
369 # get the regfile specs for this regfile port
370 (rf
, read
, write
, wid
, fuspec
) = fspec
371 print ("fpsec", i
, fspec
, len(fuspec
))
372 ppoffs
.append(pplen
) # record offset for picker
375 # create a priority picker to manage this port
376 wrpickers
[regfile
][rpidx
] = wrpick
= PriorityPicker(pplen
)
377 setattr(m
.submodules
, "wrpick_%s_%s" % (regfile
, rpidx
), wrpick
)
382 for i
, fspec
in enumerate(fspecs
):
383 # connect up the FU req/go signals and the reg-read to the FU
384 # these are arbitrated by Data.ok signals
385 (rf
, read
, write
, wid
, fuspec
) = fspec
386 for pi
, (funame
, fu
, idx
) in enumerate(fuspec
):
389 # write-request comes from dest.ok
390 dest
= fu
.get_out(idx
)
391 fu_dest_latch
= fu
.get_fu_out(idx
) # latched output
392 name
= "wrflag_%s_%s_%d" % (funame
, regname
, idx
)
393 wrflag
= Signal(name
=name
, reset_less
=True)
394 comb
+= wrflag
.eq(dest
.ok
& fu
.busy_o
)
396 # connect request-write to picker input, and output to go-wr
397 fu_active
= fu_bitdict
[funame
]
398 pick
= fu
.wr
.rel_o
[idx
] & fu_active
# & wrflag
399 comb
+= wrpick
.i
[pi
].eq(pick
)
400 # create a single-pulse go write from the picker output
402 comb
+= wr_pick
.eq(wrpick
.o
[pi
] & wrpick
.en_o
)
403 comb
+= fu
.go_wr_i
[idx
].eq(rising_edge(m
, wr_pick
))
405 # connect the regspec write "reg select" number to this port
406 # only if one FU actually requests (and is granted) the port
407 # will the write-enable be activated
408 addr_en
= Signal
.like(write
)
410 comb
+= wp
.eq(wr_pick
& wrpick
.en_o
)
411 comb
+= addr_en
.eq(Mux(wp
, write
, 0))
415 addrs
.append(addr_en
)
418 # connect regfile port to input
419 print("reg connect widths",
420 regfile
, regname
, pi
, funame
,
421 dest
.shape(), wport
.data_i
.shape())
422 wsigs
.append(fu_dest_latch
)
424 # here is where we create the Write Broadcast Bus. simple, eh?
425 comb
+= wport
.data_i
.eq(ortreereduce_sig(wsigs
))
427 # for unary-addressed
428 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
430 # for binary-addressed
431 comb
+= wport
.addr
.eq(ortreereduce_sig(addrs
))
432 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
434 def connect_wrports(self
, m
, fu_bitdict
):
435 """connect write ports
437 orders the write regspecs into a dict-of-dicts, by regfile,
438 by regport name, then connects all FUs that want that regport
439 by way of a PriorityPicker.
441 note that the write-port wen, write-port data, and go_wr_i all need to
442 be on the exact same clock cycle. as there is a combinatorial loop bug
443 at the moment, these all use sync.
445 comb
, sync
= m
.d
.comb
, m
.d
.sync
448 # dictionary of lists of regfile write ports
449 byregfiles_wr
, byregfiles_wrspec
= self
.get_byregfiles(False)
451 # same for write ports.
452 # BLECH! complex code-duplication! BLECH!
454 for regfile
, spec
in byregfiles_wr
.items():
455 fuspecs
= byregfiles_wrspec
[regfile
]
456 wrpickers
[regfile
] = {}
458 # argh, more port-merging
460 fuspecs
['o'] = [fuspecs
.pop('o')]
461 fuspecs
['o'].append(fuspecs
.pop('o1'))
462 if regfile
== 'FAST':
463 fuspecs
['fast1'] = [fuspecs
.pop('fast1')]
464 if 'fast2' in fuspecs
:
465 fuspecs
['fast1'].append(fuspecs
.pop('fast2'))
467 for (regname
, fspec
) in sort_fuspecs(fuspecs
):
468 self
.connect_wrport(m
, fu_bitdict
, wrpickers
,
469 regfile
, regname
, fspec
)
471 def get_byregfiles(self
, readmode
):
473 mode
= "read" if readmode
else "write"
476 e
= self
.e
# decoded instruction to execute
478 # dictionary of lists of regfile ports
481 for (funame
, fu
) in fus
.items():
482 print("%s ports for %s" % (mode
, funame
))
483 for idx
in range(fu
.n_src
if readmode
else fu
.n_dst
):
485 (regfile
, regname
, wid
) = fu
.get_in_spec(idx
)
487 (regfile
, regname
, wid
) = fu
.get_out_spec(idx
)
488 print(" %d %s %s %s" % (idx
, regfile
, regname
, str(wid
)))
490 rdflag
, read
= regspec_decode_read(e
, regfile
, regname
)
493 rdflag
, read
= None, None
494 wrport
, write
= regspec_decode_write(e
, regfile
, regname
)
495 if regfile
not in byregfiles
:
496 byregfiles
[regfile
] = {}
497 byregfiles_spec
[regfile
] = {}
498 if regname
not in byregfiles_spec
[regfile
]:
499 byregfiles_spec
[regfile
][regname
] = \
500 (rdflag
, read
, write
, wid
, [])
501 # here we start to create "lanes"
502 if idx
not in byregfiles
[regfile
]:
503 byregfiles
[regfile
][idx
] = []
504 fuspec
= (funame
, fu
, idx
)
505 byregfiles
[regfile
][idx
].append(fuspec
)
506 byregfiles_spec
[regfile
][regname
][4].append(fuspec
)
508 # ok just print that out, for convenience
509 for regfile
, spec
in byregfiles
.items():
510 print("regfile %s ports:" % mode
, regfile
)
511 fuspecs
= byregfiles_spec
[regfile
]
512 for regname
, fspec
in fuspecs
.items():
513 [rdflag
, read
, write
, wid
, fuspec
] = fspec
514 print(" rf %s port %s lane: %s" % (mode
, regfile
, regname
))
515 print(" %s" % regname
, wid
, read
, write
, rdflag
)
516 for (funame
, fu
, idx
) in fuspec
:
517 fusig
= fu
.src_i
[idx
] if readmode
else fu
.dest
[idx
]
518 print(" ", funame
, fu
, idx
, fusig
)
521 return byregfiles
, byregfiles_spec
524 yield from self
.fus
.ports()
525 yield from self
.e
.ports()
526 yield from self
.l0
.ports()
533 if __name__
== '__main__':
534 pspec
= TestMemPspec(ldst_ifacetype
='testpi',
539 dut
= NonProductionCore(pspec
)
540 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
541 with
open("test_core.il", "w") as f
: