not in any way intended for production use.  connects up FunctionUnits to
Register Files in a brain-dead fashion that permits one and only one
Function Unit to be operational.

the principle here is to take the Function Units, analyse their regspecs,
and turn their requirements for access to register file read/write ports
into groupings by Register File and Register File Port name.

under each grouping - by regfile/port - a list of Function Units that
need to connect to that port is created.  as these are a contended
resource a "Broadcast Bus" per read/write port is then also created,
with access to it managed by a PriorityPicker.

the brain-dead part of this module is that even though there is no
conflict of access, regfile read/write hazards are *not* analysed,
and consequently it is safer to wait for the Function Unit to complete
before allowing a new instruction to proceed.
22 from nmigen
import Elaboratable
, Module
, Signal
, ResetSignal
, Cat
, Mux
23 from nmigen
.cli
import rtlil
25 from soc
.decoder
.power_decoder2
import PowerDecodeSubset
26 from soc
.decoder
.power_regspec_map
import regspec_decode_read
27 from soc
.decoder
.power_regspec_map
import regspec_decode_write
29 from nmutil
.picker
import PriorityPicker
30 from nmutil
.util
import treereduce
32 from soc
.fu
.compunits
.compunits
import AllFunctionUnits
33 from soc
.regfile
.regfiles
import RegFiles
34 from soc
.decoder
.decode2execute1
import Decode2ToExecute1Type
35 from soc
.decoder
.decode2execute1
import IssuerDecode2ToOperand
36 from soc
.decoder
.power_decoder2
import get_rdflags
37 from soc
.decoder
.decode2execute1
import Data
38 from soc
.experiment
.l0_cache
import TstL0CacheBuffer
# test only
39 from soc
.config
.test
.test_loadstore
import TestMemPspec
40 from soc
.decoder
.power_enums
import MicrOp
41 from soc
.config
.state
import CoreState
45 from nmutil
.util
import rising_edge
# helper function: OR together one named attribute taken from every
# item of a list, reducing the list down to a single value
def ortreereduce(tree, attr="data_o"):
    """OR-reduce the ``attr`` attribute of every item in ``tree``.

    tree: the items to reduce; passed unchanged to treereduce
    attr: name of the attribute fetched (via getattr) from each item
          before combining.  defaults to "data_o"

    returns the result of treereduce when the fetched attributes are
    combined with operator.or_
    """
    def fetch(item):
        # per-item accessor handed to treereduce
        return getattr(item, attr)

    return treereduce(tree, operator.or_, fetch)
def ortreereduce_sig(tree):
    """OR-reduce the items of ``tree`` directly (no attribute lookup).

    tree: the items to reduce; passed unchanged to treereduce

    returns the result of treereduce when the items themselves are
    combined with operator.or_
    """
    def passthrough(item):
        # each item is used as-is
        return item

    return treereduce(tree, operator.or_, passthrough)
# helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Order regfile port specs so that "full" ports come first.

    fuspecs: dictionary mapping a register port name to its spec

    returns a list of (regname, fspec) tuples: every entry whose name
    starts with "full" (in the dictionary's own iteration order),
    followed by all remaining entries (again in iteration order).
    an empty dictionary gives an empty list.
    """
    # snapshot once so both passes see the same ordering
    entries = list(fuspecs.items())
    full_first = [(regname, fspec) for (regname, fspec) in entries
                  if regname.startswith("full")]
    remainder = [(regname, fspec) for (regname, fspec) in entries
                 if not regname.startswith("full")]
    return full_first + remainder
70 class NonProductionCore(Elaboratable
):
71 def __init__(self
, pspec
):
74 # single LD/ST funnel for memory access
75 self
.l0
= TstL0CacheBuffer(pspec
, n_units
=1)
76 pi
= self
.l0
.l0
.dports
[0]
81 self
.dcache
= DCache()
83 # function units (only one each)
84 self
.fus
= AllFunctionUnits(pspec
, pilist
=[pi
])
86 # register files (yes plural)
87 self
.regs
= RegFiles()
89 # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
90 self
.e
= Decode2ToExecute1Type("core", opkls
=IssuerDecode2ToOperand
)
92 self
.state
= CoreState("core")
93 self
.raw_insn_i
= Signal(32) # raw instruction
94 self
.bigendian_i
= Signal() # bigendian
96 # issue/valid/busy signalling
97 self
.ivalid_i
= Signal(reset_less
=True) # instruction is valid
98 self
.issue_i
= Signal(reset_less
=True)
99 self
.busy_o
= Signal(name
="corebusy_o", reset_less
=True)
101 # start/stop and terminated signalling
102 self
.core_stopped_i
= Signal(reset_less
=True)
103 self
.core_reset_i
= Signal()
104 self
.core_terminate_o
= Signal(reset
=0) # indicates stopped
106 # create per-FU instruction decoders (subsetted)
110 for funame
, fu
in self
.fus
.fus
.items():
111 f_name
= fu
.fnunit
.name
112 fnunit
= fu
.fnunit
.value
113 opkls
= fu
.opsubsetkls
115 self
.trapunit
= funame
117 self
.decoders
[funame
] = PowerDecodeSubset(None, opkls
, f_name
,
120 self
.des
[funame
] = self
.decoders
[funame
].do
122 def elaborate(self
, platform
):
124 # for testing purposes, to cut down on build time in coriolis2
125 if hasattr(self
.pspec
, "nocore") and self
.pspec
.nocore
== True:
129 m
.submodules
.fus
= self
.fus
130 m
.submodules
.l0
= l0
= self
.l0
131 self
.regs
.elaborate_into(m
, platform
)
136 for k
, v
in self
.decoders
.items():
137 setattr(m
.submodules
, "dec_%s" % v
.fn_name
, v
)
138 comb
+= v
.dec
.raw_opcode_in
.eq(self
.raw_insn_i
)
139 comb
+= v
.dec
.bigendian
.eq(self
.bigendian_i
)
141 # ssh, cheat: trap uses the main decoder because of the rewriting
142 self
.des
[self
.trapunit
] = self
.e
.do
144 # connect up Function Units, then read/write ports
145 fu_bitdict
= self
.connect_instruction(m
)
146 self
.connect_rdports(m
, fu_bitdict
)
147 self
.connect_wrports(m
, fu_bitdict
)
150 m
.d
.comb
+= ResetSignal().eq(self
.core_reset_i
)
154 def connect_instruction(self
, m
):
155 """connect_instruction
157 uses decoded (from PowerOp) function unit information from CSV files
158 to ascertain which Function Unit should deal with the current
161 some (such as OP_ATTN, OP_NOP) are dealt with here, including
162 ignoring it and halting the processor. OP_NOP is a bit annoying
163 because the issuer expects busy flag still to be raised then lowered.
164 (this requires a fake counter to be set).
166 comb
, sync
= m
.d
.comb
, m
.d
.sync
169 # enable-signals for each FU, get one bit for each FU (by name)
170 fu_enable
= Signal(len(fus
), reset_less
=True)
172 for i
, funame
in enumerate(fus
.keys()):
173 fu_bitdict
[funame
] = fu_enable
[i
]
175 # enable the required Function Unit based on the opcode decode
176 # note: this *only* works correctly for simple core when one and
177 # *only* one FU is allocated per instruction
178 for funame
, fu
in fus
.items():
179 fnunit
= fu
.fnunit
.value
180 enable
= Signal(name
="en_%s" % funame
, reset_less
=True)
181 comb
+= enable
.eq((self
.e
.do
.fn_unit
& fnunit
).bool())
182 comb
+= fu_bitdict
[funame
].eq(enable
)
184 # sigh - need a NOP counter
186 with m
.If(counter
!= 0):
187 sync
+= counter
.eq(counter
- 1)
188 comb
+= self
.busy_o
.eq(1)
190 with m
.If(self
.ivalid_i
): # run only when valid
191 with m
.Switch(self
.e
.do
.insn_type
):
192 # check for ATTN: halt if true
193 with m
.Case(MicrOp
.OP_ATTN
):
194 m
.d
.sync
+= self
.core_terminate_o
.eq(1)
196 with m
.Case(MicrOp
.OP_NOP
):
197 sync
+= counter
.eq(2)
198 comb
+= self
.busy_o
.eq(1)
201 # connect up instructions. only one enabled at a time
202 for funame
, fu
in fus
.items():
203 do
= self
.des
[funame
]
204 enable
= fu_bitdict
[funame
]
206 # run this FunctionUnit if enabled
207 # route op, issue, busy, read flags and mask to FU
209 # operand comes from the *local* decoder
210 comb
+= fu
.oper_i
.eq_from(do
)
211 #comb += fu.oper_i.eq_from_execute1(e)
212 comb
+= fu
.issue_i
.eq(self
.issue_i
)
213 comb
+= self
.busy_o
.eq(fu
.busy_o
)
214 # rdmask, which is for registers, needs to come
215 # from the *main* decoder
216 rdmask
= get_rdflags(self
.e
, fu
)
217 comb
+= fu
.rdmaskn
.eq(~rdmask
)
221 def connect_rdport(self
, m
, fu_bitdict
, rdpickers
, regfile
, regname
, fspec
):
222 comb
, sync
= m
.d
.comb
, m
.d
.sync
228 # select the required read port. these are pre-defined sizes
229 rfile
= regs
.rf
[regfile
.lower()]
230 rport
= rfile
.r_ports
[rpidx
]
231 print("read regfile", rpidx
, regfile
, regs
.rf
.keys(),
235 if not isinstance(fspecs
, list):
242 for i
, fspec
in enumerate(fspecs
):
243 # get the regfile specs for this regfile port
244 (rf
, read
, write
, wid
, fuspec
) = fspec
245 print ("fpsec", i
, fspec
, len(fuspec
))
246 ppoffs
.append(pplen
) # record offset for picker
248 name
= "rdflag_%s_%s_%d" % (regfile
, regname
, i
)
249 rdflag
= Signal(name
=name
, reset_less
=True)
250 comb
+= rdflag
.eq(rf
)
251 rdflags
.append(rdflag
)
254 print ("pplen", pplen
)
256 # create a priority picker to manage this port
257 rdpickers
[regfile
][rpidx
] = rdpick
= PriorityPicker(pplen
)
258 setattr(m
.submodules
, "rdpick_%s_%s" % (regfile
, rpidx
), rdpick
)
262 for i
, fspec
in enumerate(fspecs
):
263 (rf
, read
, write
, wid
, fuspec
) = fspec
264 # connect up the FU req/go signals, and the reg-read to the FU
265 # and create a Read Broadcast Bus
266 for pi
, (funame
, fu
, idx
) in enumerate(fuspec
):
269 # connect request-read to picker input, and output to go-rd
270 fu_active
= fu_bitdict
[funame
]
271 name
= "%s_%s_%s_%i" % (regfile
, rpidx
, funame
, pi
)
272 addr_en
= Signal
.like(reads
[i
], name
="addr_en_"+name
)
273 pick
= Signal(name
="pick_"+name
) # picker input
274 rp
= Signal(name
="rp_"+name
) # picker output
275 delay_pick
= Signal(name
="dp_"+name
) # read-enable "underway"
277 # exclude any currently-enabled read-request (mask out active)
278 comb
+= pick
.eq(fu
.rd_rel_o
[idx
] & fu_active
& rdflags
[i
] &
280 comb
+= rdpick
.i
[pi
].eq(pick
)
281 comb
+= fu
.go_rd_i
[idx
].eq(delay_pick
) # pass in *delayed* pick
283 # if picked, select read-port "reg select" number to port
284 comb
+= rp
.eq(rdpick
.o
[pi
] & rdpick
.en_o
)
285 sync
+= delay_pick
.eq(rp
) # delayed "pick"
286 comb
+= addr_en
.eq(Mux(rp
, reads
[i
], 0))
288 # the read-enable happens combinatorially (see mux-bus below)
289 # but it results in the data coming out on a one-cycle delay.
293 addrs
.append(addr_en
)
296 # use the *delayed* pick signal to put requested data onto bus
297 with m
.If(delay_pick
):
298 # connect regfile port to input, creating fan-out Bus
300 print("reg connect widths",
301 regfile
, regname
, pi
, funame
,
302 src
.shape(), rport
.data_o
.shape())
303 # all FUs connect to same port
304 comb
+= src
.eq(rport
.data_o
)
306 # or-reduce the muxed read signals
308 # for unary-addressed
309 comb
+= rport
.ren
.eq(ortreereduce_sig(rens
))
311 # for binary-addressed
312 comb
+= rport
.addr
.eq(ortreereduce_sig(addrs
))
313 comb
+= rport
.ren
.eq(Cat(*rens
).bool())
314 print ("binary", regfile
, rpidx
, rport
, rport
.ren
, rens
, addrs
)
316 def connect_rdports(self
, m
, fu_bitdict
):
317 """connect read ports
319 orders the read regspecs into a dict-of-dicts, by regfile, by
320 regport name, then connects all FUs that want that regport by
321 way of a PriorityPicker.
323 comb
, sync
= m
.d
.comb
, m
.d
.sync
327 # dictionary of lists of regfile read ports
328 byregfiles_rd
, byregfiles_rdspec
= self
.get_byregfiles(True)
330 # okaay, now we need a PriorityPicker per regfile per regfile port
331 # loootta pickers... peter piper picked a pack of pickled peppers...
333 for regfile
, spec
in byregfiles_rd
.items():
334 fuspecs
= byregfiles_rdspec
[regfile
]
335 rdpickers
[regfile
] = {}
337 # argh. an experiment to merge RA and RB in the INT regfile
338 # (we have too many read/write ports)
339 #if regfile == 'INT':
340 #fuspecs['rabc'] = [fuspecs.pop('rb')]
341 #fuspecs['rabc'].append(fuspecs.pop('rc'))
342 #fuspecs['rabc'].append(fuspecs.pop('ra'))
343 #if regfile == 'FAST':
344 # fuspecs['fast1'] = [fuspecs.pop('fast1')]
345 # if 'fast2' in fuspecs:
346 # fuspecs['fast1'].append(fuspecs.pop('fast2'))
348 # for each named regfile port, connect up all FUs to that port
349 for (regname
, fspec
) in sort_fuspecs(fuspecs
):
350 print("connect rd", regname
, fspec
)
351 self
.connect_rdport(m
, fu_bitdict
, rdpickers
, regfile
,
354 def connect_wrport(self
, m
, fu_bitdict
, wrpickers
, regfile
, regname
, fspec
):
355 comb
, sync
= m
.d
.comb
, m
.d
.sync
359 print("connect wr", regname
, fspec
)
362 # select the required write port. these are pre-defined sizes
363 print(regfile
, regs
.rf
.keys())
364 rfile
= regs
.rf
[regfile
.lower()]
365 wport
= rfile
.w_ports
[rpidx
]
368 if not isinstance(fspecs
, list):
374 for i
, fspec
in enumerate(fspecs
):
375 # get the regfile specs for this regfile port
376 (rf
, read
, write
, wid
, fuspec
) = fspec
377 print ("fpsec", i
, fspec
, len(fuspec
))
378 ppoffs
.append(pplen
) # record offset for picker
381 # create a priority picker to manage this port
382 wrpickers
[regfile
][rpidx
] = wrpick
= PriorityPicker(pplen
)
383 setattr(m
.submodules
, "wrpick_%s_%s" % (regfile
, rpidx
), wrpick
)
388 for i
, fspec
in enumerate(fspecs
):
389 # connect up the FU req/go signals and the reg-read to the FU
390 # these are arbitrated by Data.ok signals
391 (rf
, read
, write
, wid
, fuspec
) = fspec
392 for pi
, (funame
, fu
, idx
) in enumerate(fuspec
):
395 # write-request comes from dest.ok
396 dest
= fu
.get_out(idx
)
397 fu_dest_latch
= fu
.get_fu_out(idx
) # latched output
398 name
= "wrflag_%s_%s_%d" % (funame
, regname
, idx
)
399 wrflag
= Signal(name
=name
, reset_less
=True)
400 comb
+= wrflag
.eq(dest
.ok
& fu
.busy_o
)
402 # connect request-write to picker input, and output to go-wr
403 fu_active
= fu_bitdict
[funame
]
404 pick
= fu
.wr
.rel_o
[idx
] & fu_active
# & wrflag
405 comb
+= wrpick
.i
[pi
].eq(pick
)
406 # create a single-pulse go write from the picker output
408 comb
+= wr_pick
.eq(wrpick
.o
[pi
] & wrpick
.en_o
)
409 comb
+= fu
.go_wr_i
[idx
].eq(rising_edge(m
, wr_pick
))
411 # connect the regspec write "reg select" number to this port
412 # only if one FU actually requests (and is granted) the port
413 # will the write-enable be activated
414 addr_en
= Signal
.like(write
)
416 comb
+= wp
.eq(wr_pick
& wrpick
.en_o
)
417 comb
+= addr_en
.eq(Mux(wp
, write
, 0))
421 addrs
.append(addr_en
)
424 # connect regfile port to input
425 print("reg connect widths",
426 regfile
, regname
, pi
, funame
,
427 dest
.shape(), wport
.data_i
.shape())
428 wsigs
.append(fu_dest_latch
)
430 # here is where we create the Write Broadcast Bus. simple, eh?
431 comb
+= wport
.data_i
.eq(ortreereduce_sig(wsigs
))
433 # for unary-addressed
434 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
436 # for binary-addressed
437 comb
+= wport
.addr
.eq(ortreereduce_sig(addrs
))
438 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
440 def connect_wrports(self
, m
, fu_bitdict
):
441 """connect write ports
443 orders the write regspecs into a dict-of-dicts, by regfile,
444 by regport name, then connects all FUs that want that regport
445 by way of a PriorityPicker.
447 note that the write-port wen, write-port data, and go_wr_i all need to
448 be on the exact same clock cycle. as there is a combinatorial loop bug
449 at the moment, these all use sync.
451 comb
, sync
= m
.d
.comb
, m
.d
.sync
454 # dictionary of lists of regfile write ports
455 byregfiles_wr
, byregfiles_wrspec
= self
.get_byregfiles(False)
457 # same for write ports.
458 # BLECH! complex code-duplication! BLECH!
460 for regfile
, spec
in byregfiles_wr
.items():
461 fuspecs
= byregfiles_wrspec
[regfile
]
462 wrpickers
[regfile
] = {}
464 # argh, more port-merging
466 fuspecs
['o'] = [fuspecs
.pop('o')]
467 fuspecs
['o'].append(fuspecs
.pop('o1'))
468 if regfile
== 'FAST':
469 fuspecs
['fast1'] = [fuspecs
.pop('fast1')]
470 if 'fast2' in fuspecs
:
471 fuspecs
['fast1'].append(fuspecs
.pop('fast2'))
473 for (regname
, fspec
) in sort_fuspecs(fuspecs
):
474 self
.connect_wrport(m
, fu_bitdict
, wrpickers
,
475 regfile
, regname
, fspec
)
477 def get_byregfiles(self
, readmode
):
479 mode
= "read" if readmode
else "write"
482 e
= self
.e
# decoded instruction to execute
484 # dictionary of lists of regfile ports
487 for (funame
, fu
) in fus
.items():
488 print("%s ports for %s" % (mode
, funame
))
489 for idx
in range(fu
.n_src
if readmode
else fu
.n_dst
):
491 (regfile
, regname
, wid
) = fu
.get_in_spec(idx
)
493 (regfile
, regname
, wid
) = fu
.get_out_spec(idx
)
494 print(" %d %s %s %s" % (idx
, regfile
, regname
, str(wid
)))
496 rdflag
, read
= regspec_decode_read(e
, regfile
, regname
)
499 rdflag
, read
= None, None
500 wrport
, write
= regspec_decode_write(e
, regfile
, regname
)
501 if regfile
not in byregfiles
:
502 byregfiles
[regfile
] = {}
503 byregfiles_spec
[regfile
] = {}
504 if regname
not in byregfiles_spec
[regfile
]:
505 byregfiles_spec
[regfile
][regname
] = \
506 (rdflag
, read
, write
, wid
, [])
507 # here we start to create "lanes"
508 if idx
not in byregfiles
[regfile
]:
509 byregfiles
[regfile
][idx
] = []
510 fuspec
= (funame
, fu
, idx
)
511 byregfiles
[regfile
][idx
].append(fuspec
)
512 byregfiles_spec
[regfile
][regname
][4].append(fuspec
)
514 # ok just print that out, for convenience
515 for regfile
, spec
in byregfiles
.items():
516 print("regfile %s ports:" % mode
, regfile
)
517 fuspecs
= byregfiles_spec
[regfile
]
518 for regname
, fspec
in fuspecs
.items():
519 [rdflag
, read
, write
, wid
, fuspec
] = fspec
520 print(" rf %s port %s lane: %s" % (mode
, regfile
, regname
))
521 print(" %s" % regname
, wid
, read
, write
, rdflag
)
522 for (funame
, fu
, idx
) in fuspec
:
523 fusig
= fu
.src_i
[idx
] if readmode
else fu
.dest
[idx
]
524 print(" ", funame
, fu
, idx
, fusig
)
527 return byregfiles
, byregfiles_spec
530 yield from self
.fus
.ports()
531 yield from self
.e
.ports()
532 yield from self
.l0
.ports()
539 if __name__
== '__main__':
540 pspec
= TestMemPspec(ldst_ifacetype
='testpi',
545 dut
= NonProductionCore(pspec
)
546 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
547 with
open("test_core.il", "w") as f
: