reducing regfile port usage by sharing read ports
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 """
21
22 from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat
23 from nmigen.cli import rtlil
24
25 from nmutil.picker import PriorityPicker
26 from nmutil.util import treereduce
27
28 from soc.fu.compunits.compunits import AllFunctionUnits
29 from soc.regfile.regfiles import RegFiles
30 from soc.decoder.power_decoder import create_pdecode
31 from soc.decoder.power_decoder2 import PowerDecode2
32 from soc.decoder.decode2execute1 import Data
33 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
34 from soc.config.test.test_loadstore import TestMemPspec
35 from soc.decoder.power_enums import MicrOp
36 import operator
37
38 from nmutil.util import rising_edge
39
40
# OR-reduction helper: collapses a list of objects down to a single value
# by ORing together one named attribute taken from each of them.
def ortreereduce(tree, attr="data_o"):
    """OR together the ``attr`` attribute of every entry of ``tree``.

    tree -- list of objects, each of which has an attribute named ``attr``
    attr -- name of the attribute to extract (defaults to "data_o")

    the reduction itself is delegated to treereduce, with operator.or_ as
    the combining operation.
    """
    def extract(entry):
        return getattr(entry, attr)

    return treereduce(tree, operator.or_, extract)
45
46
def ortreereduce_sig(tree):
    """OR together the entries of ``tree`` themselves.

    same as ortreereduce except that no attribute is looked up: each entry
    is handed to operator.or_ unchanged (via treereduce).
    """
    def passthrough(entry):
        return entry

    return treereduce(tree, operator.or_, passthrough)
49
50
# helper function: orders regspec entries so that "full" register
# declarations come before all the others
def sort_fuspecs(fuspecs):
    """Return the (regname, fspec) pairs of ``fuspecs``, "full" names first.

    fuspecs -- dict mapping a register port name to its spec

    returns a new list of (regname, fspec) tuples.  entries whose name
    starts with "full" come first, followed by everything else; within
    each of the two groups the dict's own iteration order is kept.
    the dict itself is not modified.
    """
    def not_full(pair):
        # False sorts before True, so "full..." names end up at the front
        return not pair[0].startswith("full")

    # sorted() is stable: ties keep their original relative order
    return sorted(fuspecs.items(), key=not_full)
61
62
class NonProductionCore(Elaboratable):
    """simple (non-production) core

    ties together a set of Function Units (AllFunctionUnits), the register
    files (RegFiles), an instruction decoder (PowerDecode2) and a single
    load/store buffer (TstL0CacheBuffer).

    signals and aliases created by the constructor:

    * ivalid_i         - alias of pdecode2.valid
    * issue_i          - routed to issue_i of whichever Function Unit is
                         enabled (see connect_instruction)
    * busy_o           - named "corebusy_o"; driven in connect_instruction
    * bigendian_i      - alias of pdecode2.dec.bigendian
    * raw_opcode_i     - alias of pdecode2.dec.raw_opcode_in
    * core_stopped_i   - created here; not referenced anywhere else in
                         this class
    * core_reset_i     - drives ResetSignal() in elaborate
    * core_terminate_o - set to 1 (in the sync domain) when a valid
                         OP_ATTN instruction is seen
    """

    def __init__(self, pspec):
        """create the sub-blocks and the core-level signals.

        pspec is handed unmodified to TstL0CacheBuffer and AllFunctionUnits.
        """
        # single LD/ST funnel for memory access
        self.l0 = TstL0CacheBuffer(pspec, n_units=1)
        # first entry of the buffer's dports: given to the Function Units
        pi = self.l0.l0.dports[0]

        # function units (only one each)
        self.fus = AllFunctionUnits(pspec, pilist=[pi])

        # register files (yes plural)
        self.regs = RegFiles()

        # instruction decoder
        pdecode = create_pdecode()
        self.pdecode2 = PowerDecode2(pdecode) # instruction decoder

        # issue/valid/busy signalling
        self.ivalid_i = self.pdecode2.valid # instruction is valid
        self.issue_i = Signal(reset_less=True)
        self.busy_o = Signal(name="corebusy_o", reset_less=True)

        # instruction input
        self.bigendian_i = self.pdecode2.dec.bigendian
        self.raw_opcode_i = self.pdecode2.dec.raw_opcode_in

        # start/stop and terminated signalling
        self.core_stopped_i = Signal(reset_less=True)
        self.core_reset_i = Signal()
        self.core_terminate_o = Signal(reset=0) # indicates stopped

    def elaborate(self, platform):
        """build and return the Module for the core.

        registers the decoder, Function Units and L0 buffer as submodules,
        lets the register files add themselves via elaborate_into, wires up
        instruction issue and the regfile read/write ports, and finally
        drives ResetSignal() from core_reset_i.
        """
        m = Module()

        # the local aliases bound below (dec2, l0, regs, fus) are not used
        # again in this method; only the submodule registration matters
        m.submodules.pdecode2 = dec2 = self.pdecode2
        m.submodules.fus = self.fus
        m.submodules.l0 = l0 = self.l0
        self.regs.elaborate_into(m, platform)
        regs = self.regs
        fus = self.fus.fus

        # connect up Function Units, then read/write ports
        # (fu_bitdict: FU name -> that FU's enable bit)
        fu_bitdict = self.connect_instruction(m)
        self.connect_rdports(m, fu_bitdict)
        self.connect_wrports(m, fu_bitdict)

        # connect up reset
        m.d.comb += ResetSignal().eq(self.core_reset_i)

        return m

    def connect_instruction(self, m):
        """connect_instruction

        uses decoded (from PowerOp) function unit information from CSV files
        to ascertain which Function Unit should deal with the current
        instruction.

        some (such as OP_ATTN, OP_NOP) are dealt with here, including
        ignoring it and halting the processor. OP_NOP is a bit annoying
        because the issuer expects busy flag still to be raised then lowered.
        (this requires a fake counter to be set).

        returns a dict mapping each Function Unit name to its one-bit slice
        of the fu_enable signal.  the same dict is later passed to
        connect_rdports and connect_wrports.

        NOTE: the order of the statements below matters.  busy_o is assigned
        in several places; in nmigen a later assignment to the same signal
        takes priority over an earlier one whenever its condition is active.
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus
        dec2 = self.pdecode2

        # enable-signals for each FU, get one bit for each FU (by name)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_bitdict = {}
        for i, funame in enumerate(fus.keys()):
            fu_bitdict[funame] = fu_enable[i]

        # enable the required Function Unit based on the opcode decode
        # note: this *only* works correctly for simple core when one and
        # *only* one FU is allocated per instruction
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            enable = Signal(name="en_%s" % funame, reset_less=True)
            # enabled when the decoded fn_unit has any bit in common with
            # this FU's fnunit value
            comb += enable.eq((dec2.e.do.fn_unit & fnunit).bool())
            comb += fu_bitdict[funame].eq(enable)

        # sigh - need a NOP counter
        # while non-zero it decrements by one per clock and forces busy_o to
        # 1; it is loaded with 2 by the OP_NOP case further down
        counter = Signal(2)
        with m.If(counter != 0):
            sync += counter.eq(counter - 1)
            comb += self.busy_o.eq(1)

        with m.If(self.ivalid_i): # run only when valid
            with m.Switch(dec2.e.do.insn_type):
                # check for ATTN: halt if true
                with m.Case(MicrOp.OP_ATTN):
                    m.d.sync += self.core_terminate_o.eq(1)

                # NOP: no Function Unit is involved, so fake a busy period
                with m.Case(MicrOp.OP_NOP):
                    sync += counter.eq(2)
                    comb += self.busy_o.eq(1)

                with m.Default():
                    # connect up instructions. only one enabled at a time
                    for funame, fu in fus.items():
                        enable = fu_bitdict[funame]

                        # run this FunctionUnit if enabled
                        with m.If(enable):
                            # route op, issue, busy, read flags and mask to FU
                            comb += fu.oper_i.eq_from_execute1(dec2.e)
                            comb += fu.issue_i.eq(self.issue_i)
                            comb += self.busy_o.eq(fu.busy_o)
                            # the FU receives the *inverted* read flags
                            rdmask = dec2.rdflags(fu)
                            comb += fu.rdmaskn.eq(~rdmask)

        return fu_bitdict

    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
        """connect one regfile read port to every Function Unit that uses it.

        * m          - the Module being built
        * fu_bitdict - FU name -> enable bit (from connect_instruction)
        * rdpickers  - dict-of-dicts; the PriorityPicker created here is
                       stored at rdpickers[regfile][regname]
        * regfile    - register file name; its lower-cased form is used as
                       the key into self.regs.rf
        * regname    - name of the read port (key into r_ports)
        * fspec      - either one 5-tuple (rf, read, write, wid, fuspec) or
                       a list of such tuples, where fuspec is a list of
                       (funame, fu, idx) entries.  a list means that several
                       regspecs share this one port.

        a single PriorityPicker is created, with one input per
        (funame, fu, idx) entry across *all* the supplied specs.  ppoffs
        records where each spec's entries begin within the picker.
        """
        # sync and fus are unpacked here but not used below
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus
        regs = self.regs

        rpidx = regname

        # select the required read port. these are pre-defined sizes
        print(rpidx, regfile, regs.rf.keys())
        rport = regs.rf[regfile.lower()].r_ports[rpidx]

        # normalise to a list so single and shared ports are treated alike
        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        # first pass: one read-flag signal per spec, plus bookkeeping
        # (note that the loop variable re-uses the name of the fspec argument)
        rdflags = []
        pplen = 0
        reads = []
        ppoffs = []
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = fspec
            print ("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen) # record offset for picker
            pplen += len(fuspec)
            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name=name, reset_less=True)
            comb += rdflag.eq(rf)
            rdflags.append(rdflag)
            reads.append(read)

        # pplen is now the total number of picker inputs required
        print ("pplen", pplen)

        # create a priority picker to manage this port
        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

        # second pass: wire each FU entry to its own picker input/output
        for i, fspec in enumerate(fspecs):
            (rf, read, write, wid, fuspec) = fspec
            # connect up the FU req/go signals, and the reg-read to the FU
            # and create a Read Broadcast Bus
            for pi, (funame, fu, idx) in enumerate(fuspec):
                # shift by this spec's offset to get the picker bit number
                pi += ppoffs[i]
                src = fu.src_i[idx]

                # connect request-read to picker input, and output to go-rd
                # a request only counts if the FU is enabled and the read
                # flag of its spec is set
                fu_active = fu_bitdict[funame]
                pick = Signal()
                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i])
                print (pick, len(pick))
                print (rdpick.i, len(rdpick.i), pi)
                comb += rdpick.i[pi].eq(pick)
                comb += fu.go_rd_i[idx].eq(rdpick.o[pi])

                # if picked, select read-port "reg select" number to port
                with m.If(rdpick.o[pi] & rdpick.en_o):
                    comb += rport.ren.eq(reads[i])

                # connect regfile port to input, creating a Broadcast Bus
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      src.shape(), rport.data_o.shape())
                # all FUs connect to same port
                comb += src.eq(rport.data_o)

    def connect_rdports(self, m, fu_bitdict):
        """connect read ports

        orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker.

        for the 'INT' regfile the 'rb' and 'rc' specs are merged into one
        list under the port name 'rbc'; for 'FAST' the 'fast1' and 'fast2'
        specs are merged into one list under 'fast1'.  both merges use
        dict.pop without a default, so the named entries must be present.
        """
        # comb, sync, fus and regs are unpacked here but not used below
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus
        regs = self.regs

        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        rdpickers = {}
        for regfile, spec in byregfiles_rd.items():
            fuspecs = byregfiles_rdspec[regfile]
            rdpickers[regfile] = {}

            # argh. an experiment to merge RB and RC in the INT regfile
            # (we have too many read/write ports).  likewise fast1 and
            # fast2 in the FAST regfile.
            if regfile == 'INT':
                fuspecs['rbc'] = [fuspecs.pop('rb')]
                fuspecs['rbc'].append(fuspecs.pop('rc'))
            if regfile == 'FAST':
                fuspecs['fast1'] = [fuspecs.pop('fast1')]
                fuspecs['fast1'].append(fuspecs.pop('fast2'))

            # for each named regfile port, connect up all FUs to that port
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect rd", regname, fspec)
                self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                    regname, fspec)

    def connect_wrports(self, m, fu_bitdict):
        """connect write ports

        orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker.

        note that the write-port wen, write-port data, and go_wr_i all need to
        be on the exact same clock cycle.

        NOTE(review): an earlier version of this note said that these all
        use sync because of a combinatorial loop bug.  every assignment made
        directly in this method is added to comb (sync is unpacked but never
        used) - confirm which of the two is intended.
        """
        # sync and fus are unpacked here but not used below
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus
        regs = self.regs
        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same for write ports.
        # BLECH! complex code-duplication! BLECH!
        wrpickers = {}
        for regfile, spec in byregfiles_wr.items():
            fuspecs = byregfiles_wrspec[regfile]
            wrpickers[regfile] = {}
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect wr", regname, fspec)
                rpidx = regname
                # get the regfile specs for this regfile port
                # (in write mode rf and read are None: see get_byregfiles)
                (rf, read, write, wid, fuspec) = fspec

                # select the required write port. these are pre-defined sizes
                print(regfile, regs.rf.keys())
                wport = regs.rf[regfile.lower()].w_ports[rpidx]

                # create a priority picker to manage this port
                wrpickers[regfile][rpidx] = wrpick = PriorityPicker(
                    len(fuspec))
                setattr(m.submodules, "wrpick_%s_%s" %
                        (regfile, rpidx), wrpick)

                # connect the regspec write "reg select" number to this port
                # only if one FU actually requests (and is granted) the port
                # will the write-enable be activated
                with m.If(wrpick.en_o):
                    comb += wport.wen.eq(write)
                with m.Else():
                    comb += wport.wen.eq(0)

                # connect up the FU req/go signals and the reg-write data
                # from the FU
                wsigs = []
                for pi, (funame, fu, idx) in enumerate(fuspec):
                    # wrflag is dest.ok qualified by the FU being busy.  it
                    # is created and driven here but is currently *not* part
                    # of the pick condition below (see "# & wrflag")
                    dest = fu.get_out(idx)
                    fu_dest_latch = fu.get_fu_out(idx) # latched output
                    name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                    wrflag = Signal(name=name, reset_less=True)
                    comb += wrflag.eq(dest.ok & fu.busy_o)

                    # connect request-write to picker input, and output to go-wr
                    fu_active = fu_bitdict[funame]
                    pick = fu.wr.rel_o[idx] & fu_active # & wrflag
                    comb += wrpick.i[pi].eq(pick)
                    # create a single-pulse go write from the picker output
                    wr_pick = Signal()
                    comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
                    comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))
                    # connect regfile port to input
                    print("reg connect widths",
                          regfile, regname, pi, funame,
                          dest.shape(), wport.data_i.shape())
                    wsigs.append(fu_dest_latch)

                # here is where we create the Write Broadcast Bus. simple, eh?
                # (the latched outputs of all FUs on this port, ORed together)
                comb += wport.data_i.eq(ortreereduce_sig(wsigs))

    def get_byregfiles(self, readmode):
        """group the Function Units' register specs by register file.

        readmode -- True to walk each FU's source specs (n_src entries, via
                    get_in_spec), False to walk its destination specs
                    (n_dst entries, via get_out_spec)

        returns a tuple (byregfiles, byregfiles_spec):

        * byregfiles[regfile][idx] is a list of (funame, fu, idx) tuples
        * byregfiles_spec[regfile][regname] is a 5-tuple
          (rdflag, read, write, wid, fuspecs), where fuspecs is the list of
          (funame, fu, idx) tuples that use that regname.  in read mode
          write is None; in write mode rdflag and read are None.

        the first four items of the 5-tuple are taken from whichever
        Function Unit mentions the regname first; later ones only append
        to the list.  everything gathered is also printed to stdout.
        """

        mode = "read" if readmode else "write"
        dec2 = self.pdecode2
        regs = self.regs
        fus = self.fus.fus

        # dictionary of lists of regfile ports
        byregfiles = {}
        byregfiles_spec = {}
        for (funame, fu) in fus.items():
            print("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                if readmode:
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
                # ask the decoder which of its signals go with this regspec
                if readmode:
                    rdflag, read = dec2.regspecmap_read(regfile, regname)
                    write = None
                else:
                    rdflag, read = None, None
                    # wrport is not used any further
                    wrport, write = dec2.regspecmap_write(regfile, regname)
                if regfile not in byregfiles:
                    byregfiles[regfile] = {}
                    byregfiles_spec[regfile] = {}
                if regname not in byregfiles_spec[regfile]:
                    byregfiles_spec[regfile][regname] = \
                        (rdflag, read, write, wid, [])
                # here we start to create "lanes"
                if idx not in byregfiles[regfile]:
                    byregfiles[regfile][idx] = []
                fuspec = (funame, fu, idx)
                byregfiles[regfile][idx].append(fuspec)
                byregfiles_spec[regfile][regname][4].append(fuspec)

        # ok just print that out, for convenience
        for regfile, spec in byregfiles.items():
            print("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, read, write, wid, fuspec] = fspec
                print(" rf %s port %s lane: %s" % (mode, regfile, regname))
                print(" %s" % regname, wid, read, write, rdflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print(" ", funame, fu, idx, fusig)
                    print()

        return byregfiles, byregfiles_spec

    def __iter__(self):
        """yield the ports of the Function Units, the decoder and the L0
        buffer, in that order.  register file ports are not included yet.
        """
        yield from self.fus.ports()
        yield from self.pdecode2.ports()
        yield from self.l0.ports()
        # TODO: regs

    def ports(self):
        """return everything yielded by __iter__, as a list."""
        return list(self)
412
413
if __name__ == '__main__':
    # build a core from a test memory spec and dump it as RTLIL into
    # test_core.il in the current directory
    pspec_kwargs = {
        'ldst_ifacetype': 'testpi',
        'imem_ifacetype': '',
        'addr_wid': 48,
        'mask_wid': 8,
        'reg_wid': 64,
    }
    core = NonProductionCore(TestMemPspec(**pspec_kwargs))
    rtlil_text = rtlil.convert(core, ports=core.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(rtlil_text)