put multi-ports back (for read) on int and fast regfiles
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 """
21
22 from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
23 from nmigen.cli import rtlil
24
25 from nmutil.picker import PriorityPicker
26 from nmutil.util import treereduce
27
28 from soc.fu.compunits.compunits import AllFunctionUnits
29 from soc.regfile.regfiles import RegFiles
30 from soc.decoder.power_decoder import create_pdecode
31 from soc.decoder.power_decoder2 import PowerDecode2
32 from soc.decoder.decode2execute1 import Data
33 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
34 from soc.config.test.test_loadstore import TestMemPspec
35 from soc.decoder.power_enums import MicrOp
36 import operator
37
38 from nmutil.util import rising_edge
39
40
41 # helper function for reducing a list of signals down to a parallel
42 # ORed single signal.
def ortreereduce(tree, attr="data_o"):
    """OR together the attribute named *attr* taken from every item of *tree*.

    the reduction itself is delegated to nmutil's treereduce, with
    operator.or_ as the combining operation.
    """
    def pick_attr(item):
        # fetch the one member of each item that is to be ORed
        return getattr(item, attr)

    return treereduce(tree, operator.or_, pick_attr)
45
46
def ortreereduce_sig(tree):
    """OR together the items of *tree* themselves (no attribute lookup).

    the reduction itself is delegated to nmutil's treereduce, with
    operator.or_ as the combining operation.
    """
    def as_is(item):
        # items are used directly
        return item

    return treereduce(tree, operator.or_, as_is)
49
50
51 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the (regname, fspec) pairs of *fuspecs* as a list, with every
    entry whose regname starts with "full" placed ahead of all the others.

    within each of the two groups the dict's own iteration order is kept.
    """
    pairs = list(fuspecs.items())
    full_regs = [pair for pair in pairs if pair[0].startswith("full")]
    other_regs = [pair for pair in pairs if not pair[0].startswith("full")]
    return full_regs + other_regs
61
62
63 class NonProductionCore(Elaboratable):
def __init__(self, pspec):
    """Create the core's sub-blocks and its top-level control signals.

    pspec is passed on, unchanged, to the L0 cache/buffer and to the
    Function Units.
    """
    # memory access goes through one single LD/ST funnel: data port 0
    # of a one-unit L0 cache/buffer
    self.l0 = TstL0CacheBuffer(pspec, n_units=1)
    ldst_port = self.l0.l0.dports[0]

    # the Function Units (one of each), sharing that LD/ST port
    self.fus = AllFunctionUnits(pspec, pilist=[ldst_port])

    # the register files (there are several)
    self.regs = RegFiles()

    # instruction decoder: PowerDecode2 wrapped around a new decoder
    self.pdecode2 = PowerDecode2(create_pdecode())

    # issue / valid / busy handshake
    self.ivalid_i = self.pdecode2.valid  # instruction is valid
    self.issue_i = Signal(reset_less=True)
    self.busy_o = Signal(name="corebusy_o", reset_less=True)

    # raw instruction input: aliases of the decoder's own inputs
    self.bigendian_i = self.pdecode2.dec.bigendian
    self.raw_opcode_i = self.pdecode2.dec.raw_opcode_in

    # start/stop, reset and "terminated" signalling
    self.core_stopped_i = Signal(reset_less=True)
    self.core_reset_i = Signal()
    self.core_terminate_o = Signal(reset=0)  # indicates stopped
92
def elaborate(self, platform):
    """Build and return the nmigen Module for the core.

    registers the decoder, Function Units and L0 buffer as submodules,
    lets the register files add themselves (platform is passed on to
    regs.elaborate_into), then wires instruction issue and the regfile
    read/write ports, and finally ties the reset signal to core_reset_i.
    """
    m = Module()

    m.submodules.pdecode2 = self.pdecode2
    m.submodules.fus = self.fus
    m.submodules.l0 = self.l0
    self.regs.elaborate_into(m, platform)

    # connect up Function Units, then read/write ports.  the per-FU
    # enable bits created by connect_instruction are needed by both
    fu_bitdict = self.connect_instruction(m)
    self.connect_rdports(m, fu_bitdict)
    self.connect_wrports(m, fu_bitdict)

    # connect up reset
    m.d.comb += ResetSignal().eq(self.core_reset_i)

    return m
112
def connect_instruction(self, m):
    """connect_instruction

    uses decoded (from PowerOp) function unit information from CSV files
    to ascertain which Function Unit should deal with the current
    instruction.

    some (such as OP_ATTN, OP_NOP) are dealt with here, including
    ignoring it and halting the processor.  OP_NOP is a bit annoying
    because the issuer expects busy flag still to be raised then lowered.
    (this requires a fake counter to be set).

    m is the Module being built: statements are added to its comb and
    sync domains.

    returns fu_bitdict: a dict mapping each Function Unit name to its
    own single bit of the fu_enable signal.
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus
    dec2 = self.pdecode2

    # enable-signals for each FU, get one bit for each FU (by name)
    fu_enable = Signal(len(fus), reset_less=True)
    fu_bitdict = {}
    for i, funame in enumerate(fus.keys()):
        fu_bitdict[funame] = fu_enable[i]

    # enable the required Function Unit based on the opcode decode
    # note: this *only* works correctly for simple core when one and
    # *only* one FU is allocated per instruction
    for funame, fu in fus.items():
        fnunit = fu.fnunit.value
        enable = Signal(name="en_%s" % funame, reset_less=True)
        # enabled when the decoded fn_unit has any bit in common with
        # this FU's own fnunit value
        comb += enable.eq((dec2.e.do.fn_unit & fnunit).bool())
        comb += fu_bitdict[funame].eq(enable)

    # sigh - need a NOP counter
    # 2-bit down-counter: while it is non-zero it decrements each clock
    # and holds busy_o high
    counter = Signal(2)
    with m.If(counter != 0):
        sync += counter.eq(counter - 1)
        comb += self.busy_o.eq(1)

    with m.If(self.ivalid_i): # run only when valid
        with m.Switch(dec2.e.do.insn_type):
            # check for ATTN: halt if true
            # (core_terminate_o is set here and not cleared anywhere in
            # this method)
            with m.Case(MicrOp.OP_ATTN):
                m.d.sync += self.core_terminate_o.eq(1)

            # NOP: no FU is involved.  load the counter with 2 and raise
            # busy_o in this cycle as well
            with m.Case(MicrOp.OP_NOP):
                sync += counter.eq(2)
                comb += self.busy_o.eq(1)

            with m.Default():
                # connect up instructions. only one enabled at a time
                for funame, fu in fus.items():
                    enable = fu_bitdict[funame]

                    # run this FunctionUnit if enabled
                    with m.If(enable):
                        # route op, issue, busy, read flags and mask to FU
                        comb += fu.oper_i.eq_from_execute1(dec2.e)
                        comb += fu.issue_i.eq(self.issue_i)
                        comb += self.busy_o.eq(fu.busy_o)
                        # the FU takes an inverted mask: rdmaskn is
                        # the complement of the decoder's read flags
                        rdmask = dec2.rdflags(fu)
                        comb += fu.rdmaskn.eq(~rdmask)

    return fu_bitdict
175
def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
    """connect one named regfile read port to every FU operand that uses it

    * m: the Module being built
    * fu_bitdict: dict of FU name -> enable bit (as returned by
      connect_instruction)
    * rdpickers: dict-of-dicts.  rdpickers[regfile][regname] is set here
      to the picker created for this port (rdpickers[regfile] must
      already exist)
    * regfile: regfile name.  lower-cased to index self.regs.rf
    * regname: read port name, used to index the regfile's r_ports
    * fspec: either one (rdflag, read, write, wid, fuspec) tuple or a
      list of such tuples that are to share this port.  fuspec is a
      list of (funame, fu, idx) entries

    one PriorityPicker input is allocated per (FU, operand) across all
    the fspecs.  nothing is returned: everything is added to m.
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus  # NOTE(review): not used in this method
    regs = self.regs

    rpidx = regname

    # select the required read port. these are pre-defined sizes
    rfile = regs.rf[regfile.lower()]
    rport = rfile.r_ports[rpidx]
    print("read regfile", rpidx, regfile, regs.rf.keys(),
          rfile, rfile.unary)

    # accept a single spec as well as a list of specs
    fspecs = fspec
    if not isinstance(fspecs, list):
        fspecs = [fspecs]

    # first pass: one rdflag signal per spec, and work out where each
    # spec's FUs start within the (shared) picker
    rdflags = []
    pplen = 0
    reads = []
    ppoffs = []
    for i, fspec in enumerate(fspecs):
        # get the regfile specs for this regfile port
        # (write and wid are unpacked but not used here)
        (rf, read, write, wid, fuspec) = fspec
        # NOTE(review): "fpsec" looks like a typo for "fspec" in this
        # debug label
        print ("fpsec", i, fspec, len(fuspec))
        ppoffs.append(pplen) # record offset for picker
        pplen += len(fuspec)
        name = "rdflag_%s_%s_%d" % (regfile, regname, i)
        rdflag = Signal(name=name, reset_less=True)
        comb += rdflag.eq(rf)
        rdflags.append(rdflag)
        reads.append(read)

    print ("pplen", pplen)

    # create a priority picker to manage this port
    rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
    setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

    # second pass: wire every (FU, operand) to its picker slot
    rens = []
    addrs = []
    for i, fspec in enumerate(fspecs):
        (rf, read, write, wid, fuspec) = fspec
        # connect up the FU req/go signals, and the reg-read to the FU
        # and create a Read Broadcast Bus
        for pi, (funame, fu, idx) in enumerate(fuspec):
            # picker index = position within this spec + spec's offset
            pi += ppoffs[i]

            # connect request-read to picker input, and output to go-rd
            fu_active = fu_bitdict[funame]
            name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
            addr_en = Signal.like(reads[i], name="addr_en_"+name)
            rp = Signal(name="rp_"+name)
            pick = Signal()

            # request only when the FU asks for this operand, the FU is
            # the enabled one, and the decoder's read flag is set
            comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i])
            comb += rdpick.i[pi].eq(pick)
            # go_rd_i is registered (sync domain).  presumably
            # rising_edge gives a pulse on rp's 0->1 transition - TODO
            # confirm against nmutil.util
            sync += fu.go_rd_i[idx].eq(rising_edge(m, rp))
            # if picked, select read-port "reg select" number to port
            comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
            # register number, forced to zero unless this slot is picked
            comb += addr_en.eq(Mux(rp, reads[i], 0))
            if rfile.unary:
                # unary: the masked select value is itself the enable
                rens.append(addr_en)
            else:
                # binary: masked value is the address, rp the enable
                addrs.append(addr_en)
                rens.append(rp)

            with m.If(rp):
                # connect regfile port to input, creating fan-out Bus
                src = fu.src_i[idx]
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      src.shape(), rport.data_o.shape())
                # all FUs connect to same port
                # (captured in the sync domain, only while rp is set)
                sync += src.eq(rport.data_o)

    # or-reduce the muxed read signals
    if rfile.unary:
        # for unary-addressed
        comb += rport.ren.eq(ortreereduce_sig(rens))
    else:
        # for binary-addressed
        comb += rport.addr.eq(ortreereduce_sig(addrs))
        comb += rport.ren.eq(Cat(*rens).bool())
        print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)
261
def connect_rdports(self, m, fu_bitdict):
    """connect read ports

    orders the read regspecs into a dict-of-dicts, by regfile, by
    regport name, then connects all FUs that want that regport by
    way of a PriorityPicker.

    * m: the Module being built
    * fu_bitdict: dict of FU name -> enable bit, passed straight on to
      connect_rdport

    nothing is returned: the pickers are only held in a local
    dict-of-dicts while the ports are being connected.
    """
    # dictionary of lists of regfile read ports
    byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

    # okaay, now we need a PriorityPicker per regfile per regfile port
    # loootta pickers... peter piper picked a pack of pickled peppers...
    rdpickers = {}
    for regfile in byregfiles_rd:
        fuspecs = byregfiles_rdspec[regfile]
        rdpickers[regfile] = {}

        # argh. an experiment to merge RA and RB in the INT regfile
        # (we have too many read/write ports)
        # deliberately left disabled: read ports are not merged
        #if regfile == 'INT':
            #fuspecs['rabc'] = [fuspecs.pop('rb')]
            #fuspecs['rabc'].append(fuspecs.pop('rc'))
            #fuspecs['rabc'].append(fuspecs.pop('ra'))
        #if regfile == 'FAST':
        #    fuspecs['fast1'] = [fuspecs.pop('fast1')]
        #    if 'fast2' in fuspecs:
        #        fuspecs['fast1'].append(fuspecs.pop('fast2'))

        # for each named regfile port, connect up all FUs to that port
        # ("full" ports are handled first: see sort_fuspecs)
        for (regname, fspec) in sort_fuspecs(fuspecs):
            print("connect rd", regname, fspec)
            self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                regname, fspec)
299
def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
    """connect one named regfile write port to every FU result that uses it

    * m: the Module being built
    * fu_bitdict: dict of FU name -> enable bit (as returned by
      connect_instruction)
    * wrpickers: dict-of-dicts.  wrpickers[regfile][regname] is set here
      to the picker created for this port (wrpickers[regfile] must
      already exist)
    * regfile: regfile name.  lower-cased to index self.regs.rf
    * regname: write port name, used to index the regfile's w_ports
    * fspec: either one (rdflag, read, write, wid, fuspec) tuple or a
      list of such tuples that are to share this port.  fuspec is a
      list of (funame, fu, idx) entries

    the port's data, address and write-enable, and each FU's go_wr_i,
    are all driven in the comb domain.  nothing is returned.
    """
    # NOTE(review): sync and fus are not used in this method
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus
    regs = self.regs

    print("connect wr", regname, fspec)
    rpidx = regname

    # select the required write port. these are pre-defined sizes
    print(regfile, regs.rf.keys())
    rfile = regs.rf[regfile.lower()]
    wport = rfile.w_ports[rpidx]

    # accept a single spec as well as a list of specs
    fspecs = fspec
    if not isinstance(fspecs, list):
        fspecs = [fspecs]

    # first pass: work out where each spec's FUs start within the
    # (shared) picker, and the picker's total width
    pplen = 0
    writes = []  # NOTE(review): never appended to or read
    ppoffs = []
    for i, fspec in enumerate(fspecs):
        # get the regfile specs for this regfile port
        (rf, read, write, wid, fuspec) = fspec
        # NOTE(review): "fpsec" looks like a typo for "fspec" in this
        # debug label
        print ("fpsec", i, fspec, len(fuspec))
        ppoffs.append(pplen) # record offset for picker
        pplen += len(fuspec)

    # create a priority picker to manage this port
    wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
    setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

    wsigs = []
    wens = []
    addrs = []
    for i, fspec in enumerate(fspecs):
        # connect up the FU req/go signals and the reg-read to the FU
        # these are arbitrated by Data.ok signals
        (rf, read, write, wid, fuspec) = fspec
        for pi, (funame, fu, idx) in enumerate(fuspec):
            # picker index = position within this spec + spec's offset
            pi += ppoffs[i]

            # write-request comes from dest.ok
            dest = fu.get_out(idx)
            fu_dest_latch = fu.get_fu_out(idx) # latched output
            name = "wrflag_%s_%s_%d" % (funame, regname, idx)
            wrflag = Signal(name=name, reset_less=True)
            # NOTE(review): wrflag is driven here but its use in the
            # pick expression below is commented out
            comb += wrflag.eq(dest.ok & fu.busy_o)

            # connect request-write to picker input, and output to go-wr
            fu_active = fu_bitdict[funame]
            pick = fu.wr.rel_o[idx] & fu_active # & wrflag
            comb += wrpick.i[pi].eq(pick)
            # create a single-pulse go write from the picker output
            wr_pick = Signal()
            comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
            comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

            # connect the regspec write "reg select" number to this port
            # only if one FU actually requests (and is granted) the port
            # will the write-enable be activated
            addr_en = Signal.like(write)
            wp = Signal()
            # NOTE(review): wr_pick already includes wrpick.en_o, so
            # the second AND with en_o here is redundant
            comb += wp.eq(wr_pick & wrpick.en_o)
            # register number, forced to zero unless this slot is picked
            comb += addr_en.eq(Mux(wp, write, 0))
            if rfile.unary:
                # unary: the masked select value is itself the enable
                wens.append(addr_en)
            else:
                # binary: masked value is the address, wp the enable
                addrs.append(addr_en)
                wens.append(wp)

            # connect regfile port to input
            print("reg connect widths",
                  regfile, regname, pi, funame,
                  dest.shape(), wport.data_i.shape())
            wsigs.append(fu_dest_latch)

    # here is where we create the Write Broadcast Bus. simple, eh?
    # (every FU's latched output is ORed onto the port's data input)
    comb += wport.data_i.eq(ortreereduce_sig(wsigs))
    if rfile.unary:
        # for unary-addressed
        comb += wport.wen.eq(ortreereduce_sig(wens))
    else:
        # for binary-addressed
        comb += wport.addr.eq(ortreereduce_sig(addrs))
        comb += wport.wen.eq(ortreereduce_sig(wens))
385
def connect_wrports(self, m, fu_bitdict):
    """connect write ports

    orders the write regspecs into a dict-of-dicts, by regfile,
    by regport name, then connects all FUs that want that regport
    by way of a PriorityPicker.

    note that the write-port wen, write-port data, and go_wr_i all need to
    be on the exact same clock cycle.  connect_wrport drives all three
    from the comb domain.  (an earlier version of this note said they
    used sync, to work round a combinatorial loop: the code no longer
    does that.)

    * m: the Module being built
    * fu_bitdict: dict of FU name -> enable bit, passed straight on to
      connect_wrport

    nothing is returned: the pickers are only held in a local
    dict-of-dicts while the ports are being connected.
    """
    # dictionary of lists of regfile write ports
    byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

    # same for write ports.
    # BLECH! complex code-duplication! BLECH!
    wrpickers = {}
    for regfile in byregfiles_wr:
        fuspecs = byregfiles_wrspec[regfile]
        wrpickers[regfile] = {}

        # argh, more port-merging: a second port's specs are folded
        # into the first port's list, so that both share one picker
        # and one physical write port.  the secondary port is only
        # merged when some FU actually declares it
        if regfile == 'INT':
            fuspecs['o'] = [fuspecs.pop('o')]
            if 'o1' in fuspecs:
                fuspecs['o'].append(fuspecs.pop('o1'))
        if regfile == 'FAST':
            fuspecs['fast1'] = [fuspecs.pop('fast1')]
            if 'fast2' in fuspecs:
                fuspecs['fast1'].append(fuspecs.pop('fast2'))

        # for each named regfile port, connect up all FUs to that port
        for (regname, fspec) in sort_fuspecs(fuspecs):
            self.connect_wrport(m, fu_bitdict, wrpickers,
                                regfile, regname, fspec)
422
def get_byregfiles(self, readmode):
    """group every Function Unit's register ports by regfile

    * readmode: True to collect the FUs' source (read) operands, False
      to collect their destination (write) results

    returns a tuple (byregfiles, byregfiles_spec) of dicts-of-dicts:

    * byregfiles[regfile][idx] is the list of (funame, fu, idx) entries
      for FUs whose operand number idx uses that regfile
    * byregfiles_spec[regfile][regname] is a tuple
      (rdflag, read, write, wid, fuspecs).  fuspecs is the list of
      (funame, fu, idx) entries using that named port.  the other four
      values are those of the *first* FU found using the port: in read
      mode write is None, in write mode rdflag and read are None

    everything found is also printed, for debugging.
    """
    mode = "read" if readmode else "write"
    dec2 = self.pdecode2
    fus = self.fus.fus

    # dictionary of lists of regfile ports
    byregfiles = {}
    byregfiles_spec = {}
    for (funame, fu) in fus.items():
        print("%s ports for %s" % (mode, funame))
        get_spec = fu.get_in_spec if readmode else fu.get_out_spec
        for idx in range(fu.n_src if readmode else fu.n_dst):
            (regfile, regname, wid) = get_spec(idx)
            print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
            if readmode:
                rdflag, read = dec2.regspecmap_read(regfile, regname)
                write = None
            else:
                rdflag, read = None, None
                # only the second item of the pair is needed
                _, write = dec2.regspecmap_write(regfile, regname)
            # both dicts gain a regfile entry at the same time
            lanes = byregfiles.setdefault(regfile, {})
            portspecs = byregfiles_spec.setdefault(regfile, {})
            if regname not in portspecs:
                portspecs[regname] = (rdflag, read, write, wid, [])
            # here we start to create "lanes"
            fuspec = (funame, fu, idx)
            lanes.setdefault(idx, []).append(fuspec)
            portspecs[regname][4].append(fuspec)

    # ok just print that out, for convenience
    for regfile in byregfiles:
        print("regfile %s ports:" % mode, regfile)
        fuspecs = byregfiles_spec[regfile]
        for regname, fspec in fuspecs.items():
            [rdflag, read, write, wid, fuspec] = fspec
            print(" rf %s port %s lane: %s" % (mode, regfile, regname))
            print(" %s" % regname, wid, read, write, rdflag)
            for (funame, fu, idx) in fuspec:
                fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                print(" ", funame, fu, idx, fusig)
                print()

    return byregfiles, byregfiles_spec
474
def __iter__(self):
    """Yield the ports of the Function Units, then of the decoder, then
    of the L0 cache/buffer, in that order.
    """
    for block_with_ports in (self.fus, self.pdecode2, self.l0):
        yield from block_with_ports.ports()
    # TODO: regs
480
def ports(self):
    """Return, as a new list, everything that iterating over self yields."""
    return [*self]
483
484
if __name__ == '__main__':
    # when run as a script: build one core with a test-memory parameter
    # set, convert it to RTLIL and save the text as test_core.il
    core_pspec = TestMemPspec(ldst_ifacetype='testpi',
                              imem_ifacetype='',
                              addr_wid=48,
                              mask_wid=8,
                              reg_wid=64)
    core = NonProductionCore(core_pspec)
    rtlil_text = rtlil.convert(core, ports=core.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(rtlil_text)