rather big change to interaction between regfile and compunits on read
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 """
21
22 from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
23 from nmigen.cli import rtlil
24
25 from soc.decoder.power_regspec_map import regspec_decode_read
26 from soc.decoder.power_regspec_map import regspec_decode_write
27
28 from nmutil.picker import PriorityPicker
29 from nmutil.util import treereduce
30
31 from soc.fu.compunits.compunits import AllFunctionUnits
32 from soc.regfile.regfiles import RegFiles
33 from soc.decoder.decode2execute1 import Decode2ToExecute1Type
34 from soc.decoder.power_decoder2 import get_rdflags
35 from soc.decoder.decode2execute1 import Data
36 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
37 from soc.config.test.test_loadstore import TestMemPspec
38 from soc.decoder.power_enums import MicrOp
39 import operator
40
41 from nmutil.util import rising_edge
42
43
44 # helper function for reducing a list of signals down to a parallel
45 # ORed single signal.
def ortreereduce(tree, attr="data_o"):
    """OR-reduce one attribute taken from every item of ``tree``.

    tree: the items to combine.
    attr: name of the attribute fetched from each item (default
          ``"data_o"``).

    Returns whatever ``treereduce`` produces when combining the
    fetched attributes with ``operator.or_``.
    """
    def fetch(item):
        return getattr(item, attr)

    return treereduce(tree, operator.or_, fetch)
48
49
def ortreereduce_sig(tree):
    """OR-reduce the items of ``tree`` themselves.

    Same reduction as ``ortreereduce`` but each item is used directly
    rather than one of its attributes.
    """
    def identity(item):
        return item

    return treereduce(tree, operator.or_, identity)
52
53
54 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the items of ``fuspecs`` with "full" register ports first.

    fuspecs: dict mapping a register port name to its spec.

    Returns a list of ``(regname, fspec)`` pairs: every entry whose name
    starts with ``"full"`` comes first, followed by all the others.
    Within each group the dict's own iteration order is kept.
    """
    full_ports = []
    other_ports = []
    for pair in fuspecs.items():
        bucket = full_ports if pair[0].startswith("full") else other_ports
        bucket.append(pair)
    return full_ports + other_ports
64
65
class NonProductionCore(Elaboratable):
    """Simple, non-production core.

    Connects every Function Unit to the register-file read and write
    ports named in its regspecs.  Each regfile port gets its own
    PriorityPicker to arbitrate between the Function Units that share
    it, and only the Function Unit selected by the decoded instruction's
    ``fn_unit`` is enabled.
    """

    def __init__(self, pspec):
        """Create the sub-components and the core's control signals.

        pspec: parameter specification, passed unchanged to
               TstL0CacheBuffer and AllFunctionUnits.
        """
        # single LD/ST funnel for memory access
        self.l0 = TstL0CacheBuffer(pspec, n_units=1)
        pi = self.l0.l0.dports[0]

        # function units (only one each)
        self.fus = AllFunctionUnits(pspec, pilist=[pi])

        # register files (yes plural)
        self.regs = RegFiles()

        # instruction decoder
        self.e = Decode2ToExecute1Type()  # decoded instruction

        # issue/valid/busy signalling
        self.ivalid_i = Signal(reset_less=True)  # instruction is valid
        self.issue_i = Signal(reset_less=True)
        self.busy_o = Signal(name="corebusy_o", reset_less=True)

        # start/stop and terminated signalling
        self.core_stopped_i = Signal(reset_less=True)
        self.core_reset_i = Signal()
        self.core_terminate_o = Signal(reset=0)  # indicates stopped

    def elaborate(self, platform):
        """Build and return the Module for the core.

        Registers the Function Units and the L0 cache buffer as
        submodules, lets the register files elaborate themselves into
        the module, wires up instruction issue and the regfile
        read/write ports, and drives the reset signal from
        ``core_reset_i``.
        """
        m = Module()

        m.submodules.fus = self.fus
        m.submodules.l0 = self.l0
        self.regs.elaborate_into(m, platform)

        # connect up Function Units, then read/write ports
        fu_bitdict = self.connect_instruction(m)
        self.connect_rdports(m, fu_bitdict)
        self.connect_wrports(m, fu_bitdict)

        # connect up reset
        m.d.comb += ResetSignal().eq(self.core_reset_i)

        return m

    def connect_instruction(self, m):
        """connect_instruction

        uses decoded (from PowerOp) function unit information from CSV files
        to ascertain which Function Unit should deal with the current
        instruction.

        some (such as OP_ATTN, OP_NOP) are dealt with here, including
        ignoring it and halting the processor.  OP_NOP is a bit annoying
        because the issuer expects busy flag still to be raised then lowered.
        (this requires a fake counter to be set).

        returns a dict mapping each Function Unit name to its one-bit
        enable signal (a bit-slice of a single ``fu_enable`` Signal).
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus
        e = self.e  # to execute

        # enable-signals for each FU, get one bit for each FU (by name)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_bitdict = {}
        for i, funame in enumerate(fus):
            fu_bitdict[funame] = fu_enable[i]

        # enable the required Function Unit based on the opcode decode
        # note: this *only* works correctly for simple core when one and
        # *only* one FU is allocated per instruction
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            enable = Signal(name="en_%s" % funame, reset_less=True)
            comb += enable.eq((e.do.fn_unit & fnunit).bool())
            comb += fu_bitdict[funame].eq(enable)

        # sigh - need a NOP counter.  while it counts down, busy is held high
        counter = Signal(2)
        with m.If(counter != 0):
            sync += counter.eq(counter - 1)
            comb += self.busy_o.eq(1)

        with m.If(self.ivalid_i):  # run only when valid
            with m.Switch(e.do.insn_type):
                # check for ATTN: halt if true
                with m.Case(MicrOp.OP_ATTN):
                    sync += self.core_terminate_o.eq(1)

                # NOP: no FU involved, so fake "busy" with the counter
                with m.Case(MicrOp.OP_NOP):
                    sync += counter.eq(2)
                    comb += self.busy_o.eq(1)

                with m.Default():
                    # connect up instructions.  only one enabled at a time
                    for funame, fu in fus.items():
                        enable = fu_bitdict[funame]

                        # run this FunctionUnit if enabled
                        with m.If(enable):
                            # route op, issue, busy, read flags and mask to FU
                            comb += fu.oper_i.eq_from_execute1(e)
                            comb += fu.issue_i.eq(self.issue_i)
                            comb += self.busy_o.eq(fu.busy_o)
                            rdmask = get_rdflags(e, fu)
                            comb += fu.rdmaskn.eq(~rdmask)

        return fu_bitdict

    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
        """Connect one regfile read port to every FU that reads from it.

        m:          the Module being built.
        fu_bitdict: FU name -> enable signal (from connect_instruction).
        rdpickers:  dict-of-dicts; the PriorityPicker created here is
                    stored at ``rdpickers[regfile][regname]``.
        regfile:    regfile name; lower-cased to index ``self.regs.rf``.
        regname:    name of the read port within that regfile.
        fspec:      one spec tuple ``(rdflag, read, write, wid, fuspec)``
                    as built by get_byregfiles, or a list of them when
                    several specs share the port.  ``fuspec`` is a list
                    of ``(funame, fu, idx)`` entries.

        Per FU entry: the read request goes into the picker, the picker
        output (``rp``) selects the register address/enable in the same
        cycle, and a one-clock-delayed copy (``delay_pick``) is used both
        as the FU's go_rd_i and to route the port's data_o to the FU.
        """
        comb, sync = m.d.comb, m.d.sync
        regs = self.regs

        rpidx = regname

        # select the required read port.  these are pre-defined sizes
        rfile = regs.rf[regfile.lower()]
        rport = rfile.r_ports[rpidx]
        print("read regfile", rpidx, regfile, regs.rf.keys(),
              rfile, rfile.unary)

        # normalise to a list of specs
        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        rdflags = []
        pplen = 0
        reads = []
        ppoffs = []
        for i, spec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, _write, _wid, fuspec) = spec
            print ("fpsec", i, spec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)
            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name=name, reset_less=True)
            comb += rdflag.eq(rf)
            rdflags.append(rdflag)
            reads.append(read)

        print ("pplen", pplen)

        # create a priority picker to manage this port: one input per
        # (spec, FU) pair, specs laid out back-to-back using ppoffs
        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

        rens = []
        addrs = []
        for i, spec in enumerate(fspecs):
            (_, _, _, _, fuspec) = spec
            # connect up the FU req/go signals, and the reg-read to the FU
            # and create a Read Broadcast Bus
            for pi, (funame, fu, idx) in enumerate(fuspec):
                pi += ppoffs[i]

                # connect request-read to picker input, and output to go-rd
                fu_active = fu_bitdict[funame]
                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
                addr_en = Signal.like(reads[i], name="addr_en_"+name)
                pick = Signal(name="pick_"+name)      # picker input
                rp = Signal(name="rp_"+name)          # picker output
                delay_pick = Signal(name="dp_"+name)  # read-enable "underway"

                # exclude any currently-enabled read-request (mask out active)
                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
                                ~delay_pick)
                comb += rdpick.i[pi].eq(pick)
                comb += fu.go_rd_i[idx].eq(delay_pick)  # pass in *delayed* pick

                # if picked, select read-port "reg select" number to port
                comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
                sync += delay_pick.eq(rp)  # delayed "pick"
                comb += addr_en.eq(Mux(rp, reads[i], 0))

                # the read-enable happens combinatorially (see mux-bus below)
                # but it results in the data coming out on a one-cycle delay.
                if rfile.unary:
                    rens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    rens.append(rp)

                # use the *delayed* pick signal to put requested data onto bus
                with m.If(delay_pick):
                    # connect regfile port to input, creating fan-out Bus
                    src = fu.src_i[idx]
                    print("reg connect widths",
                          regfile, regname, pi, funame,
                          src.shape(), rport.data_o.shape())
                    # all FUs connect to same port
                    comb += src.eq(rport.data_o)

        # or-reduce the muxed read signals
        if rfile.unary:
            # for unary-addressed
            comb += rport.ren.eq(ortreereduce_sig(rens))
        else:
            # for binary-addressed
            comb += rport.addr.eq(ortreereduce_sig(addrs))
            comb += rport.ren.eq(Cat(*rens).bool())
            print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    def connect_rdports(self, m, fu_bitdict):
        """connect read ports

        orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker.
        """
        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        rdpickers = {}
        for regfile in byregfiles_rd:
            fuspecs = byregfiles_rdspec[regfile]
            rdpickers[regfile] = {}

            # argh.  an experiment to merge RA and RB in the INT regfile
            # (we have too many read/write ports)
            #if regfile == 'INT':
                #fuspecs['rabc'] = [fuspecs.pop('rb')]
                #fuspecs['rabc'].append(fuspecs.pop('rc'))
                #fuspecs['rabc'].append(fuspecs.pop('ra'))
            #if regfile == 'FAST':
            #    fuspecs['fast1'] = [fuspecs.pop('fast1')]
            #    if 'fast2' in fuspecs:
            #        fuspecs['fast1'].append(fuspecs.pop('fast2'))

            # for each named regfile port, connect up all FUs to that port
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect rd", regname, fspec)
                self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                    regname, fspec)

    def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
        """Connect one regfile write port to every FU that writes to it.

        m:          the Module being built.
        fu_bitdict: FU name -> enable signal (from connect_instruction).
        wrpickers:  dict-of-dicts; the PriorityPicker created here is
                    stored at ``wrpickers[regfile][regname]``.
        regfile:    regfile name; lower-cased to index ``self.regs.rf``.
        regname:    name of the write port within that regfile.
        fspec:      one spec tuple ``(rdflag, read, write, wid, fuspec)``
                    as built by get_byregfiles, or a list of them when
                    several specs have been merged onto this port.

        The port's data_i is the OR of all the FUs' latched outputs and
        its wen/addr the OR of the per-FU enables, which are zero unless
        that FU has been picked.
        """
        comb = m.d.comb
        regs = self.regs

        print("connect wr", regname, fspec)
        rpidx = regname

        # select the required write port.  these are pre-defined sizes
        print(regfile, regs.rf.keys())
        rfile = regs.rf[regfile.lower()]
        wport = rfile.w_ports[rpidx]

        # normalise to a list of specs
        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        pplen = 0
        ppoffs = []
        for i, spec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (_, _, _, _, fuspec) = spec
            print ("fpsec", i, spec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)

        # create a priority picker to manage this port
        wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
        setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

        wsigs = []
        wens = []
        addrs = []
        for i, spec in enumerate(fspecs):
            # connect up the FU req/go signals and the reg-write to the FU
            # these are arbitrated by Data.ok signals
            (_, _, write, _, fuspec) = spec
            for pi, (funame, fu, idx) in enumerate(fuspec):
                pi += ppoffs[i]

                # write-request comes from dest.ok
                dest = fu.get_out(idx)
                fu_dest_latch = fu.get_fu_out(idx)  # latched output
                name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                # NOTE: wrflag is driven but not used in the pick below
                # (its use there is commented out)
                wrflag = Signal(name=name, reset_less=True)
                comb += wrflag.eq(dest.ok & fu.busy_o)

                # connect request-write to picker input, and output to go-wr
                fu_active = fu_bitdict[funame]
                pick = fu.wr.rel_o[idx] & fu_active  # & wrflag
                comb += wrpick.i[pi].eq(pick)
                # create a single-pulse go write from the picker output
                wr_pick = Signal()
                comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
                comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

                # connect the regspec write "reg select" number to this port
                # only if one FU actually requests (and is granted) the port
                # will the write-enable be activated
                addr_en = Signal.like(write)
                wp = Signal()
                # wr_pick already includes en_o, so the extra AND is redundant
                comb += wp.eq(wr_pick & wrpick.en_o)
                comb += addr_en.eq(Mux(wp, write, 0))
                if rfile.unary:
                    wens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    wens.append(wp)

                # connect regfile port to input
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      dest.shape(), wport.data_i.shape())
                wsigs.append(fu_dest_latch)

        # here is where we create the Write Broadcast Bus. simple, eh?
        comb += wport.data_i.eq(ortreereduce_sig(wsigs))
        if rfile.unary:
            # for unary-addressed
            comb += wport.wen.eq(ortreereduce_sig(wens))
        else:
            # for binary-addressed
            comb += wport.addr.eq(ortreereduce_sig(addrs))
            comb += wport.wen.eq(ortreereduce_sig(wens))

    def connect_wrports(self, m, fu_bitdict):
        """connect write ports

        orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker.

        the INT "o1" port is merged onto "o", and the FAST "fast2" port
        onto "fast1"; either secondary port is merged only if some FU
        actually declares it.

        note that the write-port wen, write-port data, and go_wr_i all need
        to be on the exact same clock cycle.  connect_wrport assigns all
        three in the comb domain.
        """
        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same for write ports.
        # BLECH!  complex code-duplication! BLECH!
        wrpickers = {}
        for regfile in byregfiles_wr:
            fuspecs = byregfiles_wrspec[regfile]
            wrpickers[regfile] = {}

            # argh, more port-merging
            if regfile == 'INT':
                fuspecs['o'] = [fuspecs.pop('o')]
                # guard like fast2 below: not every build has an o1 writer
                if 'o1' in fuspecs:
                    fuspecs['o'].append(fuspecs.pop('o1'))
            if regfile == 'FAST':
                fuspecs['fast1'] = [fuspecs.pop('fast1')]
                if 'fast2' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast2'))

            for (regname, fspec) in sort_fuspecs(fuspecs):
                self.connect_wrport(m, fu_bitdict, wrpickers,
                                    regfile, regname, fspec)

    def get_byregfiles(self, readmode):
        """Group every FU's register ports by regfile and port name.

        readmode: True to collect source (read) ports via get_in_spec,
                  False to collect destination (write) ports via
                  get_out_spec.

        Returns ``(byregfiles, byregfiles_spec)``:

        * ``byregfiles[regfile][idx]`` is a list of ``(funame, fu, idx)``
          for every FU whose operand number ``idx`` uses that regfile.
        * ``byregfiles_spec[regfile][regname]`` is a tuple
          ``(rdflag, read, write, wid, fuspecs)`` where ``fuspecs`` is
          the list of ``(funame, fu, idx)`` using that named port.
          rdflag/read are None in write mode and write is None in read
          mode.  The tuple is created by the first FU seen for the port;
          later FUs are only appended to the list.

        The result is also printed, for convenience.
        """
        mode = "read" if readmode else "write"
        fus = self.fus.fus
        e = self.e  # decoded instruction to execute

        # dictionary of lists of regfile ports
        byregfiles = {}
        byregfiles_spec = {}
        for (funame, fu) in fus.items():
            print("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                if readmode:
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
                if readmode:
                    rdflag, read = regspec_decode_read(e, regfile, regname)
                    write = None
                else:
                    rdflag, read = None, None
                    _wrport, write = regspec_decode_write(e, regfile, regname)
                if regfile not in byregfiles:
                    byregfiles[regfile] = {}
                    byregfiles_spec[regfile] = {}
                if regname not in byregfiles_spec[regfile]:
                    byregfiles_spec[regfile][regname] = \
                        (rdflag, read, write, wid, [])
                # here we start to create "lanes"
                if idx not in byregfiles[regfile]:
                    byregfiles[regfile][idx] = []
                fuspec = (funame, fu, idx)
                byregfiles[regfile][idx].append(fuspec)
                byregfiles_spec[regfile][regname][4].append(fuspec)

        # ok just print that out, for convenience
        for regfile in byregfiles:
            print("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, read, write, wid, fuspec] = fspec
                print(" rf %s port %s lane: %s" % (mode, regfile, regname))
                print(" %s" % regname, wid, read, write, rdflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print(" ", funame, fu, idx, fusig)
                    print()

        return byregfiles, byregfiles_spec

    def __iter__(self):
        """Yield the ports of the Function Units, then of the decoded
        instruction, then of the L0 cache buffer."""
        yield from self.fus.ports()
        yield from self.e.ports()
        yield from self.l0.ports()
        # TODO: regs

    def ports(self):
        """Return everything yielded by iterating over the core, as a list."""
        return list(self)
489
490
if __name__ == '__main__':
    # Build a core from the test memory parameter spec and dump it as
    # RTLIL text into test_core.il in the current directory.
    core_pspec = TestMemPspec(
        ldst_ifacetype='testpi',
        imem_ifacetype='',
        addr_wid=48,
        mask_wid=8,
        reg_wid=64,
    )
    core = NonProductionCore(core_pspec)
    il_text = rtlil.convert(core, ports=core.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(il_text)