pass in CoreState to PowerDecoder rather than eq a copy of it
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 """
21
22 from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
23 from nmigen.cli import rtlil
24
25 from soc.decoder.power_decoder2 import PowerDecodeSubset
26 from soc.decoder.power_regspec_map import regspec_decode_read
27 from soc.decoder.power_regspec_map import regspec_decode_write
28
29 from nmutil.picker import PriorityPicker
30 from nmutil.util import treereduce
31
32 from soc.fu.compunits.compunits import AllFunctionUnits
33 from soc.regfile.regfiles import RegFiles
34 from soc.decoder.decode2execute1 import Decode2ToExecute1Type
35 from soc.decoder.power_decoder2 import get_rdflags
36 from soc.decoder.decode2execute1 import Data
37 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
38 from soc.config.test.test_loadstore import TestMemPspec
39 from soc.decoder.power_enums import MicrOp
40 from soc.config.state import CoreState
41
42 import operator
43
44 from nmutil.util import rising_edge
45
46
def ortreereduce(tree, attr="data_o"):
    """Reduce a list of objects to a single value by OR-ing one attribute.

    The attribute named by ``attr`` (``data_o`` unless overridden) is
    fetched from every item of ``tree`` and the results are combined with
    ``operator.or_`` by ``treereduce``.
    """
    def field_of(item):
        return getattr(item, attr)

    return treereduce(tree, operator.or_, field_of)
51
52
def ortreereduce_sig(tree):
    """OR together the items of ``tree`` themselves, via ``treereduce``.

    Unlike ``ortreereduce`` no attribute is looked up: each item is used
    as-is.
    """
    def as_is(item):
        return item

    return treereduce(tree, operator.or_, as_is)
55
56
def sort_fuspecs(fuspecs):
    """Return the (regname, fspec) pairs of ``fuspecs``, "full" ones first.

    Entries whose register name starts with "full" come first, followed by
    all remaining entries.  Within each group the dictionary's own
    iteration order is kept.  The result is a list of 2-tuples.
    """
    pairs = list(fuspecs.items())
    full_first = [pair for pair in pairs if pair[0].startswith("full")]
    remainder = [pair for pair in pairs if not pair[0].startswith("full")]
    return full_first + remainder
67
68
class NonProductionCore(Elaboratable):
    """Simple core: Function Units wired to Register Files via pickers.

    Construction creates the L0 cache buffer, the Function Units, the
    Register Files and one subset instruction decoder per Function Unit
    (the TRAP unit is the exception: it uses the main decoded instruction
    ``self.e``).  ``elaborate`` then routes the decoded instruction to the
    enabled Function Unit and connects every regfile read and write port
    to the Function Units that need it, one PriorityPicker per port.
    """

    def __init__(self, pspec):
        """Create the core's sub-blocks and interface signals.

        pspec: parameter specification, handed unchanged to
               TstL0CacheBuffer and AllFunctionUnits.
        """

        # single LD/ST funnel for memory access
        self.l0 = TstL0CacheBuffer(pspec, n_units=1)
        pi = self.l0.l0.dports[0]

        # function units (only one each)
        self.fus = AllFunctionUnits(pspec, pilist=[pi])

        # register files (yes plural)
        self.regs = RegFiles()

        # instruction decoder
        self.e = Decode2ToExecute1Type()  # decoded instruction
        self.state = CoreState("core")
        self.raw_insn_i = Signal(32)  # raw instruction
        self.bigendian_i = Signal()  # bigendian

        # issue/valid/busy signalling
        self.ivalid_i = Signal(reset_less=True)  # instruction is valid
        self.issue_i = Signal(reset_less=True)
        self.busy_o = Signal(name="corebusy_o", reset_less=True)

        # start/stop and terminated signalling
        self.core_stopped_i = Signal(reset_less=True)
        self.core_reset_i = Signal()
        self.core_terminate_o = Signal(reset=0)  # indicates stopped

        # create per-FU instruction decoders (subsetted)
        self.decoders = {}
        self.ees = {}

        # name of the TRAP function unit.  stays None when the configured
        # set of Function Units does not contain one, so that elaborate()
        # can skip the trap hookup instead of raising AttributeError
        self.trapunit = None

        for funame, fu in self.fus.fus.items():
            f_name = fu.fnunit.name
            opkls = fu.opsubsetkls
            if f_name == 'TRAP':
                # no subset decoder for TRAP: see elaborate()
                self.trapunit = funame
                continue
            # the shared CoreState object itself is passed to each decoder
            self.decoders[funame] = PowerDecodeSubset(None, opkls, f_name,
                                                      final=True,
                                                      state=self.state)
            self.ees[funame] = self.decoders[funame].e

    def elaborate(self, platform):
        """Build the module: submodules, decoders, FU and regfile wiring."""
        m = Module()
        comb = m.d.comb

        m.submodules.fus = self.fus
        m.submodules.l0 = self.l0
        self.regs.elaborate_into(m, platform)

        # connect decoders: every subset decoder sees the same raw
        # instruction and endianness
        for dec in self.decoders.values():
            setattr(m.submodules, "dec_%s" % dec.fn_name, dec)
            comb += dec.dec.raw_opcode_in.eq(self.raw_insn_i)
            comb += dec.dec.bigendian.eq(self.bigendian_i)

        # ssh, cheat: trap uses the main decoder because of the rewriting
        # (only when a TRAP unit is actually present)
        if self.trapunit is not None:
            self.ees[self.trapunit] = self.e

        # connect up Function Units, then read/write ports
        fu_bitdict = self.connect_instruction(m)
        self.connect_rdports(m, fu_bitdict)
        self.connect_wrports(m, fu_bitdict)

        # connect up reset
        m.d.comb += ResetSignal().eq(self.core_reset_i)

        return m

    def connect_instruction(self, m):
        """connect_instruction

        uses decoded (from PowerOp) function unit information from CSV files
        to ascertain which Function Unit should deal with the current
        instruction.

        some (such as OP_ATTN, OP_NOP) are dealt with here, including
        ignoring it and halting the processor.  OP_NOP is a bit annoying
        because the issuer expects busy flag still to be raised then lowered.
        (this requires a fake counter to be set).

        returns a dictionary, keyed by Function Unit name, of the one-bit
        enable signal for that Function Unit.
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus

        # enable-signals for each FU, get one bit for each FU (by name)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_bitdict = {}
        for i, funame in enumerate(fus.keys()):
            fu_bitdict[funame] = fu_enable[i]

        # enable the required Function Unit based on the opcode decode
        # note: this *only* works correctly for simple core when one and
        # *only* one FU is allocated per instruction
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            enable = Signal(name="en_%s" % funame, reset_less=True)
            comb += enable.eq((self.e.do.fn_unit & fnunit).bool())
            comb += fu_bitdict[funame].eq(enable)

        # sigh - need a NOP counter: busy is held high while it counts down
        counter = Signal(2)
        with m.If(counter != 0):
            sync += counter.eq(counter - 1)
            comb += self.busy_o.eq(1)

        with m.If(self.ivalid_i):  # run only when valid
            with m.Switch(self.e.do.insn_type):
                # check for ATTN: halt if true
                with m.Case(MicrOp.OP_ATTN):
                    m.d.sync += self.core_terminate_o.eq(1)

                with m.Case(MicrOp.OP_NOP):
                    sync += counter.eq(2)
                    comb += self.busy_o.eq(1)

                with m.Default():
                    # connect up instructions.  only one enabled at a time
                    for funame, fu in fus.items():
                        e = self.ees[funame]
                        enable = fu_bitdict[funame]

                        # run this FunctionUnit if enabled
                        # route op, issue, busy, read flags and mask to FU
                        with m.If(enable):
                            # operand comes from the *local* decoder
                            comb += fu.oper_i.eq_from(e.do)
                            #comb += fu.oper_i.eq_from_execute1(e)
                            comb += fu.issue_i.eq(self.issue_i)
                            comb += self.busy_o.eq(fu.busy_o)
                            # rdmask, which is for registers, needs to come
                            # from the *main* decoder
                            rdmask = get_rdflags(self.e, fu)
                            comb += fu.rdmaskn.eq(~rdmask)

        return fu_bitdict

    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
        """Connect one regfile read port to all the FUs that read from it.

        m:          the Module being built
        fu_bitdict: per-FU enable bits, as returned by connect_instruction
        rdpickers:  dict-of-dicts; the PriorityPicker created here is
                    stored at rdpickers[regfile][regname]
        regfile:    register file name (lower-cased to index self.regs.rf)
        regname:    name of the read port within that register file
        fspec:      one (rdflag, read, write, wid, fuspec-list) tuple, or a
                    list of such tuples when several ports are merged
        """
        comb, sync = m.d.comb, m.d.sync
        regs = self.regs

        rpidx = regname

        # select the required read port.  these are pre-defined sizes
        rfile = regs.rf[regfile.lower()]
        rport = rfile.r_ports[rpidx]
        print("read regfile", rpidx, regfile, regs.rf.keys(),
              rfile, rfile.unary)

        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        # first pass: work out how many picker inputs are needed in total,
        # and the offset into the picker at which each fspec's FUs start
        rdflags = []
        pplen = 0
        reads = []
        ppoffs = []
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = fspec
            print ("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)
            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name=name, reset_less=True)
            comb += rdflag.eq(rf)
            rdflags.append(rdflag)
            reads.append(read)

        print ("pplen", pplen)

        # create a priority picker to manage this port
        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

        rens = []
        addrs = []
        for i, fspec in enumerate(fspecs):
            fuspec = fspec[4]
            # connect up the FU req/go signals, and the reg-read to the FU
            # and create a Read Broadcast Bus
            for pi, (funame, fu, idx) in enumerate(fuspec):
                pi += ppoffs[i]

                # connect request-read to picker input, and output to go-rd
                fu_active = fu_bitdict[funame]
                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
                addr_en = Signal.like(reads[i], name="addr_en_"+name)
                pick = Signal(name="pick_"+name)  # picker input
                rp = Signal(name="rp_"+name)  # picker output
                delay_pick = Signal(name="dp_"+name)  # read-enable "underway"

                # exclude any currently-enabled read-request (mask out active)
                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
                                ~delay_pick)
                comb += rdpick.i[pi].eq(pick)
                comb += fu.go_rd_i[idx].eq(delay_pick)  # pass in *delayed* pick

                # if picked, select read-port "reg select" number to port
                comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
                sync += delay_pick.eq(rp)  # delayed "pick"
                comb += addr_en.eq(Mux(rp, reads[i], 0))

                # the read-enable happens combinatorially (see mux-bus below)
                # but it results in the data coming out on a one-cycle delay.
                if rfile.unary:
                    rens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    rens.append(rp)

                # use the *delayed* pick signal to put requested data onto bus
                with m.If(delay_pick):
                    # connect regfile port to input, creating fan-out Bus
                    src = fu.src_i[idx]
                    print("reg connect widths",
                          regfile, regname, pi, funame,
                          src.shape(), rport.data_o.shape())
                    # all FUs connect to same port
                    comb += src.eq(rport.data_o)

        # or-reduce the muxed read signals
        if rfile.unary:
            # for unary-addressed
            comb += rport.ren.eq(ortreereduce_sig(rens))
        else:
            # for binary-addressed
            comb += rport.addr.eq(ortreereduce_sig(addrs))
            comb += rport.ren.eq(Cat(*rens).bool())
            print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    def connect_rdports(self, m, fu_bitdict):
        """connect read ports

        orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker.
        """
        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        rdpickers = {}
        for regfile in byregfiles_rd:
            fuspecs = byregfiles_rdspec[regfile]
            rdpickers[regfile] = {}

            # argh.  an experiment to merge RA and RB in the INT regfile
            # (we have too many read/write ports)
            #if regfile == 'INT':
                #fuspecs['rabc'] = [fuspecs.pop('rb')]
                #fuspecs['rabc'].append(fuspecs.pop('rc'))
                #fuspecs['rabc'].append(fuspecs.pop('ra'))
            #if regfile == 'FAST':
            #    fuspecs['fast1'] = [fuspecs.pop('fast1')]
            #    if 'fast2' in fuspecs:
            #        fuspecs['fast1'].append(fuspecs.pop('fast2'))

            # for each named regfile port, connect up all FUs to that port
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect rd", regname, fspec)
                self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                    regname, fspec)

    def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
        """Connect one regfile write port to all the FUs that write to it.

        m:          the Module being built
        fu_bitdict: per-FU enable bits, as returned by connect_instruction
        wrpickers:  dict-of-dicts; the PriorityPicker created here is
                    stored at wrpickers[regfile][regname]
        regfile:    register file name (lower-cased to index self.regs.rf)
        regname:    name of the write port within that register file
        fspec:      one (rdflag, read, write, wid, fuspec-list) tuple, or a
                    list of such tuples when several ports are merged
        """
        comb = m.d.comb
        regs = self.regs

        print("connect wr", regname, fspec)
        rpidx = regname

        # select the required write port.  these are pre-defined sizes
        print(regfile, regs.rf.keys())
        rfile = regs.rf[regfile.lower()]
        wport = rfile.w_ports[rpidx]

        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        # first pass: total number of picker inputs, plus the offset into
        # the picker at which each fspec's FUs start
        pplen = 0
        ppoffs = []
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            fuspec = fspec[4]
            print ("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)

        # create a priority picker to manage this port
        wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
        setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

        wsigs = []
        wens = []
        addrs = []
        for i, fspec in enumerate(fspecs):
            # connect up the FU req/go signals and the reg-write to the FU
            # these are arbitrated by Data.ok signals
            (rf, read, write, wid, fuspec) = fspec
            for pi, (funame, fu, idx) in enumerate(fuspec):
                pi += ppoffs[i]

                # write-request comes from dest.ok
                dest = fu.get_out(idx)
                fu_dest_latch = fu.get_fu_out(idx)  # latched output
                name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                # NOTE: wrflag is driven but currently *not* part of the
                # pick expression below (see the commented-out "& wrflag")
                wrflag = Signal(name=name, reset_less=True)
                comb += wrflag.eq(dest.ok & fu.busy_o)

                # connect request-write to picker input, and output to go-wr
                fu_active = fu_bitdict[funame]
                pick = fu.wr.rel_o[idx] & fu_active  # & wrflag
                comb += wrpick.i[pi].eq(pick)
                # create a single-pulse go write from the picker output
                wr_pick = Signal()
                comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
                comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

                # connect the regspec write "reg select" number to this port
                # only if one FU actually requests (and is granted) the port
                # will the write-enable be activated
                addr_en = Signal.like(write)
                wp = Signal()
                comb += wp.eq(wr_pick & wrpick.en_o)
                comb += addr_en.eq(Mux(wp, write, 0))
                if rfile.unary:
                    wens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    wens.append(wp)

                # connect regfile port to input
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      dest.shape(), wport.data_i.shape())
                wsigs.append(fu_dest_latch)

        # here is where we create the Write Broadcast Bus. simple, eh?
        comb += wport.data_i.eq(ortreereduce_sig(wsigs))
        if rfile.unary:
            # for unary-addressed
            comb += wport.wen.eq(ortreereduce_sig(wens))
        else:
            # for binary-addressed
            comb += wport.addr.eq(ortreereduce_sig(addrs))
            comb += wport.wen.eq(ortreereduce_sig(wens))

    def connect_wrports(self, m, fu_bitdict):
        """connect write ports

        orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker.

        note that the write-port wen, write-port data, and go_wr_i all need
        to be on the exact same clock cycle.  connect_wrport drives all
        three combinatorially.
        """
        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same for write ports.
        # BLECH!  complex code-duplication! BLECH!
        wrpickers = {}
        for regfile in byregfiles_wr:
            fuspecs = byregfiles_wrspec[regfile]
            wrpickers[regfile] = {}

            # argh, more port-merging
            if regfile == 'INT':
                fuspecs['o'] = [fuspecs.pop('o')]
                # 'o1' is only in the dictionary if some FU declares it:
                # merge it when present (same guard as 'fast2' below)
                if 'o1' in fuspecs:
                    fuspecs['o'].append(fuspecs.pop('o1'))
            if regfile == 'FAST':
                fuspecs['fast1'] = [fuspecs.pop('fast1')]
                if 'fast2' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast2'))

            for (regname, fspec) in sort_fuspecs(fuspecs):
                self.connect_wrport(m, fu_bitdict, wrpickers,
                                    regfile, regname, fspec)

    def get_byregfiles(self, readmode):
        """Group every FU's register ports by regfile.

        readmode: True to collect the FUs' source (read) specs, False to
                  collect their destination (write) specs.

        returns a tuple of two dictionaries, both keyed by regfile name:

        * byregfiles[regfile][idx] is a list of (funame, fu, idx) tuples
        * byregfiles_spec[regfile][regname] is a tuple
          (rdflag, read, write, wid, list-of-(funame, fu, idx)).
          rdflag and read are None in write mode; write is None in
          read mode.  only the first FU seen for a given regname
          determines the rdflag/read/write/wid entries.
        """

        mode = "read" if readmode else "write"
        fus = self.fus.fus
        e = self.e  # decoded instruction to execute

        # dictionary of lists of regfile ports
        byregfiles = {}
        byregfiles_spec = {}
        for (funame, fu) in fus.items():
            print("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                if readmode:
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
                if readmode:
                    rdflag, read = regspec_decode_read(e, regfile, regname)
                    write = None
                else:
                    rdflag, read = None, None
                    _wrport, write = regspec_decode_write(e, regfile, regname)
                if regfile not in byregfiles:
                    byregfiles[regfile] = {}
                    byregfiles_spec[regfile] = {}
                if regname not in byregfiles_spec[regfile]:
                    byregfiles_spec[regfile][regname] = \
                        (rdflag, read, write, wid, [])
                # here we start to create "lanes"
                if idx not in byregfiles[regfile]:
                    byregfiles[regfile][idx] = []
                fuspec = (funame, fu, idx)
                byregfiles[regfile][idx].append(fuspec)
                byregfiles_spec[regfile][regname][4].append(fuspec)

        # ok just print that out, for convenience
        for regfile in byregfiles:
            print("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, read, write, wid, fuspec] = fspec
                print(" rf %s port %s lane: %s" % (mode, regfile, regname))
                print(" %s" % regname, wid, read, write, rdflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print(" ", funame, fu, idx, fusig)
                    print()

        return byregfiles, byregfiles_spec

    def __iter__(self):
        """Yield the ports of the function units, decoded insn and L0."""
        yield from self.fus.ports()
        yield from self.e.ports()
        yield from self.l0.ports()
        # TODO: regs

    def ports(self):
        """Return the ports produced by iterating this core, as a list."""
        return list(self)
526
527
if __name__ == '__main__':
    # build a core from the test memory parameters, convert it to RTLIL
    # and save the result in test_core.il
    core_params = {
        'ldst_ifacetype': 'testpi',
        'imem_ifacetype': '',
        'addr_wid': 48,
        'mask_wid': 8,
        'reg_wid': 64,
    }
    pspec = TestMemPspec(**core_params)
    dut = NonProductionCore(pspec)
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(il_text)