core_stopped_i unused: remove
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 """
21
22 from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
23 from nmigen.cli import rtlil
24
25 from soc.decoder.power_decoder2 import PowerDecodeSubset
26 from soc.decoder.power_regspec_map import regspec_decode_read
27 from soc.decoder.power_regspec_map import regspec_decode_write
28
29 from nmutil.picker import PriorityPicker
30 from nmutil.util import treereduce
31
32 from soc.fu.compunits.compunits import AllFunctionUnits
33 from soc.regfile.regfiles import RegFiles
34 from soc.decoder.decode2execute1 import Decode2ToExecute1Type
35 from soc.decoder.decode2execute1 import IssuerDecode2ToOperand
36 from soc.decoder.power_decoder2 import get_rdflags
37 from soc.decoder.decode2execute1 import Data
38 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
39 from soc.config.test.test_loadstore import TestMemPspec
40 from soc.decoder.power_enums import MicrOp
41 from soc.config.state import CoreState
42
43 import operator
44
45 from nmutil.util import rising_edge
46
47
48 # helper function for reducing a list of signals down to a parallel
49 # ORed single signal.
def ortreereduce(tree, attr="data_o"):
    """OR together one attribute taken from every item of ``tree``.

    Each item contributes ``getattr(item, attr)``; the contributions are
    combined with ``operator.or_`` by ``treereduce``.
    """
    def pick(item):
        return getattr(item, attr)

    return treereduce(tree, operator.or_, pick)
52
53
def ortreereduce_sig(tree):
    """OR together the items of ``tree`` themselves (no attribute lookup).

    The items are passed unchanged to ``treereduce`` and combined with
    ``operator.or_``.
    """
    def identity(item):
        return item

    return treereduce(tree, operator.or_, identity)
56
57
58 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the ``(regname, fspec)`` pairs of ``fuspecs`` as a list.

    Pairs whose register name starts with "full" come first, followed by
    all remaining pairs.  Within each group the dictionary's own iteration
    order is kept.
    """
    full_first = []
    others = []
    for entry in fuspecs.items():
        bucket = full_first if entry[0].startswith("full") else others
        bucket.append(entry)
    return full_first + others
68
69
class NonProductionCore(Elaboratable):
    """Simple core connecting Function Units to Register Files.

    Each Function Unit (FU) gets its own subsetted instruction decoder,
    except the TRAP unit which uses the main decoder record ``self.e``.
    Register file read and write ports are shared between FUs by way of
    one PriorityPicker per regfile port.
    """

    def __init__(self, pspec):
        """Create the FUs, register files, decoders and interface signals.

        pspec: parameter specification object.  The optional attributes
               ``svp64``, ``regreduce`` and (in elaborate) ``nocore`` are
               honoured when present; it is also passed on to the L0
               cache buffer, the function units and the register files.
        """
        self.pspec = pspec

        # test if SVP64 is to be enabled
        self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)

        # test to see if regfile ports should be reduced
        self.regreduce_en = (hasattr(pspec, "regreduce") and
                             (pspec.regreduce == True))

        # single LD/ST funnel for memory access
        self.l0 = TstL0CacheBuffer(pspec, n_units=1)
        pi = self.l0.l0.dports[0]

        # function units (only one each)
        # only include mmu if enabled in pspec
        self.fus = AllFunctionUnits(pspec, pilist=[pi])

        # register files (yes plural)
        self.regs = RegFiles(pspec)

        # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
        self.e = Decode2ToExecute1Type("core", opkls=IssuerDecode2ToOperand,
                                       regreduce_en=self.regreduce_en)

        # SVP64 RA_OR_ZERO needs to know if the relevant EXTRA2/3 field is zero
        self.sv_a_nz = Signal()

        # state and raw instruction
        self.state = CoreState("core")
        self.raw_insn_i = Signal(32)  # raw instruction
        self.bigendian_i = Signal()  # bigendian - TODO, set by MSR.BE

        # issue/valid/busy signalling
        self.ivalid_i = Signal(reset_less=True)  # instruction is valid
        self.issue_i = Signal(reset_less=True)
        self.busy_o = Signal(name="corebusy_o", reset_less=True)

        # start/stop and terminated signalling
        self.core_terminate_o = Signal(reset=0)  # indicates stopped

        # create per-FU instruction decoders (subsetted)
        self.decoders = {}
        self.des = {}

        # name of the TRAP function unit; stays None when there is none,
        # so that elaborate() does not fail on a missing attribute
        self.trapunit = None

        for funame, fu in self.fus.fus.items():
            f_name = fu.fnunit.name
            opkls = fu.opsubsetkls
            if f_name == 'TRAP':
                # the trap unit gets no decoder of its own (see elaborate)
                self.trapunit = funame
                continue
            self.decoders[funame] = PowerDecodeSubset(
                None, opkls, f_name,
                final=True,
                state=self.state,
                svp64_en=self.svp64_en,
                regreduce_en=self.regreduce_en)
            self.des[funame] = self.decoders[funame].do

        if "mmu0" in self.decoders:
            self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]

    def elaborate(self, platform):
        """Build the nmigen Module for the core and return it."""
        m = Module()
        # for testing purposes, to cut down on build time in coriolis2
        if hasattr(self.pspec, "nocore") and self.pspec.nocore == True:
            x = Signal()  # dummy signal
            m.d.sync += x.eq(~x)
            return m
        comb = m.d.comb

        m.submodules.fus = self.fus
        m.submodules.l0 = self.l0
        self.regs.elaborate_into(m, platform)

        # connect decoders
        for v in self.decoders.values():
            setattr(m.submodules, "dec_%s" % v.fn_name, v)
            comb += v.dec.raw_opcode_in.eq(self.raw_insn_i)
            comb += v.dec.bigendian.eq(self.bigendian_i)
            # sigh due to SVP64 RA_OR_ZERO detection connect these too
            comb += v.sv_a_nz.eq(self.sv_a_nz)

        # ssh, cheat: trap uses the main decoder because of the rewriting
        if self.trapunit is not None:
            self.des[self.trapunit] = self.e.do

        # connect up Function Units, then read/write ports
        fu_bitdict = self.connect_instruction(m)
        self.connect_rdports(m, fu_bitdict)
        self.connect_wrports(m, fu_bitdict)

        return m

    def connect_instruction(self, m):
        """connect_instruction

        uses decoded (from PowerOp) function unit information from CSV files
        to ascertain which Function Unit should deal with the current
        instruction.

        some (such as OP_ATTN, OP_NOP) are dealt with here, including
        ignoring it and halting the processor.  OP_NOP is a bit annoying
        because the issuer expects busy flag still to be raised then lowered.
        (this requires a fake counter to be set).

        returns a dictionary mapping each FU name to its one-bit enable
        signal.
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus

        # enable-signals for each FU, get one bit for each FU (by name)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_bitdict = {funame: fu_enable[i] for i, funame in enumerate(fus)}

        # enable the required Function Unit based on the opcode decode
        # note: this *only* works correctly for simple core when one and
        # *only* one FU is allocated per instruction
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            enable = Signal(name="en_%s" % funame, reset_less=True)
            comb += enable.eq((self.e.do.fn_unit & fnunit).bool())
            comb += fu_bitdict[funame].eq(enable)

        # sigh - need a NOP counter: keeps busy raised while counting down
        counter = Signal(2)
        with m.If(counter != 0):
            sync += counter.eq(counter - 1)
            comb += self.busy_o.eq(1)

        with m.If(self.ivalid_i):  # run only when valid
            with m.Switch(self.e.do.insn_type):
                # check for ATTN: halt if true
                with m.Case(MicrOp.OP_ATTN):
                    sync += self.core_terminate_o.eq(1)

                with m.Case(MicrOp.OP_NOP):
                    sync += counter.eq(2)
                    comb += self.busy_o.eq(1)

                with m.Default():
                    # connect up instructions.  only one enabled at a time
                    for funame, fu in fus.items():
                        do = self.des[funame]
                        enable = fu_bitdict[funame]

                        # run this FunctionUnit if enabled
                        # route op, issue, busy, read flags and mask to FU
                        with m.If(enable):
                            # operand comes from the *local* decoder
                            comb += fu.oper_i.eq_from(do)
                            comb += fu.issue_i.eq(self.issue_i)
                            comb += self.busy_o.eq(fu.busy_o)
                            # rdmask, which is for registers, needs to come
                            # from the *main* decoder
                            rdmask = get_rdflags(self.e, fu)
                            comb += fu.rdmaskn.eq(~rdmask)

        return fu_bitdict

    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
        """Connect every FU that reads from one regfile read port.

        m:          the Module being built
        fu_bitdict: FU name -> enable signal (from connect_instruction)
        rdpickers:  dict-of-dicts; the PriorityPicker created here is
                    stored at rdpickers[regfile][regname]
        regfile:    regfile name (lower-cased to look it up in regs.rf)
        regname:    read port name within that regfile
        fspec:      one (rdflag, read, write, wid, fuspec-list) tuple, or
                    a list of them when several specs share the port
        """
        comb, sync = m.d.comb, m.d.sync

        rpidx = regname

        # select the required read port.  these are pre-defined sizes
        rfile = self.regs.rf[regfile.lower()]
        rport = rfile.r_ports[rpidx]
        print("read regfile", rpidx, regfile, self.regs.rf.keys(),
              rfile, rfile.unary)

        # normalise to a list so merged and unmerged ports share one path
        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        rdflags = []
        pplen = 0
        reads = []
        ppoffs = []
        for i, spec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = spec
            print("fpsec", i, spec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)
            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name=name, reset_less=True)
            comb += rdflag.eq(rf)
            rdflags.append(rdflag)
            reads.append(read)

        print("pplen", pplen)

        # create a priority picker to manage this port
        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

        rens = []
        addrs = []
        for i, spec in enumerate(fspecs):
            (rf, read, write, wid, fuspec) = spec
            # connect up the FU req/go signals, and the reg-read to the FU
            # and create a Read Broadcast Bus
            for pi, (funame, fu, idx) in enumerate(fuspec):
                pi += ppoffs[i]  # picker index is offset per spec

                # connect request-read to picker input, and output to go-rd
                fu_active = fu_bitdict[funame]
                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
                addr_en = Signal.like(reads[i], name="addr_en_"+name)
                pick = Signal(name="pick_"+name)  # picker input
                rp = Signal(name="rp_"+name)  # picker output
                delay_pick = Signal(name="dp_"+name)  # read-enable "underway"

                # exclude any currently-enabled read-request (mask out active)
                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
                                ~delay_pick)
                comb += rdpick.i[pi].eq(pick)
                comb += fu.go_rd_i[idx].eq(delay_pick)  # pass in *delayed* pick

                # if picked, select read-port "reg select" number to port
                comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
                sync += delay_pick.eq(rp)  # delayed "pick"
                comb += addr_en.eq(Mux(rp, reads[i], 0))

                # the read-enable happens combinatorially (see mux-bus below)
                # but it results in the data coming out on a one-cycle delay.
                if rfile.unary:
                    rens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    rens.append(rp)

                # use the *delayed* pick signal to put requested data onto bus
                with m.If(delay_pick):
                    # connect regfile port to input, creating fan-out Bus
                    src = fu.src_i[idx]
                    print("reg connect widths",
                          regfile, regname, pi, funame,
                          src.shape(), rport.data_o.shape())
                    # all FUs connect to same port
                    comb += src.eq(rport.data_o)

        # or-reduce the muxed read signals
        if rfile.unary:
            # for unary-addressed
            comb += rport.ren.eq(ortreereduce_sig(rens))
        else:
            # for binary-addressed
            comb += rport.addr.eq(ortreereduce_sig(addrs))
            comb += rport.ren.eq(Cat(*rens).bool())
            print("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    def connect_rdports(self, m, fu_bitdict):
        """connect read ports

        orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker.
        """
        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        rdpickers = {}
        for regfile in byregfiles_rd:
            fuspecs = byregfiles_rdspec[regfile]
            rdpickers[regfile] = {}

            # argh.  an experiment to merge RA and RB in the INT regfile
            # (we have too many read/write ports)
            if self.regreduce_en:
                if regfile == 'INT':
                    fuspecs['rabc'] = [fuspecs.pop('rb')]
                    fuspecs['rabc'].append(fuspecs.pop('rc'))
                    fuspecs['rabc'].append(fuspecs.pop('ra'))
                if regfile == 'FAST':
                    fuspecs['fast1'] = [fuspecs.pop('fast1')]
                    if 'fast2' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast2'))

            # for each named regfile port, connect up all FUs to that port
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect rd", regname, fspec)
                self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                    regname, fspec)

    def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
        """Connect every FU that writes to one regfile write port.

        m:          the Module being built
        fu_bitdict: FU name -> enable signal (from connect_instruction)
        wrpickers:  dict-of-dicts; the PriorityPicker created here is
                    stored at wrpickers[regfile][regname]
        regfile:    regfile name (lower-cased to look it up in regs.rf)
        regname:    write port name within that regfile
        fspec:      one (rdflag, read, write, wid, fuspec-list) tuple, or
                    a list of them when several specs share the port
        """
        comb = m.d.comb

        print("connect wr", regname, fspec)
        rpidx = regname

        # select the required write port.  these are pre-defined sizes
        print(regfile, self.regs.rf.keys())
        rfile = self.regs.rf[regfile.lower()]
        wport = rfile.w_ports[rpidx]

        # normalise to a list so merged and unmerged ports share one path
        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        pplen = 0
        ppoffs = []
        for i, spec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = spec
            print("fpsec", i, spec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)

        # create a priority picker to manage this port
        wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
        setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

        wsigs = []
        wens = []
        addrs = []
        for i, spec in enumerate(fspecs):
            # connect up the FU req/go signals and the reg-read to the FU
            # these are arbitrated by Data.ok signals
            (rf, read, write, wid, fuspec) = spec
            for pi, (funame, fu, idx) in enumerate(fuspec):
                pi += ppoffs[i]  # picker index is offset per spec

                # write-request comes from dest.ok
                dest = fu.get_out(idx)
                fu_dest_latch = fu.get_fu_out(idx)  # latched output
                name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                # NOTE: wrflag is driven but not (currently) used in pick
                wrflag = Signal(name=name, reset_less=True)
                comb += wrflag.eq(dest.ok & fu.busy_o)

                # connect request-write to picker input, and output to go-wr
                fu_active = fu_bitdict[funame]
                pick = fu.wr.rel_o[idx] & fu_active  # & wrflag
                comb += wrpick.i[pi].eq(pick)
                # create a single-pulse go write from the picker output
                wr_pick = Signal()
                comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
                comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

                # connect the regspec write "reg select" number to this port
                # only if one FU actually requests (and is granted) the port
                # will the write-enable be activated
                addr_en = Signal.like(write)
                wp = Signal()
                comb += wp.eq(wr_pick & wrpick.en_o)
                comb += addr_en.eq(Mux(wp, write, 0))
                if rfile.unary:
                    wens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    wens.append(wp)

                # connect regfile port to input
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      dest.shape(), wport.data_i.shape())
                wsigs.append(fu_dest_latch)

        # here is where we create the Write Broadcast Bus. simple, eh?
        comb += wport.data_i.eq(ortreereduce_sig(wsigs))
        if rfile.unary:
            # for unary-addressed
            comb += wport.wen.eq(ortreereduce_sig(wens))
        else:
            # for binary-addressed
            comb += wport.addr.eq(ortreereduce_sig(addrs))
            comb += wport.wen.eq(ortreereduce_sig(wens))

    def connect_wrports(self, m, fu_bitdict):
        """connect write ports

        orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker.

        note that the write-port wen, write-port data, and go_wr_i all need
        to be on the exact same clock cycle.  in connect_wrport they are all
        driven from the combinatorial domain.
        """
        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same for write ports.
        # BLECH!  complex code-duplication! BLECH!
        wrpickers = {}
        for regfile in byregfiles_wr:
            fuspecs = byregfiles_wrspec[regfile]
            wrpickers[regfile] = {}

            if self.regreduce_en:
                # argh, more port-merging
                if regfile == 'INT':
                    fuspecs['o'] = [fuspecs.pop('o')]
                    fuspecs['o'].append(fuspecs.pop('o1'))
                if regfile == 'FAST':
                    fuspecs['fast1'] = [fuspecs.pop('fast1')]
                    if 'fast2' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast2'))

            for (regname, fspec) in sort_fuspecs(fuspecs):
                self.connect_wrport(m, fu_bitdict, wrpickers,
                                    regfile, regname, fspec)

    def get_byregfiles(self, readmode):
        """Group the FUs' register ports by regfile.

        readmode: True to collect read (source) ports, False to collect
                  write (destination) ports.

        returns a tuple (byregfiles, byregfiles_spec):
        * byregfiles[regfile][idx] is a list of (funame, fu, idx)
        * byregfiles_spec[regfile][regname] is a tuple
          (rdflag, read, write, wid, list-of-(funame, fu, idx))
          where rdflag/read are None in write mode and write is None in
          read mode.
        """
        mode = "read" if readmode else "write"
        fus = self.fus.fus
        e = self.e  # decoded instruction to execute

        # dictionary of lists of regfile ports
        byregfiles = {}
        byregfiles_spec = {}
        for (funame, fu) in fus.items():
            print("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                if readmode:
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
                if readmode:
                    rdflag, read = regspec_decode_read(e, regfile, regname)
                    write = None
                else:
                    rdflag, read = None, None
                    _, write = regspec_decode_write(e, regfile, regname)
                if regfile not in byregfiles:
                    byregfiles[regfile] = {}
                    byregfiles_spec[regfile] = {}
                if regname not in byregfiles_spec[regfile]:
                    # first FU to name this port fixes the spec for it
                    byregfiles_spec[regfile][regname] = \
                        (rdflag, read, write, wid, [])
                # here we start to create "lanes"
                if idx not in byregfiles[regfile]:
                    byregfiles[regfile][idx] = []
                fuspec = (funame, fu, idx)
                byregfiles[regfile][idx].append(fuspec)
                byregfiles_spec[regfile][regname][4].append(fuspec)

        # ok just print that out, for convenience
        for regfile in byregfiles:
            print("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, read, write, wid, fuspec] = fspec
                print(" rf %s port %s lane: %s" % (mode, regfile, regname))
                print(" %s" % regname, wid, read, write, rdflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print(" ", funame, fu, idx, fusig)
                    print()

        return byregfiles, byregfiles_spec

    def __iter__(self):
        """Yield the ports of the FUs, the decoder record and the L0 buffer."""
        yield from self.fus.ports()
        yield from self.e.ports()
        yield from self.l0.ports()
        # TODO: regs

    def ports(self):
        """Return all ports (see __iter__) as a list."""
        return list(self)
551
552
if __name__ == '__main__':
    # build a core from a test parameter spec and dump it as RTLIL
    test_pspec = TestMemPspec(ldst_ifacetype='testpi',
                              imem_ifacetype='',
                              addr_wid=48,
                              mask_wid=8,
                              reg_wid=64)
    core = NonProductionCore(test_pspec)
    rtlil_text = rtlil.convert(core, ports=core.ports())
    with open("test_core.il", "w") as outfile:
        outfile.write(rtlil_text)