add link of RA_OR_ZERO SVP64 detection
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 """
21
22 from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
23 from nmigen.cli import rtlil
24
25 from soc.decoder.power_decoder2 import PowerDecodeSubset
26 from soc.decoder.power_regspec_map import regspec_decode_read
27 from soc.decoder.power_regspec_map import regspec_decode_write
28
29 from nmutil.picker import PriorityPicker
30 from nmutil.util import treereduce
31
32 from soc.fu.compunits.compunits import AllFunctionUnits
33 from soc.regfile.regfiles import RegFiles
34 from soc.decoder.decode2execute1 import Decode2ToExecute1Type
35 from soc.decoder.decode2execute1 import IssuerDecode2ToOperand
36 from soc.decoder.power_decoder2 import get_rdflags
37 from soc.decoder.decode2execute1 import Data
38 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
39 from soc.config.test.test_loadstore import TestMemPspec
40 from soc.decoder.power_enums import MicrOp
41 from soc.config.state import CoreState
42
43 import operator
44
45 from nmutil.util import rising_edge
46
47
def ortreereduce(tree, attr="data_o"):
    """OR-reduce one attribute taken from every item of *tree*.

    Each item contributes ``getattr(item, attr)``; the contributions are
    combined with ``operator.or_`` by ``treereduce``, giving a single
    ORed value.

    tree: sequence of objects that each carry the attribute *attr*
    attr: name of the attribute to pick from each item
    """
    def pick(item):
        return getattr(item, attr)

    return treereduce(tree, operator.or_, pick)
52
53
def ortreereduce_sig(tree):
    """OR-reduce the items of *tree* themselves into a single value.

    Same as ortreereduce, except that the items are used directly
    rather than having an attribute picked from each of them.
    """
    def itself(item):
        return item

    return treereduce(tree, operator.or_, itself)
56
57
def sort_fuspecs(fuspecs):
    """Return the (regname, fspec) pairs of *fuspecs*, "full" ones first.

    Entries whose register name starts with "full" are placed ahead of
    all the others.  Within each of the two groups the dictionary's own
    iteration order is kept.  The result is a list of tuples.
    """
    full_first, remainder = [], []
    for entry in fuspecs.items():
        bucket = full_first if entry[0].startswith("full") else remainder
        bucket.append(entry)
    return full_first + remainder
68
69
class NonProductionCore(Elaboratable):
    """Simple core: Function Units wired to Register Files.

    Function Units are grouped by the regfile read/write port that
    their regspecs ask for.  Each port gets one PriorityPicker that
    arbitrates between the Function Units sharing it, and the selected
    unit is connected to the port through an OR-reduced broadcast bus.

    Only one Function Unit is enabled per instruction (see
    connect_instruction).
    """

    def __init__(self, pspec):
        """Create the sub-blocks and the per-FU (subsetted) decoders.

        pspec: parameter specification object, handed on unchanged to
               the L0 cache buffer and to AllFunctionUnits.
        """
        self.pspec = pspec

        # single LD/ST funnel for memory access
        self.l0 = TstL0CacheBuffer(pspec, n_units=1)
        pi = self.l0.l0.dports[0]

        # function units (only one each)
        # only include mmu if enabled in pspec
        self.fus = AllFunctionUnits(pspec, pilist=[pi])

        # register files (yes plural)
        self.regs = RegFiles()

        # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
        self.e = Decode2ToExecute1Type("core", opkls=IssuerDecode2ToOperand)

        # SVP64 RA_OR_ZERO needs to know if the relevant EXTRA2/3 field is zero
        self.sv_a_nz = Signal()

        # state and raw instruction
        self.state = CoreState("core")
        self.raw_insn_i = Signal(32)  # raw instruction
        self.bigendian_i = Signal()  # bigendian - TODO, set by MSR.BE

        # issue/valid/busy signalling
        self.ivalid_i = Signal(reset_less=True)  # instruction is valid
        self.issue_i = Signal(reset_less=True)
        self.busy_o = Signal(name="corebusy_o", reset_less=True)

        # start/stop and terminated signalling
        self.core_stopped_i = Signal(reset_less=True)
        self.core_terminate_o = Signal(reset=0)  # indicates stopped

        # create per-FU instruction decoders (subsetted)
        self.decoders = {}
        self.des = {}

        # name of the TRAP function unit.  stays None when no such unit
        # exists, so that elaborate() can test for it rather than fail
        # on a missing attribute.
        self.trapunit = None

        for funame, fu in self.fus.fus.items():
            f_name = fu.fnunit.name
            opkls = fu.opsubsetkls
            if f_name == 'TRAP':
                # no local decoder for TRAP: elaborate() points it at
                # the main decoder instead
                self.trapunit = funame
                continue
            self.decoders[funame] = PowerDecodeSubset(None, opkls, f_name,
                                                      final=True,
                                                      state=self.state)
            self.des[funame] = self.decoders[funame].do

        if "mmu0" in self.decoders:
            self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]

    def elaborate(self, platform):
        """Build the Module: submodules, decoders, FUs and regfile ports."""
        m = Module()
        # for testing purposes, to cut down on build time in coriolis2
        if hasattr(self.pspec, "nocore") and self.pspec.nocore == True:
            x = Signal()  # dummy signal
            m.d.sync += x.eq(~x)
            return m
        comb = m.d.comb

        m.submodules.fus = self.fus
        m.submodules.l0 = self.l0
        self.regs.elaborate_into(m, platform)

        # connect decoders
        for v in self.decoders.values():
            setattr(m.submodules, "dec_%s" % v.fn_name, v)
            comb += v.dec.raw_opcode_in.eq(self.raw_insn_i)
            comb += v.dec.bigendian.eq(self.bigendian_i)
            # sigh due to SVP64 RA_OR_ZERO detection connect these too
            comb += v.sv_a_nz.eq(self.sv_a_nz)

        # ssh, cheat: trap uses the main decoder because of the rewriting
        if self.trapunit is not None:
            self.des[self.trapunit] = self.e.do

        # connect up Function Units, then read/write ports
        fu_bitdict = self.connect_instruction(m)
        self.connect_rdports(m, fu_bitdict)
        self.connect_wrports(m, fu_bitdict)

        return m

    def connect_instruction(self, m):
        """connect_instruction

        uses decoded (from PowerOp) function unit information from CSV files
        to ascertain which Function Unit should deal with the current
        instruction.

        some (such as OP_ATTN, OP_NOP) are dealt with here, including
        ignoring it and halting the processor.  OP_NOP is a bit annoying
        because the issuer expects busy flag still to be raised then lowered.
        (this requires a fake counter to be set).

        returns a dict of per-FU enable bits, keyed by Function Unit name.
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus

        # enable-signals for each FU, get one bit for each FU (by name)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_bitdict = {}
        for i, funame in enumerate(fus.keys()):
            fu_bitdict[funame] = fu_enable[i]

        # enable the required Function Unit based on the opcode decode
        # note: this *only* works correctly for simple core when one and
        # *only* one FU is allocated per instruction
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            enable = Signal(name="en_%s" % funame, reset_less=True)
            comb += enable.eq((self.e.do.fn_unit & fnunit).bool())
            comb += fu_bitdict[funame].eq(enable)

        # sigh - need a NOP counter
        counter = Signal(2)
        with m.If(counter != 0):
            sync += counter.eq(counter - 1)
            comb += self.busy_o.eq(1)

        with m.If(self.ivalid_i):  # run only when valid
            with m.Switch(self.e.do.insn_type):
                # check for ATTN: halt if true
                with m.Case(MicrOp.OP_ATTN):
                    m.d.sync += self.core_terminate_o.eq(1)

                with m.Case(MicrOp.OP_NOP):
                    sync += counter.eq(2)
                    comb += self.busy_o.eq(1)

                with m.Default():
                    # connect up instructions. only one enabled at a time
                    for funame, fu in fus.items():
                        do = self.des[funame]
                        enable = fu_bitdict[funame]

                        # run this FunctionUnit if enabled
                        # route op, issue, busy, read flags and mask to FU
                        with m.If(enable):
                            # operand comes from the *local* decoder
                            comb += fu.oper_i.eq_from(do)
                            #comb += fu.oper_i.eq_from_execute1(e)
                            comb += fu.issue_i.eq(self.issue_i)
                            comb += self.busy_o.eq(fu.busy_o)
                            # rdmask, which is for registers, needs to come
                            # from the *main* decoder
                            rdmask = get_rdflags(self.e, fu)
                            comb += fu.rdmaskn.eq(~rdmask)

        return fu_bitdict

    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
        """Connect every FU that reads one regfile port to that port.

        m:          the Module being built
        fu_bitdict: per-FU enable bits (from connect_instruction)
        rdpickers:  dict-of-dicts; the picker created here is stored at
                    rdpickers[regfile][regname]
        regfile:    regfile name (lower-cased to index self.regs.rf)
        regname:    name of the read port within that regfile
        fspec:      one (rdflag, read, write, wid, fuspec-list) tuple,
                    or a list of such tuples sharing this port
        """
        comb, sync = m.d.comb, m.d.sync
        regs = self.regs

        rpidx = regname

        # select the required read port. these are pre-defined sizes
        rfile = regs.rf[regfile.lower()]
        rport = rfile.r_ports[rpidx]
        print("read regfile", rpidx, regfile, regs.rf.keys(),
              rfile, rfile.unary)

        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        rdflags = []
        pplen = 0
        reads = []
        ppoffs = []
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = fspec
            print ("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)
            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name=name, reset_less=True)
            comb += rdflag.eq(rf)
            rdflags.append(rdflag)
            reads.append(read)

        print ("pplen", pplen)

        # create a priority picker to manage this port
        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

        rens = []
        addrs = []
        for i, fspec in enumerate(fspecs):
            (rf, read, write, wid, fuspec) = fspec
            # connect up the FU req/go signals, and the reg-read to the FU
            # and create a Read Broadcast Bus
            for pi, (funame, fu, idx) in enumerate(fuspec):
                # picker index: position within this fspec plus the
                # offset of this fspec within the shared picker
                pi += ppoffs[i]

                # connect request-read to picker input, and output to go-rd
                fu_active = fu_bitdict[funame]
                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
                addr_en = Signal.like(reads[i], name="addr_en_"+name)
                pick = Signal(name="pick_"+name)  # picker input
                rp = Signal(name="rp_"+name)  # picker output
                delay_pick = Signal(name="dp_"+name)  # read-enable "underway"

                # exclude any currently-enabled read-request (mask out active)
                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
                                ~delay_pick)
                comb += rdpick.i[pi].eq(pick)
                comb += fu.go_rd_i[idx].eq(delay_pick)  # pass in *delayed* pick

                # if picked, select read-port "reg select" number to port
                comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
                sync += delay_pick.eq(rp)  # delayed "pick"
                comb += addr_en.eq(Mux(rp, reads[i], 0))

                # the read-enable happens combinatorially (see mux-bus below)
                # but it results in the data coming out on a one-cycle delay.
                if rfile.unary:
                    rens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    rens.append(rp)

                # use the *delayed* pick signal to put requested data onto bus
                with m.If(delay_pick):
                    # connect regfile port to input, creating fan-out Bus
                    src = fu.src_i[idx]
                    print("reg connect widths",
                          regfile, regname, pi, funame,
                          src.shape(), rport.data_o.shape())
                    # all FUs connect to same port
                    comb += src.eq(rport.data_o)

        # or-reduce the muxed read signals
        if rfile.unary:
            # for unary-addressed
            comb += rport.ren.eq(ortreereduce_sig(rens))
        else:
            # for binary-addressed
            comb += rport.addr.eq(ortreereduce_sig(addrs))
            comb += rport.ren.eq(Cat(*rens).bool())
            print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    def connect_rdports(self, m, fu_bitdict):
        """connect read ports

        orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker.
        """
        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        rdpickers = {}
        for regfile in byregfiles_rd:
            fuspecs = byregfiles_rdspec[regfile]
            rdpickers[regfile] = {}

            # argh. an experiment to merge RA and RB in the INT regfile
            # (we have too many read/write ports)
            #if regfile == 'INT':
                #fuspecs['rabc'] = [fuspecs.pop('rb')]
                #fuspecs['rabc'].append(fuspecs.pop('rc'))
                #fuspecs['rabc'].append(fuspecs.pop('ra'))
            #if regfile == 'FAST':
            #    fuspecs['fast1'] = [fuspecs.pop('fast1')]
            #    if 'fast2' in fuspecs:
            #        fuspecs['fast1'].append(fuspecs.pop('fast2'))

            # for each named regfile port, connect up all FUs to that port
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect rd", regname, fspec)
                self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                    regname, fspec)

    def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
        """Connect every FU that writes one regfile port to that port.

        m:          the Module being built
        fu_bitdict: per-FU enable bits (from connect_instruction)
        wrpickers:  dict-of-dicts; the picker created here is stored at
                    wrpickers[regfile][regname]
        regfile:    regfile name (lower-cased to index self.regs.rf)
        regname:    name of the write port within that regfile
        fspec:      one (rdflag, read, write, wid, fuspec-list) tuple,
                    or a list of such tuples sharing this port
        """
        comb = m.d.comb
        regs = self.regs

        print("connect wr", regname, fspec)
        rpidx = regname

        # select the required write port. these are pre-defined sizes
        print(regfile, regs.rf.keys())
        rfile = regs.rf[regfile.lower()]
        wport = rfile.w_ports[rpidx]

        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        pplen = 0
        ppoffs = []
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = fspec
            print ("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)

        # create a priority picker to manage this port
        wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
        setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

        wsigs = []
        wens = []
        addrs = []
        for i, fspec in enumerate(fspecs):
            # connect up the FU req/go signals and the reg-read to the FU
            # these are arbitrated by Data.ok signals
            (rf, read, write, wid, fuspec) = fspec
            for pi, (funame, fu, idx) in enumerate(fuspec):
                # picker index: position within this fspec plus the
                # offset of this fspec within the shared picker
                pi += ppoffs[i]

                # write-request comes from dest.ok
                dest = fu.get_out(idx)
                fu_dest_latch = fu.get_fu_out(idx)  # latched output
                name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                wrflag = Signal(name=name, reset_less=True)
                comb += wrflag.eq(dest.ok & fu.busy_o)

                # connect request-write to picker input, and output to go-wr
                fu_active = fu_bitdict[funame]
                pick = fu.wr.rel_o[idx] & fu_active  # & wrflag
                comb += wrpick.i[pi].eq(pick)
                # create a single-pulse go write from the picker output
                wr_pick = Signal()
                comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
                comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

                # connect the regspec write "reg select" number to this port
                # only if one FU actually requests (and is granted) the port
                # will the write-enable be activated
                addr_en = Signal.like(write)
                wp = Signal()
                comb += wp.eq(wr_pick & wrpick.en_o)
                comb += addr_en.eq(Mux(wp, write, 0))
                if rfile.unary:
                    wens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    wens.append(wp)

                # connect regfile port to input
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      dest.shape(), wport.data_i.shape())
                wsigs.append(fu_dest_latch)

        # here is where we create the Write Broadcast Bus. simple, eh?
        comb += wport.data_i.eq(ortreereduce_sig(wsigs))
        if rfile.unary:
            # for unary-addressed
            comb += wport.wen.eq(ortreereduce_sig(wens))
        else:
            # for binary-addressed
            comb += wport.addr.eq(ortreereduce_sig(addrs))
            comb += wport.wen.eq(ortreereduce_sig(wens))

    def connect_wrports(self, m, fu_bitdict):
        """connect write ports

        orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker.

        note that the write-port wen, write-port data, and go_wr_i all need to
        be on the exact same clock cycle.  connect_wrport assigns all three
        in the comb domain.
        """
        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same for write ports.
        # BLECH! complex code-duplication! BLECH!
        wrpickers = {}
        for regfile in byregfiles_wr:
            fuspecs = byregfiles_wrspec[regfile]
            wrpickers[regfile] = {}

            # argh, more port-merging
            if regfile == 'INT':
                fuspecs['o'] = [fuspecs.pop('o')]
                # 'o1' only exists if some FU has such an output: merge
                # it when present (same treatment as 'fast2' below)
                if 'o1' in fuspecs:
                    fuspecs['o'].append(fuspecs.pop('o1'))
            if regfile == 'FAST':
                fuspecs['fast1'] = [fuspecs.pop('fast1')]
                if 'fast2' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast2'))

            for (regname, fspec) in sort_fuspecs(fuspecs):
                self.connect_wrport(m, fu_bitdict, wrpickers,
                                    regfile, regname, fspec)

    def get_byregfiles(self, readmode):
        """Group the FUs' register ports by regfile.

        readmode: True to collect the FU inputs (read ports), False to
                  collect the FU outputs (write ports).

        returns a tuple (byregfiles, byregfiles_spec):

        * byregfiles[regfile][idx] is a list of (funame, fu, idx)
        * byregfiles_spec[regfile][regname] is a tuple
          (rdflag, read, write, wid, [(funame, fu, idx), ...]) where
          rdflag/read are None in write mode and write is None in
          read mode.  the first FU to name a port sets rdflag, read,
          write and wid for that port.
        """
        mode = "read" if readmode else "write"
        fus = self.fus.fus
        e = self.e  # decoded instruction to execute

        # dictionary of lists of regfile ports
        byregfiles = {}
        byregfiles_spec = {}
        for (funame, fu) in fus.items():
            print("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                if readmode:
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
                if readmode:
                    rdflag, read = regspec_decode_read(e, regfile, regname)
                    write = None
                else:
                    rdflag, read = None, None
                    _, write = regspec_decode_write(e, regfile, regname)
                if regfile not in byregfiles:
                    byregfiles[regfile] = {}
                    byregfiles_spec[regfile] = {}
                if regname not in byregfiles_spec[regfile]:
                    byregfiles_spec[regfile][regname] = \
                        (rdflag, read, write, wid, [])
                # here we start to create "lanes"
                if idx not in byregfiles[regfile]:
                    byregfiles[regfile][idx] = []
                fuspec = (funame, fu, idx)
                byregfiles[regfile][idx].append(fuspec)
                byregfiles_spec[regfile][regname][4].append(fuspec)

        # ok just print that out, for convenience
        for regfile in byregfiles:
            print("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, read, write, wid, fuspec] = fspec
                print(" rf %s port %s lane: %s" % (mode, regfile, regname))
                print(" %s" % regname, wid, read, write, rdflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print(" ", funame, fu, idx, fusig)
                    print()

        return byregfiles, byregfiles_spec

    def __iter__(self):
        """Yield the ports of the FUs, the decoder record and the L0 buffer."""
        yield from self.fus.ports()
        yield from self.e.ports()
        yield from self.l0.ports()
        # TODO: regs

    def ports(self):
        """Return all ports (see __iter__) as a list."""
        return list(self)
540
541
if __name__ == '__main__':
    # build the core with a test memory configuration and write it
    # out as RTLIL to test_core.il
    pspec_args = dict(ldst_ifacetype='testpi',
                      imem_ifacetype='',
                      addr_wid=48,
                      mask_wid=8,
                      reg_wid=64)
    core = NonProductionCore(TestMemPspec(**pspec_args))
    il_text = rtlil.convert(core, ports=core.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(il_text)