radix: reading first page table entry
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 """
21
22 from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
23 from nmigen.cli import rtlil
24
25 from soc.decoder.power_decoder2 import PowerDecodeSubset
26 from soc.decoder.power_regspec_map import regspec_decode_read
27 from soc.decoder.power_regspec_map import regspec_decode_write
28
29 from nmutil.picker import PriorityPicker
30 from nmutil.util import treereduce
31
32 from soc.fu.compunits.compunits import AllFunctionUnits
33 from soc.regfile.regfiles import RegFiles
34 from soc.decoder.decode2execute1 import Decode2ToExecute1Type
35 from soc.decoder.decode2execute1 import IssuerDecode2ToOperand
36 from soc.decoder.power_decoder2 import get_rdflags
37 from soc.decoder.decode2execute1 import Data
38 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
39 from soc.config.test.test_loadstore import TestMemPspec
40 from soc.decoder.power_enums import MicrOp
41 from soc.config.state import CoreState
42
43 import operator
44
45 from nmutil.util import rising_edge
46
47
48 # helper function for reducing a list of signals down to a parallel
49 # ORed single signal.
def ortreereduce(tree, attr="data_o"):
    """OR together one attribute taken from every item of *tree*.

    The named attribute (``data_o`` unless overridden) is fetched from each
    item and the results are combined with ``operator.or_`` by nmutil's
    ``treereduce`` helper.
    """
    def fetch_attr(item):
        return getattr(item, attr)

    return treereduce(tree, operator.or_, fetch_attr)
52
53
def ortreereduce_sig(tree):
    """OR together the items of *tree* themselves (no attribute lookup).

    Each item is passed unchanged to nmutil's ``treereduce`` helper, which
    combines them with ``operator.or_``.
    """
    def passthrough(item):
        return item

    return treereduce(tree, operator.or_, passthrough)
56
57
58 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the ``(regname, fspec)`` pairs of *fuspecs*, "full" ports first.

    Entries whose register name starts with ``"full"`` are placed ahead of
    all others; inside each of the two groups the dictionary's own iteration
    order is kept.  The result is a plain list of 2-tuples.
    """
    # False sorts ahead of True, and sorted() is stable, so this is a
    # two-way partition that preserves the relative order within each half.
    return sorted(fuspecs.items(),
                  key=lambda pair: not pair[0].startswith("full"))
68
69
class NonProductionCore(Elaboratable):
    """Simple core: Function Units wired directly to the Register Files.

    One subsetted instruction decoder is created per Function Unit (the
    TRAP unit instead shares the main decode record ``self.e.do``).  For
    every register file port a PriorityPicker arbitrates between the
    Function Units that want it, and a broadcast bus carries the data.
    Only one Function Unit is expected to be enabled per instruction.
    """

    def __init__(self, pspec):
        """Create the sub-blocks and the core's interface signals.

        pspec is handed unchanged to TstL0CacheBuffer and AllFunctionUnits;
        elaborate() additionally looks for an optional "nocore" attribute
        on it.
        """
        self.pspec = pspec

        # single LD/ST funnel for memory access
        self.l0 = TstL0CacheBuffer(pspec, n_units=1)
        pi = self.l0.l0.dports[0]

        # function units (only one each)
        # only include mmu if enabled in pspec
        self.fus = AllFunctionUnits(pspec, pilist=[pi])

        # register files (yes plural)
        self.regs = RegFiles()

        # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
        self.e = Decode2ToExecute1Type("core", opkls=IssuerDecode2ToOperand)

        self.state = CoreState("core")
        self.raw_insn_i = Signal(32) # raw instruction
        self.bigendian_i = Signal() # bigendian

        # issue/valid/busy signalling
        self.ivalid_i = Signal(reset_less=True) # instruction is valid
        self.issue_i = Signal(reset_less=True)
        self.busy_o = Signal(name="corebusy_o", reset_less=True)

        # start/stop and terminated signalling
        self.core_stopped_i = Signal(reset_less=True)
        self.core_terminate_o = Signal(reset=0) # indicates stopped

        # create per-FU instruction decoders (subsetted)
        self.decoders = {}
        self.des = {}

        # name of the TRAP Function Unit.  stays None when the configured
        # set of Function Units does not contain one, so that elaborate()
        # can test for it instead of failing with AttributeError.
        self.trapunit = None

        for funame, fu in self.fus.fus.items():
            f_name = fu.fnunit.name
            opkls = fu.opsubsetkls
            if f_name == 'TRAP':
                # no local decoder for TRAP: elaborate() points its entry
                # in self.des at the main decode record instead
                self.trapunit = funame
                continue
            self.decoders[funame] = PowerDecodeSubset(None, opkls, f_name,
                                                      final=True,
                                                      state=self.state)
            self.des[funame] = self.decoders[funame].do

        # give the MMU decoder a reference to the SPR decoder
        if "mmu0" in self.decoders:
            self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]

    def elaborate(self, platform):
        """Build the Module: submodules, decoders, FU and regfile wiring.

        When pspec has a "nocore" attribute equal to True, a stub Module
        containing a single toggling signal is returned instead.
        """
        m = Module()
        # for testing purposes, to cut down on build time in coriolis2
        if hasattr(self.pspec, "nocore") and self.pspec.nocore == True:
            x = Signal() # dummy signal
            m.d.sync += x.eq(~x)
            return m
        comb = m.d.comb

        m.submodules.fus = self.fus
        m.submodules.l0 = self.l0
        self.regs.elaborate_into(m, platform)

        # connect decoders: every per-FU decoder sees the same raw
        # instruction and endianness
        for dec in self.decoders.values():
            setattr(m.submodules, "dec_%s" % dec.fn_name, dec)
            comb += dec.dec.raw_opcode_in.eq(self.raw_insn_i)
            comb += dec.dec.bigendian.eq(self.bigendian_i)

        # ssh, cheat: trap uses the main decoder because of the rewriting
        if self.trapunit is not None:
            self.des[self.trapunit] = self.e.do

        # connect up Function Units, then read/write ports
        fu_bitdict = self.connect_instruction(m)
        self.connect_rdports(m, fu_bitdict)
        self.connect_wrports(m, fu_bitdict)

        return m

    def connect_instruction(self, m):
        """connect_instruction

        uses decoded (from PowerOp) function unit information from CSV files
        to ascertain which Function Unit should deal with the current
        instruction.

        some (such as OP_ATTN, OP_NOP) are dealt with here, including
        ignoring it and halting the processor.  OP_NOP is a bit annoying
        because the issuer expects busy flag still to be raised then lowered.
        (this requires a fake counter to be set).

        returns a dict mapping each Function Unit name to its one-bit
        enable signal (a slice of a single fu_enable Signal).
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus

        # enable-signals for each FU, get one bit for each FU (by name)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_bitdict = {}
        for i, funame in enumerate(fus.keys()):
            fu_bitdict[funame] = fu_enable[i]

        # enable the required Function Unit based on the opcode decode
        # note: this *only* works correctly for simple core when one and
        # *only* one FU is allocated per instruction
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            enable = Signal(name="en_%s" % funame, reset_less=True)
            comb += enable.eq((self.e.do.fn_unit & fnunit).bool())
            comb += fu_bitdict[funame].eq(enable)

        # sigh - need a NOP counter: busy is held while it counts down
        counter = Signal(2)
        with m.If(counter != 0):
            sync += counter.eq(counter - 1)
            comb += self.busy_o.eq(1)

        with m.If(self.ivalid_i): # run only when valid
            with m.Switch(self.e.do.insn_type):
                # check for ATTN: halt if true
                with m.Case(MicrOp.OP_ATTN):
                    m.d.sync += self.core_terminate_o.eq(1)

                with m.Case(MicrOp.OP_NOP):
                    sync += counter.eq(2)
                    comb += self.busy_o.eq(1)

                with m.Default():
                    # connect up instructions.  only one enabled at a time
                    for funame, fu in fus.items():
                        do = self.des[funame]
                        enable = fu_bitdict[funame]

                        # run this FunctionUnit if enabled
                        # route op, issue, busy, read flags and mask to FU
                        with m.If(enable):
                            # operand comes from the *local* decoder
                            comb += fu.oper_i.eq_from(do)
                            #comb += fu.oper_i.eq_from_execute1(e)
                            comb += fu.issue_i.eq(self.issue_i)
                            comb += self.busy_o.eq(fu.busy_o)
                            # rdmask, which is for registers, needs to come
                            # from the *main* decoder
                            rdmask = get_rdflags(self.e, fu)
                            comb += fu.rdmaskn.eq(~rdmask)

        return fu_bitdict

    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
        """Connect every FU that reads port *regname* of *regfile*.

        fspec is either one (rdflag, read, write, wid, fuspec-list) tuple
        as built by get_byregfiles(), or a list of such tuples when several
        logical ports share this physical port.  A PriorityPicker sized for
        all requesting FUs is created and stored in
        rdpickers[regfile][regname].
        """
        comb, sync = m.d.comb, m.d.sync
        regs = self.regs

        rpidx = regname

        # select the required read port.  these are pre-defined sizes
        rfile = regs.rf[regfile.lower()]
        rport = rfile.r_ports[rpidx]
        print("read regfile", rpidx, regfile, regs.rf.keys(),
              rfile, rfile.unary)

        # normalise to a list so merged and unmerged ports share one path
        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        rdflags = []
        pplen = 0
        reads = []
        ppoffs = []
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = fspec
            print ("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen) # record offset for picker
            pplen += len(fuspec)
            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name=name, reset_less=True)
            comb += rdflag.eq(rf)
            rdflags.append(rdflag)
            reads.append(read)

        print ("pplen", pplen)

        # create a priority picker to manage this port
        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

        rens = []
        addrs = []
        for i, fspec in enumerate(fspecs):
            (rf, read, write, wid, fuspec) = fspec
            # connect up the FU req/go signals, and the reg-read to the FU
            # and create a Read Broadcast Bus
            for pi, (funame, fu, idx) in enumerate(fuspec):
                # picker index = position within this fspec + its offset
                pi += ppoffs[i]

                # connect request-read to picker input, and output to go-rd
                fu_active = fu_bitdict[funame]
                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
                addr_en = Signal.like(reads[i], name="addr_en_"+name)
                pick = Signal(name="pick_"+name)     # picker input
                rp = Signal(name="rp_"+name)         # picker output
                delay_pick = Signal(name="dp_"+name) # read-enable "underway"

                # exclude any currently-enabled read-request (mask out active)
                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
                                ~delay_pick)
                comb += rdpick.i[pi].eq(pick)
                comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick

                # if picked, select read-port "reg select" number to port
                comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
                sync += delay_pick.eq(rp) # delayed "pick"
                comb += addr_en.eq(Mux(rp, reads[i], 0))

                # the read-enable happens combinatorially (see mux-bus below)
                # but it results in the data coming out on a one-cycle delay.
                if rfile.unary:
                    rens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    rens.append(rp)

                # use the *delayed* pick signal to put requested data onto bus
                with m.If(delay_pick):
                    # connect regfile port to input, creating fan-out Bus
                    src = fu.src_i[idx]
                    print("reg connect widths",
                          regfile, regname, pi, funame,
                          src.shape(), rport.data_o.shape())
                    # all FUs connect to same port
                    comb += src.eq(rport.data_o)

        # or-reduce the muxed read signals
        if rfile.unary:
            # for unary-addressed
            comb += rport.ren.eq(ortreereduce_sig(rens))
        else:
            # for binary-addressed
            comb += rport.addr.eq(ortreereduce_sig(addrs))
            comb += rport.ren.eq(Cat(*rens).bool())
            print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    def connect_rdports(self, m, fu_bitdict):
        """connect read ports

        orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker.
        """
        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        rdpickers = {}
        for regfile in byregfiles_rd:
            fuspecs = byregfiles_rdspec[regfile]
            rdpickers[regfile] = {}

            # argh.  an experiment to merge RA and RB in the INT regfile
            # (we have too many read/write ports)
            #if regfile == 'INT':
                #fuspecs['rabc'] = [fuspecs.pop('rb')]
                #fuspecs['rabc'].append(fuspecs.pop('rc'))
                #fuspecs['rabc'].append(fuspecs.pop('ra'))
            #if regfile == 'FAST':
            #    fuspecs['fast1'] = [fuspecs.pop('fast1')]
            #    if 'fast2' in fuspecs:
            #        fuspecs['fast1'].append(fuspecs.pop('fast2'))

            # for each named regfile port, connect up all FUs to that port
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect rd", regname, fspec)
                self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                    regname, fspec)

    def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
        """Connect every FU that writes port *regname* of *regfile*.

        fspec is either one (rdflag, read, write, wid, fuspec-list) tuple
        as built by get_byregfiles(), or a list of such tuples when several
        logical ports have been merged onto this physical port.  A
        PriorityPicker sized for all requesting FUs is created and stored
        in wrpickers[regfile][regname].
        """
        comb = m.d.comb
        regs = self.regs

        print("connect wr", regname, fspec)
        rpidx = regname

        # select the required write port.  these are pre-defined sizes
        print(regfile, regs.rf.keys())
        rfile = regs.rf[regfile.lower()]
        wport = rfile.w_ports[rpidx]

        # normalise to a list so merged and unmerged ports share one path
        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        pplen = 0
        ppoffs = []
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = fspec
            print ("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen) # record offset for picker
            pplen += len(fuspec)

        # create a priority picker to manage this port
        wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
        setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

        wsigs = []
        wens = []
        addrs = []
        for i, fspec in enumerate(fspecs):
            # connect up the FU req/go signals and the reg-read to the FU
            # these are arbitrated by Data.ok signals
            (rf, read, write, wid, fuspec) = fspec
            for pi, (funame, fu, idx) in enumerate(fuspec):
                # picker index = position within this fspec + its offset
                pi += ppoffs[i]

                # write-request comes from dest.ok
                dest = fu.get_out(idx)
                fu_dest_latch = fu.get_fu_out(idx)  # latched output
                name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                # NOTE: wrflag is driven here but is not part of the pick
                # expression below (its use there is commented out)
                wrflag = Signal(name=name, reset_less=True)
                comb += wrflag.eq(dest.ok & fu.busy_o)

                # connect request-write to picker input, and output to go-wr
                fu_active = fu_bitdict[funame]
                pick = fu.wr.rel_o[idx] & fu_active  # & wrflag
                comb += wrpick.i[pi].eq(pick)
                # create a single-pulse go write from the picker output
                wr_pick = Signal()
                comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
                comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

                # connect the regspec write "reg select" number to this port
                # only if one FU actually requests (and is granted) the port
                # will the write-enable be activated
                addr_en = Signal.like(write)
                wp = Signal()
                comb += wp.eq(wr_pick & wrpick.en_o)
                comb += addr_en.eq(Mux(wp, write, 0))
                if rfile.unary:
                    wens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    wens.append(wp)

                # connect regfile port to input
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      dest.shape(), wport.data_i.shape())
                wsigs.append(fu_dest_latch)

        # here is where we create the Write Broadcast Bus. simple, eh?
        comb += wport.data_i.eq(ortreereduce_sig(wsigs))
        if rfile.unary:
            # for unary-addressed
            comb += wport.wen.eq(ortreereduce_sig(wens))
        else:
            # for binary-addressed
            comb += wport.addr.eq(ortreereduce_sig(addrs))
            comb += wport.wen.eq(ortreereduce_sig(wens))

    def connect_wrports(self, m, fu_bitdict):
        """connect write ports

        orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker.

        note that the write-port wen, write-port data, and go_wr_i all need to
        be on the exact same clock cycle.  connect_wrport() drives all three
        from the comb domain.

        the INT ports 'o'/'o1' and the FAST ports 'fast1'/'fast2' are merged
        onto a single physical port each; a merge is skipped when the port
        it would merge does not exist for the configured Function Units.
        """
        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same for write ports.
        # BLECH!  complex code-duplication! BLECH!
        wrpickers = {}
        for regfile in byregfiles_wr:
            fuspecs = byregfiles_wrspec[regfile]
            wrpickers[regfile] = {}

            # argh, more port-merging.  every pop is guarded so that a
            # reduced set of Function Units (one without these ports)
            # does not raise KeyError.
            if regfile == 'INT' and 'o' in fuspecs:
                fuspecs['o'] = [fuspecs.pop('o')]
                if 'o1' in fuspecs:
                    fuspecs['o'].append(fuspecs.pop('o1'))
            if regfile == 'FAST' and 'fast1' in fuspecs:
                fuspecs['fast1'] = [fuspecs.pop('fast1')]
                if 'fast2' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast2'))

            for (regname, fspec) in sort_fuspecs(fuspecs):
                self.connect_wrport(m, fu_bitdict, wrpickers,
                                    regfile, regname, fspec)

    def get_byregfiles(self, readmode):
        """Group the FUs' register requirements by regfile and port.

        readmode selects read specs (True, from fu.get_in_spec) or write
        specs (False, from fu.get_out_spec).

        returns (byregfiles, byregfiles_spec):

        * byregfiles[regfile][idx] is a list of (funame, fu, idx) tuples
        * byregfiles_spec[regfile][regname] is a tuple
          (rdflag, read, write, wid, fuspec-list); rdflag and read are None
          in write mode, write is None in read mode.  the first four fields
          come from the first FU seen for that port.
        """
        mode = "read" if readmode else "write"
        fus = self.fus.fus
        e = self.e # decoded instruction to execute

        # dictionary of lists of regfile ports
        byregfiles = {}
        byregfiles_spec = {}
        for (funame, fu) in fus.items():
            print("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                if readmode:
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
                if readmode:
                    rdflag, read = regspec_decode_read(e, regfile, regname)
                    write = None
                else:
                    rdflag, read = None, None
                    # first element of the decoded pair is not used here
                    _, write = regspec_decode_write(e, regfile, regname)
                if regfile not in byregfiles:
                    byregfiles[regfile] = {}
                    byregfiles_spec[regfile] = {}
                if regname not in byregfiles_spec[regfile]:
                    byregfiles_spec[regfile][regname] = \
                        (rdflag, read, write, wid, [])
                # here we start to create "lanes"
                if idx not in byregfiles[regfile]:
                    byregfiles[regfile][idx] = []
                fuspec = (funame, fu, idx)
                byregfiles[regfile][idx].append(fuspec)
                byregfiles_spec[regfile][regname][4].append(fuspec)

        # ok just print that out, for convenience
        for regfile in byregfiles:
            print("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, read, write, wid, fuspec] = fspec
                print(" rf %s port %s lane: %s" % (mode, regfile, regname))
                print(" %s" % regname, wid, read, write, rdflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print(" ", funame, fu, idx, fusig)
                    print()

        return byregfiles, byregfiles_spec

    def __iter__(self):
        """Yield the ports of the FUs, the decode record and the L0 buffer."""
        yield from self.fus.ports()
        yield from self.e.ports()
        yield from self.l0.ports()
        # TODO: regs

    def ports(self):
        """Return everything __iter__ yields, as a list."""
        return list(self)
534
535
if __name__ == '__main__':
    # build a core around a test memory configuration and dump it as RTLIL
    core_pspec = TestMemPspec(
        ldst_ifacetype='testpi',
        imem_ifacetype='',
        addr_wid=48,
        mask_wid=8,
        reg_wid=64,
    )
    core = NonProductionCore(core_pspec)
    rtlil_text = rtlil.convert(core, ports=core.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(rtlil_text)