set up LoadStore1 in ConfigMemoryPortInterface and hook it up in MMU
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 """
21
22 from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
23 from nmigen.cli import rtlil
24
25 from openpower.decoder.power_decoder2 import PowerDecodeSubset
26 from openpower.decoder.power_regspec_map import regspec_decode_read
27 from openpower.decoder.power_regspec_map import regspec_decode_write
28
29 from nmutil.picker import PriorityPicker
30 from nmutil.util import treereduce
31
32 from soc.fu.compunits.compunits import AllFunctionUnits
33 from soc.regfile.regfiles import RegFiles
34 from openpower.decoder.decode2execute1 import Decode2ToExecute1Type
35 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
36 from openpower.decoder.power_decoder2 import get_rdflags
37 from openpower.decoder.decode2execute1 import Data
38 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
39 from soc.config.test.test_loadstore import TestMemPspec
40 from openpower.decoder.power_enums import MicrOp
41 from soc.config.state import CoreState
42
43 import operator
44
45 from nmutil.util import rising_edge
46
47
48 # helper function for reducing a list of signals down to a parallel
49 # ORed single signal.
def ortreereduce(tree, attr="data_o"):
    """OR-reduce one attribute across a list of objects.

    tree: the list of objects to reduce
    attr: name of the attribute fetched from each object (default "data_o")

    returns whatever treereduce produces when combining the fetched
    attributes with operator.or_ - i.e. a single parallel-ORed value.
    """
    def fetch(item):
        # look up the requested attribute on each element of the tree
        return getattr(item, attr)

    return treereduce(tree, operator.or_, fetch)
52
53
def ortreereduce_sig(tree):
    """OR-reduce a list of values directly (no attribute lookup).

    tree: the list of items to reduce; each item is used as-is

    returns whatever treereduce produces when combining the items
    with operator.or_.
    """
    def identity(item):
        # the items themselves are what gets ORed together
        return item

    return treereduce(tree, operator.or_, identity)
56
57
58 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the (regname, fspec) pairs of fuspecs, "full" ports first.

    fuspecs: dict mapping register port name to its spec

    returns a list of (regname, fspec) tuples: every entry whose name
    starts with "full" (in dict order), followed by all the remaining
    entries (also in dict order).
    """
    full_ports = []
    other_ports = []
    for name, spec in fuspecs.items():
        # partition in a single pass; relative order within each
        # group follows the dict's iteration order
        bucket = full_ports if name.startswith("full") else other_ports
        bucket.append((name, spec))
    return full_ports + other_ports
68
69
class NonProductionCore(Elaboratable):
    """Simple (non-production) core.

    Creates the Function Units, the Register Files, a main decoder record
    and one subset-decoder per Function Unit, then (in elaborate) connects
    each Function Unit to the register file read and write ports, with one
    PriorityPicker per regfile port arbitrating access.
    """

    def __init__(self, pspec):
        """Create the sub-components of the core.

        pspec: parameter specification object.  the optional attributes
               "svp64", "regreduce" (and, in elaborate, "nocore") are
               tested with hasattr before being read.
        """
        self.pspec = pspec

        # test if SVP64 is to be enabled
        self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)

        # test to see if regfile ports should be reduced
        self.regreduce_en = (hasattr(pspec, "regreduce") and
                             (pspec.regreduce == True))

        # single LD/ST funnel for memory access
        self.l0 = l0 = TstL0CacheBuffer(pspec, n_units=1)
        pi = l0.l0.dports[0]

        # function units (only one each)
        # only include mmu if enabled in pspec
        self.fus = AllFunctionUnits(pspec, pilist=[pi])

        # link LoadStore1 into MMU (only when both the mmu FU and an
        # "ldst" attribute on the l0 cmpi actually exist)
        if hasattr(self.fus, 'mmu') and hasattr(l0.cmpi, "ldst"):
            self.fus.mmu.set_ldst_interface(l0.cmpi.ldst)

        # register files (yes plural)
        self.regs = RegFiles(pspec)

        # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
        self.e = Decode2ToExecute1Type("core", opkls=IssuerDecode2ToOperand,
                                       regreduce_en=self.regreduce_en)

        # SVP64 RA_OR_ZERO needs to know if the relevant EXTRA2/3 field is zero
        self.sv_a_nz = Signal()

        # state and raw instruction
        self.state = CoreState("core")
        self.raw_insn_i = Signal(32) # raw instruction
        self.bigendian_i = Signal() # bigendian - TODO, set by MSR.BE

        # issue/valid/busy signalling
        self.ivalid_i = Signal(reset_less=True) # instruction is valid
        self.issue_i = Signal(reset_less=True)
        self.busy_o = Signal(name="corebusy_o", reset_less=True)

        # start/stop and terminated signalling
        self.core_terminate_o = Signal(reset=0) # indicates stopped

        # create per-FU instruction decoders (subsetted)
        # decoders: funame -> PowerDecodeSubset instance
        # des:      funame -> that decoder's "do" (decoded operand) record
        self.decoders = {}
        self.des = {}

        for funame, fu in self.fus.fus.items():
            f_name = fu.fnunit.name
            fnunit = fu.fnunit.value  # NOTE(review): assigned but unused here
            opkls = fu.opsubsetkls
            if f_name == 'TRAP':
                # the TRAP unit gets no subset decoder: elaborate() points
                # it at the main decoder instead.
                # NOTE(review): self.trapunit is only ever set here, so it
                # is undefined if no FU is named TRAP - elaborate() reads
                # it unconditionally.  confirm a TRAP FU always exists.
                self.trapunit = funame
                continue
            self.decoders[funame] = PowerDecodeSubset(None, opkls, f_name,
                                                      final=True,
                                                      state=self.state,
                                                      svp64_en=self.svp64_en,
                                                      regreduce_en=self.regreduce_en)
            self.des[funame] = self.decoders[funame].do

        # give the mmu0 decoder a reference to the spr0 decoder
        # NOTE(review): raises KeyError if "mmu0" exists without "spr0"
        if "mmu0" in self.decoders:
            self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]

    def elaborate(self, platform):
        """Build the Module: submodules, decoders, then regfile ports.

        if pspec has a true "nocore" attribute, a module containing only
        a single toggling dummy signal is returned instead.
        """
        m = Module()
        # for testing purposes, to cut down on build time in coriolis2
        if hasattr(self.pspec, "nocore") and self.pspec.nocore == True:
            x = Signal() # dummy signal
            m.d.sync += x.eq(~x)
            return m
        comb = m.d.comb

        m.submodules.fus = self.fus
        m.submodules.l0 = l0 = self.l0
        self.regs.elaborate_into(m, platform)
        # NOTE(review): regs and fus are not used again in this method
        regs = self.regs
        fus = self.fus.fus

        # connect decoders: each is added as submodule "dec_<fn_name>" and
        # fed the same raw instruction and endianness as the core
        for k, v in self.decoders.items():
            setattr(m.submodules, "dec_%s" % v.fn_name, v)
            comb += v.dec.raw_opcode_in.eq(self.raw_insn_i)
            comb += v.dec.bigendian.eq(self.bigendian_i)
            # sigh due to SVP64 RA_OR_ZERO detection connect these too
            comb += v.sv_a_nz.eq(self.sv_a_nz)

        # ssh, cheat: trap uses the main decoder because of the rewriting
        self.des[self.trapunit] = self.e.do

        # connect up Function Units, then read/write ports
        fu_bitdict = self.connect_instruction(m)
        self.connect_rdports(m, fu_bitdict)
        self.connect_wrports(m, fu_bitdict)

        return m

    def connect_instruction(self, m):
        """connect_instruction

        uses decoded (from PowerOp) function unit information from CSV files
        to ascertain which Function Unit should deal with the current
        instruction.

        some (such as OP_ATTN, OP_NOP) are dealt with here, including
        ignoring it and halting the processor.  OP_NOP is a bit annoying
        because the issuer expects busy flag still to be raised then lowered.
        (this requires a fake counter to be set).

        returns fu_bitdict: a dict mapping each FU name to its one-bit
        slice of the fu_enable signal.
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus

        # enable-signals for each FU, get one bit for each FU (by name)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_bitdict = {}
        for i, funame in enumerate(fus.keys()):
            fu_bitdict[funame] = fu_enable[i]

        # enable the required Function Unit based on the opcode decode
        # note: this *only* works correctly for simple core when one and
        # *only* one FU is allocated per instruction
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            enable = Signal(name="en_%s" % funame, reset_less=True)
            # enabled when the decoded fn_unit shares any bit with this FU
            comb += enable.eq((self.e.do.fn_unit & fnunit).bool())
            comb += fu_bitdict[funame].eq(enable)

        # sigh - need a NOP counter: while non-zero it counts down by one
        # each clock and holds busy_o high
        counter = Signal(2)
        with m.If(counter != 0):
            sync += counter.eq(counter - 1)
            comb += self.busy_o.eq(1)

        with m.If(self.ivalid_i): # run only when valid
            with m.Switch(self.e.do.insn_type):
                # check for ATTN: halt if true
                with m.Case(MicrOp.OP_ATTN):
                    m.d.sync += self.core_terminate_o.eq(1)

                # NOP: no FU is involved, so load the counter and raise
                # busy_o directly
                with m.Case(MicrOp.OP_NOP):
                    sync += counter.eq(2)
                    comb += self.busy_o.eq(1)

                with m.Default():
                    # connect up instructions.  only one enabled at a time
                    for funame, fu in fus.items():
                        do = self.des[funame]
                        enable = fu_bitdict[funame]

                        # run this FunctionUnit if enabled
                        # route op, issue, busy, read flags and mask to FU
                        with m.If(enable):
                            # operand comes from the *local* decoder
                            comb += fu.oper_i.eq_from(do)
                            #comb += fu.oper_i.eq_from_execute1(e)
                            comb += fu.issue_i.eq(self.issue_i)
                            comb += self.busy_o.eq(fu.busy_o)
                            # rdmask, which is for registers, needs to come
                            # from the *main* decoder
                            rdmask = get_rdflags(self.e, fu)
                            comb += fu.rdmaskn.eq(~rdmask)

        return fu_bitdict

    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
        """connect one regfile read port to every FU that wants it

        m:          the Module being built
        fu_bitdict: FU name -> enable bit (from connect_instruction)
        rdpickers:  dict-of-dicts; the PriorityPicker created here is
                    stored at rdpickers[regfile][regname]
        regfile:    regfile name (lower-cased to index self.regs.rf)
        regname:    name of the read port within that regfile
        fspec:      either one (rf, read, write, wid, fuspec) tuple or a
                    list of them (a list is used when ports were merged)
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus  # NOTE(review): unused in this method
        regs = self.regs

        rpidx = regname

        # select the required read port.  these are pre-defined sizes
        rfile = regs.rf[regfile.lower()]
        rport = rfile.r_ports[rpidx]
        print("read regfile", rpidx, regfile, regs.rf.keys(),
              rfile, rfile.unary)

        # normalise to a list so single and merged ports share one code path
        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        rdflags = []
        pplen = 0
        reads = []
        ppoffs = []
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = fspec
            print ("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen) # record offset for picker
            pplen += len(fuspec)
            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name=name, reset_less=True)
            comb += rdflag.eq(rf)
            rdflags.append(rdflag)
            reads.append(read)

        print ("pplen", pplen)

        # create a priority picker to manage this port: one input per
        # (fspec, FU) pair, pplen inputs in total
        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

        rens = []
        addrs = []
        for i, fspec in enumerate(fspecs):
            (rf, read, write, wid, fuspec) = fspec
            # connect up the FU req/go signals, and the reg-read to the FU
            # and create a Read Broadcast Bus
            for pi, (funame, fu, idx) in enumerate(fuspec):
                # shift to this fspec's slice of the shared picker
                pi += ppoffs[i]

                # connect request-read to picker input, and output to go-rd
                fu_active = fu_bitdict[funame]
                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
                addr_en = Signal.like(reads[i], name="addr_en_"+name)
                pick = Signal(name="pick_"+name)     # picker input
                rp = Signal(name="rp_"+name)         # picker output
                delay_pick = Signal(name="dp_"+name) # read-enable "underway"

                # exclude any currently-enabled read-request (mask out active)
                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
                                ~delay_pick)
                comb += rdpick.i[pi].eq(pick)
                comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick

                # if picked, select read-port "reg select" number to port
                comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
                sync += delay_pick.eq(rp) # delayed "pick"
                comb += addr_en.eq(Mux(rp, reads[i], 0))

                # the read-enable happens combinatorially (see mux-bus below)
                # but it results in the data coming out on a one-cycle delay.
                if rfile.unary:
                    # unary: the gated "reg select" is itself the enable
                    rens.append(addr_en)
                else:
                    # binary: gated address plus a separate enable bit
                    addrs.append(addr_en)
                    rens.append(rp)

                # use the *delayed* pick signal to put requested data onto bus
                with m.If(delay_pick):
                    # connect regfile port to input, creating fan-out Bus
                    src = fu.src_i[idx]
                    print("reg connect widths",
                          regfile, regname, pi, funame,
                          src.shape(), rport.data_o.shape())
                    # all FUs connect to same port
                    comb += src.eq(rport.data_o)

        # or-reduce the muxed read signals
        if rfile.unary:
            # for unary-addressed
            comb += rport.ren.eq(ortreereduce_sig(rens))
        else:
            # for binary-addressed
            comb += rport.addr.eq(ortreereduce_sig(addrs))
            comb += rport.ren.eq(Cat(*rens).bool())
            print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    def connect_rdports(self, m, fu_bitdict):
        """connect read ports

        orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker.

        when regreduce_en is set, the INT ports rb, rc and ra are merged
        (in that order) under the name "rabc", and the FAST port fast2
        (if present) is merged into fast1.
        """
        comb, sync = m.d.comb, m.d.sync
        # NOTE(review): fus and regs are unused in this method
        fus = self.fus.fus
        regs = self.regs

        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        rdpickers = {}
        for regfile, spec in byregfiles_rd.items():
            fuspecs = byregfiles_rdspec[regfile]
            rdpickers[regfile] = {}

            # argh.  an experiment to merge RA and RB in the INT regfile
            # (we have too many read/write ports)
            if self.regreduce_en:
                if regfile == 'INT':
                    # pop raises KeyError if any of rb/rc/ra is missing
                    fuspecs['rabc'] = [fuspecs.pop('rb')]
                    fuspecs['rabc'].append(fuspecs.pop('rc'))
                    fuspecs['rabc'].append(fuspecs.pop('ra'))
                if regfile == 'FAST':
                    fuspecs['fast1'] = [fuspecs.pop('fast1')]
                    if 'fast2' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast2'))

            # for each named regfile port, connect up all FUs to that port
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect rd", regname, fspec)
                self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                    regname, fspec)

    def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
        """connect one regfile write port to every FU that wants it

        m:          the Module being built
        fu_bitdict: FU name -> enable bit (from connect_instruction)
        wrpickers:  dict-of-dicts; the PriorityPicker created here is
                    stored at wrpickers[regfile][regname]
        regfile:    regfile name (lower-cased to index self.regs.rf)
        regname:    name of the write port within that regfile
        fspec:      either one (rf, read, write, wid, fuspec) tuple or a
                    list of them (a list is used when ports were merged)
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus  # NOTE(review): unused in this method
        regs = self.regs

        print("connect wr", regname, fspec)
        rpidx = regname

        # select the required write port.  these are pre-defined sizes
        print(regfile, regs.rf.keys())
        rfile = regs.rf[regfile.lower()]
        wport = rfile.w_ports[rpidx]

        # normalise to a list so single and merged ports share one code path
        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        pplen = 0
        writes = []  # NOTE(review): never appended to or read
        ppoffs = []
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = fspec
            print ("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen) # record offset for picker
            pplen += len(fuspec)

        # create a priority picker to manage this port
        wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
        setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

        wsigs = []
        wens = []
        addrs = []
        for i, fspec in enumerate(fspecs):
            # connect up the FU req/go signals and the reg-write to the FU
            # these are arbitrated by Data.ok signals
            (rf, read, write, wid, fuspec) = fspec
            for pi, (funame, fu, idx) in enumerate(fuspec):
                # shift to this fspec's slice of the shared picker
                pi += ppoffs[i]

                # write-request comes from dest.ok
                dest = fu.get_out(idx)
                fu_dest_latch = fu.get_fu_out(idx)  # latched output
                name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                # NOTE(review): wrflag is driven but not used below - the
                # "& wrflag" term in the pick expression is commented out
                wrflag = Signal(name=name, reset_less=True)
                comb += wrflag.eq(dest.ok & fu.busy_o)

                # connect request-write to picker input, and output to go-wr
                fu_active = fu_bitdict[funame]
                pick = fu.wr.rel_o[idx] & fu_active  # & wrflag
                comb += wrpick.i[pi].eq(pick)
                # create a single-pulse go write from the picker output
                wr_pick = Signal()
                comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
                comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

                # connect the regspec write "reg select" number to this port
                # only if one FU actually requests (and is granted) the port
                # will the write-enable be activated
                addr_en = Signal.like(write)
                wp = Signal()
                # wr_pick already includes wrpick.en_o (see above)
                comb += wp.eq(wr_pick & wrpick.en_o)
                comb += addr_en.eq(Mux(wp, write, 0))
                if rfile.unary:
                    # unary: the gated "reg select" is itself the enable
                    wens.append(addr_en)
                else:
                    # binary: gated address plus a separate enable bit
                    addrs.append(addr_en)
                    wens.append(wp)

                # connect regfile port to input
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      dest.shape(), wport.data_i.shape())
                wsigs.append(fu_dest_latch)

        # here is where we create the Write Broadcast Bus. simple, eh?
        comb += wport.data_i.eq(ortreereduce_sig(wsigs))
        if rfile.unary:
            # for unary-addressed
            comb += wport.wen.eq(ortreereduce_sig(wens))
        else:
            # for binary-addressed
            comb += wport.addr.eq(ortreereduce_sig(addrs))
            comb += wport.wen.eq(ortreereduce_sig(wens))

    def connect_wrports(self, m, fu_bitdict):
        """connect write ports

        orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker.

        note that the write-port wen, write-port data, and go_wr_i all need to
        be on the exact same clock cycle.

        NOTE(review): this docstring previously stated that, due to a
        combinatorial loop bug, these signals "all use sync".  the code in
        connect_wrport currently assigns all three in the comb domain, so
        that remark looks stale - confirm and remove.

        when regreduce_en is set, the INT port o1 is merged into o, and
        the FAST port fast2 (if present) is merged into fast1.
        """
        comb, sync = m.d.comb, m.d.sync
        # NOTE(review): fus and regs are unused in this method
        fus = self.fus.fus
        regs = self.regs
        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same for write ports.
        # BLECH!  complex code-duplication!  BLECH!
        wrpickers = {}
        for regfile, spec in byregfiles_wr.items():
            fuspecs = byregfiles_wrspec[regfile]
            wrpickers[regfile] = {}

            if self.regreduce_en:
                # argh, more port-merging
                if regfile == 'INT':
                    # pop raises KeyError if o or o1 is missing
                    fuspecs['o'] = [fuspecs.pop('o')]
                    fuspecs['o'].append(fuspecs.pop('o1'))
                if regfile == 'FAST':
                    fuspecs['fast1'] = [fuspecs.pop('fast1')]
                    if 'fast2' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast2'))

            for (regname, fspec) in sort_fuspecs(fuspecs):
                self.connect_wrport(m, fu_bitdict, wrpickers,
                                    regfile, regname, fspec)

    def get_byregfiles(self, readmode):
        """group every FU's register ports by regfile

        readmode: True to collect source (read) ports, False to collect
                  destination (write) ports

        returns a tuple (byregfiles, byregfiles_spec):

        * byregfiles[regfile][idx] is a list of (funame, fu, idx) tuples
        * byregfiles_spec[regfile][regname] is a tuple
          (rdflag, read, write, wid, fuspecs) where fuspecs is the list
          of (funame, fu, idx) tuples sharing that named port.  rdflag and
          read are None in write mode; write is None in read mode.  the
          first FU to name a given port determines the stored
          rdflag/read/write/wid values.

        the collected information is also printed to stdout.
        """

        mode = "read" if readmode else "write"
        regs = self.regs  # NOTE(review): unused in this method
        fus = self.fus.fus
        e = self.e # decoded instruction to execute

        # dictionary of lists of regfile ports
        byregfiles = {}
        byregfiles_spec = {}
        for (funame, fu) in fus.items():
            print("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                if readmode:
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
                if readmode:
                    rdflag, read = regspec_decode_read(e, regfile, regname)
                    write = None
                else:
                    rdflag, read = None, None
                    # wrport is not stored anywhere after this call
                    wrport, write = regspec_decode_write(e, regfile, regname)
                if regfile not in byregfiles:
                    byregfiles[regfile] = {}
                    byregfiles_spec[regfile] = {}
                if regname not in byregfiles_spec[regfile]:
                    byregfiles_spec[regfile][regname] = \
                        (rdflag, read, write, wid, [])
                # here we start to create "lanes"
                if idx not in byregfiles[regfile]:
                    byregfiles[regfile][idx] = []
                fuspec = (funame, fu, idx)
                byregfiles[regfile][idx].append(fuspec)
                byregfiles_spec[regfile][regname][4].append(fuspec)

        # ok just print that out, for convenience
        for regfile, spec in byregfiles.items():
            print("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, read, write, wid, fuspec] = fspec
                print(" rf %s port %s lane: %s" % (mode, regfile, regname))
                print(" %s" % regname, wid, read, write, rdflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print(" ", funame, fu, idx, fusig)
                    print()

        return byregfiles, byregfiles_spec

    def __iter__(self):
        """yield the ports of the function units, the decoder record and
        the l0 cache buffer, in that order (regfile ports not included)
        """
        yield from self.fus.ports()
        yield from self.e.ports()
        yield from self.l0.ports()
        # TODO: regs

    def ports(self):
        """return everything produced by __iter__ as a list"""
        return list(self)
555
556
if __name__ == '__main__':
    # parameters for the test core: "testpi" load/store interface,
    # no instruction-memory interface type, 48-bit addresses, 8-bit
    # mask and 64-bit registers
    spec_args = dict(ldst_ifacetype='testpi',
                     imem_ifacetype='',
                     addr_wid=48,
                     mask_wid=8,
                     reg_wid=64)
    core = NonProductionCore(TestMemPspec(**spec_args))

    # convert the core to RTLIL and save it
    rtlil_text = rtlil.convert(core, ports=core.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(rtlil_text)