Merge branch 'master' of ssh://git.libre-riscv.org:922/soc
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 (update: actually this is being added now:
21 https://bugs.libre-soc.org/show_bug.cgi?id=737)
22 """
23
24 from nmigen import (Elaboratable, Module, Signal, ResetSignal, Cat, Mux,
25 Const)
26 from nmigen.cli import rtlil
27
28 from openpower.decoder.power_decoder2 import PowerDecodeSubset
29 from openpower.decoder.power_regspec_map import regspec_decode
30 from openpower.sv.svp64 import SVP64Rec
31
32 from nmutil.picker import PriorityPicker
33 from nmutil.util import treereduce
34 from nmutil.singlepipe import ControlBase
35
36 from soc.fu.compunits.compunits import AllFunctionUnits, LDSTFunctionUnit
37 from soc.regfile.regfiles import RegFiles
38 from openpower.decoder.power_decoder2 import get_rdflags
39 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
40 from soc.config.test.test_loadstore import TestMemPspec
41 from openpower.decoder.power_enums import MicrOp, Function
42 from soc.simple.core_data import CoreInput, CoreOutput
43
44 from collections import defaultdict, namedtuple
45 import operator
46
47 from nmutil.util import rising_edge
48
# FUSpec: one Function Unit's attachment to a register-file port.
#   funame - name of the Function Unit (used as key into the per-FU dicts)
#   fu     - the Function Unit object itself
#   idx    - index of the operand/result on that FU (used to index the
#            FU's per-port arrays such as rd_rel_o, go_rd_i and src_i)
FUSpec = namedtuple("FUSpec", ["funame", "fu", "idx"])
# ByRegSpec: everything that wants access to one regfile port.
#   okflag  - flag indicating that the register is actually to be accessed
#   regport - the register number / port-select value from the decoder
#   wid     - presumably the data width; unused in the visible code,
#             TODO confirm
#   specs   - list of FUSpec, one per Function Unit sharing this port
ByRegSpec = namedtuple("ByRegSpec", ["okflag", "regport", "wid", "specs"])
51
52 # helper function for reducing a list of signals down to a parallel
53 # ORed single signal.
def ortreereduce(tree, attr="o_data"):
    """OR together one attribute of every item in *tree*.

    The attribute named by *attr* is fetched from each item and the
    results are combined with ``operator.or_`` by ``treereduce``.
    """
    def fetch(item):
        # pick out the attribute that is to be ORed
        return getattr(item, attr)

    return treereduce(tree, operator.or_, fetch)
56
57
def ortreereduce_sig(tree):
    """OR together the items of *tree* themselves (no attribute lookup)."""
    def unchanged(item):
        # items are used directly as the values to be ORed
        return item

    return treereduce(tree, operator.or_, unchanged)
60
61
62 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the (regname, fspec) pairs of *fuspecs*, "full" ports first.

    Entries whose register name starts with "full" come first, followed
    by all the others; within each group the dictionary's own iteration
    order is kept.  A plain list of 2-tuples is returned.
    """
    pairs = list(fuspecs.items())
    full_first = [(name, spec) for (name, spec) in pairs
                  if name.startswith("full")]
    the_rest = [(name, spec) for (name, spec) in pairs
                if not name.startswith("full")]
    return full_first + the_rest
72
73
74 # a hazard bitvector "remap" function which returns an AST expression
75 # that remaps read/write hazard regfile port numbers to either a full
76 # bitvector or a reduced subset one. SPR for example is reduced to a
77 # single bit.
78 # CRITICALLY-IMPORTANT NOTE: these bitvectors *have* to match up per
79 # regfile! therefore the remapping is per regfile, *NOT* per regfile
80 # port and certainly not based on whether it is a read port or write port.
81 # note that any reductions here will result in degraded performance due
82 # to conflicts, but at least it keeps the hazard matrix sizes down to "sane"
def bitvector_remap(regfile, rfile, port):
    """Remap a regfile port number into a hazard-bitvector expression.

    Parameters:
        regfile: regfile name, upper-case ('CR', 'XER', 'SVSTATE',
                 'FAST', 'SPR' or 'INT')
        rfile:   the register file object; only its ``unary`` attribute
                 is consulted, and only for FAST, SPR and INT
        port:    the port/register number (or an expression for it)

    Returns:
        * ``port`` unchanged when the regfile is already unary-addressed
          (always the case for CR, XER and SVSTATE)
        * ``1 << port`` (binary converted to unary) for a binary-addressed
          FAST, SPR or INT regfile
        * ``None`` for any other regfile name
    """
    # CR (8 bits at the moment, no SVP64), XER (3 bits) and SVSTATE
    # are unary already: no remap needed
    if regfile in ('CR', 'XER', 'SVSTATE'):
        return port
    # FAST (9 entries), SPR (10 bits) and INT may be either unary or
    # binary addressed: convert binary to unary.
    # NOTE: no reduction of the SPR vector down to a single bit is
    # performed here, it is treated exactly like FAST and INT.
    if regfile in ('FAST', 'SPR', 'INT'):
        if rfile.unary:
            return port
        return 1 << port
    # unrecognised regfile: nothing to remap
    return None
113
114
115 # derive from ControlBase rather than have a separate Stage instance,
116 # this is simpler to do
117 class NonProductionCore(ControlBase):
def __init__(self, pspec):
    """Create the core's Function Units, regfiles, I/O and decoders.

    pspec is the parameter-specification object.  The optional boolean
    attributes svp64, regreduce and allow_overlap, the optional
    core_type attribute, and ldst_ifacetype are read from it here.
    """
    self.pspec = pspec

    # optional boolean switches: a missing attribute counts as "off"

    # is SVP64 to be enabled?
    self.svp64_en = getattr(pspec, "svp64", False) == True

    # should regfile ports be reduced?
    self.regreduce_en = getattr(pspec, "regreduce", False) == True

    # is overlapping of instructions allowed?  (not normally enabled
    # for TestIssuer FSM but useful for checking the bitvector hazard
    # detection, before doing In-Order)
    self.allow_overlap = getattr(pspec, "allow_overlap", False) == True

    # hazard vectors are only created when overlap is allowed
    self.make_hazard_vecs = self.allow_overlap

    # core type: "fsm" unless the pspec says otherwise
    self.core_type = getattr(pspec, "core_type", "fsm")

    # the flags above must exist before this call: the core acts as
    # its own Stage (ispec reads them)
    super().__init__(stage=self)

    # single LD/ST funnel for memory access
    self.l0 = TstL0CacheBuffer(pspec, n_units=1)
    l0 = self.l0
    pi = l0.l0.dports[0]

    # function units (only one each); mmu only included if enabled
    # in pspec
    self.fus = AllFunctionUnits(pspec, pilist=[pi])

    # link LoadStore1 into MMU
    mmu = self.fus.get_fu('mmu0')
    print ("core pspec", pspec.ldst_ifacetype)
    print ("core mmu", mmu)
    if mmu is not None:
        lsi = l0.cmpi.lsmem.lsi
        print ("core lsmem.lsi", lsi)
        mmu.alu.set_ldst_interface(lsi)

    # register files (yes plural)
    self.regs = RegFiles(pspec, make_hazard_vecs=self.make_hazard_vecs)

    # set up input and output: unusual requirement to set data directly
    # (due to the way that the core is set up in a different domain,
    # see TestIssuer.setup_peripherals
    i_data, o_data = self.new_specs(None)
    self.p.i_data, self.n.o_data = i_data, o_data
    self.i, self.o = self.p.i_data, self.n.o_data

    # actual internal input data used (captured)
    self.ireg = self.ispec()

    # per-FU (subsetted) "satellite" instruction decoders.  these cut
    # wire fan-out from the one (main) PowerDecoder2 (used directly by
    # the trap unit) to the *twelve* (or more) Function Units: 32 wires
    # (the instruction) to each, rather than well over a 200 wire
    # fan-out (to 12 ALUs).
    self.decoders = {}
    self.des = {}

    # eep, these should be *per FU* i.e. for FunctionUnitBaseMulti
    # they should be shared (put into the ALU *once*).
    for funame, fu in self.fus.fus.items():
        f_name = fu.fnunit.name
        opkls = fu.opsubsetkls
        if f_name == 'TRAP':
            # TRAP uses the *main* decoder: just remember its name
            self.trapunit = funame
            continue
        assert funame not in self.decoders
        subdec = PowerDecodeSubset(None, opkls, f_name,
                                   final=True,
                                   state=self.ireg.state,
                                   svp64_en=self.svp64_en,
                                   regreduce_en=self.regreduce_en)
        self.decoders[funame] = subdec
        self.des[funame] = subdec.do

    # per-Function Unit write-after-write hazard signals (yes, really,
    # this should have been added in ReservationStations but hey)
    for funame, fu in self.fus.fus.items():
        fu._waw_hazard = Signal(name="waw_%s" % funame)

    # share the SPR decoder with the MMU if it exists
    if "mmu0" in self.decoders:
        self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]
207
208 # next 3 functions are Stage API Compliance
209 def setup(self, m, i):
210 pass
211
def ispec(self):
    """Stage API: create a fresh CoreInput for this core's settings."""
    spec = CoreInput(self.pspec, self.svp64_en, self.regreduce_en)
    return spec
214
def ospec(self):
    """Stage API: create a fresh CoreOutput."""
    spec = CoreOutput()
    return spec
217
218 # elaborate function to create HDL
def elaborate(self, platform):
    """Create the HDL for the core and return the Module.

    Adds the Function Units, the LD/ST buffer and the register files,
    then wires up the decoders, instruction issue, and the regfile
    read and write ports.  When pspec.nocore is True only a dummy
    toggling signal is produced.
    """
    m = super().elaborate(platform)

    # for testing purposes, to cut down on build time in coriolis2
    if getattr(self.pspec, "nocore", False) == True:
        x = Signal() # dummy signal
        m.d.sync += x.eq(~x)
        return m
    comb = m.d.comb

    m.submodules.fus = self.fus
    m.submodules.l0 = self.l0
    self.regs.elaborate_into(m, platform)

    # amalgamate write-hazards into a single top-level Signal
    self.waw_hazard = Signal()
    whaz = [fu._waw_hazard for fu in self.fus.fus.values()]
    comb += self.waw_hazard.eq(Cat(*whaz).bool())

    # connect decoders
    self.connect_satellite_decoders(m)

    # ssh, cheat: trap uses the main decoder because of the rewriting
    self.des[self.trapunit] = self.ireg.e.do

    # connect up Function Units, then read/write ports, and hazard conflict
    self.issue_conflict = Signal()
    fu_bitdict, fu_selected = self.connect_instruction(m)
    raw_hazard = self.connect_rdports(m, fu_bitdict, fu_selected)
    self.connect_wrports(m, fu_bitdict, fu_selected)
    if self.allow_overlap:
        comb += self.issue_conflict.eq(raw_hazard)

    # note if an exception happened.  in a pipelined or OoO design
    # this needs to be accompanied by "shadowing" (or stalling)
    el = [exc.happened for exc in self.fus.excs.values()]
    if el: # at least one exception
        comb += self.o.exc_happened.eq(Cat(*el).bool())

    return m
265
def connect_satellite_decoders(self, m):
    """Add every satellite decoder as a submodule and feed it.

    Each decoder receives the raw instruction, the endianness and
    sv_a_nz from self.ireg.  The SVP64-related inputs are connected
    only when SVP64 is enabled.
    """
    comb = m.d.comb
    ireg = self.ireg
    for funame, subdec in self.decoders.items():
        # as subset decoders this massively reduces wire fanout given
        # the large number of ALUs
        m.submodules["dec_%s" % funame] = subdec
        comb += subdec.dec.raw_opcode_in.eq(ireg.raw_insn_i)
        comb += subdec.dec.bigendian.eq(ireg.bigendian_i)
        # sigh due to SVP64 RA_OR_ZERO detection connect these too
        comb += subdec.sv_a_nz.eq(ireg.sv_a_nz)
        if self.svp64_en:
            comb += subdec.pred_sm.eq(ireg.sv_pred_sm)
            comb += subdec.pred_dm.eq(ireg.sv_pred_dm)
            if funame != self.trapunit:
                # pass through SVP64 RM
                comb += subdec.sv_rm.eq(ireg.sv_rm)
                comb += subdec.is_svp64_mode.eq(ireg.is_svp64_mode)
                # only the LDST PowerDecodeSubset *actually* needs to
                # know to use the alternative decoder.  this is all
                # a terrible hack
                if funame.lower().startswith("ldst"):
                    comb += subdec.use_svp64_ldst_dec.eq(
                                            ireg.use_svp64_ldst_dec)
291
def connect_instruction(self, m):
    """connect_instruction

    uses decoded (from PowerOp) function unit information from CSV files
    to ascertain which Function Unit should deal with the current
    instruction.

    some (such as OP_ATTN, OP_NOP) are dealt with here, including
    ignoring it and halting the processor.  OP_NOP is a bit annoying
    because the issuer expects busy flag still to be raised then lowered.
    (this requires a fake counter to be set).

    returns a tuple (fu_bitdict, fu_selected) of two dictionaries, both
    keyed by Function Unit name:

    * fu_bitdict[funame] is the "enable" bit: this FU was picked to
      receive the current instruction
    * fu_selected[funame] is the "busy-or-picked" bit (fu.busy_o ORed
      with the picker output)
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus

    # indicate if core is busy
    busy_o = self.o.busy_o
    any_busy_o = self.o.any_busy_o

    # connect up temporary copy of incoming instruction. the FSM will
    # either blat the incoming instruction (if valid) into self.ireg
    # or if the instruction could not be delivered, keep dropping the
    # latched copy into ireg
    ilatch = self.ispec()
    self.instr_active = Signal()

    # enable/busy-signals for each FU, get one bit for each FU (by name)
    fu_enable = Signal(len(fus), reset_less=True)
    fu_busy = Signal(len(fus), reset_less=True)
    fu_bitdict = {}
    fu_selected = {}
    for i, funame in enumerate(fus.keys()):
        fu_bitdict[funame] = fu_enable[i]
        fu_selected[funame] = fu_busy[i]

    # identify function units and create a list by fnunit so that
    # PriorityPickers can be created for selecting one of them that
    # isn't busy at the time the incoming instruction needs passing on
    by_fnunit = defaultdict(list)
    for fname, member in Function.__members__.items():
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            if member.value & fnunit: # this FU handles this type of op
                by_fnunit[fname].append((funame, fu)) # add by Function

    # ok now just print out the list of FUs by Function, because we can
    for fname, fu_list in by_fnunit.items():
        print ("FUs by type", fname, fu_list)

    # now create a PriorityPicker per FU-type such that only one
    # non-busy FU will be picked
    issue_pps = {} # (not used anywhere in this method)
    fu_found = Signal() # take a note if no Function Unit was available
    for fname, fu_list in by_fnunit.items():
        i_pp = PriorityPicker(len(fu_list))
        m.submodules['i_pp_%s' % fname] = i_pp
        i_l = []
        for i, (funame, fu) in enumerate(fu_list):
            # match the decoded instruction (e.do.fn_unit) against the
            # "capability" of this FU, gate that by whether that FU is
            # busy, and drop that into the PriorityPicker.
            # this will give us an output of the first available *non-busy*
            # Function Unit (Reservation Station) capable of handling this
            # instruction.
            fnunit = fu.fnunit.value
            en_req = Signal(name="issue_en_%s" % funame, reset_less=True)
            fnmatch = (self.ireg.e.do.fn_unit & fnunit).bool()
            comb += en_req.eq(fnmatch & ~fu.busy_o &
                              self.instr_active)
            i_l.append(en_req) # store in list for doing the Cat-trick
            # picker output, gated by enable: store in fu_bitdict
            po = Signal(name="o_issue_pick_"+funame) # picker output
            comb += po.eq(i_pp.o[i] & i_pp.en_o)
            comb += fu_bitdict[funame].eq(po)
            comb += fu_selected[funame].eq(fu.busy_o | po)
            # if we don't do this, then when there are no FUs available,
            # the "p.o_ready" signal will go back "ok we accepted this
            # instruction" which of course isn't true.
            with m.If(i_pp.en_o):
                comb += fu_found.eq(1)
        # for each input, Cat them together and drop them into the picker
        comb += i_pp.i.eq(Cat(*i_l))

    # rdmask, which is for registers needs to come from the *main* decoder
    for funame, fu in fus.items():
        rdmask = get_rdflags(m, self.ireg.e, fu)
        comb += fu.rdmaskn.eq(~rdmask)

    # sigh - need a NOP counter: busy is held high while it counts down
    counter = Signal(2)
    with m.If(counter != 0):
        sync += counter.eq(counter - 1)
        comb += busy_o.eq(1)

    # default to reading from incoming instruction: may be overridden
    # by copy from latch when "waiting"
    comb += self.ireg.eq(self.i)
    # always say "ready" except if overridden
    comb += self.p.o_ready.eq(1)

    # two states: READY takes the incoming instruction directly,
    # WAITING replays the latched copy until it can be issued
    with m.FSM():
        with m.State("READY"):
            with m.If(self.p.i_valid): # run only when valid
                with m.Switch(self.ireg.e.do.insn_type):
                    # check for ATTN: halt if true
                    with m.Case(MicrOp.OP_ATTN):
                        m.d.sync += self.o.core_terminate_o.eq(1)

                    # fake NOP - this isn't really used (Issuer detects NOP)
                    with m.Case(MicrOp.OP_NOP):
                        sync += counter.eq(2)
                        comb += busy_o.eq(1)

                    with m.Default():
                        comb += self.instr_active.eq(1)
                        # not ready unless an FU accepts (overridden below)
                        comb += self.p.o_ready.eq(0)
                        # connect instructions. only one enabled at a time
                        for funame, fu in fus.items():
                            do = self.des[funame]
                            enable = fu_bitdict[funame]

                            # run this FunctionUnit if enabled route op,
                            # issue, busy, read flags and mask to FU
                            with m.If(enable):
                                # operand comes from the *local* decoder
                                # do not actually issue, though, if there
                                # is a waw hazard. decoder has to still
                                # be asserted in order to detect that, tho
                                comb += fu.oper_i.eq_from(do)
                                # issue when valid (and no write-hazard)
                                comb += fu.issue_i.eq(~self.waw_hazard)
                                # instruction ok, indicate ready
                                comb += self.p.o_ready.eq(1)

                        # NOTE(review): placed inside Default() here;
                        # confirm the nesting level against upstream
                        if self.allow_overlap:
                            with m.If(~fu_found | self.waw_hazard):
                                # latch copy of instruction
                                sync += ilatch.eq(self.i)
                                comb += self.p.o_ready.eq(1) # accept
                                comb += busy_o.eq(1)
                                m.next = "WAITING"

        with m.State("WAITING"):
            comb += self.instr_active.eq(1)
            comb += self.p.o_ready.eq(0)
            comb += busy_o.eq(1)
            # using copy of instruction, keep waiting until an FU is free
            comb += self.ireg.eq(ilatch)
            with m.If(fu_found): # wait for conflict to clear
                # connect instructions. only one enabled at a time
                for funame, fu in fus.items():
                    do = self.des[funame]
                    enable = fu_bitdict[funame]

                    # run this FunctionUnit if enabled route op,
                    # issue, busy, read flags and mask to FU
                    with m.If(enable):
                        # operand comes from the *local* decoder,
                        # which is asserted even if not issued,
                        # so that WaW-detection can check for hazards.
                        # only if the waw hazard is clear does the
                        # instruction actually get issued
                        comb += fu.oper_i.eq_from(do)
                        # issue when valid
                        comb += fu.issue_i.eq(~self.waw_hazard)
                        # NOTE(review): placed inside If(enable) here;
                        # confirm the nesting level against upstream
                        with m.If(~self.waw_hazard):
                            comb += self.p.o_ready.eq(1)
                            comb += busy_o.eq(0)
                            m.next = "READY"

    print ("core: overlap allowed", self.allow_overlap)
    # true when any FU is busy (including the cycle where it is perhaps
    # to be issued - because that's what fu_busy is)
    comb += any_busy_o.eq(fu_busy.bool())
    if not self.allow_overlap:
        # for simple non-overlap, if any instruction is busy, set
        # busy output for core.
        comb += busy_o.eq(any_busy_o)
    else:
        # sigh deal with a fun situation that needs to be investigated
        # and resolved
        with m.If(self.issue_conflict):
            comb += busy_o.eq(1)
        # make sure that LDST, SPR, MMU, Branch and Trap all say "busy"
        # and do not allow overlap.  these are all the ones that
        # are non-forward-progressing: exceptions etc. that otherwise
        # change CoreState for some reason (MSR, PC, SVSTATE)
        for funame, fu in fus.items():
            if (funame.lower().startswith('ldst') or
                funame.lower().startswith('branch') or
                funame.lower().startswith('mmu') or
                funame.lower().startswith('spr') or
                funame.lower().startswith('trap')):
                with m.If(fu.busy_o):
                    comb += busy_o.eq(1)

    # return both the function unit "enable" dict as well as the "busy".
    # the "busy-or-issued" can be passed in to the Read/Write port
    # connecters to give them permission to request access to regfiles
    return fu_bitdict, fu_selected
492
def connect_rdport(self, m, fu_bitdict, fu_selected,
                   rdpickers, regfile, regname, fspec):
    """connect one regfile read port to every FU that wants to use it

    * m: the Module being built
    * fu_bitdict, fu_selected: per-FU "enable" and "busy-or-picked"
      bits, keyed by FU name (as returned by connect_instruction)
    * rdpickers: dict-of-dicts into which the PriorityPicker created
      here is stored, as rdpickers[regfile][regname]
    * regfile: regfile name (lower-cased to look up the regfile)
    * regname: name of the read port on that regfile
    * fspec: a ByRegSpec, or a list of them when several register
      specs share this one port

    returns the read-after-write hazard signal for this port, or
    Const(0) when hazard vectors are not being created.
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus
    regs = self.regs

    rpidx = regname

    # select the required read port.  these are pre-defined sizes
    rfile = regs.rf[regfile.lower()]
    rport = rfile.r_ports[rpidx]
    print("read regfile", rpidx, regfile, regs.rf.keys(),
          rfile, rfile.unary)

    # for checking if the read port has an outstanding write
    if self.make_hazard_vecs:
        wv = regs.wv[regfile.lower()]
        wvchk = wv.q_int # write-vec bit-level hazard check

    # if a hazard is detected on this read port, simply blithely block
    # every FU from reading on it.  this is complete overkill but very
    # simple for now.
    hazard_detected = Signal(name="raw_%s_%s" % (regfile, rpidx))

    # normalise to a list so that single and merged ports look the same
    fspecs = fspec
    if not isinstance(fspecs, list):
        fspecs = [fspecs]

    # first pass: one read-ok flag per regspec, and work out the total
    # picker length plus each regspec's starting offset within it
    rdflags = []
    pplen = 0
    ppoffs = []
    for i, fspec in enumerate(fspecs):
        # get the regfile specs for this regfile port
        print ("fpsec", i, fspec, len(fspec.specs))
        name = "%s_%s_%d" % (regfile, regname, i)
        ppoffs.append(pplen) # record offset for picker
        pplen += len(fspec.specs)
        rdflag = Signal(name="rdflag_"+name, reset_less=True)
        comb += rdflag.eq(fspec.okflag)
        rdflags.append(rdflag)

    print ("pplen", pplen)

    # create a priority picker to manage this port
    rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
    m.submodules["rdpick_%s_%s" % (regfile, rpidx)] = rdpick

    rens = []   # read-enables, ORed together after the loop
    addrs = []  # (binary regfiles only) muxed addresses
    wvens = []  # hazard-check enables, ORed together after the loop

    # second pass: hook up every FU of every regspec
    for i, fspec in enumerate(fspecs):
        (rf, _read, wid, fuspecs) = \
            (fspec.okflag, fspec.regport, fspec.wid, fspec.specs)
        # connect up the FU req/go signals, and the reg-read to the FU
        # and create a Read Broadcast Bus
        for pi, fuspec in enumerate(fspec.specs):
            (funame, fu, idx) = (fuspec.funame, fuspec.fu, fuspec.idx)
            pi += ppoffs[i] # position within the shared picker
            name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
            fu_active = fu_selected[funame]
            fu_issued = fu_bitdict[funame] # (not used below)

            # get (or set up) a latched copy of read register number
            # and (sigh) also the read-ok flag
            # TODO: use nmutil latchregister
            rhname = "%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name="rdflag_%s_%s" % (funame, rhname),
                            reset_less=True)
            if rhname not in fu.rf_latches:
                rfl = Signal(name="rdflag_latch_%s_%s" % (funame, rhname))
                fu.rf_latches[rhname] = rfl
                with m.If(fu.issue_i):
                    sync += rfl.eq(rdflags[i])
            else:
                rfl = fu.rf_latches[rhname]

            # now the register port
            rname = "%s_%s_%s_%d" % (funame, regfile, regname, pi)
            read = Signal.like(_read, name="read_"+rname)
            if rname not in fu.rd_latches:
                rdl = Signal.like(_read, name="rdlatch_"+rname)
                fu.rd_latches[rname] = rdl
                with m.If(fu.issue_i):
                    sync += rdl.eq(_read)
            else:
                rdl = fu.rd_latches[rname]

            # make the read immediately available on issue cycle
            # after the read cycle, otherwise use the latched copy.
            # this captures the regport and okflag on issue
            with m.If(fu.issue_i):
                comb += read.eq(_read)
                comb += rdflag.eq(rdflags[i])
            with m.Else():
                comb += read.eq(rdl)
                comb += rdflag.eq(rfl)

            # connect request-read to picker input, and output to go-rd
            addr_en = Signal.like(read, name="addr_en_"+name)
            pick = Signal(name="pick_"+name)     # picker input
            rp = Signal(name="rp_"+name)         # picker output
            delay_pick = Signal(name="dp_"+name) # read-enable "underway"
            rhazard = Signal(name="rhaz_"+name)

            # exclude any currently-enabled read-request (mask out active)
            # entirely block anything hazarded from being picked
            comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflag &
                            ~delay_pick & ~rhazard)
            comb += rdpick.i[pi].eq(pick)
            comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick

            # if picked, select read-port "reg select" number to port
            comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
            sync += delay_pick.eq(rp) # delayed "pick"
            comb += addr_en.eq(Mux(rp, read, 0))

            # the read-enable happens combinatorially (see mux-bus below)
            # but it results in the data coming out on a one-cycle delay.
            if rfile.unary:
                rens.append(addr_en)
            else:
                addrs.append(addr_en)
                rens.append(rp)

            # use the *delayed* pick signal to put requested data onto bus
            with m.If(delay_pick):
                # connect regfile port to input, creating fan-out Bus
                src = fu.src_i[idx]
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      src.shape(), rport.o_data.shape())
                # all FUs connect to same port
                comb += src.eq(rport.o_data)

            # everything below in this loop is hazard-vector checking
            if not self.make_hazard_vecs:
                continue

            # read the write-hazard bitvector (wv) for any bit that is
            # being requested by this FU
            wvchk_en = Signal(len(wvchk), name="wv_chk_addr_en_"+name)
            issue_active = Signal(name="rd_iactive_"+name)
            # XXX combinatorial loop here
            comb += issue_active.eq(fu_active & rdflag)
            with m.If(issue_active):
                if rfile.unary:
                    comb += wvchk_en.eq(read)
                else:
                    comb += wvchk_en.eq(1<<read)
            # if FU is busy (which doesn't get set at the same time as
            # issue) and no hazard was detected, clear wvchk_en (i.e.
            # stop checking for hazards).  there is a loop here, but it's
            # via a DFF, so is ok. some linters may complain, but hey.
            with m.If(fu.busy_o & ~rhazard):
                comb += wvchk_en.eq(0)

            # read-hazard is ANDed with (filtered by) what is actually
            # being requested.
            comb += rhazard.eq((wvchk & wvchk_en).bool())

            wvens.append(wvchk_en)

    # or-reduce the muxed read signals
    if rfile.unary:
        # for unary-addressed
        comb += rport.ren.eq(ortreereduce_sig(rens))
    else:
        # for binary-addressed
        comb += rport.addr.eq(ortreereduce_sig(addrs))
        comb += rport.ren.eq(Cat(*rens).bool())
        print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    if not self.make_hazard_vecs:
        return Const(0) # declare "no hazards"

    # enable the read bitvectors for this issued instruction
    # and return whether any write-hazard bit is set.
    # (name here is whatever the loops above assigned last)
    wvchk_and = Signal(len(wvchk), name="wv_chk_"+name)
    comb += wvchk_and.eq(wvchk & ortreereduce_sig(wvens))
    comb += hazard_detected.eq(wvchk_and.bool())
    return hazard_detected
673
def connect_rdports(self, m, fu_bitdict, fu_selected):
    """connect read ports

    takes the read regspecs grouped by regfile and then by regport
    name, and connects all FUs that want each regport by way of a
    PriorityPicker (one per regfile per port).  returns a single
    expression that is true when any port reported a read hazard.
    """
    hazards = []

    # dictionary of lists of regfile read ports
    byregfiles_rdspec = self.get_byregfiles(m, True)

    # okaay, now we need a PriorityPicker per regfile per regfile port
    # loootta pickers... peter piper picked a pack of pickled peppers...
    rdpickers = {}
    for regfile, fuspecs in byregfiles_rdspec.items():
        rdpickers[regfile] = {}

        # argh.  an experiment to merge RA and RB in the INT regfile
        # (we have too many read/write ports)
        if self.regreduce_en and regfile == 'INT':
            # rb, rc and ra (in that order) all share one port
            merged = [fuspecs.pop('rb')]
            fuspecs['rabc'] = merged
            merged.append(fuspecs.pop('rc'))
            merged.append(fuspecs.pop('ra'))
        if self.regreduce_en and regfile == 'FAST':
            # fast2 and fast3, where present, are folded into fast1
            merged = [fuspecs.pop('fast1')]
            fuspecs['fast1'] = merged
            for extra in ('fast2', 'fast3'):
                if extra in fuspecs:
                    merged.append(fuspecs.pop(extra))

        # for each named regfile port, connect up all FUs to that port
        # and collate the hazard detection that comes back
        for (regname, fspec) in sort_fuspecs(fuspecs):
            print("connect rd", regname, fspec)
            hazards.append(self.connect_rdport(m, fu_bitdict, fu_selected,
                                               rdpickers, regfile,
                                               regname, fspec))

    return Cat(*hazards).bool()
719
def make_hazards(self, m, regfile, rfile, wvclr, wvset,
                 funame, regname, idx,
                 addr_en, wp, fu, fu_active, wrflag, write,
                 fu_wrok):
    """make_hazards: a setter and a clearer for the regfile write ports

    setter is at issue time (using PowerDecoder2 regfile write numbers)
    clearer is at regfile write time (when FU has said what to write to)

    there is *one* unusual case here which has to be dealt with:
    when the Function Unit does *NOT* request a write to the regfile
    (has its data.ok bit CLEARED).  this is perfectly legitimate.
    and a royal pain.

    arguments used here:

    * m: the Module being built
    * regfile: regfile name (only printed)
    * rfile: the regfile object; rfile.unary selects unary or binary
      handling of the register number
    * wvclr, wvset: the clear and set inputs of the write-hazard
      bitvector (only their lengths are used here, plus printing)
    * funame, regname, idx: used to build unique signal names
    * addr_en, wp: the caller's write address-enable and write-pick
    * fu: the Function Unit (issue_i, busy_o and alu_done_o are read)
    * fu_active: the FU's "busy-or-picked" bit
    * wrflag: the decoder's "this register will be written" flag
    * write: the register number to be written
    * fu_wrok: the FU's "data is ok to write" flag

    returns (wvaddr_en, wviaddr_en): the bits to *clear* and the bits
    to *set* in the write-hazard bitvector, respectively.
    """
    comb, sync = m.d.comb, m.d.sync
    name = "%s_%s_%d" % (funame, regname, idx)

    # connect up the bitvector write hazard.  unlike the
    # regfile writeports, a ONE must be written to the corresponding
    # bit of the hazard bitvector (to indicate the existence of
    # the hazard)

    # the detection of what shall be written to is based
    # on *issue*.  it is delayed by 1 cycle so that instructions
    # "addi 5,5,0x2" do not cause combinatorial loops due to
    # fake-dependency on *themselves*.  this will totally fail
    # spectacularly when doing multi-issue
    print ("write vector (for regread)", regfile, wvset)
    wviaddr_en = Signal(len(wvset), name="wv_issue_addr_en_"+name)
    issue_active = Signal(name="iactive_"+name)
    sync += issue_active.eq(fu.issue_i & fu_active & wrflag)
    with m.If(issue_active):
        if rfile.unary:
            comb += wviaddr_en.eq(write)
        else:
            comb += wviaddr_en.eq(1<<write)

    # deal with write vector clear: this kicks in when the regfile
    # is written to, and clears the corresponding bitvector entry
    print ("write vector", regfile, wvclr)
    wvaddr_en = Signal(len(wvclr), name="wvaddr_en_"+name)
    if rfile.unary:
        comb += wvaddr_en.eq(addr_en)
    else:
        with m.If(wp):
            comb += wvaddr_en.eq(1<<addr_en)

    # XXX ASSUME that LDSTFunctionUnit always sets the data it intends to
    # this may NOT be the case when an exception occurs
    if isinstance(fu, LDSTFunctionUnit):
        return wvaddr_en, wviaddr_en

    # okaaay, this is preparation for the awkward case.
    # * latch a copy of wrflag when issue goes high.
    # * when the fu_wrok (data.ok) flag is NOT set,
    #   but the FU is done, the FU is NEVER going to write
    #   so the bitvector has to be cleared.
    latch_wrflag = Signal(name="latch_wrflag_"+name)
    with m.If(~fu.busy_o):
        sync += latch_wrflag.eq(0)
    # (stated after the clear above, so it takes priority over it)
    with m.If(fu.issue_i & fu_active):
        sync += latch_wrflag.eq(wrflag)
    with m.If(fu.alu_done_o & latch_wrflag & ~fu_wrok):
        if rfile.unary:
            comb += wvaddr_en.eq(write) # addr_en gated with wp, don't use
        else:
            # NOTE(review): the caller visible in this file passes
            # addr_en = Mux(wp, write, 0), i.e. it *is* gated by wp.
            # confirm that 1<<addr_en (rather than 1<<write) is what
            # is intended on this path.
            comb += wvaddr_en.eq(1<<addr_en) # binary addr_en not gated

    return wvaddr_en, wviaddr_en
789
790 def connect_wrport(self, m, fu_bitdict, fu_selected,
791 wrpickers, regfile, regname, fspec):
792 comb, sync = m.d.comb, m.d.sync
793 fus = self.fus.fus
794 regs = self.regs
795
796 rpidx = regname
797
798 # select the required write port. these are pre-defined sizes
799 rfile = regs.rf[regfile.lower()]
800 wport = rfile.w_ports[rpidx]
801
802 print("connect wr", regname, "unary", rfile.unary, fspec)
803 print(regfile, regs.rf.keys())
804
805 # select the write-protection hazard vector. note that this still
806 # requires to WRITE to the hazard bitvector! read-requests need
807 # to RAISE the bitvector (set it to 1), which, duh, requires a WRITE
808 if self.make_hazard_vecs:
809 wv = regs.wv[regfile.lower()]
810 wvset = wv.s # write-vec bit-level hazard ctrl
811 wvclr = wv.r # write-vec bit-level hazard ctrl
812 wvchk = wv.q # write-after-write hazard check
813
814 fspecs = fspec
815 if not isinstance(fspecs, list):
816 fspecs = [fspecs]
817
818 pplen = 0
819 writes = []
820 ppoffs = []
821 wrflags = []
822 for i, fspec in enumerate(fspecs):
823 # get the regfile specs for this regfile port
824 (wf, _write, wid, fuspecs) = \
825 (fspec.okflag, fspec.regport, fspec.wid, fspec.specs)
826 print ("fpsec", i, "wrflag", wf, fspec, len(fuspecs))
827 ppoffs.append(pplen) # record offset for picker
828 pplen += len(fuspecs)
829
830 name = "%s_%s_%d" % (regfile, regname, i)
831 wrflag = Signal(name="wr_flag_"+name)
832 if wf is not None:
833 comb += wrflag.eq(wf)
834 else:
835 comb += wrflag.eq(0)
836 wrflags.append(wrflag)
837
838 # create a priority picker to manage this port
839 wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
840 m.submodules["wrpick_%s_%s" % (regfile, rpidx)] = wrpick
841
842 wsigs = []
843 wens = []
844 wvsets = []
845 wvseten = []
846 wvclren = []
847 #wvens = [] - not needed: reading of writevec is permanently held hi
848 addrs = []
849 for i, fspec in enumerate(fspecs):
850 # connect up the FU req/go signals and the reg-read to the FU
851 # these are arbitrated by Data.ok signals
852 (wf, _write, wid, fuspecs) = \
853 (fspec.okflag, fspec.regport, fspec.wid, fspec.specs)
854 for pi, fuspec in enumerate(fspec.specs):
855 (funame, fu, idx) = (fuspec.funame, fuspec.fu, fuspec.idx)
856 fu_requested = fu_bitdict[funame]
857 pi += ppoffs[i]
858 name = "%s_%s_%s_%d" % (funame, regfile, regname, idx)
859 # get (or set up) a write-latched copy of write register number
860 write = Signal.like(_write, name="write_"+name)
861 rname = "%s_%s_%s_%d" % (funame, regfile, regname, idx)
862 if rname not in fu.wr_latches:
863 wrl = Signal.like(_write, name="wrlatch_"+rname)
864 fu.wr_latches[rname] = write
865 # do not depend on fu.issue_i here, it creates a
866 # combinatorial loop on waw checking. using the FU
867 # "enable" bitdict entry for this FU is sufficient,
868 # because the PowerDecoder2 read/write nums are
869 # valid continuously when the instruction is valid
870 with m.If(fu_requested):
871 sync += wrl.eq(_write)
872 comb += write.eq(_write)
873 with m.Else():
874 comb += write.eq(wrl)
875 else:
876 write = fu.wr_latches[rname]
877
878 # write-request comes from dest.ok
879 dest = fu.get_out(idx)
880 fu_dest_latch = fu.get_fu_out(idx) # latched output
881 name = "%s_%s_%d" % (funame, regname, idx)
882 fu_wrok = Signal(name="fu_wrok_"+name, reset_less=True)
883 comb += fu_wrok.eq(dest.ok & fu.busy_o)
884
885 # connect request-write to picker input, and output to go-wr
886 fu_active = fu_selected[funame]
887 pick = fu.wr.rel_o[idx] & fu_active
888 comb += wrpick.i[pi].eq(pick)
889 # create a single-pulse go write from the picker output
890 wr_pick = Signal(name="wpick_%s_%s_%d" % (funame, regname, idx))
891 comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
892 comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))
893
894 # connect the regspec write "reg select" number to this port
895 # only if one FU actually requests (and is granted) the port
896 # will the write-enable be activated
897 wname = "waddr_en_%s_%s_%d" % (funame, regname, idx)
898 addr_en = Signal.like(write, name=wname)
899 wp = Signal()
900 comb += wp.eq(wr_pick & wrpick.en_o)
901 comb += addr_en.eq(Mux(wp, write, 0))
902 if rfile.unary:
903 wens.append(addr_en)
904 else:
905 addrs.append(addr_en)
906 wens.append(wp)
907
908 # connect regfile port to input
909 print("reg connect widths",
910 regfile, regname, pi, funame,
911 dest.shape(), wport.i_data.shape())
912 wsigs.append(fu_dest_latch)
913
914 # now connect up the bitvector write hazard
915 if not self.make_hazard_vecs:
916 continue
917 res = self.make_hazards(m, regfile, rfile, wvclr, wvset,
918 funame, regname, idx,
919 addr_en, wp, fu, fu_active,
920 wrflags[i], write, fu_wrok)
921 wvaddr_en, wv_issue_en = res
922 wvclren.append(wvaddr_en) # set only: no data => clear bit
923 wvseten.append(wv_issue_en) # set data same as enable
924
925 # read the write-hazard bitvector (wv) for any bit that is
926 fu_requested = fu_bitdict[funame]
927 wvchk_en = Signal(len(wvchk), name="waw_chk_addr_en_"+name)
928 issue_active = Signal(name="waw_iactive_"+name)
929 whazard = Signal(name="whaz_"+name)
930 if wf is None:
931 # XXX EEK! STATE regfile (branch) does not have an
932 # write-active indicator in regspec_decode_write()
933 print ("XXX FIXME waw_iactive", issue_active,
934 fu_requested, wf)
935 else:
936 # check bits from the incoming instruction. note (back
937 # in connect_instruction) that the decoder is held for
938 # us to be able to do this, here... *without* issue being
939 # held HI. we MUST NOT gate this with fu.issue_i or
940 # with fu_bitdict "enable": it would create a loop
941 comb += issue_active.eq(wf)
942 with m.If(issue_active):
943 if rfile.unary:
944 comb += wvchk_en.eq(write)
945 else:
946 comb += wvchk_en.eq(1<<write)
947 # if FU is busy (which doesn't get set at the same time as
948 # issue) and no hazard was detected, clear wvchk_en (i.e.
949 # stop checking for hazards). there is a loop here, but it's
950 # via a DFF, so is ok. some linters may complain, but hey.
951 with m.If(fu.busy_o & ~whazard):
952 comb += wvchk_en.eq(0)
953
954 # write-hazard is ANDed with (filtered by) what is actually
955 # being requested. the wvchk data is on a one-clock delay,
956 # and wvchk_en comes directly from the main decoder
957 comb += whazard.eq((wvchk & wvchk_en).bool())
958 with m.If(whazard):
959 comb += fu._waw_hazard.eq(1)
960
961 #wvens.append(wvchk_en)
962
963 # here is where we create the Write Broadcast Bus. simple, eh?
964 comb += wport.i_data.eq(ortreereduce_sig(wsigs))
965 if rfile.unary:
966 # for unary-addressed
967 comb += wport.wen.eq(ortreereduce_sig(wens))
968 else:
969 # for binary-addressed
970 comb += wport.addr.eq(ortreereduce_sig(addrs))
971 comb += wport.wen.eq(ortreereduce_sig(wens))
972
973 if not self.make_hazard_vecs:
974 return [], []
975
976 # return these here rather than set wvclr/wvset directly,
977 # because there may be more than one write-port to a given
978 # regfile. example: XER has a write-port for SO, CA, and OV
979 # and the *last one added* of those would overwrite the other
980 # two. solution: have connect_wrports collate all the
981 # or-tree-reduced bitvector set/clear requests and drop them
982 # in as a single "thing". this can only be done because the
983 # set/get is an unary bitvector.
984 print ("make write-vecs", regfile, regname, wvset, wvclr)
985 return (wvclren, # clear (regfile write)
986 wvseten) # set (issue time)
987
def connect_wrports(self, m, fu_bitdict, fu_selected):
    """connect write ports

    orders the write regspecs into a dict-of-dicts, by regfile,
    by regport name (see get_byregfiles), then calls connect_wrport
    once per port so that all FUs wanting that regport are connected
    by way of a PriorityPicker.

    m, fu_bitdict and fu_selected are handed unmodified to
    connect_wrport. nothing is returned.

    when regreduce_en is set, several named ports are merged onto one:
    INT "o1" is folded into "o", and FAST "fast2"/"fast3" are folded
    into "fast1". the primary port ("o", "fast1") must be present
    (KeyError otherwise); the secondary ports are merged only if some
    Function Unit actually declared them.

    when make_hazard_vecs is set, the write-hazard bitvector set/clear
    requests returned by connect_wrport are collated per regfile and
    OR-reduced here, so that each regs.wv[regfile] set/reset input is
    assigned exactly once.

    note that the write-port wen, write-port data, and go_wr_i all need
    to be on the exact same clock cycle (connect_wrport drives all
    three from the comb domain).
    """
    comb = m.d.comb
    regs = self.regs
    # dictionary of dictionaries of regfile write-port specs
    byregfiles_wrspec = self.get_byregfiles(m, False)

    # regreduce_en port-merging table:
    # regfile -> (primary port, optional secondary ports folded into it)
    merges = {'INT': ('o', ('o1',)),
              'FAST': ('fast1', ('fast2', 'fast3'))}

    wrpickers = {}
    wvclrers = defaultdict(list)
    wvseters = defaultdict(list)
    for regfile, fuspecs in byregfiles_wrspec.items():
        wrpickers[regfile] = {}

        if self.regreduce_en and regfile in merges:
            # argh, more port-merging: the primary entry becomes a list
            # of specs, which connect_wrport accepts in place of one spec
            primary, extras = merges[regfile]
            merged = [fuspecs.pop(primary)]
            for extra in extras:
                # secondary ports only exist if an FU asked for them
                if extra in fuspecs:
                    merged.append(fuspecs.pop(extra))
            fuspecs[primary] = merged

        # collate these and record them by regfile because there
        # are sometimes more write-ports per regfile
        key = regfile.lower()
        for (regname, fspec) in sort_fuspecs(fuspecs):
            wvclren, wvseten = self.connect_wrport(m,
                                                   fu_bitdict, fu_selected,
                                                   wrpickers,
                                                   regfile, regname, fspec)
            wvclrers[key] += wvclren
            wvseters[key] += wvseten

    if not self.make_hazard_vecs:
        return

    # for write-vectors: reduce the clr-ers and set-ers down to
    # a single set of bits. otherwise if there are two write
    # ports (on some regfiles), the last one doing comb += on
    # the reg.wv[regfile] instance "wins" (and all others are ignored,
    # whoops). if there was only one write-port per wv regfile this
    # would not be an issue.
    for regfile, wvclren in wvclrers.items():
        wv = regs.wv[regfile]
        wvseten = wvseters[regfile]
        # wv.r: clear (regfile write).  wv.s: set (issue time)
        comb += wv.r.eq(ortreereduce_sig(wvclren))
        comb += wv.s.eq(ortreereduce_sig(wvseten))
1052
def get_byregfiles(self, m, readmode):
    """group Function Unit register ports by regfile and port name

    m is handed to regspec_decode. readmode selects which side of each
    FU is examined: True walks the fu.n_src inputs, False walks the
    fu.n_dst outputs.

    as a side-effect every FU gets fresh, empty latch dictionaries:
    rd_latches and rf_latches in read mode, wr_latches in write mode.

    returns a dict-of-dicts: first key regfile, second key regfile
    port name, value a ByRegSpec(okflag, regport, wid, specs) where
    specs is the list of FUSpec(funame, fu, idx) that want that port.
    """
    if readmode:
        mode = "read"
    else:
        mode = "write"
    all_fus = self.fus.fus
    decoded = self.ireg.e  # decoded instruction to execute

    # two-level dictionary of regfile port specs.
    # outer key: regfile. inner key: regfile port name
    byregfiles_spec = defaultdict(dict)

    for (funame, fu) in all_fus.items():
        # each FU carries a receptacle for the read/write register
        # hazard numbers (and okflags for read). these get filled in
        # (latched) later, in connect_rd/write_ports
        if readmode:
            fu.rd_latches = {}  # read reg number latches
            fu.rf_latches = {}  # read flag latches
        else:
            fu.wr_latches = {}

        # regfile specs: read looks at the inspec, write at the outspec
        print("%s ports for %s" % (mode, funame))
        port_count = fu.n_src if readmode else fu.n_dst
        for idx in range(port_count):
            (regfile, regname, wid) = fu.get_io_spec(readmode, idx)
            print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))

            # the decoded regfile numbers live in the PowerDecoder2
            # (main one, not the satellites): fetch them now
            decinfo = regspec_decode(m, readmode, decoded,
                                     regfile, regname)

            # first time this regfile port is seen: create its entry
            by_port = byregfiles_spec[regfile]
            if regname not in by_port:
                by_port[regname] = ByRegSpec(decinfo.okflag,
                                             decinfo.regport, wid, [])

            # build up the "lane": every Function Unit needing this
            # [single-contended resource] regfile port is added to the
            # list, from which a PriorityPicker can later be created
            # to arbitrate access to it
            by_port[regname].specs.append(FUSpec(funame, fu, idx))

    # dump the whole lot out, for convenience
    for regfile, by_port in byregfiles_spec.items():
        print("regfile %s ports:" % mode, regfile)
        for regname, portspec in by_port.items():
            [okflag, regport, wid, lanes] = portspec
            print(" rf %s port %s lane: %s" % (mode, regfile, regname))
            print(" %s" % regname, wid, okflag, regport)
            for (funame, fu, idx) in lanes:
                if readmode:
                    fusig = fu.src_i[idx]
                else:
                    fusig = fu.dest[idx]
                print(" ", funame, fu.__class__.__name__, idx, fusig)
                print()

    return byregfiles_spec
1110
def __iter__(self):
    """iterate over the ports of self.fus, then self.i.e, then self.l0

    each ports() call is only made once the previous group has been
    fully consumed.
    """
    for port in self.fus.ports():
        yield port
    for port in self.i.e.ports():
        yield port
    for port in self.l0.ports():
        yield port
    # TODO: regs
1116
def ports(self):
    """return everything yielded by iterating over self, as a new list"""
    return [port for port in self]
1119
1120
if __name__ == '__main__':
    # stand-alone run: build the core from a test pspec, convert it
    # to RTLIL and save the result as test_core.il
    pspec_args = {
        'ldst_ifacetype': 'testpi',
        'imem_ifacetype': '',
        'addr_wid': 48,
        'allow_overlap': True,
        'mask_wid': 8,
        'reg_wid': 64,
    }
    pspec = TestMemPspec(**pspec_args)
    dut = NonProductionCore(pspec)
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(il_text)