connect up I-Cache to FetchUnitInterface
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 (update: actually this is being added now:
21 https://bugs.libre-soc.org/show_bug.cgi?id=737)
22 """
23
24 from nmigen import (Elaboratable, Module, Signal, ResetSignal, Cat, Mux,
25 Const)
26 from nmigen.cli import rtlil
27
28 from openpower.decoder.power_decoder2 import PowerDecodeSubset
29 from openpower.decoder.power_regspec_map import regspec_decode
30 from openpower.sv.svp64 import SVP64Rec
31
32 from nmutil.picker import PriorityPicker
33 from nmutil.util import treereduce
34 from nmutil.singlepipe import ControlBase
35
36 from soc.fu.compunits.compunits import AllFunctionUnits, LDSTFunctionUnit
37 from soc.regfile.regfiles import RegFiles
38 from openpower.decoder.power_decoder2 import get_rdflags
39 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
40 from soc.config.test.test_loadstore import TestMemPspec
41 from openpower.decoder.power_enums import MicrOp, Function
42 from soc.simple.core_data import CoreInput, CoreOutput
43
44 from collections import defaultdict, namedtuple
45 import operator
46
47 from nmutil.util import rising_edge
48
# one Function Unit's claim on a regfile port:
#   funame - name of the Function Unit (key into the FU dictionary)
#   fu     - the Function Unit object itself
#   idx    - index of the FU operand/result that goes through the port
FUSpec = namedtuple("FUSpec", "funame fu idx")

# everything that wants one named regfile port:
#   okflag  - flag saying whether the port is actually wanted
#   regport - register-number signal to present to the port
#   wid     - presumably the port width - TODO confirm in get_byregfiles
#   specs   - list of FUSpec, one per Function Unit sharing the port
ByRegSpec = namedtuple("ByRegSpec", "okflag regport wid specs")
51
# helper function for reducing a list of objects down to a single
# parallel (tree-shaped) OR of one attribute taken from each of them.
def ortreereduce(tree, attr="o_data"):
    """OR together, as a tree, the attribute `attr` of every item in `tree`.

    tree: list of objects that each have an attribute named `attr`
    attr: name of the attribute to pick from each object (default "o_data")
    """
    def pick(node):
        return getattr(node, attr)

    return treereduce(tree, operator.or_, pick)
56
57
def ortreereduce_sig(tree):
    """OR together, as a tree, the items of `tree` themselves.

    same as ortreereduce except that the list entries are used directly
    rather than having an attribute extracted from each of them.
    """
    def itself(sig):
        return sig

    return treereduce(tree, operator.or_, itself)
60
61
# helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the (regname, fspec) pairs of `fuspecs` as a list, with every
    entry whose name starts with "full" moved to the front.

    the relative order within the "full" group and within the remaining
    group is the dictionary's own iteration order.
    """
    entries = list(fuspecs.items())
    full_first = [entry for entry in entries if entry[0].startswith("full")]
    the_rest = [entry for entry in entries if not entry[0].startswith("full")]
    return full_first + the_rest
72
73
# a hazard bitvector "remap" function which returns an AST expression
# that remaps read/write hazard regfile port numbers to a bitvector.
# CRITICALLY-IMPORTANT NOTE: these bitvectors *have* to match up per
# regfile!  therefore the remapping is per regfile, *NOT* per regfile
# port and certainly not based on whether it is a read port or write port.
# note that any reductions here will result in degraded performance due
# to conflicts, but at least it keeps the hazard matrix sizes down to "sane"
def bitvector_remap(regfile, rfile, port):
    """Convert a regfile port number into unary (one-hot) bitvector form.

    regfile: upper-case regfile name ('CR', 'XER', 'SVSTATE', 'FAST',
             'SPR' or 'INT')
    rfile:   the regfile object.  only its `unary` attribute is read, and
             only for FAST, SPR and INT
    port:    the port (register) number, or an expression for it

    returns `port` unchanged when it is already unary, `1 << port` when the
    regfile is binary-addressed, and None for an unrecognised regfile name.
    """
    # CR (8 bits at the moment, no SVP64), XER (3 bits) and SVSTATE are
    # treated as unary already: return the port unmodified
    if regfile in ('CR', 'XER', 'SVSTATE'):
        return port
    # FAST, SPR and INT may be either unary- or binary-addressed.
    # NOTE(review): earlier commentary said SPR would be reduced to a
    # single bit.  no such reduction is (or was) performed here: SPR is
    # handled exactly like FAST and INT.
    if regfile in ('FAST', 'SPR', 'INT'):
        if rfile.unary:
            return port
        return 1 << port
    # unknown regfile name: nothing to remap (previously an implicit
    # fall-through, now spelled out)
    return None
113
114
115 # derive from ControlBase rather than have a separate Stage instance,
116 # this is simpler to do
117 class NonProductionCore(ControlBase):
def __init__(self, pspec):
    """Create the core's sub-components from the parameter spec `pspec`.

    optional pspec attributes read here (a missing attribute means "off"):
    svp64, regreduce, allow_overlap, and core_type (default "fsm").
    pspec.ldst_ifacetype is also read (printed only).

    sets up, in this order: the L0 cache buffer, the Function Units
    (linking the MMU to the LoadStore1 interface if there is an MMU),
    the register files, the input/output data, the captured input
    (self.ireg), one subset decoder per non-TRAP Function Unit, and a
    write-after-write hazard Signal on every Function Unit.
    """
    self.pspec = pspec

    # test if SVP64 is to be enabled
    self.svp64_en = getattr(pspec, "svp64", False) == True

    # test to see if regfile ports should be reduced
    self.regreduce_en = getattr(pspec, "regreduce", False) == True

    # test to see if overlapping of instructions is allowed
    # (not normally enabled for TestIssuer FSM but useful for checking
    # the bitvector hazard detection, before doing In-Order)
    self.allow_overlap = getattr(pspec, "allow_overlap", False) == True

    # hazard vectors are only made when overlap is permitted
    self.make_hazard_vecs = self.allow_overlap

    # test core type: "fsm" unless pspec says otherwise
    self.core_type = getattr(pspec, "core_type", "fsm")

    # the flags above must exist before this call: self is the Stage
    super().__init__(stage=self)

    # single LD/ST funnel for memory access
    l0buf = TstL0CacheBuffer(pspec, n_units=1)
    self.l0 = l0buf
    ldst_port = l0buf.l0.dports[0]

    # function units (only one each)
    # only include mmu if enabled in pspec
    self.fus = AllFunctionUnits(pspec, pilist=[ldst_port])

    # link LoadStore1 into MMU
    mmu = self.fus.get_fu('mmu0')
    ldst0 = self.fus.get_fu('ldst0')  # looked up but not used further here
    print ("core pspec", pspec.ldst_ifacetype)
    print ("core mmu", mmu)
    if mmu is not None:
        lsi = l0buf.cmpi.lsmem.lsi  # a LoadStore1 Interface object
        print ("core lsmem.lsi", lsi)
        mmu.alu.set_ldst_interface(lsi)

    # register files (yes plural)
    self.regs = RegFiles(pspec, make_hazard_vecs=self.make_hazard_vecs)

    # set up input and output: unusual requirement to set data directly
    # (due to the way that the core is set up in a different domain,
    # see TestIssuer.setup_peripherals
    self.p.i_data, self.n.o_data = self.new_specs(None)
    self.i = self.p.i_data
    self.o = self.n.o_data

    # actual internal input data used (captured)
    self.ireg = self.ispec()

    # create per-FU instruction decoders (subsetted).  these "satellite"
    # decoders reduce wire fan-out from the one (main) PowerDecoder2
    # (used directly by the trap unit) to the *twelve* (or more)
    # Function Units.  we can either have 32 wires (the instruction)
    # to each, or we can have well over a 200 wire fan-out (to 12
    # ALUs). it's an easy choice to make.
    self.decoders = {}
    self.des = {}

    # eep, these should be *per FU* i.e. for FunctionUnitBaseMulti
    # they should be shared (put into the ALU *once*).
    for funame, fu in self.fus.fus.items():
        unit_name = fu.fnunit.name
        opkls = fu.opsubsetkls
        if unit_name == 'TRAP':
            # TRAP decoder is the *main* decoder: no satellite is made,
            # just remember which Function Unit it is
            self.trapunit = funame
            continue
        assert funame not in self.decoders
        subdec = PowerDecodeSubset(None, opkls, unit_name,
                                   final=True,
                                   state=self.ireg.state,
                                   svp64_en=self.svp64_en,
                                   regreduce_en=self.regreduce_en)
        self.decoders[funame] = subdec
        self.des[funame] = subdec.do

    # create per-Function Unit write-after-write hazard signals
    # yes, really, this should have been added in ReservationStations
    # but hey.
    for funame, fu in self.fus.fus.items():
        fu._waw_hazard = Signal(name="waw_%s" % funame)

    # share the SPR decoder with the MMU if it exists
    if "mmu0" in self.decoders:
        self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]
209
210 # next 3 functions are Stage API Compliance
211 def setup(self, m, i):
212 pass
213
def ispec(self):
    """Stage API: return a new CoreInput, built from this core's pspec
    and its svp64_en / regreduce_en flags."""
    return CoreInput(self.pspec, self.svp64_en, self.regreduce_en)
216
def ospec(self):
    """Stage API: return a new CoreOutput."""
    return CoreOutput()
219
# elaborate function to create HDL
def elaborate(self, platform):
    """Build and return the nmigen Module for the core.

    if pspec.nocore is set, only a dummy toggling Signal is created.
    otherwise the Function Units, L0 cache and regfiles are added, the
    per-FU write-after-write flags are merged into self.waw_hazard, the
    satellite decoders, instruction issue and regfile read/write ports
    are connected, and any Function Unit exception is reported on
    self.o.exc_happened.
    """
    m = super().elaborate(platform)

    # for testing purposes, to cut down on build time in coriolis2
    if getattr(self.pspec, "nocore", False) == True:
        x = Signal()  # dummy signal
        m.d.sync += x.eq(~x)
        return m

    m.submodules.fus = self.fus
    m.submodules.l0 = self.l0
    self.regs.elaborate_into(m, platform)

    # amalgamate write-hazards into a single top-level Signal
    self.waw_hazard = Signal()
    fu_waw_flags = [fu._waw_hazard for fu in self.fus.fus.values()]
    m.d.comb += self.waw_hazard.eq(Cat(*fu_waw_flags).bool())

    # connect decoders
    self.connect_satellite_decoders(m)

    # ssh, cheat: trap uses the main decoder because of the rewriting
    self.des[self.trapunit] = self.ireg.e.do

    # connect up Function Units, then read/write ports, and hazard conflict
    self.issue_conflict = Signal()
    fu_bitdict, fu_selected = self.connect_instruction(m)
    raw_hazard = self.connect_rdports(m, fu_bitdict, fu_selected)
    self.connect_wrports(m, fu_bitdict, fu_selected)
    if self.allow_overlap:
        # a read-after-write hazard only counts as an issue conflict
        # when overlapping instructions are permitted
        m.d.comb += self.issue_conflict.eq(raw_hazard)

    # note if an exception happened.  in a pipelined or OoO design
    # this needs to be accompanied by "shadowing" (or stalling)
    exc_flags = [exc.happened for exc in self.fus.excs.values()]
    if exc_flags:  # at least one exception
        m.d.comb += self.o.exc_happened.eq(Cat(*exc_flags).bool())

    return m
267
def connect_satellite_decoders(self, m):
    """Add every satellite (subset) decoder as a submodule and feed it
    the captured instruction from self.ireg.

    as subset decoders this massively reduces wire fanout given the
    large number of ALUs.  the SVP64 connections are only made when
    self.svp64_en is set.
    """
    ireg = self.ireg
    for funame, subdec in self.decoders.items():
        # connect each satellite decoder and give it the instruction
        m.submodules["dec_%s" % funame] = subdec
        m.d.comb += [
            subdec.dec.raw_opcode_in.eq(ireg.raw_insn_i),
            subdec.dec.bigendian.eq(ireg.bigendian_i),
            # sigh due to SVP64 RA_OR_ZERO detection connect these too
            subdec.sv_a_nz.eq(ireg.sv_a_nz),
        ]
        if self.svp64_en:
            m.d.comb += [
                subdec.pred_sm.eq(ireg.sv_pred_sm),
                subdec.pred_dm.eq(ireg.sv_pred_dm),
            ]
            if funame != self.trapunit:
                m.d.comb += [
                    subdec.sv_rm.eq(ireg.sv_rm),  # pass through SVP64 RM
                    subdec.is_svp64_mode.eq(ireg.is_svp64_mode),
                ]
                # only the LDST PowerDecodeSubset *actually* needs to
                # know to use the alternative decoder.  this is all
                # a terrible hack
                if funame.lower().startswith("ldst"):
                    m.d.comb += subdec.use_svp64_ldst_dec.eq(
                        ireg.use_svp64_ldst_dec)
293
def connect_instruction(self, m):
    """connect_instruction

    uses decoded (from PowerOp) function unit information from CSV files
    to ascertain which Function Unit should deal with the current
    instruction.

    some (such as OP_ATTN, OP_NOP) are dealt with here, including
    ignoring it and halting the processor.  OP_NOP is a bit annoying
    because the issuer expects busy flag still to be raised then lowered.
    (this requires a fake counter to be set).

    returns a tuple (fu_bitdict, fu_selected), both dictionaries keyed
    by Function Unit name:

    * fu_bitdict[funame] is the one-bit "picked for issue" signal
    * fu_selected[funame] is "busy or picked for issue"

    also creates self.instr_active, and drives self.o.busy_o,
    self.o.any_busy_o and self.p.o_ready.
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus

    # indicate if core is busy
    busy_o = self.o.busy_o
    any_busy_o = self.o.any_busy_o

    # connect up temporary copy of incoming instruction. the FSM will
    # either blat the incoming instruction (if valid) into self.ireg
    # or if the instruction could not be delivered, keep dropping the
    # latched copy into ireg
    ilatch = self.ispec()
    self.instr_active = Signal()

    # enable/busy-signals for each FU, get one bit for each FU (by name)
    fu_enable = Signal(len(fus), reset_less=True)
    fu_busy = Signal(len(fus), reset_less=True)
    fu_bitdict = {}
    fu_selected = {}
    for i, funame in enumerate(fus.keys()):
        fu_bitdict[funame] = fu_enable[i]
        fu_selected[funame] = fu_busy[i]

    # identify function units and create a list by fnunit so that
    # PriorityPickers can be created for selecting one of them that
    # isn't busy at the time the incoming instruction needs passing on
    by_fnunit = defaultdict(list)
    for fname, member in Function.__members__.items():
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            if member.value & fnunit: # this FU handles this type of op
                by_fnunit[fname].append((funame, fu)) # add by Function

    # ok now just print out the list of FUs by Function, because we can
    for fname, fu_list in by_fnunit.items():
        print ("FUs by type", fname, fu_list)

    # now create a PriorityPicker per FU-type such that only one
    # non-busy FU will be picked
    # NOTE(review): issue_pps is never read or written after this line
    issue_pps = {}
    fu_found = Signal() # take a note if no Function Unit was available
    for fname, fu_list in by_fnunit.items():
        i_pp = PriorityPicker(len(fu_list))
        m.submodules['i_pp_%s' % fname] = i_pp
        i_l = []
        for i, (funame, fu) in enumerate(fu_list):
            # match the decoded instruction (e.do.fn_unit) against the
            # "capability" of this FU, gate that by whether that FU is
            # busy, and drop that into the PriorityPicker.
            # this will give us an output of the first available *non-busy*
            # Function Unit (Reservation Station) capable of handling this
            # instruction.
            fnunit = fu.fnunit.value
            en_req = Signal(name="issue_en_%s" % funame, reset_less=True)
            fnmatch = (self.ireg.e.do.fn_unit & fnunit).bool()
            comb += en_req.eq(fnmatch & ~fu.busy_o &
                                self.instr_active)
            i_l.append(en_req) # store in list for doing the Cat-trick
            # picker output, gated by enable: store in fu_bitdict
            po = Signal(name="o_issue_pick_"+funame) # picker output
            comb += po.eq(i_pp.o[i] & i_pp.en_o)
            comb += fu_bitdict[funame].eq(po)
            comb += fu_selected[funame].eq(fu.busy_o | po)
            # if we don't do this, then when there are no FUs available,
            # the "p.o_ready" signal will go back "ok we accepted this
            # instruction" which of course isn't true.
            with m.If(i_pp.en_o):
                comb += fu_found.eq(1)
        # for each input, Cat them together and drop them into the picker
        comb += i_pp.i.eq(Cat(*i_l))

    # rdmask, which is for registers needs to come from the *main* decoder
    for funame, fu in fus.items():
        rdmask = get_rdflags(m, self.ireg.e, fu)
        comb += fu.rdmaskn.eq(~rdmask)

    # sigh - need a NOP counter
    # (loaded with 2 by the OP_NOP case below: busy_o is held high
    # for as long as the counter is non-zero)
    counter = Signal(2)
    with m.If(counter != 0):
        sync += counter.eq(counter - 1)
        comb += busy_o.eq(1)

    # default to reading from incoming instruction: may be overridden
    # by copy from latch when "waiting"
    comb += self.ireg.eq(self.i)
    # always say "ready" except if overridden
    comb += self.p.o_ready.eq(1)

    # two states: READY takes the incoming instruction directly,
    # WAITING replays the latched copy (ilatch).  WAITING can only be
    # entered when self.allow_overlap is set.
    with m.FSM():
        with m.State("READY"):
            with m.If(self.p.i_valid): # run only when valid
                with m.Switch(self.ireg.e.do.insn_type):
                    # check for ATTN: halt if true
                    with m.Case(MicrOp.OP_ATTN):
                        m.d.sync += self.o.core_terminate_o.eq(1)

                    # fake NOP - this isn't really used (Issuer detects NOP)
                    with m.Case(MicrOp.OP_NOP):
                        sync += counter.eq(2)
                        comb += busy_o.eq(1)

                    with m.Default():
                        comb += self.instr_active.eq(1)
                        comb += self.p.o_ready.eq(0)
                        # connect instructions. only one enabled at a time
                        for funame, fu in fus.items():
                            do = self.des[funame]
                            enable = fu_bitdict[funame]

                            # run this FunctionUnit if enabled route op,
                            # issue, busy, read flags and mask to FU
                            with m.If(enable):
                                # operand comes from the *local* decoder
                                # do not actually issue, though, if there
                                # is a waw hazard. decoder has to still
                                # be asserted in order to detect that, tho
                                comb += fu.oper_i.eq_from(do)
                                # issue when valid (and no write-hazard)
                                comb += fu.issue_i.eq(~self.waw_hazard)
                                # instruction ok, indicate ready
                                comb += self.p.o_ready.eq(1)

                        if self.allow_overlap:
                            with m.If(~fu_found | self.waw_hazard):
                                # latch copy of instruction
                                sync += ilatch.eq(self.i)
                                comb += self.p.o_ready.eq(1) # accept
                                comb += busy_o.eq(1)
                                m.next = "WAITING"

        with m.State("WAITING"):
            comb += self.instr_active.eq(1)
            comb += self.p.o_ready.eq(0)
            comb += busy_o.eq(1)
            # using copy of instruction, keep waiting until an FU is free
            comb += self.ireg.eq(ilatch)
            with m.If(fu_found): # wait for conflict to clear
                # connect instructions. only one enabled at a time
                for funame, fu in fus.items():
                    do = self.des[funame]
                    enable = fu_bitdict[funame]

                    # run this FunctionUnit if enabled route op,
                    # issue, busy, read flags and mask to FU
                    with m.If(enable):
                        # operand comes from the *local* decoder,
                        # which is asserted even if not issued,
                        # so that WaW-detection can check for hazards.
                        # only if the waw hazard is clear does the
                        # instruction actually get issued
                        comb += fu.oper_i.eq_from(do)
                        # issue when valid
                        comb += fu.issue_i.eq(~self.waw_hazard)
                        with m.If(~self.waw_hazard):
                            comb += self.p.o_ready.eq(1)
                            comb += busy_o.eq(0)
                            m.next = "READY"

    print ("core: overlap allowed", self.allow_overlap)
    # true when any FU is busy (including the cycle where it is perhaps
    # to be issued - because that's what fu_busy is)
    comb += any_busy_o.eq(fu_busy.bool())
    if not self.allow_overlap:
        # for simple non-overlap, if any instruction is busy, set
        # busy output for core.
        # NOTE(review): this unconditional assignment is added after all
        # the busy_o assignments above - presumably it takes priority
        # over them (later assignment wins) - confirm this is intended
        comb += busy_o.eq(any_busy_o)
    else:
        # sigh deal with a fun situation that needs to be investigated
        # and resolved
        with m.If(self.issue_conflict):
            comb += busy_o.eq(1)
        # make sure that LDST, SPR, MMU, Branch and Trap all say "busy"
        # and do not allow overlap.  these are all the ones that
        # are non-forward-progressing: exceptions etc. that otherwise
        # change CoreState for some reason (MSR, PC, SVSTATE)
        for funame, fu in fus.items():
            if (funame.lower().startswith('ldst') or
                funame.lower().startswith('branch') or
                funame.lower().startswith('mmu') or
                funame.lower().startswith('spr') or
                funame.lower().startswith('trap')):
                with m.If(fu.busy_o):
                    comb += busy_o.eq(1)

    # return both the function unit "enable" dict as well as the "busy".
    # the "busy-or-issued" can be passed in to the Read/Write port
    # connecters to give them permission to request access to regfiles
    return fu_bitdict, fu_selected
494
def connect_rdport(self, m, fu_bitdict, fu_selected,
                            rdpickers, regfile, regname, fspec):
    """connect one regfile read port to every Function Unit that uses it

    a single PriorityPicker is created for the port, and stored in
    rdpickers[regfile][regname].  each Function Unit operand that wants
    the port gets one picker input.  the winner's register number is
    put onto the port, and one cycle later (delay_pick) the port's
    output data is routed to that Function Unit's source operand.

    * m: the Module being built
    * fu_bitdict: dict by FU name of "picked for issue" signals
    * fu_selected: dict by FU name of "busy or picked for issue" signals
    * rdpickers: dict-of-dicts (by regfile, by port), updated in place
    * regfile: regfile name (lower-cased to index self.regs.rf)
    * regname: name of the read port within that regfile
    * fspec: a ByRegSpec, or a list of them when ports are merged

    returns Const(0) if self.make_hazard_vecs is not set, otherwise a
    Signal that is raised when any read requested on this port matches
    a bit in the regfile's write-hazard vector.
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus
    regs = self.regs

    rpidx = regname

    # select the required read port.  these are pre-defined sizes
    rfile = regs.rf[regfile.lower()]
    rport = rfile.r_ports[rpidx]
    print("read regfile", rpidx, regfile, regs.rf.keys(),
                          rfile, rfile.unary)

    # for checking if the read port has an outstanding write
    if self.make_hazard_vecs:
        wv = regs.wv[regfile.lower()]
        wvchk = wv.q_int # write-vec bit-level hazard check

    # if a hazard is detected on this read port, simply blithely block
    # every FU from reading on it.  this is complete overkill but very
    # simple for now.
    hazard_detected = Signal(name="raw_%s_%s" % (regfile, rpidx))

    # normalise to a list: merged ports arrive as a list already
    fspecs = fspec
    if not isinstance(fspecs, list):
        fspecs = [fspecs]

    # first pass: one read-ok flag per spec, and count the total number
    # of picker inputs needed (ppoffs records where each spec starts)
    rdflags = []
    pplen = 0
    ppoffs = []
    for i, fspec in enumerate(fspecs):
        # get the regfile specs for this regfile port
        print ("fpsec", i, fspec, len(fspec.specs))
        name = "%s_%s_%d" % (regfile, regname, i)
        ppoffs.append(pplen) # record offset for picker
        pplen += len(fspec.specs)
        rdflag = Signal(name="rdflag_"+name, reset_less=True)
        comb += rdflag.eq(fspec.okflag)
        rdflags.append(rdflag)

    print ("pplen", pplen)

    # create a priority picker to manage this port
    rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
    m.submodules["rdpick_%s_%s" % (regfile, rpidx)] = rdpick

    rens = []   # read-enables (unary: the gated addresses themselves)
    addrs = []  # gated binary addresses (binary-addressed regfiles only)
    wvens = []  # per-operand write-hazard check enables

    # second pass: wire up every Function Unit operand
    for i, fspec in enumerate(fspecs):
        (rf, _read, wid, fuspecs) = \
            (fspec.okflag, fspec.regport, fspec.wid, fspec.specs)
        # connect up the FU req/go signals, and the reg-read to the FU
        # and create a Read Broadcast Bus
        for pi, fuspec in enumerate(fspec.specs):
            (funame, fu, idx) = (fuspec.funame, fuspec.fu, fuspec.idx)
            pi += ppoffs[i]
            name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
            fu_active = fu_selected[funame]
            fu_issued = fu_bitdict[funame]

            # get (or set up) a latched copy of read register number
            # and (sigh) also the read-ok flag
            # TODO: use nmutil latchregister
            rhname = "%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name="rdflag_%s_%s" % (funame, rhname),
                            reset_less=True)
            if rhname not in fu.rf_latches:
                rfl = Signal(name="rdflag_latch_%s_%s" % (funame, rhname))
                fu.rf_latches[rhname] = rfl
                with m.If(fu.issue_i):
                    sync += rfl.eq(rdflags[i])
            else:
                rfl = fu.rf_latches[rhname]

            # now the register port
            rname = "%s_%s_%s_%d" % (funame, regfile, regname, pi)
            read = Signal.like(_read, name="read_"+rname)
            if rname not in fu.rd_latches:
                rdl = Signal.like(_read, name="rdlatch_"+rname)
                fu.rd_latches[rname] = rdl
                with m.If(fu.issue_i):
                    sync += rdl.eq(_read)
            else:
                rdl = fu.rd_latches[rname]

            # make the read immediately available on issue cycle
            # after the read cycle, otherwise use the latched copy.
            # this captures the regport and okflag on issue
            with m.If(fu.issue_i):
                comb += read.eq(_read)
                comb += rdflag.eq(rdflags[i])
            with m.Else():
                comb += read.eq(rdl)
                comb += rdflag.eq(rfl)

            # connect request-read to picker input, and output to go-rd
            addr_en = Signal.like(read, name="addr_en_"+name)
            pick = Signal(name="pick_"+name)     # picker input
            rp = Signal(name="rp_"+name)         # picker output
            delay_pick = Signal(name="dp_"+name) # read-enable "underway"
            rhazard = Signal(name="rhaz_"+name)

            # exclude any currently-enabled read-request (mask out active)
            # entirely block anything hazarded from being picked
            comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflag &
                            ~delay_pick & ~rhazard)
            comb += rdpick.i[pi].eq(pick)
            comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick

            # if picked, select read-port "reg select" number to port
            comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
            sync += delay_pick.eq(rp) # delayed "pick"
            comb += addr_en.eq(Mux(rp, read, 0))

            # the read-enable happens combinatorially (see mux-bus below)
            # but it results in the data coming out on a one-cycle delay.
            if rfile.unary:
                rens.append(addr_en)
            else:
                addrs.append(addr_en)
                rens.append(rp)

            # use the *delayed* pick signal to put requested data onto bus
            with m.If(delay_pick):
                # connect regfile port to input, creating fan-out Bus
                src = fu.src_i[idx]
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      src.shape(), rport.o_data.shape())
                # all FUs connect to same port
                comb += src.eq(rport.o_data)

            # everything below in this loop is hazard detection only
            if not self.make_hazard_vecs:
                continue

            # read the write-hazard bitvector (wv) for any bit that is
            # about to be read by this operand (wvchk_en selects the bit)
            wvchk_en = Signal(len(wvchk), name="wv_chk_addr_en_"+name)
            issue_active = Signal(name="rd_iactive_"+name)
            # XXX combinatorial loop here
            comb += issue_active.eq(fu_active & rdflag)
            with m.If(issue_active):
                if rfile.unary:
                    comb += wvchk_en.eq(read)
                else:
                    comb += wvchk_en.eq(1<<read)
            # if FU is busy (which doesn't get set at the same time as
            # issue) and no hazard was detected, clear wvchk_en (i.e.
            # stop checking for hazards).  there is a loop here, but it's
            # via a DFF, so is ok. some linters may complain, but hey.
            with m.If(fu.busy_o & ~rhazard):
                comb += wvchk_en.eq(0)

            # read-hazard is ANDed with (filtered by) what is actually
            # being requested.
            comb += rhazard.eq((wvchk & wvchk_en).bool())

            wvens.append(wvchk_en)

    # or-reduce the muxed read signals
    if rfile.unary:
        # for unary-addressed
        comb += rport.ren.eq(ortreereduce_sig(rens))
    else:
        # for binary-addressed
        comb += rport.addr.eq(ortreereduce_sig(addrs))
        comb += rport.ren.eq(Cat(*rens).bool())
        print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    if not self.make_hazard_vecs:
        return Const(0) # declare "no hazards"

    # enable the read bitvectors for this issued instruction
    # and return whether any write-hazard bit is set
    # NOTE(review): "name" here is whatever the loops above last set it
    # to, so this Signal is named after the last operand processed
    wvchk_and = Signal(len(wvchk), name="wv_chk_"+name)
    comb += wvchk_and.eq(wvchk & ortreereduce_sig(wvens))
    comb += hazard_detected.eq(wvchk_and.bool())
    return hazard_detected
675
def connect_rdports(self, m, fu_bitdict, fu_selected):
    """connect read ports

    takes the read regspecs as a dict-of-dicts (by regfile, then by
    regport name) from get_byregfiles, and for every regport connects
    all the Function Units that want it by way of a PriorityPicker
    (see connect_rdport).

    when self.regreduce_en is set, some ports are merged first: INT
    rb, rc and ra all go onto one port named "rabc", and FAST fast2 and
    fast3 (where present) are folded into fast1.

    returns a single-bit expression: the OR of the hazard indications
    returned by every connect_rdport call.
    """
    # dictionary of lists of regfile read ports
    byregfiles_rdspec = self.get_byregfiles(m, True)

    # okaay, now we need a PriorityPicker per regfile per regfile port
    # loootta pickers... peter piper picked a pack of pickled peppers...
    rdpickers = {}
    hazards = []
    for regfile, fuspecs in byregfiles_rdspec.items():
        rdpickers[regfile] = {}

        # argh.  an experiment to merge read ports in the INT and FAST
        # regfiles (we have too many read/write ports)
        if self.regreduce_en:
            if regfile == 'INT':
                merged = [fuspecs.pop(port) for port in ('rb', 'rc', 'ra')]
                fuspecs['rabc'] = merged
            elif regfile == 'FAST':
                merged = [fuspecs.pop('fast1')]
                for extra in ('fast2', 'fast3'):
                    if extra in fuspecs:
                        merged.append(fuspecs.pop(extra))
                fuspecs['fast1'] = merged

        # for each named regfile port, connect up all FUs to that port
        # also return (and collate) hazard detection)
        for regname, fspec in sort_fuspecs(fuspecs):
            print("connect rd", regname, fspec)
            hazards.append(self.connect_rdport(m, fu_bitdict, fu_selected,
                                               rdpickers, regfile,
                                               regname, fspec))

    return Cat(*hazards).bool()
721
def make_hazards(self, m, regfile, rfile, wvclr, wvset,
                funame, regname, idx,
                addr_en, wp, fu, fu_active, wrflag, write,
                fu_wrok):
    """make_hazards: a setter and a clearer for the regfile write ports

    setter is at issue time (using PowerDecoder2 regfile write numbers)
    clearer is at regfile write time (when FU has said what to write to)

    there is *one* unusual case here which has to be dealt with:
    when the Function Unit does *NOT* request a write to the regfile
    (has its data.ok bit CLEARED).  this is perfectly legitimate.
    and a royal pain.

    returns a tuple (wvaddr_en, wviaddr_en):

    * wvaddr_en (same width as wvclr): the bit(s) to clear
    * wviaddr_en (same width as wvset): the bit(s) to set

    wvclr and wvset themselves are only used for their width (and
    printed): nothing is assigned to them in this function.
    """
    comb, sync = m.d.comb, m.d.sync
    name = "%s_%s_%d" % (funame, regname, idx)

    # connect up the bitvector write hazard.  unlike the
    # regfile writeports, a ONE must be written to the corresponding
    # bit of the hazard bitvector (to indicate the existence of
    # the hazard)

    # the detection of what shall be written to is based
    # on *issue*.  it is delayed by 1 cycle so that instructions
    # "addi 5,5,0x2" do not cause combinatorial loops due to
    # fake-dependency on *themselves*.  this will totally fail
    # spectacularly when doing multi-issue
    print ("write vector (for regread)", regfile, wvset)
    wviaddr_en = Signal(len(wvset), name="wv_issue_addr_en_"+name)
    issue_active = Signal(name="iactive_"+name)
    # sync, not comb: this is the 1-cycle delay described above
    sync += issue_active.eq(fu.issue_i & fu_active & wrflag)
    with m.If(issue_active):
        if rfile.unary:
            comb += wviaddr_en.eq(write)
        else:
            comb += wviaddr_en.eq(1<<write)

    # deal with write vector clear: this kicks in when the regfile
    # is written to, and clears the corresponding bitvector entry
    print ("write vector", regfile, wvclr)
    wvaddr_en = Signal(len(wvclr), name="wvaddr_en_"+name)
    if rfile.unary:
        comb += wvaddr_en.eq(addr_en)
    else:
        with m.If(wp):
            comb += wvaddr_en.eq(1<<addr_en)

    # XXX ASSUME that LDSTFunctionUnit always sets the data it intends to
    # this may NOT be the case when an exception occurs
    if isinstance(fu, LDSTFunctionUnit):
        return wvaddr_en, wviaddr_en

    # okaaay, this is preparation for the awkward case.
    # * latch a copy of wrflag when issue goes high.
    # * when the fu_wrok (data.ok) flag is NOT set,
    #   but the FU is done, the FU is NEVER going to write
    #   so the bitvector has to be cleared.
    latch_wrflag = Signal(name="latch_wrflag_"+name)
    with m.If(~fu.busy_o):
        sync += latch_wrflag.eq(0)
    with m.If(fu.issue_i & fu_active):
        sync += latch_wrflag.eq(wrflag)
    # NOTE(review): the binary branch below shifts by addr_en, and its
    # comment says addr_en is "not gated".  if the caller passes an
    # addr_en that is already gated by the write-pick (zero when not
    # picked), this would clear bit 0 rather than the bit for "write" -
    # confirm against the caller.
    with m.If(fu.alu_done_o & latch_wrflag & ~fu_wrok):
        if rfile.unary:
            comb += wvaddr_en.eq(write) # addr_en gated with wp, don't use
        else:
            comb += wvaddr_en.eq(1<<addr_en) # binary addr_en not gated

    return wvaddr_en, wviaddr_en
791
792 def connect_wrport(self, m, fu_bitdict, fu_selected,
793 wrpickers, regfile, regname, fspec):
794 comb, sync = m.d.comb, m.d.sync
795 fus = self.fus.fus
796 regs = self.regs
797
798 rpidx = regname
799
800 # select the required write port. these are pre-defined sizes
801 rfile = regs.rf[regfile.lower()]
802 wport = rfile.w_ports[rpidx]
803
804 print("connect wr", regname, "unary", rfile.unary, fspec)
805 print(regfile, regs.rf.keys())
806
807 # select the write-protection hazard vector. note that this still
808 # requires to WRITE to the hazard bitvector! read-requests need
809 # to RAISE the bitvector (set it to 1), which, duh, requires a WRITE
810 if self.make_hazard_vecs:
811 wv = regs.wv[regfile.lower()]
812 wvset = wv.s # write-vec bit-level hazard ctrl
813 wvclr = wv.r # write-vec bit-level hazard ctrl
814 wvchk = wv.q # write-after-write hazard check
815
816 fspecs = fspec
817 if not isinstance(fspecs, list):
818 fspecs = [fspecs]
819
820 pplen = 0
821 writes = []
822 ppoffs = []
823 wrflags = []
824 for i, fspec in enumerate(fspecs):
825 # get the regfile specs for this regfile port
826 (wf, _write, wid, fuspecs) = \
827 (fspec.okflag, fspec.regport, fspec.wid, fspec.specs)
828 print ("fpsec", i, "wrflag", wf, fspec, len(fuspecs))
829 ppoffs.append(pplen) # record offset for picker
830 pplen += len(fuspecs)
831
832 name = "%s_%s_%d" % (regfile, regname, i)
833 wrflag = Signal(name="wr_flag_"+name)
834 if wf is not None:
835 comb += wrflag.eq(wf)
836 else:
837 comb += wrflag.eq(0)
838 wrflags.append(wrflag)
839
840 # create a priority picker to manage this port
841 wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
842 m.submodules["wrpick_%s_%s" % (regfile, rpidx)] = wrpick
843
844 wsigs = []
845 wens = []
846 wvsets = []
847 wvseten = []
848 wvclren = []
849 #wvens = [] - not needed: reading of writevec is permanently held hi
850 addrs = []
851 for i, fspec in enumerate(fspecs):
852 # connect up the FU req/go signals and the reg-read to the FU
853 # these are arbitrated by Data.ok signals
854 (wf, _write, wid, fuspecs) = \
855 (fspec.okflag, fspec.regport, fspec.wid, fspec.specs)
856 for pi, fuspec in enumerate(fspec.specs):
857 (funame, fu, idx) = (fuspec.funame, fuspec.fu, fuspec.idx)
858 fu_requested = fu_bitdict[funame]
859 pi += ppoffs[i]
860 name = "%s_%s_%s_%d" % (funame, regfile, regname, idx)
861 # get (or set up) a write-latched copy of write register number
862 write = Signal.like(_write, name="write_"+name)
863 rname = "%s_%s_%s_%d" % (funame, regfile, regname, idx)
864 if rname not in fu.wr_latches:
865 wrl = Signal.like(_write, name="wrlatch_"+rname)
866 fu.wr_latches[rname] = write
867 # do not depend on fu.issue_i here, it creates a
868 # combinatorial loop on waw checking. using the FU
869 # "enable" bitdict entry for this FU is sufficient,
870 # because the PowerDecoder2 read/write nums are
871 # valid continuously when the instruction is valid
872 with m.If(fu_requested):
873 sync += wrl.eq(_write)
874 comb += write.eq(_write)
875 with m.Else():
876 comb += write.eq(wrl)
877 else:
878 write = fu.wr_latches[rname]
879
880 # write-request comes from dest.ok
881 dest = fu.get_out(idx)
882 fu_dest_latch = fu.get_fu_out(idx) # latched output
883 name = "%s_%s_%d" % (funame, regname, idx)
884 fu_wrok = Signal(name="fu_wrok_"+name, reset_less=True)
885 comb += fu_wrok.eq(dest.ok & fu.busy_o)
886
887 # connect request-write to picker input, and output to go-wr
888 fu_active = fu_selected[funame]
889 pick = fu.wr.rel_o[idx] & fu_active
890 comb += wrpick.i[pi].eq(pick)
891 # create a single-pulse go write from the picker output
892 wr_pick = Signal(name="wpick_%s_%s_%d" % (funame, regname, idx))
893 comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
894 comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))
895
896 # connect the regspec write "reg select" number to this port
897 # only if one FU actually requests (and is granted) the port
898 # will the write-enable be activated
899 wname = "waddr_en_%s_%s_%d" % (funame, regname, idx)
900 addr_en = Signal.like(write, name=wname)
901 wp = Signal()
902 comb += wp.eq(wr_pick & wrpick.en_o)
903 comb += addr_en.eq(Mux(wp, write, 0))
904 if rfile.unary:
905 wens.append(addr_en)
906 else:
907 addrs.append(addr_en)
908 wens.append(wp)
909
910 # connect regfile port to input
911 print("reg connect widths",
912 regfile, regname, pi, funame,
913 dest.shape(), wport.i_data.shape())
914 wsigs.append(fu_dest_latch)
915
916 # now connect up the bitvector write hazard
917 if not self.make_hazard_vecs:
918 continue
919 res = self.make_hazards(m, regfile, rfile, wvclr, wvset,
920 funame, regname, idx,
921 addr_en, wp, fu, fu_active,
922 wrflags[i], write, fu_wrok)
923 wvaddr_en, wv_issue_en = res
924 wvclren.append(wvaddr_en) # set only: no data => clear bit
925 wvseten.append(wv_issue_en) # set data same as enable
926
927 # read the write-hazard bitvector (wv) for any bit that is
928 fu_requested = fu_bitdict[funame]
929 wvchk_en = Signal(len(wvchk), name="waw_chk_addr_en_"+name)
930 issue_active = Signal(name="waw_iactive_"+name)
931 whazard = Signal(name="whaz_"+name)
932 if wf is None:
933 # XXX EEK! STATE regfile (branch) does not have an
934 # write-active indicator in regspec_decode_write()
935 print ("XXX FIXME waw_iactive", issue_active,
936 fu_requested, wf)
937 else:
938 # check bits from the incoming instruction. note (back
939 # in connect_instruction) that the decoder is held for
940 # us to be able to do this, here... *without* issue being
941 # held HI. we MUST NOT gate this with fu.issue_i or
942 # with fu_bitdict "enable": it would create a loop
943 comb += issue_active.eq(wf)
944 with m.If(issue_active):
945 if rfile.unary:
946 comb += wvchk_en.eq(write)
947 else:
948 comb += wvchk_en.eq(1<<write)
949 # if FU is busy (which doesn't get set at the same time as
950 # issue) and no hazard was detected, clear wvchk_en (i.e.
951 # stop checking for hazards). there is a loop here, but it's
952 # via a DFF, so is ok. some linters may complain, but hey.
953 with m.If(fu.busy_o & ~whazard):
954 comb += wvchk_en.eq(0)
955
956 # write-hazard is ANDed with (filtered by) what is actually
957 # being requested. the wvchk data is on a one-clock delay,
958 # and wvchk_en comes directly from the main decoder
959 comb += whazard.eq((wvchk & wvchk_en).bool())
960 with m.If(whazard):
961 comb += fu._waw_hazard.eq(1)
962
963 #wvens.append(wvchk_en)
964
965 # here is where we create the Write Broadcast Bus. simple, eh?
966 comb += wport.i_data.eq(ortreereduce_sig(wsigs))
967 if rfile.unary:
968 # for unary-addressed
969 comb += wport.wen.eq(ortreereduce_sig(wens))
970 else:
971 # for binary-addressed
972 comb += wport.addr.eq(ortreereduce_sig(addrs))
973 comb += wport.wen.eq(ortreereduce_sig(wens))
974
975 if not self.make_hazard_vecs:
976 return [], []
977
978 # return these here rather than set wvclr/wvset directly,
979 # because there may be more than one write-port to a given
980 # regfile. example: XER has a write-port for SO, CA, and OV
981 # and the *last one added* of those would overwrite the other
982 # two. solution: have connect_wrports collate all the
983 # or-tree-reduced bitvector set/clear requests and drop them
984 # in as a single "thing". this can only be done because the
985 # set/get is an unary bitvector.
986 print ("make write-vecs", regfile, regname, wvset, wvclr)
987 return (wvclren, # clear (regfile write)
988 wvseten) # set (issue time)
989
def connect_wrports(self, m, fu_bitdict, fu_selected):
    """connect write ports

    orders the write regspecs into a dict-of-dicts, by regfile,
    by regport name (see get_byregfiles), then hands each regport,
    together with the list of FUs that want it, to connect_wrport.

    when self.regreduce_en is set, some ports are first merged into
    a single lane (a list of regspecs) so that they share one regfile
    write port: INT 'o1' is folded into 'o', and FAST 'fast2'/'fast3'
    are folded into 'fast1'. each of the folded ports is optional.

    when self.make_hazard_vecs is set, the write-hazard bitvector
    set/clear requests returned by each connect_wrport call are
    collated per regfile and OR-reduced onto regs.wv[regfile].s / .r

    note that the write-port wen, write-port data, and go_wr_i all need
    to be on the exact same clock cycle. the assignments made directly
    by this function are all combinatorial (m.d.comb).

    * m:           the Module under construction
    * fu_bitdict:  passed through unmodified to connect_wrport
    * fu_selected: passed through unmodified to connect_wrport

    returns None.
    """
    comb = m.d.comb
    # dictionary of lists of regfile write ports
    byregfiles_wrspec = self.get_byregfiles(m, False)

    # same for write ports.
    # BLECH! complex code-duplication! BLECH!
    wrpickers = {}
    wvclrers = defaultdict(list)
    wvseters = defaultdict(list)
    for regfile, fuspecs in byregfiles_wrspec.items():
        wrpickers[regfile] = {}

        if self.regreduce_en:
            # argh, more port-merging
            if regfile == 'INT':
                fuspecs['o'] = [fuspecs.pop('o')]
                # 'o1' only exists if some FU has a second INT output:
                # treat it as optional, exactly like fast2/fast3 below
                if 'o1' in fuspecs:
                    fuspecs['o'].append(fuspecs.pop('o1'))
            if regfile == 'FAST':
                fuspecs['fast1'] = [fuspecs.pop('fast1')]
                if 'fast2' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast2'))
                if 'fast3' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast3'))

        # collate these and record them by regfile because there
        # are sometimes more write-ports per regfile
        wvname = regfile.lower()
        for (regname, fspec) in sort_fuspecs(fuspecs):
            wvclren, wvseten = self.connect_wrport(m,
                                    fu_bitdict, fu_selected,
                                    wrpickers,
                                    regfile, regname, fspec)
            wvclrers[wvname] += wvclren
            wvseters[wvname] += wvseten

    if not self.make_hazard_vecs:
        return

    # for write-vectors: reduce the clr-ers and set-ers down to
    # a single set of bits.  otherwise if there are two write
    # ports (on some regfiles), the last one doing comb += on
    # the reg.wv[regfile] instance "wins" (and all others are ignored,
    # whoops).  if there was only one write-port per wv regfile this would
    # not be an issue.
    regs = self.regs
    for regfile, wvclren in wvclrers.items():
        wv = regs.wv[regfile]
        wvseten = wvseters[regfile]
        # wv.r / wv.s: write-vec bit-level hazard ctrl
        comb += wv.r.eq(ortreereduce_sig(wvclren))  # clear (regfile write)
        comb += wv.s.eq(ortreereduce_sig(wvseten))  # set (issue time)
1054
def get_byregfiles(self, m, readmode):
    """group the Function Units' register ports by regfile and port name

    walks every FU in self.fus.fus and, for each of its source
    (readmode True) or destination (readmode False) operands, asks
    regspec_decode for the ok-flag and register-port number of that
    regfile/regname in the decoded instruction (self.ireg.e).

    side-effect: each FU has its latch dictionaries reset to empty:
    rd_latches and rf_latches in read mode, wr_latches in write mode.
    these are filled in later, when the ports are connected up.

    * m:        the Module under construction (passed to regspec_decode)
    * readmode: True for read ports (FU inputs), False for write ports

    returns a dictionary of dictionaries: first key is the regfile,
    second key is the regfile port name, value is a ByRegSpec holding
    the okflag, regport and width recorded from the first FU found for
    that port, plus the list of FUSpecs (funame, fu, idx) of every FU
    operand that needs access to it.
    """
    mode = "read" if readmode else "write"
    fus = self.fus.fus
    e = self.ireg.e  # decoded instruction to execute

    # dictionary of dictionaries of lists/tuples of regfile ports.
    # first key: regfile.  second key: regfile port name
    byregfiles_spec = defaultdict(dict)

    for (funame, fu) in fus.items():
        # create in each FU a receptacle for the read/write register
        # hazard numbers (and okflags for read).  to be latched in
        # connect_rd/write_ports
        if readmode:
            fu.rd_latches = {}  # read reg number latches
            fu.rf_latches = {}  # read flag latches
        else:
            fu.wr_latches = {}

        # construct regfile specs: read uses inspec, write outspec
        print("%s ports for %s" % (mode, funame))
        for idx in range(fu.n_src if readmode else fu.n_dst):
            (regfile, regname, wid) = fu.get_io_spec(readmode, idx)
            print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))

            # the PowerDecoder2 (main one, not the satellites) contains
            # the decoded regfile numbers.  obtain these now
            decinfo = regspec_decode(m, readmode, e, regfile, regname)
            okflag, regport = decinfo.okflag, decinfo.regport

            # construct the dictionary of regspec information by regfile
            if regname not in byregfiles_spec[regfile]:
                byregfiles_spec[regfile][regname] = \
                    ByRegSpec(okflag, regport, wid, [])

            # here we start to create "lanes" where each Function Unit
            # requiring access to a given [single-contended resource]
            # regfile port is appended to a list, so that PriorityPickers
            # can be created to give uncontested access to it
            fuspec = FUSpec(funame, fu, idx)
            byregfiles_spec[regfile][regname].specs.append(fuspec)

    # ok just print that all out, for convenience
    for regfile, portspecs in byregfiles_spec.items():
        print("regfile %s ports:" % mode, regfile)
        for regname, fspec in portspecs.items():
            # a distinct name for the lane list: do not rebind the
            # dictionary that is currently being iterated over
            [okflag, regport, wid, lane] = fspec
            print(" rf %s port %s lane: %s" % (mode, regfile, regname))
            print(" %s" % regname, wid, okflag, regport)
            for (funame, fu, idx) in lane:
                fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                print(" ", funame, fu.__class__.__name__, idx, fusig)
                print()

    return byregfiles_spec
1112
def __iter__(self):
    """iterate over ports: everything from self.fus.ports(), then
    self.i.e.ports(), then self.l0.ports(), in that order.
    """
    for port in self.fus.ports():
        yield port
    for port in self.i.e.ports():
        yield port
    for port in self.l0.ports():
        yield port
    # TODO: regs
1118
def ports(self):
    """return, as a list, everything produced by iterating over self"""
    return [port for port in self]
1121
1122
if __name__ == '__main__':
    # when run as a script: build a core from a test pspec, convert it
    # to RTLIL and write the result out to test_core.il
    core_config = {
        'ldst_ifacetype': 'testpi',
        'imem_ifacetype': '',
        'addr_wid': 48,
        'allow_overlap': True,
        'mask_wid': 8,
        'reg_wid': 64,
    }
    core = NonProductionCore(TestMemPspec(**core_config))
    il_text = rtlil.convert(core, ports=core.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(il_text)