move DEC and TB into StateRegs, to make room in FastRegs
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 (update: actually this is being added now:
21 https://bugs.libre-soc.org/show_bug.cgi?id=737)
22 """
23
24 from nmigen import (Elaboratable, Module, Signal, ResetSignal, Cat, Mux,
25 Const)
26 from nmigen.cli import rtlil
27
28 from openpower.decoder.power_decoder2 import PowerDecodeSubset
29 from openpower.decoder.power_regspec_map import regspec_decode
30 from openpower.sv.svp64 import SVP64Rec
31
32 from nmutil.picker import PriorityPicker
33 from nmutil.util import treereduce
34 from nmutil.singlepipe import ControlBase
35
36 from soc.fu.compunits.compunits import AllFunctionUnits, LDSTFunctionUnit
37 from soc.regfile.regfiles import RegFiles
38 from openpower.decoder.power_decoder2 import get_rdflags
39 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
40 from soc.config.test.test_loadstore import TestMemPspec
41 from openpower.decoder.power_enums import MicrOp, Function
42 from soc.simple.core_data import CoreInput, CoreOutput
43
44 from collections import defaultdict, namedtuple
45 import operator
46
47 from nmutil.util import rising_edge
48
49 FUSpec = namedtuple("FUSpec", ["funame", "fu", "idx"])
50 ByRegSpec = namedtuple("ByRegSpec", ["okflag", "regport", "wid", "specs"])
51
52 # helper function for reducing a list of signals down to a parallel
53 # ORed single signal.
def ortreereduce(tree, attr="o_data"):
    """OR together one attribute taken from every item of ``tree``.

    Args:
        tree: the items to reduce; passed unchanged to ``treereduce``.
        attr: name of the attribute fetched (via ``getattr``) from each
            item before it is combined.  Defaults to ``"o_data"``.

    Returns:
        The result of ``treereduce`` using ``operator.or_`` as the
        combining operator.
    """
    return treereduce(tree, operator.or_, lambda x: getattr(x, attr))
56
57
def ortreereduce_sig(tree):
    """OR together the items of ``tree`` themselves (no attribute lookup).

    Args:
        tree: the items to reduce; passed unchanged to ``treereduce``.

    Returns:
        The result of ``treereduce`` using ``operator.or_`` as the
        combining operator and the identity function as the accessor.
    """
    return treereduce(tree, operator.or_, lambda x: x)
60
61
62 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the items of ``fuspecs`` with "full" register ports first.

    Args:
        fuspecs: dict mapping a regfile port name to its spec.

    Returns:
        A list of ``(regname, fspec)`` tuples.  Entries whose name starts
        with ``"full"`` come first, followed by all the others; within
        each group the dict's own iteration order is kept.
    """
    # sorted() is stable and False < True, so keying on "is NOT a full
    # port" floats the full ports to the front without reordering anything
    # else.
    return sorted(fuspecs.items(),
                  key=lambda item: not item[0].startswith("full"))
72
73
74 # a hazard bitvector "remap" function which returns an AST expression
75 # that remaps read/write hazard regfile port numbers to either a full
76 # bitvector or a reduced subset one. SPR for example is reduced to a
77 # single bit.
78 # CRITICALLY-IMPORTANT NOTE: these bitvectors *have* to match up per
79 # regfile! therefore the remapping is per regfile, *NOT* per regfile
80 # port and certainly not based on whether it is a read port or write port.
81 # note that any reductions here will result in degraded performance due
82 # to conflicts, but at least it keeps the hazard matrix sizes down to "sane"
def bitvector_remap(regfile, rfile, port):
    """Remap a regfile port number to a hazard-bitvector expression.

    Args:
        regfile: upper-case regfile name (``'CR'``, ``'XER'``,
            ``'SVSTATE'``, ``'FAST'``, ``'SPR'`` or ``'INT'``).
        rfile: the regfile object.  Only its ``unary`` attribute is read,
            and only for ``'FAST'``, ``'SPR'`` and ``'INT'``.
        port: the port number (or expression) to remap.

    Returns:
        ``port`` unchanged when the regfile is already unary-addressed,
        ``1 << port`` when it is binary-addressed, and ``None`` for any
        regfile name not listed above.
    """
    # CR, XER and SVSTATE are treated as unary already: no remap needed.
    if regfile in ('CR', 'XER', 'SVSTATE'):
        return port
    # FAST, SPR and INT may be either unary or binary addressed: convert
    # a binary port number to a one-hot (unary) bit.
    # NOTE: no reduction of the SPR vector to a single bit is performed
    # here; SPR is handled exactly like FAST and INT.
    if regfile in ('FAST', 'SPR', 'INT'):
        if rfile.unary:
            return port
        return 1 << port
    # unknown regfile: no mapping available
    return None
113
114
115 # derive from ControlBase rather than have a separate Stage instance,
116 # this is simpler to do
117 class NonProductionCore(ControlBase):
def __init__(self, pspec):
    """Create the core: Function Units, register files and decoders.

    Args:
        pspec: parameter specification object.  The optional attributes
            ``svp64``, ``regreduce``, ``allow_overlap``, ``core_type``
            and ``msr_reset`` are read here if present;
            ``ldst_ifacetype`` is read unconditionally (for a debug
            print).  ``pspec`` is also handed on to the L0 cache buffer,
            the Function Units and the register files.
    """
    self.pspec = pspec

    # test if SVP64 is to be enabled
    self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)

    # test to see if regfile ports should be reduced
    self.regreduce_en = (hasattr(pspec, "regreduce") and
                         (pspec.regreduce == True))

    # test to see if overlapping of instructions is allowed
    # (not normally enabled for TestIssuer FSM but useful for checking
    # the bitvector hazard detection, before doing In-Order)
    self.allow_overlap = (hasattr(pspec, "allow_overlap") and
                          (pspec.allow_overlap == True))

    # test core type
    self.make_hazard_vecs = self.allow_overlap
    self.core_type = "fsm"
    if hasattr(pspec, "core_type"):
        self.core_type = pspec.core_type

    super().__init__(stage=self)

    # single LD/ST funnel for memory access
    self.l0 = l0 = TstL0CacheBuffer(pspec, n_units=1)
    pi = l0.l0.dports[0]

    # function units (only one each)
    # only include mmu if enabled in pspec
    self.fus = AllFunctionUnits(pspec, pilist=[pi])

    # link LoadStore1 into MMU
    mmu = self.fus.get_fu('mmu0')
    ldst0 = self.fus.get_fu('ldst0')
    print("core pspec", pspec.ldst_ifacetype)
    print("core mmu", mmu)
    if mmu is not None:
        lsi = l0.cmpi.lsmem.lsi  # a LoadStore1 Interface object
        print("core lsmem.lsi", lsi)
        mmu.alu.set_ldst_interface(lsi)
        # urr store I-Cache in core so it is easier to get at
        self.icache = lsi.icache

    # MSR reset value is zero unless pspec supplies an integer msr_reset
    self.msr_at_reset = 0x0
    if hasattr(pspec, "msr_reset") and isinstance(pspec.msr_reset, int):
        self.msr_at_reset = pspec.msr_reset
    state_resets = [0x0,                # PC at reset
                    self.msr_at_reset,  # MSR at reset
                    0x0,                # SVSTATE at reset
                    0x0,                # DEC at reset
                    0x0]                # TB at reset

    # register files (yes plural)
    self.regs = RegFiles(pspec, make_hazard_vecs=self.make_hazard_vecs,
                         state_resets=state_resets)

    # set up input and output: unusual requirement to set data directly
    # (due to the way that the core is set up in a different domain,
    # see TestIssuer.setup_peripherals
    self.p.i_data, self.n.o_data = self.new_specs(None)
    self.i, self.o = self.p.i_data, self.n.o_data

    # actual internal input data used (captured)
    self.ireg = self.ispec()

    # create per-FU instruction decoders (subsetted).  these "satellite"
    # decoders reduce wire fan-out from the one (main) PowerDecoder2
    # (used directly by the trap unit) to the *twelve* (or more)
    # Function Units.  we can either have 32 wires (the instruction)
    # to each, or we can have well over a 200 wire fan-out (to 12
    # ALUs). it's an easy choice to make.
    self.decoders = {}
    self.des = {}

    # eep, these should be *per FU* i.e. for FunctionUnitBaseMulti
    # they should be shared (put into the ALU *once*).

    for funame, fu in self.fus.fus.items():
        f_name = fu.fnunit.name
        opkls = fu.opsubsetkls
        if f_name == 'TRAP':
            # TRAP decoder is the *main* decoder: no satellite is made,
            # only the name of the trap unit is remembered
            self.trapunit = funame
            continue
        assert funame not in self.decoders
        self.decoders[funame] = PowerDecodeSubset(
            None, opkls, f_name,
            final=True,
            state=self.ireg.state,
            svp64_en=self.svp64_en,
            regreduce_en=self.regreduce_en)
        self.des[funame] = self.decoders[funame].do
        print("create decoder subset", funame, opkls, self.des[funame])

    # create per-Function Unit write-after-write hazard signals
    # yes, really, this should have been added in ReservationStations
    # but hey.
    for funame, fu in self.fus.fus.items():
        fu._waw_hazard = Signal(name="waw_%s" % funame)

    # share the SPR decoder with the MMU if it exists
    if "mmu0" in self.decoders:
        self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]

    # allow pausing of the DEC/TB FSM back in Issuer, by spotting
    # if there is an MTSPR instruction
    self.pause_dec_tb = Signal()
226
227 # next 3 functions are Stage API Compliance
228 def setup(self, m, i):
229 pass
230
def ispec(self):
    """Stage API compliance: return a new input-specification object.

    Returns:
        A ``CoreInput`` built from ``self.pspec``, ``self.svp64_en`` and
        ``self.regreduce_en``.
    """
    return CoreInput(self.pspec, self.svp64_en, self.regreduce_en)
233
def ospec(self):
    """Stage API compliance: return a new output-specification object.

    Returns:
        A freshly constructed ``CoreOutput``.
    """
    return CoreOutput()
236
237 # elaborate function to create HDL
def elaborate(self, platform):
    """Create the HDL for the core.

    Adds the Function Units, the L0 cache buffer and the register files
    to the module, wires up the satellite decoders, instruction issue and
    regfile read/write ports, and collates the exception flags.

    Args:
        platform: passed on to the parent ``elaborate`` and to
            ``self.regs.elaborate_into``.

    Returns:
        The module obtained from the parent class, with the core's logic
        added.  When ``pspec.nocore`` is set, only a dummy toggling
        signal is added.
    """
    m = super().elaborate(platform)

    # for testing purposes, to cut down on build time in coriolis2
    if hasattr(self.pspec, "nocore") and self.pspec.nocore == True:
        x = Signal()  # dummy signal
        m.d.sync += x.eq(~x)
        return m
    comb = m.d.comb

    m.submodules.fus = self.fus
    m.submodules.l0 = self.l0
    self.regs.elaborate_into(m, platform)

    # amalgamate write-hazards into a single top-level Signal
    self.waw_hazard = Signal()
    whaz = []
    for funame, fu in self.fus.fus.items():
        whaz.append(fu._waw_hazard)
    comb += self.waw_hazard.eq(Cat(*whaz).bool())

    # connect decoders
    self.connect_satellite_decoders(m)

    # ssh, cheat: trap uses the main decoder because of the rewriting
    self.des[self.trapunit] = self.ireg.e.do

    # connect up Function Units, then read/write ports, and hazard conflict
    self.issue_conflict = Signal()
    fu_bitdict, fu_selected = self.connect_instruction(m)
    raw_hazard = self.connect_rdports(m, fu_bitdict, fu_selected)
    self.connect_wrports(m, fu_bitdict, fu_selected)
    if self.allow_overlap:
        comb += self.issue_conflict.eq(raw_hazard)

    # note if an exception happened.  in a pipelined or OoO design
    # this needs to be accompanied by "shadowing" (or stalling)
    el = []
    for exc in self.fus.excs.values():
        el.append(exc.happened)
    if len(el) > 0:  # at least one exception
        comb += self.o.exc_happened.eq(Cat(*el).bool())

    return m
284
def connect_satellite_decoders(self, m):
    """Add every satellite decoder as a submodule and feed it from ireg.

    Each decoder in ``self.decoders`` receives the raw instruction and
    endianness from ``self.ireg``, plus ``sv_a_nz``.  When SVP64 is
    enabled the predicate modes, the SVP64 RM and mode flag are connected
    as well, and decoders whose name starts with "ldst" additionally get
    ``use_svp64_ldst_dec``.

    Args:
        m: the module being elaborated.
    """
    comb = m.d.comb
    for k, v in self.decoders.items():
        # connect each satellite decoder and give it the instruction.
        # as subset decoders this massively reduces wire fanout given
        # the large number of ALUs
        m.submodules["dec_%s" % k] = v
        comb += v.dec.raw_opcode_in.eq(self.ireg.raw_insn_i)
        comb += v.dec.bigendian.eq(self.ireg.bigendian_i)
        # sigh due to SVP64 RA_OR_ZERO detection connect these too
        comb += v.sv_a_nz.eq(self.ireg.sv_a_nz)
        if not self.svp64_en:
            continue
        comb += v.pred_sm.eq(self.ireg.sv_pred_sm)
        comb += v.pred_dm.eq(self.ireg.sv_pred_dm)
        if k == self.trapunit:
            continue
        comb += v.sv_rm.eq(self.ireg.sv_rm)  # pass through SVP64 RM
        comb += v.is_svp64_mode.eq(self.ireg.is_svp64_mode)
        # only the LDST PowerDecodeSubset *actually* needs to
        # know to use the alternative decoder.  this is all
        # a terrible hack
        if not k.lower().startswith("ldst"):
            continue
        comb += v.use_svp64_ldst_dec.eq(self.ireg.use_svp64_ldst_dec)
310
def connect_instruction(self, m):
    """connect_instruction

    uses decoded (from PowerOp) function unit information from CSV files
    to ascertain which Function Unit should deal with the current
    instruction.

    some (such as OP_ATTN, OP_NOP) are dealt with here, including
    ignoring it and halting the processor.  OP_NOP is a bit annoying
    because the issuer expects busy flag still to be raised then lowered.
    (this requires a fake counter to be set).

    Args:
        m: the module being elaborated.

    Returns:
        A tuple ``(fu_bitdict, fu_selected)`` of dicts keyed by Function
        Unit name.  ``fu_bitdict[funame]`` is the "issue enable" bit for
        that FU (the picker output); ``fu_selected[funame]`` is the
        "busy or being issued" bit for that FU.
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus

    # indicate if core is busy
    busy_o = self.o.busy_o
    any_busy_o = self.o.any_busy_o

    # connect up temporary copy of incoming instruction. the FSM will
    # either blat the incoming instruction (if valid) into self.ireg
    # or if the instruction could not be delivered, keep dropping the
    # latched copy into ireg
    ilatch = self.ispec()
    self.instr_active = Signal()

    # enable/busy-signals for each FU, get one bit for each FU (by name)
    fu_enable = Signal(len(fus), reset_less=True)
    fu_busy = Signal(len(fus), reset_less=True)
    fu_bitdict = {}
    fu_selected = {}
    for i, funame in enumerate(fus.keys()):
        fu_bitdict[funame] = fu_enable[i]
        fu_selected[funame] = fu_busy[i]

    # identify function units and create a list by fnunit so that
    # PriorityPickers can be created for selecting one of them that
    # isn't busy at the time the incoming instruction needs passing on
    by_fnunit = defaultdict(list)
    for fname, member in Function.__members__.items():
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            if member.value & fnunit:  # this FU handles this type of op
                by_fnunit[fname].append((funame, fu))  # add by Function

    # ok now just print out the list of FUs by Function, because we can
    for fname, fu_list in by_fnunit.items():
        print("FUs by type", fname, fu_list)

    # now create a PriorityPicker per FU-type such that only one
    # non-busy FU will be picked
    fu_found = Signal()  # take a note if no Function Unit was available
    for fname, fu_list in by_fnunit.items():
        i_pp = PriorityPicker(len(fu_list))
        m.submodules['i_pp_%s' % fname] = i_pp
        i_l = []
        for i, (funame, fu) in enumerate(fu_list):
            # match the decoded instruction (e.do.fn_unit) against the
            # "capability" of this FU, gate that by whether that FU is
            # busy, and drop that into the PriorityPicker.
            # this will give us an output of the first available *non-busy*
            # Function Unit (Reservation Station) capable of handling this
            # instruction.
            fnunit = fu.fnunit.value
            en_req = Signal(name="issue_en_%s" % funame, reset_less=True)
            fnmatch = (self.ireg.e.do.fn_unit & fnunit).bool()
            comb += en_req.eq(fnmatch & ~fu.busy_o &
                              self.instr_active)
            i_l.append(en_req)  # store in list for doing the Cat-trick
            # picker output, gated by enable: store in fu_bitdict
            po = Signal(name="o_issue_pick_"+funame)  # picker output
            comb += po.eq(i_pp.o[i] & i_pp.en_o)
            comb += fu_bitdict[funame].eq(po)
            comb += fu_selected[funame].eq(fu.busy_o | po)
            # if we don't do this, then when there are no FUs available,
            # the "p.o_ready" signal will go back "ok we accepted this
            # instruction" which of course isn't true.
            with m.If(i_pp.en_o):
                comb += fu_found.eq(1)
        # for each input, Cat them together and drop them into the picker
        comb += i_pp.i.eq(Cat(*i_l))

    # rdmask, which is for registers needs to come from the *main* decoder
    for funame, fu in fus.items():
        rdmask = get_rdflags(m, self.ireg.e, fu)
        comb += fu.rdmaskn.eq(~rdmask)

    # sigh - need a NOP counter
    counter = Signal(2)
    with m.If(counter != 0):
        sync += counter.eq(counter - 1)
        comb += busy_o.eq(1)

    # default to reading from incoming instruction: may be overridden
    # by copy from latch when "waiting"
    comb += self.ireg.eq(self.i)
    # always say "ready" except if overridden
    comb += self.p.o_ready.eq(1)

    with m.FSM():
        with m.State("READY"):
            with m.If(self.p.i_valid):  # run only when valid
                with m.Switch(self.ireg.e.do.insn_type):
                    # check for ATTN: halt if true
                    with m.Case(MicrOp.OP_ATTN):
                        m.d.sync += self.o.core_terminate_o.eq(1)

                    # fake NOP - this isn't really used (Issuer detects NOP)
                    with m.Case(MicrOp.OP_NOP):
                        sync += counter.eq(2)
                        comb += busy_o.eq(1)

                    with m.Default():
                        comb += self.instr_active.eq(1)
                        comb += self.p.o_ready.eq(0)
                        # connect instructions. only one enabled at a time
                        for funame, fu in fus.items():
                            do = self.des[funame]
                            enable = fu_bitdict[funame]

                            # run this FunctionUnit if enabled route op,
                            # issue, busy, read flags and mask to FU
                            with m.If(enable):
                                # operand comes from the *local* decoder
                                # do not actually issue, though, if there
                                # is a waw hazard. decoder has to still
                                # be asserted in order to detect that, tho
                                comb += fu.oper_i.eq_from(do)
                                if funame == 'mmu0':
                                    # URRR this is truly dreadful.
                                    # OP_FETCH_FAILED is a "fake" op.
                                    # no instruction creates it.  OP_TRAP
                                    # uses the *main* decoder: this is
                                    # a *Satellite* decoder that reacts
                                    # on *insn_in*... not fake ops. gaah.
                                    main_op = self.ireg.e.do
                                    with m.If(main_op.insn_type ==
                                              MicrOp.OP_FETCH_FAILED):
                                        comb += fu.oper_i.insn_type.eq(
                                            MicrOp.OP_FETCH_FAILED)
                                        comb += fu.oper_i.fn_unit.eq(
                                            Function.MMU)
                                # issue when valid (and no write-hazard)
                                comb += fu.issue_i.eq(~self.waw_hazard)
                                # instruction ok, indicate ready
                                comb += self.p.o_ready.eq(1)

                        if self.allow_overlap:
                            with m.If(~fu_found | self.waw_hazard):
                                # latch copy of instruction
                                sync += ilatch.eq(self.i)
                                comb += self.p.o_ready.eq(1)  # accept
                                comb += busy_o.eq(1)
                                m.next = "WAITING"

        with m.State("WAITING"):
            comb += self.instr_active.eq(1)
            comb += self.p.o_ready.eq(0)
            comb += busy_o.eq(1)
            # using copy of instruction, keep waiting until an FU is free
            comb += self.ireg.eq(ilatch)
            with m.If(fu_found):  # wait for conflict to clear
                # connect instructions. only one enabled at a time
                for funame, fu in fus.items():
                    do = self.des[funame]
                    enable = fu_bitdict[funame]

                    # run this FunctionUnit if enabled route op,
                    # issue, busy, read flags and mask to FU
                    with m.If(enable):
                        # operand comes from the *local* decoder,
                        # which is asserted even if not issued,
                        # so that WaW-detection can check for hazards.
                        # only if the waw hazard is clear does the
                        # instruction actually get issued
                        comb += fu.oper_i.eq_from(do)
                        # issue when valid
                        comb += fu.issue_i.eq(~self.waw_hazard)
                        with m.If(~self.waw_hazard):
                            comb += self.p.o_ready.eq(1)
                            comb += busy_o.eq(0)
                            m.next = "READY"

    print("core: overlap allowed", self.allow_overlap)
    # true when any FU is busy (including the cycle where it is perhaps
    # to be issued - because that's what fu_busy is)
    comb += any_busy_o.eq(fu_busy.bool())
    if not self.allow_overlap:
        # for simple non-overlap, if any instruction is busy, set
        # busy output for core.
        comb += busy_o.eq(any_busy_o)
    else:
        # sigh deal with a fun situation that needs to be investigated
        # and resolved
        with m.If(self.issue_conflict):
            comb += busy_o.eq(1)
        # make sure that LDST, SPR, MMU, Branch and Trap all say "busy"
        # and do not allow overlap.  these are all the ones that
        # are non-forward-progressing: exceptions etc. that otherwise
        # change CoreState for some reason (MSR, PC, SVSTATE)
        for funame, fu in fus.items():
            if (funame.lower().startswith('ldst') or
                    funame.lower().startswith('branch') or
                    funame.lower().startswith('mmu') or
                    funame.lower().startswith('spr') or
                    funame.lower().startswith('trap')):
                with m.If(fu.busy_o):
                    comb += busy_o.eq(1)
            # for SPR pipeline pause dec/tb FSM to avoid race condition
            # TODO: really this should be much more sophisticated,
            # spot MTSPR, spot that DEC/TB is what is to be updated.
            # a job for PowerDecoder2, there
            if funame.lower().startswith('spr'):
                with m.If(fu.busy_o):
                    comb += self.pause_dec_tb.eq(1)

    # return both the function unit "enable" dict as well as the "busy".
    # the "busy-or-issued" can be passed in to the Read/Write port
    # connecters to give them permission to request access to regfiles
    return fu_bitdict, fu_selected
532
def connect_rdport(self, m, fu_bitdict, fu_selected,
                   rdpickers, regfile, regname, fspec):
    """Connect every requesting Function Unit to one regfile read port.

    A PriorityPicker is created for the port (and stored in
    ``rdpickers[regfile][regname]``); each FU's read-request is fed into
    it and the (one-cycle delayed) pick drives that FU's go-read and
    routes the port's output data to the FU's source operand.

    Args:
        m: the module being elaborated.
        fu_bitdict: dict of per-FU "issue enable" bits, by FU name
            (accepted for symmetry with the write-port connector; not
            read here).
        fu_selected: dict of per-FU "busy or being issued" bits, by FU
            name.
        rdpickers: dict-of-dicts into which the new picker is stored.
        regfile: regfile name; its lower-case form indexes ``regs.rf``
            (and ``regs.wv`` when hazard vectors are enabled).
        regname: name of the read port within the regfile.
        fspec: a single spec or a list of specs, each providing
            ``okflag``, ``regport`` and ``specs`` (a list of entries
            with ``funame``, ``fu`` and ``idx``).

    Returns:
        ``Const(0)`` when hazard vectors are disabled, otherwise a Signal
        that is set when any outstanding-write bit matches a register
        being requested on this port.
    """
    comb, sync = m.d.comb, m.d.sync
    regs = self.regs

    rpidx = regname

    # select the required read port.  these are pre-defined sizes
    rfile = regs.rf[regfile.lower()]
    rport = rfile.r_ports[rpidx]
    print("read regfile", rpidx, regfile, regs.rf.keys(),
          rfile, rfile.unary)

    # for checking if the read port has an outstanding write
    if self.make_hazard_vecs:
        wv = regs.wv[regfile.lower()]
        wvchk = wv.q_int  # write-vec bit-level hazard check

    # if a hazard is detected on this read port, simply blithely block
    # every FU from reading on it.  this is complete overkill but very
    # simple for now.
    hazard_detected = Signal(name="raw_%s_%s" % (regfile, rpidx))

    fspecs = fspec
    if not isinstance(fspecs, list):
        fspecs = [fspecs]

    rdflags = []
    pplen = 0
    ppoffs = []
    for i, fspec in enumerate(fspecs):
        # get the regfile specs for this regfile port
        print("fpsec", i, fspec, len(fspec.specs))
        name = "%s_%s_%d" % (regfile, regname, i)
        ppoffs.append(pplen)  # record offset for picker
        pplen += len(fspec.specs)
        rdflag = Signal(name="rdflag_"+name, reset_less=True)
        comb += rdflag.eq(fspec.okflag)
        rdflags.append(rdflag)

    print("pplen", pplen)

    # create a priority picker to manage this port
    rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
    m.submodules["rdpick_%s_%s" % (regfile, rpidx)] = rdpick

    rens = []
    addrs = []
    wvens = []

    for i, fspec in enumerate(fspecs):
        _read = fspec.regport
        # connect up the FU req/go signals, and the reg-read to the FU
        # and create a Read Broadcast Bus
        for pi, fuspec in enumerate(fspec.specs):
            (funame, fu, idx) = (fuspec.funame, fuspec.fu, fuspec.idx)
            pi += ppoffs[i]
            name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
            fu_active = fu_selected[funame]

            # get (or set up) a latched copy of read register number
            # and (sigh) also the read-ok flag
            # TODO: use nmutil latchregister
            rhname = "%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name="rdflag_%s_%s" % (funame, rhname),
                            reset_less=True)
            if rhname not in fu.rf_latches:
                rfl = Signal(name="rdflag_latch_%s_%s" % (funame, rhname))
                fu.rf_latches[rhname] = rfl
                with m.If(fu.issue_i):
                    sync += rfl.eq(rdflags[i])
            else:
                rfl = fu.rf_latches[rhname]

            # now the register port
            rname = "%s_%s_%s_%d" % (funame, regfile, regname, pi)
            read = Signal.like(_read, name="read_"+rname)
            if rname not in fu.rd_latches:
                rdl = Signal.like(_read, name="rdlatch_"+rname)
                fu.rd_latches[rname] = rdl
                with m.If(fu.issue_i):
                    sync += rdl.eq(_read)
            else:
                rdl = fu.rd_latches[rname]

            # make the read immediately available on issue cycle
            # after the read cycle, otherwise use the latched copy.
            # this captures the regport and okflag on issue
            with m.If(fu.issue_i):
                comb += read.eq(_read)
                comb += rdflag.eq(rdflags[i])
            with m.Else():
                comb += read.eq(rdl)
                comb += rdflag.eq(rfl)

            # connect request-read to picker input, and output to go-rd
            addr_en = Signal.like(read, name="addr_en_"+name)
            pick = Signal(name="pick_"+name)      # picker input
            rp = Signal(name="rp_"+name)          # picker output
            delay_pick = Signal(name="dp_"+name)  # read-enable "underway"
            rhazard = Signal(name="rhaz_"+name)

            # exclude any currently-enabled read-request (mask out active)
            # entirely block anything hazarded from being picked
            comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflag &
                            ~delay_pick & ~rhazard)
            comb += rdpick.i[pi].eq(pick)
            comb += fu.go_rd_i[idx].eq(delay_pick)  # pass in *delayed* pick

            # if picked, select read-port "reg select" number to port
            comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
            sync += delay_pick.eq(rp)  # delayed "pick"
            comb += addr_en.eq(Mux(rp, read, 0))

            # the read-enable happens combinatorially (see mux-bus below)
            # but it results in the data coming out on a one-cycle delay.
            if rfile.unary:
                rens.append(addr_en)
            else:
                addrs.append(addr_en)
                rens.append(rp)

            # use the *delayed* pick signal to put requested data onto bus
            with m.If(delay_pick):
                # connect regfile port to input, creating fan-out Bus
                src = fu.src_i[idx]
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      src.shape(), rport.o_data.shape())
                # all FUs connect to same port
                comb += src.eq(rport.o_data)

            if not self.make_hazard_vecs:
                continue

            # read the write-hazard bitvector (wv) for any bit that is
            # being requested by this FU
            wvchk_en = Signal(len(wvchk), name="wv_chk_addr_en_"+name)
            issue_active = Signal(name="rd_iactive_"+name)
            # XXX combinatorial loop here
            comb += issue_active.eq(fu_active & rdflag)
            with m.If(issue_active):
                if rfile.unary:
                    comb += wvchk_en.eq(read)
                else:
                    comb += wvchk_en.eq(1 << read)
            # if FU is busy (which doesn't get set at the same time as
            # issue) and no hazard was detected, clear wvchk_en (i.e.
            # stop checking for hazards).  there is a loop here, but it's
            # via a DFF, so is ok. some linters may complain, but hey.
            with m.If(fu.busy_o & ~rhazard):
                comb += wvchk_en.eq(0)

            # read-hazard is ANDed with (filtered by) what is actually
            # being requested.
            comb += rhazard.eq((wvchk & wvchk_en).bool())

            wvens.append(wvchk_en)

    # or-reduce the muxed read signals
    if rfile.unary:
        # for unary-addressed
        comb += rport.ren.eq(ortreereduce_sig(rens))
    else:
        # for binary-addressed
        comb += rport.addr.eq(ortreereduce_sig(addrs))
        comb += rport.ren.eq(Cat(*rens).bool())
        print("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    if not self.make_hazard_vecs:
        return Const(0)  # declare "no hazards"

    # enable the read bitvectors for this issued instruction
    # and return whether any write-hazard bit is set.
    # (the signal name reuses "name" from the last loop iteration)
    wvchk_and = Signal(len(wvchk), name="wv_chk_"+name)
    comb += wvchk_and.eq(wvchk & ortreereduce_sig(wvens))
    comb += hazard_detected.eq(wvchk_and.bool())
    return hazard_detected
713
def connect_rdports(self, m, fu_bitdict, fu_selected):
    """connect read ports

    orders the read regspecs into a dict-of-dicts, by regfile, by
    regport name, then connects all FUs that want that regport by
    way of a PriorityPicker.

    Args:
        m: the module being elaborated.
        fu_bitdict: dict of per-FU "issue enable" bits, by FU name.
        fu_selected: dict of per-FU "busy or being issued" bits, by FU
            name.

    Returns:
        An expression that is true when any of the per-port hazard
        results returned by ``connect_rdport`` is set.
    """
    rd_hazard = []

    # dictionary of lists of regfile read ports
    byregfiles_rdspec = self.get_byregfiles(m, True)

    # okaay, now we need a PriorityPicker per regfile per regfile port
    # loootta pickers... peter piper picked a pack of pickled peppers...
    rdpickers = {}
    for regfile, fuspecs in byregfiles_rdspec.items():
        rdpickers[regfile] = {}

        # argh.  an experiment to merge RA and RB in the INT regfile
        # (we have too many read/write ports)
        if self.regreduce_en:
            if regfile == 'INT':
                # all three INT read ports share one port, named "rabc"
                fuspecs['rabc'] = [fuspecs.pop('rb')]
                fuspecs['rabc'].append(fuspecs.pop('rc'))
                fuspecs['rabc'].append(fuspecs.pop('ra'))
            if regfile == 'FAST':
                # fast2 and fast3 (where present) are folded into fast1
                fuspecs['fast1'] = [fuspecs.pop('fast1')]
                if 'fast2' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast2'))
                if 'fast3' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast3'))

        # for each named regfile port, connect up all FUs to that port
        # also return (and collate) hazard detection)
        for (regname, fspec) in sort_fuspecs(fuspecs):
            print("connect rd", regname, fspec)
            rh = self.connect_rdport(m, fu_bitdict, fu_selected,
                                     rdpickers, regfile,
                                     regname, fspec)
            rd_hazard.append(rh)

    return Cat(*rd_hazard).bool()
759
def make_hazards(self, m, regfile, rfile, wvclr, wvset,
                 funame, regname, idx,
                 addr_en, wp, fu, fu_active, wrflag, write,
                 fu_wrok):
    """make_hazards: a setter and a clearer for the regfile write ports

    setter is at issue time (using PowerDecoder2 regfile write numbers)
    clearer is at regfile write time (when FU has said what to write to)

    there is *one* unusual case here which has to be dealt with:
    when the Function Unit does *NOT* request a write to the regfile
    (has its data.ok bit CLEARED).  this is perfectly legitimate.
    and a royal pain.

    Args:
        m: the module being elaborated.
        regfile: regfile name (used in debug prints only).
        rfile: the regfile object; its ``unary`` attribute selects
            between unary and binary addressing.
        wvclr: hazard-vector "clear" signal; only its width is used.
        wvset: hazard-vector "set" signal; only its width is used.
        funame, regname, idx: used to build the names of the signals
            created here.
        addr_en: the write-port address/enable for this FU.
        wp: write-pick flag gating ``addr_en`` in the binary case.
        fu: the Function Unit (``issue_i``, ``busy_o`` and
            ``alu_done_o`` are read).
        fu_active: "busy or being issued" bit for this FU.
        wrflag: flag indicating that the instruction writes this reg.
        write: the register number to be written.
        fu_wrok: the FU's "data ok" flag for this output.

    Returns:
        A tuple ``(wvaddr_en, wviaddr_en)``: the hazard-vector clear
        enable and the hazard-vector set enable for this FU/port.
    """
    comb, sync = m.d.comb, m.d.sync
    name = "%s_%s_%d" % (funame, regname, idx)

    # connect up the bitvector write hazard.  unlike the
    # regfile writeports, a ONE must be written to the corresponding
    # bit of the hazard bitvector (to indicate the existence of
    # the hazard)

    # the detection of what shall be written to is based
    # on *issue*.  it is delayed by 1 cycle so that instructions
    # "addi 5,5,0x2" do not cause combinatorial loops due to
    # fake-dependency on *themselves*.  this will totally fail
    # spectacularly when doing multi-issue
    print("write vector (for regread)", regfile, wvset)
    wviaddr_en = Signal(len(wvset), name="wv_issue_addr_en_"+name)
    issue_active = Signal(name="iactive_"+name)
    sync += issue_active.eq(fu.issue_i & fu_active & wrflag)
    with m.If(issue_active):
        if rfile.unary:
            comb += wviaddr_en.eq(write)
        else:
            comb += wviaddr_en.eq(1 << write)

    # deal with write vector clear: this kicks in when the regfile
    # is written to, and clears the corresponding bitvector entry
    print("write vector", regfile, wvclr)
    wvaddr_en = Signal(len(wvclr), name="wvaddr_en_"+name)
    if rfile.unary:
        comb += wvaddr_en.eq(addr_en)
    else:
        with m.If(wp):
            comb += wvaddr_en.eq(1 << addr_en)

    # XXX ASSUME that LDSTFunctionUnit always sets the data it intends to
    # this may NOT be the case when an exception occurs
    if isinstance(fu, LDSTFunctionUnit):
        return wvaddr_en, wviaddr_en

    # okaaay, this is preparation for the awkward case.
    # * latch a copy of wrflag when issue goes high.
    # * when the fu_wrok (data.ok) flag is NOT set,
    #   but the FU is done, the FU is NEVER going to write
    #   so the bitvector has to be cleared.
    latch_wrflag = Signal(name="latch_wrflag_"+name)
    with m.If(~fu.busy_o):
        sync += latch_wrflag.eq(0)
    with m.If(fu.issue_i & fu_active):
        sync += latch_wrflag.eq(wrflag)
    with m.If(fu.alu_done_o & latch_wrflag & ~fu_wrok):
        if rfile.unary:
            comb += wvaddr_en.eq(write)  # addr_en gated with wp, don't use
        else:
            comb += wvaddr_en.eq(1 << addr_en)  # binary addr_en not gated

    return wvaddr_en, wviaddr_en
829
def connect_wrport(self, m, fu_bitdict, fu_selected,
                   wrpickers, regfile, regname, fspec):
    """connect one regfile write port to every FU that writes through it

    a single PriorityPicker is created for the port and stored in
    wrpickers[regfile][regname].  every Function Unit listed in the
    spec(s) gets a picker lane; the picker output becomes that FU's
    go_wr_i pulse and gates the write-enable / write-address that is
    OR-reduced onto the port (the "Write Broadcast Bus").

    * m           - the Module being elaborated (comb, sync, submodules)
    * fu_bitdict  - dict by FU name of the FU "enable" signal, used to
                    latch the write register number
    * fu_selected - dict by FU name of the signal that qualifies the
                    FU's write-request before it reaches the picker
    * wrpickers   - dict-of-dicts, updated in-place with the new picker
    * regfile     - regfile name (lower-cased to index regs.rf / regs.wv)
    * regname     - write port name, indexes rfile.w_ports
    * fspec       - one spec, or a list of specs (merged ports), each
                    with okflag, regport, wid and specs attributes

    returns a tuple (wvclren, wvseten) of lists of write-hazard
    bitvector clear / set requests, for the caller to OR-reduce.
    both lists are empty when self.make_hazard_vecs is false.
    """
    comb, sync = m.d.comb, m.d.sync
    regs = self.regs

    rpidx = regname

    # select the required write port.  these are pre-defined sizes
    rfile = regs.rf[regfile.lower()]
    wport = rfile.w_ports[rpidx]

    print("connect wr", regname, "unary", rfile.unary, fspec)
    print(regfile, regs.rf.keys())

    # select the write-protection hazard vector.  note that this still
    # requires to WRITE to the hazard bitvector!  read-requests need
    # to RAISE the bitvector (set it to 1), which, duh, requires a WRITE
    # (wvset/wvclr/wvchk are only referenced when make_hazard_vecs is set)
    if self.make_hazard_vecs:
        wv = regs.wv[regfile.lower()]
        wvset = wv.s  # write-vec bit-level hazard ctrl
        wvclr = wv.r  # write-vec bit-level hazard ctrl
        wvchk = wv.q  # write-after-write hazard check

    # a merged port arrives as a list of specs, a plain port as one spec
    fspecs = fspec if isinstance(fspec, list) else [fspec]

    pplen = 0
    ppoffs = []
    wrflags = []
    for i, spec in enumerate(fspecs):
        # get the regfile specs for this regfile port
        wf, fuspecs = spec.okflag, spec.specs
        print ("fpsec", i, "wrflag", wf, spec, len(fuspecs))
        ppoffs.append(pplen)  # record offset for picker
        pplen += len(fuspecs)

        # per-spec copy of the decoder's write-ok flag (0 if there is none)
        name = "%s_%s_%d" % (regfile, regname, i)
        wrflag = Signal(name="wr_flag_"+name)
        comb += wrflag.eq(wf if wf is not None else 0)
        wrflags.append(wrflag)

    # create a priority picker to manage this port
    wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
    m.submodules["wrpick_%s_%s" % (regfile, rpidx)] = wrpick

    wsigs = []
    wens = []
    wvseten = []
    wvclren = []
    #wvens = [] - not needed: reading of writevec is permanently held hi
    addrs = []
    for i, spec in enumerate(fspecs):
        # connect up the FU req/go signals and the reg-read to the FU
        # these are arbitrated by Data.ok signals
        wf, _write = spec.okflag, spec.regport
        # picker lanes of spec i start at the offset recorded above
        for pi, fuspec in enumerate(spec.specs, start=ppoffs[i]):
            (funame, fu, idx) = (fuspec.funame, fuspec.fu, fuspec.idx)
            fu_requested = fu_bitdict[funame]
            lname = "%s_%s_%s_%d" % (funame, regfile, regname, idx)
            # get (or set up) a write-latched copy of write register number
            if lname in fu.wr_latches:
                write = fu.wr_latches[lname]
            else:
                write = Signal.like(_write, name="write_"+lname)
                wrl = Signal.like(_write, name="wrlatch_"+lname)
                fu.wr_latches[lname] = write
                # do not depend on fu.issue_i here, it creates a
                # combinatorial loop on waw checking. using the FU
                # "enable" bitdict entry for this FU is sufficient,
                # because the PowerDecoder2 read/write nums are
                # valid continuously when the instruction is valid
                with m.If(fu_requested):
                    sync += wrl.eq(_write)
                    comb += write.eq(_write)
                with m.Else():
                    comb += write.eq(wrl)

            # write-request comes from dest.ok
            dest = fu.get_out(idx)
            fu_dest_latch = fu.get_fu_out(idx)  # latched output
            name = "%s_%s_%d" % (funame, regname, idx)
            fu_wrok = Signal(name="fu_wrok_"+name, reset_less=True)
            comb += fu_wrok.eq(dest.ok & fu.busy_o)

            # connect request-write to picker input, and output to go-wr
            fu_active = fu_selected[funame]
            pick = fu.wr.rel_o[idx] & fu_active
            comb += wrpick.i[pi].eq(pick)
            # create a single-pulse go write from the picker output
            wr_pick = Signal(name="wpick_%s_%s_%d" % (funame, regname, idx))
            comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
            comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

            # connect the regspec write "reg select" number to this port
            # only if one FU actually requests (and is granted) the port
            # will the write-enable be activated
            wname = "waddr_en_%s_%s_%d" % (funame, regname, idx)
            addr_en = Signal.like(write, name=wname)
            # NOTE: the variable name "wp" is deliberately kept: the
            # Signal has no explicit name so it is inferred from this
            wp = Signal()
            comb += wp.eq(wr_pick & wrpick.en_o)
            comb += addr_en.eq(Mux(wp, write, 0))
            if rfile.unary:
                # unary regfile: the gated reg-select *is* the write-enable
                wens.append(addr_en)
            else:
                # binary regfile: separate address and write-enable
                addrs.append(addr_en)
                wens.append(wp)

            # connect regfile port to input
            print("reg connect widths",
                  regfile, regname, pi, funame,
                  dest.shape(), wport.i_data.shape())
            wsigs.append(fu_dest_latch)

            # now connect up the bitvector write hazard
            if not self.make_hazard_vecs:
                continue
            res = self.make_hazards(m, regfile, rfile, wvclr, wvset,
                                    funame, regname, idx,
                                    addr_en, wp, fu, fu_active,
                                    wrflags[i], write, fu_wrok)
            wvaddr_en, wv_issue_en = res
            wvclren.append(wvaddr_en)    # set only: no data => clear bit
            wvseten.append(wv_issue_en)  # set data same as enable

            # read the write-hazard bitvector (wv) for any bit that is
            # about to be written by the incoming instruction (this is
            # the write-after-write check)
            wvchk_en = Signal(len(wvchk), name="waw_chk_addr_en_"+name)
            issue_active = Signal(name="waw_iactive_"+name)
            whazard = Signal(name="whaz_"+name)
            if wf is None:
                # XXX EEK! STATE regfile (branch) does not have an
                # write-active indicator in regspec_decode_write()
                print ("XXX FIXME waw_iactive", issue_active,
                       fu_requested, wf)
            else:
                # check bits from the incoming instruction.  note (back
                # in connect_instruction) that the decoder is held for
                # us to be able to do this, here... *without* issue being
                # held HI.  we MUST NOT gate this with fu.issue_i or
                # with fu_bitdict "enable": it would create a loop
                comb += issue_active.eq(wf)
            with m.If(issue_active):
                if rfile.unary:
                    comb += wvchk_en.eq(write)
                else:
                    comb += wvchk_en.eq(1<<write)
            # if FU is busy (which doesn't get set at the same time as
            # issue) and no hazard was detected, clear wvchk_en (i.e.
            # stop checking for hazards).  there is a loop here, but it's
            # via a DFF, so is ok. some linters may complain, but hey.
            # (this must stay *after* the assignment above: the later
            # comb statement is the one that takes priority)
            with m.If(fu.busy_o & ~whazard):
                comb += wvchk_en.eq(0)

            # write-hazard is ANDed with (filtered by) what is actually
            # being requested.  the wvchk data is on a one-clock delay,
            # and wvchk_en comes directly from the main decoder
            comb += whazard.eq((wvchk & wvchk_en).bool())
            with m.If(whazard):
                comb += fu._waw_hazard.eq(1)

            #wvens.append(wvchk_en)

    # here is where we create the Write Broadcast Bus. simple, eh?
    comb += wport.i_data.eq(ortreereduce_sig(wsigs))
    if rfile.unary:
        # for unary-addressed
        comb += wport.wen.eq(ortreereduce_sig(wens))
    else:
        # for binary-addressed
        comb += wport.addr.eq(ortreereduce_sig(addrs))
        comb += wport.wen.eq(ortreereduce_sig(wens))

    if not self.make_hazard_vecs:
        return [], []

    # return these here rather than set wvclr/wvset directly,
    # because there may be more than one write-port to a given
    # regfile.  example: XER has a write-port for SO, CA, and OV
    # and the *last one added* of those would overwrite the other
    # two.  solution: have connect_wrports collate all the
    # or-tree-reduced bitvector set/clear requests and drop them
    # in as a single "thing".  this can only be done because the
    # set/get is an unary bitvector.
    print ("make write-vecs", regfile, regname, wvset, wvclr)
    return (wvclren,  # clear (regfile write)
            wvseten)  # set (issue time)
1027
def connect_wrports(self, m, fu_bitdict, fu_selected):
    """connect write ports

    orders the write regspecs into a dict-of-dicts, by regfile,
    by regport name, then connects all FUs that want that regport
    by way of a PriorityPicker (one connect_wrport call per port).

    when self.regreduce_en is set, some ports are first merged into a
    list under a single port name (INT: o1 into o; FAST: fast2 and
    fast3 into fast1) so that they share one physical write port.

    note that the write-port wen, write-port data, and go_wr_i all need to
    be on the exact same clock cycle.  connect_wrport drives all three
    from the comb domain.

    when self.make_hazard_vecs is set, the write-hazard set/clear
    requests returned by connect_wrport are collated per regfile and
    OR-reduced onto regs.wv[regfile].s / .r exactly once.
    """
    comb = m.d.comb
    regs = self.regs
    # dictionary of lists of regfile write ports
    byregfiles_wrspec = self.get_byregfiles(m, False)

    # same for write ports.
    # BLECH!  complex code-duplication! BLECH!
    wrpickers = {}
    wvclrers = defaultdict(list)
    wvseters = defaultdict(list)
    for regfile, fuspecs in byregfiles_wrspec.items():
        wrpickers[regfile] = {}

        if self.regreduce_en:
            # argh, more port-merging
            if regfile == 'INT':
                merged = [fuspecs.pop('o')]
                # only merge o1 if some FU actually has such a port
                # (same guard as for fast2/fast3 below)
                if 'o1' in fuspecs:
                    merged.append(fuspecs.pop('o1'))
                fuspecs['o'] = merged
            if regfile == 'FAST':
                merged = [fuspecs.pop('fast1')]
                for extra in ('fast2', 'fast3'):
                    if extra in fuspecs:
                        merged.append(fuspecs.pop(extra))
                fuspecs['fast1'] = merged

        # collate these and record them by regfile because there
        # are sometimes more write-ports per regfile
        for (regname, fspec) in sort_fuspecs(fuspecs):
            wvclren, wvseten = self.connect_wrport(m,
                                                   fu_bitdict, fu_selected,
                                                   wrpickers,
                                                   regfile, regname, fspec)
            wvclrers[regfile.lower()] += wvclren
            wvseters[regfile.lower()] += wvseten

    if not self.make_hazard_vecs:
        return

    # for write-vectors: reduce the clr-ers and set-ers down to
    # a single set of bits.  otherwise if there are two write
    # ports (on some regfiles), the last one doing comb += on
    # the reg.wv[regfile] instance "wins" (and all others are ignored,
    # whoops).  if there was only one write-port per wv regfile this would
    # not be an issue.
    for regfile, wvclren in wvclrers.items():
        wv = regs.wv[regfile]
        wvseten = wvseters[regfile]
        # wv.r / wv.s: write-vec bit-level hazard ctrl
        comb += wv.r.eq(ortreereduce_sig(wvclren))  # clear (regfile write)
        comb += wv.s.eq(ortreereduce_sig(wvseten))  # set (issue time)
1092
def get_byregfiles(self, m, readmode):
    """group the Function Units' register ports by regfile and port name

    walks every FU, asks it for the regspec of each source (readmode
    true) or destination (readmode false) operand, and builds a
    dict-of-dicts: first key regfile, second key regfile port name.
    each entry is a ByRegSpec holding the decoder's okflag and regport,
    the width, and the list of FUSpec (funame, fu, idx) "lanes" that
    contend for that port.

    as a side-effect each FU has its latch dictionaries reset:
    rd_latches and rf_latches in read mode, wr_latches in write mode.
    """
    mode = "read" if readmode else "write"
    decoded = self.ireg.e  # decoded instruction to execute

    # dictionary of dictionaries of lists/tuples of regfile ports.
    # first key: regfile.  second key: regfile port name
    byregfiles_spec = defaultdict(dict)

    for funame, fu in self.fus.fus.items():
        # create in each FU a receptacle for the read/write register
        # hazard numbers (and okflags for read).  to be latched in
        # connect_rd/write_ports
        if readmode:
            fu.rd_latches = {}  # read reg number latches
            fu.rf_latches = {}  # read flag latches
        else:
            fu.wr_latches = {}

        # construct regfile specs: read uses inspec, write outspec
        print("%s ports for %s" % (mode, funame))
        n_ports = fu.n_src if readmode else fu.n_dst
        for idx in range(n_ports):
            regfile, regname, wid = fu.get_io_spec(readmode, idx)
            print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))

            # the PowerDecoder2 (main one, not the satellites) contains
            # the decoded regfile numbers.  obtain these now
            decinfo = regspec_decode(m, readmode, decoded, regfile, regname)

            # construct the dictionary of regspec information by regfile:
            # the first FU seen for a port defines its okflag/regport/wid
            entry = byregfiles_spec[regfile].setdefault(
                regname,
                ByRegSpec(decinfo.okflag, decinfo.regport, wid, []))

            # here we start to create "lanes" where each Function Unit
            # requiring access to a given [single-contended resource]
            # regfile port is appended to a list, so that PriorityPickers
            # can be created to give uncontested access to it
            entry.specs.append(FUSpec(funame, fu, idx))

    # ok just print that all out, for convenience
    for regfile, portspecs in byregfiles_spec.items():
        print("regfile %s ports:" % mode, regfile)
        for regname, portspec in portspecs.items():
            okflag, regport, wid, lanes = portspec
            print(" rf %s port %s lane: %s" % (mode, regfile, regname))
            print(" %s" % regname, wid, okflag, regport)
            for lane_funame, lane_fu, lane_idx in lanes:
                if readmode:
                    fusig = lane_fu.src_i[lane_idx]
                else:
                    fusig = lane_fu.dest[lane_idx]
                print(" ", lane_funame, lane_fu.__class__.__name__,
                      lane_idx, fusig)
                print()

    return byregfiles_spec
1150
def __iter__(self):
    """iterate over the ports: function units, then decoded input, then l0"""
    for fu_port in self.fus.ports():
        yield fu_port
    for dec_port in self.i.e.ports():
        yield dec_port
    for l0_port in self.l0.ports():
        yield l0_port
    # TODO: regs
1156
def ports(self):
    """return everything this object iterates over, as a list"""
    return [port for port in self]
1159
1160
if __name__ == '__main__':
    # build a test core and dump it as RTLIL to test_core.il
    pspec_args = {
        'ldst_ifacetype': 'testpi',
        'imem_ifacetype': '',
        'addr_wid': 64,
        'allow_overlap': True,
        'mask_wid': 8,
        'reg_wid': 64,
    }
    core = NonProductionCore(TestMemPspec(**pspec_args))
    il_text = rtlil.convert(core, ports=core.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(il_text)