add pause_dec_tb signal (not very sophisticated) to Core
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 (update: actually this is being added now:
21 https://bugs.libre-soc.org/show_bug.cgi?id=737)
22 """
23
24 from nmigen import (Elaboratable, Module, Signal, ResetSignal, Cat, Mux,
25 Const)
26 from nmigen.cli import rtlil
27
28 from openpower.decoder.power_decoder2 import PowerDecodeSubset
29 from openpower.decoder.power_regspec_map import regspec_decode
30 from openpower.sv.svp64 import SVP64Rec
31
32 from nmutil.picker import PriorityPicker
33 from nmutil.util import treereduce
34 from nmutil.singlepipe import ControlBase
35
36 from soc.fu.compunits.compunits import AllFunctionUnits, LDSTFunctionUnit
37 from soc.regfile.regfiles import RegFiles
38 from openpower.decoder.power_decoder2 import get_rdflags
39 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
40 from soc.config.test.test_loadstore import TestMemPspec
41 from openpower.decoder.power_enums import MicrOp, Function
42 from soc.simple.core_data import CoreInput, CoreOutput
43
44 from collections import defaultdict, namedtuple
45 import operator
46
47 from nmutil.util import rising_edge
48
49 FUSpec = namedtuple("FUSpec", ["funame", "fu", "idx"])
50 ByRegSpec = namedtuple("ByRegSpec", ["okflag", "regport", "wid", "specs"])
51
52 # helper function for reducing a list of signals down to a parallel
53 # ORed single signal.
def ortreereduce(tree, attr="o_data"):
    """OR-reduce the ``attr`` attribute of every item in ``tree``.

    Each item is mapped through ``getattr(item, attr)`` and the results
    are combined by ``treereduce`` using ``operator.or_``.

    :param tree: the collection of objects handed to ``treereduce``
    :param attr: name of the attribute to pick from each object
                 (defaults to "o_data")
    :returns: whatever ``treereduce`` returns for the ORed attributes
    """
    def pick_attr(item):
        return getattr(item, attr)

    return treereduce(tree, operator.or_, pick_attr)
56
57
def ortreereduce_sig(tree):
    """OR-reduce the items of ``tree`` themselves (no attribute lookup).

    Same as ``ortreereduce`` except that each item is used as-is: the
    accessor passed to ``treereduce`` is the identity function.

    :param tree: the collection of items handed to ``treereduce``
    :returns: whatever ``treereduce`` returns for the ORed items
    """
    def identity(item):
        return item

    return treereduce(tree, operator.or_, identity)
60
61
62 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the items of ``fuspecs`` as a list, "full" entries first.

    Entries whose register name starts with "full" are placed ahead of
    all others.  Within each of the two groups the dictionary's own
    iteration order is kept.

    :param fuspecs: dict mapping register (port) name to its spec
    :returns: list of ``(regname, fspec)`` tuples
    """
    entries = list(fuspecs.items())
    full_regs = [entry for entry in entries if entry[0].startswith("full")]
    other_regs = [entry for entry in entries
                  if not entry[0].startswith("full")]
    return full_regs + other_regs
72
73
74 # a hazard bitvector "remap" function which returns an AST expression
75 # that remaps read/write hazard regfile port numbers to either a full
76 # bitvector or a reduced subset one. SPR for example is reduced to a
77 # single bit.
78 # CRITICALLY-IMPORTANT NOTE: these bitvectors *have* to match up per
79 # regfile! therefore the remapping is per regfile, *NOT* per regfile
80 # port and certainly not based on whether it is a read port or write port.
81 # note that any reductions here will result in degraded performance due
82 # to conflicts, but at least it keeps hazard matrix sizes down to "sane" levels.
def bitvector_remap(regfile, rfile, port):
    """Remap a regfile port number to a hazard-bitvector expression.

    :param regfile: upper-case regfile name ('CR', 'XER', 'SVSTATE',
                    'FAST', 'SPR' or 'INT')
    :param rfile: the regfile object; only its ``unary`` attribute is
                  consulted, and only for FAST, SPR and INT
    :param port: the port number (or expression) to remap
    :returns: ``port`` unchanged when the regfile is already
              unary-addressed, ``1 << port`` when it is binary-addressed,
              and ``None`` for any regfile name not listed above
    """
    # CR, XER and SVSTATE are treated as unary already: no remap needed
    if regfile in ('CR', 'XER', 'SVSTATE'):
        return port

    # FAST, SPR and INT may be either unary or binary addressed: binary
    # port numbers are converted to a one-hot (unary) bitvector.
    # NOTE(review): the original comment on SPR said "reduce to one" but
    # no such reduction was ever implemented; behaviour is kept as-is.
    if regfile in ('FAST', 'SPR', 'INT'):
        if rfile.unary:
            return port
        return 1 << port

    # unrecognised regfile: the original fell off the end of the function,
    # so callers receive None.  made explicit here.
    return None
113
114
115 # derive from ControlBase rather than have a separate Stage instance,
116 # this is simpler to do
117 class NonProductionCore(ControlBase):
def __init__(self, pspec):
    """Create the core's sub-components from the parameter spec ``pspec``.

    Sets up, in order: the option flags read from ``pspec``, the
    ControlBase base class (with this object as its own stage), the
    L0 cache buffer, the Function Units, the register files, the
    input/output data records, the per-FU satellite decoders, the
    per-FU write-after-write hazard signals and the ``pause_dec_tb``
    signal.
    """
    def option_enabled(option):
        # True only if pspec has the attribute *and* it compares == True
        return hasattr(pspec, option) and (getattr(pspec, option) == True)

    self.pspec = pspec

    # is SVP64 to be enabled?
    self.svp64_en = option_enabled("svp64")

    # should regfile ports be reduced?
    self.regreduce_en = option_enabled("regreduce")

    # is overlapping of instructions allowed?
    # (not normally enabled for TestIssuer FSM but useful for checking
    # the bitvector hazard detection, before doing In-Order)
    self.allow_overlap = option_enabled("allow_overlap")

    # hazard vectors are only created when overlap is allowed
    self.make_hazard_vecs = self.allow_overlap

    # core type: "fsm" unless pspec says otherwise
    self.core_type = getattr(pspec, "core_type", "fsm")

    # the flags above are set before this call; ispec() reads them
    super().__init__(stage=self)

    # single LD/ST funnel for memory access
    self.l0 = TstL0CacheBuffer(pspec, n_units=1)
    port_iface = self.l0.l0.dports[0]

    # function units (only one each)
    # only include mmu if enabled in pspec
    self.fus = AllFunctionUnits(pspec, pilist=[port_iface])

    # link LoadStore1 into MMU (if there is an MMU)
    mmu = self.fus.get_fu('mmu0')
    ldst0 = self.fus.get_fu('ldst0')
    print ("core pspec", pspec.ldst_ifacetype)
    print ("core mmu", mmu)
    if mmu is not None:
        lsi = self.l0.cmpi.lsmem.lsi  # a LoadStore1 Interface object
        print ("core lsmem.lsi", lsi)
        mmu.alu.set_ldst_interface(lsi)
        # keep hold of the I-Cache in core so it is easier to get at
        self.icache = lsi.icache

    # MSR reset value: taken from pspec only when it is an integer
    msr_reset = getattr(pspec, "msr_reset", None)
    self.msr_at_reset = msr_reset if isinstance(msr_reset, int) else 0x0
    state_resets = [
        0x0,                # PC at reset
        self.msr_at_reset,  # MSR at reset
        0x0,                # SVSTATE at reset
    ]

    # register files (yes plural)
    self.regs = RegFiles(pspec,
                         make_hazard_vecs=self.make_hazard_vecs,
                         state_resets=state_resets)

    # set up input and output: unusual requirement to set data directly
    # (due to the way that the core is set up in a different domain,
    # see TestIssuer.setup_peripherals)
    self.p.i_data, self.n.o_data = self.new_specs(None)
    self.i, self.o = self.p.i_data, self.n.o_data

    # actual internal input data used (captured)
    self.ireg = self.ispec()

    # per-FU instruction decoders (subsetted).  these "satellite"
    # decoders reduce wire fan-out from the one (main) PowerDecoder2
    # (used directly by the trap unit) to the many Function Units:
    # 32 wires (the instruction) to each, rather than a 200+ wire
    # fan-out of decoded fields.
    self.decoders = {}
    self.des = {}

    # eep, these should be *per FU* i.e. for FunctionUnitBaseMulti
    # they should be shared (put into the ALU *once*).
    for funame, fu in self.fus.fus.items():
        f_name = fu.fnunit.name
        opkls = fu.opsubsetkls
        if f_name == 'TRAP':
            # TRAP uses the *main* decoder: just note which FU it is
            self.trapunit = funame
            continue
        assert funame not in self.decoders
        subset = PowerDecodeSubset(None, opkls, f_name,
                                   final=True,
                                   state=self.ireg.state,
                                   svp64_en=self.svp64_en,
                                   regreduce_en=self.regreduce_en)
        self.decoders[funame] = subset
        self.des[funame] = subset.do
        print ("create decoder subset", funame, opkls, self.des[funame])

    # per-Function Unit write-after-write hazard signals
    # (really, this should have been added in ReservationStations)
    for funame, fu in self.fus.fus.items():
        fu._waw_hazard = Signal(name="waw_%s" % funame)

    # share the SPR decoder with the MMU if it exists
    if "mmu0" in self.decoders:
        self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]

    # allow pausing of the DEC/TB FSM back in Issuer, by spotting
    # if there is an MTSPR instruction
    self.pause_dec_tb = Signal()
224
225 # next 3 functions are Stage API Compliance
226 def setup(self, m, i):
227 pass
228
def ispec(self):
    """Stage API: return a new CoreInput built from this core's settings.

    The CoreInput is constructed from ``pspec`` and the ``svp64_en`` and
    ``regreduce_en`` flags.
    """
    svp64_en, regreduce_en = self.svp64_en, self.regreduce_en
    return CoreInput(self.pspec, svp64_en, regreduce_en)
231
def ospec(self):
    """Stage API: return a new (argument-less) CoreOutput."""
    output_spec = CoreOutput()
    return output_spec
234
235 # elaborate function to create HDL
def elaborate(self, platform):
    """Create the HDL for the core and return the resulting Module.

    When ``pspec.nocore`` is set, only a dummy toggling signal is
    created.  Otherwise the Function Units, L0 buffer and register files
    are added, the satellite decoders connected, and instruction issue
    plus regfile read/write ports wired up.
    """
    m = super().elaborate(platform)

    # for testing purposes, to cut down on build time in coriolis2
    if getattr(self.pspec, "nocore", False) == True:
        dummy = Signal()  # dummy signal
        m.d.sync += dummy.eq(~dummy)
        return m

    comb = m.d.comb

    m.submodules.fus = self.fus
    m.submodules.l0 = self.l0
    self.regs.elaborate_into(m, platform)

    # amalgamate the per-FU write-hazards into a single top-level Signal
    self.waw_hazard = Signal()
    waw_bits = [fu._waw_hazard for fu in self.fus.fus.values()]
    comb += self.waw_hazard.eq(Cat(*waw_bits).bool())

    # connect decoders
    self.connect_satellite_decoders(m)

    # ssh, cheat: trap uses the main decoder because of the rewriting
    self.des[self.trapunit] = self.ireg.e.do

    # connect up Function Units, then read/write ports, and hazard conflict
    self.issue_conflict = Signal()
    fu_bitdict, fu_selected = self.connect_instruction(m)
    raw_hazard = self.connect_rdports(m, fu_bitdict, fu_selected)
    self.connect_wrports(m, fu_bitdict, fu_selected)
    if self.allow_overlap:
        comb += self.issue_conflict.eq(raw_hazard)

    # note if an exception happened.  in a pipelined or OoO design
    # this needs to be accompanied by "shadowing" (or stalling)
    exc_flags = [exc.happened for exc in self.fus.excs.values()]
    if exc_flags:  # at least one exception
        comb += self.o.exc_happened.eq(Cat(*exc_flags).bool())

    return m
282
def connect_satellite_decoders(self, m):
    """Add each satellite decoder as a submodule and feed it from ireg.

    Every decoder receives the raw instruction, the endianness flag and
    ``sv_a_nz``.  The SVP64-specific inputs are connected only when
    ``svp64_en`` is set, and ``use_svp64_ldst_dec`` only for decoders
    whose name starts with "ldst".
    """
    comb = m.d.comb
    for funame, decoder in self.decoders.items():
        # connect each satellite decoder and give it the instruction.
        # as subset decoders this massively reduces wire fanout given
        # the large number of ALUs
        m.submodules["dec_%s" % funame] = decoder
        comb += decoder.dec.raw_opcode_in.eq(self.ireg.raw_insn_i)
        comb += decoder.dec.bigendian.eq(self.ireg.bigendian_i)
        # sigh due to SVP64 RA_OR_ZERO detection connect these too
        comb += decoder.sv_a_nz.eq(self.ireg.sv_a_nz)

        if self.svp64_en:
            comb += decoder.pred_sm.eq(self.ireg.sv_pred_sm)
            comb += decoder.pred_dm.eq(self.ireg.sv_pred_dm)
            if funame != self.trapunit:
                # pass through SVP64 RM
                comb += decoder.sv_rm.eq(self.ireg.sv_rm)
                comb += decoder.is_svp64_mode.eq(self.ireg.is_svp64_mode)
                # only the LDST PowerDecodeSubset *actually* needs to
                # know to use the alternative decoder.  this is all
                # a terrible hack
                if funame.lower().startswith("ldst"):
                    comb += decoder.use_svp64_ldst_dec.eq(
                        self.ireg.use_svp64_ldst_dec)
308
def connect_instruction(self, m):
    """connect_instruction

    uses decoded (from PowerOp) function unit information from CSV files
    to ascertain which Function Unit should deal with the current
    instruction.

    some (such as OP_ATTN, OP_NOP) are dealt with here, including
    ignoring it and halting the processor.  OP_NOP is a bit annoying
    because the issuer expects busy flag still to be raised then lowered.
    (this requires a fake counter to be set).

    a two-state FSM ("READY" / "WAITING") is created.  "WAITING" is only
    entered when allow_overlap is set and either no Function Unit was
    found or there is a write-after-write hazard: the instruction is
    latched and replayed from the latch until it can be issued.

    :param m: the Module into which statements and submodules are added
    :returns: tuple (fu_bitdict, fu_selected).  both are dicts keyed by
              Function Unit name: fu_bitdict holds the per-FU "enable"
              (issue-pick) bit, fu_selected the per-FU "busy or picked"
              bit.
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus

    # indicate if core is busy
    busy_o = self.o.busy_o
    any_busy_o = self.o.any_busy_o

    # connect up temporary copy of incoming instruction. the FSM will
    # either blat the incoming instruction (if valid) into self.ireg
    # or if the instruction could not be delivered, keep dropping the
    # latched copy into ireg
    ilatch = self.ispec()
    self.instr_active = Signal()

    # enable/busy-signals for each FU, get one bit for each FU (by name)
    fu_enable = Signal(len(fus), reset_less=True)
    fu_busy = Signal(len(fus), reset_less=True)
    fu_bitdict = {}
    fu_selected = {}
    for i, funame in enumerate(fus.keys()):
        fu_bitdict[funame] = fu_enable[i]
        fu_selected[funame] = fu_busy[i]

    # identify function units and create a list by fnunit so that
    # PriorityPickers can be created for selecting one of them that
    # isn't busy at the time the incoming instruction needs passing on
    by_fnunit = defaultdict(list)
    for fname, member in Function.__members__.items():
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            if member.value & fnunit: # this FU handles this type of op
                by_fnunit[fname].append((funame, fu)) # add by Function

    # ok now just print out the list of FUs by Function, because we can
    for fname, fu_list in by_fnunit.items():
        print ("FUs by type", fname, fu_list)

    # now create a PriorityPicker per FU-type such that only one
    # non-busy FU will be picked
    issue_pps = {}
    fu_found = Signal() # take a note if no Function Unit was available
    for fname, fu_list in by_fnunit.items():
        i_pp = PriorityPicker(len(fu_list))
        m.submodules['i_pp_%s' % fname] = i_pp
        i_l = []
        for i, (funame, fu) in enumerate(fu_list):
            # match the decoded instruction (e.do.fn_unit) against the
            # "capability" of this FU, gate that by whether that FU is
            # busy, and drop that into the PriorityPicker.
            # this will give us an output of the first available *non-busy*
            # Function Unit (Reservation Station) capable of handling this
            # instruction.
            fnunit = fu.fnunit.value
            en_req = Signal(name="issue_en_%s" % funame, reset_less=True)
            fnmatch = (self.ireg.e.do.fn_unit & fnunit).bool()
            comb += en_req.eq(fnmatch & ~fu.busy_o &
                              self.instr_active)
            i_l.append(en_req) # store in list for doing the Cat-trick
            # picker output, gated by enable: store in fu_bitdict
            po = Signal(name="o_issue_pick_"+funame) # picker output
            comb += po.eq(i_pp.o[i] & i_pp.en_o)
            comb += fu_bitdict[funame].eq(po)
            comb += fu_selected[funame].eq(fu.busy_o | po)
            # if we don't do this, then when there are no FUs available,
            # the "p.o_ready" signal will go back "ok we accepted this
            # instruction" which of course isn't true.
            with m.If(i_pp.en_o):
                comb += fu_found.eq(1)
        # for each input, Cat them together and drop them into the picker
        comb += i_pp.i.eq(Cat(*i_l))

    # rdmask, which is for registers needs to come from the *main* decoder
    for funame, fu in fus.items():
        rdmask = get_rdflags(m, self.ireg.e, fu)
        comb += fu.rdmaskn.eq(~rdmask)

    # sigh - need a NOP counter: busy is held while it counts down to zero
    counter = Signal(2)
    with m.If(counter != 0):
        sync += counter.eq(counter - 1)
        comb += busy_o.eq(1)

    # default to reading from incoming instruction: may be overridden
    # by copy from latch when "waiting"
    comb += self.ireg.eq(self.i)
    # always say "ready" except if overridden
    comb += self.p.o_ready.eq(1)

    with m.FSM():
        with m.State("READY"):
            with m.If(self.p.i_valid): # run only when valid
                with m.Switch(self.ireg.e.do.insn_type):
                    # check for ATTN: halt if true
                    with m.Case(MicrOp.OP_ATTN):
                        m.d.sync += self.o.core_terminate_o.eq(1)

                    # fake NOP - this isn't really used (Issuer detects NOP)
                    with m.Case(MicrOp.OP_NOP):
                        sync += counter.eq(2)
                        comb += busy_o.eq(1)

                    with m.Default():
                        comb += self.instr_active.eq(1)
                        comb += self.p.o_ready.eq(0)
                        # connect instructions. only one enabled at a time
                        for funame, fu in fus.items():
                            do = self.des[funame]
                            enable = fu_bitdict[funame]

                            # run this FunctionUnit if enabled route op,
                            # issue, busy, read flags and mask to FU
                            with m.If(enable):
                                # operand comes from the *local* decoder
                                # do not actually issue, though, if there
                                # is a waw hazard. decoder has to still
                                # be asserted in order to detect that, tho
                                comb += fu.oper_i.eq_from(do)
                                if funame == 'mmu0':
                                    # URRR this is truly dreadful.
                                    # OP_FETCH_FAILED is a "fake" op.
                                    # no instruction creates it.  OP_TRAP
                                    # uses the *main* decoder: this is
                                    # a *Satellite* decoder that reacts
                                    # on *insn_in*... not fake ops. gaah.
                                    main_op = self.ireg.e.do
                                    with m.If(main_op.insn_type ==
                                              MicrOp.OP_FETCH_FAILED):
                                        comb += fu.oper_i.insn_type.eq(
                                            MicrOp.OP_FETCH_FAILED)
                                        comb += fu.oper_i.fn_unit.eq(
                                            Function.MMU)
                                # issue when valid (and no write-hazard)
                                comb += fu.issue_i.eq(~self.waw_hazard)
                                # instruction ok, indicate ready
                                comb += self.p.o_ready.eq(1)

                        if self.allow_overlap:
                            with m.If(~fu_found | self.waw_hazard):
                                # latch copy of instruction
                                sync += ilatch.eq(self.i)
                                comb += self.p.o_ready.eq(1) # accept
                                comb += busy_o.eq(1)
                                m.next = "WAITING"

        with m.State("WAITING"):
            comb += self.instr_active.eq(1)
            comb += self.p.o_ready.eq(0)
            comb += busy_o.eq(1)
            # using copy of instruction, keep waiting until an FU is free
            comb += self.ireg.eq(ilatch)
            with m.If(fu_found): # wait for conflict to clear
                # connect instructions. only one enabled at a time
                for funame, fu in fus.items():
                    do = self.des[funame]
                    enable = fu_bitdict[funame]

                    # run this FunctionUnit if enabled route op,
                    # issue, busy, read flags and mask to FU
                    with m.If(enable):
                        # operand comes from the *local* decoder,
                        # which is asserted even if not issued,
                        # so that WaW-detection can check for hazards.
                        # only if the waw hazard is clear does the
                        # instruction actually get issued
                        comb += fu.oper_i.eq_from(do)
                        # issue when valid
                        comb += fu.issue_i.eq(~self.waw_hazard)
                        with m.If(~self.waw_hazard):
                            comb += self.p.o_ready.eq(1)
                            comb += busy_o.eq(0)
                            m.next = "READY"

    print ("core: overlap allowed", self.allow_overlap)
    # true when any FU is busy (including the cycle where it is perhaps
    # to be issued - because that's what fu_busy is)
    comb += any_busy_o.eq(fu_busy.bool())
    if not self.allow_overlap:
        # for simple non-overlap, if any instruction is busy, set
        # busy output for core.
        comb += busy_o.eq(any_busy_o)
    else:
        # sigh deal with a fun situation that needs to be investigated
        # and resolved
        with m.If(self.issue_conflict):
            comb += busy_o.eq(1)
        # make sure that LDST, SPR, MMU, Branch and Trap all say "busy"
        # and do not allow overlap.  these are all the ones that
        # are non-forward-progressing: exceptions etc. that otherwise
        # change CoreState for some reason (MSR, PC, SVSTATE)
        for funame, fu in fus.items():
            if (funame.lower().startswith('ldst') or
                funame.lower().startswith('branch') or
                funame.lower().startswith('mmu') or
                funame.lower().startswith('spr') or
                funame.lower().startswith('trap')):
                with m.If(fu.busy_o):
                    comb += busy_o.eq(1)
            # for SPR pipeline pause dec/tb FSM to avoid race condition
            # TODO: really this should be much more sophisticated,
            # spot MTSPR, spot that DEC/TB is what is to be updated.
            # a job for PowerDecoder2, there
            if funame.lower().startswith('spr'):
                with m.If(fu.busy_o):
                    comb += self.pause_dec_tb.eq(1)

    # return both the function unit "enable" dict as well as the "busy".
    # the "busy-or-issued" can be passed in to the Read/Write port
    # connecters to give them permission to request access to regfiles
    return fu_bitdict, fu_selected
530
def connect_rdport(self, m, fu_bitdict, fu_selected,
                   rdpickers, regfile, regname, fspec):
    """connect one regfile read port to all Function Units that use it

    a PriorityPicker is created for the port (and stored in
    rdpickers[regfile][regname]) so that only one Function Unit read
    request is granted at a time.  the picked request drives the port's
    address/enable, and one cycle later (via a delayed pick) the port's
    output data is routed to that Function Unit's source operand.

    :param m: the Module into which statements and submodules are added
    :param fu_bitdict: dict by FU name of the issue "enable" bit
    :param fu_selected: dict by FU name of the "busy or picked" bit
    :param rdpickers: dict-of-dicts (regfile, then port name) into which
                      the newly-created PriorityPicker is placed
    :param regfile: regfile name (lower-cased to look up the regfile)
    :param regname: name of the read port within the regfile
    :param fspec: a spec, or list of specs, each providing okflag,
                  regport, wid and specs (the list of FUSpec users)
    :returns: Const(0) when hazard vectors are not enabled, otherwise a
              Signal that is set when a requested read overlaps a bit in
              the regfile's write-hazard vector
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus
    regs = self.regs

    rpidx = regname

    # select the required read port.  these are pre-defined sizes
    rfile = regs.rf[regfile.lower()]
    rport = rfile.r_ports[rpidx]
    print("read regfile", rpidx, regfile, regs.rf.keys(),
          rfile, rfile.unary)

    # for checking if the read port has an outstanding write
    if self.make_hazard_vecs:
        wv = regs.wv[regfile.lower()]
        wvchk = wv.q_int # write-vec bit-level hazard check

    # if a hazard is detected on this read port, simply blithely block
    # every FU from reading on it.  this is complete overkill but very
    # simple for now.
    hazard_detected = Signal(name="raw_%s_%s" % (regfile, rpidx))

    # normalise to a list: merged ports pass in a list of specs
    fspecs = fspec
    if not isinstance(fspecs, list):
        fspecs = [fspecs]

    # first pass: one read-ok flag per spec, and the offset of each
    # spec's group of users within the (single, shared) picker
    rdflags = []
    pplen = 0
    ppoffs = []
    for i, fspec in enumerate(fspecs):
        # get the regfile specs for this regfile port
        print ("fpsec", i, fspec, len(fspec.specs))
        name = "%s_%s_%d" % (regfile, regname, i)
        ppoffs.append(pplen) # record offset for picker
        pplen += len(fspec.specs)
        rdflag = Signal(name="rdflag_"+name, reset_less=True)
        comb += rdflag.eq(fspec.okflag)
        rdflags.append(rdflag)

    print ("pplen", pplen)

    # create a priority picker to manage this port
    rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
    m.submodules["rdpick_%s_%s" % (regfile, rpidx)] = rdpick

    rens = []
    addrs = []
    wvens = []

    # second pass: wire up every user of every spec
    for i, fspec in enumerate(fspecs):
        (rf, _read, wid, fuspecs) = \
            (fspec.okflag, fspec.regport, fspec.wid, fspec.specs)
        # connect up the FU req/go signals, and the reg-read to the FU
        # and create a Read Broadcast Bus
        for pi, fuspec in enumerate(fspec.specs):
            (funame, fu, idx) = (fuspec.funame, fuspec.fu, fuspec.idx)
            pi += ppoffs[i]
            name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
            fu_active = fu_selected[funame]
            fu_issued = fu_bitdict[funame]

            # get (or set up) a latched copy of read register number
            # and (sigh) also the read-ok flag
            # TODO: use nmutil latchregister
            rhname = "%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name="rdflag_%s_%s" % (funame, rhname),
                            reset_less=True)
            if rhname not in fu.rf_latches:
                rfl = Signal(name="rdflag_latch_%s_%s" % (funame, rhname))
                fu.rf_latches[rhname] = rfl
                with m.If(fu.issue_i):
                    sync += rfl.eq(rdflags[i])
            else:
                rfl = fu.rf_latches[rhname]

            # now the register port
            rname = "%s_%s_%s_%d" % (funame, regfile, regname, pi)
            read = Signal.like(_read, name="read_"+rname)
            if rname not in fu.rd_latches:
                rdl = Signal.like(_read, name="rdlatch_"+rname)
                fu.rd_latches[rname] = rdl
                with m.If(fu.issue_i):
                    sync += rdl.eq(_read)
            else:
                rdl = fu.rd_latches[rname]

            # make the read immediately available on issue cycle
            # after the read cycle, otherwise use the latched copy.
            # this captures the regport and okflag on issue
            with m.If(fu.issue_i):
                comb += read.eq(_read)
                comb += rdflag.eq(rdflags[i])
            with m.Else():
                comb += read.eq(rdl)
                comb += rdflag.eq(rfl)

            # connect request-read to picker input, and output to go-rd
            addr_en = Signal.like(read, name="addr_en_"+name)
            pick = Signal(name="pick_"+name) # picker input
            rp = Signal(name="rp_"+name) # picker output
            delay_pick = Signal(name="dp_"+name) # read-enable "underway"
            rhazard = Signal(name="rhaz_"+name)

            # exclude any currently-enabled read-request (mask out active)
            # entirely block anything hazarded from being picked
            comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflag &
                            ~delay_pick & ~rhazard)
            comb += rdpick.i[pi].eq(pick)
            comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick

            # if picked, select read-port "reg select" number to port
            comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
            sync += delay_pick.eq(rp) # delayed "pick"
            comb += addr_en.eq(Mux(rp, read, 0))

            # the read-enable happens combinatorially (see mux-bus below)
            # but it results in the data coming out on a one-cycle delay.
            if rfile.unary:
                rens.append(addr_en)
            else:
                addrs.append(addr_en)
                rens.append(rp)

            # use the *delayed* pick signal to put requested data onto bus
            with m.If(delay_pick):
                # connect regfile port to input, creating fan-out Bus
                src = fu.src_i[idx]
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      src.shape(), rport.o_data.shape())
                # all FUs connect to same port
                comb += src.eq(rport.o_data)

            # everything below in this loop is hazard-vector checking
            if not self.make_hazard_vecs:
                continue

            # read the write-hazard bitvector (wv) for any bit that is
            # being requested by this read
            wvchk_en = Signal(len(wvchk), name="wv_chk_addr_en_"+name)
            issue_active = Signal(name="rd_iactive_"+name)
            # XXX combinatorial loop here
            comb += issue_active.eq(fu_active & rdflag)
            with m.If(issue_active):
                if rfile.unary:
                    comb += wvchk_en.eq(read)
                else:
                    comb += wvchk_en.eq(1<<read)
            # if FU is busy (which doesn't get set at the same time as
            # issue) and no hazard was detected, clear wvchk_en (i.e.
            # stop checking for hazards).  there is a loop here, but it's
            # via a DFF, so is ok. some linters may complain, but hey.
            with m.If(fu.busy_o & ~rhazard):
                comb += wvchk_en.eq(0)

            # read-hazard is ANDed with (filtered by) what is actually
            # being requested.
            comb += rhazard.eq((wvchk & wvchk_en).bool())

            wvens.append(wvchk_en)

    # or-reduce the muxed read signals
    if rfile.unary:
        # for unary-addressed
        comb += rport.ren.eq(ortreereduce_sig(rens))
    else:
        # for binary-addressed
        comb += rport.addr.eq(ortreereduce_sig(addrs))
        comb += rport.ren.eq(Cat(*rens).bool())
        print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    if not self.make_hazard_vecs:
        return Const(0) # declare "no hazards"

    # enable the read bitvectors for this issued instruction
    # and return whether any write-hazard bit is set
    # NOTE(review): "name" here is whatever the loops above last assigned;
    # it only affects the name of the signal.
    wvchk_and = Signal(len(wvchk), name="wv_chk_"+name)
    comb += wvchk_and.eq(wvchk & ortreereduce_sig(wvens))
    comb += hazard_detected.eq(wvchk_and.bool())
    return hazard_detected
711
def connect_rdports(self, m, fu_bitdict, fu_selected):
    """connect read ports

    obtains the read regspecs as a dict-of-dicts (by regfile, then by
    regport name) and hands every named port to connect_rdport, which
    connects all FUs wanting that port by way of a PriorityPicker.

    when regreduce_en is set, the INT ports rb/rc/ra are merged into a
    single "rabc" port and the FAST ports fast2/fast3 (where present)
    are merged into "fast1".

    returns a single expression: the OR of the hazard indicators
    returned by each connect_rdport call.
    """
    hazards = []

    # dictionary of lists of regfile read ports
    byregfiles_rdspec = self.get_byregfiles(m, True)

    # okaay, now we need a PriorityPicker per regfile per regfile port
    rdpickers = {}
    for regfile, fuspecs in byregfiles_rdspec.items():
        rdpickers[regfile] = {}

        # argh.  an experiment to merge RA and RB in the INT regfile
        # (we have too many read/write ports)
        if self.regreduce_en and regfile == 'INT':
            merged = [fuspecs.pop('rb')]
            fuspecs['rabc'] = merged
            for portname in ('rc', 'ra'):
                merged.append(fuspecs.pop(portname))
        if self.regreduce_en and regfile == 'FAST':
            merged = [fuspecs.pop('fast1')]
            fuspecs['fast1'] = merged
            for portname in ('fast2', 'fast3'):
                if portname in fuspecs:
                    merged.append(fuspecs.pop(portname))

        # for each named regfile port, connect up all FUs to that port
        # and collate the hazard detection that comes back
        for regname, fspec in sort_fuspecs(fuspecs):
            print("connect rd", regname, fspec)
            hazards.append(self.connect_rdport(m, fu_bitdict, fu_selected,
                                               rdpickers, regfile,
                                               regname, fspec))

    return Cat(*hazards).bool()
757
def make_hazards(self, m, regfile, rfile, wvclr, wvset,
                 funame, regname, idx,
                 addr_en, wp, fu, fu_active, wrflag, write,
                 fu_wrok):
    """make_hazards: a setter and a clearer for the regfile write ports

    setter is at issue time (using PowerDecoder2 regfile write numbers)
    clearer is at regfile write time (when FU has said what to write to)

    there is *one* unusual case here which has to be dealt with:
    when the Function Unit does *NOT* request a write to the regfile
    (has its data.ok bit CLEARED).  this is perfectly legitimate.
    and a royal pain.

    :param m: the Module into which statements are added
    :param regfile: regfile name (used here only in debug prints)
    :param rfile: the regfile; its "unary" attribute selects whether
                  register numbers are used directly or as 1<<number
    :param wvclr: write-vector clear port (its length sizes wvaddr_en)
    :param wvset: write-vector set port (its length sizes wviaddr_en)
    :param funame: Function Unit name (used in signal names)
    :param regname: regfile port name (used in signal names)
    :param idx: FU output index (used in signal names)
    :param addr_en: the regfile write address (or unary enable)
    :param wp: write-pick signal, gating the binary-addressed clear
    :param fu: the Function Unit (issue_i, busy_o, alu_done_o are read)
    :param fu_active: FU "busy or picked" bit
    :param wrflag: flag indicating the instruction writes to this port
    :param write: the register number to be written
    :param fu_wrok: the FU's data.ok flag for this output
    :returns: tuple (wvaddr_en, wviaddr_en): the clear-enable and the
              set-enable bitvectors respectively
    """
    comb, sync = m.d.comb, m.d.sync
    name = "%s_%s_%d" % (funame, regname, idx)

    # connect up the bitvector write hazard.  unlike the
    # regfile writeports, a ONE must be written to the corresponding
    # bit of the hazard bitvector (to indicate the existence of
    # the hazard)

    # the detection of what shall be written to is based
    # on *issue*.  it is delayed by 1 cycle so that instructions
    # "addi 5,5,0x2" do not cause combinatorial loops due to
    # fake-dependency on *themselves*.  this will totally fail
    # spectacularly when doing multi-issue
    print ("write vector (for regread)", regfile, wvset)
    wviaddr_en = Signal(len(wvset), name="wv_issue_addr_en_"+name)
    issue_active = Signal(name="iactive_"+name)
    # registered (sync): this is the 1-cycle delay mentioned above
    sync += issue_active.eq(fu.issue_i & fu_active & wrflag)
    with m.If(issue_active):
        if rfile.unary:
            comb += wviaddr_en.eq(write)
        else:
            comb += wviaddr_en.eq(1<<write)

    # deal with write vector clear: this kicks in when the regfile
    # is written to, and clears the corresponding bitvector entry
    print ("write vector", regfile, wvclr)
    wvaddr_en = Signal(len(wvclr), name="wvaddr_en_"+name)
    if rfile.unary:
        comb += wvaddr_en.eq(addr_en)
    else:
        with m.If(wp):
            comb += wvaddr_en.eq(1<<addr_en)

    # XXX ASSUME that LDSTFunctionUnit always sets the data it intends to
    # this may NOT be the case when an exception occurs
    if isinstance(fu, LDSTFunctionUnit):
        return wvaddr_en, wviaddr_en

    # okaaay, this is preparation for the awkward case.
    # * latch a copy of wrflag when issue goes high.
    # * when the fu_wrok (data.ok) flag is NOT set,
    #   but the FU is done, the FU is NEVER going to write
    #   so the bitvector has to be cleared.
    latch_wrflag = Signal(name="latch_wrflag_"+name)
    with m.If(~fu.busy_o):
        sync += latch_wrflag.eq(0)
    # listed after the clear above, so takes priority if both are true
    with m.If(fu.issue_i & fu_active):
        sync += latch_wrflag.eq(wrflag)
    with m.If(fu.alu_done_o & latch_wrflag & ~fu_wrok):
        if rfile.unary:
            comb += wvaddr_en.eq(write) # addr_en gated with wp, don't use
        else:
            comb += wvaddr_en.eq(1<<addr_en) # binary addr_en not gated

    return wvaddr_en, wviaddr_en
827
def connect_wrport(self, m, fu_bitdict, fu_selected,
                   wrpickers, regfile, regname, fspec):
    """connect one regfile write port to all FUs that write through it

    a single PriorityPicker is created for the port (one input per
    Function Unit lane), stored in wrpickers[regfile][regname] and added
    to m.submodules.  for each FU lane a (latched) copy of the write
    register number is set up, the FU write-request is fed to the picker,
    and the picker output drives the FU's go_wr_i.  the per-FU data,
    enables (and, for non-unary regfiles, addresses) are then merged with
    ortreereduce_sig onto the regfile write port.

    when self.make_hazard_vecs is set, self.make_hazards is also called
    per FU lane and a write-after-write check against the write-hazard
    bitvector is added, raising fu._waw_hazard on a match.

    arguments:

    * m           - Module being built (m.d.comb, m.d.sync, m.submodules)
    * fu_bitdict  - dict, by FU name: used here as the "requested"
                    qualifier for latching the write register number
    * fu_selected - dict, by FU name: gates the FU's write-request into
                    the picker
    * wrpickers   - dict-of-dicts: wrpickers[regfile] must already exist,
                    the new picker is recorded under [regname]
    * regfile     - regfile name (lower-cased to index regs.rf / regs.wv)
    * regname     - write port name (index into rfile.w_ports)
    * fspec       - one spec, or a list of specs, each providing
                    okflag, regport, wid and specs attributes

    returns a tuple (wvclren, wvseten) of lists of write-hazard
    clear / set enables, or ([], []) if make_hazard_vecs is not set.
    """
    comb, sync = m.d.comb, m.d.sync
    # NOTE(review): fus (here), and writes / wvsets (below) are assigned
    # but never read anywhere in this method
    fus = self.fus.fus
    regs = self.regs

    rpidx = regname

    # select the required write port.  these are pre-defined sizes
    rfile = regs.rf[regfile.lower()]
    wport = rfile.w_ports[rpidx]

    print("connect wr", regname, "unary", rfile.unary, fspec)
    print(regfile, regs.rf.keys())

    # select the write-protection hazard vector.  note that this still
    # requires to WRITE to the hazard bitvector!  read-requests need
    # to RAISE the bitvector (set it to 1), which, duh, requires a WRITE
    if self.make_hazard_vecs:
        wv = regs.wv[regfile.lower()]
        wvset = wv.s  # write-vec bit-level hazard ctrl
        wvclr = wv.r  # write-vec bit-level hazard ctrl
        wvchk = wv.q  # write-after-write hazard check

    # normalise: callers may pass either a single spec or a list of them
    fspecs = fspec
    if not isinstance(fspecs, list):
        fspecs = [fspecs]

    # first pass: count the total number of FU lanes (picker width),
    # record where each spec's lanes start, and create one write-flag
    # signal per spec
    pplen = 0
    writes = []
    ppoffs = []
    wrflags = []
    for i, fspec in enumerate(fspecs):
        # get the regfile specs for this regfile port
        (wf, _write, wid, fuspecs) = \
            (fspec.okflag, fspec.regport, fspec.wid, fspec.specs)
        print ("fpsec", i, "wrflag", wf, fspec, len(fuspecs))
        ppoffs.append(pplen)  # record offset for picker
        pplen += len(fuspecs)

        name = "%s_%s_%d" % (regfile, regname, i)
        wrflag = Signal(name="wr_flag_"+name)
        # a spec without an okflag gets a write-flag tied to zero
        if wf is not None:
            comb += wrflag.eq(wf)
        else:
            comb += wrflag.eq(0)
        wrflags.append(wrflag)

    # create a priority picker to manage this port
    wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
    m.submodules["wrpick_%s_%s" % (regfile, rpidx)] = wrpick

    wsigs = []
    wens = []
    wvsets = []
    wvseten = []
    wvclren = []
    #wvens = [] - not needed: reading of writevec is permanently held hi
    addrs = []
    for i, fspec in enumerate(fspecs):
        # connect up the FU req/go signals and the reg-write to the FU
        # these are arbitrated by Data.ok signals
        (wf, _write, wid, fuspecs) = \
            (fspec.okflag, fspec.regport, fspec.wid, fspec.specs)
        for pi, fuspec in enumerate(fspec.specs):
            (funame, fu, idx) = (fuspec.funame, fuspec.fu, fuspec.idx)
            fu_requested = fu_bitdict[funame]
            pi += ppoffs[i]  # picker index: lane index plus spec offset
            name = "%s_%s_%s_%d" % (funame, regfile, regname, idx)
            # get (or set up) a write-latched copy of write register number
            write = Signal.like(_write, name="write_"+name)
            rname = "%s_%s_%s_%d" % (funame, regfile, regname, idx)
            if rname not in fu.wr_latches:
                wrl = Signal.like(_write, name="wrlatch_"+rname)
                fu.wr_latches[rname] = write
                # do not depend on fu.issue_i here, it creates a
                # combinatorial loop on waw checking. using the FU
                # "enable" bitdict entry for this FU is sufficient,
                # because the PowerDecoder2 read/write nums are
                # valid continuously when the instruction is valid
                with m.If(fu_requested):
                    sync += wrl.eq(_write)
                    comb += write.eq(_write)
                with m.Else():
                    comb += write.eq(wrl)
            else:
                # already set up on an earlier call: re-use that signal
                write = fu.wr_latches[rname]

            # write-request comes from dest.ok
            dest = fu.get_out(idx)
            fu_dest_latch = fu.get_fu_out(idx)  # latched output
            name = "%s_%s_%d" % (funame, regname, idx)
            fu_wrok = Signal(name="fu_wrok_"+name, reset_less=True)
            comb += fu_wrok.eq(dest.ok & fu.busy_o)

            # connect request-write to picker input, and output to go-wr
            fu_active = fu_selected[funame]
            pick = fu.wr.rel_o[idx] & fu_active
            comb += wrpick.i[pi].eq(pick)
            # create a single-pulse go write from the picker output
            wr_pick = Signal(name="wpick_%s_%s_%d" % (funame, regname, idx))
            comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
            comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

            # connect the regspec write "reg select" number to this port
            # only if one FU actually requests (and is granted) the port
            # will the write-enable be activated
            wname = "waddr_en_%s_%s_%d" % (funame, regname, idx)
            addr_en = Signal.like(write, name=wname)
            wp = Signal()
            # NOTE(review): wr_pick is already ANDed with wrpick.en_o above
            comb += wp.eq(wr_pick & wrpick.en_o)
            comb += addr_en.eq(Mux(wp, write, 0))
            if rfile.unary:
                # unary regfile: the gated register number is the enable
                wens.append(addr_en)
            else:
                # binary regfile: separate address and write-enable
                addrs.append(addr_en)
                wens.append(wp)

            # connect regfile port to input
            print("reg connect widths",
                  regfile, regname, pi, funame,
                  dest.shape(), wport.i_data.shape())
            wsigs.append(fu_dest_latch)

            # now connect up the bitvector write hazard
            if not self.make_hazard_vecs:
                continue
            res = self.make_hazards(m, regfile, rfile, wvclr, wvset,
                                    funame, regname, idx,
                                    addr_en, wp, fu, fu_active,
                                    wrflags[i], write, fu_wrok)
            wvaddr_en, wv_issue_en = res
            wvclren.append(wvaddr_en)    # set only: no data => clear bit
            wvseten.append(wv_issue_en)  # set data same as enable

            # read the write-hazard bitvector (wv) and check it for any
            # bit that is set for the register about to be written
            # (write-after-write hazard)
            fu_requested = fu_bitdict[funame]
            wvchk_en = Signal(len(wvchk), name="waw_chk_addr_en_"+name)
            issue_active = Signal(name="waw_iactive_"+name)
            whazard = Signal(name="whaz_"+name)
            if wf is None:
                # XXX EEK! STATE regfile (branch) does not have an
                # write-active indicator in regspec_decode_write()
                print ("XXX FIXME waw_iactive", issue_active,
                       fu_requested, wf)
            else:
                # check bits from the incoming instruction.  note (back
                # in connect_instruction) that the decoder is held for
                # us to be able to do this, here... *without* issue being
                # held HI.  we MUST NOT gate this with fu.issue_i or
                # with fu_bitdict "enable": it would create a loop
                comb += issue_active.eq(wf)
            with m.If(issue_active):
                if rfile.unary:
                    comb += wvchk_en.eq(write)
                else:
                    comb += wvchk_en.eq(1<<write)
            # if FU is busy (which doesn't get set at the same time as
            # issue) and no hazard was detected, clear wvchk_en (i.e.
            # stop checking for hazards).  there is a loop here, but it's
            # via a DFF, so is ok. some linters may complain, but hey.
            with m.If(fu.busy_o & ~whazard):
                comb += wvchk_en.eq(0)

            # write-hazard is ANDed with (filtered by) what is actually
            # being requested.  the wvchk data is on a one-clock delay,
            # and wvchk_en comes directly from the main decoder
            comb += whazard.eq((wvchk & wvchk_en).bool())
            with m.If(whazard):
                comb += fu._waw_hazard.eq(1)

            #wvens.append(wvchk_en)

    # here is where we create the Write Broadcast Bus. simple, eh?
    comb += wport.i_data.eq(ortreereduce_sig(wsigs))
    if rfile.unary:
        # for unary-addressed
        comb += wport.wen.eq(ortreereduce_sig(wens))
    else:
        # for binary-addressed
        comb += wport.addr.eq(ortreereduce_sig(addrs))
        comb += wport.wen.eq(ortreereduce_sig(wens))

    if not self.make_hazard_vecs:
        return [], []

    # return these here rather than set wvclr/wvset directly,
    # because there may be more than one write-port to a given
    # regfile.  example: XER has a write-port for SO, CA, and OV
    # and the *last one added* of those would overwrite the other
    # two.  solution: have connect_wrports collate all the
    # or-tree-reduced bitvector set/clear requests and drop them
    # in as a single "thing".  this can only be done because the
    # set/get is an unary bitvector.
    print ("make write-vecs", regfile, regname, wvset, wvclr)
    return (wvclren,  # clear (regfile write)
            wvseten)  # set (issue time)
1025
def connect_wrports(self, m, fu_bitdict, fu_selected):
    """connect write ports

    orders the write regspecs into a dict-of-dicts, by regfile,
    by regport name, then connects all FUs that want that regport
    by way of a PriorityPicker (one connect_wrport call per port).

    note that the write-port wen, write-port data, and go_wr_i all need to
    be on the exact same clock cycle.
    NOTE(review): this docstring used to say that these "all use sync"
    because of a combinatorial loop bug; nothing in this method drives
    them from sync any more - confirm against connect_wrport.

    arguments:

    * m           - Module being built
    * fu_bitdict  - dict by FU name, passed through to connect_wrport
    * fu_selected - dict by FU name, passed through to connect_wrport

    returns None.  if self.make_hazard_vecs is set, the write-hazard
    set/clear enables gathered from every port of a regfile are
    or-reduced and assigned to that regfile's write-vector s / r inputs.
    """
    comb = m.d.comb
    regs = self.regs
    # dictionary of lists of regfile write ports
    byregfiles_wrspec = self.get_byregfiles(m, False)

    # same for write ports.
    # BLECH!  complex code-duplication! BLECH!
    wrpickers = {}
    wvclrers = defaultdict(list)
    wvseters = defaultdict(list)
    for regfile, fuspecs in byregfiles_wrspec.items():
        wrpickers[regfile] = {}

        if self.regreduce_en:
            # argh, more port-merging: several named ports are folded
            # into one list so that they share a single write port
            if regfile == 'INT':
                fuspecs['o'] = [fuspecs.pop('o')]
                # 'o1' is only present if some FU has such an output:
                # guard it exactly like the optional FAST ports below,
                # rather than dying with a KeyError
                if 'o1' in fuspecs:
                    fuspecs['o'].append(fuspecs.pop('o1'))
            if regfile == 'FAST':
                fuspecs['fast1'] = [fuspecs.pop('fast1')]
                if 'fast2' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast2'))
                if 'fast3' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast3'))

        # collate these and record them by regfile because there
        # are sometimes more write-ports per regfile
        for (regname, fspec) in sort_fuspecs(fuspecs):
            wvclren, wvseten = self.connect_wrport(m,
                                                   fu_bitdict, fu_selected,
                                                   wrpickers,
                                                   regfile, regname, fspec)
            wvclrers[regfile.lower()] += wvclren
            wvseters[regfile.lower()] += wvseten

    if not self.make_hazard_vecs:
        return

    # for write-vectors: reduce the clr-ers and set-ers down to
    # a single set of bits.  otherwise if there are two write
    # ports (on some regfiles), the last one doing comb += on
    # the reg.wv[regfile] instance "wins" (and all others are ignored,
    # whoops).  if there was only one write-port per wv regfile this would
    # not be an issue.
    # (wvclrers and wvseters are always filled together above, so they
    # share the same keys)
    for regfile, wvclren in wvclrers.items():
        wv = regs.wv[regfile]
        wvset = wv.s  # write-vec bit-level hazard ctrl
        wvclr = wv.r  # write-vec bit-level hazard ctrl
        wvseten = wvseters[regfile]
        comb += wvclr.eq(ortreereduce_sig(wvclren))  # clear (regfile write)
        comb += wvset.eq(ortreereduce_sig(wvseten))  # set (issue time)
1090
def get_byregfiles(self, m, readmode):
    """group Function Unit register ports by regfile and port name

    readmode selects read ports (True) or write ports (False).  every FU
    is given fresh, empty latch dictionaries (rd_latches and rf_latches
    in read mode, wr_latches in write mode), then each of its ports is
    looked up with regspec_decode and appended, as an FUSpec, to the
    ByRegSpec lane for its regfile / port name.

    returns a dict-of-dicts: first key regfile, second key regfile port
    name, value a ByRegSpec whose specs list holds one FUSpec per FU port.
    the result is also printed, for convenience.
    """
    mode = "write"
    if readmode:
        mode = "read"
    decoded = self.ireg.e  # decoded instruction to execute

    # first key: regfile.  second key: regfile port name
    by_regfile = defaultdict(dict)

    for funame, fu in self.fus.fus.items():
        # give each FU somewhere to keep the read/write register hazard
        # numbers (and okflags for read): these get latched later, in
        # connect_rd/write_ports
        if not readmode:
            fu.wr_latches = {}
        else:
            fu.rd_latches = {}  # read reg number latches
            fu.rf_latches = {}  # read flag latches

        # read uses the FU's sources, write uses its destinations
        print("%s ports for %s" % (mode, funame))
        n_ports = fu.n_src if readmode else fu.n_dst
        for idx in range(n_ports):
            regfile, regname, wid = fu.get_io_spec(readmode, idx)
            print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))

            # the decoded regfile numbers come from the main
            # PowerDecoder2 (not the satellites)
            decinfo = regspec_decode(m, readmode, decoded, regfile, regname)
            okflag = decinfo.okflag
            regport = decinfo.regport

            # first FU to ask for this port creates the (empty) lane
            lanes = by_regfile[regfile]
            if regname not in lanes:
                lanes[regname] = ByRegSpec(okflag, regport, wid, [])

            # every FU needing this [single-contended resource] regfile
            # port joins the lane, so that a PriorityPicker can later
            # arbitrate access to it
            lanes[regname].specs.append(FUSpec(funame, fu, idx))

    # ok just print that all out, for convenience
    for regfile, lanes in by_regfile.items():
        print("regfile %s ports:" % mode, regfile)
        for regname, lane in lanes.items():
            okflag, regport, wid, lane_fus = lane
            print(" rf %s port %s lane: %s" % (mode, regfile, regname))
            print(" %s" % regname, wid, okflag, regport)
            for funame, fu, idx in lane_fus:
                if readmode:
                    fusig = fu.src_i[idx]
                else:
                    fusig = fu.dest[idx]
                print(" ", funame, fu.__class__.__name__, idx, fusig)
                print()

    return by_regfile
1148
def __iter__(self):
    """iterate over the ports of the FUs, then of the decoded
    instruction input (self.i.e), then of l0.  regs not yet included.
    """
    for port in self.fus.ports():
        yield port
    for port in self.i.e.ports():
        yield port
    for port in self.l0.ports():
        yield port
    # TODO: regs
1154
def ports(self):
    """return everything this object iterates over, as a list"""
    return [port for port in self]
1157
1158
if __name__ == '__main__':
    # build a core from a test memory pspec, run it through
    # rtlil.convert and write the result to test_core.il
    pspec_args = dict(ldst_ifacetype='testpi',
                      imem_ifacetype='',
                      addr_wid=64,
                      allow_overlap=True,
                      mask_wid=8,
                      reg_wid=64)
    core = NonProductionCore(TestMemPspec(**pspec_args))
    il_text = rtlil.convert(core, ports=core.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(il_text)