make icache accessible to core, working back to TestIssuer
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 (update: actually this is being added now:
21 https://bugs.libre-soc.org/show_bug.cgi?id=737)
22 """
23
24 from nmigen import (Elaboratable, Module, Signal, ResetSignal, Cat, Mux,
25 Const)
26 from nmigen.cli import rtlil
27
28 from openpower.decoder.power_decoder2 import PowerDecodeSubset
29 from openpower.decoder.power_regspec_map import regspec_decode
30 from openpower.sv.svp64 import SVP64Rec
31
32 from nmutil.picker import PriorityPicker
33 from nmutil.util import treereduce
34 from nmutil.singlepipe import ControlBase
35
36 from soc.fu.compunits.compunits import AllFunctionUnits, LDSTFunctionUnit
37 from soc.regfile.regfiles import RegFiles
38 from openpower.decoder.power_decoder2 import get_rdflags
39 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
40 from soc.config.test.test_loadstore import TestMemPspec
41 from openpower.decoder.power_enums import MicrOp, Function
42 from soc.simple.core_data import CoreInput, CoreOutput
43
44 from collections import defaultdict, namedtuple
45 import operator
46
47 from nmutil.util import rising_edge
48
# FUSpec: one Function Unit's claim on a regfile port.
#   funame - name of the Function Unit (key into the fu_bitdict/fu_selected
#            dictionaries)
#   fu     - the Function Unit object itself
#   idx    - index selecting that FU's per-operand signals
#            (e.g. fu.src_i[idx], fu.rd_rel_o[idx], fu.go_rd_i[idx])
FUSpec = namedtuple("FUSpec", ["funame", "fu", "idx"])
# ByRegSpec: everything needed to wire up one regfile port.
#   okflag  - flag copied into the per-port rdflag / wrflag signal
#   regport - the register number/selector that is latched on issue
#   wid     - unpacked by the port-connection methods but not used there
#   specs   - sequence of FUSpec-like entries, one per FU sharing the port
ByRegSpec = namedtuple("ByRegSpec", ["okflag", "regport", "wid", "specs"])
51
52 # helper function for reducing a list of signals down to a parallel
53 # ORed single signal.
def ortreereduce(tree, attr="o_data"):
    """OR together, as a balanced tree, one attribute of every item.

    tree: the items to reduce
    attr: name of the attribute fetched from each item (default "o_data")

    returns whatever treereduce produces when combining with operator.or_
    """
    def fetch(item):
        # pull the named attribute out of one item of the tree
        return getattr(item, attr)

    return treereduce(tree, operator.or_, fetch)
56
57
def ortreereduce_sig(tree):
    """OR together, as a balanced tree, the items themselves.

    same as ortreereduce except that each item is used directly rather
    than having an attribute extracted from it.
    """
    def identity(item):
        # items are used as-is
        return item

    return treereduce(tree, operator.or_, identity)
60
61
62 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the (regname, fspec) pairs with "full*" ports placed first.

    fuspecs: dictionary of regname -> fspec

    returns a list of (regname, fspec) tuples.  entries whose regname
    starts with "full" come first; within each of the two groups the
    dictionary's own iteration order is kept.
    """
    def not_full(pair):
        # False sorts before True, so "full" names end up at the front
        return not pair[0].startswith("full")

    # sorted() is stable, so relative order inside each group is unchanged
    return sorted(fuspecs.items(), key=not_full)
72
73
74 # a hazard bitvector "remap" function which returns an AST expression
75 # that remaps read/write hazard regfile port numbers to either a full
76 # bitvector or a reduced subset one. SPR for example is reduced to a
77 # single bit.
78 # CRITICALLY-IMPORTANT NOTE: these bitvectors *have* to match up per
79 # regfile! therefore the remapping is per regfile, *NOT* per regfile
80 # port and certainly not based on whether it is a read port or write port.
81 # note that any reductions here will result in degraded performance due
82 # to conflicts, but at least it keeps the hazard matrix sizes down to "sane"
def bitvector_remap(regfile, rfile, port):
    """Remap a regfile port number to a hazard-bitvector expression.

    regfile: upper-case regfile name ('CR', 'XER', 'SVSTATE', 'FAST',
             'SPR' or 'INT')
    rfile:   the regfile object; only its "unary" attribute is read
    port:    the port number (or expression) to remap

    returns "port" unchanged when the regfile is unary-addressed, and
    "1 << port" (binary-to-unary conversion) when it is binary-addressed.
    returns None for any regfile name not listed above.

    note: despite the original intention of reducing SPR to a single bit,
    no reduction is performed here: SPR is treated exactly like FAST/INT.
    """
    # CR (8 bits at the moment, no SVP64), XER (3 bits) and SVSTATE are
    # unary already: no remap, return the port
    if regfile in ('CR', 'XER', 'SVSTATE'):
        return port
    # FAST (9 entries), SPR (10 entries) and INT might be unary already:
    # check, and convert binary to unary if not
    if regfile in ('FAST', 'SPR', 'INT'):
        if rfile.unary:
            return port
        return 1 << port
    # no remap is defined for any other regfile
    return None
113
114
115 # derive from ControlBase rather than have a separate Stage instance,
116 # this is simpler to do
117 class NonProductionCore(ControlBase):
def __init__(self, pspec):
    """create the core's sub-components from the given pspec

    reads the optional pspec attributes svp64, regreduce, allow_overlap
    and core_type (each defaults to disabled / "fsm" when absent), then
    creates the L0 cache buffer, the Function Units, the register files,
    the input/output data specs and one subset instruction decoder per
    Function Unit (except for the TRAP unit).

    note: self.icache is only created when an "mmu0" Function Unit exists.
    """
    self.pspec = pspec

    # test if SVP64 is to be enabled
    self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)

    # test to see if regfile ports should be reduced
    self.regreduce_en = (hasattr(pspec, "regreduce") and
                         (pspec.regreduce == True))

    # test to see if overlapping of instructions is allowed
    # (not normally enabled for TestIssuer FSM but useful for checking
    # the bitvector hazard detection, before doing In-Order)
    self.allow_overlap = (hasattr(pspec, "allow_overlap") and
                          (pspec.allow_overlap == True))

    # test core type.  hazard vectors are only created when overlap
    # is allowed
    self.make_hazard_vecs = self.allow_overlap
    self.core_type = "fsm"
    if hasattr(pspec, "core_type"):
        self.core_type = pspec.core_type

    # the core acts as its own Stage (see setup/ispec/ospec below)
    super().__init__(stage=self)

    # single LD/ST funnel for memory access
    self.l0 = l0 = TstL0CacheBuffer(pspec, n_units=1)
    pi = l0.l0.dports[0]

    # function units (only one each)
    # only include mmu if enabled in pspec
    self.fus = AllFunctionUnits(pspec, pilist=[pi])

    # link LoadStore1 into MMU and make L1 I-Cache easy to get at
    mmu = self.fus.get_fu('mmu0')
    ldst0 = self.fus.get_fu('ldst0')  # looked up but not used below
    print ("core pspec", pspec.ldst_ifacetype)
    print ("core mmu", mmu)
    if mmu is not None:
        lsi = l0.cmpi.lsmem.lsi # a LoadStore1 Interface object
        print ("core lsmem.lsi", lsi)
        mmu.alu.set_ldst_interface(lsi)
        # only exists when there is an MMU
        self.icache = lsi.icache

    # register files (yes plural)
    self.regs = RegFiles(pspec, make_hazard_vecs=self.make_hazard_vecs)

    # set up input and output: unusual requirement to set data directly
    # (due to the way that the core is set up in a different domain,
    # see TestIssuer.setup_peripherals
    self.p.i_data, self.n.o_data = self.new_specs(None)
    self.i, self.o = self.p.i_data, self.n.o_data

    # actual internal input data used (captured)
    self.ireg = self.ispec()

    # create per-FU instruction decoders (subsetted).  these "satellite"
    # decoders reduce wire fan-out from the one (main) PowerDecoder2
    # (used directly by the trap unit) to the *twelve* (or more)
    # Function Units.  we can either have 32 wires (the instruction)
    # to each, or we can have well over a 200 wire fan-out (to 12
    # ALUs). it's an easy choice to make.
    self.decoders = {}   # funame -> PowerDecodeSubset
    self.des = {}        # funame -> that decoder's "do" (decoded operand)

    # eep, these should be *per FU* i.e. for FunctionUnitBaseMulti
    # they should be shared (put into the ALU *once*).

    for funame, fu in self.fus.fus.items():
        f_name = fu.fnunit.name
        fnunit = fu.fnunit.value  # not used in this loop
        opkls = fu.opsubsetkls
        if f_name == 'TRAP':
            # TRAP decoder is the *main* decoder: just remember which
            # FU it is, no satellite decoder is created for it
            self.trapunit = funame
            continue
        assert funame not in self.decoders
        self.decoders[funame] = PowerDecodeSubset(None, opkls, f_name,
                                                  final=True,
                                                  state=self.ireg.state,
                                                  svp64_en=self.svp64_en,
                                                  regreduce_en=self.regreduce_en)
        self.des[funame] = self.decoders[funame].do

    # create per-Function Unit write-after-write hazard signals
    # yes, really, this should have been added in ReservationStations
    # but hey.
    for funame, fu in self.fus.fus.items():
        fu._waw_hazard = Signal(name="waw_%s" % funame)

    # share the SPR decoder with the MMU if it exists
    # (requires a "spr0" decoder to exist whenever "mmu0" does)
    if "mmu0" in self.decoders:
        self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]
210
211 # next 3 functions are Stage API Compliance
212 def setup(self, m, i):
213 pass
214
def ispec(self):
    """Stage API compliance: return a new CoreInput for this core.

    the CoreInput is constructed from the pspec and the svp64 / regreduce
    enable flags worked out in the constructor.
    """
    return CoreInput(self.pspec, self.svp64_en, self.regreduce_en)
217
def ospec(self):
    """Stage API compliance: return a new CoreOutput for this core."""
    return CoreOutput()
220
221 # elaborate function to create HDL
def elaborate(self, platform):
    """create the HDL for the core and return the Module

    adds the Function Units, the L0 cache buffer and the register files,
    then wires up (in this order) the satellite decoders, instruction
    issue, regfile read ports and regfile write ports.

    when pspec.nocore is True only a dummy toggling signal is created.
    """
    m = super().elaborate(platform)

    # for testing purposes, to cut down on build time in coriolis2
    if getattr(self.pspec, "nocore", False) == True:
        x = Signal() # dummy signal
        m.d.sync += x.eq(~x)
        return m

    comb = m.d.comb

    m.submodules.fus = self.fus
    m.submodules.l0 = self.l0
    self.regs.elaborate_into(m, platform)

    # amalgamate the per-FU write-hazards into a single top-level Signal
    self.waw_hazard = Signal()
    per_fu_waw = [fu._waw_hazard for fu in self.fus.fus.values()]
    comb += self.waw_hazard.eq(Cat(*per_fu_waw).bool())

    # connect decoders
    self.connect_satellite_decoders(m)

    # ssh, cheat: trap uses the main decoder because of the rewriting
    self.des[self.trapunit] = self.ireg.e.do

    # connect up Function Units, then read/write ports, and hazard conflict
    self.issue_conflict = Signal()
    fu_bitdict, fu_selected = self.connect_instruction(m)
    raw_hazard = self.connect_rdports(m, fu_bitdict, fu_selected)
    self.connect_wrports(m, fu_bitdict, fu_selected)
    if self.allow_overlap:
        comb += self.issue_conflict.eq(raw_hazard)

    # note if an exception happened.  in a pipelined or OoO design
    # this needs to be accompanied by "shadowing" (or stalling)
    exc_flags = [exc.happened for exc in self.fus.excs.values()]
    if exc_flags: # at least one exception
        comb += self.o.exc_happened.eq(Cat(*exc_flags).bool())

    return m
268
def connect_satellite_decoders(self, m):
    """add every per-FU subset decoder as a submodule and feed it

    each decoder receives the raw instruction, the endianness and the
    sv_a_nz flag from self.ireg.  when SVP64 is enabled the predicate
    modes, the SVP64 RM and the is_svp64_mode flag are connected as well,
    and LDST decoders are additionally told which LDST decoder to use.
    """
    comb = m.d.comb
    ireg = self.ireg
    for funame, decoder in self.decoders.items():
        # as subset decoders this massively reduces wire fanout given
        # the large number of ALUs
        m.submodules["dec_%s" % funame] = decoder
        comb += decoder.dec.raw_opcode_in.eq(ireg.raw_insn_i)
        comb += decoder.dec.bigendian.eq(ireg.bigendian_i)
        # sigh: SVP64 RA_OR_ZERO detection needs this one as well
        comb += decoder.sv_a_nz.eq(ireg.sv_a_nz)
        if self.svp64_en:
            comb += decoder.pred_sm.eq(ireg.sv_pred_sm)
            comb += decoder.pred_dm.eq(ireg.sv_pred_dm)
            if funame != self.trapunit:
                # pass through SVP64 RM
                comb += decoder.sv_rm.eq(ireg.sv_rm)
                comb += decoder.is_svp64_mode.eq(ireg.is_svp64_mode)
                # only the LDST PowerDecodeSubset *actually* needs to
                # know to use the alternative decoder.  this is all
                # a terrible hack
                if funame.lower().startswith("ldst"):
                    comb += decoder.use_svp64_ldst_dec.eq(
                        ireg.use_svp64_ldst_dec)
294
def connect_instruction(self, m):
    """connect_instruction

    uses decoded (from PowerOp) function unit information from CSV files
    to ascertain which Function Unit should deal with the current
    instruction.

    some (such as OP_ATTN, OP_NOP) are dealt with here, including
    ignoring it and halting the processor.  OP_NOP is a bit annoying
    because the issuer expects busy flag still to be raised then lowered.
    (this requires a fake counter to be set).

    issue is controlled by a two-state FSM: READY takes the incoming
    instruction; WAITING (only entered when allow_overlap is set) replays
    a latched copy of the instruction until a Function Unit is found and
    there is no write-after-write hazard.

    returns a tuple (fu_bitdict, fu_selected), both dictionaries keyed by
    Function Unit name:
    * fu_bitdict[funame] is that FU's one-bit issue-enable
    * fu_selected[funame] is that FU's "busy, or picked this cycle" bit
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus

    # indicate if core is busy
    busy_o = self.o.busy_o
    any_busy_o = self.o.any_busy_o

    # connect up temporary copy of incoming instruction. the FSM will
    # either blat the incoming instruction (if valid) into self.ireg
    # or if the instruction could not be delivered, keep dropping the
    # latched copy into ireg
    ilatch = self.ispec()
    self.instr_active = Signal()

    # enable/busy-signals for each FU, get one bit for each FU (by name)
    fu_enable = Signal(len(fus), reset_less=True)
    fu_busy = Signal(len(fus), reset_less=True)
    fu_bitdict = {}
    fu_selected = {}
    for i, funame in enumerate(fus.keys()):
        fu_bitdict[funame] = fu_enable[i]
        fu_selected[funame] = fu_busy[i]

    # identify function units and create a list by fnunit so that
    # PriorityPickers can be created for selecting one of them that
    # isn't busy at the time the incoming instruction needs passing on
    by_fnunit = defaultdict(list)
    for fname, member in Function.__members__.items():
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            if member.value & fnunit: # this FU handles this type of op
                by_fnunit[fname].append((funame, fu)) # add by Function

    # ok now just print out the list of FUs by Function, because we can
    for fname, fu_list in by_fnunit.items():
        print ("FUs by type", fname, fu_list)

    # now create a PriorityPicker per FU-type such that only one
    # non-busy FU will be picked
    issue_pps = {}  # not used below
    fu_found = Signal() # take a note if no Function Unit was available
    for fname, fu_list in by_fnunit.items():
        i_pp = PriorityPicker(len(fu_list))
        m.submodules['i_pp_%s' % fname] = i_pp
        i_l = []
        for i, (funame, fu) in enumerate(fu_list):
            # match the decoded instruction (e.do.fn_unit) against the
            # "capability" of this FU, gate that by whether that FU is
            # busy, and drop that into the PriorityPicker.
            # this will give us an output of the first available *non-busy*
            # Function Unit (Reservation Station) capable of handling this
            # instruction.
            fnunit = fu.fnunit.value
            en_req = Signal(name="issue_en_%s" % funame, reset_less=True)
            fnmatch = (self.ireg.e.do.fn_unit & fnunit).bool()
            comb += en_req.eq(fnmatch & ~fu.busy_o &
                              self.instr_active)
            i_l.append(en_req) # store in list for doing the Cat-trick
            # picker output, gated by enable: store in fu_bitdict
            po = Signal(name="o_issue_pick_"+funame) # picker output
            comb += po.eq(i_pp.o[i] & i_pp.en_o)
            comb += fu_bitdict[funame].eq(po)
            comb += fu_selected[funame].eq(fu.busy_o | po)
            # if we don't do this, then when there are no FUs available,
            # the "p.o_ready" signal will go back "ok we accepted this
            # instruction" which of course isn't true.
            with m.If(i_pp.en_o):
                comb += fu_found.eq(1)
        # for each input, Cat them together and drop them into the picker
        comb += i_pp.i.eq(Cat(*i_l))

    # rdmask, which is for registers needs to come from the *main* decoder
    for funame, fu in fus.items():
        rdmask = get_rdflags(m, self.ireg.e, fu)
        comb += fu.rdmaskn.eq(~rdmask)

    # sigh - need a NOP counter: busy is held high while it counts down
    counter = Signal(2)
    with m.If(counter != 0):
        sync += counter.eq(counter - 1)
        comb += busy_o.eq(1)

    # default to reading from incoming instruction: may be overridden
    # by copy from latch when "waiting"
    comb += self.ireg.eq(self.i)
    # always say "ready" except if overridden
    comb += self.p.o_ready.eq(1)

    with m.FSM():
        with m.State("READY"):
            with m.If(self.p.i_valid): # run only when valid
                with m.Switch(self.ireg.e.do.insn_type):
                    # check for ATTN: halt if true
                    with m.Case(MicrOp.OP_ATTN):
                        m.d.sync += self.o.core_terminate_o.eq(1)

                    # fake NOP - this isn't really used (Issuer detects NOP)
                    with m.Case(MicrOp.OP_NOP):
                        sync += counter.eq(2)
                        comb += busy_o.eq(1)

                    with m.Default():
                        comb += self.instr_active.eq(1)
                        # not ready, unless an FU below accepts
                        comb += self.p.o_ready.eq(0)
                        # connect instructions. only one enabled at a time
                        for funame, fu in fus.items():
                            do = self.des[funame]
                            enable = fu_bitdict[funame]

                            # run this FunctionUnit if enabled route op,
                            # issue, busy, read flags and mask to FU
                            with m.If(enable):
                                # operand comes from the *local* decoder
                                # do not actually issue, though, if there
                                # is a waw hazard. decoder has to still
                                # be asserted in order to detect that, tho
                                comb += fu.oper_i.eq_from(do)
                                # issue when valid (and no write-hazard)
                                comb += fu.issue_i.eq(~self.waw_hazard)
                                # instruction ok, indicate ready
                                comb += self.p.o_ready.eq(1)

                        if self.allow_overlap:
                            with m.If(~fu_found | self.waw_hazard):
                                # latch copy of instruction
                                sync += ilatch.eq(self.i)
                                comb += self.p.o_ready.eq(1) # accept
                                comb += busy_o.eq(1)
                                m.next = "WAITING"

        with m.State("WAITING"):
            comb += self.instr_active.eq(1)
            comb += self.p.o_ready.eq(0)
            comb += busy_o.eq(1)
            # using copy of instruction, keep waiting until an FU is free
            comb += self.ireg.eq(ilatch)
            with m.If(fu_found): # wait for conflict to clear
                # connect instructions. only one enabled at a time
                for funame, fu in fus.items():
                    do = self.des[funame]
                    enable = fu_bitdict[funame]

                    # run this FunctionUnit if enabled route op,
                    # issue, busy, read flags and mask to FU
                    with m.If(enable):
                        # operand comes from the *local* decoder,
                        # which is asserted even if not issued,
                        # so that WaW-detection can check for hazards.
                        # only if the waw hazard is clear does the
                        # instruction actually get issued
                        comb += fu.oper_i.eq_from(do)
                        # issue when valid
                        comb += fu.issue_i.eq(~self.waw_hazard)
                        with m.If(~self.waw_hazard):
                            comb += self.p.o_ready.eq(1)
                            comb += busy_o.eq(0)
                            m.next = "READY"

    print ("core: overlap allowed", self.allow_overlap)
    # true when any FU is busy (including the cycle where it is perhaps
    # to be issued - because that's what fu_busy is)
    comb += any_busy_o.eq(fu_busy.bool())
    if not self.allow_overlap:
        # for simple non-overlap, if any instruction is busy, set
        # busy output for core.
        comb += busy_o.eq(any_busy_o)
    else:
        # sigh deal with a fun situation that needs to be investigated
        # and resolved
        with m.If(self.issue_conflict):
            comb += busy_o.eq(1)
        # make sure that LDST, SPR, MMU, Branch and Trap all say "busy"
        # and do not allow overlap.  these are all the ones that
        # are non-forward-progressing: exceptions etc. that otherwise
        # change CoreState for some reason (MSR, PC, SVSTATE)
        for funame, fu in fus.items():
            if (funame.lower().startswith('ldst') or
                funame.lower().startswith('branch') or
                funame.lower().startswith('mmu') or
                funame.lower().startswith('spr') or
                funame.lower().startswith('trap')):
                with m.If(fu.busy_o):
                    comb += busy_o.eq(1)

    # return both the function unit "enable" dict as well as the "busy".
    # the "busy-or-issued" can be passed in to the Read/Write port
    # connecters to give them permission to request access to regfiles
    return fu_bitdict, fu_selected
495
def connect_rdport(self, m, fu_bitdict, fu_selected,
                   rdpickers, regfile, regname, fspec):
    """connect one regfile read port to all the FUs that want to use it

    m:           the Module being built
    fu_bitdict:  funame -> issue-enable bit (from connect_instruction)
    fu_selected: funame -> "busy or being issued" bit
    rdpickers:   dict-of-dicts; the PriorityPicker created here is stored
                 at rdpickers[regfile][regname]
    regfile:     regfile name (lower-cased to index self.regs)
    regname:     name of the read port within that regfile
    fspec:       a ByRegSpec, or a list of them when ports are merged

    a single PriorityPicker arbitrates between every (fspec, FU) pair.
    the picked FU's register number is presented to the port and, one
    cycle later (delay_pick), the port's data is driven onto that FU's
    source operand.

    returns Const(0) when hazard vectors are not in use, otherwise a
    Signal which is set when any register being read on this port is
    flagged in the write-hazard bitvector.
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus
    regs = self.regs

    rpidx = regname

    # select the required read port.  these are pre-defined sizes
    rfile = regs.rf[regfile.lower()]
    rport = rfile.r_ports[rpidx]
    print("read regfile", rpidx, regfile, regs.rf.keys(),
          rfile, rfile.unary)

    # for checking if the read port has an outstanding write
    if self.make_hazard_vecs:
        wv = regs.wv[regfile.lower()]
        wvchk = wv.q_int # write-vec bit-level hazard check

    # if a hazard is detected on this read port, simply blithely block
    # every FU from reading on it.  this is complete overkill but very
    # simple for now.
    hazard_detected = Signal(name="raw_%s_%s" % (regfile, rpidx))

    # normalise to a list so that merged and unmerged ports are
    # handled by the same code
    fspecs = fspec
    if not isinstance(fspecs, list):
        fspecs = [fspecs]

    # first pass: one read-flag per fspec, and work out the total picker
    # width plus the offset of each fspec's FUs within the picker
    rdflags = []
    pplen = 0
    ppoffs = []
    for i, fspec in enumerate(fspecs):
        # get the regfile specs for this regfile port
        print ("fpsec", i, fspec, len(fspec.specs))
        name = "%s_%s_%d" % (regfile, regname, i)
        ppoffs.append(pplen) # record offset for picker
        pplen += len(fspec.specs)
        rdflag = Signal(name="rdflag_"+name, reset_less=True)
        comb += rdflag.eq(fspec.okflag)
        rdflags.append(rdflag)

    print ("pplen", pplen)

    # create a priority picker to manage this port
    rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
    m.submodules["rdpick_%s_%s" % (regfile, rpidx)] = rdpick

    rens = []    # per-FU read-enables (unary: the gated address itself)
    addrs = []   # per-FU gated addresses (binary-addressed regfiles only)
    wvens = []   # per-FU hazard-check enable vectors

    for i, fspec in enumerate(fspecs):
        (rf, _read, wid, fuspecs) = \
            (fspec.okflag, fspec.regport, fspec.wid, fspec.specs)
        # connect up the FU req/go signals, and the reg-read to the FU
        # and create a Read Broadcast Bus
        for pi, fuspec in enumerate(fspec.specs):
            (funame, fu, idx) = (fuspec.funame, fuspec.fu, fuspec.idx)
            pi += ppoffs[i]  # position of this FU within the shared picker
            name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
            fu_active = fu_selected[funame]
            fu_issued = fu_bitdict[funame]  # not used below

            # get (or set up) a latched copy of read register number
            # and (sigh) also the read-ok flag
            # TODO: use nmutil latchregister
            rhname = "%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name="rdflag_%s_%s" % (funame, rhname),
                            reset_less=True)
            if rhname not in fu.rf_latches:
                rfl = Signal(name="rdflag_latch_%s_%s" % (funame, rhname))
                fu.rf_latches[rhname] = rfl
                with m.If(fu.issue_i):
                    sync += rfl.eq(rdflags[i])
            else:
                rfl = fu.rf_latches[rhname]

            # now the register port
            rname = "%s_%s_%s_%d" % (funame, regfile, regname, pi)
            read = Signal.like(_read, name="read_"+rname)
            if rname not in fu.rd_latches:
                rdl = Signal.like(_read, name="rdlatch_"+rname)
                fu.rd_latches[rname] = rdl
                with m.If(fu.issue_i):
                    sync += rdl.eq(_read)
            else:
                rdl = fu.rd_latches[rname]

            # make the read immediately available on issue cycle
            # after the read cycle, otherwise use the latched copy.
            # this captures the regport and okflag on issue
            with m.If(fu.issue_i):
                comb += read.eq(_read)
                comb += rdflag.eq(rdflags[i])
            with m.Else():
                comb += read.eq(rdl)
                comb += rdflag.eq(rfl)

            # connect request-read to picker input, and output to go-rd
            addr_en = Signal.like(read, name="addr_en_"+name)
            pick = Signal(name="pick_"+name)     # picker input
            rp = Signal(name="rp_"+name)         # picker output
            delay_pick = Signal(name="dp_"+name) # read-enable "underway"
            rhazard = Signal(name="rhaz_"+name)

            # exclude any currently-enabled read-request (mask out active)
            # entirely block anything hazarded from being picked
            comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflag &
                            ~delay_pick & ~rhazard)
            comb += rdpick.i[pi].eq(pick)
            comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick

            # if picked, select read-port "reg select" number to port
            comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
            sync += delay_pick.eq(rp) # delayed "pick"
            comb += addr_en.eq(Mux(rp, read, 0))

            # the read-enable happens combinatorially (see mux-bus below)
            # but it results in the data coming out on a one-cycle delay.
            if rfile.unary:
                rens.append(addr_en)
            else:
                addrs.append(addr_en)
                rens.append(rp)

            # use the *delayed* pick signal to put requested data onto bus
            with m.If(delay_pick):
                # connect regfile port to input, creating fan-out Bus
                src = fu.src_i[idx]
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      src.shape(), rport.o_data.shape())
                # all FUs connect to same port
                comb += src.eq(rport.o_data)

            # the remainder of the loop body is hazard detection only
            if not self.make_hazard_vecs:
                continue

            # read the write-hazard bitvector (wv) for any bit that is
            # set for the register this FU wants to read
            wvchk_en = Signal(len(wvchk), name="wv_chk_addr_en_"+name)
            issue_active = Signal(name="rd_iactive_"+name)
            # XXX combinatorial loop here
            comb += issue_active.eq(fu_active & rdflag)
            with m.If(issue_active):
                if rfile.unary:
                    comb += wvchk_en.eq(read)
                else:
                    comb += wvchk_en.eq(1<<read)
            # if FU is busy (which doesn't get set at the same time as
            # issue) and no hazard was detected, clear wvchk_en (i.e.
            # stop checking for hazards).  there is a loop here, but it's
            # via a DFF, so is ok. some linters may complain, but hey.
            with m.If(fu.busy_o & ~rhazard):
                comb += wvchk_en.eq(0)

            # read-hazard is ANDed with (filtered by) what is actually
            # being requested.
            comb += rhazard.eq((wvchk & wvchk_en).bool())

            wvens.append(wvchk_en)

    # or-reduce the muxed read signals
    if rfile.unary:
        # for unary-addressed
        comb += rport.ren.eq(ortreereduce_sig(rens))
    else:
        # for binary-addressed
        comb += rport.addr.eq(ortreereduce_sig(addrs))
        comb += rport.ren.eq(Cat(*rens).bool())
        print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    if not self.make_hazard_vecs:
        return Const(0) # declare "no hazards"

    # enable the read bitvectors for this issued instruction
    # and return whether any write-hazard bit is set
    # NOTE(review): "name" here is whatever the last loop iteration above
    # left behind, so the signal is named after the final FU on the port
    wvchk_and = Signal(len(wvchk), name="wv_chk_"+name)
    comb += wvchk_and.eq(wvchk & ortreereduce_sig(wvens))
    comb += hazard_detected.eq(wvchk_and.bool())
    return hazard_detected
676
def connect_rdports(self, m, fu_bitdict, fu_selected):
    """connect read ports

    groups the read regspecs by regfile and then by regport name, and
    hands each port to connect_rdport, which wires up every FU wanting
    that port through a PriorityPicker.

    when regreduce_en is set, the INT ports rb/rc/ra are merged onto a
    single "rabc" port, and FAST ports fast2/fast3 (where present) are
    merged onto "fast1".

    returns an expression that is true when any read port reports a
    hazard.
    """
    hazards = []

    # dictionary of lists of regfile read ports
    byregfiles_rdspec = self.get_byregfiles(m, True)

    # okaay, now we need a PriorityPicker per regfile per regfile port
    # loootta pickers... peter piper picked a pack of pickled peppers...
    rdpickers = {}
    for regfile, fuspecs in byregfiles_rdspec.items():
        rdpickers[regfile] = {}

        # argh.  an experiment to merge read ports
        # (we have too many read/write ports)
        if self.regreduce_en:
            if regfile == 'INT':
                fuspecs['rabc'] = [fuspecs.pop(port)
                                   for port in ('rb', 'rc', 'ra')]
            elif regfile == 'FAST':
                shared = [fuspecs.pop('fast1')]
                for extra in ('fast2', 'fast3'):
                    if extra in fuspecs:
                        shared.append(fuspecs.pop(extra))
                fuspecs['fast1'] = shared

        # for each named regfile port, connect up all FUs to that port
        # and collate the hazard detection that comes back
        for regname, fspec in sort_fuspecs(fuspecs):
            print("connect rd", regname, fspec)
            hazards.append(self.connect_rdport(m, fu_bitdict, fu_selected,
                                               rdpickers, regfile,
                                               regname, fspec))

    return Cat(*hazards).bool()
722
def make_hazards(self, m, regfile, rfile, wvclr, wvset,
                 funame, regname, idx,
                 addr_en, wp, fu, fu_active, wrflag, write,
                 fu_wrok):
    """make_hazards: a setter and a clearer for the regfile write ports

    setter is at issue time (using PowerDecoder2 regfile write numbers)
    clearer is at regfile write time (when FU has said what to write to)

    there is *one* unusual case here which has to be dealt with:
    when the Function Unit does *NOT* request a write to the regfile
    (has its data.ok bit CLEARED).  this is perfectly legitimate.
    and a royal pain.

    returns a tuple (wvaddr_en, wviaddr_en):
    * wvaddr_en is the clear-enable vector (same width as wvclr)
    * wviaddr_en is the set-enable vector (same width as wvset)

    for an LDSTFunctionUnit the "FU never writes" case is not handled:
    the function returns before that logic is added.
    """
    comb, sync = m.d.comb, m.d.sync
    name = "%s_%s_%d" % (funame, regname, idx)

    # connect up the bitvector write hazard.  unlike the
    # regfile writeports, a ONE must be written to the corresponding
    # bit of the hazard bitvector (to indicate the existence of
    # the hazard)

    # the detection of what shall be written to is based
    # on *issue*.  it is delayed by 1 cycle so that instructions
    # "addi 5,5,0x2" do not cause combinatorial loops due to
    # fake-dependency on *themselves*.  this will totally fail
    # spectacularly when doing multi-issue
    print ("write vector (for regread)", regfile, wvset)
    wviaddr_en = Signal(len(wvset), name="wv_issue_addr_en_"+name)
    issue_active = Signal(name="iactive_"+name)
    # registered (sync), which is what provides the 1-cycle delay
    sync += issue_active.eq(fu.issue_i & fu_active & wrflag)
    with m.If(issue_active):
        if rfile.unary:
            comb += wviaddr_en.eq(write)
        else:
            comb += wviaddr_en.eq(1<<write)

    # deal with write vector clear: this kicks in when the regfile
    # is written to, and clears the corresponding bitvector entry
    print ("write vector", regfile, wvclr)
    wvaddr_en = Signal(len(wvclr), name="wvaddr_en_"+name)
    if rfile.unary:
        comb += wvaddr_en.eq(addr_en)
    else:
        with m.If(wp):
            comb += wvaddr_en.eq(1<<addr_en)

    # XXX ASSUME that LDSTFunctionUnit always sets the data it intends to
    # this may NOT be the case when an exception occurs
    if isinstance(fu, LDSTFunctionUnit):
        return wvaddr_en, wviaddr_en

    # okaaay, this is preparation for the awkward case.
    # * latch a copy of wrflag when issue goes high.
    # * when the fu_wrok (data.ok) flag is NOT set,
    #   but the FU is done, the FU is NEVER going to write
    #   so the bitvector has to be cleared.
    latch_wrflag = Signal(name="latch_wrflag_"+name)
    with m.If(~fu.busy_o):
        sync += latch_wrflag.eq(0)
    # comes after the clear above, so takes priority when both are true
    with m.If(fu.issue_i & fu_active):
        sync += latch_wrflag.eq(wrflag)
    with m.If(fu.alu_done_o & latch_wrflag & ~fu_wrok):
        if rfile.unary:
            comb += wvaddr_en.eq(write) # addr_en gated with wp, don't use
        else:
            comb += wvaddr_en.eq(1<<addr_en) # binary addr_en not gated

    return wvaddr_en, wviaddr_en
792
793 def connect_wrport(self, m, fu_bitdict, fu_selected,
794 wrpickers, regfile, regname, fspec):
795 comb, sync = m.d.comb, m.d.sync
796 fus = self.fus.fus
797 regs = self.regs
798
799 rpidx = regname
800
801 # select the required write port. these are pre-defined sizes
802 rfile = regs.rf[regfile.lower()]
803 wport = rfile.w_ports[rpidx]
804
805 print("connect wr", regname, "unary", rfile.unary, fspec)
806 print(regfile, regs.rf.keys())
807
808 # select the write-protection hazard vector. note that this still
809 # requires to WRITE to the hazard bitvector! read-requests need
810 # to RAISE the bitvector (set it to 1), which, duh, requires a WRITE
811 if self.make_hazard_vecs:
812 wv = regs.wv[regfile.lower()]
813 wvset = wv.s # write-vec bit-level hazard ctrl
814 wvclr = wv.r # write-vec bit-level hazard ctrl
815 wvchk = wv.q # write-after-write hazard check
816
817 fspecs = fspec
818 if not isinstance(fspecs, list):
819 fspecs = [fspecs]
820
821 pplen = 0
822 writes = []
823 ppoffs = []
824 wrflags = []
825 for i, fspec in enumerate(fspecs):
826 # get the regfile specs for this regfile port
827 (wf, _write, wid, fuspecs) = \
828 (fspec.okflag, fspec.regport, fspec.wid, fspec.specs)
829 print ("fpsec", i, "wrflag", wf, fspec, len(fuspecs))
830 ppoffs.append(pplen) # record offset for picker
831 pplen += len(fuspecs)
832
833 name = "%s_%s_%d" % (regfile, regname, i)
834 wrflag = Signal(name="wr_flag_"+name)
835 if wf is not None:
836 comb += wrflag.eq(wf)
837 else:
838 comb += wrflag.eq(0)
839 wrflags.append(wrflag)
840
841 # create a priority picker to manage this port
842 wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
843 m.submodules["wrpick_%s_%s" % (regfile, rpidx)] = wrpick
844
845 wsigs = []
846 wens = []
847 wvsets = []
848 wvseten = []
849 wvclren = []
850 #wvens = [] - not needed: reading of writevec is permanently held hi
851 addrs = []
852 for i, fspec in enumerate(fspecs):
853 # connect up the FU req/go signals and the reg-read to the FU
854 # these are arbitrated by Data.ok signals
855 (wf, _write, wid, fuspecs) = \
856 (fspec.okflag, fspec.regport, fspec.wid, fspec.specs)
857 for pi, fuspec in enumerate(fspec.specs):
858 (funame, fu, idx) = (fuspec.funame, fuspec.fu, fuspec.idx)
859 fu_requested = fu_bitdict[funame]
860 pi += ppoffs[i]
861 name = "%s_%s_%s_%d" % (funame, regfile, regname, idx)
862 # get (or set up) a write-latched copy of write register number
863 write = Signal.like(_write, name="write_"+name)
864 rname = "%s_%s_%s_%d" % (funame, regfile, regname, idx)
865 if rname not in fu.wr_latches:
866 wrl = Signal.like(_write, name="wrlatch_"+rname)
867 fu.wr_latches[rname] = write
868 # do not depend on fu.issue_i here, it creates a
869 # combinatorial loop on waw checking. using the FU
870 # "enable" bitdict entry for this FU is sufficient,
871 # because the PowerDecoder2 read/write nums are
872 # valid continuously when the instruction is valid
873 with m.If(fu_requested):
874 sync += wrl.eq(_write)
875 comb += write.eq(_write)
876 with m.Else():
877 comb += write.eq(wrl)
878 else:
879 write = fu.wr_latches[rname]
880
881 # write-request comes from dest.ok
882 dest = fu.get_out(idx)
883 fu_dest_latch = fu.get_fu_out(idx) # latched output
884 name = "%s_%s_%d" % (funame, regname, idx)
885 fu_wrok = Signal(name="fu_wrok_"+name, reset_less=True)
886 comb += fu_wrok.eq(dest.ok & fu.busy_o)
887
888 # connect request-write to picker input, and output to go-wr
889 fu_active = fu_selected[funame]
890 pick = fu.wr.rel_o[idx] & fu_active
891 comb += wrpick.i[pi].eq(pick)
892 # create a single-pulse go write from the picker output
893 wr_pick = Signal(name="wpick_%s_%s_%d" % (funame, regname, idx))
894 comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
895 comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))
896
897 # connect the regspec write "reg select" number to this port
898 # only if one FU actually requests (and is granted) the port
899 # will the write-enable be activated
900 wname = "waddr_en_%s_%s_%d" % (funame, regname, idx)
901 addr_en = Signal.like(write, name=wname)
902 wp = Signal()
903 comb += wp.eq(wr_pick & wrpick.en_o)
904 comb += addr_en.eq(Mux(wp, write, 0))
905 if rfile.unary:
906 wens.append(addr_en)
907 else:
908 addrs.append(addr_en)
909 wens.append(wp)
910
911 # connect regfile port to input
912 print("reg connect widths",
913 regfile, regname, pi, funame,
914 dest.shape(), wport.i_data.shape())
915 wsigs.append(fu_dest_latch)
916
917 # now connect up the bitvector write hazard
918 if not self.make_hazard_vecs:
919 continue
920 res = self.make_hazards(m, regfile, rfile, wvclr, wvset,
921 funame, regname, idx,
922 addr_en, wp, fu, fu_active,
923 wrflags[i], write, fu_wrok)
924 wvaddr_en, wv_issue_en = res
925 wvclren.append(wvaddr_en) # set only: no data => clear bit
926 wvseten.append(wv_issue_en) # set data same as enable
927
928 # read the write-hazard bitvector (wv) for any bit that is
929 fu_requested = fu_bitdict[funame]
930 wvchk_en = Signal(len(wvchk), name="waw_chk_addr_en_"+name)
931 issue_active = Signal(name="waw_iactive_"+name)
932 whazard = Signal(name="whaz_"+name)
933 if wf is None:
934 # XXX EEK! STATE regfile (branch) does not have an
935 # write-active indicator in regspec_decode_write()
936 print ("XXX FIXME waw_iactive", issue_active,
937 fu_requested, wf)
938 else:
939 # check bits from the incoming instruction. note (back
940 # in connect_instruction) that the decoder is held for
941 # us to be able to do this, here... *without* issue being
942 # held HI. we MUST NOT gate this with fu.issue_i or
943 # with fu_bitdict "enable": it would create a loop
944 comb += issue_active.eq(wf)
945 with m.If(issue_active):
946 if rfile.unary:
947 comb += wvchk_en.eq(write)
948 else:
949 comb += wvchk_en.eq(1<<write)
950 # if FU is busy (which doesn't get set at the same time as
951 # issue) and no hazard was detected, clear wvchk_en (i.e.
952 # stop checking for hazards). there is a loop here, but it's
953 # via a DFF, so is ok. some linters may complain, but hey.
954 with m.If(fu.busy_o & ~whazard):
955 comb += wvchk_en.eq(0)
956
957 # write-hazard is ANDed with (filtered by) what is actually
958 # being requested. the wvchk data is on a one-clock delay,
959 # and wvchk_en comes directly from the main decoder
960 comb += whazard.eq((wvchk & wvchk_en).bool())
961 with m.If(whazard):
962 comb += fu._waw_hazard.eq(1)
963
964 #wvens.append(wvchk_en)
965
966 # here is where we create the Write Broadcast Bus. simple, eh?
967 comb += wport.i_data.eq(ortreereduce_sig(wsigs))
968 if rfile.unary:
969 # for unary-addressed
970 comb += wport.wen.eq(ortreereduce_sig(wens))
971 else:
972 # for binary-addressed
973 comb += wport.addr.eq(ortreereduce_sig(addrs))
974 comb += wport.wen.eq(ortreereduce_sig(wens))
975
976 if not self.make_hazard_vecs:
977 return [], []
978
979 # return these here rather than set wvclr/wvset directly,
980 # because there may be more than one write-port to a given
981 # regfile. example: XER has a write-port for SO, CA, and OV
982 # and the *last one added* of those would overwrite the other
983 # two. solution: have connect_wrports collate all the
984 # or-tree-reduced bitvector set/clear requests and drop them
985 # in as a single "thing". this can only be done because the
986 # set/get is an unary bitvector.
987 print ("make write-vecs", regfile, regname, wvset, wvclr)
988 return (wvclren, # clear (regfile write)
989 wvseten) # set (issue time)
990
def connect_wrports(self, m, fu_bitdict, fu_selected):
    """connect write ports

    orders the write regspecs into a dict-of-dicts, by regfile,
    by regport name, then connects all FUs that want that regport
    by way of a PriorityPicker (one connect_wrport call per port).

    note that the write-port wen, write-port data, and go_wr_i all need to
    be on the exact same clock cycle.

    NOTE(review): this docstring used to state that, because of a
    combinatorial loop bug, "these all use sync". every assignment made
    directly in this method is comb, so that statement was dropped here;
    confirm against connect_wrport before relying on either.

    m           -- the Module being elaborated (comb statements are added)
    fu_bitdict  -- passed through unmodified to connect_wrport
    fu_selected -- passed through unmodified to connect_wrport

    returns None. when self.make_hazard_vecs is set, additionally drives
    the set (wv.s) and clear (wv.r) inputs of each regfile's write-vector
    from the OR-reduction of everything connect_wrport returned for it.
    """
    comb = m.d.comb
    regs = self.regs
    # dict (by regfile) of dicts (by regfile port name) of write specs
    byregfiles_wrspec = self.get_byregfiles(m, False)

    wrpickers = {}
    # per-regfile (lower-case name) lists of hazard clear/set requests
    wvclrers = defaultdict(list)
    wvseters = defaultdict(list)
    for regfile, fuspecs in byregfiles_wrspec.items():
        wrpickers[regfile] = {}

        if self.regreduce_en:
            # port-merging: fold several named ports into a list held
            # under a single port name, so they share one write port
            if regfile == 'INT':
                fuspecs['o'] = [fuspecs.pop('o')]
                # 'o1' is only present when some FU declares it: guard
                # it the same way fast2/fast3 are guarded below, rather
                # than raising KeyError
                if 'o1' in fuspecs:
                    fuspecs['o'].append(fuspecs.pop('o1'))
            if regfile == 'FAST':
                fuspecs['fast1'] = [fuspecs.pop('fast1')]
                if 'fast2' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast2'))
                if 'fast3' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast3'))

        # collate these and record them by regfile because there
        # are sometimes more write-ports per regfile
        rfname = regfile.lower()
        for (regname, fspec) in sort_fuspecs(fuspecs):
            wvclren, wvseten = self.connect_wrport(m,
                                                   fu_bitdict, fu_selected,
                                                   wrpickers,
                                                   regfile, regname, fspec)
            wvclrers[rfname] += wvclren
            wvseters[rfname] += wvseten

    if not self.make_hazard_vecs:
        return

    # for write-vectors: reduce the clr-ers and set-ers down to
    # a single set of bits. otherwise if there are two write
    # ports (on some regfiles), the last one doing comb += on
    # the reg.wv[regfile] instance "wins" (and all others are ignored,
    # whoops). if there was only one write-port per wv regfile this would
    # not be an issue.
    # (wvclrers and wvseters always have identical keys: both are
    # updated together in the loop above)
    for regfile, wvclren in wvclrers.items():
        wv = regs.wv[regfile]
        wvseten = wvseters[regfile]
        comb += wv.r.eq(ortreereduce_sig(wvclren))  # clear (regfile write)
        comb += wv.s.eq(ortreereduce_sig(wvseten))  # set (issue time)
1055
def get_byregfiles(self, m, readmode):
    """group Function Unit register-port requirements by regfile and port

    walks every Function Unit in self.fus.fus and, for each of its
    source operands (readmode True, fu.n_src of them) or destination
    operands (readmode False, fu.n_dst of them), asks the FU for its
    (regfile, regname, wid) spec and asks regspec_decode for the
    matching okflag / regport of the decoded instruction (self.ireg.e).

    side-effects:
    * every FU gets fresh, empty latch dictionaries: rd_latches and
      rf_latches in read mode, wr_latches in write mode
    * the resulting structure is printed, for convenience

    m        -- the Module being elaborated (handed to regspec_decode)
    readmode -- True to collect read ports, False to collect write ports

    returns a dict-of-dicts: first key regfile, second key regfile port
    name, value a ByRegSpec(okflag, regport, wid, specs) where specs is
    the list of FUSpec(funame, fu, idx) wanting access to that port.
    """
    mode = "read" if readmode else "write"
    fus = self.fus.fus
    e = self.ireg.e  # decoded instruction to execute

    # dictionary of dictionaries of lists/tuples of regfile ports.
    # first key: regfile. second key: regfile port name
    byregfiles_spec = defaultdict(dict)

    for (funame, fu) in fus.items():
        # create in each FU a receptacle for the read/write register
        # hazard numbers (and okflags for read). to be latched in
        # connect_rd/write_ports
        if readmode:
            fu.rd_latches = {}  # read reg number latches
            fu.rf_latches = {}  # read flag latches
        else:
            fu.wr_latches = {}

        # construct regfile specs: read uses inspec, write outspec
        print("%s ports for %s" % (mode, funame))
        n_operands = fu.n_src if readmode else fu.n_dst
        for idx in range(n_operands):
            (regfile, regname, wid) = fu.get_io_spec(readmode, idx)
            print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))

            # the PowerDecoder2 (main one, not the satellites) contains
            # the decoded regfile numbers. obtain these now
            decinfo = regspec_decode(m, readmode, e, regfile, regname)
            okflag, regport = decinfo.okflag, decinfo.regport

            # construct the dictionary of regspec information by regfile
            ports = byregfiles_spec[regfile]
            if regname not in ports:
                ports[regname] = ByRegSpec(okflag, regport, wid, [])

            # here we start to create "lanes" where each Function Unit
            # requiring access to a given [single-contended resource]
            # regfile port is appended to a list, so that PriorityPickers
            # can be created to give uncontested access to it
            ports[regname].specs.append(FUSpec(funame, fu, idx))

    # ok just print that all out, for convenience.
    # distinct names are used for the per-regfile dict ("ports") and the
    # per-port FU list ("lane") so that the dict being iterated is never
    # rebound from inside its own loop
    for regfile, ports in byregfiles_spec.items():
        print("regfile %s ports:" % mode, regfile)
        for regname, fspec in ports.items():
            [okflag, regport, wid, lane] = fspec
            print(" rf %s port %s lane: %s" % (mode, regfile, regname))
            print(" %s" % regname, wid, okflag, regport)
            for (funame, fu, idx) in lane:
                fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                print(" ", funame, fu.__class__.__name__, idx, fusig)
                print()

    return byregfiles_spec
1113
def __iter__(self):
    """iterate over the ports of this core

    yields, in this order, everything from self.fus.ports(), then
    self.i.e.ports(), then self.l0.ports(). each source is only
    looked up once the previous one has been exhausted.
    """
    for fu_port in self.fus.ports():
        yield fu_port
    for insn_port in self.i.e.ports():
        yield insn_port
    for l0_port in self.l0.ports():
        yield l0_port
    # TODO: regs
1119
def ports(self):
    """return a new list containing everything that iterating self yields"""
    return [*self]
1122
1123
if __name__ == '__main__':
    # stand-alone run: build a core from a test memory pspec, convert
    # it to RTLIL and save the result as test_core.il
    pspec_args = {
        'ldst_ifacetype': 'testpi',
        'imem_ifacetype': '',
        'addr_wid': 48,
        'allow_overlap': True,
        'mask_wid': 8,
        'reg_wid': 64,
    }
    pspec = TestMemPspec(**pspec_args)
    dut = NonProductionCore(pspec)
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(il_text)