use new regspec_decode and fu.get_iospec functions
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 (update: actually this is being added now:
21 https://bugs.libre-soc.org/show_bug.cgi?id=737)
22 """
23
24 from nmigen import (Elaboratable, Module, Signal, ResetSignal, Cat, Mux,
25 Const)
26 from nmigen.cli import rtlil
27
28 from openpower.decoder.power_decoder2 import PowerDecodeSubset
29 from openpower.decoder.power_regspec_map import regspec_decode
30 from openpower.sv.svp64 import SVP64Rec
31
32 from nmutil.picker import PriorityPicker
33 from nmutil.util import treereduce
34 from nmutil.singlepipe import ControlBase
35
36 from soc.fu.compunits.compunits import AllFunctionUnits, LDSTFunctionUnit
37 from soc.regfile.regfiles import RegFiles
38 from openpower.decoder.power_decoder2 import get_rdflags
39 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
40 from soc.config.test.test_loadstore import TestMemPspec
41 from openpower.decoder.power_enums import MicrOp, Function
42 from soc.simple.core_data import CoreInput, CoreOutput
43
44 from collections import defaultdict, namedtuple
45 import operator
46
47 from nmutil.util import rising_edge
48
# FUSpec: one Function Unit's use of a regfile port - the name of the FU,
# the FU object itself, and the index of the FU operand/result concerned.
FUSpec = namedtuple("FUSpec", "funame fu idx")
# ByRegSpec: everything needed to wire up one regfile port - the "ok" flag
# gating use of the port, the register-number (port select) signal, "wid"
# (presumably the port width - TODO confirm) and the list of FUSpecs.
ByRegSpec = namedtuple("ByRegSpec", "okflag regport wid specs")
51
52 # helper function for reducing a list of signals down to a parallel
53 # ORed single signal.
def ortreereduce(tree, attr="o_data"):
    """OR-reduce one attribute of every item in tree, as a parallel tree.

    tree: the items to combine
    attr: name of the attribute picked off each item (default "o_data")
    """
    def pick(item):
        return getattr(item, attr)

    return treereduce(tree, operator.or_, pick)
56
57
def ortreereduce_sig(tree):
    """OR-reduce the items of tree themselves (no attribute lookup)."""
    def identity(item):
        return item

    return treereduce(tree, operator.or_, identity)
60
61
62 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the (regname, fspec) pairs of fuspecs, "full*" names first.

    the relative order inside each of the two groups is the dictionary's
    own iteration order.  a list is returned, not a dictionary.
    """
    def is_full(entry):
        return entry[0].startswith("full")

    entries = list(fuspecs.items())
    leading = [entry for entry in entries if is_full(entry)]
    trailing = [entry for entry in entries if not is_full(entry)]
    return leading + trailing
72
73
74 # derive from ControlBase rather than have a separate Stage instance,
75 # this is simpler to do
76 class NonProductionCore(ControlBase):
def __init__(self, pspec):
    """Create the Function Units, regfiles and per-FU satellite decoders.

    pspec: parameter object.  the optional attributes svp64, regreduce,
           allow_overlap and core_type are read if present;
           ldst_ifacetype is read unconditionally (it is printed).
    """
    self.pspec = pspec

    # optional boolean switches: absent means "off", and only an exact
    # comparison against True switches them on.
    # SVP64 support
    self.svp64_en = getattr(pspec, "svp64", False) == True
    # reduced number of regfile ports
    self.regreduce_en = getattr(pspec, "regreduce", False) == True
    # overlapping of instructions (not normally enabled for the
    # TestIssuer FSM but useful for checking the bitvector hazard
    # detection, before doing In-Order)
    self.allow_overlap = getattr(pspec, "allow_overlap", False) == True

    # hazard vectors are only made when overlap is allowed
    self.make_hazard_vecs = self.allow_overlap
    # core type: "fsm" unless the pspec says otherwise
    self.core_type = getattr(pspec, "core_type", "fsm")

    # must come after the flags above have been set up
    super().__init__(stage=self)

    # single LD/ST funnel for memory access
    l0 = TstL0CacheBuffer(pspec, n_units=1)
    self.l0 = l0
    pi = l0.l0.dports[0]

    # function units (only one each)
    # only include mmu if enabled in pspec
    self.fus = AllFunctionUnits(pspec, pilist=[pi])

    # link LoadStore1 into MMU
    mmu = self.fus.get_fu('mmu0')
    print ("core pspec", pspec.ldst_ifacetype)
    print ("core mmu", mmu)
    if mmu is not None:
        lsi = l0.cmpi.lsmem.lsi
        print ("core lsmem.lsi", lsi)
        mmu.alu.set_ldst_interface(lsi)

    # register files (yes plural)
    self.regs = RegFiles(pspec, make_hazard_vecs=self.make_hazard_vecs)

    # set up input and output: unusual requirement to set data directly
    # (due to the way that the core is set up in a different domain,
    # see TestIssuer.setup_peripherals
    self.p.i_data, self.n.o_data = self.new_specs(None)
    self.i, self.o = self.p.i_data, self.n.o_data

    # actual internal input data used (captured)
    self.ireg = self.ispec()

    # create per-FU instruction decoders (subsetted).  these "satellite"
    # decoders reduce wire fan-out from the one (main) PowerDecoder2
    # (used directly by the trap unit) to the *twelve* (or more)
    # Function Units.  we can either have 32 wires (the instruction)
    # to each, or we can have well over a 200 wire fan-out (to 12
    # ALUs). it's an easy choice to make.
    self.decoders = {}
    self.des = {}

    # eep, these should be *per FU* i.e. for FunctionUnitBaseMulti
    # they should be shared (put into the ALU *once*).
    for funame, fu in self.fus.fus.items():
        unit_name = fu.fnunit.name
        opkls = fu.opsubsetkls
        if unit_name == 'TRAP':
            # TRAP decoder is the *main* decoder: just note which FU it is
            self.trapunit = funame
            continue
        assert funame not in self.decoders
        decoder = PowerDecodeSubset(None, opkls, unit_name,
                                    final=True,
                                    state=self.ireg.state,
                                    svp64_en=self.svp64_en,
                                    regreduce_en=self.regreduce_en)
        self.decoders[funame] = decoder
        self.des[funame] = decoder.do

    # create per-Function Unit write-after-write hazard signals
    # yes, really, this should have been added in ReservationStations
    # but hey.
    for funame, fu in self.fus.fus.items():
        fu._waw_hazard = Signal(name="waw_%s" % funame)

    # share the SPR decoder with the MMU if it exists
    if "mmu0" in self.decoders:
        self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]
166
167 # next 3 functions are Stage API Compliance
168 def setup(self, m, i):
169 pass
170
def ispec(self):
    """Stage API: create a new CoreInput from the core's configuration."""
    spec = CoreInput(self.pspec, self.svp64_en, self.regreduce_en)
    return spec
173
def ospec(self):
    """Stage API: create a new CoreOutput."""
    spec = CoreOutput()
    return spec
176
177 # elaborate function to create HDL
def elaborate(self, platform):
    """Create the HDL: submodules, decoders, FU issue and regfile ports.

    returns the Module obtained from the base class elaborate, with
    everything added to it.
    """
    m = super().elaborate(platform)

    # for testing purposes, to cut down on build time in coriolis2:
    # when pspec.nocore is set only a toggling dummy signal is created
    if getattr(self.pspec, "nocore", False) == True:
        x = Signal() # dummy signal
        m.d.sync += x.eq(~x)
        return m
    comb = m.d.comb

    m.submodules.fus = self.fus
    m.submodules.l0 = self.l0
    self.regs.elaborate_into(m, platform)

    # amalgamate the per-FU write-hazards into a single top-level Signal
    self.waw_hazard = Signal()
    per_fu_waw = [fu._waw_hazard for fu in self.fus.fus.values()]
    comb += self.waw_hazard.eq(Cat(*per_fu_waw).bool())

    # connect decoders
    self.connect_satellite_decoders(m)

    # ssh, cheat: trap uses the main decoder because of the rewriting
    self.des[self.trapunit] = self.ireg.e.do

    # connect up Function Units, then read/write ports, and hazard conflict
    self.issue_conflict = Signal()
    fu_bitdict, fu_selected = self.connect_instruction(m)
    raw_hazard = self.connect_rdports(m, fu_bitdict, fu_selected)
    self.connect_wrports(m, fu_bitdict, fu_selected)
    if self.allow_overlap:
        comb += self.issue_conflict.eq(raw_hazard)

    # note if an exception happened.  in a pipelined or OoO design
    # this needs to be accompanied by "shadowing" (or stalling)
    happened = [exc.happened for exc in self.fus.excs.values()]
    if happened: # at least one exception
        comb += self.o.exc_happened.eq(Cat(*happened).bool())

    return m
224
def connect_satellite_decoders(self, m):
    """Add every satellite decoder as a submodule and feed it from ireg.

    each decoder receives the raw instruction, endianness and sv_a_nz.
    with SVP64 enabled it also gets the predicate modes, and (when it is
    not the trap unit) the SVP64 RM and mode, plus the alternative-LDST
    selector for decoders whose name starts with "ldst".
    """
    comb = m.d.comb
    ireg = self.ireg
    for funame, dec in self.decoders.items():
        # connect each satellite decoder and give it the instruction.
        # as subset decoders this massively reduces wire fanout given
        # the large number of ALUs
        m.submodules["dec_%s" % funame] = dec
        comb += dec.dec.raw_opcode_in.eq(ireg.raw_insn_i)
        comb += dec.dec.bigendian.eq(ireg.bigendian_i)
        # sigh due to SVP64 RA_OR_ZERO detection connect these too
        comb += dec.sv_a_nz.eq(ireg.sv_a_nz)

        # everything below is SVP64-only
        if not self.svp64_en:
            continue
        comb += dec.pred_sm.eq(ireg.sv_pred_sm)
        comb += dec.pred_dm.eq(ireg.sv_pred_dm)

        if funame == self.trapunit:
            continue
        comb += dec.sv_rm.eq(ireg.sv_rm) # pass through SVP64 RM
        comb += dec.is_svp64_mode.eq(ireg.is_svp64_mode)

        # only the LDST PowerDecodeSubset *actually* needs to
        # know to use the alternative decoder.  this is all
        # a terrible hack
        if funame.lower().startswith("ldst"):
            comb += dec.use_svp64_ldst_dec.eq(ireg.use_svp64_ldst_dec)
248
def connect_instruction(self, m):
    """connect_instruction

    uses decoded (from PowerOp) function unit information from CSV files
    to ascertain which Function Unit should deal with the current
    instruction.

    some (such as OP_ATTN, OP_NOP) are dealt with here, including
    ignoring it and halting the processor.  OP_NOP is a bit annoying
    because the issuer expects busy flag still to be raised then lowered.
    (this requires a fake counter to be set).

    a two-state FSM is created: "READY" takes the incoming instruction
    directly; "WAITING" (only entered when allow_overlap is set) replays
    a latched copy of the instruction until a Function Unit has been
    found and the write-after-write hazard is clear.

    returns a tuple (fu_bitdict, fu_selected), both dictionaries keyed
    by Function Unit name:
    * fu_bitdict[funame] is the issue-picker output for that FU
    * fu_selected[funame] is "FU busy, or picked this cycle"
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus

    # indicate if core is busy
    busy_o = self.o.busy_o
    any_busy_o = self.o.any_busy_o

    # connect up temporary copy of incoming instruction. the FSM will
    # either blat the incoming instruction (if valid) into self.ireg
    # or if the instruction could not be delivered, keep dropping the
    # latched copy into ireg
    ilatch = self.ispec()
    self.instr_active = Signal()

    # enable/busy-signals for each FU, get one bit for each FU (by name)
    fu_enable = Signal(len(fus), reset_less=True)
    fu_busy = Signal(len(fus), reset_less=True)
    fu_bitdict = {}
    fu_selected = {}
    for i, funame in enumerate(fus.keys()):
        fu_bitdict[funame] = fu_enable[i]
        fu_selected[funame] = fu_busy[i]

    # identify function units and create a list by fnunit so that
    # PriorityPickers can be created for selecting one of them that
    # isn't busy at the time the incoming instruction needs passing on
    by_fnunit = defaultdict(list)
    for fname, member in Function.__members__.items():
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            if member.value & fnunit: # this FU handles this type of op
                by_fnunit[fname].append((funame, fu)) # add by Function

    # ok now just print out the list of FUs by Function, because we can
    for fname, fu_list in by_fnunit.items():
        print ("FUs by type", fname, fu_list)

    # now create a PriorityPicker per FU-type such that only one
    # non-busy FU will be picked
    issue_pps = {}
    fu_found = Signal() # take a note if no Function Unit was available
    for fname, fu_list in by_fnunit.items():
        i_pp = PriorityPicker(len(fu_list))
        m.submodules['i_pp_%s' % fname] = i_pp
        i_l = []
        for i, (funame, fu) in enumerate(fu_list):
            # match the decoded instruction (e.do.fn_unit) against the
            # "capability" of this FU, gate that by whether that FU is
            # busy, and drop that into the PriorityPicker.
            # this will give us an output of the first available *non-busy*
            # Function Unit (Reservation Station) capable of handling this
            # instruction.
            fnunit = fu.fnunit.value
            en_req = Signal(name="issue_en_%s" % funame, reset_less=True)
            fnmatch = (self.ireg.e.do.fn_unit & fnunit).bool()
            comb += en_req.eq(fnmatch & ~fu.busy_o &
                              self.instr_active)
            i_l.append(en_req) # store in list for doing the Cat-trick
            # picker output, gated by enable: store in fu_bitdict
            po = Signal(name="o_issue_pick_"+funame) # picker output
            comb += po.eq(i_pp.o[i] & i_pp.en_o)
            comb += fu_bitdict[funame].eq(po)
            comb += fu_selected[funame].eq(fu.busy_o | po)
            # if we don't do this, then when there are no FUs available,
            # the "p.o_ready" signal will go back "ok we accepted this
            # instruction" which of course isn't true.
            with m.If(i_pp.en_o):
                comb += fu_found.eq(1)
        # for each input, Cat them together and drop them into the picker
        comb += i_pp.i.eq(Cat(*i_l))

    # rdmask, which is for registers needs to come from the *main* decoder
    for funame, fu in fus.items():
        rdmask = get_rdflags(self.ireg.e, fu)
        comb += fu.rdmaskn.eq(~rdmask)

    # sigh - need a NOP counter: busy is held for as long as it is nonzero
    counter = Signal(2)
    with m.If(counter != 0):
        sync += counter.eq(counter - 1)
        comb += busy_o.eq(1)

    # default to reading from incoming instruction: may be overridden
    # by copy from latch when "waiting"
    comb += self.ireg.eq(self.i)
    # always say "ready" except if overridden
    comb += self.p.o_ready.eq(1)

    with m.FSM():
        with m.State("READY"):
            with m.If(self.p.i_valid): # run only when valid
                with m.Switch(self.ireg.e.do.insn_type):
                    # check for ATTN: halt if true
                    with m.Case(MicrOp.OP_ATTN):
                        m.d.sync += self.o.core_terminate_o.eq(1)

                    # fake NOP - this isn't really used (Issuer detects NOP)
                    with m.Case(MicrOp.OP_NOP):
                        sync += counter.eq(2)
                        comb += busy_o.eq(1)

                    with m.Default():
                        comb += self.instr_active.eq(1)
                        # not ready, unless an FU accepts (see below)
                        comb += self.p.o_ready.eq(0)
                        # connect instructions. only one enabled at a time
                        for funame, fu in fus.items():
                            do = self.des[funame]
                            enable = fu_bitdict[funame]

                            # run this FunctionUnit if enabled route op,
                            # issue, busy, read flags and mask to FU
                            with m.If(enable):
                                # operand comes from the *local* decoder
                                # do not actually issue, though, if there
                                # is a waw hazard. decoder has to still
                                # be asserted in order to detect that, tho
                                comb += fu.oper_i.eq_from(do)
                                # issue when valid (and no write-hazard)
                                comb += fu.issue_i.eq(~self.waw_hazard)
                                # instruction ok, indicate ready
                                comb += self.p.o_ready.eq(1)

                        if self.allow_overlap:
                            with m.If(~fu_found | self.waw_hazard):
                                # latch copy of instruction
                                sync += ilatch.eq(self.i)
                                comb += self.p.o_ready.eq(1) # accept
                                comb += busy_o.eq(1)
                                m.next = "WAITING"

        with m.State("WAITING"):
            comb += self.instr_active.eq(1)
            comb += self.p.o_ready.eq(0)
            comb += busy_o.eq(1)
            # using copy of instruction, keep waiting until an FU is free
            comb += self.ireg.eq(ilatch)
            with m.If(fu_found): # wait for conflict to clear
                # connect instructions. only one enabled at a time
                for funame, fu in fus.items():
                    do = self.des[funame]
                    enable = fu_bitdict[funame]

                    # run this FunctionUnit if enabled route op,
                    # issue, busy, read flags and mask to FU
                    with m.If(enable):
                        # operand comes from the *local* decoder,
                        # which is asserted even if not issued,
                        # so that WaW-detection can check for hazards.
                        # only if the waw hazard is clear does the
                        # instruction actually get issued
                        comb += fu.oper_i.eq_from(do)
                        # issue when valid
                        comb += fu.issue_i.eq(~self.waw_hazard)
                        with m.If(~self.waw_hazard):
                            comb += self.p.o_ready.eq(1)
                            comb += busy_o.eq(0)
                            m.next = "READY"

    print ("core: overlap allowed", self.allow_overlap)
    # true when any FU is busy (including the cycle where it is perhaps
    # to be issued - because that's what fu_busy is)
    comb += any_busy_o.eq(fu_busy.bool())
    if not self.allow_overlap:
        # for simple non-overlap, if any instruction is busy, set
        # busy output for core.
        comb += busy_o.eq(any_busy_o)
    else:
        # sigh deal with a fun situation that needs to be investigated
        # and resolved
        with m.If(self.issue_conflict):
            comb += busy_o.eq(1)
        # make sure that LDST, SPR, MMU, Branch and Trap all say "busy"
        # and do not allow overlap.  these are all the ones that
        # are non-forward-progressing: exceptions etc. that otherwise
        # change CoreState for some reason (MSR, PC, SVSTATE)
        for funame, fu in fus.items():
            if (funame.lower().startswith('ldst') or
                funame.lower().startswith('branch') or
                funame.lower().startswith('mmu') or
                funame.lower().startswith('spr') or
                funame.lower().startswith('trap')):
                with m.If(fu.busy_o):
                    comb += busy_o.eq(1)

    # return both the function unit "enable" dict as well as the "busy".
    # the "busy-or-issued" can be passed in to the Read/Write port
    # connecters to give them permission to request access to regfiles
    return fu_bitdict, fu_selected
449
def connect_rdport(self, m, fu_bitdict, fu_selected,
                   rdpickers, regfile, regname, fspec):
    """connect one regfile read port to every FU that wants to read it

    m:           the Module being built
    fu_bitdict:  per-FU "issued" signals, keyed by FU name
    fu_selected: per-FU "busy or issued" signals, keyed by FU name
    rdpickers:   dict-of-dicts; the PriorityPicker created here is
                 stored in rdpickers[regfile][regname]
    regfile:     regfile name (lower-cased to index regs.rf / regs.wv)
    regname:     name of the read port within that regfile
    fspec:       a ByRegSpec, or a list of them (several regspecs
                 sharing the one port)

    returns Const(0) when hazard vectors are not in use, otherwise a
    Signal that is raised when any register being requested on this
    port has its bit set in the write-hazard vector.
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus
    regs = self.regs

    rpidx = regname

    # select the required read port.  these are pre-defined sizes
    rfile = regs.rf[regfile.lower()]
    rport = rfile.r_ports[rpidx]
    print("read regfile", rpidx, regfile, regs.rf.keys(),
          rfile, rfile.unary)

    # for checking if the read port has an outstanding write
    if self.make_hazard_vecs:
        wv = regs.wv[regfile.lower()]
        wvchk = wv.q_int # write-vec bit-level hazard check

    # if a hazard is detected on this read port, simply blithely block
    # every FU from reading on it.  this is complete overkill but very
    # simple for now.
    hazard_detected = Signal(name="raw_%s_%s" % (regfile, rpidx))

    # normalise to a list so that single and merged ports share the code
    fspecs = fspec
    if not isinstance(fspecs, list):
        fspecs = [fspecs]

    # first pass: one read-ok flag per regspec, and the offset of each
    # regspec's FUs within the (single, shared) picker
    rdflags = []
    pplen = 0
    ppoffs = []
    for i, fspec in enumerate(fspecs):
        # get the regfile specs for this regfile port
        print ("fpsec", i, fspec, len(fspec.specs))
        name = "%s_%s_%d" % (regfile, regname, i)
        ppoffs.append(pplen) # record offset for picker
        pplen += len(fspec.specs)
        rdflag = Signal(name="rdflag_"+name, reset_less=True)
        comb += rdflag.eq(fspec.okflag)
        rdflags.append(rdflag)

    print ("pplen", pplen)

    # create a priority picker to manage this port
    rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
    m.submodules["rdpick_%s_%s" % (regfile, rpidx)] = rdpick

    rens = []
    addrs = []
    wvens = []

    for i, fspec in enumerate(fspecs):
        (rf, _read, wid, fuspecs) = \
            (fspec.okflag, fspec.regport, fspec.wid, fspec.specs)
        # connect up the FU req/go signals, and the reg-read to the FU
        # and create a Read Broadcast Bus
        for pi, fuspec in enumerate(fspec.specs):
            (funame, fu, idx) = (fuspec.funame, fuspec.fu, fuspec.idx)
            pi += ppoffs[i] # index into the shared picker
            name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
            fu_active = fu_selected[funame]
            fu_issued = fu_bitdict[funame]

            # get (or set up) a latched copy of read register number
            # and (sigh) also the read-ok flag
            # TODO: use nmutil latchregister
            rhname = "%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name="rdflag_%s_%s" % (funame, rhname),
                            reset_less=True)
            if rhname not in fu.rf_latches:
                rfl = Signal(name="rdflag_latch_"+rhname)
                fu.rf_latches[rhname] = rfl
                with m.If(fu.issue_i):
                    sync += rfl.eq(rdflags[i])
            else:
                rfl = fu.rf_latches[rhname]

            # now the register port
            rname = "%s_%s_%s_%d" % (funame, regfile, regname, pi)
            read = Signal.like(_read, name="read_"+rname)
            if rname not in fu.rd_latches:
                rdl = Signal.like(_read, name="rdlatch_"+rname)
                fu.rd_latches[rname] = rdl
                with m.If(fu.issue_i):
                    sync += rdl.eq(_read)
            else:
                rdl = fu.rd_latches[rname]

            # make the read immediately available on issue cycle
            # after the read cycle, otherwise use the latched copy.
            # this captures the regport and okflag on issue
            with m.If(fu.issue_i):
                comb += read.eq(_read)
                comb += rdflag.eq(rdflags[i])
            with m.Else():
                comb += read.eq(rdl)
                comb += rdflag.eq(rfl)

            # connect request-read to picker input, and output to go-rd
            addr_en = Signal.like(read, name="addr_en_"+name)
            pick = Signal(name="pick_"+name)     # picker input
            rp = Signal(name="rp_"+name)         # picker output
            delay_pick = Signal(name="dp_"+name) # read-enable "underway"
            rhazard = Signal(name="rhaz_"+name)

            # exclude any currently-enabled read-request (mask out active)
            # entirely block anything hazarded from being picked
            comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflag &
                            ~delay_pick & ~rhazard)
            comb += rdpick.i[pi].eq(pick)
            comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick

            # if picked, select read-port "reg select" number to port
            comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
            sync += delay_pick.eq(rp) # delayed "pick"
            comb += addr_en.eq(Mux(rp, read, 0))

            # the read-enable happens combinatorially (see mux-bus below)
            # but it results in the data coming out on a one-cycle delay.
            if rfile.unary:
                rens.append(addr_en)
            else:
                addrs.append(addr_en)
                rens.append(rp)

            # use the *delayed* pick signal to put requested data onto bus
            with m.If(delay_pick):
                # connect regfile port to input, creating fan-out Bus
                src = fu.src_i[idx]
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      src.shape(), rport.o_data.shape())
                # all FUs connect to same port
                comb += src.eq(rport.o_data)

            # everything below in this loop is hazard-vector checking
            if not self.make_hazard_vecs:
                continue

            # read the write-hazard bitvector (wv) for any bit that is
            wvchk_en = Signal(len(wvchk), name="wv_chk_addr_en_"+name)
            issue_active = Signal(name="rd_iactive_"+name)
            # XXX combinatorial loop here
            comb += issue_active.eq(fu_active & rdflag)
            with m.If(issue_active):
                if rfile.unary:
                    comb += wvchk_en.eq(read)
                else:
                    comb += wvchk_en.eq(1<<read)
            # if FU is busy (which doesn't get set at the same time as
            # issue) and no hazard was detected, clear wvchk_en (i.e.
            # stop checking for hazards).  there is a loop here, but it's
            # via a DFF, so is ok. some linters may complain, but hey.
            with m.If(fu.busy_o & ~rhazard):
                comb += wvchk_en.eq(0)

            # read-hazard is ANDed with (filtered by) what is actually
            # being requested.
            comb += rhazard.eq((wvchk & wvchk_en).bool())

            wvens.append(wvchk_en)

    # or-reduce the muxed read signals
    if rfile.unary:
        # for unary-addressed
        comb += rport.ren.eq(ortreereduce_sig(rens))
    else:
        # for binary-addressed
        comb += rport.addr.eq(ortreereduce_sig(addrs))
        comb += rport.ren.eq(Cat(*rens).bool())
        print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    if not self.make_hazard_vecs:
        return Const(0) # declare "no hazards"

    # enable the read bitvectors for this issued instruction
    # and return whether any write-hazard bit is set
    # NOTE(review): "name" is left over from the loops above, so this
    # signal is named after whichever entry was processed last
    wvchk_and = Signal(len(wvchk), name="wv_chk_"+name)
    comb += wvchk_and.eq(wvchk & ortreereduce_sig(wvens))
    comb += hazard_detected.eq(wvchk_and.bool())
    return hazard_detected
630
def connect_rdports(self, m, fu_bitdict, fu_selected):
    """connect read ports

    takes the read regspecs, already grouped by regfile and then by
    regport name, and hands each regport in turn to connect_rdport,
    which connects all FUs wanting that regport by way of a
    PriorityPicker.

    returns a single expression that is true when any of the
    individual read ports reported a hazard.
    """
    # dictionary of lists of regfile read ports
    byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

    # okaay, now we need a PriorityPicker per regfile per regfile port
    # loootta pickers... peter piper picked a pack of pickled peppers...
    rdpickers = {}
    hazards = []
    for regfile in byregfiles_rd:
        fuspecs = byregfiles_rdspec[regfile]
        rdpickers[regfile] = {}

        # argh.  an experiment to merge RA and RB in the INT regfile
        # (we have too many read/write ports)
        if self.regreduce_en:
            if regfile == 'INT':
                # rb, rc and ra (in that order) share one port, "rabc"
                fuspecs['rabc'] = [fuspecs.pop(port)
                                   for port in ('rb', 'rc', 'ra')]
            elif regfile == 'FAST':
                # fast2 and fast3, where present, join fast1
                merged = [fuspecs.pop('fast1')]
                for port in ('fast2', 'fast3'):
                    if port in fuspecs:
                        merged.append(fuspecs.pop(port))
                fuspecs['fast1'] = merged

        # for each named regfile port, connect up all FUs to that port
        # also return (and collate) hazard detection)
        for regname, fspec in sort_fuspecs(fuspecs):
            print("connect rd", regname, fspec)
            hazards.append(self.connect_rdport(m, fu_bitdict, fu_selected,
                                               rdpickers, regfile,
                                               regname, fspec))

    return Cat(*hazards).bool()
677
def make_hazards(self, m, regfile, rfile, wvclr, wvset,
                 funame, regname, idx,
                 addr_en, wp, fu, fu_active, wrflag, write,
                 fu_wrok):
    """make_hazards: a setter and a clearer for the regfile write ports

    setter is at issue time (using PowerDecoder2 regfile write numbers)
    clearer is at regfile write time (when FU has said what to write to)

    there is *one* unusual case here which has to be dealt with:
    when the Function Unit does *NOT* request a write to the regfile
    (has its data.ok bit CLEARED).  this is perfectly legitimate.
    and a royal pain.

    regfile:  regfile name (only printed here)
    rfile:    the regfile; rfile.unary selects unary or binary addressing
    wvclr, wvset: hazard-bitvector clear and set signals (used here for
              their widths, and printed)
    funame, regname, idx: used to build the names of the new Signals
    addr_en:  write-port register select, as built by the caller
    wp:       the caller's write-pick signal
    fu, fu_active: the Function Unit and its "busy or issued" signal
    wrflag:   write-ok flag from the decoder for this regspec
    write:    register number to be written
    fu_wrok:  the FU's own "I really am writing" (data.ok) indication

    returns a tuple (wvaddr_en, wviaddr_en): the per-bit enables for,
    respectively, the write-time and the issue-time update of the
    hazard bitvector (the caller collects them in wvclren / wvseten).
    """
    comb, sync = m.d.comb, m.d.sync
    name = "%s_%s_%d" % (funame, regname, idx)

    # connect up the bitvector write hazard.  unlike the
    # regfile writeports, a ONE must be written to the corresponding
    # bit of the hazard bitvector (to indicate the existence of
    # the hazard)

    # the detection of what shall be written to is based
    # on *issue*.  it is delayed by 1 cycle so that instructions
    # "addi 5,5,0x2" do not cause combinatorial loops due to
    # fake-dependency on *themselves*.  this will totally fail
    # spectacularly when doing multi-issue
    print ("write vector (for regread)", regfile, wvset)
    wviaddr_en = Signal(len(wvset), name="wv_issue_addr_en_"+name)
    issue_active = Signal(name="iactive_"+name)
    sync += issue_active.eq(fu.issue_i & fu_active & wrflag)
    with m.If(issue_active):
        if rfile.unary:
            comb += wviaddr_en.eq(write)
        else:
            comb += wviaddr_en.eq(1<<write)

    # deal with write vector clear: this kicks in when the regfile
    # is written to, and clears the corresponding bitvector entry
    print ("write vector", regfile, wvclr)
    wvaddr_en = Signal(len(wvclr), name="wvaddr_en_"+name)
    if rfile.unary:
        comb += wvaddr_en.eq(addr_en)
    else:
        # binary: only decode the address when the port is actually picked
        with m.If(wp):
            comb += wvaddr_en.eq(1<<addr_en)

    # XXX ASSUME that LDSTFunctionUnit always sets the data it intends to
    # this may NOT be the case when an exception occurs
    if isinstance(fu, LDSTFunctionUnit):
        return wvaddr_en, wviaddr_en

    # okaaay, this is preparation for the awkward case.
    # * latch a copy of wrflag when issue goes high.
    # * when the fu_wrok (data.ok) flag is NOT set,
    #   but the FU is done, the FU is NEVER going to write
    #   so the bitvector has to be cleared.
    latch_wrflag = Signal(name="latch_wrflag_"+name)
    with m.If(~fu.busy_o):
        sync += latch_wrflag.eq(0)
    # issue comes second, so takes priority over the clear above
    with m.If(fu.issue_i & fu_active):
        sync += latch_wrflag.eq(wrflag)
    with m.If(fu.alu_done_o & latch_wrflag & ~fu_wrok):
        if rfile.unary:
            comb += wvaddr_en.eq(write) # addr_en gated with wp, don't use
        else:
            # NOTE(review): connect_wrport builds addr_en as
            # Mux(wp, write, 0) for unary and binary regfiles alike,
            # which appears to contradict the "not gated" remark below.
            # if wp is low here this selects bit 0 rather than the bit
            # for "write" - TODO confirm whether 1<<write was intended
            comb += wvaddr_en.eq(1<<addr_en) # binary addr_en not gated

    return wvaddr_en, wviaddr_en
747
748 def connect_wrport(self, m, fu_bitdict, fu_selected,
749 wrpickers, regfile, regname, fspec):
750 comb, sync = m.d.comb, m.d.sync
751 fus = self.fus.fus
752 regs = self.regs
753
754 rpidx = regname
755
756 # select the required write port. these are pre-defined sizes
757 rfile = regs.rf[regfile.lower()]
758 wport = rfile.w_ports[rpidx]
759
760 print("connect wr", regname, "unary", rfile.unary, fspec)
761 print(regfile, regs.rf.keys())
762
763 # select the write-protection hazard vector. note that this still
764 # requires to WRITE to the hazard bitvector! read-requests need
765 # to RAISE the bitvector (set it to 1), which, duh, requires a WRITE
766 if self.make_hazard_vecs:
767 wv = regs.wv[regfile.lower()]
768 wvset = wv.s # write-vec bit-level hazard ctrl
769 wvclr = wv.r # write-vec bit-level hazard ctrl
770 wvchk = wv.q # write-after-write hazard check
771 wvchk_qint = wv.q # write-after-write hazard check, NOT delayed
772
773 fspecs = fspec
774 if not isinstance(fspecs, list):
775 fspecs = [fspecs]
776
777 pplen = 0
778 writes = []
779 ppoffs = []
780 wrflags = []
781 for i, fspec in enumerate(fspecs):
782 # get the regfile specs for this regfile port
783 (wf, _write, wid, fuspecs) = \
784 (fspec.okflag, fspec.regport, fspec.wid, fspec.specs)
785 print ("fpsec", i, "wrflag", wf, fspec, len(fuspecs))
786 ppoffs.append(pplen) # record offset for picker
787 pplen += len(fuspecs)
788
789 name = "%s_%s_%d" % (regfile, regname, i)
790 wrflag = Signal(name="wr_flag_"+name)
791 if wf is not None:
792 comb += wrflag.eq(wf)
793 else:
794 comb += wrflag.eq(0)
795 wrflags.append(wrflag)
796
797 # create a priority picker to manage this port
798 wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
799 m.submodules["wrpick_%s_%s" % (regfile, rpidx)] = wrpick
800
801 wsigs = []
802 wens = []
803 wvsets = []
804 wvseten = []
805 wvclren = []
806 #wvens = [] - not needed: reading of writevec is permanently held hi
807 addrs = []
808 for i, fspec in enumerate(fspecs):
809 # connect up the FU req/go signals and the reg-read to the FU
810 # these are arbitrated by Data.ok signals
811 (wf, _write, wid, fuspecs) = \
812 (fspec.okflag, fspec.regport, fspec.wid, fspec.specs)
813 for pi, fuspec in enumerate(fspec.specs):
814 (funame, fu, idx) = (fuspec.funame, fuspec.fu, fuspec.idx)
815 fu_requested = fu_bitdict[funame]
816 pi += ppoffs[i]
817 name = "%s_%s_%s_%d" % (funame, regfile, regname, idx)
818 # get (or set up) a write-latched copy of write register number
819 write = Signal.like(_write, name="write_"+name)
820 rname = "%s_%s_%s_%d" % (funame, regfile, regname, idx)
821 if rname not in fu.wr_latches:
822 wrl = Signal.like(_write, name="wrlatch_"+rname)
823 fu.wr_latches[rname] = write
824 # do not depend on fu.issue_i here, it creates a
825 # combinatorial loop on waw checking. using the FU
826 # "enable" bitdict entry for this FU is sufficient,
827 # because the PowerDecoder2 read/write nums are
828 # valid continuously when the instruction is valid
829 with m.If(fu_requested):
830 sync += wrl.eq(_write)
831 comb += write.eq(_write)
832 with m.Else():
833 comb += write.eq(wrl)
834 else:
835 write = fu.wr_latches[rname]
836
837 # write-request comes from dest.ok
838 dest = fu.get_out(idx)
839 fu_dest_latch = fu.get_fu_out(idx) # latched output
840 name = "%s_%s_%d" % (funame, regname, idx)
841 fu_wrok = Signal(name="fu_wrok_"+name, reset_less=True)
842 comb += fu_wrok.eq(dest.ok & fu.busy_o)
843
844 # connect request-write to picker input, and output to go-wr
845 fu_active = fu_selected[funame]
846 pick = fu.wr.rel_o[idx] & fu_active
847 comb += wrpick.i[pi].eq(pick)
848 # create a single-pulse go write from the picker output
849 wr_pick = Signal(name="wpick_%s_%s_%d" % (funame, regname, idx))
850 comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
851 comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))
852
853 # connect the regspec write "reg select" number to this port
854 # only if one FU actually requests (and is granted) the port
855 # will the write-enable be activated
856 wname = "waddr_en_%s_%s_%d" % (funame, regname, idx)
857 addr_en = Signal.like(write, name=wname)
858 wp = Signal()
859 comb += wp.eq(wr_pick & wrpick.en_o)
860 comb += addr_en.eq(Mux(wp, write, 0))
861 if rfile.unary:
862 wens.append(addr_en)
863 else:
864 addrs.append(addr_en)
865 wens.append(wp)
866
867 # connect regfile port to input
868 print("reg connect widths",
869 regfile, regname, pi, funame,
870 dest.shape(), wport.i_data.shape())
871 wsigs.append(fu_dest_latch)
872
873 # now connect up the bitvector write hazard
874 if not self.make_hazard_vecs:
875 continue
876 res = self.make_hazards(m, regfile, rfile, wvclr, wvset,
877 funame, regname, idx,
878 addr_en, wp, fu, fu_active,
879 wrflags[i], write, fu_wrok)
880 wvaddr_en, wv_issue_en = res
881 wvclren.append(wvaddr_en) # set only: no data => clear bit
882 wvseten.append(wv_issue_en) # set data same as enable
883
884 # read the write-hazard bitvector (wv) for any bit that is
885 fu_requested = fu_bitdict[funame]
886 wvchk_en = Signal(len(wvchk), name="waw_chk_addr_en_"+name)
887 issue_active = Signal(name="waw_iactive_"+name)
888 whazard = Signal(name="whaz_"+name)
889 if wf is None:
890 # XXX EEK! STATE regfile (branch) does not have an
891 # write-active indicator in regspec_decode_write()
892 print ("XXX FIXME waw_iactive", issue_active,
893 fu_requested, wf)
894 else:
895 # check bits from the incoming instruction. note (back
896 # in connect_instruction) that the decoder is held for
897 # us to be able to do this, here... *without* issue being
898 # held HI. we MUST NOT gate this with fu.issue_i or
899 # with fu_bitdict "enable": it would create a loop
900 comb += issue_active.eq(wf)
901 with m.If(issue_active):
902 if rfile.unary:
903 comb += wvchk_en.eq(write)
904 else:
905 comb += wvchk_en.eq(1<<write)
906 # if FU is busy (which doesn't get set at the same time as
907 # issue) and no hazard was detected, clear wvchk_en (i.e.
908 # stop checking for hazards). there is a loop here, but it's
909 # via a DFF, so is ok. some linters may complain, but hey.
910 with m.If(fu.busy_o & ~whazard):
911 comb += wvchk_en.eq(0)
912
913 # write-hazard is ANDed with (filtered by) what is actually
914 # being requested. the wvchk data is on a one-clock delay,
915 # and wvchk_en comes directly from the main decoder
916 comb += whazard.eq((wvchk_qint & wvchk_en).bool())
917 with m.If(whazard):
918 comb += fu._waw_hazard.eq(1)
919
920 #wvens.append(wvchk_en)
921
922 # here is where we create the Write Broadcast Bus. simple, eh?
923 comb += wport.i_data.eq(ortreereduce_sig(wsigs))
924 if rfile.unary:
925 # for unary-addressed
926 comb += wport.wen.eq(ortreereduce_sig(wens))
927 else:
928 # for binary-addressed
929 comb += wport.addr.eq(ortreereduce_sig(addrs))
930 comb += wport.wen.eq(ortreereduce_sig(wens))
931
932 if not self.make_hazard_vecs:
933 return [], []
934
935 # return these here rather than set wvclr/wvset directly,
936 # because there may be more than one write-port to a given
937 # regfile. example: XER has a write-port for SO, CA, and OV
938 # and the *last one added* of those would overwrite the other
939 # two. solution: have connect_wrports collate all the
940 # or-tree-reduced bitvector set/clear requests and drop them
941 # in as a single "thing". this can only be done because the
942 # set/get is an unary bitvector.
943 print ("make write-vecs", regfile, regname, wvset, wvclr)
944 return (ortreereduce_sig(wvclren), # clear (regfile write)
945 ortreereduce_sig(wvseten)) # set (issue time)
946
def connect_wrports(self, m, fu_bitdict, fu_selected):
    """connect write ports

    orders the write regspecs into a dict-of-dicts, by regfile, by
    regport name (see get_byregfiles), then connects all FUs that want
    that regport by way of a PriorityPicker.  the per-port wiring is
    delegated to connect_wrport, once per (regfile, regname) group.

    when self.regreduce_en is set, some write ports are merged first:
    INT 'o1' is folded into 'o', and FAST 'fast2' / 'fast3' are folded
    into 'fast1'.  the folded-in ports are only merged if they exist.

    when self.make_hazard_vecs is set, the write-hazard clear/set
    enables returned by connect_wrport are collated per regfile,
    or-reduced, and assigned to regs.wv[regfile].r and .s

    note that the write-port wen, write-port data, and go_wr_i all need
    to be on the exact same clock cycle.
    NOTE(review): this docstring used to state that these "all use sync"
    because of a combinatorial loop bug.  this method itself only emits
    comb statements - confirm against connect_wrport.

    * m           - module under construction (m.d.comb is used here)
    * fu_bitdict  - passed through unchanged to connect_wrport
    * fu_selected - passed through unchanged to connect_wrport

    returns None
    """
    comb = m.d.comb
    # dictionary of lists of regfile write ports
    byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

    # regreduce port-merging plan.
    # key: regfile.  value: (port that is kept, ports folded into it)
    merge_plan = {'INT': ('o', ('o1',)),
                  'FAST': ('fast1', ('fast2', 'fast3'))}

    # same for write ports.
    # BLECH!  complex code-duplication! BLECH!
    wrpickers = {}
    wvclrers = defaultdict(list)
    wvseters = defaultdict(list)
    for regfile in byregfiles_wr:
        fuspecs = byregfiles_wrspec[regfile]
        wrpickers[regfile] = {}

        if self.regreduce_en and regfile in merge_plan:
            # argh, more port-merging.  the kept port becomes a *list*
            # of specs.  folded-in ports are optional: not every
            # configuration has an FU that writes to them
            keep, extras = merge_plan[regfile]
            merged = [fuspecs.pop(keep)]
            for extra in extras:
                if extra in fuspecs:
                    merged.append(fuspecs.pop(extra))
            fuspecs[keep] = merged

        # collate these and record them by regfile because there
        # are sometimes more write-ports per regfile
        # (write-vectors in regs.wv are keyed by lower-case regfile name)
        wvkey = regfile.lower()
        for (regname, fspec) in sort_fuspecs(fuspecs):
            wvclren, wvseten = self.connect_wrport(m,
                                                   fu_bitdict, fu_selected,
                                                   wrpickers,
                                                   regfile, regname, fspec)
            wvclrers[wvkey].append(wvclren)
            wvseters[wvkey].append(wvseten)

    if not self.make_hazard_vecs:
        return

    # for write-vectors: reduce the clr-ers and set-ers down to
    # a single set of bits.  otherwise if there are two write
    # ports (on some regfiles), the last one doing comb += on
    # the reg.wv[regfile] instance "wins" (and all others are ignored,
    # whoops).  if there was only one write-port per wv regfile this
    # would not be an issue.
    for regfile, clr_enables in wvclrers.items():
        wv = self.regs.wv[regfile]
        # wv.r / wv.s: write-vec bit-level hazard ctrl
        comb += wv.r.eq(ortreereduce_sig(clr_enables))  # clear (regfile write)
        comb += wv.s.eq(ortreereduce_sig(wvseters[regfile]))  # set (issue time)
1012
def get_byregfiles(self, readmode):
    """group the Function Units' register operands by regfile

    walks every FU in self.fus.fus and, for each of its source operands
    (readmode True, fu.n_src of them) or destination operands (readmode
    False, fu.n_dst of them), asks the FU for its regspec with
    fu.get_io_spec and decodes it against the instruction in self.ireg.e
    using regspec_decode.

    side-effect: each FU gets fresh, empty latch dictionaries attached:
    rd_latches and rf_latches in read mode, wr_latches in write mode.

    returns a tuple (byregfiles, byregfiles_spec):

    * byregfiles      - byregfiles[regfile][idx] is a list of FUSpec,
                        where idx is the FU operand index
    * byregfiles_spec - byregfiles_spec[regfile][regname] is a ByRegSpec
                        (okflag, regport, wid, specs) whose specs list
                        holds a FUSpec for every FU operand on that port.
                        okflag, regport and wid are those of the first
                        operand seen for that port.

    everything collected is also printed, for convenience.
    """
    mode = "read" if readmode else "write"
    fus = self.fus.fus
    e = self.ireg.e  # decoded instruction to execute

    # dictionaries of dictionaries, both with regfile as the first key.
    # byregfiles: second key is the FU operand index, value a list
    # byregfiles_spec: second key is the regfile port name
    byregfiles = defaultdict(lambda: defaultdict(list))
    byregfiles_spec = defaultdict(dict)

    for (funame, fu) in fus.items():
        # create in each FU a receptacle for the read/write register
        # hazard numbers.  to be latched in connect_rd/write_ports
        # XXX better that this is moved into the actual FUs, but
        # the issue there is that this function is actually better
        # suited at the moment
        if readmode:
            fu.rd_latches = {}  # read reg number latches
            fu.rf_latches = {}  # read flag latches
        else:
            fu.wr_latches = {}

        print("%s ports for %s" % (mode, funame))
        for idx in range(fu.n_src if readmode else fu.n_dst):
            # construct regfile specs: read uses inspec, write outspec
            (regfile, regname, wid) = fu.get_io_spec(readmode, idx)
            print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))

            # the PowerDecoder2 (main one, not the satellites) contains
            # the decoded regfile numbers.  obtain these now
            okflag, regport = regspec_decode(readmode, e, regfile, regname)

            # construct the dictionary of regspec information by regfile
            if regname not in byregfiles_spec[regfile]:
                byregfiles_spec[regfile][regname] = \
                    ByRegSpec(okflag, regport, wid, [])
            # here we start to create "lanes"
            fuspec = FUSpec(funame, fu, idx)
            byregfiles[regfile][idx].append(fuspec)
            byregfiles_spec[regfile][regname].specs.append(fuspec)

    # ok just print that all out, for convenience.  distinct names are
    # used for the unpacked fields so that nothing being iterated over
    # (or collected above) gets rebound part-way through
    for regfile in byregfiles:
        print("regfile %s ports:" % mode, regfile)
        for regname, fspec in byregfiles_spec[regfile].items():
            port_okflag, port_regport, port_wid, port_specs = fspec
            print(" rf %s port %s lane: %s" % (mode, regfile, regname))
            print(" %s" % regname, port_wid, port_okflag, port_regport)
            for (spec_funame, spec_fu, spec_idx) in port_specs:
                if readmode:
                    fusig = spec_fu.src_i[spec_idx]
                else:
                    fusig = spec_fu.dest[spec_idx]
                print(" ", spec_funame, spec_fu.__class__.__name__,
                      spec_idx, fusig)
                print()

    return byregfiles, byregfiles_spec
1070
def __iter__(self):
    """iterate over the core's ports

    yields, in this order, everything returned by self.fus.ports(),
    then self.i.e.ports(), then self.l0.ports().  each ports() call is
    only made once the previous group has been exhausted.
    """
    fu_ports = self.fus.ports()
    yield from fu_ports
    instr_ports = self.i.e.ports()
    yield from instr_ports
    l0_ports = self.l0.ports()
    yield from l0_ports
    # TODO: regs
1076
def ports(self):
    """return all ports as a list, in the order that iterating self gives"""
    return [*self]
1079
1080
if __name__ == '__main__':
    # stand-alone run: build the core from a test pspec, convert it to
    # RTLIL, and write the result to test_core.il
    pspec_args = dict(ldst_ifacetype='testpi',
                      imem_ifacetype='',
                      addr_wid=48,
                      allow_overlap=True,
                      mask_wid=8,
                      reg_wid=64)
    core = NonProductionCore(TestMemPspec(**pspec_args))
    il_text = rtlil.convert(core, ports=core.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(il_text)