block picker hazard on input to PriorityPicker rather than output
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 """
21
22 from nmigen import (Elaboratable, Module, Signal, ResetSignal, Cat, Mux,
23 Const)
24 from nmigen.cli import rtlil
25
26 from openpower.decoder.power_decoder2 import PowerDecodeSubset
27 from openpower.decoder.power_regspec_map import regspec_decode_read
28 from openpower.decoder.power_regspec_map import regspec_decode_write
29 from openpower.sv.svp64 import SVP64Rec
30
31 from nmutil.picker import PriorityPicker
32 from nmutil.util import treereduce
33 from nmutil.singlepipe import ControlBase
34
35 from soc.fu.compunits.compunits import AllFunctionUnits, LDSTFunctionUnit
36 from soc.regfile.regfiles import RegFiles
37 from openpower.decoder.power_decoder2 import get_rdflags
38 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
39 from soc.config.test.test_loadstore import TestMemPspec
40 from openpower.decoder.power_enums import MicrOp, Function
41 from soc.simple.core_data import CoreInput, CoreOutput
42
43 from collections import defaultdict
44 import operator
45
46 from nmutil.util import rising_edge
47
48
49 # helper function for reducing a list of signals down to a parallel
50 # ORed single signal.
def ortreereduce(tree, attr="o_data"):
    """OR-reduce one attribute of every item in `tree`.

    hands `tree` to treereduce with operator.or_ as the combining
    operation; each item contributes the value of its `attr`
    attribute (default "o_data").
    """
    def pick(item):
        # fetch the named attribute from a single tree item
        return getattr(item, attr)

    return treereduce(tree, operator.or_, pick)
53
54
def ortreereduce_sig(tree):
    """OR-reduce the items of `tree` themselves.

    same as ortreereduce except that each item is used as-is rather
    than one of its attributes.
    """
    def itself(item):
        # identity: the item is already the value to be ORed
        return item

    return treereduce(tree, operator.or_, itself)
57
58
59 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the (regname, fspec) pairs of `fuspecs` with "full" names first.

    the result is a list of tuples. entries whose regname starts with
    "full" come first, followed by all others; within each group the
    dictionary's own ordering is kept (sorted() is stable).
    """
    def not_full(pair):
        # False sorts before True, so "full*" entries move to the front
        return not pair[0].startswith("full")

    return sorted(fuspecs.items(), key=not_full)
69
70
71 # derive from ControlBase rather than have a separate Stage instance,
72 # this is simpler to do
73 class NonProductionCore(ControlBase):
def __init__(self, pspec):
    """Create the core's sub-components from a parameter specification.

    sets up, in this order: the option flags, the ControlBase parent,
    the LD/ST funnel, the Function Units, the register files, the
    input/output data records and one subset decoder per non-TRAP
    Function Unit.

    pspec: parameter object. the optional attributes svp64, regreduce,
           allow_overlap and core_type are examined here, and
           ldst_ifacetype is printed.
    """
    self.pspec = pspec

    # each of these options is on only if the attribute exists on pspec
    # *and* compares equal to True.

    # test if SVP64 is to be enabled
    self.svp64_en = getattr(pspec, "svp64", False) == True

    # test to see if regfile ports should be reduced
    self.regreduce_en = getattr(pspec, "regreduce", False) == True

    # test to see if overlapping of instructions is allowed
    # (not normally enabled for TestIssuer FSM but useful for checking
    # the bitvector hazard detection, before doing In-Order)
    self.allow_overlap = getattr(pspec, "allow_overlap", False) == True

    # test core type: "fsm" unless pspec says otherwise
    self.make_hazard_vecs = True
    self.core_type = getattr(pspec, "core_type", "fsm")

    super().__init__(stage=self)

    # single LD/ST funnel for memory access
    self.l0 = l0 = TstL0CacheBuffer(pspec, n_units=1)
    port_iface = l0.l0.dports[0]

    # function units (only one each)
    # only include mmu if enabled in pspec
    self.fus = AllFunctionUnits(pspec, pilist=[port_iface])

    # link LoadStore1 into MMU (get_fu result is checked against None)
    mmu = self.fus.get_fu('mmu0')
    print ("core pspec", pspec.ldst_ifacetype)
    print ("core mmu", mmu)
    if mmu is not None:
        lsi = l0.cmpi.lsmem.lsi
        print ("core lsmem.lsi", lsi)
        mmu.alu.set_ldst_interface(l0.cmpi.lsmem.lsi)

    # register files (yes plural)
    self.regs = RegFiles(pspec, make_hazard_vecs=self.make_hazard_vecs)

    # set up input and output: unusual requirement to set data directly
    # (due to the way that the core is set up in a different domain,
    # see TestIssuer.setup_peripherals
    self.p.i_data, self.n.o_data = self.new_specs(None)
    self.i, self.o = self.p.i_data, self.n.o_data

    # actual internal input data used (captured)
    self.ireg = self.ispec()

    # create per-FU instruction decoders (subsetted). these "satellite"
    # decoders reduce wire fan-out from the one (main) PowerDecoder2
    # (used directly by the trap unit) to the *twelve* (or more)
    # Function Units. we can either have 32 wires (the instruction)
    # to each, or we can have well over a 200 wire fan-out (to 12
    # ALUs). it's an easy choice to make.
    self.decoders = {}
    self.des = {}

    for funame, fu in self.fus.fus.items():
        unit_name = fu.fnunit.name
        subset_cls = fu.opsubsetkls
        if unit_name == 'TRAP':
            # TRAP decoder is the *main* decoder: only note its FU name
            self.trapunit = funame
            continue
        self.decoders[funame] = PowerDecodeSubset(
            None, subset_cls, unit_name,
            final=True,
            state=self.ireg.state,
            svp64_en=self.svp64_en,
            regreduce_en=self.regreduce_en)
        self.des[funame] = self.decoders[funame].do

    # share the SPR decoder with the MMU if it exists
    if "mmu0" in self.decoders:
        self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]
153
154 # next 3 functions are Stage API Compliance
155 def setup(self, m, i):
156 pass
157
def ispec(self):
    """Stage API: return a new CoreInput.

    built from the stored pspec plus the svp64_en and regreduce_en
    flags worked out in the constructor.
    """
    pspec = self.pspec
    svp64_en, regreduce_en = self.svp64_en, self.regreduce_en
    return CoreInput(pspec, svp64_en, regreduce_en)
160
def ospec(self):
    """Stage API: return a new (argument-less) CoreOutput."""
    return CoreOutput()
163
164 # elaborate function to create HDL
def elaborate(self, platform):
    """Create the HDL for the core and return the Module.

    adds the Function Units, LD/ST funnel and register files to the
    module, connects the satellite decoders, then wires up instruction
    issue, regfile read ports and regfile write ports (in that order).
    if pspec.nocore exists and equals True, only a dummy toggling
    signal is created.
    """
    m = super().elaborate(platform)

    # for testing purposes, to cut down on build time in coriolis2
    if getattr(self.pspec, "nocore", False) == True:
        x = Signal() # dummy signal
        m.d.sync += x.eq(~x)
        return m
    comb = m.d.comb

    m.submodules.fus = self.fus
    m.submodules.l0 = self.l0
    self.regs.elaborate_into(m, platform)

    # connect decoders
    self.connect_satellite_decoders(m)

    # ssh, cheat: trap uses the main decoder because of the rewriting
    self.des[self.trapunit] = self.ireg.e.do

    # connect up Function Units, then read/write ports, and hazard conflict
    self.issue_conflict = Signal()
    fu_bitdict, fu_selected = self.connect_instruction(m)
    raw_hazard = self.connect_rdports(m, fu_bitdict, fu_selected)
    self.connect_wrports(m, fu_bitdict, fu_selected)
    if self.allow_overlap:
        # only in overlap mode does a read hazard feed issue_conflict
        comb += self.issue_conflict.eq(raw_hazard)

    # note if an exception happened. in a pipelined or OoO design
    # this needs to be accompanied by "shadowing" (or stalling)
    happened = [exc.happened for exc in self.fus.excs.values()]
    if happened: # at least one exception
        comb += self.o.exc_happened.eq(Cat(*happened).bool())

    return m
204
def connect_satellite_decoders(self, m):
    """Add every satellite decoder as a submodule and feed it from ireg.

    each decoder receives the raw instruction, the endianness and
    sv_a_nz. when SVP64 is enabled the predicate modes are connected
    as well, and (for any decoder other than the trap unit's) the
    SVP64 RM and mode; decoders whose key starts with "ldst"
    additionally get use_svp64_ldst_dec.
    """
    comb = m.d.comb
    ireg = self.ireg
    for funame, dec in self.decoders.items():
        # connect each satellite decoder and give it the instruction.
        # as subset decoders this massively reduces wire fanout given
        # the large number of ALUs
        setattr(m.submodules, "dec_%s" % dec.fn_name, dec)
        comb += dec.dec.raw_opcode_in.eq(ireg.raw_insn_i)
        comb += dec.dec.bigendian.eq(ireg.bigendian_i)
        # sigh due to SVP64 RA_OR_ZERO detection connect these too
        comb += dec.sv_a_nz.eq(ireg.sv_a_nz)

        # everything below is SVP64-only
        if not self.svp64_en:
            continue
        comb += dec.pred_sm.eq(ireg.sv_pred_sm)
        comb += dec.pred_dm.eq(ireg.sv_pred_dm)

        if funame == self.trapunit:
            continue
        comb += dec.sv_rm.eq(ireg.sv_rm) # pass through SVP64 RM
        comb += dec.is_svp64_mode.eq(ireg.is_svp64_mode)

        # only the LDST PowerDecodeSubset *actually* needs to
        # know to use the alternative decoder. this is all
        # a terrible hack
        if funame.lower().startswith("ldst"):
            comb += dec.use_svp64_ldst_dec.eq(ireg.use_svp64_ldst_dec)
228
def connect_instruction(self, m):
    """connect_instruction

    uses decoded (from PowerOp) function unit information from CSV files
    to ascertain which Function Unit should deal with the current
    instruction.

    some (such as OP_ATTN, OP_NOP) are dealt with here, including
    ignoring it and halting the processor. OP_NOP is a bit annoying
    because the issuer expects busy flag still to be raised then lowered.
    (this requires a fake counter to be set).

    returns a tuple (fu_bitdict, fu_selected), both dictionaries keyed
    by Function Unit name: fu_bitdict holds the per-FU "issue enable"
    bit (the gated picker output), fu_selected holds the per-FU
    "busy or just picked" bit.
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus

    # indicate if core is busy
    busy_o = self.o.busy_o
    any_busy_o = self.o.any_busy_o

    # connect up temporary copy of incoming instruction. the FSM will
    # either blat the incoming instruction (if valid) into self.ireg
    # or if the instruction could not be delivered, keep dropping the
    # latched copy into ireg
    ilatch = self.ispec()
    self.instr_active = Signal()

    # enable/busy-signals for each FU, get one bit for each FU (by name)
    fu_enable = Signal(len(fus), reset_less=True)
    fu_busy = Signal(len(fus), reset_less=True)
    fu_bitdict = {}
    fu_selected = {}
    for i, funame in enumerate(fus.keys()):
        fu_bitdict[funame] = fu_enable[i]
        fu_selected[funame] = fu_busy[i]

    # identify function units and create a list by fnunit so that
    # PriorityPickers can be created for selecting one of them that
    # isn't busy at the time the incoming instruction needs passing on
    by_fnunit = defaultdict(list)
    for fname, member in Function.__members__.items():
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            if member.value & fnunit: # this FU handles this type of op
                by_fnunit[fname].append((funame, fu)) # add by Function

    # ok now just print out the list of FUs by Function, because we can
    for fname, fu_list in by_fnunit.items():
        print ("FUs by type", fname, fu_list)

    # now create a PriorityPicker per FU-type such that only one
    # non-busy FU will be picked
    issue_pps = {} # (not used further in this function)
    fu_found = Signal() # take a note if no Function Unit was available
    for fname, fu_list in by_fnunit.items():
        i_pp = PriorityPicker(len(fu_list))
        m.submodules['i_pp_%s' % fname] = i_pp
        i_l = []
        for i, (funame, fu) in enumerate(fu_list):
            # match the decoded instruction (e.do.fn_unit) against the
            # "capability" of this FU, gate that by whether that FU is
            # busy, and drop that into the PriorityPicker.
            # this will give us an output of the first available *non-busy*
            # Function Unit (Reservation Station) capable of handling this
            # instruction.
            fnunit = fu.fnunit.value
            en_req = Signal(name="issue_en_%s" % funame, reset_less=True)
            fnmatch = (self.ireg.e.do.fn_unit & fnunit).bool()
            comb += en_req.eq(fnmatch & ~fu.busy_o &
                              self.instr_active)
            i_l.append(en_req) # store in list for doing the Cat-trick
            # picker output, gated by enable: store in fu_bitdict
            po = Signal(name="o_issue_pick_"+funame) # picker output
            comb += po.eq(i_pp.o[i] & i_pp.en_o)
            comb += fu_bitdict[funame].eq(po)
            comb += fu_selected[funame].eq(fu.busy_o | po)
            # if we don't do this, then when there are no FUs available,
            # the "p.o_ready" signal will go back "ok we accepted this
            # instruction" which of course isn't true.
            with m.If(i_pp.en_o):
                comb += fu_found.eq(1)
        # for each input, Cat them together and drop them into the picker
        comb += i_pp.i.eq(Cat(*i_l))

    # rdmask, which is for registers needs to come from the *main* decoder
    for funame, fu in fus.items():
        rdmask = get_rdflags(self.ireg.e, fu)
        comb += fu.rdmaskn.eq(~rdmask)

    # sigh - need a NOP counter: busy is held high while it counts down
    counter = Signal(2)
    with m.If(counter != 0):
        sync += counter.eq(counter - 1)
        comb += busy_o.eq(1)

    # default to reading from incoming instruction: may be overridden
    # by copy from latch when "waiting"
    comb += self.ireg.eq(self.i)
    # always say "ready" except if overridden
    comb += self.p.o_ready.eq(1)

    # two states: READY takes the incoming instruction, WAITING replays
    # the latched copy (only entered when allow_overlap is set)
    with m.FSM():
        with m.State("READY"):
            with m.If(self.p.i_valid): # run only when valid
                with m.Switch(self.ireg.e.do.insn_type):
                    # check for ATTN: halt if true
                    with m.Case(MicrOp.OP_ATTN):
                        m.d.sync += self.o.core_terminate_o.eq(1)

                    # fake NOP - this isn't really used (Issuer detects NOP)
                    with m.Case(MicrOp.OP_NOP):
                        sync += counter.eq(2)
                        comb += busy_o.eq(1)

                    with m.Default():
                        comb += self.instr_active.eq(1)
                        # not ready unless an FU below accepts the op
                        comb += self.p.o_ready.eq(0)
                        # connect instructions. only one enabled at a time
                        for funame, fu in fus.items():
                            do = self.des[funame]
                            enable = fu_bitdict[funame]

                            # run this FunctionUnit if enabled route op,
                            # issue, busy, read flags and mask to FU
                            with m.If(enable):
                                # operand comes from the *local* decoder
                                comb += fu.oper_i.eq_from(do)
                                comb += fu.issue_i.eq(1) # issue when valid
                                # instruction ok, indicate ready
                                comb += self.p.o_ready.eq(1)

                        if self.allow_overlap:
                            with m.If(~fu_found):
                                # latch copy of instruction
                                sync += ilatch.eq(self.i)
                                comb += self.p.o_ready.eq(1) # accept
                                comb += busy_o.eq(1)
                                m.next = "WAITING"

        with m.State("WAITING"):
            comb += self.instr_active.eq(1)
            comb += self.p.o_ready.eq(0)
            comb += busy_o.eq(1)
            # using copy of instruction, keep waiting until an FU is free
            comb += self.ireg.eq(ilatch)
            with m.If(fu_found): # wait for conflict to clear
                # connect instructions. only one enabled at a time
                for funame, fu in fus.items():
                    do = self.des[funame]
                    enable = fu_bitdict[funame]

                    # run this FunctionUnit if enabled route op,
                    # issue, busy, read flags and mask to FU
                    with m.If(enable):
                        # operand comes from the *local* decoder
                        comb += fu.oper_i.eq_from(do)
                        comb += fu.issue_i.eq(1) # issue when valid
                        comb += self.p.o_ready.eq(1)
                        comb += busy_o.eq(0)
                        m.next = "READY"

    print ("core: overlap allowed", self.allow_overlap)
    # any_busy_o: true if at least one FU reports busy
    busys = map(lambda fu: fu.busy_o, fus.values())
    comb += any_busy_o.eq(Cat(*busys).bool())
    if not self.allow_overlap:
        # for simple non-overlap, if any instruction is busy, set
        # busy output for core.
        comb += busy_o.eq(any_busy_o)
    else:
        # sigh deal with a fun situation that needs to be investigated
        # and resolved
        with m.If(self.issue_conflict):
            comb += busy_o.eq(1)

    # return both the function unit "enable" dict as well as the "busy".
    # the "busy-or-issued" can be passed in to the Read/Write port
    # connecters to give them permission to request access to regfiles
    return fu_bitdict, fu_selected
406
def connect_rdport(self, m, fu_bitdict, fu_selected,
                   rdpickers, regfile, regname, fspec):
    """connect one regfile read port to every FU that wants to use it

    m:           the Module being built
    fu_bitdict:  per-FU "issue enable" bits, keyed by FU name
    fu_selected: per-FU "busy or picked" bits, keyed by FU name
    rdpickers:   dict-of-dicts; the PriorityPicker created here is
                 stored at rdpickers[regfile][regname]
    regfile:     regfile name (lower-cased to index self.regs)
    regname:     name of the read port within that regfile
    fspec:       a (rf, wf, read, write, wid, fuspec) tuple, or a list
                 of such tuples when several ports have been merged.
                 fuspec is a list of (funame, fu, idx).

    returns the hazard_detected Signal for this port, or Const(0) if
    hazard vectors are not being created.
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus
    regs = self.regs

    rpidx = regname

    # select the required read port. these are pre-defined sizes
    rfile = regs.rf[regfile.lower()]
    rport = rfile.r_ports[rpidx]
    print("read regfile", rpidx, regfile, regs.rf.keys(),
          rfile, rfile.unary)

    # for checking if the read port has an outstanding write
    if self.make_hazard_vecs:
        wv = regs.wv[regfile.lower()]
        wvchk = wv.r_ports["issue"] # write-vec bit-level hazard check

    # if a hazard is detected on this read port, simply blithely block
    # every FU from reading on it. this is complete overkill but very
    # simple for now.
    hazard_detected = Signal(name="raw_%s_%s" % (regfile, rpidx))

    # normalise to a list so merged and unmerged ports share one code path
    fspecs = fspec
    if not isinstance(fspecs, list):
        fspecs = [fspecs]

    # first pass: one read-flag Signal per fspec, and work out where each
    # fspec's FUs start within the (shared) picker
    rdflags = []
    pplen = 0
    ppoffs = []
    for i, fspec in enumerate(fspecs):
        # get the regfile specs for this regfile port
        (rf, wf, read, write, wid, fuspec) = fspec
        print ("fpsec", i, fspec, len(fuspec))
        ppoffs.append(pplen) # record offset for picker
        pplen += len(fuspec)
        name = "rdflag_%s_%s_%d" % (regfile, regname, i)
        rdflag = Signal(name=name, reset_less=True)
        comb += rdflag.eq(rf)
        rdflags.append(rdflag)

    print ("pplen", pplen)

    # create a priority picker to manage this port
    rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
    setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

    rens = []   # read-enables (unary: the gated addresses themselves)
    addrs = []  # gated read addresses (binary-addressed regfiles only)
    wvens = []  # write-hazard-vector check enables

    # second pass: wire each FU to the picker and to the port
    for i, fspec in enumerate(fspecs):
        (rf, wf, _read, _write, wid, fuspec) = fspec
        # connect up the FU req/go signals, and the reg-read to the FU
        # and create a Read Broadcast Bus
        for pi, (funame, fu, idx) in enumerate(fuspec):
            pi += ppoffs[i] # position of this FU within the picker
            name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
            fu_active = fu_selected[funame]
            fu_issued = fu_bitdict[funame]

            # get (or set up) a latched copy of read register number
            rname = "%s_%s_%s_%d" % (funame, regfile, regname, pi)
            read = Signal.like(_read, name="read_"+name)
            if rname not in fu.rd_latches:
                rdl = Signal.like(_read, name="rdlatch_"+rname)
                fu.rd_latches[rname] = rdl
                with m.If(fu.issue_i):
                    sync += rdl.eq(_read)
            else:
                rdl = fu.rd_latches[rname]
            # latch to make the read immediately available on issue cycle
            # after the read cycle, use the latched copy
            with m.If(fu.issue_i):
                comb += read.eq(_read)
            with m.Else():
                comb += read.eq(rdl)

            # connect request-read to picker input, and output to go-rd
            addr_en = Signal.like(read, name="addr_en_"+name)
            pick = Signal(name="pick_"+name) # picker input
            rp = Signal(name="rp_"+name) # picker output
            delay_pick = Signal(name="dp_"+name) # read-enable "underway"

            # exclude any currently-enabled read-request (mask out active)
            comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
                            ~delay_pick & ~hazard_detected)
            # entirely block anything hazarded from being picked
            comb += rdpick.i[pi].eq(pick)
            comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick

            # if picked, select read-port "reg select" number to port
            comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
            sync += delay_pick.eq(rp) # delayed "pick"
            comb += addr_en.eq(Mux(rp, read, 0))

            # the read-enable happens combinatorially (see mux-bus below)
            # but it results in the data coming out on a one-cycle delay.
            if rfile.unary:
                rens.append(addr_en)
            else:
                addrs.append(addr_en)
                rens.append(rp)

            # use the *delayed* pick signal to put requested data onto bus
            with m.If(delay_pick):
                # connect regfile port to input, creating fan-out Bus
                src = fu.src_i[idx]
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      src.shape(), rport.o_data.shape())
                # all FUs connect to same port
                comb += src.eq(rport.o_data)

            if not self.make_hazard_vecs:
                continue

            # read the write-hazard bitvector (wv) for any bit that is
            # selected by this FU's read register
            wvchk_en = Signal(len(wvchk.ren), name="wv_chk_addr_en_"+name)
            issue_active = Signal(name="rd_iactive_"+name)
            # XXX combinatorial loop here
            comb += issue_active.eq(fu_active & rf)
            with m.If(issue_active):
                if rfile.unary:
                    comb += wvchk_en.eq(read)
                else:
                    # binary register number converted to a one-hot mask
                    comb += wvchk_en.eq(1<<read)
            # if FU is busy (which doesn't get set at the same time as
            # issue) and no hazard was detected, clear wvchk_en (i.e.
            # stop checking for hazards)
            with m.If(fu.busy_o & ~hazard_detected):
                comb += wvchk_en.eq(0)

            wvens.append(wvchk_en)

    # or-reduce the muxed read signals
    if rfile.unary:
        # for unary-addressed
        comb += rport.ren.eq(ortreereduce_sig(rens))
    else:
        # for binary-addressed
        comb += rport.addr.eq(ortreereduce_sig(addrs))
        comb += rport.ren.eq(Cat(*rens).bool())
        print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    if not self.make_hazard_vecs:
        return Const(0) # declare "no hazards"

    # enable the read bitvectors for this issued instruction
    # and return whether any write-hazard bit is set
    comb += wvchk.ren.eq(ortreereduce_sig(wvens))
    comb += hazard_detected.eq(wvchk.o_data.bool())
    return hazard_detected
561
def connect_rdports(self, m, fu_bitdict, fu_selected):
    """connect read ports

    obtains the read regspecs grouped by regfile and by regfile port
    name (see get_byregfiles), optionally merges some ports when
    regreduce is enabled, then passes each port to connect_rdport.

    returns Cat(...).bool() over the hazard result of every
    connect_rdport call.
    """
    # dictionary of lists of regfile read ports
    byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

    # okaay, now we need a PriorityPicker per regfile per regfile port
    # loootta pickers... peter piper picked a pack of pickled peppers...
    hazards = []
    rdpickers = {}
    for regfile in byregfiles_rd:
        fuspecs = byregfiles_rdspec[regfile]
        rdpickers[regfile] = {}

        # argh. an experiment to merge RA and RB in the INT regfile
        # (we have too many read/write ports)
        if self.regreduce_en:
            if regfile == 'INT':
                # rb, rc and ra (in that order) all share port "rabc"
                merged = [fuspecs.pop(port) for port in ('rb', 'rc', 'ra')]
                fuspecs['rabc'] = merged
            elif regfile == 'FAST':
                # fast2/fast3 (where they exist) are folded into fast1
                merged = [fuspecs.pop('fast1')]
                for port in ('fast2', 'fast3'):
                    if port in fuspecs:
                        merged.append(fuspecs.pop(port))
                fuspecs['fast1'] = merged

        # for each named regfile port, connect up all FUs to that port
        # also collate the hazard detection that comes back
        for regname, fspec in sort_fuspecs(fuspecs):
            print("connect rd", regname, fspec)
            hazard = self.connect_rdport(m, fu_bitdict, fu_selected,
                                         rdpickers, regfile,
                                         regname, fspec)
            hazards.append(hazard)

    return Cat(*hazards).bool()
608
def make_hazards(self, m, regfile, rfile, wvclr, wvset,
                 funame, regname, idx,
                 addr_en, wp, fu, fu_active, wrflag, write,
                 fu_wrok):
    """make_hazards: a setter and a clearer for the regfile write ports

    setter is at issue time (using PowerDecoder2 regfile write numbers)
    clearer is at regfile write time (when FU has said what to write to)

    there is *one* unusual case here which has to be dealt with:
    when the Function Unit does *NOT* request a write to the regfile
    (has its data.ok bit CLEARED). this is perfectly legitimate.
    and a royal pain.

    returns a tuple (wvaddr_en, wviaddr_en): the "clear" enable and
    the "set" (issue-time) enable for the write-hazard bitvector.
    the caller ORs these together across all FUs.
    """
    comb, sync = m.d.comb, m.d.sync
    name = "%s_%s_%d" % (funame, regname, idx)

    # connect up the bitvector write hazard. unlike the
    # regfile writeports, a ONE must be written to the corresponding
    # bit of the hazard bitvector (to indicate the existence of
    # the hazard)

    # the detection of what shall be written to is based
    # on *issue*
    print ("write vector (for regread)", regfile, wvset)
    wviaddr_en = Signal(len(wvset.wen), name="wv_issue_addr_en_"+name)
    issue_active = Signal(name="iactive_"+name)
    comb += issue_active.eq(fu.issue_i & fu_active & wrflag)
    with m.If(issue_active):
        if rfile.unary:
            comb += wviaddr_en.eq(write)
        else:
            # binary register number converted to a one-hot mask
            comb += wviaddr_en.eq(1<<write)

    # deal with write vector clear: this kicks in when the regfile
    # is written to, and clears the corresponding bitvector entry
    print ("write vector", regfile, wvclr)
    wvaddr_en = Signal(len(wvclr.wen), name="wvaddr_en_"+name)
    if rfile.unary:
        comb += wvaddr_en.eq(addr_en)
    else:
        with m.If(wp):
            comb += wvaddr_en.eq(1<<addr_en)

    # XXX ASSUME that LDSTFunctionUnit always sets the data it intends to
    # this may NOT be the case when an exception occurs
    if isinstance(fu, LDSTFunctionUnit):
        return wvaddr_en, wviaddr_en

    # okaaay, this is preparation for the awkward case.
    # * latch a copy of wrflag when issue goes high.
    # * when the fu_wrok (data.ok) flag is NOT set,
    #   but the FU is done, the FU is NEVER going to write
    #   so the bitvector has to be cleared.
    latch_wrflag = Signal(name="latch_wrflag_"+name)
    with m.If(~fu.busy_o):
        sync += latch_wrflag.eq(0)
    with m.If(fu.issue_i & fu_active):
        sync += latch_wrflag.eq(wrflag)
    with m.If(fu.alu_done_o & latch_wrflag & ~fu_wrok):
        if rfile.unary:
            comb += wvaddr_en.eq(write) # addr_en gated with wp, don't use
        else:
            # NOTE(review): the comment below says addr_en is not gated
            # in binary mode, but connect_wrport passes in
            # addr_en = Mux(wp, write, 0) regardless of rfile.unary.
            # confirm this selects the intended bit when wp is low.
            comb += wvaddr_en.eq(1<<addr_en) # binary addr_en not gated

    return wvaddr_en, wviaddr_en
675
def connect_wrport(self, m, fu_bitdict, fu_selected,
                   wrpickers, regfile, regname, fspec):
    """connect one regfile write port to every FU that wants to use it

    m:           the Module being built
    fu_bitdict:  per-FU "issue enable" bits, keyed by FU name
                 (not used directly in this function)
    fu_selected: per-FU "busy or picked" bits, keyed by FU name
    wrpickers:   dict-of-dicts; the PriorityPicker created here is
                 stored at wrpickers[regfile][regname]
    regfile:     regfile name (lower-cased to index self.regs)
    regname:     name of the write port within that regfile
    fspec:       a (rf, wf, read, write, wid, fuspec) tuple, or a list
                 of such tuples when several ports have been merged.
                 fuspec is a list of (funame, fu, idx).

    when hazard vectors are enabled the write-hazard bitvector "set"
    and "clr" ports are driven here too (see make_hazards).
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus
    regs = self.regs

    rpidx = regname

    # select the required write port. these are pre-defined sizes
    rfile = regs.rf[regfile.lower()]
    wport = rfile.w_ports[rpidx]

    print("connect wr", regname, "unary", rfile.unary, fspec)
    print(regfile, regs.rf.keys())

    # select the write-protection hazard vector. note that this still
    # requires to WRITE to the hazard bitvector! read-requests need
    # to RAISE the bitvector (set it to 1), which, duh, requires a WRITE
    if self.make_hazard_vecs:
        wv = regs.wv[regfile.lower()]
        wvset = wv.w_ports["set"] # write-vec bit-level hazard ctrl
        wvclr = wv.w_ports["clr"] # write-vec bit-level hazard ctrl

    # normalise to a list so merged and unmerged ports share one code path
    fspecs = fspec
    if not isinstance(fspecs, list):
        fspecs = [fspecs]

    # first pass: read/write flag Signals per fspec, and work out where
    # each fspec's FUs start within the (shared) picker
    pplen = 0
    writes = []
    ppoffs = []
    rdflags = []
    wrflags = []
    for i, fspec in enumerate(fspecs):
        # get the regfile specs for this regfile port
        (rf, wf, read, write, wid, fuspec) = fspec
        print ("fpsec", i, "wrflag", wf, fspec, len(fuspec))
        ppoffs.append(pplen) # record offset for picker
        pplen += len(fuspec)

        name = "%s_%s_%d" % (regfile, regname, i)
        rdflag = Signal(name="rd_flag_"+name)
        wrflag = Signal(name="wr_flag_"+name)
        # a flag of None is treated as "never"
        if rf is not None:
            comb += rdflag.eq(rf)
        else:
            comb += rdflag.eq(0)
        if wf is not None:
            comb += wrflag.eq(wf)
        else:
            comb += wrflag.eq(0)
        rdflags.append(rdflag)
        wrflags.append(wrflag)

    # create a priority picker to manage this port
    wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
    setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

    wsigs = []    # latched FU outputs (data to be written)
    wens = []     # write-enables (unary: the gated addresses themselves)
    wvsets = []   # hazard-vector "set" data
    wvseten = []  # hazard-vector "set" enables
    wvclren = []  # hazard-vector "clear" enables
    addrs = []    # gated write addresses (binary-addressed regfiles only)
    for i, fspec in enumerate(fspecs):
        # connect up the FU req/go signals and the reg-read to the FU
        # these are arbitrated by Data.ok signals
        (rf, wf, read, _write, wid, fuspec) = fspec
        for pi, (funame, fu, idx) in enumerate(fuspec):
            pi += ppoffs[i] # position of this FU within the picker
            name = "%s_%s_%s_%d" % (funame, regfile, regname, idx)
            # get (or set up) a write-latched copy of write register number
            write = Signal.like(_write, name="write_"+name)
            rname = "%s_%s_%s" % (funame, regfile, regname)
            if rname not in fu.wr_latches:
                wrl = Signal.like(_write, name="wrlatch_"+rname)
                # what is remembered per-FU is the muxed "write" signal
                # (live value on the issue cycle, latched copy after)
                fu.wr_latches[rname] = write
                with m.If(fu.issue_i):
                    sync += wrl.eq(_write)
                    comb += write.eq(_write)
                with m.Else():
                    comb += write.eq(wrl)
            else:
                write = fu.wr_latches[rname]

            # write-request comes from dest.ok
            dest = fu.get_out(idx)
            fu_dest_latch = fu.get_fu_out(idx) # latched output
            name = "fu_wrok_%s_%s_%d" % (funame, regname, idx)
            fu_wrok = Signal(name=name, reset_less=True)
            comb += fu_wrok.eq(dest.ok & fu.busy_o)

            # connect request-write to picker input, and output to go-wr
            fu_active = fu_selected[funame]
            pick = fu.wr.rel_o[idx] & fu_active
            comb += wrpick.i[pi].eq(pick)
            # create a single-pulse go write from the picker output
            wr_pick = Signal(name="wpick_%s_%s_%d" % (funame, regname, idx))
            comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
            comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

            # connect the regspec write "reg select" number to this port
            # only if one FU actually requests (and is granted) the port
            # will the write-enable be activated
            wname = "waddr_en_%s_%s_%d" % (funame, regname, idx)
            addr_en = Signal.like(write, name=wname)
            wp = Signal()
            comb += wp.eq(wr_pick & wrpick.en_o)
            comb += addr_en.eq(Mux(wp, write, 0))
            if rfile.unary:
                wens.append(addr_en)
            else:
                addrs.append(addr_en)
                wens.append(wp)

            # connect regfile port to input
            print("reg connect widths",
                  regfile, regname, pi, funame,
                  dest.shape(), wport.i_data.shape())
            wsigs.append(fu_dest_latch)

            # now connect up the bitvector write hazard
            if not self.make_hazard_vecs:
                continue
            res = self.make_hazards(m, regfile, rfile, wvclr, wvset,
                                    funame, regname, idx,
                                    addr_en, wp, fu, fu_active,
                                    wrflags[i], write, fu_wrok)
            wvaddr_en, wv_issue_en = res
            wvclren.append(wvaddr_en) # set only: no data => clear bit
            wvseten.append(wv_issue_en) # set data same as enable
            wvsets.append(wv_issue_en) # because enable needs a 1

    # here is where we create the Write Broadcast Bus. simple, eh?
    comb += wport.i_data.eq(ortreereduce_sig(wsigs))
    if rfile.unary:
        # for unary-addressed
        comb += wport.wen.eq(ortreereduce_sig(wens))
    else:
        # for binary-addressed
        comb += wport.addr.eq(ortreereduce_sig(addrs))
        comb += wport.wen.eq(ortreereduce_sig(wens))

    if not self.make_hazard_vecs:
        return

    # for write-vectors
    comb += wvclr.wen.eq(ortreereduce_sig(wvclren)) # clear (regfile write)
    comb += wvset.wen.eq(ortreereduce_sig(wvseten)) # set (issue time)
    comb += wvset.i_data.eq(ortreereduce_sig(wvsets))
825
def connect_wrports(self, m, fu_bitdict, fu_selected):
    """connect write ports

    obtains the write regspecs grouped by regfile and by regfile port
    name (see get_byregfiles), optionally merges some ports when
    regreduce is enabled, then passes each port to connect_wrport,
    which connects all FUs that want that port by way of a
    PriorityPicker.

    (original author's note: the write-port wen, write-port data and
    go_wr_i all need to be on the exact same clock cycle.)
    """
    # dictionary of lists of regfile write ports
    byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

    # same as for the read ports.
    wrpickers = {}
    for regfile in byregfiles_wr:
        fuspecs = byregfiles_wrspec[regfile]
        wrpickers[regfile] = {}

        if self.regreduce_en:
            # argh, more port-merging
            if regfile == 'INT':
                # o1 is folded into port "o"
                merged = [fuspecs.pop('o'), fuspecs.pop('o1')]
                fuspecs['o'] = merged
            elif regfile == 'FAST':
                # fast2/fast3 (where they exist) are folded into fast1
                merged = [fuspecs.pop('fast1')]
                for port in ('fast2', 'fast3'):
                    if port in fuspecs:
                        merged.append(fuspecs.pop(port))
                fuspecs['fast1'] = merged

        for regname, fspec in sort_fuspecs(fuspecs):
            self.connect_wrport(m, fu_bitdict, fu_selected, wrpickers,
                                regfile, regname, fspec)
865
def get_byregfiles(self, readmode):
    """group the Function Units' regfile port requirements by regfile

    readmode: True to analyse read (input) ports, False to analyse
              write (output) ports.

    returns a tuple (byregfiles, byregfiles_spec), both dictionaries
    of dictionaries whose first key is the regfile name:

    * byregfiles[regfile][idx] is a list of (funame, fu, idx)
    * byregfiles_spec[regfile][regname] is a tuple
      (rdflag, wrport, read, write, wid, fuspecs) where fuspecs is the
      list of (funame, fu, idx) using that named port. in read mode
      wrport and write are None; in write mode rdflag and read are None.

    side-effects: every FU has its rd_latches (read mode) or wr_latches
    (write mode) attribute reset to an empty dictionary, and a summary
    is printed.
    """
    mode = "read" if readmode else "write"
    fus = self.fus.fus
    e = self.ireg.e # decoded instruction to execute

    # dictionary of dictionaries of lists of regfile ports.
    # first key: regfile. second key: regfile port name
    byregfiles = defaultdict(dict)
    byregfiles_spec = defaultdict(dict)

    for (funame, fu) in fus.items():
        # create in each FU a receptacle for the read/write register
        # hazard numbers. to be latched in connect_rd/write_ports
        # XXX better that this is moved into the actual FUs, but
        # the issue there is that this function is actually better
        # suited at the moment
        if readmode:
            fu.rd_latches = {}
        else:
            fu.wr_latches = {}

        print("%s ports for %s" % (mode, funame))
        for idx in range(fu.n_src if readmode else fu.n_dst):
            # construct regfile specs: read uses inspec, write outspec
            if readmode:
                (regfile, regname, wid) = fu.get_in_spec(idx)
            else:
                (regfile, regname, wid) = fu.get_out_spec(idx)
            print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))

            # the PowerDecoder2 (main one, not the satellites) contains
            # the decoded regfile numbers. obtain these now
            if readmode:
                rdflag, read = regspec_decode_read(e, regfile, regname)
                wrport, write = None, None
            else:
                rdflag, read = None, None
                wrport, write = regspec_decode_write(e, regfile, regname)

            # construct the dictionary of regspec information by regfile
            if regname not in byregfiles_spec[regfile]:
                byregfiles_spec[regfile][regname] = \
                    (rdflag, wrport, read, write, wid, [])
            # here we start to create "lanes"
            fuspec = (funame, fu, idx)
            byregfiles[regfile].setdefault(idx, []).append(fuspec)
            byregfiles_spec[regfile][regname][5].append(fuspec)
            # (the latch Signals themselves are created later, by the
            # read/write port connection functions: the unreachable
            # latch-creation code that used to follow an unconditional
            # "continue" here has been removed)

    # ok just print that all out, for convenience
    for regfile, spec in byregfiles.items():
        print("regfile %s ports:" % mode, regfile)
        fuspecs = byregfiles_spec[regfile]
        for regname, fspec in fuspecs.items():
            [rdflag, wrflag, read, write, wid, fuspec] = fspec
            print(" rf %s port %s lane: %s" % (mode, regfile, regname))
            print(" %s" % regname, wid, read, write, rdflag, wrflag)
            for (funame, fu, idx) in fuspec:
                fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                print(" ", funame, fu.__class__.__name__, idx, fusig)
                print()

    return byregfiles, byregfiles_spec
944
def __iter__(self):
    """Yield the ports of the Function Units, the decoded input and the L0.

    the three sources are visited strictly in that order, and each
    ports() call is only made once the previous source is exhausted.
    """
    for port in self.fus.ports():
        yield port
    for port in self.i.e.ports():
        yield port
    for port in self.l0.ports():
        yield port
    # TODO: regs
950
def ports(self):
    """Return everything this object iterates over, as a list."""
    return [port for port in self]
953
954
if __name__ == '__main__':
    # build a core from a test parameter-spec, convert it to RTLIL and
    # save the result in test_core.il
    pspec_args = {
        'ldst_ifacetype': 'testpi',
        'imem_ifacetype': '',
        'addr_wid': 48,
        'mask_wid': 8,
        'reg_wid': 64,
    }
    pspec = TestMemPspec(**pspec_args)
    dut = NonProductionCore(pspec)
    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(rtlil_text)