local variable rename in FetchFSM
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 """
21
22 from nmigen import (Elaboratable, Module, Signal, ResetSignal, Cat, Mux,
23 Const)
24 from nmigen.cli import rtlil
25
26 from openpower.decoder.power_decoder2 import PowerDecodeSubset
27 from openpower.decoder.power_regspec_map import regspec_decode_read
28 from openpower.decoder.power_regspec_map import regspec_decode_write
29 from openpower.sv.svp64 import SVP64Rec
30
31 from nmutil.picker import PriorityPicker
32 from nmutil.util import treereduce
33 from nmutil.singlepipe import ControlBase
34
35 from soc.fu.compunits.compunits import AllFunctionUnits, LDSTFunctionUnit
36 from soc.regfile.regfiles import RegFiles
37 from openpower.decoder.power_decoder2 import get_rdflags
38 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
39 from soc.config.test.test_loadstore import TestMemPspec
40 from openpower.decoder.power_enums import MicrOp, Function
41 from soc.simple.core_data import CoreInput, CoreOutput
42
43 from collections import defaultdict
44 import operator
45
46 from nmutil.util import rising_edge
47
48
49 # helper function for reducing a list of signals down to a parallel
50 # ORed single signal.
def ortreereduce(tree, attr="o_data"):
    """OR together, as a tree, the ``attr`` attribute of every item in ``tree``.

    ``attr`` defaults to "o_data".  the reduction itself is delegated to
    treereduce, using operator.or_ as the combining function.
    """
    def fetch(item):
        # pick out the named attribute from each item being reduced
        return getattr(item, attr)

    return treereduce(tree, operator.or_, fetch)
53
54
def ortreereduce_sig(tree):
    """OR together, as a tree, the items of ``tree`` themselves.

    same as ortreereduce except that each item is used directly rather
    than having an attribute extracted from it.
    """
    def identity(item):
        return item

    return treereduce(tree, operator.or_, identity)
57
58
59 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the (regname, fspec) pairs of ``fuspecs``, "full" ports first.

    entries whose register name starts with "full" are placed ahead of all
    others; within each of the two groups the dictionary's own iteration
    order is kept (sorted() is stable, and False sorts before True).
    """
    def is_not_full(entry):
        regname, _fspec = entry
        return not regname.startswith("full")

    return sorted(fuspecs.items(), key=is_not_full)
69
70
71 # derive from ControlBase rather than have a separate Stage instance,
72 # this is simpler to do
73 class NonProductionCore(ControlBase):
def __init__(self, pspec):
    """Create the core's components from the parameter specification.

    reads the optional svp64 / regreduce / allow_overlap / core_type
    attributes of ``pspec``, then creates the LD/ST buffer, the Function
    Units, the register files, the input/output specs and one subset
    decoder per Function Unit (all except the TRAP unit).
    """
    self.pspec = pspec

    # optional switches: a missing pspec attribute counts as "disabled"
    # * SVP64 enabled?
    self.svp64_en = getattr(pspec, "svp64", False) == True
    # * regfile ports to be reduced?
    self.regreduce_en = getattr(pspec, "regreduce", False) == True
    # * overlapping of instructions allowed?  (not normally enabled for
    #   TestIssuer FSM but useful for checking the bitvector hazard
    #   detection, before doing In-Order)
    self.allow_overlap = getattr(pspec, "allow_overlap", False) == True

    # core type: "fsm" unless pspec says otherwise
    self.make_hazard_vecs = True
    self.core_type = getattr(pspec, "core_type", "fsm")

    super().__init__(stage=self)

    # single LD/ST funnel for memory access
    self.l0 = TstL0CacheBuffer(pspec, n_units=1)
    pi = self.l0.l0.dports[0]

    # function units (only one each)
    # only include mmu if enabled in pspec
    self.fus = AllFunctionUnits(pspec, pilist=[pi])

    # link LoadStore1 into MMU (when an MMU Function Unit exists)
    mmu = self.fus.get_fu('mmu0')
    print ("core pspec", pspec.ldst_ifacetype)
    print ("core mmu", mmu)
    if mmu is not None:
        lsi = self.l0.cmpi.lsmem.lsi
        print ("core lsmem.lsi", lsi)
        mmu.alu.set_ldst_interface(lsi)

    # register files (yes plural)
    self.regs = RegFiles(pspec, make_hazard_vecs=self.make_hazard_vecs)

    # set up input and output: unusual requirement to set data directly
    # (due to the way that the core is set up in a different domain,
    # see TestIssuer.setup_peripherals)
    self.p.i_data, self.n.o_data = self.new_specs(None)
    self.i = self.p.i_data
    self.o = self.n.o_data

    # actual internal input data used (captured)
    self.ireg = self.ispec()

    # per-FU instruction decoders (subsetted).  these "satellite"
    # decoders reduce wire fan-out from the one (main) PowerDecoder2
    # (used directly by the trap unit) to the *twelve* (or more)
    # Function Units.  we can either have 32 wires (the instruction)
    # to each, or we can have well over a 200 wire fan-out (to 12
    # ALUs).  it's an easy choice to make.
    self.decoders = {}
    self.des = {}

    for funame, fu in self.fus.fus.items():
        f_name = fu.fnunit.name
        opkls = fu.opsubsetkls
        if f_name == 'TRAP':
            # TRAP decoder is the *main* decoder: just note which FU it is
            self.trapunit = funame
            continue
        self.decoders[funame] = PowerDecodeSubset(
            None, opkls, f_name,
            final=True,
            state=self.ireg.state,
            svp64_en=self.svp64_en,
            regreduce_en=self.regreduce_en)
        self.des[funame] = self.decoders[funame].do

    # share the SPR decoder with the MMU if it exists
    mmu_decoder = self.decoders.get("mmu0")
    if mmu_decoder is not None:
        mmu_decoder.mmu0_spr_dec = self.decoders["spr0"]
153
154 # next 3 functions are Stage API Compliance
155 def setup(self, m, i):
156 pass
157
def ispec(self):
    """Stage API: return a new CoreInput built from this core's pspec
    and its svp64_en / regreduce_en settings.
    """
    return CoreInput(self.pspec, self.svp64_en, self.regreduce_en)
160
def ospec(self):
    """Stage API: return a new CoreOutput."""
    return CoreOutput()
163
164 # elaborate function to create HDL
def elaborate(self, platform):
    """Create the HDL for the core and return the Module.

    adds the Function Units, LD/ST buffer and register files, connects
    the satellite decoders, instruction issue and regfile read/write
    ports, and ORs together the "exception happened" flags.  when
    pspec.nocore is set only a dummy toggling signal is created.
    """
    m = super().elaborate(platform)

    # for testing purposes, to cut down on build time in coriolis2
    if getattr(self.pspec, "nocore", False) == True:
        x = Signal() # dummy signal
        m.d.sync += x.eq(~x)
        return m

    m.submodules.fus = self.fus
    m.submodules.l0 = self.l0
    self.regs.elaborate_into(m, platform)

    # connect decoders
    self.connect_satellite_decoders(m)

    # ssh, cheat: trap uses the main decoder because of the rewriting
    self.des[self.trapunit] = self.ireg.e.do

    # connect up Function Units, then read/write ports, and hazard conflict
    self.issue_conflict = Signal()
    fu_bitdict, fu_selected = self.connect_instruction(m)
    raw_hazard = self.connect_rdports(m, fu_bitdict, fu_selected)
    self.connect_wrports(m, fu_bitdict, fu_selected)
    if self.allow_overlap:
        m.d.comb += self.issue_conflict.eq(raw_hazard)

    # note if an exception happened.  in a pipelined or OoO design
    # this needs to be accompanied by "shadowing" (or stalling)
    happened = [exc.happened for exc in self.fus.excs.values()]
    if happened: # at least one exception source exists
        m.d.comb += self.o.exc_happened.eq(Cat(*happened).bool())

    return m
204
def connect_satellite_decoders(self, m):
    """Add every satellite decoder as a submodule and feed it from ireg.

    each decoder receives the raw instruction, the endianness flag and
    sv_a_nz; with SVP64 enabled it additionally receives the predicate
    modes, the SVP64 RM and mode flag, and (LDST decoders only) the
    "use alternative SVP64 LDST decoder" flag.
    """
    comb = m.d.comb
    ireg = self.ireg
    for funame, dec in self.decoders.items():
        # connect each satellite decoder and give it the instruction.
        # as subset decoders this massively reduces wire fanout given
        # the large number of ALUs
        setattr(m.submodules, "dec_%s" % dec.fn_name, dec)
        comb += dec.dec.raw_opcode_in.eq(ireg.raw_insn_i)
        comb += dec.dec.bigendian.eq(ireg.bigendian_i)
        # sigh due to SVP64 RA_OR_ZERO detection connect these too
        comb += dec.sv_a_nz.eq(ireg.sv_a_nz)

        # everything below is SVP64-only
        if not self.svp64_en:
            continue
        comb += dec.pred_sm.eq(ireg.sv_pred_sm)
        comb += dec.pred_dm.eq(ireg.sv_pred_dm)
        if funame == self.trapunit:
            continue
        comb += dec.sv_rm.eq(ireg.sv_rm) # pass through SVP64 RM
        comb += dec.is_svp64_mode.eq(ireg.is_svp64_mode)
        # only the LDST PowerDecodeSubset *actually* needs to
        # know to use the alternative decoder.  this is all
        # a terrible hack
        if funame.lower().startswith("ldst"):
            comb += dec.use_svp64_ldst_dec.eq(ireg.use_svp64_ldst_dec)
228
def connect_instruction(self, m):
    """connect_instruction

    uses decoded (from PowerOp) function unit information from CSV files
    to ascertain which Function Unit should deal with the current
    instruction.

    some (such as OP_ATTN, OP_NOP) are dealt with here, including
    ignoring it and halting the processor.  OP_NOP is a bit annoying
    because the issuer expects busy flag still to be raised then lowered.
    (this requires a fake counter to be set).

    returns a tuple (fu_bitdict, fu_selected) of dictionaries keyed by
    Function Unit name:

    * fu_bitdict[funame] is the FU's "issue enable" bit, driven from the
      issue PriorityPicker output
    * fu_selected[funame] is "FU busy, or picked for issue"
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus

    # indicate if core is busy
    busy_o = self.o.busy_o
    any_busy_o = self.o.any_busy_o

    # connect up temporary copy of incoming instruction. the FSM will
    # either blat the incoming instruction (if valid) into self.ireg
    # or if the instruction could not be delivered, keep dropping the
    # latched copy into ireg
    ilatch = self.ispec()
    self.instr_active = Signal()

    # enable/busy-signals for each FU, get one bit for each FU (by name)
    fu_enable = Signal(len(fus), reset_less=True)
    fu_busy = Signal(len(fus), reset_less=True)
    fu_bitdict = {}
    fu_selected = {}
    for i, funame in enumerate(fus.keys()):
        fu_bitdict[funame] = fu_enable[i]
        fu_selected[funame] = fu_busy[i]

    # identify function units and create a list by fnunit so that
    # PriorityPickers can be created for selecting one of them that
    # isn't busy at the time the incoming instruction needs passing on
    by_fnunit = defaultdict(list)
    for fname, member in Function.__members__.items():
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            if member.value & fnunit: # this FU handles this type of op
                by_fnunit[fname].append((funame, fu)) # add by Function

    # ok now just print out the list of FUs by Function, because we can
    for fname, fu_list in by_fnunit.items():
        print ("FUs by type", fname, fu_list)

    # now create a PriorityPicker per FU-type such that only one
    # non-busy FU will be picked
    issue_pps = {}
    fu_found = Signal() # take a note if no Function Unit was available
    for fname, fu_list in by_fnunit.items():
        i_pp = PriorityPicker(len(fu_list))
        m.submodules['i_pp_%s' % fname] = i_pp
        i_l = []
        for i, (funame, fu) in enumerate(fu_list):
            # match the decoded instruction (e.do.fn_unit) against the
            # "capability" of this FU, gate that by whether that FU is
            # busy, and drop that into the PriorityPicker.
            # this will give us an output of the first available *non-busy*
            # Function Unit (Reservation Station) capable of handling this
            # instruction.
            fnunit = fu.fnunit.value
            en_req = Signal(name="issue_en_%s" % funame, reset_less=True)
            fnmatch = (self.ireg.e.do.fn_unit & fnunit).bool()
            comb += en_req.eq(fnmatch & ~fu.busy_o &
                              self.instr_active)
            i_l.append(en_req) # store in list for doing the Cat-trick
            # picker output, gated by enable: store in fu_bitdict
            po = Signal(name="o_issue_pick_"+funame) # picker output
            comb += po.eq(i_pp.o[i] & i_pp.en_o)
            comb += fu_bitdict[funame].eq(po)
            comb += fu_selected[funame].eq(fu.busy_o | po)
            # if we don't do this, then when there are no FUs available,
            # the "p.o_ready" signal will go back "ok we accepted this
            # instruction" which of course isn't true.
            with m.If(i_pp.en_o):
                comb += fu_found.eq(1)
        # for each input, Cat them together and drop them into the picker
        comb += i_pp.i.eq(Cat(*i_l))

    # rdmask, which is for registers needs to come from the *main* decoder
    for funame, fu in fus.items():
        rdmask = get_rdflags(self.ireg.e, fu)
        comb += fu.rdmaskn.eq(~rdmask)

    # sigh - need a NOP counter: busy is held high while it counts down
    counter = Signal(2)
    with m.If(counter != 0):
        sync += counter.eq(counter - 1)
        comb += busy_o.eq(1)

    # default to reading from incoming instruction: may be overridden
    # by copy from latch when "waiting"
    comb += self.ireg.eq(self.i)
    # always say "ready" except if overridden
    comb += self.p.o_ready.eq(1)

    # two-state FSM: READY takes instructions directly from the input,
    # WAITING replays the latched copy until a Function Unit is found
    with m.FSM():
        with m.State("READY"):
            with m.If(self.p.i_valid): # run only when valid
                with m.Switch(self.ireg.e.do.insn_type):
                    # check for ATTN: halt if true
                    with m.Case(MicrOp.OP_ATTN):
                        m.d.sync += self.o.core_terminate_o.eq(1)

                    # fake NOP - this isn't really used (Issuer detects NOP)
                    with m.Case(MicrOp.OP_NOP):
                        sync += counter.eq(2)
                        comb += busy_o.eq(1)

                    with m.Default():
                        comb += self.instr_active.eq(1)
                        # not ready unless an FU accepts (overridden below)
                        comb += self.p.o_ready.eq(0)
                        # connect instructions. only one enabled at a time
                        for funame, fu in fus.items():
                            do = self.des[funame]
                            enable = fu_bitdict[funame]

                            # run this FunctionUnit if enabled route op,
                            # issue, busy, read flags and mask to FU
                            with m.If(enable):
                                # operand comes from the *local* decoder
                                comb += fu.oper_i.eq_from(do)
                                comb += fu.issue_i.eq(1) # issue when valid
                                # instruction ok, indicate ready
                                comb += self.p.o_ready.eq(1)

                        if self.allow_overlap:
                            with m.If(~fu_found):
                                # latch copy of instruction
                                sync += ilatch.eq(self.i)
                                comb += self.p.o_ready.eq(1) # accept
                                comb += busy_o.eq(1)
                                m.next = "WAITING"

        with m.State("WAITING"):
            comb += self.instr_active.eq(1)
            comb += self.p.o_ready.eq(0)
            comb += busy_o.eq(1)
            # using copy of instruction, keep waiting until an FU is free
            comb += self.ireg.eq(ilatch)
            with m.If(fu_found): # wait for conflict to clear
                # connect instructions. only one enabled at a time
                for funame, fu in fus.items():
                    do = self.des[funame]
                    enable = fu_bitdict[funame]

                    # run this FunctionUnit if enabled route op,
                    # issue, busy, read flags and mask to FU
                    with m.If(enable):
                        # operand comes from the *local* decoder
                        comb += fu.oper_i.eq_from(do)
                        comb += fu.issue_i.eq(1) # issue when valid
                        comb += self.p.o_ready.eq(1)
                        comb += busy_o.eq(0)
                        m.next = "READY"

    print ("core: overlap allowed", self.allow_overlap)
    busys = map(lambda fu: fu.busy_o, fus.values())
    comb += any_busy_o.eq(Cat(*busys).bool())
    if not self.allow_overlap:
        # for simple non-overlap, if any instruction is busy, set
        # busy output for core.
        comb += busy_o.eq(any_busy_o)
    else:
        # sigh deal with a fun situation that needs to be investigated
        # and resolved
        with m.If(self.issue_conflict):
            comb += busy_o.eq(1)

    # return both the function unit "enable" dict as well as the "busy".
    # the "busy-or-issued" can be passed in to the Read/Write port
    # connecters to give them permission to request access to regfiles
    return fu_bitdict, fu_selected
406
def connect_rdport(self, m, fu_bitdict, fu_selected,
                   rdpickers, regfile, regname, fspec):
    """connect one regfile read port to every Function Unit that uses it

    * m: the Module being built
    * fu_bitdict, fu_selected: per-FU "issue enable" and "busy or issued"
      signals, keyed by Function Unit name
    * rdpickers: dict-of-dicts into which the PriorityPicker created for
      this port is stored, as rdpickers[regfile][regname]
    * regfile, regname: name of the register file and of its read port
    * fspec: either one regspec tuple (rf, wf, read, write, wid, fuspec)
      or a list of them (several ports merged onto this one)

    returns a Signal which is raised when the write-hazard bitvector
    check port outputs any set bit, or Const(0) when hazard vectors are
    not being created.
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus
    regs = self.regs

    rpidx = regname

    # select the required read port. these are pre-defined sizes
    rfile = regs.rf[regfile.lower()]
    rport = rfile.r_ports[rpidx]
    print("read regfile", rpidx, regfile, regs.rf.keys(),
          rfile, rfile.unary)

    # for checking if the read port has an outstanding write
    if self.make_hazard_vecs:
        wv = regs.wv[regfile.lower()]
        wvchk = wv.r_ports["issue"] # write-vec bit-level hazard check

        # if a hazard is detected on this read port, simply blithely block
        # every FU from reading on it. this is complete overkill but very
        # simple for now.
        hazard_detected = Signal(name="raw_%s_%s" % (regfile, rpidx))

    # normalise: always work with a list of regspecs
    fspecs = fspec
    if not isinstance(fspecs, list):
        fspecs = [fspecs]

    rdflags = []
    pplen = 0
    ppoffs = []
    for i, fspec in enumerate(fspecs):
        # get the regfile specs for this regfile port
        (rf, wf, read, write, wid, fuspec) = fspec
        print ("fpsec", i, fspec, len(fuspec))
        ppoffs.append(pplen) # record offset for picker
        pplen += len(fuspec)
        name = "rdflag_%s_%s_%d" % (regfile, regname, i)
        rdflag = Signal(name=name, reset_less=True)
        comb += rdflag.eq(rf)
        rdflags.append(rdflag)

    print ("pplen", pplen)

    # create a priority picker to manage this port
    rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
    setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

    rens = []
    addrs = []
    wvens = []

    for i, fspec in enumerate(fspecs):
        (rf, wf, _read, _write, wid, fuspec) = fspec
        # connect up the FU req/go signals, and the reg-read to the FU
        # and create a Read Broadcast Bus
        for pi, (funame, fu, idx) in enumerate(fuspec):
            # picker index: offset by the FUs of preceding regspecs
            pi += ppoffs[i]
            name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
            fu_active = fu_selected[funame]
            fu_issued = fu_bitdict[funame]

            # get (or set up) a latched copy of read register number
            rname = "%s_%s_%s_%d" % (funame, regfile, regname, pi)
            read = Signal.like(_read, name="read_"+name)
            if rname not in fu.rd_latches:
                rdl = Signal.like(_read, name="rdlatch_"+rname)
                fu.rd_latches[rname] = rdl
                with m.If(fu.issue_i):
                    sync += rdl.eq(_read)
            else:
                rdl = fu.rd_latches[rname]
            # latch to make the read immediately available on issue cycle
            # after the read cycle, use the latched copy
            with m.If(fu.issue_i):
                comb += read.eq(_read)
            with m.Else():
                comb += read.eq(rdl)

            # connect request-read to picker input, and output to go-rd
            addr_en = Signal.like(read, name="addr_en_"+name)
            pick = Signal(name="pick_"+name) # picker input
            rp = Signal(name="rp_"+name) # picker output
            delay_pick = Signal(name="dp_"+name) # read-enable "underway"
            rhazard = Signal(name="rhaz_"+name)

            # exclude any currently-enabled read-request (mask out active)
            comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
                            ~delay_pick & ~rhazard)
            # entirely block anything hazarded from being picked
            comb += rdpick.i[pi].eq(pick)
            comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick

            # if picked, select read-port "reg select" number to port
            comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
            sync += delay_pick.eq(rp) # delayed "pick"
            comb += addr_en.eq(Mux(rp, read, 0))

            # the read-enable happens combinatorially (see mux-bus below)
            # but it results in the data coming out on a one-cycle delay.
            if rfile.unary:
                rens.append(addr_en)
            else:
                addrs.append(addr_en)
                rens.append(rp)

            # use the *delayed* pick signal to put requested data onto bus
            with m.If(delay_pick):
                # connect regfile port to input, creating fan-out Bus
                src = fu.src_i[idx]
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      src.shape(), rport.o_data.shape())
                # all FUs connect to same port
                comb += src.eq(rport.o_data)

            # everything below is hazard-vector checking only
            if not self.make_hazard_vecs:
                continue

            # read the write-hazard bitvector (wv) for any bit that is
            wvchk_en = Signal(len(wvchk.ren), name="wv_chk_addr_en_"+name)
            issue_active = Signal(name="rd_iactive_"+name)
            # XXX combinatorial loop here
            comb += issue_active.eq(fu_active & rf)
            with m.If(issue_active):
                if rfile.unary:
                    comb += wvchk_en.eq(read)
                else:
                    # binary register number: convert to a one-hot enable
                    comb += wvchk_en.eq(1<<read)
            # if FU is busy (which doesn't get set at the same time as
            # issue) and no hazard was detected, clear wvchk_en (i.e.
            # stop checking for hazards)
            with m.If(fu.busy_o & ~rhazard):
                comb += wvchk_en.eq(0)

            comb += rhazard.eq((wvchk.o_data & wvchk_en).bool())

            wvens.append(wvchk_en)

    # or-reduce the muxed read signals
    if rfile.unary:
        # for unary-addressed
        comb += rport.ren.eq(ortreereduce_sig(rens))
    else:
        # for binary-addressed
        comb += rport.addr.eq(ortreereduce_sig(addrs))
        comb += rport.ren.eq(Cat(*rens).bool())
        print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    if not self.make_hazard_vecs:
        return Const(0) # declare "no hazards"

    # enable the read bitvectors for this issued instruction
    # and return whether any write-hazard bit is set
    comb += wvchk.ren.eq(ortreereduce_sig(wvens))
    comb += hazard_detected.eq(wvchk.o_data.bool())
    return hazard_detected
564
def connect_rdports(self, m, fu_bitdict, fu_selected):
    """connect read ports

    orders the read regspecs into a dict-of-dicts, by regfile, by
    regport name, then connects all FUs that want that regport by
    way of a PriorityPicker (one per regfile per regfile port).

    returns the OR of the hazard indications returned by connect_rdport
    for every port.
    """
    # dictionary of lists of regfile read ports
    byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

    # okaay, now we need a PriorityPicker per regfile per regfile port
    # loootta pickers... peter piper picked a pack of pickled peppers...
    rdpickers = {}
    hazards = []
    for regfile in byregfiles_rd:
        fuspecs = byregfiles_rdspec[regfile]
        rdpickers[regfile] = {}

        # argh.  an experiment to merge RA and RB in the INT regfile
        # (we have too many read/write ports)
        if self.regreduce_en:
            if regfile == 'INT':
                # rb, rc and ra (in that order) all share port "rabc"
                merged = [fuspecs.pop(port) for port in ('rb', 'rc', 'ra')]
                fuspecs['rabc'] = merged
            if regfile == 'FAST':
                # fast2 and fast3, where present, are folded into fast1
                merged = [fuspecs.pop('fast1')]
                for port in ('fast2', 'fast3'):
                    if port in fuspecs:
                        merged.append(fuspecs.pop(port))
                fuspecs['fast1'] = merged

        # for each named regfile port, connect up all FUs to that port
        # also collate the hazard detection of each
        for regname, fspec in sort_fuspecs(fuspecs):
            print("connect rd", regname, fspec)
            hazard = self.connect_rdport(m, fu_bitdict, fu_selected,
                                         rdpickers, regfile,
                                         regname, fspec)
            hazards.append(hazard)

    return Cat(*hazards).bool()
611
def make_hazards(self, m, regfile, rfile, wvclr, wvset,
                 funame, regname, idx,
                 addr_en, wp, fu, fu_active, wrflag, write,
                 fu_wrok):
    """make_hazards: a setter and a clearer for the regfile write ports

    setter is at issue time (using PowerDecoder2 regfile write numbers)
    clearer is at regfile write time (when FU has said what to write to)

    there is *one* unusual case here which has to be dealt with:
    when the Function Unit does *NOT* request a write to the regfile
    (has its data.ok bit CLEARED).  this is perfectly legitimate.
    and a royal pain.

    returns a tuple (wvaddr_en, wviaddr_en): the enable for the "clear"
    write-vector port and the enable for the "set" (issue-time)
    write-vector port, each as wide as the corresponding port's wen.
    """
    comb, sync = m.d.comb, m.d.sync
    name = "%s_%s_%d" % (funame, regname, idx)

    # connect up the bitvector write hazard. unlike the
    # regfile writeports, a ONE must be written to the corresponding
    # bit of the hazard bitvector (to indicate the existence of
    # the hazard)

    # the detection of what shall be written to is based
    # on *issue*
    print ("write vector (for regread)", regfile, wvset)
    wviaddr_en = Signal(len(wvset.wen), name="wv_issue_addr_en_"+name)
    issue_active = Signal(name="iactive_"+name)
    comb += issue_active.eq(fu.issue_i & fu_active & wrflag)
    with m.If(issue_active):
        if rfile.unary:
            comb += wviaddr_en.eq(write)
        else:
            # binary register number: convert to a one-hot enable
            comb += wviaddr_en.eq(1<<write)

    # deal with write vector clear: this kicks in when the regfile
    # is written to, and clears the corresponding bitvector entry
    print ("write vector", regfile, wvclr)
    wvaddr_en = Signal(len(wvclr.wen), name="wvaddr_en_"+name)
    if rfile.unary:
        comb += wvaddr_en.eq(addr_en)
    else:
        with m.If(wp):
            comb += wvaddr_en.eq(1<<addr_en)

    # XXX ASSUME that LDSTFunctionUnit always sets the data it intends to
    # this may NOT be the case when an exception occurs
    if isinstance(fu, LDSTFunctionUnit):
        return wvaddr_en, wviaddr_en

    # okaaay, this is preparation for the awkward case.
    # * latch a copy of wrflag when issue goes high.
    # * when the fu_wrok (data.ok) flag is NOT set,
    #   but the FU is done, the FU is NEVER going to write
    #   so the bitvector has to be cleared.
    latch_wrflag = Signal(name="latch_wrflag_"+name)
    with m.If(~fu.busy_o):
        sync += latch_wrflag.eq(0)
    # this later assignment takes priority over the clear above
    with m.If(fu.issue_i & fu_active):
        sync += latch_wrflag.eq(wrflag)
    with m.If(fu.alu_done_o & latch_wrflag & ~fu_wrok):
        if rfile.unary:
            comb += wvaddr_en.eq(write) # addr_en gated with wp, don't use
        else:
            comb += wvaddr_en.eq(1<<addr_en) # binary addr_en not gated

    return wvaddr_en, wviaddr_en
678
def connect_wrport(self, m, fu_bitdict, fu_selected,
                   wrpickers, regfile, regname, fspec):
    """connect one regfile write port to every Function Unit that uses it

    * m: the Module being built
    * fu_bitdict, fu_selected: per-FU "issue enable" and "busy or issued"
      signals, keyed by Function Unit name
    * wrpickers: dict-of-dicts into which the PriorityPicker created for
      this port is stored, as wrpickers[regfile][regname]
    * regfile, regname: name of the register file and of its write port
    * fspec: either one regspec tuple (rf, wf, read, write, wid, fuspec)
      or a list of them (several ports merged onto this one)

    the data, write-enable (and, for binary-addressed regfiles, address)
    of all FUs are OR-reduced onto the port.  when hazard vectors are
    being created, the write-vector "set" and "clr" ports are driven too.
    nothing is returned.
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus
    regs = self.regs

    rpidx = regname

    # select the required write port. these are pre-defined sizes
    rfile = regs.rf[regfile.lower()]
    wport = rfile.w_ports[rpidx]

    print("connect wr", regname, "unary", rfile.unary, fspec)
    print(regfile, regs.rf.keys())

    # select the write-protection hazard vector. note that this still
    # requires to WRITE to the hazard bitvector! read-requests need
    # to RAISE the bitvector (set it to 1), which, duh, requires a WRITE
    if self.make_hazard_vecs:
        wv = regs.wv[regfile.lower()]
        wvset = wv.w_ports["set"] # write-vec bit-level hazard ctrl
        wvclr = wv.w_ports["clr"] # write-vec bit-level hazard ctrl

    # normalise: always work with a list of regspecs
    fspecs = fspec
    if not isinstance(fspecs, list):
        fspecs = [fspecs]

    pplen = 0
    writes = []
    ppoffs = []
    rdflags = []
    wrflags = []
    for i, fspec in enumerate(fspecs):
        # get the regfile specs for this regfile port
        (rf, wf, read, write, wid, fuspec) = fspec
        print ("fpsec", i, "wrflag", wf, fspec, len(fuspec))
        ppoffs.append(pplen) # record offset for picker
        pplen += len(fuspec)

        name = "%s_%s_%d" % (regfile, regname, i)
        rdflag = Signal(name="rd_flag_"+name)
        wrflag = Signal(name="wr_flag_"+name)
        # a flag of None is treated as "never set"
        if rf is not None:
            comb += rdflag.eq(rf)
        else:
            comb += rdflag.eq(0)
        if wf is not None:
            comb += wrflag.eq(wf)
        else:
            comb += wrflag.eq(0)
        rdflags.append(rdflag)
        wrflags.append(wrflag)

    # create a priority picker to manage this port
    wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
    setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

    wsigs = []
    wens = []
    wvsets = []
    wvseten = []
    wvclren = []
    addrs = []
    for i, fspec in enumerate(fspecs):
        # connect up the FU req/go signals and the reg-read to the FU
        # these are arbitrated by Data.ok signals
        (rf, wf, read, _write, wid, fuspec) = fspec
        for pi, (funame, fu, idx) in enumerate(fuspec):
            # picker index: offset by the FUs of preceding regspecs
            pi += ppoffs[i]
            name = "%s_%s_%s_%d" % (funame, regfile, regname, idx)
            # get (or set up) a write-latched copy of write register number
            write = Signal.like(_write, name="write_"+name)
            rname = "%s_%s_%s" % (funame, regfile, regname)
            if rname not in fu.wr_latches:
                wrl = Signal.like(_write, name="wrlatch_"+rname)
                # note: it is the muxed "write" signal (not the latch
                # wrl itself) that is recorded for later re-use
                fu.wr_latches[rname] = write
                with m.If(fu.issue_i):
                    sync += wrl.eq(_write)
                    comb += write.eq(_write)
                with m.Else():
                    comb += write.eq(wrl)
            else:
                write = fu.wr_latches[rname]

            # write-request comes from dest.ok
            dest = fu.get_out(idx)
            fu_dest_latch = fu.get_fu_out(idx) # latched output
            name = "fu_wrok_%s_%s_%d" % (funame, regname, idx)
            fu_wrok = Signal(name=name, reset_less=True)
            comb += fu_wrok.eq(dest.ok & fu.busy_o)

            # connect request-write to picker input, and output to go-wr
            fu_active = fu_selected[funame]
            pick = fu.wr.rel_o[idx] & fu_active
            comb += wrpick.i[pi].eq(pick)
            # create a single-pulse go write from the picker output
            wr_pick = Signal(name="wpick_%s_%s_%d" % (funame, regname, idx))
            comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
            comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

            # connect the regspec write "reg select" number to this port
            # only if one FU actually requests (and is granted) the port
            # will the write-enable be activated
            wname = "waddr_en_%s_%s_%d" % (funame, regname, idx)
            addr_en = Signal.like(write, name=wname)
            wp = Signal()
            comb += wp.eq(wr_pick & wrpick.en_o)
            comb += addr_en.eq(Mux(wp, write, 0))
            if rfile.unary:
                wens.append(addr_en)
            else:
                addrs.append(addr_en)
                wens.append(wp)

            # connect regfile port to input
            print("reg connect widths",
                  regfile, regname, pi, funame,
                  dest.shape(), wport.i_data.shape())
            wsigs.append(fu_dest_latch)

            # now connect up the bitvector write hazard
            if not self.make_hazard_vecs:
                continue
            res = self.make_hazards(m, regfile, rfile, wvclr, wvset,
                                    funame, regname, idx,
                                    addr_en, wp, fu, fu_active,
                                    wrflags[i], write, fu_wrok)
            wvaddr_en, wv_issue_en = res
            wvclren.append(wvaddr_en) # set only: no data => clear bit
            wvseten.append(wv_issue_en) # set data same as enable
            wvsets.append(wv_issue_en) # because enable needs a 1

    # here is where we create the Write Broadcast Bus. simple, eh?
    comb += wport.i_data.eq(ortreereduce_sig(wsigs))
    if rfile.unary:
        # for unary-addressed
        comb += wport.wen.eq(ortreereduce_sig(wens))
    else:
        # for binary-addressed
        comb += wport.addr.eq(ortreereduce_sig(addrs))
        comb += wport.wen.eq(ortreereduce_sig(wens))

    if not self.make_hazard_vecs:
        return

    # for write-vectors
    comb += wvclr.wen.eq(ortreereduce_sig(wvclren)) # clear (regfile write)
    comb += wvset.wen.eq(ortreereduce_sig(wvseten)) # set (issue time)
    comb += wvset.i_data.eq(ortreereduce_sig(wvsets))
828
def connect_wrports(self, m, fu_bitdict, fu_selected):
    """connect write ports

    orders the write regspecs into a dict-of-dicts, by regfile,
    by regport name, then connects all FUs that want that regport
    by way of a PriorityPicker.

    note that the write-port wen, write-port data, and go_wr_i all need to
    be on the exact same clock cycle.  as there is a combinatorial loop bug
    at the moment, these all use sync.
    """
    # dictionary of lists of regfile write ports
    byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

    # same as for read ports: one picker per regfile per regfile port
    wrpickers = {}
    for regfile in byregfiles_wr:
        fuspecs = byregfiles_wrspec[regfile]
        wrpickers[regfile] = {}

        if self.regreduce_en:
            # argh, more port-merging
            if regfile == 'INT':
                # o1 is folded into o
                merged = [fuspecs.pop('o'), fuspecs.pop('o1')]
                fuspecs['o'] = merged
            if regfile == 'FAST':
                # fast2 and fast3, where present, are folded into fast1
                merged = [fuspecs.pop('fast1')]
                for port in ('fast2', 'fast3'):
                    if port in fuspecs:
                        merged.append(fuspecs.pop(port))
                fuspecs['fast1'] = merged

        for regname, fspec in sort_fuspecs(fuspecs):
            self.connect_wrport(m, fu_bitdict, fu_selected, wrpickers,
                                regfile, regname, fspec)
868
def get_byregfiles(self, readmode):
    """group the Function Units' register ports by regfile and port

    * readmode: True to examine FU source (read) operands, False to
      examine FU destination (write) operands.

    as a side-effect every Function Unit is given an empty dictionary
    (fu.rd_latches in read mode, fu.wr_latches in write mode) as a
    receptacle for latched register numbers, filled in later by the
    read/write port connection functions.

    returns a tuple (byregfiles, byregfiles_spec) of dict-of-dicts:

    * byregfiles[regfile][idx] is a list of (funame, fu, idx) "lanes"
    * byregfiles_spec[regfile][regname] is a tuple
      (rdflag, wrport, read, write, wid, fuspecs) where fuspecs is the
      list of (funame, fu, idx) using that port.  the flag and register
      number entries are those decoded for the first FU found using the
      port; the unused pair (write side in read mode, read side in
      write mode) is None.
    """
    mode = "read" if readmode else "write"
    fus = self.fus.fus
    e = self.ireg.e # decoded instruction to execute

    # dictionary of dictionaries of lists of regfile ports.
    # first key: regfile.  second key: regfile port name
    byregfiles = defaultdict(dict)
    byregfiles_spec = defaultdict(dict)

    for (funame, fu) in fus.items():
        # create in each FU a receptacle for the read/write register
        # hazard numbers.  to be latched in connect_rd/write_ports
        # XXX better that this is moved into the actual FUs, but
        # the issue there is that this function is actually better
        # suited at the moment
        if readmode:
            fu.rd_latches = {}
        else:
            fu.wr_latches = {}

        print("%s ports for %s" % (mode, funame))
        for idx in range(fu.n_src if readmode else fu.n_dst):
            # construct regfile specs: read uses inspec, write outspec
            if readmode:
                (regfile, regname, wid) = fu.get_in_spec(idx)
            else:
                (regfile, regname, wid) = fu.get_out_spec(idx)
            print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))

            # the PowerDecoder2 (main one, not the satellites) contains
            # the decoded regfile numbers.  obtain these now
            if readmode:
                rdflag, read = regspec_decode_read(e, regfile, regname)
                wrport, write = None, None
            else:
                rdflag, read = None, None
                wrport, write = regspec_decode_write(e, regfile, regname)

            # construct the dictionary of regspec information by regfile
            ports = byregfiles_spec[regfile]
            if regname not in ports:
                ports[regname] = (rdflag, wrport, read, write, wid, [])
            # here we start to create "lanes"
            fuspec = (funame, fu, idx)
            byregfiles[regfile].setdefault(idx, []).append(fuspec)
            ports[regname][5].append(fuspec)

    # ok just print that all out, for convenience
    for regfile in byregfiles:
        print("regfile %s ports:" % mode, regfile)
        fuspecs = byregfiles_spec[regfile]
        for regname, fspec in fuspecs.items():
            [rdflag, wrflag, read, write, wid, fuspec] = fspec
            print(" rf %s port %s lane: %s" % (mode, regfile, regname))
            print(" %s" % regname, wid, read, write, rdflag, wrflag)
            for (funame, fu, idx) in fuspec:
                fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                print(" ", funame, fu.__class__.__name__, idx, fusig)
                print()

    return byregfiles, byregfiles_spec
947
def __iter__(self):
    """Yield the ports of the Function Units, then of the decoded
    instruction input (self.i.e), then of the LD/ST buffer.
    """
    for source in (self.fus, self.i.e, self.l0):
        yield from source.ports()
    # TODO: regs
953
def ports(self):
    """Return, as a list, everything produced by iterating over self."""
    return [port for port in self]
956
957
if __name__ == '__main__':
    # build a core from a test memory pspec, convert it to RTLIL and
    # save the result in test_core.il
    pspec = TestMemPspec(ldst_ifacetype='testpi',
                         imem_ifacetype='',
                         addr_wid=48,
                         mask_wid=8,
                         reg_wid=64)
    dut = NonProductionCore(pspec)
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(il_text)