X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fsoc%2Fsimple%2Fcore.py;h=021e4fa3bc2483b98c44e518d60616b90af1c81f;hb=1097971e235fc4d075025bd6f86150c674e4b1f2;hp=368c2272b53b9629baa5aef6355c53621487f04b;hpb=b2f857427f0b362732c1fea8d9c1c8e09b96c3d4;p=soc.git diff --git a/src/soc/simple/core.py b/src/soc/simple/core.py index 368c2272..021e4fa3 100644 --- a/src/soc/simple/core.py +++ b/src/soc/simple/core.py @@ -29,18 +29,17 @@ from openpower.sv.svp64 import SVP64Rec from nmutil.picker import PriorityPicker from nmutil.util import treereduce +from nmutil.singlepipe import ControlBase -from soc.fu.compunits.compunits import AllFunctionUnits +from soc.fu.compunits.compunits import AllFunctionUnits, LDSTFunctionUnit from soc.regfile.regfiles import RegFiles -from openpower.decoder.decode2execute1 import Decode2ToExecute1Type -from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand from openpower.decoder.power_decoder2 import get_rdflags -from openpower.decoder.decode2execute1 import Data from soc.experiment.l0_cache import TstL0CacheBuffer # test only from soc.config.test.test_loadstore import TestMemPspec -from openpower.decoder.power_enums import MicrOp -from soc.config.state import CoreState +from openpower.decoder.power_enums import MicrOp, Function +from soc.simple.core_data import CoreInput, CoreOutput +from collections import defaultdict import operator from nmutil.util import rising_edge @@ -68,58 +67,9 @@ def sort_fuspecs(fuspecs): return res # enumerate(res) -class CoreInput: - def __init__(self, pspec, svp64_en, regreduce_en): - self.pspec = pspec - self.svp64_en = svp64_en - self.e = Decode2ToExecute1Type("core", opkls=IssuerDecode2ToOperand, - regreduce_en=regreduce_en) - - # SVP64 RA_OR_ZERO needs to know if the relevant EXTRA2/3 field is zero - self.sv_a_nz = Signal() - - # state and raw instruction (and SVP64 ReMap fields) - self.state = CoreState("core") - self.raw_insn_i = Signal(32) # raw instruction - self.bigendian_i = Signal() # bigendian - TODO, set by MSR.BE - if svp64_en: - self.sv_rm = SVP64Rec(name="core_svp64_rm") # SVP64 RM field - self.is_svp64_mode = Signal() # set if SVP64 mode is enabled - self.use_svp64_ldst_dec = Signal() # use alternative LDST decoder - self.sv_pred_sm = Signal() # TODO: SIMD width - self.sv_pred_dm = Signal() # TODO: SIMD width - - # issue/valid/busy signalling - self.ivalid_i = Signal(reset_less=True) # instruction is valid - self.issue_i = Signal(reset_less=True) - - def eq(self, i): - self.e.eq(i.e) - self.sv_a_nz.eq(i.sv_a_nz) - self.state.eq(i.state) - self.raw_insn_i.eq(i.raw_insn_i) - self.bigendian_i.eq(i.bigendian_i) - if not self.svp64_en: - return - self.sv_rm.eq(i.sv_rm) - self.is_svp64_mode.eq(i.is_svp64_mode) - self.use_svp64_ldst_dec.eq(i.use_svp64_ldst_dec) - self.sv_pred_sm.eq(i.sv_pred_sm) - self.sv_pred_dm.eq(i.sv_pred_dm) - - -class CoreOutput: - def __init__(self): - self.busy_o = Signal(name="corebusy_o", reset_less=True) - # start/stop and terminated signalling - self.core_terminate_o = Signal(reset=0) # indicates stopped - - def eq(self, i): - self.busy_o.eq(i.busy_o) - self.core_terminate_o.eq(i.core_terminate_o) - - -class NonProductionCore(Elaboratable): +# derive from ControlBase rather than have a separate Stage instance, +# this is simpler to do +class NonProductionCore(ControlBase): def __init__(self, pspec): self.pspec = pspec @@ -130,6 +80,20 @@ class NonProductionCore(Elaboratable): self.regreduce_en = (hasattr(pspec, "regreduce") and (pspec.regreduce == True)) + # test to see if overlapping of instructions is allowed + # (not normally enabled for TestIssuer FSM but useful for checking + # the bitvector hazard detection, before doing In-Order) + self.allow_overlap = (hasattr(pspec, "allow_overlap") and + (pspec.allow_overlap == True)) + + # test core type + self.make_hazard_vecs = True + self.core_type = "fsm" + if hasattr(pspec, "core_type"): + self.core_type = pspec.core_type + + super().__init__(stage=self) + # single LD/ST funnel for memory access self.l0 = l0 = TstL0CacheBuffer(pspec, n_units=1) pi = l0.l0.dports[0] @@ -142,18 +106,28 @@ class NonProductionCore(Elaboratable): mmu = self.fus.get_fu('mmu0') print ("core pspec", pspec.ldst_ifacetype) print ("core mmu", mmu) - print ("core lsmem.lsi", l0.cmpi.lsmem.lsi) if mmu is not None: + print ("core lsmem.lsi", l0.cmpi.lsmem.lsi) mmu.alu.set_ldst_interface(l0.cmpi.lsmem.lsi) # register files (yes plural) - self.regs = RegFiles(pspec) - - # set up input and output - self.i = CoreInput(pspec, self.svp64_en, self.regreduce_en) - self.o = CoreOutput() - - # create per-FU instruction decoders (subsetted) + self.regs = RegFiles(pspec, make_hazard_vecs=self.make_hazard_vecs) + + # set up input and output: unusual requirement to set data directly + # (due to the way that the core is set up in a different domain, + # see TestIssuer.setup_peripherals + self.p.i_data, self.n.o_data = self.new_specs(None) + self.i, self.o = self.p.i_data, self.n.o_data + + # actual internal input data used (captured) + self.ireg = self.ispec() + + # create per-FU instruction decoders (subsetted). these "satellite" + # decoders reduce wire fan-out from the one (main) PowerDecoder2 + # (used directly by the trap unit) to the *twelve* (or more) + # Function Units. we can either have 32 wires (the instruction) + # to each, or we can have well over a 200 wire fan-out (to 12 + # ALUs). it's an easy choice to make. self.decoders = {} self.des = {} @@ -167,16 +141,29 @@ class NonProductionCore(Elaboratable): continue self.decoders[funame] = PowerDecodeSubset(None, opkls, f_name, final=True, - state=self.i.state, + state=self.ireg.state, svp64_en=self.svp64_en, regreduce_en=self.regreduce_en) self.des[funame] = self.decoders[funame].do + # share the SPR decoder with the MMU if it exists if "mmu0" in self.decoders: self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"] + # next 3 functions are Stage API Compliance + def setup(self, m, i): + pass + + def ispec(self): + return CoreInput(self.pspec, self.svp64_en, self.regreduce_en) + + def ospec(self): + return CoreOutput() + + # elaborate function to create HDL def elaborate(self, platform): - m = Module() + m = super().elaborate(platform) + # for testing purposes, to cut down on build time in coriolis2 if hasattr(self.pspec, "nocore") and self.pspec.nocore == True: x = Signal() # dummy signal @@ -191,39 +178,54 @@ class NonProductionCore(Elaboratable): fus = self.fus.fus # connect decoders + self.connect_satellite_decoders(m) + + # ssh, cheat: trap uses the main decoder because of the rewriting + self.des[self.trapunit] = self.ireg.e.do + + # connect up Function Units, then read/write ports, and hazard conflict + issue_conflict = Signal() + fu_bitdict, fu_selected = self.connect_instruction(m, issue_conflict) + raw_hazard = self.connect_rdports(m, fu_selected) + self.connect_wrports(m, fu_selected) + if self.allow_overlap: + comb += issue_conflict.eq(raw_hazard) + + # note if an exception happened. in a pipelined or OoO design + # this needs to be accompanied by "shadowing" (or stalling) + el = [] + for exc in self.fus.excs.values(): + el.append(exc.happened) + if len(el) > 0: # at least one exception + comb += self.o.exc_happened.eq(Cat(*el).bool()) + + return m + + def connect_satellite_decoders(self, m): + comb = m.d.comb for k, v in self.decoders.items(): # connect each satellite decoder and give it the instruction. # as subset decoders this massively reduces wire fanout given # the large number of ALUs setattr(m.submodules, "dec_%s" % v.fn_name, v) - comb += v.dec.raw_opcode_in.eq(self.i.raw_insn_i) - comb += v.dec.bigendian.eq(self.i.bigendian_i) + comb += v.dec.raw_opcode_in.eq(self.ireg.raw_insn_i) + comb += v.dec.bigendian.eq(self.ireg.bigendian_i) # sigh due to SVP64 RA_OR_ZERO detection connect these too - comb += v.sv_a_nz.eq(self.i.sv_a_nz) + comb += v.sv_a_nz.eq(self.ireg.sv_a_nz) if self.svp64_en: - comb += v.pred_sm.eq(self.i.sv_pred_sm) - comb += v.pred_dm.eq(self.i.sv_pred_dm) + comb += v.pred_sm.eq(self.ireg.sv_pred_sm) + comb += v.pred_dm.eq(self.ireg.sv_pred_dm) if k != self.trapunit: - comb += v.sv_rm.eq(self.i.sv_rm) # pass through SVP64 ReMap - comb += v.is_svp64_mode.eq(self.i.is_svp64_mode) + comb += v.sv_rm.eq(self.ireg.sv_rm) # pass through SVP64 RM + comb += v.is_svp64_mode.eq(self.ireg.is_svp64_mode) # only the LDST PowerDecodeSubset *actually* needs to # know to use the alternative decoder. this is all # a terrible hack if k.lower().startswith("ldst"): comb += v.use_svp64_ldst_dec.eq( - self.i.use_svp64_ldst_dec) - - # ssh, cheat: trap uses the main decoder because of the rewriting - self.des[self.trapunit] = self.i.e.do + self.ireg.use_svp64_ldst_dec) - # connect up Function Units, then read/write ports - fu_bitdict = self.connect_instruction(m) - self.connect_rdports(m, fu_bitdict) - self.connect_wrports(m, fu_bitdict) - - return m - - def connect_instruction(self, m): + def connect_instruction(self, m, issue_conflict): """connect_instruction uses decoded (from PowerOp) function unit information from CSV files @@ -238,57 +240,165 @@ class NonProductionCore(Elaboratable): comb, sync = m.d.comb, m.d.sync fus = self.fus.fus - # enable-signals for each FU, get one bit for each FU (by name) + # indicate if core is busy + busy_o = self.o.busy_o + + # connect up temporary copy of incoming instruction. the FSM will + # either blat the incoming instruction (if valid) into self.ireg + # or if the instruction could not be delivered, keep dropping the + # latched copy into ireg + ilatch = self.ispec() + self.instr_active = Signal() + + # enable/busy-signals for each FU, get one bit for each FU (by name) fu_enable = Signal(len(fus), reset_less=True) + fu_busy = Signal(len(fus), reset_less=True) fu_bitdict = {} + fu_selected = {} for i, funame in enumerate(fus.keys()): fu_bitdict[funame] = fu_enable[i] - - # enable the required Function Unit based on the opcode decode - # note: this *only* works correctly for simple core when one and - # *only* one FU is allocated per instruction + fu_selected[funame] = fu_busy[i] + + # identify function units and create a list by fnunit so that + # PriorityPickers can be created for selecting one of them that + # isn't busy at the time the incoming instruction needs passing on + by_fnunit = defaultdict(list) + for fname, member in Function.__members__.items(): + for funame, fu in fus.items(): + fnunit = fu.fnunit.value + if member.value & fnunit: # this FU handles this type of op + by_fnunit[fname].append((funame, fu)) # add by Function + + # ok now just print out the list of FUs by Function, because we can + for fname, fu_list in by_fnunit.items(): + print ("FUs by type", fname, fu_list) + + # now create a PriorityPicker per FU-type such that only one + # non-busy FU will be picked + issue_pps = {} + fu_found = Signal() # take a note if no Function Unit was available + for fname, fu_list in by_fnunit.items(): + i_pp = PriorityPicker(len(fu_list)) + m.submodules['i_pp_%s' % fname] = i_pp + i_l = [] + for i, (funame, fu) in enumerate(fu_list): + # match the decoded instruction (e.do.fn_unit) against the + # "capability" of this FU, gate that by whether that FU is + # busy, and drop that into the PriorityPicker. + # this will give us an output of the first available *non-busy* + # Function Unit (Reservation Statio) capable of handling this + # instruction. + fnunit = fu.fnunit.value + en_req = Signal(name="issue_en_%s" % funame, reset_less=True) + fnmatch = (self.ireg.e.do.fn_unit & fnunit).bool() + comb += en_req.eq(fnmatch & ~fu.busy_o & self.instr_active) + i_l.append(en_req) # store in list for doing the Cat-trick + # picker output, gated by enable: store in fu_bitdict + po = Signal(name="o_issue_pick_"+funame) # picker output + comb += po.eq(i_pp.o[i] & i_pp.en_o) + comb += fu_bitdict[funame].eq(po) + comb += fu_selected[funame].eq(fu.busy_o | po) + # if we don't do this, then when there are no FUs available, + # the "p.o_ready" signal will go back "ok we accepted this + # instruction" which of course isn't true. + with m.If(~issue_conflict & i_pp.en_o): + comb += fu_found.eq(1) + # for each input, Cat them together and drop them into the picker + comb += i_pp.i.eq(Cat(*i_l)) + + # rdmask, which is for registers needs to come from the *main* decoder for funame, fu in fus.items(): - fnunit = fu.fnunit.value - enable = Signal(name="en_%s" % funame, reset_less=True) - comb += enable.eq((self.i.e.do.fn_unit & fnunit).bool()) - comb += fu_bitdict[funame].eq(enable) + rdmask = get_rdflags(self.ireg.e, fu) + comb += fu.rdmaskn.eq(~rdmask) # sigh - need a NOP counter counter = Signal(2) with m.If(counter != 0): sync += counter.eq(counter - 1) - comb += self.o.busy_o.eq(1) - - with m.If(self.i.ivalid_i): # run only when valid - with m.Switch(self.i.e.do.insn_type): - # check for ATTN: halt if true - with m.Case(MicrOp.OP_ATTN): - m.d.sync += self.o.core_terminate_o.eq(1) - - with m.Case(MicrOp.OP_NOP): - sync += counter.eq(2) - comb += self.o.busy_o.eq(1) - - with m.Default(): - # connect up instructions. only one enabled at a time + comb += busy_o.eq(1) + + # default to reading from incoming instruction: may be overridden + # by copy from latch when "waiting" + comb += self.ireg.eq(self.i) + # always say "ready" except if overridden + comb += self.p.o_ready.eq(1) + + l_issue_conflict = Signal() + + with m.FSM(): + with m.State("READY"): + with m.If(self.p.i_valid): # run only when valid + with m.Switch(self.ireg.e.do.insn_type): + # check for ATTN: halt if true + with m.Case(MicrOp.OP_ATTN): + m.d.sync += self.o.core_terminate_o.eq(1) + + # fake NOP - this isn't really used (Issuer detects NOP) + with m.Case(MicrOp.OP_NOP): + sync += counter.eq(2) + comb += busy_o.eq(1) + + with m.Default(): + comb += self.instr_active.eq(1) + comb += self.p.o_ready.eq(0) + # connect instructions. only one enabled at a time + for funame, fu in fus.items(): + do = self.des[funame] + enable = fu_bitdict[funame] + + # run this FunctionUnit if enabled route op, + # issue, busy, read flags and mask to FU + with m.If(enable & ~issue_conflict): + # operand comes from the *local* decoder + comb += fu.oper_i.eq_from(do) + comb += fu.issue_i.eq(1) # issue when valid + # instruction ok, indicate ready + comb += self.p.o_ready.eq(1) + + if self.allow_overlap: + with m.If(~fu_found): + # latch copy of instruction + sync += ilatch.eq(self.i) + sync += l_issue_conflict.eq(issue_conflict) + comb += self.p.o_ready.eq(1) # accept + comb += busy_o.eq(1) + m.next = "WAITING" + + with m.State("WAITING"): + comb += self.instr_active.eq(1) + with m.If(fu_found): + sync += l_issue_conflict.eq(0) + comb += self.p.o_ready.eq(0) + comb += busy_o.eq(1) + # using copy of instruction, keep waiting until an FU is free + comb += self.ireg.eq(ilatch) + with m.If(~l_issue_conflict): # wait for conflict to clear + # connect instructions. only one enabled at a time for funame, fu in fus.items(): do = self.des[funame] enable = fu_bitdict[funame] - # run this FunctionUnit if enabled - # route op, issue, busy, read flags and mask to FU + # run this FunctionUnit if enabled route op, + # issue, busy, read flags and mask to FU with m.If(enable): # operand comes from the *local* decoder comb += fu.oper_i.eq_from(do) - #comb += fu.oper_i.eq_from_execute1(e) - comb += fu.issue_i.eq(self.i.issue_i) - comb += self.o.busy_o.eq(fu.busy_o) - # rdmask, which is for registers, needs to come - # from the *main* decoder - rdmask = get_rdflags(self.i.e, fu) - comb += fu.rdmaskn.eq(~rdmask) - - return fu_bitdict + comb += fu.issue_i.eq(1) # issue when valid + comb += self.p.o_ready.eq(1) + comb += busy_o.eq(0) + m.next = "READY" + + print ("core: overlap allowed", self.allow_overlap) + if not self.allow_overlap: + # for simple non-overlap, if any instruction is busy, set + # busy output for core. + busys = map(lambda fu: fu.busy_o, fus.values()) + comb += busy_o.eq(Cat(*busys).bool()) + + # return both the function unit "enable" dict as well as the "busy". + # the "busy-or-issued" can be passed in to the Read/Write port + # connecters to give them permission to request access to regfiles + return fu_bitdict, fu_selected def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec): comb, sync = m.d.comb, m.d.sync @@ -303,17 +413,21 @@ class NonProductionCore(Elaboratable): print("read regfile", rpidx, regfile, regs.rf.keys(), rfile, rfile.unary) + # for checking if the read port has an outstanding write + if self.make_hazard_vecs: + wv = regs.wv[regfile.lower()] + wvchk = wv.r_ports["issue"] # write-vec bit-level hazard check + fspecs = fspec if not isinstance(fspecs, list): fspecs = [fspecs] rdflags = [] pplen = 0 - reads = [] ppoffs = [] for i, fspec in enumerate(fspecs): # get the regfile specs for this regfile port - (rf, read, write, wid, fuspec) = fspec + (rf, wf, read, write, wid, fuspec) = fspec print ("fpsec", i, fspec, len(fuspec)) ppoffs.append(pplen) # record offset for picker pplen += len(fuspec) @@ -321,7 +435,6 @@ class NonProductionCore(Elaboratable): rdflag = Signal(name=name, reset_less=True) comb += rdflag.eq(rf) rdflags.append(rdflag) - reads.append(read) print ("pplen", pplen) @@ -331,17 +444,36 @@ class NonProductionCore(Elaboratable): rens = [] addrs = [] + wvens = [] + for i, fspec in enumerate(fspecs): - (rf, read, write, wid, fuspec) = fspec + (rf, wf, _read, _write, wid, fuspec) = fspec # connect up the FU req/go signals, and the reg-read to the FU # and create a Read Broadcast Bus for pi, (funame, fu, idx) in enumerate(fuspec): pi += ppoffs[i] + name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi) + fu_active = fu_bitdict[funame] + + # get (or set up) a latched copy of read register number + rname = "%s_%s_%s_%d" % (funame, regfile, regname, pi) + read = Signal.like(_read, name="read_"+name) + if rname not in fu.rd_latches: + rdl = Signal.like(_read, name="rdlatch_"+rname) + fu.rd_latches[rname] = rdl + with m.If(fu.issue_i): + sync += rdl.eq(_read) + else: + rdl = fu.rd_latches[rname] + # latch to make the read immediately available on issue cycle + # after the read cycle, use the latched copy + with m.If(fu.issue_i): + comb += read.eq(_read) + with m.Else(): + comb += read.eq(rdl) # connect request-read to picker input, and output to go-rd - fu_active = fu_bitdict[funame] - name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi) - addr_en = Signal.like(reads[i], name="addr_en_"+name) + addr_en = Signal.like(read, name="addr_en_"+name) pick = Signal(name="pick_"+name) # picker input rp = Signal(name="rp_"+name) # picker output delay_pick = Signal(name="dp_"+name) # read-enable "underway" @@ -355,7 +487,7 @@ class NonProductionCore(Elaboratable): # if picked, select read-port "reg select" number to port comb += rp.eq(rdpick.o[pi] & rdpick.en_o) sync += delay_pick.eq(rp) # delayed "pick" - comb += addr_en.eq(Mux(rp, reads[i], 0)) + comb += addr_en.eq(Mux(rp, read, 0)) # the read-enable happens combinatorially (see mux-bus below) # but it results in the data coming out on a one-cycle delay. @@ -375,6 +507,21 @@ class NonProductionCore(Elaboratable): # all FUs connect to same port comb += src.eq(rport.o_data) + if not self.make_hazard_vecs: + continue + + # read the write-hazard bitvector (wv) for any bit that is + wvchk_en = Signal(len(wvchk.ren), name="wv_chk_addr_en_"+name) + issue_active = Signal(name="rd_iactive_"+name) + # XXX combinatorial loop here + comb += issue_active.eq(self.instr_active & rf) + with m.If(issue_active): + if rfile.unary: + comb += wvchk_en.eq(read) + else: + comb += wvchk_en.eq(1< clear bit + wvseten.append(wv_issue_en) # set data same as enable + wvsets.append(wv_issue_en) # because enable needs a 1 + # here is where we create the Write Broadcast Bus. simple, eh? comb += wport.i_data.eq(ortreereduce_sig(wsigs)) if rfile.unary: @@ -512,6 +796,14 @@ class NonProductionCore(Elaboratable): comb += wport.addr.eq(ortreereduce_sig(addrs)) comb += wport.wen.eq(ortreereduce_sig(wens)) + if not self.make_hazard_vecs: + return + + # for write-vectors + comb += wvclr.wen.eq(ortreereduce_sig(wvclren)) # clear (regfile write) + comb += wvset.wen.eq(ortreereduce_sig(wvseten)) # set (issue time) + comb += wvset.i_data.eq(ortreereduce_sig(wvsets)) + def connect_wrports(self, m, fu_bitdict): """connect write ports @@ -557,49 +849,76 @@ class NonProductionCore(Elaboratable): mode = "read" if readmode else "write" regs = self.regs fus = self.fus.fus - e = self.i.e # decoded instruction to execute + e = self.ireg.e # decoded instruction to execute + + # dictionary of dictionaries of lists of regfile ports. + # first key: regfile. second key: regfile port name + byregfiles = defaultdict(dict) + byregfiles_spec = defaultdict(dict) - # dictionary of lists of regfile ports - byregfiles = {} - byregfiles_spec = {} for (funame, fu) in fus.items(): + # create in each FU a receptacle for the read/write register + # hazard numbers. to be latched in connect_rd/write_ports + # XXX better that this is moved into the actual FUs, but + # the issue there is that this function is actually better + # suited at the moment + if readmode: + fu.rd_latches = {} + else: + fu.wr_latches = {} + print("%s ports for %s" % (mode, funame)) for idx in range(fu.n_src if readmode else fu.n_dst): + # construct regfile specs: read uses inspec, write outspec if readmode: (regfile, regname, wid) = fu.get_in_spec(idx) else: (regfile, regname, wid) = fu.get_out_spec(idx) print(" %d %s %s %s" % (idx, regfile, regname, str(wid))) + + # the PowerDecoder2 (main one, not the satellites) contains + # the decoded regfile numbers. obtain these now if readmode: rdflag, read = regspec_decode_read(e, regfile, regname) - write = None + wrport, write = None, None else: rdflag, read = None, None wrport, write = regspec_decode_write(e, regfile, regname) - if regfile not in byregfiles: - byregfiles[regfile] = {} - byregfiles_spec[regfile] = {} + + # construct the dictionary of regspec information by regfile if regname not in byregfiles_spec[regfile]: byregfiles_spec[regfile][regname] = \ - (rdflag, read, write, wid, []) + (rdflag, wrport, read, write, wid, []) # here we start to create "lanes" if idx not in byregfiles[regfile]: byregfiles[regfile][idx] = [] fuspec = (funame, fu, idx) byregfiles[regfile][idx].append(fuspec) - byregfiles_spec[regfile][regname][4].append(fuspec) + byregfiles_spec[regfile][regname][5].append(fuspec) + + continue + # append a latch Signal to the FU's list of latches + rname = "%s_%s" % (regfile, regname) + if readmode: + if rname not in fu.rd_latches: + rdl = Signal.like(read, name="rdlatch_"+rname) + fu.rd_latches[rname] = rdl + else: + if rname not in fu.wr_latches: + wrl = Signal.like(write, name="wrlatch_"+rname) + fu.wr_latches[rname] = wrl - # ok just print that out, for convenience + # ok just print that all out, for convenience for regfile, spec in byregfiles.items(): print("regfile %s ports:" % mode, regfile) fuspecs = byregfiles_spec[regfile] for regname, fspec in fuspecs.items(): - [rdflag, read, write, wid, fuspec] = fspec + [rdflag, wrflag, read, write, wid, fuspec] = fspec print(" rf %s port %s lane: %s" % (mode, regfile, regname)) - print(" %s" % regname, wid, read, write, rdflag) + print(" %s" % regname, wid, read, write, rdflag, wrflag) for (funame, fu, idx) in fuspec: fusig = fu.src_i[idx] if readmode else fu.dest[idx] - print(" ", funame, fu, idx, fusig) + print(" ", funame, fu.__class__.__name__, idx, fusig) print() return byregfiles, byregfiles_spec