X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fsoc%2Fsimple%2Fcore.py;h=f31fad32e8a8ff378f37230ea6d88a61cba77a73;hb=fafa3895712fef0aff848647b994ae36c0f65ffd;hp=f6e9a73b724d9a68eabed13674b1d5a643f1c766;hpb=575802fa56d7175ebbdc16bb5c493b556dab9c74;p=soc.git diff --git a/src/soc/simple/core.py b/src/soc/simple/core.py index f6e9a73b..f31fad32 100644 --- a/src/soc/simple/core.py +++ b/src/soc/simple/core.py @@ -19,26 +19,35 @@ and consequently it is safer to wait for the Function Unit to complete before allowing a new instruction to proceed. """ -from nmigen import Elaboratable, Module, Signal +from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux from nmigen.cli import rtlil +from openpower.decoder.power_decoder2 import PowerDecodeSubset +from openpower.decoder.power_regspec_map import regspec_decode_read +from openpower.decoder.power_regspec_map import regspec_decode_write +from openpower.sv.svp64 import SVP64Rec + from nmutil.picker import PriorityPicker from nmutil.util import treereduce +from nmutil.singlepipe import ControlBase from soc.fu.compunits.compunits import AllFunctionUnits from soc.regfile.regfiles import RegFiles -from soc.decoder.power_decoder import create_pdecode -from soc.decoder.power_decoder2 import PowerDecode2 -from soc.decoder.decode2execute1 import Data +from openpower.decoder.power_decoder2 import get_rdflags from soc.experiment.l0_cache import TstL0CacheBuffer # test only from soc.config.test.test_loadstore import TestMemPspec -from soc.decoder.power_enums import MicrOp +from openpower.decoder.power_enums import MicrOp, Function +from soc.simple.core_data import CoreInput, CoreOutput + +from collections import defaultdict import operator +from nmutil.util import rising_edge + # helper function for reducing a list of signals down to a parallel # ORed single signal. -def ortreereduce(tree, attr="data_o"): +def ortreereduce(tree, attr="o_data"): return treereduce(tree, operator.or_, lambda x: getattr(x, attr)) @@ -58,64 +67,144 @@ def sort_fuspecs(fuspecs): return res # enumerate(res) -class NonProductionCore(Elaboratable): +# derive from ControlBase rather than have a separate Stage instance, +# this is simpler to do +class NonProductionCore(ControlBase): def __init__(self, pspec): + self.pspec = pspec + + # test is SVP64 is to be enabled + self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True) + + # test to see if regfile ports should be reduced + self.regreduce_en = (hasattr(pspec, "regreduce") and + (pspec.regreduce == True)) + + # test core type + self.core_type = "fsm" + if hasattr(pspec, "core_type"): + self.core_type = pspec.core_type + + super().__init__(stage=self) + # single LD/ST funnel for memory access - self.l0 = TstL0CacheBuffer(pspec, n_units=1) - pi = self.l0.l0.dports[0] + self.l0 = l0 = TstL0CacheBuffer(pspec, n_units=1) + pi = l0.l0.dports[0] # function units (only one each) + # only include mmu if enabled in pspec self.fus = AllFunctionUnits(pspec, pilist=[pi]) - # register files (yes plural) - self.regs = RegFiles() + # link LoadStore1 into MMU + mmu = self.fus.get_fu('mmu0') + print ("core pspec", pspec.ldst_ifacetype) + print ("core mmu", mmu) + print ("core lsmem.lsi", l0.cmpi.lsmem.lsi) + if mmu is not None: + mmu.alu.set_ldst_interface(l0.cmpi.lsmem.lsi) - # instruction decoder - pdecode = create_pdecode() - self.pdecode2 = PowerDecode2(pdecode) # instruction decoder + # register files (yes plural) + self.regs = RegFiles(pspec, make_hazard_vecs=True) - # issue/valid/busy signalling - self.ivalid_i = self.pdecode2.valid # instruction is valid - self.issue_i = Signal(reset_less=True) - self.busy_o = Signal(name="corebusy_o", reset_less=True) + # set up input and output: unusual requirement to set data directly + # (due to the way that the core is set up in a different domain, + # see TestIssuer.setup_peripherals + self.i, self.o = self.new_specs(None) + self.i, self.o = self.p.i_data, self.n.o_data - # instruction input - self.bigendian_i = self.pdecode2.dec.bigendian - self.raw_opcode_i = self.pdecode2.dec.raw_opcode_in + # create per-FU instruction decoders (subsetted) + self.decoders = {} + self.des = {} - # start/stop and terminated signalling - self.core_start_i = Signal(reset_less=True) - self.core_stop_i = Signal(reset_less=True) - self.core_terminated_o = Signal(reset=0) # indicates stopped + for funame, fu in self.fus.fus.items(): + f_name = fu.fnunit.name + fnunit = fu.fnunit.value + opkls = fu.opsubsetkls + if f_name == 'TRAP': + # TRAP decoder is the *main* decoder + self.trapunit = funame + continue + self.decoders[funame] = PowerDecodeSubset(None, opkls, f_name, + final=True, + state=self.i.state, + svp64_en=self.svp64_en, + regreduce_en=self.regreduce_en) + self.des[funame] = self.decoders[funame].do + + if "mmu0" in self.decoders: + self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"] + + def setup(self, m, i): + pass + + def ispec(self): + return CoreInput(self.pspec, self.svp64_en, self.regreduce_en) + + def ospec(self): + return CoreOutput() def elaborate(self, platform): - m = Module() + m = super().elaborate(platform) + + # for testing purposes, to cut down on build time in coriolis2 + if hasattr(self.pspec, "nocore") and self.pspec.nocore == True: + x = Signal() # dummy signal + m.d.sync += x.eq(~x) + return m + comb = m.d.comb - m.submodules.pdecode2 = dec2 = self.pdecode2 m.submodules.fus = self.fus m.submodules.l0 = l0 = self.l0 self.regs.elaborate_into(m, platform) regs = self.regs fus = self.fus.fus - # core start/stopped state - core_stopped = Signal(reset=1) # begins in stopped state + # connect decoders + self.connect_satellite_decoders(m) - # start/stop signalling - with m.If(self.core_start_i): - m.d.sync += core_stopped.eq(0) - with m.If(self.core_stop_i): - m.d.sync += core_stopped.eq(1) - m.d.comb += self.core_terminated_o.eq(core_stopped) + # ssh, cheat: trap uses the main decoder because of the rewriting + self.des[self.trapunit] = self.i.e.do # connect up Function Units, then read/write ports - fu_bitdict = self.connect_instruction(m, core_stopped) - self.connect_rdports(m, fu_bitdict) - self.connect_wrports(m, fu_bitdict) + fu_bitdict, fu_selected = self.connect_instruction(m) + self.connect_rdports(m, fu_selected) + self.connect_wrports(m, fu_selected) + + # note if an exception happened. in a pipelined or OoO design + # this needs to be accompanied by "shadowing" (or stalling) + el = [] + for exc in self.fus.excs.values(): + el.append(exc.happened) + if len(el) > 0: # at least one exception + comb += self.o.exc_happened.eq(Cat(*el).bool()) return m - def connect_instruction(self, m, core_stopped): + def connect_satellite_decoders(self, m): + comb = m.d.comb + for k, v in self.decoders.items(): + # connect each satellite decoder and give it the instruction. + # as subset decoders this massively reduces wire fanout given + # the large number of ALUs + setattr(m.submodules, "dec_%s" % v.fn_name, v) + comb += v.dec.raw_opcode_in.eq(self.i.raw_insn_i) + comb += v.dec.bigendian.eq(self.i.bigendian_i) + # sigh due to SVP64 RA_OR_ZERO detection connect these too + comb += v.sv_a_nz.eq(self.i.sv_a_nz) + if self.svp64_en: + comb += v.pred_sm.eq(self.i.sv_pred_sm) + comb += v.pred_dm.eq(self.i.sv_pred_dm) + if k != self.trapunit: + comb += v.sv_rm.eq(self.i.sv_rm) # pass through SVP64 ReMap + comb += v.is_svp64_mode.eq(self.i.is_svp64_mode) + # only the LDST PowerDecodeSubset *actually* needs to + # know to use the alternative decoder. this is all + # a terrible hack + if k.lower().startswith("ldst"): + comb += v.use_svp64_ldst_dec.eq( + self.i.use_svp64_ldst_dec) + + def connect_instruction(self, m): """connect_instruction uses decoded (from PowerOp) function unit information from CSV files @@ -129,57 +218,211 @@ class NonProductionCore(Elaboratable): """ comb, sync = m.d.comb, m.d.sync fus = self.fus.fus - dec2 = self.pdecode2 - # enable-signals for each FU, get one bit for each FU (by name) + # indicate if core is busy + busy_o = self.o.busy_o + + # enable/busy-signals for each FU, get one bit for each FU (by name) fu_enable = Signal(len(fus), reset_less=True) + fu_busy = Signal(len(fus), reset_less=True) fu_bitdict = {} + fu_selected = {} for i, funame in enumerate(fus.keys()): fu_bitdict[funame] = fu_enable[i] - # only run when allowed and when instruction is valid - can_run = Signal(reset_less=True) - comb += can_run.eq(self.ivalid_i & ~core_stopped) - - # enable the required Function Unit based on the opcode decode - # note: this *only* works correctly for simple core when one and - # *only* one FU is allocated per instruction - for funame, fu in fus.items(): - fnunit = fu.fnunit.value - enable = Signal(name="en_%s" % funame, reset_less=True) - comb += enable.eq((dec2.e.do.fn_unit & fnunit).bool()) - comb += fu_bitdict[funame].eq(enable) + fu_selected[funame] = fu_busy[i] + + # identify function units and create a list by fnunit so that + # PriorityPickers can be created for selecting one of them that + # isn't busy at the time the incoming instruction needs passing on + by_fnunit = defaultdict(list) + for fname, member in Function.__members__.items(): + for funame, fu in fus.items(): + fnunit = fu.fnunit.value + if member.value & fnunit: # this FU handles this type of op + by_fnunit[fname].append((funame, fu)) # add by Function + + # ok now just print out the list of FUs by Function, because we can + for fname, fu_list in by_fnunit.items(): + print ("FUs by type", fname, fu_list) + + # now create a PriorityPicker per FU-type such that only one + # non-busy FU will be picked + issue_pps = {} + fu_found = Signal() # take a note if no Function Unit was available + for fname, fu_list in by_fnunit.items(): + i_pp = PriorityPicker(len(fu_list)) + m.submodules['i_pp_%s' % fname] = i_pp + i_l = [] + for i, (funame, fu) in enumerate(fu_list): + # match the decoded instruction (e.do.fn_unit) against the + # "capability" of this FU, gate that by whether that FU is + # busy, and drop that into the PriorityPicker. + # this will give us an output of the first available *non-busy* + # Function Unit (Reservation Statio) capable of handling this + # instruction. + fnunit = fu.fnunit.value + en_req = Signal(name="issue_en_%s" % funame, reset_less=True) + fnmatch = (self.i.e.do.fn_unit & fnunit).bool() + comb += en_req.eq(fnmatch & ~fu.busy_o & self.p.i_valid) + i_l.append(en_req) # store in list for doing the Cat-trick + # picker output, gated by enable: store in fu_bitdict + po = Signal(name="o_issue_pick_"+funame) # picker output + comb += po.eq(i_pp.o[i] & i_pp.en_o) + comb += fu_bitdict[funame].eq(po) + comb += fu_selected[funame].eq(fu.busy_o | po) + # if we don't do this, then when there are no FUs available, + # the "p.o_ready" signal will go back "ok we accepted this + # instruction" which of course isn't true. + comb += fu_found.eq(~fnmatch | i_pp.en_o) + # for each input, Cat them together and drop them into the picker + comb += i_pp.i.eq(Cat(*i_l)) # sigh - need a NOP counter counter = Signal(2) with m.If(counter != 0): sync += counter.eq(counter - 1) - comb += self.busy_o.eq(1) + comb += busy_o.eq(1) - with m.If(can_run): - with m.Switch(dec2.e.do.insn_type): + with m.If(self.p.i_valid): # run only when valid + with m.Switch(self.i.e.do.insn_type): # check for ATTN: halt if true with m.Case(MicrOp.OP_ATTN): - m.d.sync += core_stopped.eq(1) + m.d.sync += self.o.core_terminate_o.eq(1) + # fake NOP - this isn't really used (Issuer detects NOP) with m.Case(MicrOp.OP_NOP): sync += counter.eq(2) - comb += self.busy_o.eq(1) + comb += busy_o.eq(1) with m.Default(): # connect up instructions. only one enabled at a time for funame, fu in fus.items(): + do = self.des[funame] enable = fu_bitdict[funame] # run this FunctionUnit if enabled + # route op, issue, busy, read flags and mask to FU with m.If(enable): - # route op, issue, busy, read flags and mask to FU - comb += fu.oper_i.eq_from_execute1(dec2.e) - comb += fu.issue_i.eq(self.issue_i) - comb += self.busy_o.eq(fu.busy_o) - rdmask = dec2.rdflags(fu) + # operand comes from the *local* decoder + comb += fu.oper_i.eq_from(do) + comb += fu.issue_i.eq(1) # issue when input valid + # rdmask, which is for registers, needs to come + # from the *main* decoder + rdmask = get_rdflags(self.i.e, fu) comb += fu.rdmaskn.eq(~rdmask) - return fu_bitdict + # if instruction is busy, set busy output for core. + busys = map(lambda fu: fu.busy_o, fus.values()) + comb += busy_o.eq(Cat(*busys).bool()) + + # ready/valid signalling. if busy, means refuse incoming issue. + # (this is a global signal, TODO, change to one which allows + # overlapping instructions) + # also, if there was no fu found we must not send back a valid + # indicator. BUT, of course, when there is no instruction + # we must ignore the fu_found flag, otherwise o_ready will never + # be set when everything is idle + comb += self.p.o_ready.eq(fu_found | ~self.p.i_valid) + + # return both the function unit "enable" dict as well as the "busy". + # the "busy-or-issued" can be passed in to the Read/Write port + # connecters to give them permission to request access to regfiles + return fu_bitdict, fu_selected + + def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec): + comb, sync = m.d.comb, m.d.sync + fus = self.fus.fus + regs = self.regs + + rpidx = regname + + # select the required read port. these are pre-defined sizes + rfile = regs.rf[regfile.lower()] + rport = rfile.r_ports[rpidx] + print("read regfile", rpidx, regfile, regs.rf.keys(), + rfile, rfile.unary) + + fspecs = fspec + if not isinstance(fspecs, list): + fspecs = [fspecs] + + rdflags = [] + pplen = 0 + reads = [] + ppoffs = [] + for i, fspec in enumerate(fspecs): + # get the regfile specs for this regfile port + (rf, read, write, wid, fuspec) = fspec + print ("fpsec", i, fspec, len(fuspec)) + ppoffs.append(pplen) # record offset for picker + pplen += len(fuspec) + name = "rdflag_%s_%s_%d" % (regfile, regname, i) + rdflag = Signal(name=name, reset_less=True) + comb += rdflag.eq(rf) + rdflags.append(rdflag) + reads.append(read) + + print ("pplen", pplen) + + # create a priority picker to manage this port + rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen) + setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick) + + rens = [] + addrs = [] + for i, fspec in enumerate(fspecs): + (rf, read, write, wid, fuspec) = fspec + # connect up the FU req/go signals, and the reg-read to the FU + # and create a Read Broadcast Bus + for pi, (funame, fu, idx) in enumerate(fuspec): + pi += ppoffs[i] + + # connect request-read to picker input, and output to go-rd + fu_active = fu_bitdict[funame] + name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi) + addr_en = Signal.like(reads[i], name="addr_en_"+name) + pick = Signal(name="pick_"+name) # picker input + rp = Signal(name="rp_"+name) # picker output + delay_pick = Signal(name="dp_"+name) # read-enable "underway" + + # exclude any currently-enabled read-request (mask out active) + comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] & + ~delay_pick) + comb += rdpick.i[pi].eq(pick) + comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick + + # if picked, select read-port "reg select" number to port + comb += rp.eq(rdpick.o[pi] & rdpick.en_o) + sync += delay_pick.eq(rp) # delayed "pick" + comb += addr_en.eq(Mux(rp, reads[i], 0)) + + # the read-enable happens combinatorially (see mux-bus below) + # but it results in the data coming out on a one-cycle delay. + if rfile.unary: + rens.append(addr_en) + else: + addrs.append(addr_en) + rens.append(rp) + + # use the *delayed* pick signal to put requested data onto bus + with m.If(delay_pick): + # connect regfile port to input, creating fan-out Bus + src = fu.src_i[idx] + print("reg connect widths", + regfile, regname, pi, funame, + src.shape(), rport.o_data.shape()) + # all FUs connect to same port + comb += src.eq(rport.o_data) + + # or-reduce the muxed read signals + if rfile.unary: + # for unary-addressed + comb += rport.ren.eq(ortreereduce_sig(rens)) + else: + # for binary-addressed + comb += rport.addr.eq(ortreereduce_sig(addrs)) + comb += rport.ren.eq(Cat(*rens).bool()) + print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs) def connect_rdports(self, m, fu_bitdict): """connect read ports @@ -202,47 +445,111 @@ class NonProductionCore(Elaboratable): fuspecs = byregfiles_rdspec[regfile] rdpickers[regfile] = {} + # argh. an experiment to merge RA and RB in the INT regfile + # (we have too many read/write ports) + if self.regreduce_en: + if regfile == 'INT': + fuspecs['rabc'] = [fuspecs.pop('rb')] + fuspecs['rabc'].append(fuspecs.pop('rc')) + fuspecs['rabc'].append(fuspecs.pop('ra')) + if regfile == 'FAST': + fuspecs['fast1'] = [fuspecs.pop('fast1')] + if 'fast2' in fuspecs: + fuspecs['fast1'].append(fuspecs.pop('fast2')) + if 'fast3' in fuspecs: + fuspecs['fast1'].append(fuspecs.pop('fast3')) + # for each named regfile port, connect up all FUs to that port for (regname, fspec) in sort_fuspecs(fuspecs): print("connect rd", regname, fspec) - rpidx = regname - # get the regfile specs for this regfile port - (rf, read, write, wid, fuspec) = fspec - name = "rdflag_%s_%s" % (regfile, regname) - rdflag = Signal(name=name, reset_less=True) - comb += rdflag.eq(rf) - - # select the required read port. these are pre-defined sizes - print(rpidx, regfile, regs.rf.keys()) - rport = regs.rf[regfile.lower()].r_ports[rpidx] - - # create a priority picker to manage this port - rdpickers[regfile][rpidx] = rdpick = PriorityPicker( - len(fuspec)) - setattr(m.submodules, "rdpick_%s_%s" % - (regfile, rpidx), rdpick) - - # connect the regspec "reg select" number to this port - with m.If(rdpick.en_o): - comb += rport.ren.eq(read) - - # connect up the FU req/go signals, and the reg-read to the FU - # and create a Read Broadcast Bus - for pi, (funame, fu, idx) in enumerate(fuspec): - src = fu.src_i[idx] + self.connect_rdport(m, fu_bitdict, rdpickers, regfile, + regname, fspec) - # connect request-read to picker input, and output to go-rd - fu_active = fu_bitdict[funame] - pick = fu.rd_rel_o[idx] & fu_active & rdflag - comb += rdpick.i[pi].eq(pick) - comb += fu.go_rd_i[idx].eq(rdpick.o[pi]) + def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec): + comb, sync = m.d.comb, m.d.sync + fus = self.fus.fus + regs = self.regs - # connect regfile port to input, creating a Broadcast Bus - print("reg connect widths", - regfile, regname, pi, funame, - src.shape(), rport.data_o.shape()) - # all FUs connect to same port - comb += src.eq(rport.data_o) + print("connect wr", regname, fspec) + rpidx = regname + + # select the required write port. these are pre-defined sizes + print(regfile, regs.rf.keys()) + rfile = regs.rf[regfile.lower()] + wport = rfile.w_ports[rpidx] + + fspecs = fspec + if not isinstance(fspecs, list): + fspecs = [fspecs] + + pplen = 0 + writes = [] + ppoffs = [] + for i, fspec in enumerate(fspecs): + # get the regfile specs for this regfile port + (rf, read, write, wid, fuspec) = fspec + print ("fpsec", i, fspec, len(fuspec)) + ppoffs.append(pplen) # record offset for picker + pplen += len(fuspec) + + # create a priority picker to manage this port + wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen) + setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick) + + wsigs = [] + wens = [] + addrs = [] + for i, fspec in enumerate(fspecs): + # connect up the FU req/go signals and the reg-read to the FU + # these are arbitrated by Data.ok signals + (rf, read, write, wid, fuspec) = fspec + for pi, (funame, fu, idx) in enumerate(fuspec): + pi += ppoffs[i] + + # write-request comes from dest.ok + dest = fu.get_out(idx) + fu_dest_latch = fu.get_fu_out(idx) # latched output + name = "wrflag_%s_%s_%d" % (funame, regname, idx) + wrflag = Signal(name=name, reset_less=True) + comb += wrflag.eq(dest.ok & fu.busy_o) + + # connect request-write to picker input, and output to go-wr + fu_active = fu_bitdict[funame] + pick = fu.wr.rel_o[idx] & fu_active # & wrflag + comb += wrpick.i[pi].eq(pick) + # create a single-pulse go write from the picker output + wr_pick = Signal(name="wpick_%s_%s_%d" % (funame, regname, idx)) + comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o) + comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick)) + + # connect the regspec write "reg select" number to this port + # only if one FU actually requests (and is granted) the port + # will the write-enable be activated + addr_en = Signal.like(write) + wp = Signal() + comb += wp.eq(wr_pick & wrpick.en_o) + comb += addr_en.eq(Mux(wp, write, 0)) + if rfile.unary: + wens.append(addr_en) + else: + addrs.append(addr_en) + wens.append(wp) + + # connect regfile port to input + print("reg connect widths", + regfile, regname, pi, funame, + dest.shape(), wport.i_data.shape()) + wsigs.append(fu_dest_latch) + + # here is where we create the Write Broadcast Bus. simple, eh? + comb += wport.i_data.eq(ortreereduce_sig(wsigs)) + if rfile.unary: + # for unary-addressed + comb += wport.wen.eq(ortreereduce_sig(wens)) + else: + # for binary-addressed + comb += wport.addr.eq(ortreereduce_sig(addrs)) + comb += wport.wen.eq(ortreereduce_sig(wens)) def connect_wrports(self, m, fu_bitdict): """connect write ports @@ -267,61 +574,29 @@ class NonProductionCore(Elaboratable): for regfile, spec in byregfiles_wr.items(): fuspecs = byregfiles_wrspec[regfile] wrpickers[regfile] = {} - for (regname, fspec) in sort_fuspecs(fuspecs): - print("connect wr", regname, fspec) - rpidx = regname - # get the regfile specs for this regfile port - (rf, read, write, wid, fuspec) = fspec - # select the required write port. these are pre-defined sizes - print(regfile, regs.rf.keys()) - wport = regs.rf[regfile.lower()].w_ports[rpidx] + if self.regreduce_en: + # argh, more port-merging + if regfile == 'INT': + fuspecs['o'] = [fuspecs.pop('o')] + fuspecs['o'].append(fuspecs.pop('o1')) + if regfile == 'FAST': + fuspecs['fast1'] = [fuspecs.pop('fast1')] + if 'fast2' in fuspecs: + fuspecs['fast1'].append(fuspecs.pop('fast2')) + if 'fast3' in fuspecs: + fuspecs['fast1'].append(fuspecs.pop('fast3')) - # create a priority picker to manage this port - wrpickers[regfile][rpidx] = wrpick = PriorityPicker( - len(fuspec)) - setattr(m.submodules, "wrpick_%s_%s" % - (regfile, rpidx), wrpick) - - # connect the regspec write "reg select" number to this port - # only if one FU actually requests (and is granted) the port - # will the write-enable be activated - with m.If(wrpick.en_o): - comb += wport.wen.eq(write) - with m.Else(): - comb += wport.wen.eq(0) - - # connect up the FU req/go signals and the reg-read to the FU - # these are arbitrated by Data.ok signals - wsigs = [] - for pi, (funame, fu, idx) in enumerate(fuspec): - # write-request comes from dest.ok - dest = fu.get_out(idx) - fu_dest_latch = fu.get_fu_out(idx) # latched output - name = "wrflag_%s_%s_%d" % (funame, regname, idx) - wrflag = Signal(name=name, reset_less=True) - comb += wrflag.eq(dest.ok & fu.busy_o) - - # connect request-read to picker input, and output to go-wr - fu_active = fu_bitdict[funame] - pick = fu.wr.rel[idx] & fu_active # & wrflag - comb += wrpick.i[pi].eq(pick) - comb += fu.go_wr_i[idx].eq(wrpick.o[pi] & wrpick.en_o) - # connect regfile port to input - print("reg connect widths", - regfile, regname, pi, funame, - dest.shape(), wport.data_i.shape()) - wsigs.append(fu_dest_latch) - - # here is where we create the Write Broadcast Bus. simple, eh? - comb += wport.data_i.eq(ortreereduce_sig(wsigs)) + for (regname, fspec) in sort_fuspecs(fuspecs): + self.connect_wrport(m, fu_bitdict, wrpickers, + regfile, regname, fspec) def get_byregfiles(self, readmode): mode = "read" if readmode else "write" - dec2 = self.pdecode2 regs = self.regs fus = self.fus.fus + e = self.i.e # decoded instruction to execute # dictionary of lists of regfile ports byregfiles = {} @@ -335,17 +610,17 @@ class NonProductionCore(Elaboratable): (regfile, regname, wid) = fu.get_out_spec(idx) print(" %d %s %s %s" % (idx, regfile, regname, str(wid))) if readmode: - rdflag, read = dec2.regspecmap_read(regfile, regname) + rdflag, read = regspec_decode_read(e, regfile, regname) write = None else: rdflag, read = None, None - wrport, write = dec2.regspecmap_write(regfile, regname) + wrport, write = regspec_decode_write(e, regfile, regname) if regfile not in byregfiles: byregfiles[regfile] = {} byregfiles_spec[regfile] = {} if regname not in byregfiles_spec[regfile]: byregfiles_spec[regfile][regname] = \ - [rdflag, read, write, wid, []] + (rdflag, read, write, wid, []) # here we start to create "lanes" if idx not in byregfiles[regfile]: byregfiles[regfile][idx] = [] @@ -370,7 +645,7 @@ class NonProductionCore(Elaboratable): def __iter__(self): yield from self.fus.ports() - yield from self.pdecode2.ports() + yield from self.i.e.ports() yield from self.l0.ports() # TODO: regs