X-Git-Url: https://git.libre-soc.org/?p=soc.git;a=blobdiff_plain;f=src%2Fsoc%2Fsimple%2Fcore.py;h=d3a66bb1e9916c50277b1d5b1dc2a859707fc90b;hp=f6e9a73b724d9a68eabed13674b1d5a643f1c766;hb=3a1921d77d0a3aefe65e7b0499dfb71cc9867942;hpb=575802fa56d7175ebbdc16bb5c493b556dab9c74 diff --git a/src/soc/simple/core.py b/src/soc/simple/core.py index f6e9a73b..d3a66bb1 100644 --- a/src/soc/simple/core.py +++ b/src/soc/simple/core.py @@ -19,22 +19,31 @@ and consequently it is safer to wait for the Function Unit to complete before allowing a new instruction to proceed. """ -from nmigen import Elaboratable, Module, Signal +from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux from nmigen.cli import rtlil +from soc.decoder.power_decoder2 import PowerDecodeSubset +from soc.decoder.power_regspec_map import regspec_decode_read +from soc.decoder.power_regspec_map import regspec_decode_write + from nmutil.picker import PriorityPicker from nmutil.util import treereduce from soc.fu.compunits.compunits import AllFunctionUnits from soc.regfile.regfiles import RegFiles -from soc.decoder.power_decoder import create_pdecode -from soc.decoder.power_decoder2 import PowerDecode2 +from soc.decoder.decode2execute1 import Decode2ToExecute1Type +from soc.decoder.decode2execute1 import IssuerDecode2ToOperand +from soc.decoder.power_decoder2 import get_rdflags from soc.decoder.decode2execute1 import Data from soc.experiment.l0_cache import TstL0CacheBuffer # test only from soc.config.test.test_loadstore import TestMemPspec from soc.decoder.power_enums import MicrOp +from soc.config.state import CoreState + import operator +from nmutil.util import rising_edge + # helper function for reducing a list of signals down to a parallel # ORed single signal. @@ -60,62 +69,86 @@ def sort_fuspecs(fuspecs): class NonProductionCore(Elaboratable): def __init__(self, pspec): + self.pspec = pspec + # single LD/ST funnel for memory access self.l0 = TstL0CacheBuffer(pspec, n_units=1) pi = self.l0.l0.dports[0] # function units (only one each) + # only include mmu if enabled in pspec self.fus = AllFunctionUnits(pspec, pilist=[pi]) # register files (yes plural) self.regs = RegFiles() - # instruction decoder - pdecode = create_pdecode() - self.pdecode2 = PowerDecode2(pdecode) # instruction decoder + # instruction decoder - needs a Trap-capable Record (captures EINT etc.) + self.e = Decode2ToExecute1Type("core", opkls=IssuerDecode2ToOperand) + + self.state = CoreState("core") + self.raw_insn_i = Signal(32) # raw instruction + self.bigendian_i = Signal() # bigendian # issue/valid/busy signalling - self.ivalid_i = self.pdecode2.valid # instruction is valid + self.ivalid_i = Signal(reset_less=True) # instruction is valid self.issue_i = Signal(reset_less=True) self.busy_o = Signal(name="corebusy_o", reset_less=True) - # instruction input - self.bigendian_i = self.pdecode2.dec.bigendian - self.raw_opcode_i = self.pdecode2.dec.raw_opcode_in - # start/stop and terminated signalling - self.core_start_i = Signal(reset_less=True) - self.core_stop_i = Signal(reset_less=True) - self.core_terminated_o = Signal(reset=0) # indicates stopped + self.core_stopped_i = Signal(reset_less=True) + self.core_terminate_o = Signal(reset=0) # indicates stopped + + # create per-FU instruction decoders (subsetted) + self.decoders = {} + self.des = {} + + for funame, fu in self.fus.fus.items(): + f_name = fu.fnunit.name + fnunit = fu.fnunit.value + opkls = fu.opsubsetkls + if f_name == 'TRAP': + self.trapunit = funame + continue + self.decoders[funame] = PowerDecodeSubset(None, opkls, f_name, + final=True, + state=self.state) + self.des[funame] = self.decoders[funame].do + + if "mmu0" in self.decoders: + self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"] def elaborate(self, platform): m = Module() + # for testing purposes, to cut down on build time in coriolis2 + if hasattr(self.pspec, "nocore") and self.pspec.nocore == True: + x = Signal() # dummy signal + m.d.sync += x.eq(~x) + return m + comb = m.d.comb - m.submodules.pdecode2 = dec2 = self.pdecode2 m.submodules.fus = self.fus m.submodules.l0 = l0 = self.l0 self.regs.elaborate_into(m, platform) regs = self.regs fus = self.fus.fus - # core start/stopped state - core_stopped = Signal(reset=1) # begins in stopped state + # connect decoders + for k, v in self.decoders.items(): + setattr(m.submodules, "dec_%s" % v.fn_name, v) + comb += v.dec.raw_opcode_in.eq(self.raw_insn_i) + comb += v.dec.bigendian.eq(self.bigendian_i) - # start/stop signalling - with m.If(self.core_start_i): - m.d.sync += core_stopped.eq(0) - with m.If(self.core_stop_i): - m.d.sync += core_stopped.eq(1) - m.d.comb += self.core_terminated_o.eq(core_stopped) + # ssh, cheat: trap uses the main decoder because of the rewriting + self.des[self.trapunit] = self.e.do # connect up Function Units, then read/write ports - fu_bitdict = self.connect_instruction(m, core_stopped) + fu_bitdict = self.connect_instruction(m) self.connect_rdports(m, fu_bitdict) self.connect_wrports(m, fu_bitdict) return m - def connect_instruction(self, m, core_stopped): + def connect_instruction(self, m): """connect_instruction uses decoded (from PowerOp) function unit information from CSV files @@ -129,16 +162,12 @@ class NonProductionCore(Elaboratable): """ comb, sync = m.d.comb, m.d.sync fus = self.fus.fus - dec2 = self.pdecode2 # enable-signals for each FU, get one bit for each FU (by name) fu_enable = Signal(len(fus), reset_less=True) fu_bitdict = {} for i, funame in enumerate(fus.keys()): fu_bitdict[funame] = fu_enable[i] - # only run when allowed and when instruction is valid - can_run = Signal(reset_less=True) - comb += can_run.eq(self.ivalid_i & ~core_stopped) # enable the required Function Unit based on the opcode decode # note: this *only* works correctly for simple core when one and @@ -146,7 +175,7 @@ class NonProductionCore(Elaboratable): for funame, fu in fus.items(): fnunit = fu.fnunit.value enable = Signal(name="en_%s" % funame, reset_less=True) - comb += enable.eq((dec2.e.do.fn_unit & fnunit).bool()) + comb += enable.eq((self.e.do.fn_unit & fnunit).bool()) comb += fu_bitdict[funame].eq(enable) # sigh - need a NOP counter @@ -155,11 +184,11 @@ class NonProductionCore(Elaboratable): sync += counter.eq(counter - 1) comb += self.busy_o.eq(1) - with m.If(can_run): - with m.Switch(dec2.e.do.insn_type): + with m.If(self.ivalid_i): # run only when valid + with m.Switch(self.e.do.insn_type): # check for ATTN: halt if true with m.Case(MicrOp.OP_ATTN): - m.d.sync += core_stopped.eq(1) + m.d.sync += self.core_terminate_o.eq(1) with m.Case(MicrOp.OP_NOP): sync += counter.eq(2) @@ -168,19 +197,119 @@ class NonProductionCore(Elaboratable): with m.Default(): # connect up instructions. only one enabled at a time for funame, fu in fus.items(): + do = self.des[funame] enable = fu_bitdict[funame] # run this FunctionUnit if enabled + # route op, issue, busy, read flags and mask to FU with m.If(enable): - # route op, issue, busy, read flags and mask to FU - comb += fu.oper_i.eq_from_execute1(dec2.e) + # operand comes from the *local* decoder + comb += fu.oper_i.eq_from(do) + #comb += fu.oper_i.eq_from_execute1(e) comb += fu.issue_i.eq(self.issue_i) comb += self.busy_o.eq(fu.busy_o) - rdmask = dec2.rdflags(fu) + # rdmask, which is for registers, needs to come + # from the *main* decoder + rdmask = get_rdflags(self.e, fu) comb += fu.rdmaskn.eq(~rdmask) return fu_bitdict + def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec): + comb, sync = m.d.comb, m.d.sync + fus = self.fus.fus + regs = self.regs + + rpidx = regname + + # select the required read port. these are pre-defined sizes + rfile = regs.rf[regfile.lower()] + rport = rfile.r_ports[rpidx] + print("read regfile", rpidx, regfile, regs.rf.keys(), + rfile, rfile.unary) + + fspecs = fspec + if not isinstance(fspecs, list): + fspecs = [fspecs] + + rdflags = [] + pplen = 0 + reads = [] + ppoffs = [] + for i, fspec in enumerate(fspecs): + # get the regfile specs for this regfile port + (rf, read, write, wid, fuspec) = fspec + print ("fpsec", i, fspec, len(fuspec)) + ppoffs.append(pplen) # record offset for picker + pplen += len(fuspec) + name = "rdflag_%s_%s_%d" % (regfile, regname, i) + rdflag = Signal(name=name, reset_less=True) + comb += rdflag.eq(rf) + rdflags.append(rdflag) + reads.append(read) + + print ("pplen", pplen) + + # create a priority picker to manage this port + rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen) + setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick) + + rens = [] + addrs = [] + for i, fspec in enumerate(fspecs): + (rf, read, write, wid, fuspec) = fspec + # connect up the FU req/go signals, and the reg-read to the FU + # and create a Read Broadcast Bus + for pi, (funame, fu, idx) in enumerate(fuspec): + pi += ppoffs[i] + + # connect request-read to picker input, and output to go-rd + fu_active = fu_bitdict[funame] + name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi) + addr_en = Signal.like(reads[i], name="addr_en_"+name) + pick = Signal(name="pick_"+name) # picker input + rp = Signal(name="rp_"+name) # picker output + delay_pick = Signal(name="dp_"+name) # read-enable "underway" + + # exclude any currently-enabled read-request (mask out active) + comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] & + ~delay_pick) + comb += rdpick.i[pi].eq(pick) + comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick + + # if picked, select read-port "reg select" number to port + comb += rp.eq(rdpick.o[pi] & rdpick.en_o) + sync += delay_pick.eq(rp) # delayed "pick" + comb += addr_en.eq(Mux(rp, reads[i], 0)) + + # the read-enable happens combinatorially (see mux-bus below) + # but it results in the data coming out on a one-cycle delay. + if rfile.unary: + rens.append(addr_en) + else: + addrs.append(addr_en) + rens.append(rp) + + # use the *delayed* pick signal to put requested data onto bus + with m.If(delay_pick): + # connect regfile port to input, creating fan-out Bus + src = fu.src_i[idx] + print("reg connect widths", + regfile, regname, pi, funame, + src.shape(), rport.data_o.shape()) + # all FUs connect to same port + comb += src.eq(rport.data_o) + + # or-reduce the muxed read signals + if rfile.unary: + # for unary-addressed + comb += rport.ren.eq(ortreereduce_sig(rens)) + else: + # for binary-addressed + comb += rport.addr.eq(ortreereduce_sig(addrs)) + comb += rport.ren.eq(Cat(*rens).bool()) + print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs) + def connect_rdports(self, m, fu_bitdict): """connect read ports @@ -202,47 +331,108 @@ class NonProductionCore(Elaboratable): fuspecs = byregfiles_rdspec[regfile] rdpickers[regfile] = {} + # argh. an experiment to merge RA and RB in the INT regfile + # (we have too many read/write ports) + #if regfile == 'INT': + #fuspecs['rabc'] = [fuspecs.pop('rb')] + #fuspecs['rabc'].append(fuspecs.pop('rc')) + #fuspecs['rabc'].append(fuspecs.pop('ra')) + #if regfile == 'FAST': + # fuspecs['fast1'] = [fuspecs.pop('fast1')] + # if 'fast2' in fuspecs: + # fuspecs['fast1'].append(fuspecs.pop('fast2')) + # for each named regfile port, connect up all FUs to that port for (regname, fspec) in sort_fuspecs(fuspecs): print("connect rd", regname, fspec) - rpidx = regname - # get the regfile specs for this regfile port - (rf, read, write, wid, fuspec) = fspec - name = "rdflag_%s_%s" % (regfile, regname) - rdflag = Signal(name=name, reset_less=True) - comb += rdflag.eq(rf) - - # select the required read port. these are pre-defined sizes - print(rpidx, regfile, regs.rf.keys()) - rport = regs.rf[regfile.lower()].r_ports[rpidx] - - # create a priority picker to manage this port - rdpickers[regfile][rpidx] = rdpick = PriorityPicker( - len(fuspec)) - setattr(m.submodules, "rdpick_%s_%s" % - (regfile, rpidx), rdpick) - - # connect the regspec "reg select" number to this port - with m.If(rdpick.en_o): - comb += rport.ren.eq(read) - - # connect up the FU req/go signals, and the reg-read to the FU - # and create a Read Broadcast Bus - for pi, (funame, fu, idx) in enumerate(fuspec): - src = fu.src_i[idx] + self.connect_rdport(m, fu_bitdict, rdpickers, regfile, + regname, fspec) - # connect request-read to picker input, and output to go-rd - fu_active = fu_bitdict[funame] - pick = fu.rd_rel_o[idx] & fu_active & rdflag - comb += rdpick.i[pi].eq(pick) - comb += fu.go_rd_i[idx].eq(rdpick.o[pi]) + def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec): + comb, sync = m.d.comb, m.d.sync + fus = self.fus.fus + regs = self.regs - # connect regfile port to input, creating a Broadcast Bus - print("reg connect widths", - regfile, regname, pi, funame, - src.shape(), rport.data_o.shape()) - # all FUs connect to same port - comb += src.eq(rport.data_o) + print("connect wr", regname, fspec) + rpidx = regname + + # select the required write port. these are pre-defined sizes + print(regfile, regs.rf.keys()) + rfile = regs.rf[regfile.lower()] + wport = rfile.w_ports[rpidx] + + fspecs = fspec + if not isinstance(fspecs, list): + fspecs = [fspecs] + + pplen = 0 + writes = [] + ppoffs = [] + for i, fspec in enumerate(fspecs): + # get the regfile specs for this regfile port + (rf, read, write, wid, fuspec) = fspec + print ("fpsec", i, fspec, len(fuspec)) + ppoffs.append(pplen) # record offset for picker + pplen += len(fuspec) + + # create a priority picker to manage this port + wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen) + setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick) + + wsigs = [] + wens = [] + addrs = [] + for i, fspec in enumerate(fspecs): + # connect up the FU req/go signals and the reg-read to the FU + # these are arbitrated by Data.ok signals + (rf, read, write, wid, fuspec) = fspec + for pi, (funame, fu, idx) in enumerate(fuspec): + pi += ppoffs[i] + + # write-request comes from dest.ok + dest = fu.get_out(idx) + fu_dest_latch = fu.get_fu_out(idx) # latched output + name = "wrflag_%s_%s_%d" % (funame, regname, idx) + wrflag = Signal(name=name, reset_less=True) + comb += wrflag.eq(dest.ok & fu.busy_o) + + # connect request-write to picker input, and output to go-wr + fu_active = fu_bitdict[funame] + pick = fu.wr.rel_o[idx] & fu_active # & wrflag + comb += wrpick.i[pi].eq(pick) + # create a single-pulse go write from the picker output + wr_pick = Signal() + comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o) + comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick)) + + # connect the regspec write "reg select" number to this port + # only if one FU actually requests (and is granted) the port + # will the write-enable be activated + addr_en = Signal.like(write) + wp = Signal() + comb += wp.eq(wr_pick & wrpick.en_o) + comb += addr_en.eq(Mux(wp, write, 0)) + if rfile.unary: + wens.append(addr_en) + else: + addrs.append(addr_en) + wens.append(wp) + + # connect regfile port to input + print("reg connect widths", + regfile, regname, pi, funame, + dest.shape(), wport.data_i.shape()) + wsigs.append(fu_dest_latch) + + # here is where we create the Write Broadcast Bus. simple, eh? + comb += wport.data_i.eq(ortreereduce_sig(wsigs)) + if rfile.unary: + # for unary-addressed + comb += wport.wen.eq(ortreereduce_sig(wens)) + else: + # for binary-addressed + comb += wport.addr.eq(ortreereduce_sig(addrs)) + comb += wport.wen.eq(ortreereduce_sig(wens)) def connect_wrports(self, m, fu_bitdict): """connect write ports @@ -267,61 +457,26 @@ class NonProductionCore(Elaboratable): for regfile, spec in byregfiles_wr.items(): fuspecs = byregfiles_wrspec[regfile] wrpickers[regfile] = {} - for (regname, fspec) in sort_fuspecs(fuspecs): - print("connect wr", regname, fspec) - rpidx = regname - # get the regfile specs for this regfile port - (rf, read, write, wid, fuspec) = fspec - # select the required write port. these are pre-defined sizes - print(regfile, regs.rf.keys()) - wport = regs.rf[regfile.lower()].w_ports[rpidx] + # argh, more port-merging + if regfile == 'INT': + fuspecs['o'] = [fuspecs.pop('o')] + fuspecs['o'].append(fuspecs.pop('o1')) + if regfile == 'FAST': + fuspecs['fast1'] = [fuspecs.pop('fast1')] + if 'fast2' in fuspecs: + fuspecs['fast1'].append(fuspecs.pop('fast2')) - # create a priority picker to manage this port - wrpickers[regfile][rpidx] = wrpick = PriorityPicker( - len(fuspec)) - setattr(m.submodules, "wrpick_%s_%s" % - (regfile, rpidx), wrpick) - - # connect the regspec write "reg select" number to this port - # only if one FU actually requests (and is granted) the port - # will the write-enable be activated - with m.If(wrpick.en_o): - comb += wport.wen.eq(write) - with m.Else(): - comb += wport.wen.eq(0) - - # connect up the FU req/go signals and the reg-read to the FU - # these are arbitrated by Data.ok signals - wsigs = [] - for pi, (funame, fu, idx) in enumerate(fuspec): - # write-request comes from dest.ok - dest = fu.get_out(idx) - fu_dest_latch = fu.get_fu_out(idx) # latched output - name = "wrflag_%s_%s_%d" % (funame, regname, idx) - wrflag = Signal(name=name, reset_less=True) - comb += wrflag.eq(dest.ok & fu.busy_o) - - # connect request-read to picker input, and output to go-wr - fu_active = fu_bitdict[funame] - pick = fu.wr.rel[idx] & fu_active # & wrflag - comb += wrpick.i[pi].eq(pick) - comb += fu.go_wr_i[idx].eq(wrpick.o[pi] & wrpick.en_o) - # connect regfile port to input - print("reg connect widths", - regfile, regname, pi, funame, - dest.shape(), wport.data_i.shape()) - wsigs.append(fu_dest_latch) - - # here is where we create the Write Broadcast Bus. simple, eh? - comb += wport.data_i.eq(ortreereduce_sig(wsigs)) + for (regname, fspec) in sort_fuspecs(fuspecs): + self.connect_wrport(m, fu_bitdict, wrpickers, + regfile, regname, fspec) def get_byregfiles(self, readmode): mode = "read" if readmode else "write" - dec2 = self.pdecode2 regs = self.regs fus = self.fus.fus + e = self.e # decoded instruction to execute # dictionary of lists of regfile ports byregfiles = {} @@ -335,17 +490,17 @@ class NonProductionCore(Elaboratable): (regfile, regname, wid) = fu.get_out_spec(idx) print(" %d %s %s %s" % (idx, regfile, regname, str(wid))) if readmode: - rdflag, read = dec2.regspecmap_read(regfile, regname) + rdflag, read = regspec_decode_read(e, regfile, regname) write = None else: rdflag, read = None, None - wrport, write = dec2.regspecmap_write(regfile, regname) + wrport, write = regspec_decode_write(e, regfile, regname) if regfile not in byregfiles: byregfiles[regfile] = {} byregfiles_spec[regfile] = {} if regname not in byregfiles_spec[regfile]: byregfiles_spec[regfile][regname] = \ - [rdflag, read, write, wid, []] + (rdflag, read, write, wid, []) # here we start to create "lanes" if idx not in byregfiles[regfile]: byregfiles[regfile][idx] = [] @@ -370,7 +525,7 @@ class NonProductionCore(Elaboratable): def __iter__(self): yield from self.fus.ports() - yield from self.pdecode2.ports() + yield from self.e.ports() yield from self.l0.ports() # TODO: regs