not in any way intended for production use. connects up FunctionUnits to
Register Files in a brain-dead fashion that only permits one and only one
Function Unit to be operational.
+
+the principle here is to take the Function Units, analyse their regspecs,
+and turn their requirements for access to register file read/write ports
+into groupings by Register File and Register File Port name.
+
+under each grouping - by regfile/port - a list of Function Units that
+need to connect to that port is created. as these are a contended
+resource a "Broadcast Bus" per read/write port is then also created,
+with access to it managed by a PriorityPicker.
+
+the brain-dead part of this module is that even though there is no
+conflict of access, regfile read/write hazards are *not* analysed,
+and consequently it is safer to wait for the Function Unit to complete
+before allowing a new instruction to proceed.
"""
-from nmigen import Elaboratable, Module, Signal
+
+from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
from nmigen.cli import rtlil
+from soc.decoder.power_regspec_map import regspec_decode_read
+from soc.decoder.power_regspec_map import regspec_decode_write
+
from nmutil.picker import PriorityPicker
from nmutil.util import treereduce
from soc.fu.compunits.compunits import AllFunctionUnits
from soc.regfile.regfiles import RegFiles
-from soc.decoder.power_decoder import create_pdecode
-from soc.decoder.power_decoder2 import PowerDecode2
+from soc.decoder.decode2execute1 import Decode2ToExecute1Type
+from soc.decoder.power_decoder2 import get_rdflags
+from soc.decoder.decode2execute1 import Data
+from soc.experiment.l0_cache import TstL0CacheBuffer # test only
+from soc.config.test.test_loadstore import TestMemPspec
+from soc.decoder.power_enums import MicrOp
+import operator
+from nmutil.util import rising_edge
+# helper function for reducing a list of signals down to a parallel
+# ORed single signal.
def ortreereduce(tree, attr="data_o"):
return treereduce(tree, operator.or_, lambda x: getattr(x, attr))
+def ortreereduce_sig(tree):
+ return treereduce(tree, operator.or_, lambda x: x)
+
+
+# helper function to place full regs declarations first
+def sort_fuspecs(fuspecs):
+ res = []
+ for (regname, fspec) in fuspecs.items():
+ if regname.startswith("full"):
+ res.append((regname, fspec))
+ for (regname, fspec) in fuspecs.items():
+ if not regname.startswith("full"):
+ res.append((regname, fspec))
+ return res # enumerate(res)
+
+
class NonProductionCore(Elaboratable):
- def __init__(self):
- self.fus = AllFunctionUnits()
+ def __init__(self, pspec):
+ # single LD/ST funnel for memory access
+ self.l0 = TstL0CacheBuffer(pspec, n_units=1)
+ pi = self.l0.l0.dports[0]
+
+ # function units (only one each)
+ self.fus = AllFunctionUnits(pspec, pilist=[pi])
+
+ # register files (yes plural)
self.regs = RegFiles()
- self.pdecode = pdecode = create_pdecode()
- self.pdecode2 = PowerDecode2(pdecode) # instruction decoder
- self.ivalid_i = self.pdecode2.e.valid # instruction is valid
+
+ # instruction decoder
+ self.e = Decode2ToExecute1Type() # decoded instruction
+
+ # issue/valid/busy signalling
+ self.ivalid_i = Signal(reset_less=True) # instruction is valid
+ self.issue_i = Signal(reset_less=True)
+ self.busy_o = Signal(name="corebusy_o", reset_less=True)
+
+ # start/stop and terminated signalling
+ self.core_stopped_i = Signal(reset_less=True)
+ self.core_reset_i = Signal()
+ self.core_terminate_o = Signal(reset=0) # indicates stopped
def elaborate(self, platform):
m = Module()
- comb = m.d.comb
- m.submodules.pdecode2 = dec2 = self.pdecode2
m.submodules.fus = self.fus
+ m.submodules.l0 = l0 = self.l0
self.regs.elaborate_into(m, platform)
regs = self.regs
fus = self.fus.fus
+ # connect up Function Units, then read/write ports
+ fu_bitdict = self.connect_instruction(m)
+ self.connect_rdports(m, fu_bitdict)
+ self.connect_wrports(m, fu_bitdict)
+
+ # connect up reset
+ m.d.comb += ResetSignal().eq(self.core_reset_i)
+
+ return m
+
+ def connect_instruction(self, m):
+ """connect_instruction
+
+ uses decoded (from PowerOp) function unit information from CSV files
+ to ascertain which Function Unit should deal with the current
+ instruction.
+
+ some (such as OP_ATTN, OP_NOP) are dealt with here, including
+ ignoring it and halting the processor. OP_NOP is a bit annoying
+ because the issuer expects busy flag still to be raised then lowered.
+ (this requires a fake counter to be set).
+ """
+ comb, sync = m.d.comb, m.d.sync
+ fus = self.fus.fus
+ e = self.e # to execute
+
# enable-signals for each FU, get one bit for each FU (by name)
fu_enable = Signal(len(fus), reset_less=True)
fu_bitdict = {}
for i, funame in enumerate(fus.keys()):
fu_bitdict[funame] = fu_enable[i]
- # dictionary of lists of regfile read ports
- byregfiles_rd = {}
- byregfiles_rdspec = {}
- for (funame, fu) in fus.items():
- print ("read ports for %s" % funame)
- for idx in range(fu.n_src):
- (regfile, regname, wid) = fu.get_in_spec(idx)
- print (" %s %s %s" % (regfile, regname, str(wid)))
- rdflag, read, _ = dec2.regspecmap(regfile, regname)
- if regfile not in byregfiles_rd:
- byregfiles_rd[regfile] = {}
- byregfiles_rdspec[regfile] = (regname, rdflag, read, wid)
- # here we start to create "lanes"
- if idx not in byregfiles_rd[regfile]:
- byregfiles_rd[regfile][idx] = []
- fuspec = (funame, fu)
- byregfiles_rd[regfile][idx].append(fuspec)
+ # enable the required Function Unit based on the opcode decode
+ # note: this *only* works correctly for simple core when one and
+ # *only* one FU is allocated per instruction
+ for funame, fu in fus.items():
+ fnunit = fu.fnunit.value
+ enable = Signal(name="en_%s" % funame, reset_less=True)
+ comb += enable.eq((e.do.fn_unit & fnunit).bool())
+ comb += fu_bitdict[funame].eq(enable)
- # ok just print that out, for convenience
- for regfile, spec in byregfiles_rd.items():
- print ("regfile read ports:", regfile)
- for idx, fuspec in spec.items():
- print (" regfile read port %s lane: %d" % (regfile, idx))
- (regname, rdflag, read, wid) = byregfiles_rdspec[regfile]
- print (" %s" % regname, wid, read, rdflag)
- for (funame, fu) in fuspec:
- print (" ", funame, fu, fu.src_i[idx])
- print ()
+ # sigh - need a NOP counter
+ counter = Signal(2)
+ with m.If(counter != 0):
+ sync += counter.eq(counter - 1)
+ comb += self.busy_o.eq(1)
+
+ with m.If(self.ivalid_i): # run only when valid
+ with m.Switch(e.do.insn_type):
+ # check for ATTN: halt if true
+ with m.Case(MicrOp.OP_ATTN):
+ m.d.sync += self.core_terminate_o.eq(1)
+
+ with m.Case(MicrOp.OP_NOP):
+ sync += counter.eq(2)
+ comb += self.busy_o.eq(1)
+
+ with m.Default():
+ # connect up instructions. only one enabled at a time
+ for funame, fu in fus.items():
+ enable = fu_bitdict[funame]
+
+ # run this FunctionUnit if enabled
+ with m.If(enable):
+ # route op, issue, busy, read flags and mask to FU
+ comb += fu.oper_i.eq_from_execute1(e)
+ comb += fu.issue_i.eq(self.issue_i)
+ comb += self.busy_o.eq(fu.busy_o)
+ rdmask = get_rdflags(e, fu)
+ comb += fu.rdmaskn.eq(~rdmask)
+
+ return fu_bitdict
+
+ def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
+ comb, sync = m.d.comb, m.d.sync
+ fus = self.fus.fus
+ regs = self.regs
+
+ rpidx = regname
+
+ # select the required read port. these are pre-defined sizes
+ rfile = regs.rf[regfile.lower()]
+ rport = rfile.r_ports[rpidx]
+ print("read regfile", rpidx, regfile, regs.rf.keys(),
+ rfile, rfile.unary)
+
+ fspecs = fspec
+ if not isinstance(fspecs, list):
+ fspecs = [fspecs]
+
+ rdflags = []
+ pplen = 0
+ reads = []
+ ppoffs = []
+ for i, fspec in enumerate(fspecs):
+ # get the regfile specs for this regfile port
+ (rf, read, write, wid, fuspec) = fspec
+ print ("fpsec", i, fspec, len(fuspec))
+ ppoffs.append(pplen) # record offset for picker
+ pplen += len(fuspec)
+ name = "rdflag_%s_%s_%d" % (regfile, regname, i)
+ rdflag = Signal(name=name, reset_less=True)
+ comb += rdflag.eq(rf)
+ rdflags.append(rdflag)
+ reads.append(read)
+
+ print ("pplen", pplen)
+
+ # create a priority picker to manage this port
+ rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
+ setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)
+
+ rens = []
+ addrs = []
+ for i, fspec in enumerate(fspecs):
+ (rf, read, write, wid, fuspec) = fspec
+ # connect up the FU req/go signals, and the reg-read to the FU
+ # and create a Read Broadcast Bus
+ for pi, (funame, fu, idx) in enumerate(fuspec):
+ pi += ppoffs[i]
+
+ # connect request-read to picker input, and output to go-rd
+ fu_active = fu_bitdict[funame]
+ name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
+ addr_en = Signal.like(reads[i], name="addr_en_"+name)
+ pick = Signal(name="pick_"+name) # picker input
+ rp = Signal(name="rp_"+name) # picker output
+ delay_pick = Signal(name="dp_"+name) # read-enable "underway"
+
+ # exclude any currently-enabled read-request (mask out active)
+ comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
+ ~delay_pick)
+ comb += rdpick.i[pi].eq(pick)
+ comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick
+
+ # if picked, select read-port "reg select" number to port
+ comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
+ sync += delay_pick.eq(rp) # delayed "pick"
+ comb += addr_en.eq(Mux(rp, reads[i], 0))
+
+ # the read-enable happens combinatorially (see mux-bus below)
+ # but it results in the data coming out on a one-cycle delay.
+ if rfile.unary:
+ rens.append(addr_en)
+ else:
+ addrs.append(addr_en)
+ rens.append(rp)
+
+ # use the *delayed* pick signal to put requested data onto bus
+ with m.If(delay_pick):
+ # connect regfile port to input, creating fan-out Bus
+ src = fu.src_i[idx]
+ print("reg connect widths",
+ regfile, regname, pi, funame,
+ src.shape(), rport.data_o.shape())
+ # all FUs connect to same port
+ comb += src.eq(rport.data_o)
+
+ # or-reduce the muxed read signals
+ if rfile.unary:
+ # for unary-addressed
+ comb += rport.ren.eq(ortreereduce_sig(rens))
+ else:
+ # for binary-addressed
+ comb += rport.addr.eq(ortreereduce_sig(addrs))
+ comb += rport.ren.eq(Cat(*rens).bool())
+ print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)
+
+ def connect_rdports(self, m, fu_bitdict):
+ """connect read ports
+
+ orders the read regspecs into a dict-of-dicts, by regfile, by
+ regport name, then connects all FUs that want that regport by
+ way of a PriorityPicker.
+ """
+ comb, sync = m.d.comb, m.d.sync
+ fus = self.fus.fus
+ regs = self.regs
+
+ # dictionary of lists of regfile read ports
+ byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)
# okaay, now we need a PriorityPicker per regfile per regfile port
# loootta pickers... peter piper picked a pack of pickled peppers...
rdpickers = {}
for regfile, spec in byregfiles_rd.items():
+ fuspecs = byregfiles_rdspec[regfile]
rdpickers[regfile] = {}
- for rpidx, (idx, fuspec) in enumerate(spec.items()):
- # get the regfile specs for this regfile port
- (regname, rdflag, read, wid) = byregfiles_rdspec[regfile]
-
- # "munge" the regfile port index, due to full-port access
- if regfile in ['xer', 'cr']:
- if regname.startswith('full'):
- rpidx = 0 # by convention, first port
- else:
- rpidx += 1 # start indexing port 0 from 1
-
- # select the required read port. these are pre-defined sizes
- print (regfile, regs.rf.keys())
- rport = regs.rf[regfile.lower()].r_ports[rpidx]
-
- # create a priority picker to manage this port
- rdpickers[regfile][idx] = rdpick = PriorityPicker(len(fuspec))
- setattr(m.submodules, "rdpick_%s_%d" % (regfile, idx), rdpick)
-
- # connect the regspec "reg select" number to this port
- with m.If(rdpick.en_o):
- comb += rport.ren.eq(read)
-
- # connect up the FU req/go signals and the reg-read to the FU
- for pi, (funame, fu) in enumerate(fuspec):
- # connect request-read to picker input, and output to go-rd
- fu_active = fu_bitdict[funame]
- comb += rdpick.i[pi].eq(fu.rd_rel_o[idx] & fu_active)
- comb += fu.go_rd_i[idx].eq(rdpick.o[pi])
- # connect regfile port to input
- comb += fu.src_i[idx].eq(rport.data_o)
- return m
+ # argh. an experiment to merge RA and RB in the INT regfile
+ # (we have too many read/write ports)
+ #if regfile == 'INT':
+ #fuspecs['rabc'] = [fuspecs.pop('rb')]
+ #fuspecs['rabc'].append(fuspecs.pop('rc'))
+ #fuspecs['rabc'].append(fuspecs.pop('ra'))
+ #if regfile == 'FAST':
+ # fuspecs['fast1'] = [fuspecs.pop('fast1')]
+ # if 'fast2' in fuspecs:
+ # fuspecs['fast1'].append(fuspecs.pop('fast2'))
+
+ # for each named regfile port, connect up all FUs to that port
+ for (regname, fspec) in sort_fuspecs(fuspecs):
+ print("connect rd", regname, fspec)
+ self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
+ regname, fspec)
+
+ def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
+ comb, sync = m.d.comb, m.d.sync
+ fus = self.fus.fus
+ regs = self.regs
+
+ print("connect wr", regname, fspec)
+ rpidx = regname
+
+ # select the required write port. these are pre-defined sizes
+ print(regfile, regs.rf.keys())
+ rfile = regs.rf[regfile.lower()]
+ wport = rfile.w_ports[rpidx]
+
+ fspecs = fspec
+ if not isinstance(fspecs, list):
+ fspecs = [fspecs]
+
+ pplen = 0
+ writes = []
+ ppoffs = []
+ for i, fspec in enumerate(fspecs):
+ # get the regfile specs for this regfile port
+ (rf, read, write, wid, fuspec) = fspec
+ print ("fpsec", i, fspec, len(fuspec))
+ ppoffs.append(pplen) # record offset for picker
+ pplen += len(fuspec)
+
+ # create a priority picker to manage this port
+ wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
+ setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)
+
+ wsigs = []
+ wens = []
+ addrs = []
+ for i, fspec in enumerate(fspecs):
+ # connect up the FU req/go signals and the reg-read to the FU
+ # these are arbitrated by Data.ok signals
+ (rf, read, write, wid, fuspec) = fspec
+ for pi, (funame, fu, idx) in enumerate(fuspec):
+ pi += ppoffs[i]
+
+ # write-request comes from dest.ok
+ dest = fu.get_out(idx)
+ fu_dest_latch = fu.get_fu_out(idx) # latched output
+ name = "wrflag_%s_%s_%d" % (funame, regname, idx)
+ wrflag = Signal(name=name, reset_less=True)
+ comb += wrflag.eq(dest.ok & fu.busy_o)
+
+ # connect request-write to picker input, and output to go-wr
+ fu_active = fu_bitdict[funame]
+ pick = fu.wr.rel_o[idx] & fu_active # & wrflag
+ comb += wrpick.i[pi].eq(pick)
+ # create a single-pulse go write from the picker output
+ wr_pick = Signal()
+ comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
+ comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))
+
+ # connect the regspec write "reg select" number to this port
+ # only if one FU actually requests (and is granted) the port
+ # will the write-enable be activated
+ addr_en = Signal.like(write)
+ wp = Signal()
+ comb += wp.eq(wr_pick & wrpick.en_o)
+ comb += addr_en.eq(Mux(wp, write, 0))
+ if rfile.unary:
+ wens.append(addr_en)
+ else:
+ addrs.append(addr_en)
+ wens.append(wp)
+
+ # connect regfile port to input
+ print("reg connect widths",
+ regfile, regname, pi, funame,
+ dest.shape(), wport.data_i.shape())
+ wsigs.append(fu_dest_latch)
+
+ # here is where we create the Write Broadcast Bus. simple, eh?
+ comb += wport.data_i.eq(ortreereduce_sig(wsigs))
+ if rfile.unary:
+ # for unary-addressed
+ comb += wport.wen.eq(ortreereduce_sig(wens))
+ else:
+ # for binary-addressed
+ comb += wport.addr.eq(ortreereduce_sig(addrs))
+ comb += wport.wen.eq(ortreereduce_sig(wens))
+
+ def connect_wrports(self, m, fu_bitdict):
+ """connect write ports
+
+ orders the write regspecs into a dict-of-dicts, by regfile,
+ by regport name, then connects all FUs that want that regport
+ by way of a PriorityPicker.
+
+ note that the write-port wen, write-port data, and go_wr_i all need to
+ be on the exact same clock cycle. as there is a combinatorial loop bug
+ at the moment, these all use sync.
+ """
+ comb, sync = m.d.comb, m.d.sync
+ fus = self.fus.fus
+ regs = self.regs
+ # dictionary of lists of regfile write ports
+ byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)
+
+ # same for write ports.
+ # BLECH! complex code-duplication! BLECH!
+ wrpickers = {}
+ for regfile, spec in byregfiles_wr.items():
+ fuspecs = byregfiles_wrspec[regfile]
+ wrpickers[regfile] = {}
+
+ # argh, more port-merging
+ if regfile == 'INT':
+ fuspecs['o'] = [fuspecs.pop('o')]
+ fuspecs['o'].append(fuspecs.pop('o1'))
+ if regfile == 'FAST':
+ fuspecs['fast1'] = [fuspecs.pop('fast1')]
+ if 'fast2' in fuspecs:
+ fuspecs['fast1'].append(fuspecs.pop('fast2'))
+
+ for (regname, fspec) in sort_fuspecs(fuspecs):
+ self.connect_wrport(m, fu_bitdict, wrpickers,
+ regfile, regname, fspec)
+
+ def get_byregfiles(self, readmode):
+
+ mode = "read" if readmode else "write"
+ regs = self.regs
+ fus = self.fus.fus
+ e = self.e # decoded instruction to execute
+
+ # dictionary of lists of regfile ports
+ byregfiles = {}
+ byregfiles_spec = {}
+ for (funame, fu) in fus.items():
+ print("%s ports for %s" % (mode, funame))
+ for idx in range(fu.n_src if readmode else fu.n_dst):
+ if readmode:
+ (regfile, regname, wid) = fu.get_in_spec(idx)
+ else:
+ (regfile, regname, wid) = fu.get_out_spec(idx)
+ print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
+ if readmode:
+ rdflag, read = regspec_decode_read(e, regfile, regname)
+ write = None
+ else:
+ rdflag, read = None, None
+ wrport, write = regspec_decode_write(e, regfile, regname)
+ if regfile not in byregfiles:
+ byregfiles[regfile] = {}
+ byregfiles_spec[regfile] = {}
+ if regname not in byregfiles_spec[regfile]:
+ byregfiles_spec[regfile][regname] = \
+ (rdflag, read, write, wid, [])
+ # here we start to create "lanes"
+ if idx not in byregfiles[regfile]:
+ byregfiles[regfile][idx] = []
+ fuspec = (funame, fu, idx)
+ byregfiles[regfile][idx].append(fuspec)
+ byregfiles_spec[regfile][regname][4].append(fuspec)
+
+ # ok just print that out, for convenience
+ for regfile, spec in byregfiles.items():
+ print("regfile %s ports:" % mode, regfile)
+ fuspecs = byregfiles_spec[regfile]
+ for regname, fspec in fuspecs.items():
+ [rdflag, read, write, wid, fuspec] = fspec
+ print(" rf %s port %s lane: %s" % (mode, regfile, regname))
+ print(" %s" % regname, wid, read, write, rdflag)
+ for (funame, fu, idx) in fuspec:
+ fusig = fu.src_i[idx] if readmode else fu.dest[idx]
+ print(" ", funame, fu, idx, fusig)
+ print()
+
+ return byregfiles, byregfiles_spec
def __iter__(self):
yield from self.fus.ports()
- yield from self.pdecode2.ports()
+ yield from self.e.ports()
+ yield from self.l0.ports()
# TODO: regs
def ports(self):
if __name__ == '__main__':
- dut = NonProductionCore()
+ pspec = TestMemPspec(ldst_ifacetype='testpi',
+ imem_ifacetype='',
+ addr_wid=48,
+ mask_wid=8,
+ reg_wid=64)
+ dut = NonProductionCore(pspec)
vl = rtlil.convert(dut, ports=dut.ports())
- with open("non_production_core.il", "w") as f:
+ with open("test_core.il", "w") as f:
f.write(vl)