from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
from nmigen.cli import rtlil
+from soc.decoder.power_decoder2 import PowerDecodeSubset
+from soc.decoder.power_regspec_map import regspec_decode_read
+from soc.decoder.power_regspec_map import regspec_decode_write
+
from nmutil.picker import PriorityPicker
from nmutil.util import treereduce
from soc.fu.compunits.compunits import AllFunctionUnits
from soc.regfile.regfiles import RegFiles
-from soc.decoder.power_decoder import create_pdecode
-from soc.decoder.power_decoder2 import PowerDecode2
+from soc.decoder.decode2execute1 import Decode2ToExecute1Type
+from soc.decoder.decode2execute1 import IssuerDecode2ToOperand
+from soc.decoder.power_decoder2 import get_rdflags
from soc.decoder.decode2execute1 import Data
from soc.experiment.l0_cache import TstL0CacheBuffer # test only
from soc.config.test.test_loadstore import TestMemPspec
from soc.decoder.power_enums import MicrOp
+from soc.config.state import CoreState
+
import operator
from nmutil.util import rising_edge
class NonProductionCore(Elaboratable):
def __init__(self, pspec):
    """Create the core's sub-components and one decoder per Function Unit.

    pspec is the configuration object; it is kept on the instance and
    handed to the LD/ST buffer and to the Function Unit collection.
    """
    self.pspec = pspec

    # single LD/ST funnel for memory access
    self.l0 = TstL0CacheBuffer(pspec, n_units=1)
    pi = self.l0.l0.dports[0]

    # function units (only one each)
    # only include mmu if enabled in pspec
    self.fus = AllFunctionUnits(pspec, pilist=[pi])

    # register files (yes plural)
    self.regs = RegFiles()

    # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
    self.e = Decode2ToExecute1Type("core", opkls=IssuerDecode2ToOperand)

    self.state = CoreState("core")
    self.raw_insn_i = Signal(32)  # raw instruction
    self.bigendian_i = Signal()  # bigendian

    # issue/valid/busy signalling
    self.ivalid_i = Signal(reset_less=True)  # instruction is valid
    self.issue_i = Signal(reset_less=True)
    self.busy_o = Signal(name="corebusy_o", reset_less=True)

    # start/stop and terminated signalling
    self.core_stopped_i = Signal(reset_less=True)
    self.core_terminate_o = Signal(reset=0)  # indicates stopped

    # create per-FU instruction decoders (subsetted)
    self.decoders = {}
    self.des = {}

    # name of the TRAP Function Unit; stays None when none is configured,
    # so that later code can test for it instead of hitting AttributeError
    self.trapunit = None

    for funame, fu in self.fus.fus.items():
        f_name = fu.fnunit.name
        opkls = fu.opsubsetkls
        if f_name == 'TRAP':
            # TRAP gets no subset decoder of its own: just remember its name
            self.trapunit = funame
            continue
        self.decoders[funame] = PowerDecodeSubset(None, opkls, f_name,
                                                  final=True,
                                                  state=self.state)
        self.des[funame] = self.decoders[funame].do

    # when an MMU decoder exists, give it a reference to the SPR decoder
    if "mmu0" in self.decoders:
        self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]
def elaborate(self, platform):
    """Assemble and return the core's Module.

    Registers the Function Units, the LD/ST buffer and every per-FU
    decoder as submodules, calls self.regs.elaborate_into(m, platform),
    drives each decoder from raw_insn_i / bigendian_i, then connects
    instruction issue and the regfile read and write ports.
    """
    m = Module()
    # for testing purposes, to cut down on build time in coriolis2
    if hasattr(self.pspec, "nocore") and self.pspec.nocore == True:
        x = Signal()  # dummy signal
        m.d.sync += x.eq(~x)
        return m
    comb = m.d.comb

    m.submodules.fus = self.fus
    m.submodules.l0 = self.l0
    self.regs.elaborate_into(m, platform)

    # connect decoders: all of them see the same instruction and endianness
    for dec in self.decoders.values():
        setattr(m.submodules, "dec_%s" % dec.fn_name, dec)
        comb += dec.dec.raw_opcode_in.eq(self.raw_insn_i)
        comb += dec.dec.bigendian.eq(self.bigendian_i)

    # shh, cheat: trap uses the main decoder because of the rewriting.
    # trapunit is only known when a TRAP Function Unit was configured,
    # so look it up defensively rather than assuming the attribute exists.
    trapunit = getattr(self, "trapunit", None)
    if trapunit is not None:
        self.des[trapunit] = self.e.do

    # connect up Function Units, then read/write ports
    fu_bitdict = self.connect_instruction(m)
    self.connect_rdports(m, fu_bitdict)
    self.connect_wrports(m, fu_bitdict)
    return m
def connect_instruction(self, m):
"""
comb, sync = m.d.comb, m.d.sync
fus = self.fus.fus
- dec2 = self.pdecode2
# enable-signals for each FU, get one bit for each FU (by name)
fu_enable = Signal(len(fus), reset_less=True)
for funame, fu in fus.items():
fnunit = fu.fnunit.value
enable = Signal(name="en_%s" % funame, reset_less=True)
- comb += enable.eq((dec2.e.do.fn_unit & fnunit).bool())
+ comb += enable.eq((self.e.do.fn_unit & fnunit).bool())
comb += fu_bitdict[funame].eq(enable)
# sigh - need a NOP counter
comb += self.busy_o.eq(1)
with m.If(self.ivalid_i): # run only when valid
- with m.Switch(dec2.e.do.insn_type):
+ with m.Switch(self.e.do.insn_type):
# check for ATTN: halt if true
with m.Case(MicrOp.OP_ATTN):
m.d.sync += self.core_terminate_o.eq(1)
with m.Default():
# connect up instructions. only one enabled at a time
for funame, fu in fus.items():
+ do = self.des[funame]
enable = fu_bitdict[funame]
# run this FunctionUnit if enabled
+ # route op, issue, busy, read flags and mask to FU
with m.If(enable):
- # route op, issue, busy, read flags and mask to FU
- comb += fu.oper_i.eq_from_execute1(dec2.e)
+ # operand comes from the *local* decoder
+ comb += fu.oper_i.eq_from(do)
+ #comb += fu.oper_i.eq_from_execute1(e)
comb += fu.issue_i.eq(self.issue_i)
comb += self.busy_o.eq(fu.busy_o)
- rdmask = dec2.rdflags(fu)
+ # rdmask, which is for registers, needs to come
+ # from the *main* decoder
+ rdmask = get_rdflags(self.e, fu)
comb += fu.rdmaskn.eq(~rdmask)
return fu_bitdict
rpidx = regname
# select the required read port. these are pre-defined sizes
- print(rpidx, regfile, regs.rf.keys())
- rport = regs.rf[regfile.lower()].r_ports[rpidx]
+ rfile = regs.rf[regfile.lower()]
+ rport = rfile.r_ports[rpidx]
+ print("read regfile", rpidx, regfile, regs.rf.keys(),
+ rfile, rfile.unary)
fspecs = fspec
if not isinstance(fspecs, list):
setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)
rens = []
+ addrs = []
for i, fspec in enumerate(fspecs):
(rf, read, write, wid, fuspec) = fspec
# connect up the FU req/go signals, and the reg-read to the FU
# and create a Read Broadcast Bus
for pi, (funame, fu, idx) in enumerate(fuspec):
pi += ppoffs[i]
- src = fu.src_i[idx]
# connect request-read to picker input, and output to go-rd
fu_active = fu_bitdict[funame]
- pick = Signal()
- comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i])
- print (pick, len(pick))
- print (rdpick.i, len(rdpick.i), pi)
+ name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
+ addr_en = Signal.like(reads[i], name="addr_en_"+name)
+ pick = Signal(name="pick_"+name) # picker input
+ rp = Signal(name="rp_"+name) # picker output
+ delay_pick = Signal(name="dp_"+name) # read-enable "underway"
+
+ # exclude any currently-enabled read-request (mask out active)
+ comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
+ ~delay_pick)
comb += rdpick.i[pi].eq(pick)
- comb += fu.go_rd_i[idx].eq(rdpick.o[pi])
+ comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick
# if picked, select read-port "reg select" number to port
- read_en = Signal.like(reads[i])
- comb += read_en.eq(Mux(rdpick.o[pi] & rdpick.en_o, reads[i], 0))
- rens.append(read_en)
+ comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
+ sync += delay_pick.eq(rp) # delayed "pick"
+ comb += addr_en.eq(Mux(rp, reads[i], 0))
+
+ # the read-enable happens combinatorially (see mux-bus below)
+ # but it results in the data coming out on a one-cycle delay.
+ if rfile.unary:
+ rens.append(addr_en)
+ else:
+ addrs.append(addr_en)
+ rens.append(rp)
- with m.If(rdpick.o[pi] & rdpick.en_o):
- # connect regfile port to input, creating a Broadcast Bus
+ # use the *delayed* pick signal to put requested data onto bus
+ with m.If(delay_pick):
+ # connect regfile port to input, creating fan-out Bus
+ src = fu.src_i[idx]
print("reg connect widths",
regfile, regname, pi, funame,
src.shape(), rport.data_o.shape())
comb += src.eq(rport.data_o)
# or-reduce the muxed read signals
- comb += rport.ren.eq(ortreereduce_sig(rens))
+ if rfile.unary:
+ # for unary-addressed
+ comb += rport.ren.eq(ortreereduce_sig(rens))
+ else:
+ # for binary-addressed
+ comb += rport.addr.eq(ortreereduce_sig(addrs))
+ comb += rport.ren.eq(Cat(*rens).bool())
+ print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)
def connect_rdports(self, m, fu_bitdict):
"""connect read ports
# argh. an experiment to merge RA and RB in the INT regfile
# (we have too many read/write ports)
- if regfile == 'INT':
- fuspecs['rbc'] = [fuspecs.pop('rb')]
- fuspecs['rbc'].append(fuspecs.pop('rc'))
- if regfile == 'FAST':
- fuspecs['fast1'] = [fuspecs.pop('fast1')]
- fuspecs['fast1'].append(fuspecs.pop('fast2'))
+ #if regfile == 'INT':
+ #fuspecs['rabc'] = [fuspecs.pop('rb')]
+ #fuspecs['rabc'].append(fuspecs.pop('rc'))
+ #fuspecs['rabc'].append(fuspecs.pop('ra'))
+ #if regfile == 'FAST':
+ # fuspecs['fast1'] = [fuspecs.pop('fast1')]
+ # if 'fast2' in fuspecs:
+ # fuspecs['fast1'].append(fuspecs.pop('fast2'))
# for each named regfile port, connect up all FUs to that port
for (regname, fspec) in sort_fuspecs(fuspecs):
# select the required write port. these are pre-defined sizes
print(regfile, regs.rf.keys())
- wport = regs.rf[regfile.lower()].w_ports[rpidx]
+ rfile = regs.rf[regfile.lower()]
+ wport = rfile.w_ports[rpidx]
fspecs = fspec
if not isinstance(fspecs, list):
wsigs = []
wens = []
+ addrs = []
for i, fspec in enumerate(fspecs):
# connect up the FU req/go signals and the reg-read to the FU
# these are arbitrated by Data.ok signals
# connect the regspec write "reg select" number to this port
# only if one FU actually requests (and is granted) the port
# will the write-enable be activated
- write_en = Signal.like(write)
- comb += write_en.eq(Mux(wr_pick & wrpick.en_o, write, 0))
- wens.append(write_en)
+ addr_en = Signal.like(write)
+ wp = Signal()
+ comb += wp.eq(wr_pick & wrpick.en_o)
+ comb += addr_en.eq(Mux(wp, write, 0))
+ if rfile.unary:
+ wens.append(addr_en)
+ else:
+ addrs.append(addr_en)
+ wens.append(wp)
# connect regfile port to input
print("reg connect widths",
# here is where we create the Write Broadcast Bus. simple, eh?
comb += wport.data_i.eq(ortreereduce_sig(wsigs))
- comb += wport.wen.eq(ortreereduce_sig(wens))
+ if rfile.unary:
+ # for unary-addressed
+ comb += wport.wen.eq(ortreereduce_sig(wens))
+ else:
+ # for binary-addressed
+ comb += wport.addr.eq(ortreereduce_sig(addrs))
+ comb += wport.wen.eq(ortreereduce_sig(wens))
def connect_wrports(self, m, fu_bitdict):
"""connect write ports
fuspecs['o'].append(fuspecs.pop('o1'))
if regfile == 'FAST':
fuspecs['fast1'] = [fuspecs.pop('fast1')]
- fuspecs['fast1'].append(fuspecs.pop('fast2'))
+ if 'fast2' in fuspecs:
+ fuspecs['fast1'].append(fuspecs.pop('fast2'))
for (regname, fspec) in sort_fuspecs(fuspecs):
self.connect_wrport(m, fu_bitdict, wrpickers,
def get_byregfiles(self, readmode):
mode = "read" if readmode else "write"
- dec2 = self.pdecode2
regs = self.regs
fus = self.fus.fus
+ e = self.e # decoded instruction to execute
# dictionary of lists of regfile ports
byregfiles = {}
(regfile, regname, wid) = fu.get_out_spec(idx)
print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
if readmode:
- rdflag, read = dec2.regspecmap_read(regfile, regname)
+ rdflag, read = regspec_decode_read(e, regfile, regname)
write = None
else:
rdflag, read = None, None
- wrport, write = dec2.regspecmap_write(regfile, regname)
+ wrport, write = regspec_decode_write(e, regfile, regname)
if regfile not in byregfiles:
byregfiles[regfile] = {}
byregfiles_spec[regfile] = {}
def __iter__(self):
    """Yield the ports of the Function Units, then of the decoded
    instruction record (self.e), then of the LD/ST buffer."""
    yield from self.fus.ports()
    yield from self.e.ports()
    yield from self.l0.ports()
    # TODO: regs