From: Luke Kenneth Casson Leighton Date: Sun, 9 Feb 2020 19:54:38 +0000 (+0000) Subject: split out nmutil library based on ieee754fpu code X-Git-Tag: 24jan2021_ls180~90 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=4241aad8550d189a4aff51a0ecba9777347ac3bf;p=nmutil.git split out nmutil library based on ieee754fpu code --- diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..77d4f5d --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +*.vcd +*.py? +!*.pyi +.*.sw? +__pycache__ +*.v +*.il +*.il.* +.eggs +*.egg-info +*.gtkw diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e33b622 --- /dev/null +++ b/Makefile @@ -0,0 +1,7 @@ +PYTHON3 ?= "python3" + +install: + $(PYTHON3) setup.py develop # yes, develop, not install + +test: + $(PYTHON3) setup.py test # could just run nosetest3... diff --git a/README.md b/README.md new file mode 100644 index 0000000..d96fb45 --- /dev/null +++ b/README.md @@ -0,0 +1,9 @@ +# NMigen Util + +This project implements utilities for nmigen + +# Requirements + +* nmigen +* yosys (latest git repository, required by nmigen) + diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..2dfe140 --- /dev/null +++ b/setup.py @@ -0,0 +1,39 @@ +from setuptools import setup, find_packages +import sys, os + +here = os.path.abspath(os.path.dirname(__file__)) +README = open(os.path.join(here, 'README.md')).read() +NEWS = open(os.path.join(here, 'NEWS.txt')).read() + +version = '0.0.1' + +install_requires = [ +] + +test_requires = [ + 'nose', +] + +setup( + name='nmigen', + version=version, + description="A nmigen utility library", + long_description=README + '\n\n' + NEWS, + classifiers=[ + "Topic :: Software Development :: Libraries", + "License :: OSI Approved :: LGPLv3+", + "Programming Language :: Python :: 3", + ], + keywords='nmigen utilities', + author='Luke Kenneth Casson Leighton', + author_email='lkcl@libre-riscv.org', + url='http://git.libre-riscv.org/?p=nmutil', + license='GPLv3+', + 
packages=find_packages('src'), + package_dir = {'': 'src'}, + include_package_data=True, + zip_safe=False, + install_requires=install_requires, + tests_require=test_requires, + test_suite='nose.collector', +) diff --git a/src/nmutil/__init__.py b/src/nmutil/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/nmutil/concurrentunit.py b/src/nmutil/concurrentunit.py new file mode 100644 index 0000000..da63d32 --- /dev/null +++ b/src/nmutil/concurrentunit.py @@ -0,0 +1,81 @@ +""" concurrent unit from mitch alsup augmentations to 6600 scoreboard + + * data fans in + * data goes through a pipeline + * results fan back out. + + the output data format has to have a member "muxid", which is used + as the array index on fan-out +""" + +from math import log +from nmigen import Module, Elaboratable +from nmigen.cli import main, verilog + +from nmutil.singlepipe import PassThroughStage +from nmutil.multipipe import CombMuxOutPipe +from nmutil.multipipe import PriorityCombMuxInPipe + + +def num_bits(n): + return int(log(n) / log(2)) + + +class FPADDInMuxPipe(PriorityCombMuxInPipe): + def __init__(self, num_rows, iospecfn, maskwid=0): + self.num_rows = num_rows + stage = PassThroughStage(iospecfn) + PriorityCombMuxInPipe.__init__(self, stage, p_len=self.num_rows, + maskwid=maskwid) + + +class FPADDMuxOutPipe(CombMuxOutPipe): + def __init__(self, num_rows, iospecfn, maskwid=0): + self.num_rows = num_rows + stage = PassThroughStage(iospecfn) + CombMuxOutPipe.__init__(self, stage, n_len=self.num_rows, + maskwid=maskwid) + + +class ReservationStations(Elaboratable): + """ Reservation-Station pipeline + + Input: num_rows - number of input and output Reservation Stations + + Requires: the addition of an "alu" object, from which ispec and ospec + are taken, and inpipe and outpipe are connected to it + + * fan-in on inputs (an array of FPADDBaseData: a,b,mid) + * ALU pipeline + * fan-out on outputs (an array of FPPackData: z,mid) + + Fan-in and Fan-out are 
combinatorial. + """ + def __init__(self, num_rows, maskwid=0): + self.num_rows = nr = num_rows + self.inpipe = FPADDInMuxPipe(nr, self.i_specfn, maskwid) # fan-in + self.outpipe = FPADDMuxOutPipe(nr, self.o_specfn, maskwid) # fan-out + + self.p = self.inpipe.p # kinda annoying, + self.n = self.outpipe.n # use pipe in/out as this class in/out + self._ports = self.inpipe.ports() + self.outpipe.ports() + + def elaborate(self, platform): + m = Module() + m.submodules.inpipe = self.inpipe + m.submodules.alu = self.alu + m.submodules.outpipe = self.outpipe + + m.d.comb += self.inpipe.n.connect_to_next(self.alu.p) + m.d.comb += self.alu.connect_to_next(self.outpipe) + + return m + + def ports(self): + return self._ports + + def i_specfn(self): + return self.alu.ispec() + + def o_specfn(self): + return self.alu.ospec() diff --git a/src/nmutil/dynamicpipe.py b/src/nmutil/dynamicpipe.py new file mode 100644 index 0000000..f9c649c --- /dev/null +++ b/src/nmutil/dynamicpipe.py @@ -0,0 +1,100 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# See Notices.txt for copyright information + +""" Meta-class that allows a dynamic runtime parameter-selectable "mixin" + +The reasons why this technique is being deployed is because SimpleHandshake +needs to be dynamically replaced at the end-users' choice, without having +to duplicate dozens of classes using multiple-inheritanc "Mix-in" techniques. + +It is however extremely unusual, and has been explicitly limited to this *one* +module. DO NOT try to use this technique elsewhere, it is extremely hard to +understand (meta-class programming). 
+ +""" + +from abc import ABCMeta + +from nmutil.singlepipe import SimpleHandshake +from nmutil.singlepipe import MaskCancellable + +import threading + +# with many thanks to jsbueno on stackexchange for this one +# https://stackoverflow.com/questions/57273070/ +# list post: +# http://lists.libre-riscv.org/pipermail/libre-riscv-dev/2019-July/002259.html + +class Meta(ABCMeta): + registry = {} + recursing = threading.local() + recursing.check = False + mlock = threading.Lock() + + def __call__(cls, *args, **kw): + mcls = cls.__class__ + if mcls.recursing.check: + return super().__call__(*args, **kw) + spec = args[0] + base = spec.pipekls # pick up the dynamic class from PipelineSpec, HERE + + if (cls, base) not in mcls.registry: + print ("__call__", args, kw, cls, base, + base.__bases__, cls.__bases__) + mcls.registry[cls, base] = type( + cls.__name__, + (cls, base) + cls.__bases__[1:], + {} + ) + real_cls = mcls.registry[cls, base] + + with mcls.mlock: + mcls.recursing.check = True + instance = real_cls.__class__.__call__(real_cls, *args, **kw) + mcls.recursing.check = False + return instance + + +# Inherit from this class instead of SimpleHandshake (or other ControlBase +# derivative), and the metaclass will instead *replace* DynamicPipe - +# *at runtime* - with the class that is specified *as a parameter* +# in PipelineSpec. +# +# as explained in the list posting and in the stackexchange post, this is +# needed to avoid a MASSIVE suite of duplicated multiple-inheritance classes +# that "Mix in" SimpleHandshake (or other). +# +# unfortunately, composition does not work in this instance +# (make an *instance* of SimpleHandshake or other class and pass it in) +# due to the multiple level inheritance, and in several places +# the inheriting class needs to do some setup that the deriving class +# needs in order to function correctly. 
+ +class DynamicPipe(metaclass=Meta): + def __init__(self, *args): + print ("DynamicPipe init", super(), args) + super().__init__(self, *args) + + +# bad hack: the DynamicPipe metaclass ends up creating an __init__ signature +# for the dynamically-derived class. luckily, SimpleHandshake only needs +# "self" as the 1st argument (it is its own "Stage"). anything else +# could hypothetically be passed through the pspec. +class SimpleHandshakeRedir(SimpleHandshake): + def __init__(self, mod, *args): + print ("redir", mod, args) + stage = self + if args and args[0].stage: + stage = args[0].stage + SimpleHandshake.__init__(self, stage) + + +class MaskCancellableRedir(MaskCancellable): + def __init__(self, mod, *args): + stage = self + maskwid = args[0].maskwid + if args[0].stage: + stage = args[0].stage + print ("redir mask", mod, args, maskwid) + MaskCancellable.__init__(self, stage, maskwid) + diff --git a/src/nmutil/iocontrol.py b/src/nmutil/iocontrol.py new file mode 100644 index 0000000..efe0c38 --- /dev/null +++ b/src/nmutil/iocontrol.py @@ -0,0 +1,282 @@ +""" IO Control API + + Associated development bugs: + * http://bugs.libre-riscv.org/show_bug.cgi?id=148 + * http://bugs.libre-riscv.org/show_bug.cgi?id=64 + * http://bugs.libre-riscv.org/show_bug.cgi?id=57 + + Important: see Stage API (stageapi.py) in combination with below + + Main classes: PrevControl and NextControl. + + These classes manage the data and the synchronisation state + to the previous and next stage, respectively. ready/valid + signals are used by the Pipeline classes to tell if data + may be safely passed from stage to stage. + + The connection from one stage to the next is carried out with + NextControl.connect_to_next. It is *not* necessary to have + a PrevControl.connect_to_prev because it is functionally + directly equivalent to prev->next->connect_to_next. 
+""" + +from nmigen import Signal, Cat, Const, Module, Value, Elaboratable +from nmigen.cli import verilog, rtlil +from nmigen.hdl.rec import Record + +from collections.abc import Sequence, Iterable +from collections import OrderedDict + +from nmutil import nmoperator + + +class Object: + def __init__(self): + self.fields = OrderedDict() + + def __setattr__(self, k, v): + print ("kv", k, v) + if (k.startswith('_') or k in ["fields", "name", "src_loc"] or + k in dir(Object) or "fields" not in self.__dict__): + return object.__setattr__(self, k, v) + self.fields[k] = v + + def __getattr__(self, k): + if k in self.__dict__: + return object.__getattr__(self, k) + try: + return self.fields[k] + except KeyError as e: + raise AttributeError(e) + + def __iter__(self): + for x in self.fields.values(): # OrderedDict so order is preserved + if isinstance(x, Iterable): + yield from x + else: + yield x + + def eq(self, inp): + res = [] + for (k, o) in self.fields.items(): + i = getattr(inp, k) + print ("eq", o, i) + rres = o.eq(i) + if isinstance(rres, Sequence): + res += rres + else: + res.append(rres) + print (res) + return res + + def ports(self): # being called "keys" would be much better + return list(self) + + +class RecordObject(Record): + def __init__(self, layout=None, name=None): + Record.__init__(self, layout=layout or [], name=name) + + def __setattr__(self, k, v): + #print (dir(Record)) + if (k.startswith('_') or k in ["fields", "name", "src_loc"] or + k in dir(Record) or "fields" not in self.__dict__): + return object.__setattr__(self, k, v) + self.fields[k] = v + #print ("RecordObject setattr", k, v) + if isinstance(v, Record): + newlayout = {k: (k, v.layout)} + elif isinstance(v, Value): + newlayout = {k: (k, v.shape())} + else: + newlayout = {k: (k, nmoperator.shape(v))} + self.layout.fields.update(newlayout) + + def __iter__(self): + for x in self.fields.values(): # remember: fields is an OrderedDict + if isinstance(x, Iterable): + yield from x # a bit like 
flatten (nmigen.tools) + else: + yield x + + def ports(self): # would be better being called "keys" + return list(self) + + +class PrevControl(Elaboratable): + """ contains signals that come *from* the previous stage (both in and out) + * valid_i: previous stage indicating all incoming data is valid. + may be a multi-bit signal, where all bits are required + to be asserted to indicate "valid". + * ready_o: output to next stage indicating readiness to accept data + * data_i : an input - MUST be added by the USER of this class + """ + + def __init__(self, i_width=1, stage_ctl=False, maskwid=0, offs=0): + self.stage_ctl = stage_ctl + self.maskwid = maskwid + if maskwid: + self.mask_i = Signal(maskwid) # prev >>in self + self.stop_i = Signal(maskwid) # prev >>in self + self.valid_i = Signal(i_width, name="p_valid_i") # prev >>in self + self._ready_o = Signal(name="p_ready_o") # prev < 1: + # multi-bit case: valid only when valid_i is all 1s + all1s = Const(-1, (len(self.valid_i), False)) + valid_i = (self.valid_i == all1s) + else: + # single-bit valid_i case + valid_i = self.valid_i + + # when stage indicates not ready, incoming data + # must "appear" to be not ready too + if self.stage_ctl: + valid_i = valid_i & self.s_ready_o + + return valid_i + + def elaborate(self, platform): + m = Module() + m.d.comb += self.trigger.eq(self.valid_i_test & self.ready_o) + return m + + def eq(self, i): + res = [nmoperator.eq(self.data_i, i.data_i), + self.ready_o.eq(i.ready_o), + self.valid_i.eq(i.valid_i)] + if self.maskwid: + res.append(self.mask_i.eq(i.mask_i)) + return res + + def __iter__(self): + yield self.valid_i + yield self.ready_o + if self.maskwid: + yield self.mask_i + yield self.stop_i + if hasattr(self.data_i, "ports"): + yield from self.data_i.ports() + elif isinstance(self.data_i, Sequence): + yield from self.data_i + else: + yield self.data_i + + def ports(self): + return list(self) + + +class NextControl(Elaboratable): + """ contains the signals that go *to* the 
next stage (both in and out) + * valid_o: output indicating to next stage that data is valid + * ready_i: input from next stage indicating that it can accept data + * data_o : an output - MUST be added by the USER of this class + """ + def __init__(self, stage_ctl=False, maskwid=0): + self.stage_ctl = stage_ctl + self.maskwid = maskwid + if maskwid: + self.mask_o = Signal(maskwid) # self out>> next + self.stop_o = Signal(maskwid) # self out>> next + self.valid_o = Signal(name="n_valid_o") # self out>> next + self.ready_i = Signal(name="n_ready_i") # self < 1: + r_data = Array(r_data) + p_valid_i = Array(p_valid_i) + n_ready_in = Array(n_ready_in) + data_valid = Array(data_valid) + + nirn = Signal(reset_less=True) + m.d.comb += nirn.eq(~self.n.ready_i) + mid = self.p_mux.m_id + print ("CombMuxIn mid", self, self.stage, self.routemask, mid, p_len) + for i in range(p_len): + m.d.comb += data_valid[i].eq(0) + m.d.comb += n_ready_in[i].eq(1) + m.d.comb += p_valid_i[i].eq(0) + #m.d.comb += self.p[i].ready_o.eq(~data_valid[i] | self.n.ready_i) + m.d.comb += self.p[i].ready_o.eq(0) + p = self.p[mid] + maskedout = Signal(reset_less=True) + if hasattr(p, "mask_i"): + m.d.comb += maskedout.eq(p.mask_i & ~p.stop_i) + else: + m.d.comb += maskedout.eq(1) + m.d.comb += p_valid_i[mid].eq(maskedout & self.p_mux.active) + m.d.comb += self.p[mid].ready_o.eq(~data_valid[mid] | self.n.ready_i) + m.d.comb += n_ready_in[mid].eq(nirn & data_valid[mid]) + anyvalid = Signal(i, reset_less=True) + av = [] + for i in range(p_len): + av.append(data_valid[i]) + anyvalid = Cat(*av) + m.d.comb += self.n.valid_o.eq(anyvalid.bool()) + m.d.comb += data_valid[mid].eq(p_valid_i[mid] | \ + (n_ready_in[mid] )) + + if self.routemask: + # XXX hack - fixes loop + m.d.comb += eq(self.n.stop_o, self.p[-1].stop_i) + for i in range(p_len): + p = self.p[i] + vr = Signal(name="vr%d" % i, reset_less=True) + maskedout = Signal(name="maskedout%d" % i, reset_less=True) + if hasattr(p, "mask_i"): + m.d.comb += 
maskedout.eq(p.mask_i & ~p.stop_i) + else: + m.d.comb += maskedout.eq(1) + m.d.comb += vr.eq(maskedout.bool() & p.valid_i & p.ready_o) + #m.d.comb += vr.eq(p.valid_i & p.ready_o) + with m.If(vr): + m.d.comb += eq(self.n.mask_o, self.p[i].mask_i) + m.d.comb += eq(r_data[i], self.p[i].data_i) + else: + ml = [] # accumulate output masks + ms = [] # accumulate output stops + for i in range(p_len): + vr = Signal(reset_less=True) + p = self.p[i] + vr = Signal(reset_less=True) + maskedout = Signal(reset_less=True) + if hasattr(p, "mask_i"): + m.d.comb += maskedout.eq(p.mask_i & ~p.stop_i) + else: + m.d.comb += maskedout.eq(1) + m.d.comb += vr.eq(maskedout.bool() & p.valid_i & p.ready_o) + with m.If(vr): + m.d.comb += eq(r_data[i], self.p[i].data_i) + if self.maskwid: + mlen = len(self.p[i].mask_i) + s = mlen*i + e = mlen*(i+1) + ml.append(Mux(vr, self.p[i].mask_i, Const(0, mlen))) + ms.append(self.p[i].stop_i) + if self.maskwid: + m.d.comb += self.n.mask_o.eq(Cat(*ml)) + m.d.comb += self.n.stop_o.eq(Cat(*ms)) + + m.d.comb += eq(self.n.data_o, self.process(r_data[mid])) + + return m + + +class NonCombMultiInPipeline(MultiInControlBase): + """ A multi-input pipeline block conforming to the Pipeline API + + Attributes: + ----------- + p.data_i : StageInput, shaped according to ispec + The pipeline input + p.data_o : StageOutput, shaped according to ospec + The pipeline output + r_data : input_shape according to ispec + A temporary (buffered) copy of a prior (valid) input. + This is HELD if the output is not ready. It is updated + SYNCHRONOUSLY. 
+ """ + + def __init__(self, stage, p_len, p_mux, maskwid=0, routemask=False): + MultiInControlBase.__init__(self, p_len=p_len, maskwid=maskwid, + routemask=routemask) + self.stage = stage + self.maskwid = maskwid + self.p_mux = p_mux + + # set up the input and output data + for i in range(p_len): + name = 'data_i_%d' % i + self.p[i].data_i = _spec(stage.ispec, name) # input type + self.n.data_o = _spec(stage.ospec, 'data_o') + + def process(self, i): + if hasattr(self.stage, "process"): + return self.stage.process(i) + return i + + def elaborate(self, platform): + m = MultiInControlBase.elaborate(self, platform) + + m.submodules.p_mux = self.p_mux + + # need an array of buffer registers conforming to *input* spec + r_data = [] + r_busy = [] + p_valid_i = [] + p_len = len(self.p) + for i in range(p_len): + name = 'r_%d' % i + r = _spec(self.stage.ispec, name) # input type + r_data.append(r) + r_busy.append(Signal(name="r_busy%d" % i, reset_less=True)) + p_valid_i.append(Signal(name="p_valid_i%d" % i, reset_less=True)) + if hasattr(self.stage, "setup"): + print ("setup", self, self.stage, r) + self.stage.setup(m, r) + if len(r_data) > 1: + r_data = Array(r_data) + p_valid_i = Array(p_valid_i) + r_busy = Array(r_busy) + + nirn = Signal(reset_less=True) + m.d.comb += nirn.eq(~self.n.ready_i) + mid = self.p_mux.m_id + print ("CombMuxIn mid", self, self.stage, self.routemask, mid, p_len) + for i in range(p_len): + m.d.comb += r_busy[i].eq(0) + m.d.comb += n_ready_in[i].eq(1) + m.d.comb += p_valid_i[i].eq(0) + m.d.comb += self.p[i].ready_o.eq(n_ready_in[i]) + p = self.p[mid] + maskedout = Signal(reset_less=True) + if hasattr(p, "mask_i"): + m.d.comb += maskedout.eq(p.mask_i & ~p.stop_i) + else: + m.d.comb += maskedout.eq(1) + m.d.comb += p_valid_i[mid].eq(maskedout & self.p_mux.active) + m.d.comb += self.p[mid].ready_o.eq(~data_valid[mid] | self.n.ready_i) + m.d.comb += n_ready_in[mid].eq(nirn & data_valid[mid]) + anyvalid = Signal(i, reset_less=True) + av = [] + for i 
in range(p_len): + av.append(data_valid[i]) + anyvalid = Cat(*av) + m.d.comb += self.n.valid_o.eq(anyvalid.bool()) + m.d.comb += data_valid[mid].eq(p_valid_i[mid] | \ + (n_ready_in[mid] )) + + if self.routemask: + # XXX hack - fixes loop + m.d.comb += eq(self.n.stop_o, self.p[-1].stop_i) + for i in range(p_len): + p = self.p[i] + vr = Signal(name="vr%d" % i, reset_less=True) + maskedout = Signal(name="maskedout%d" % i, reset_less=True) + if hasattr(p, "mask_i"): + m.d.comb += maskedout.eq(p.mask_i & ~p.stop_i) + else: + m.d.comb += maskedout.eq(1) + m.d.comb += vr.eq(maskedout.bool() & p.valid_i & p.ready_o) + #m.d.comb += vr.eq(p.valid_i & p.ready_o) + with m.If(vr): + m.d.comb += eq(self.n.mask_o, self.p[i].mask_i) + m.d.comb += eq(r_data[i], self.p[i].data_i) + else: + ml = [] # accumulate output masks + ms = [] # accumulate output stops + for i in range(p_len): + vr = Signal(reset_less=True) + p = self.p[i] + vr = Signal(reset_less=True) + maskedout = Signal(reset_less=True) + if hasattr(p, "mask_i"): + m.d.comb += maskedout.eq(p.mask_i & ~p.stop_i) + else: + m.d.comb += maskedout.eq(1) + m.d.comb += vr.eq(maskedout.bool() & p.valid_i & p.ready_o) + with m.If(vr): + m.d.comb += eq(r_data[i], self.p[i].data_i) + if self.maskwid: + mlen = len(self.p[i].mask_i) + s = mlen*i + e = mlen*(i+1) + ml.append(Mux(vr, self.p[i].mask_i, Const(0, mlen))) + ms.append(self.p[i].stop_i) + if self.maskwid: + m.d.comb += self.n.mask_o.eq(Cat(*ml)) + m.d.comb += self.n.stop_o.eq(Cat(*ms)) + + m.d.comb += eq(self.n.data_o, self.process(r_data[mid])) + + return m + + +class CombMuxOutPipe(CombMultiOutPipeline): + def __init__(self, stage, n_len, maskwid=0, muxidname=None, + routemask=False): + muxidname = muxidname or "muxid" + # HACK: stage is also the n-way multiplexer + CombMultiOutPipeline.__init__(self, stage, n_len=n_len, + n_mux=stage, maskwid=maskwid, + routemask=routemask) + + # HACK: n-mux is also the stage... 
so set the muxid equal to input muxid + muxid = getattr(self.p.data_i, muxidname) + print ("combmuxout", muxidname, muxid) + stage.m_id = muxid + + + +class InputPriorityArbiter(Elaboratable): + """ arbitration module for Input-Mux pipe, baed on PriorityEncoder + """ + def __init__(self, pipe, num_rows): + self.pipe = pipe + self.num_rows = num_rows + self.mmax = int(log(self.num_rows) / log(2)) + self.m_id = Signal(self.mmax, reset_less=True) # multiplex id + self.active = Signal(reset_less=True) + + def elaborate(self, platform): + m = Module() + + assert len(self.pipe.p) == self.num_rows, \ + "must declare input to be same size" + pe = PriorityEncoder(self.num_rows) + m.submodules.selector = pe + + # connect priority encoder + in_ready = [] + for i in range(self.num_rows): + p_valid_i = Signal(reset_less=True) + if self.pipe.maskwid and not self.pipe.routemask: + p = self.pipe.p[i] + maskedout = Signal(reset_less=True) + m.d.comb += maskedout.eq(p.mask_i & ~p.stop_i) + m.d.comb += p_valid_i.eq(maskedout.bool() & p.valid_i_test) + else: + m.d.comb += p_valid_i.eq(self.pipe.p[i].valid_i_test) + in_ready.append(p_valid_i) + m.d.comb += pe.i.eq(Cat(*in_ready)) # array of input "valids" + m.d.comb += self.active.eq(~pe.n) # encoder active (one input valid) + m.d.comb += self.m_id.eq(pe.o) # output one active input + + return m + + def ports(self): + return [self.m_id, self.active] + + + +class PriorityCombMuxInPipe(CombMultiInPipeline): + """ an example of how to use the combinatorial pipeline. 
+ """ + + def __init__(self, stage, p_len=2, maskwid=0, routemask=False): + p_mux = InputPriorityArbiter(self, p_len) + CombMultiInPipeline.__init__(self, stage, p_len, p_mux, + maskwid=maskwid, routemask=routemask) + + +if __name__ == '__main__': + + from nmutil.test.example_buf_pipe import ExampleStage + dut = PriorityCombMuxInPipe(ExampleStage) + vl = rtlil.convert(dut, ports=dut.ports()) + with open("test_combpipe.il", "w") as f: + f.write(vl) diff --git a/src/nmutil/nmoperator.py b/src/nmutil/nmoperator.py new file mode 100644 index 0000000..fd50d2f --- /dev/null +++ b/src/nmutil/nmoperator.py @@ -0,0 +1,172 @@ +""" nmigen operator functions / utils + + eq: + -- + + a strategically very important function that is identical in function + to nmigen's Signal.eq function, except it may take objects, or a list + of objects, or a tuple of objects, and where objects may also be + Records. +""" + +from nmigen import Signal, Cat, Const, Mux, Module, Value, Elaboratable +from nmigen.cli import verilog, rtlil +from nmigen.lib.fifo import SyncFIFO, SyncFIFOBuffered +from nmigen.hdl.ast import ArrayProxy +from nmigen.hdl.rec import Record, Layout + +from abc import ABCMeta, abstractmethod +from collections.abc import Sequence, Iterable +from collections import OrderedDict +from nmutil.queue import Queue +import inspect + + +class Visitor2: + """ a helper class for iterating twin-argument compound data structures. + + Record is a special (unusual, recursive) case, where the input may be + specified as a dictionary (which may contain further dictionaries, + recursively), where the field names of the dictionary must match + the Record's field spec. Alternatively, an object with the same + member names as the Record may be assigned: it does not have to + *be* a Record. + + ArrayProxy is also special-cased, it's a bit messy: whilst ArrayProxy + has an eq function, the object being assigned to it (e.g. a python + object) might not. 
despite the *input* having an eq function, + that doesn't help us, because it's the *ArrayProxy* that's being + assigned to. so.... we cheat. use the ports() function of the + python object, enumerate them, find out the list of Signals that way, + and assign them. + """ + def iterator2(self, o, i): + if isinstance(o, dict): + yield from self.dict_iter2(o, i) + + if not isinstance(o, Sequence): + o, i = [o], [i] + for (ao, ai) in zip(o, i): + #print ("visit", fn, ao, ai) + if isinstance(ao, Record): + yield from self.record_iter2(ao, ai) + elif isinstance(ao, ArrayProxy) and not isinstance(ai, Value): + yield from self.arrayproxy_iter2(ao, ai) + else: + yield (ao, ai) + + def dict_iter2(self, o, i): + for (k, v) in o.items(): + print ("d-iter", v, i[k]) + yield (v, i[k]) + return res + + def _not_quite_working_with_all_unit_tests_record_iter2(self, ao, ai): + print ("record_iter2", ao, ai, type(ao), type(ai)) + if isinstance(ai, Value): + if isinstance(ao, Sequence): + ao, ai = [ao], [ai] + for o, i in zip(ao, ai): + yield (o, i) + return + for idx, (field_name, field_shape, _) in enumerate(ao.layout): + if isinstance(field_shape, Layout): + val = ai.fields + else: + val = ai + if hasattr(val, field_name): # check for attribute + val = getattr(val, field_name) + else: + val = val[field_name] # dictionary-style specification + yield from self.iterator2(ao.fields[field_name], val) + + def record_iter2(self, ao, ai): + for idx, (field_name, field_shape, _) in enumerate(ao.layout): + if isinstance(field_shape, Layout): + val = ai.fields + else: + val = ai + if hasattr(val, field_name): # check for attribute + val = getattr(val, field_name) + else: + val = val[field_name] # dictionary-style specification + yield from self.iterator2(ao.fields[field_name], val) + + def arrayproxy_iter2(self, ao, ai): + #print ("arrayproxy_iter2", ai.ports(), ai, ao) + for p in ai.ports(): + #print ("arrayproxy - p", p, p.name, ao) + op = getattr(ao, p.name) + yield from self.iterator2(op, 
p) + + +class Visitor: + """ a helper class for iterating single-argument compound data structures. + similar to Visitor2. + """ + def iterate(self, i): + """ iterate a compound structure recursively using yield + """ + if not isinstance(i, Sequence): + i = [i] + for ai in i: + #print ("iterate", ai) + if isinstance(ai, Record): + #print ("record", list(ai.layout)) + yield from self.record_iter(ai) + elif isinstance(ai, ArrayProxy) and not isinstance(ai, Value): + yield from self.array_iter(ai) + else: + yield ai + + def record_iter(self, ai): + for idx, (field_name, field_shape, _) in enumerate(ai.layout): + if isinstance(field_shape, Layout): + val = ai.fields + else: + val = ai + if hasattr(val, field_name): # check for attribute + val = getattr(val, field_name) + else: + val = val[field_name] # dictionary-style specification + #print ("recidx", idx, field_name, field_shape, val) + yield from self.iterate(val) + + def array_iter(self, ai): + for p in ai.ports(): + yield from self.iterate(p) + + +def eq(o, i): + """ makes signals equal: a helper routine which identifies if it is being + passed a list (or tuple) of objects, or signals, or Records, and calls + the objects' eq function. + """ + res = [] + for (ao, ai) in Visitor2().iterator2(o, i): + rres = ao.eq(ai) + if not isinstance(rres, Sequence): + rres = [rres] + res += rres + return res + + +def shape(i): + #print ("shape", i) + r = 0 + for part in list(i): + #print ("shape?", part) + s, _ = part.shape() + r += s + return r, False + + +def cat(i): + """ flattens a compound structure recursively using Cat + """ + from nmigen._utils import flatten + #res = list(flatten(i)) # works (as of nmigen commit f22106e5) HOWEVER... 
+ res = list(Visitor().iterate(i)) # needed because input may be a sequence + return Cat(*res) + + diff --git a/src/nmutil/noconflict.py b/src/nmutil/noconflict.py new file mode 100644 index 0000000..ad7eb09 --- /dev/null +++ b/src/nmutil/noconflict.py @@ -0,0 +1,55 @@ +import inspect, types + +############## preliminary: two utility functions ##################### + +def skip_redundant(iterable, skipset=None): + "Redundant items are repeated items or items in the original skipset." + if skipset is None: skipset = set() + for item in iterable: + if item not in skipset: + skipset.add(item) + yield item + + +def remove_redundant(metaclasses): + skipset = set([type]) + for meta in metaclasses: # determines the metaclasses to be skipped + skipset.update(inspect.getmro(meta)[1:]) + return tuple(skip_redundant(metaclasses, skipset)) + +################################################################## +## now the core of the module: two mutually recursive functions ## +################################################################## + +memoized_metaclasses_map = {} + +def get_noconflict_metaclass(bases, left_metas, right_metas): + """Not intended to be used outside of this module, unless you know + what you are doing.""" + # make tuple of needed metaclasses in specified priority order + metas = left_metas + tuple(map(type, bases)) + right_metas + needed_metas = remove_redundant(metas) + + # return existing confict-solving meta, if any + if needed_metas in memoized_metaclasses_map: + return memoized_metaclasses_map[needed_metas] + # nope: compute, memoize and return needed conflict-solving meta + elif not needed_metas: # wee, a trivial case, happy us + meta = type + elif len(needed_metas) == 1: # another trivial case + meta = needed_metas[0] + # check for recursion, can happen i.e. for Zope ExtensionClasses + elif needed_metas == bases: + raise TypeError("Incompatible root metatypes", needed_metas) + else: # gotta work ... 
+ metaname = '_' + ''.join([m.__name__ for m in needed_metas]) + meta = classmaker()(metaname, needed_metas, {}) + memoized_metaclasses_map[needed_metas] = meta + return meta + +def classmaker(left_metas=(), right_metas=()): + def make_class(name, bases, adict): + print ("make_class", name) + metaclass = get_noconflict_metaclass(bases, left_metas, right_metas) + return metaclass(name, bases, adict) + return make_class diff --git a/src/nmutil/picker.py b/src/nmutil/picker.py new file mode 100644 index 0000000..d47f785 --- /dev/null +++ b/src/nmutil/picker.py @@ -0,0 +1,42 @@ +""" Priority Picker: optimised back-to-back PriorityEncoder and Decoder + + The input is N bits, the output is N bits wide and only one is + enabled. +""" + +from nmigen import Module, Signal, Cat, Elaboratable + +class PriorityPicker(Elaboratable): + """ implements a priority-picker. input: N bits, output: N bits + """ + def __init__(self, wid): + self.wid = wid + # inputs + self.i = Signal(wid, reset_less=True) + self.o = Signal(wid, reset_less=True) + + def elaborate(self, platform): + m = Module() + + res = [] + ni = Signal(self.wid, reset_less = True) + m.d.comb += ni.eq(~self.i) + for i in range(0, self.wid): + t = Signal(reset_less = True) + res.append(t) + if i == 0: + m.d.comb += t.eq(self.i[i]) + else: + m.d.comb += t.eq(~Cat(ni[i], *self.i[:i]).bool()) + + # we like Cat(*xxx). turn lists into concatenated bits + m.d.comb += self.o.eq(Cat(*res)) + + return m + + def __iter__(self): + yield self.i + yield self.o + + def ports(self): + return list(self) diff --git a/src/nmutil/pipeline.py b/src/nmutil/pipeline.py new file mode 100644 index 0000000..812b527 --- /dev/null +++ b/src/nmutil/pipeline.py @@ -0,0 +1,394 @@ +""" Example 5: Making use of PyRTL and Introspection. 
""" + +from collections.abc import Sequence + +from nmigen import Signal +from nmigen.hdl.rec import Record +from nmigen import tracer +from nmigen.compat.fhdl.bitcontainer import value_bits_sign +from contextlib import contextmanager + +from nmutil.nmoperator import eq +from nmutil.singlepipe import StageCls, ControlBase, BufferedHandshake +from nmutil.singlepipe import UnbufferedPipeline + + +# The following example shows how pyrtl can be used to make some interesting +# hardware structures using python introspection. In particular, this example +# makes a N-stage pipeline structure. Any specific pipeline is then a derived +# class of SimplePipeline where methods with names starting with "stage" are +# stages, and new members with names not starting with "_" are to be registered +# for the next stage. + +def like(value, rname, pipe, pipemode=False): + if isinstance(value, ObjectProxy): + return ObjectProxy.like(pipe, value, pipemode=pipemode, + name=rname, reset_less=True) + else: + return Signal(value_bits_sign(value), name=rname, + reset_less=True) + return Signal.like(value, name=rname, reset_less=True) + +def get_assigns(_assigns): + assigns = [] + for e in _assigns: + if isinstance(e, ObjectProxy): + assigns += get_assigns(e._assigns) + else: + assigns.append(e) + return assigns + + +def get_eqs(_eqs): + eqs = [] + for e in _eqs: + if isinstance(e, ObjectProxy): + eqs += get_eqs(e._eqs) + else: + eqs.append(e) + return eqs + + +class ObjectProxy: + def __init__(self, m, name=None, pipemode=False, syncmode=True): + self._m = m + if name is None: + name = tracer.get_var_name(default=None) + self.name = name + self._pipemode = pipemode + self._syncmode = syncmode + self._eqs = {} + self._assigns = [] + self._preg_map = {} + + @classmethod + def like(cls, m, value, pipemode=False, name=None, src_loc_at=0, **kwargs): + name = name or tracer.get_var_name(depth=2 + src_loc_at, + default="$like") + + src_loc_at_1 = 1 + src_loc_at + r = ObjectProxy(m, value.name, 
pipemode) + #for a, aname in value._preg_map.items(): + # r._preg_map[aname] = like(a, aname, m, pipemode) + for a in value.ports(): + aname = a.name + r._preg_map[aname] = like(a, aname, m, pipemode) + return r + + def __repr__(self): + subobjs = [] + for a in self.ports(): + aname = a.name + ai = self._preg_map[aname] + subobjs.append(repr(ai)) + return "" % subobjs + + def get_specs(self, liked=False): + res = [] + for k, v in self._preg_map.items(): + #v = like(v, k, stage._m) + res.append(v) + if isinstance(v, ObjectProxy): + res += v.get_specs() + return res + + def eq(self, i): + print ("ObjectProxy eq", self, i) + res = [] + for a in self.ports(): + aname = a.name + ai = i._preg_map[aname] + res.append(a.eq(ai)) + return res + + def ports(self): + res = [] + for aname, a in self._preg_map.items(): + if isinstance(a, Signal) or isinstance(a, ObjectProxy) or \ + isinstance(a, Record): + res.append(a) + #print ("ObjectPorts", res) + return res + + def __getattr__(self, name): + try: + v = self._preg_map[name] + return v + #return like(v, name, self._m) + except KeyError: + raise AttributeError( + 'error, no pipeline register "%s" defined for OP %s' + % (name, self.name)) + + def __setattr__(self, name, value): + if name.startswith('_') or name in ['name', 'ports', 'eq', 'like']: + # do not do anything tricky with variables starting with '_' + object.__setattr__(self, name, value) + return + #rname = "%s_%s" % (self.name, name) + rname = name + new_pipereg = like(value, rname, self._m, self._pipemode) + self._preg_map[name] = new_pipereg + #object.__setattr__(self, name, new_pipereg) + if self._pipemode: + #print ("OP pipemode", self._syncmode, new_pipereg, value) + assign = eq(new_pipereg, value) + if self._syncmode: + self._m.d.sync += assign + else: + self._m.d.comb += assign + elif self._m: + #print ("OP !pipemode assign", new_pipereg, value, type(value)) + self._m.d.comb += eq(new_pipereg, value) + else: + #print ("OP !pipemode !m", new_pipereg, value, 
type(value)) + self._assigns += eq(new_pipereg, value) + if isinstance(value, ObjectProxy): + #print ("OP, defer assigns:", value._assigns) + self._assigns += value._assigns + self._eqs.append(value._eqs) + + +class PipelineStage: + """ Pipeline builder stage with auto generation of pipeline registers. + """ + + def __init__(self, name, m, prev=None, pipemode=False, ispec=None): + self._m = m + self._stagename = name + self._preg_map = {'__nextstage__': {}} + self._prev_stage = prev + self._ispec = ispec + if ispec: + self._preg_map[self._stagename] = ispec + if prev: + print ("prev", prev._stagename, prev._preg_map) + #if prev._stagename in prev._preg_map: + # m = prev._preg_map[prev._stagename] + # self._preg_map[prev._stagename] = m + if '__nextstage__' in prev._preg_map: + m = prev._preg_map['__nextstage__'] + m = likedict(m) + self._preg_map[self._stagename] = m + #for k, v in m.items(): + #m[k] = like(v, k, self._m) + print ("make current", self._stagename, m) + self._pipemode = pipemode + self._eqs = {} + self._assigns = [] + + def __getattribute__(self, name): + if name.startswith('_'): + return object.__getattribute__(self, name) + #if name in self._preg_map['__nextstage__']: + # return self._preg_map['__nextstage__'][name] + try: + print ("getattr", name, object.__getattribute__(self, '_preg_map')) + v = self._preg_map[self._stagename][name] + return v + #return like(v, name, self._m) + except KeyError: + raise AttributeError( + 'error, no pipeline register "%s" defined for stage %s' + % (name, self._stagename)) + + def __setattr__(self, name, value): + if name.startswith('_'): + # do not do anything tricky with variables starting with '_' + object.__setattr__(self, name, value) + return + pipereg_id = self._stagename + rname = 'pipereg_' + pipereg_id + '_' + name + new_pipereg = like(value, rname, self._m, self._pipemode) + next_stage = '__nextstage__' + if next_stage not in self._preg_map: + self._preg_map[next_stage] = {} + 
self._preg_map[next_stage][name] = new_pipereg + print ("setattr", name, value, self._preg_map) + if self._pipemode: + self._eqs[name] = new_pipereg + assign = eq(new_pipereg, value) + print ("pipemode: append", new_pipereg, value, assign) + if isinstance(value, ObjectProxy): + print ("OP, assigns:", value._assigns) + self._assigns += value._assigns + self._eqs[name]._eqs = value._eqs + #self._m.d.comb += assign + self._assigns += assign + elif self._m: + print ("!pipemode: assign", new_pipereg, value) + assign = eq(new_pipereg, value) + self._m.d.sync += assign + else: + print ("!pipemode !m: defer assign", new_pipereg, value) + assign = eq(new_pipereg, value) + self._eqs[name] = new_pipereg + self._assigns += assign + if isinstance(value, ObjectProxy): + print ("OP, defer assigns:", value._assigns) + self._assigns += value._assigns + self._eqs[name]._eqs = value._eqs + +def likelist(specs): + res = [] + for v in specs: + res.append(like(v, v.name, None, pipemode=True)) + return res + +def likedict(specs): + if not isinstance(specs, dict): + return like(specs, specs.name, None, pipemode=True) + res = {} + for k, v in specs.items(): + res[k] = likedict(v) + return res + + +class AutoStage(StageCls): + def __init__(self, inspecs, outspecs, eqs, assigns): + self.inspecs, self.outspecs = inspecs, outspecs + self.eqs, self.assigns = eqs, assigns + #self.o = self.ospec() + def ispec(self): return likedict(self.inspecs) + def ospec(self): return likedict(self.outspecs) + + def process(self, i): + print ("stage process", i) + return self.eqs + + def setup(self, m, i): + print ("stage setup i", i, m) + print ("stage setup inspecs", self.inspecs) + print ("stage setup outspecs", self.outspecs) + print ("stage setup eqs", self.eqs) + #self.o = self.ospec() + m.d.comb += eq(self.inspecs, i) + #m.d.comb += eq(self.outspecs, self.eqs) + #m.d.comb += eq(self.o, i) + + +class AutoPipe(UnbufferedPipeline): + def __init__(self, stage, assigns): + UnbufferedPipeline.__init__(self, 
stage) + self.assigns = assigns + + def elaborate(self, platform): + m = UnbufferedPipeline.elaborate(self, platform) + m.d.comb += self.assigns + print ("assigns", self.assigns, m) + return m + + +class PipeManager: + def __init__(self, m, pipemode=False, pipetype=None): + self.m = m + self.pipemode = pipemode + self.pipetype = pipetype + + @contextmanager + def Stage(self, name, prev=None, ispec=None): + if ispec: + ispec = likedict(ispec) + print ("start stage", name, ispec) + stage = PipelineStage(name, None, prev, self.pipemode, ispec=ispec) + try: + yield stage, self.m #stage._m + finally: + pass + if self.pipemode: + if stage._ispec: + print ("use ispec", stage._ispec) + inspecs = stage._ispec + else: + inspecs = self.get_specs(stage, name) + #inspecs = likedict(inspecs) + outspecs = self.get_specs(stage, '__nextstage__', liked=True) + print ("stage inspecs", name, inspecs) + print ("stage outspecs", name, outspecs) + eqs = stage._eqs # get_eqs(stage._eqs) + assigns = get_assigns(stage._assigns) + print ("stage eqs", name, eqs) + print ("stage assigns", name, assigns) + s = AutoStage(inspecs, outspecs, eqs, assigns) + self.stages.append(s) + print ("end stage", name, self.pipemode, "\n") + + def get_specs(self, stage, name, liked=False): + return stage._preg_map[name] + if name in stage._preg_map: + res = [] + for k, v in stage._preg_map[name].items(): + #v = like(v, k, stage._m) + res.append(v) + #if isinstance(v, ObjectProxy): + # res += v.get_specs() + return res + return {} + + def __enter__(self): + self.stages = [] + return self + + def __exit__(self, *args): + print ("exit stage", args) + pipes = [] + cb = ControlBase() + for s in self.stages: + print ("stage specs", s, s.inspecs, s.outspecs) + if self.pipetype == 'buffered': + p = BufferedHandshake(s) + else: + p = AutoPipe(s, s.assigns) + pipes.append(p) + self.m.submodules += p + + self.m.d.comb += cb.connect(pipes) + + +class SimplePipeline: + """ Pipeline builder with auto generation of pipeline 
registers. + """ + + def __init__(self, m): + self._m = m + self._pipeline_register_map = {} + self._current_stage_num = 0 + + def _setup(self): + stage_list = [] + for method in dir(self): + if method.startswith('stage'): + stage_list.append(method) + for stage in sorted(stage_list): + stage_method = getattr(self, stage) + stage_method() + self._current_stage_num += 1 + + def __getattr__(self, name): + try: + return self._pipeline_register_map[self._current_stage_num][name] + except KeyError: + raise AttributeError( + 'error, no pipeline register "%s" defined for stage %d' + % (name, self._current_stage_num)) + + def __setattr__(self, name, value): + if name.startswith('_'): + # do not do anything tricky with variables starting with '_' + object.__setattr__(self, name, value) + return + next_stage = self._current_stage_num + 1 + pipereg_id = str(self._current_stage_num) + 'to' + str(next_stage) + rname = 'pipereg_' + pipereg_id + '_' + name + #new_pipereg = Signal(value_bits_sign(value), name=rname, + # reset_less=True) + if isinstance(value, ObjectProxy): + new_pipereg = ObjectProxy.like(self._m, value, + name=rname, reset_less = True) + else: + new_pipereg = Signal.like(value, name=rname, reset_less = True) + if next_stage not in self._pipeline_register_map: + self._pipeline_register_map[next_stage] = {} + self._pipeline_register_map[next_stage][name] = new_pipereg + self._m.d.sync += eq(new_pipereg, value) + diff --git a/src/nmutil/pipemodbase.py b/src/nmutil/pipemodbase.py new file mode 100644 index 0000000..0c5a02f --- /dev/null +++ b/src/nmutil/pipemodbase.py @@ -0,0 +1,54 @@ +from nmigen import Elaboratable +from ieee754.pipeline import DynamicPipe +from nmutil.singlepipe import StageChain + + +class PipeModBase(Elaboratable): + """PipeModBase: common code between nearly every pipeline module + """ + def __init__(self, pspec, modname): + self.modname = modname # use this to give a name to this module + self.pspec = pspec + self.i = self.ispec() + self.o = 
self.ospec() + + def process(self, i): + return self.o + + def setup(self, m, i): + """ links module to inputs and outputs + """ + setattr(m.submodules, self.modname, self) + m.d.comb += self.i.eq(i) + + +class PipeModBaseChain(DynamicPipe): + """PipeModBaseChain: common code between stage-chained pipes + + Links a set of combinatorial modules (get_chain) together + and uses pspec.pipekls to dynamically select the pipeline type + Also conforms to the Pipeline Stage API + """ + def __init__(self, pspec): + self.pspec = pspec + self.chain = self.get_chain() + super().__init__(pspec) + + def ispec(self): + """ returns the input spec of the first module in the chain + """ + return self.chain[0].ispec() + + def ospec(self): + """ returns the output spec of the last module in the chain + """ + return self.chain[-1].ospec() + + def process(self, i): + return self.o # ... returned here (see setup comment below) + + def setup(self, m, i): + """ links module to inputs and outputs + """ + StageChain(self.chain).setup(m, i) # input linked here, through chain + self.o = self.chain[-1].o # output is the last thing in the chain... diff --git a/src/nmutil/queue.py b/src/nmutil/queue.py new file mode 100644 index 0000000..3d47c63 --- /dev/null +++ b/src/nmutil/queue.py @@ -0,0 +1,194 @@ +# Copyright (c) 2014 - 2019 The Regents of the University of +# California (Regents). All Rights Reserved. Redistribution and use in +# source and binary forms, with or without modification, are permitted +# provided that the following conditions are met: +# * Redistributions of source code must retain the above +# copyright notice, this list of conditions and the following +# two paragraphs of disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# two paragraphs of disclaimer in the documentation and/or other materials +# provided with the distribution. 
+# * Neither the name of the Regents nor the names of its contributors +# may be used to endorse or promote products derived from this +# software without specific prior written permission. +# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, +# SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, +# ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF +# REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF +# ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION +# TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR +# MODIFICATIONS. + +from nmigen import Module, Signal, Memory, Mux, Elaboratable +from nmigen.utils import bits_for +from nmigen.cli import main +from nmigen.lib.fifo import FIFOInterface + +# translated from https://github.com/freechipsproject/chisel3/blob/a4a29e29c3f1eed18f851dcf10bdc845571dfcb6/src/main/scala/chisel3/util/Decoupled.scala#L185 # noqa + + +class Queue(FIFOInterface, Elaboratable): + def __init__(self, width, depth, fwft=True, pipe=False): + """ Queue (FIFO) with pipe mode and first-write fall-through capability + + * :width: width of Queue data in/out + * :depth: queue depth. NOTE: may be set to 0 (this is ok) + * :fwft : first-write, fall-through mode (Chisel Queue "flow" mode) + * :pipe : pipe mode. NOTE: this mode can cause unanticipated + problems. when read is enabled, so is writeable. + therefore if read is enabled, the data ABSOLUTELY MUST + be read. + + fwft mode = True basically means that the data may be transferred + combinatorially from input to output. 
+ + Attributes: + * level: available free space (number of unread entries) + + din = enq_data, writable = enq_ready, we = enq_valid + dout = deq_data, re = deq_ready, readable = deq_valid + """ + FIFOInterface.__init__(self, width, depth, fwft) + self.pipe = pipe + self.depth = depth + self.level = Signal(bits_for(depth)) + + def elaborate(self, platform): + m = Module() + + # set up an SRAM. XXX bug in Memory: cannot create SRAM of depth 1 + ram = Memory(self.width, self.depth if self.depth > 1 else 2) + m.submodules.ram_read = ram_read = ram.read_port(domain="comb") + m.submodules.ram_write = ram_write = ram.write_port() + + # convenience names, for people familiar with ready/valid terminology + # "p" stands for "previous stage", "n" stands for "next stage" + # for people familiar with the chisel Decoupled library: + # enq is "enqueue" (data in, aka "prev stage"), + # deq is "dequeue" (data out, aka "next stage") + p_ready_o = self.writable + p_valid_i = self.we + enq_data = self.din # aka p_data_i + + n_valid_o = self.readable + n_ready_i = self.re + deq_data = self.dout # aka n_data_o + + # intermediaries + ptr_width = bits_for(self.depth - 1) if self.depth > 1 else 0 + enq_ptr = Signal(ptr_width) # cyclic pointer to "insert" point (wrport) + deq_ptr = Signal(ptr_width) # cyclic pointer to "remove" point (rdport) + maybe_full = Signal() # not reset_less (set by sync) + + # temporaries + do_enq = Signal(reset_less=True) + do_deq = Signal(reset_less=True) + ptr_diff = Signal(ptr_width) + ptr_match = Signal(reset_less=True) + empty = Signal(reset_less=True) + full = Signal(reset_less=True) + enq_max = Signal(reset_less=True) + deq_max = Signal(reset_less=True) + + m.d.comb += [ptr_match.eq(enq_ptr == deq_ptr), # read-ptr = write-ptr + ptr_diff.eq(enq_ptr - deq_ptr), + enq_max.eq(enq_ptr == self.depth - 1), + deq_max.eq(deq_ptr == self.depth - 1), + empty.eq(ptr_match & ~maybe_full), + full.eq(ptr_match & maybe_full), + do_enq.eq(p_ready_o & p_valid_i), # write 
conditions ok + do_deq.eq(n_ready_i & n_valid_o), # read conditions ok + + # set readable and writable (NOTE: see pipe mode below) + n_valid_o.eq(~empty), # cannot read if empty! + p_ready_o.eq(~full), # cannot write if full! + + # set up memory and connect to input and output + ram_write.addr.eq(enq_ptr), + ram_write.data.eq(enq_data), + ram_write.en.eq(do_enq), + ram_read.addr.eq(deq_ptr), + deq_data.eq(ram_read.data) # NOTE: overridden in fwft mode + ] + + # under write conditions, SRAM write-pointer moves on next clock + with m.If(do_enq): + m.d.sync += enq_ptr.eq(Mux(enq_max, 0, enq_ptr+1)) + + # under read conditions, SRAM read-pointer moves on next clock + with m.If(do_deq): + m.d.sync += deq_ptr.eq(Mux(deq_max, 0, deq_ptr+1)) + + # if read-but-not-write or write-but-not-read, maybe_full set + with m.If(do_enq != do_deq): + m.d.sync += maybe_full.eq(do_enq) + + # first-word fall-through: same as "flow" parameter in Chisel3 Queue + # basically instead of relying on the Memory characteristics (which + # in FPGAs do not have write-through), then when the queue is empty + # take the output directly from the input, i.e. *bypass* the SRAM. + # this done combinatorially to give the exact same characteristics + # as Memory "write-through"... without relying on a changing API + if self.fwft: + with m.If(p_valid_i): + m.d.comb += n_valid_o.eq(1) + with m.If(empty): + m.d.comb += deq_data.eq(enq_data) + m.d.comb += do_deq.eq(0) + with m.If(n_ready_i): + m.d.comb += do_enq.eq(0) + + # pipe mode: if next stage says it's ready (readable), we + # *must* declare the input ready (writeable). 
+ if self.pipe: + with m.If(n_ready_i): + m.d.comb += p_ready_o.eq(1) + + # set the count (available free space), optimise on power-of-two + if self.depth == 1 << ptr_width: # is depth a power of 2 + m.d.comb += self.level.eq( + Mux(maybe_full & ptr_match, self.depth, 0) | ptr_diff) + else: + m.d.comb += self.level.eq(Mux(ptr_match, + Mux(maybe_full, self.depth, 0), + Mux(deq_ptr > enq_ptr, + self.depth + ptr_diff, + ptr_diff))) + + return m + + +if __name__ == "__main__": + reg_stage = Queue(1, 1, pipe=True) + break_ready_chain_stage = Queue(1, 1, pipe=True, fwft=True) + m = Module() + ports = [] + + def queue_ports(queue, name_prefix): + retval = [] + for name in ["level", + "dout", + "readable", + "writable"]: + port = getattr(queue, name) + signal = Signal(port.shape(), name=name_prefix+name) + m.d.comb += signal.eq(port) + retval.append(signal) + for name in ["re", + "din", + "we"]: + port = getattr(queue, name) + signal = Signal(port.shape(), name=name_prefix+name) + m.d.comb += port.eq(signal) + retval.append(signal) + return retval + + m.submodules.reg_stage = reg_stage + ports += queue_ports(reg_stage, "reg_stage_") + m.submodules.break_ready_chain_stage = break_ready_chain_stage + ports += queue_ports(break_ready_chain_stage, "break_ready_chain_stage_") + main(m, ports=ports) diff --git a/src/nmutil/singlepipe.py b/src/nmutil/singlepipe.py new file mode 100644 index 0000000..4880a81 --- /dev/null +++ b/src/nmutil/singlepipe.py @@ -0,0 +1,994 @@ +""" Pipeline API. For multi-input and multi-output variants, see multipipe. + + Associated development bugs: + * http://bugs.libre-riscv.org/show_bug.cgi?id=148 + * http://bugs.libre-riscv.org/show_bug.cgi?id=64 + * http://bugs.libre-riscv.org/show_bug.cgi?id=57 + + Important: see Stage API (stageapi.py) and IO Control API + (iocontrol.py) in combination with below. This module + "combines" the Stage API with the IO Control API to create + the Pipeline API. 
+ + The one critically important key difference between StageAPI and + PipelineAPI: + + * StageAPI: combinatorial (NO REGISTERS / LATCHES PERMITTED) + * PipelineAPI: synchronous registers / latches get added here + + RecordBasedStage: + ---------------- + + A convenience class that takes an input shape, output shape, a + "processing" function and an optional "setup" function. Honestly + though, there's not much more effort to just... create a class + that returns a couple of Records (see ExampleAddRecordStage in + examples). + + PassThroughStage: + ---------------- + + A convenience class that takes a single function as a parameter, + that is chain-called to create the exact same input and output spec. + It has a process() function that simply returns its input. + + Instances of this class are completely redundant if handed to + StageChain, however when passed to UnbufferedPipeline they + can be used to introduce a single clock delay. + + ControlBase: + ----------- + + The base class for pipelines. Contains previous and next ready/valid/data. + Also has an extremely useful "connect" function that can be used to + connect a chain of pipelines and present the exact same prev/next + ready/valid/data API. + + Note: pipelines basically do not become pipelines as such until + handed to a derivative of ControlBase. ControlBase itself is *not* + strictly considered a pipeline class. Wishbone and AXI4 (master or + slave) could be derived from ControlBase, for example. + UnbufferedPipeline: + ------------------ + + A simple stalling clock-synchronised pipeline that has no buffering + (unlike BufferedHandshake). Data flows on *every* clock cycle when + the conditions are right (this is nominally when the input is valid + and the output is ready). + + A stall anywhere along the line will result in a stall back-propagating + down the entire chain. 
The BufferedHandshake by contrast will buffer
+ incoming data, allowing previous stages one clock cycle's grace before
+ also having to stall.
+
+ An advantage of the UnbufferedPipeline over the Buffered one is
+ that the amount of logic needed (number of gates) is greatly
+ reduced (no second set of buffers basically)
+
+ The disadvantage of the UnbufferedPipeline is that the valid/ready
+ logic, if chained together, is *combinatorial*, resulting in
+ progressively larger gate delay.
+
+ PassThroughHandshake:
+ ------------------
+
+ A Control class that introduces a single clock delay, passing its
+ data through unaltered. Unlike RegisterPipeline (which relies
+ on UnbufferedPipeline and PassThroughStage) it handles ready/valid
+ itself.
+
+ RegisterPipeline:
+ ----------------
+
+ A convenience class that, because UnbufferedPipeline introduces a single
+ clock delay, when its stage is a PassThroughStage, it results in a Pipeline
+ stage that, duh, delays its (unmodified) input by one clock cycle.
+
+ BufferedHandshake:
+ ----------------
+
+ nmigen implementation of buffered pipeline stage, based on zipcpu:
+ https://zipcpu.com/blog/2017/08/14/strategies-for-pipelining.html
+
+ this module requires quite a bit of thought to understand how it works
+ (and why it is needed in the first place). reading the above is
+ *strongly* recommended.
+
+ unlike john dawson's IEEE754 FPU STB/ACK signalling, which requires
+ the STB / ACK signals to raise and lower (on separate clocks) before
+ data may proceed (thus only allowing one piece of data to proceed
+ on *ALTERNATE* cycles), the signalling here is a true pipeline
+ where data will flow on *every* clock when the conditions are right. 
+ + input acceptance conditions are when: + * incoming previous-stage strobe (p.valid_i) is HIGH + * outgoing previous-stage ready (p.ready_o) is LOW + + output transmission conditions are when: + * outgoing next-stage strobe (n.valid_o) is HIGH + * outgoing next-stage ready (n.ready_i) is LOW + + the tricky bit is when the input has valid data and the output is not + ready to accept it. if it wasn't for the clock synchronisation, it + would be possible to tell the input "hey don't send that data, we're + not ready". unfortunately, it's not possible to "change the past": + the previous stage *has no choice* but to pass on its data. + + therefore, the incoming data *must* be accepted - and stored: that + is the responsibility / contract that this stage *must* accept. + on the same clock, it's possible to tell the input that it must + not send any more data. this is the "stall" condition. + + we now effectively have *two* possible pieces of data to "choose" from: + the buffered data, and the incoming data. the decision as to which + to process and output is based on whether we are in "stall" or not. + i.e. when the next stage is no longer ready, the output comes from + the buffer if a stall had previously occurred, otherwise it comes + direct from processing the input. + + this allows us to respect a synchronous "travelling STB" with what + dan calls a "buffered handshake". + + it's quite a complex state machine! 
+ + SimpleHandshake + --------------- + + Synchronised pipeline, Based on: + https://github.com/ZipCPU/dbgbus/blob/master/hexbus/rtl/hbdeword.v +""" + +from nmigen import Signal, Mux, Module, Elaboratable, Const +from nmigen.cli import verilog, rtlil +from nmigen.hdl.rec import Record + +from nmutil.queue import Queue +import inspect + +from nmutil.iocontrol import (PrevControl, NextControl, Object, RecordObject) +from nmutil.stageapi import (_spec, StageCls, Stage, StageChain, StageHelper) +from nmutil import nmoperator + + +class RecordBasedStage(Stage): + """ convenience class which provides a Records-based layout. + honestly it's a lot easier just to create a direct Records-based + class (see ExampleAddRecordStage) + """ + def __init__(self, in_shape, out_shape, processfn, setupfn=None): + self.in_shape = in_shape + self.out_shape = out_shape + self.__process = processfn + self.__setup = setupfn + def ispec(self): return Record(self.in_shape) + def ospec(self): return Record(self.out_shape) + def process(seif, i): return self.__process(i) + def setup(seif, m, i): return self.__setup(m, i) + + +class PassThroughStage(StageCls): + """ a pass-through stage with its input data spec identical to its output, + and "passes through" its data from input to output (does nothing). + + use this basically to explicitly make any data spec Stage-compliant. + (many APIs would potentially use a static "wrap" method in e.g. + StageCls to achieve a similar effect) + """ + def __init__(self, iospecfn): self.iospecfn = iospecfn + def ispec(self): return self.iospecfn() + def ospec(self): return self.iospecfn() + + +class ControlBase(StageHelper, Elaboratable): + """ Common functions for Pipeline API. Note: a "pipeline stage" only + exists (conceptually) when a ControlBase derivative is handed + a Stage (combinatorial block) + + NOTE: ControlBase derives from StageHelper, making it accidentally + compliant with the Stage API. 
Using those functions directly + *BYPASSES* a ControlBase instance ready/valid signalling, which + clearly should not be done without a really, really good reason. + """ + def __init__(self, stage=None, in_multi=None, stage_ctl=False, maskwid=0): + """ Base class containing ready/valid/data to previous and next stages + + * p: contains ready/valid to the previous stage + * n: contains ready/valid to the next stage + + Except when calling Controlbase.connect(), user must also: + * add data_i member to PrevControl (p) and + * add data_o member to NextControl (n) + Calling ControlBase._new_data is a good way to do that. + """ + print ("ControlBase", self, stage, in_multi, stage_ctl) + StageHelper.__init__(self, stage) + + # set up input and output IO ACK (prev/next ready/valid) + self.p = PrevControl(in_multi, stage_ctl, maskwid=maskwid) + self.n = NextControl(stage_ctl, maskwid=maskwid) + + # set up the input and output data + if stage is not None: + self._new_data("data") + + def _new_data(self, name): + """ allocates new data_i and data_o + """ + self.p.data_i, self.n.data_o = self.new_specs(name) + + @property + def data_r(self): + return self.process(self.p.data_i) + + def connect_to_next(self, nxt): + """ helper function to connect to the next stage data/valid/ready. + """ + return self.n.connect_to_next(nxt.p) + + def _connect_in(self, prev): + """ internal helper function to connect stage to an input source. + do not use to connect stage-to-stage! + """ + return self.p._connect_in(prev.p) + + def _connect_out(self, nxt): + """ internal helper function to connect stage to an output source. + do not use to connect stage-to-stage! 
+ """ + return self.n._connect_out(nxt.n) + + def connect(self, pipechain): + """ connects a chain (list) of Pipeline instances together and + links them to this ControlBase instance: + + in <----> self <---> out + | ^ + v | + [pipe1, pipe2, pipe3, pipe4] + | ^ | ^ | ^ + v | v | v | + out---in out--in out---in + + Also takes care of allocating data_i/data_o, by looking up + the data spec for each end of the pipechain. i.e It is NOT + necessary to allocate self.p.data_i or self.n.data_o manually: + this is handled AUTOMATICALLY, here. + + Basically this function is the direct equivalent of StageChain, + except that unlike StageChain, the Pipeline logic is followed. + + Just as StageChain presents an object that conforms to the + Stage API from a list of objects that also conform to the + Stage API, an object that calls this Pipeline connect function + has the exact same pipeline API as the list of pipline objects + it is called with. + + Thus it becomes possible to build up larger chains recursively. + More complex chains (multi-input, multi-output) will have to be + done manually. 
+ + Argument: + + * :pipechain: - a sequence of ControlBase-derived classes + (must be one or more in length) + + Returns: + + * a list of eq assignments that will need to be added in + an elaborate() to m.d.comb + """ + assert len(pipechain) > 0, "pipechain must be non-zero length" + assert self.stage is None, "do not use connect with a stage" + eqs = [] # collated list of assignment statements + + # connect inter-chain + for i in range(len(pipechain)-1): + pipe1 = pipechain[i] # earlier + pipe2 = pipechain[i+1] # later (by 1) + eqs += pipe1.connect_to_next(pipe2) # earlier n to later p + + # connect front and back of chain to ourselves + front = pipechain[0] # first in chain + end = pipechain[-1] # last in chain + self.set_specs(front, end) # sets up ispec/ospec functions + self._new_data("chain") # NOTE: REPLACES existing data + eqs += front._connect_in(self) # front p to our p + eqs += end._connect_out(self) # end n to our n + + return eqs + + def set_input(self, i): + """ helper function to set the input data (used in unit tests) + """ + return nmoperator.eq(self.p.data_i, i) + + def __iter__(self): + yield from self.p # yields ready/valid/data (data also gets yielded) + yield from self.n # ditto + + def ports(self): + return list(self) + + def elaborate(self, platform): + """ handles case where stage has dynamic ready/valid functions + """ + m = Module() + m.submodules.p = self.p + m.submodules.n = self.n + + self.setup(m, self.p.data_i) + + if not self.p.stage_ctl: + return m + + # intercept the previous (outgoing) "ready", combine with stage ready + m.d.comb += self.p.s_ready_o.eq(self.p._ready_o & self.stage.d_ready) + + # intercept the next (incoming) "ready" and combine it with data valid + sdv = self.stage.d_valid(self.n.ready_i) + m.d.comb += self.n.d_valid.eq(self.n.ready_i & sdv) + + return m + + +class BufferedHandshake(ControlBase): + """ buffered pipeline stage. data and strobe signals travel in sync. 
+ if ever the input is ready and the output is not, processed data + is shunted in a temporary register. + + Argument: stage. see Stage API above + + stage-1 p.valid_i >>in stage n.valid_o out>> stage+1 + stage-1 p.ready_o <>in stage n.data_o out>> stage+1 + | | + process --->----^ + | | + +-- r_data ->-+ + + input data p.data_i is read (only), is processed and goes into an + intermediate result store [process()]. this is updated combinatorially. + + in a non-stall condition, the intermediate result will go into the + output (update_output). however if ever there is a stall, it goes + into r_data instead [update_buffer()]. + + when the non-stall condition is released, r_data is the first + to be transferred to the output [flush_buffer()], and the stall + condition cleared. + + on the next cycle (as long as stall is not raised again) the + input may begin to be processed and transferred directly to output. + """ + + def elaborate(self, platform): + self.m = ControlBase.elaborate(self, platform) + + result = _spec(self.stage.ospec, "r_tmp") + r_data = _spec(self.stage.ospec, "r_data") + + # establish some combinatorial temporaries + o_n_validn = Signal(reset_less=True) + n_ready_i = Signal(reset_less=True, name="n_i_rdy_data") + nir_por = Signal(reset_less=True) + nir_por_n = Signal(reset_less=True) + p_valid_i = Signal(reset_less=True) + nir_novn = Signal(reset_less=True) + nirn_novn = Signal(reset_less=True) + por_pivn = Signal(reset_less=True) + npnn = Signal(reset_less=True) + self.m.d.comb += [p_valid_i.eq(self.p.valid_i_test), + o_n_validn.eq(~self.n.valid_o), + n_ready_i.eq(self.n.ready_i_test), + nir_por.eq(n_ready_i & self.p._ready_o), + nir_por_n.eq(n_ready_i & ~self.p._ready_o), + nir_novn.eq(n_ready_i | o_n_validn), + nirn_novn.eq(~n_ready_i & o_n_validn), + npnn.eq(nir_por | nirn_novn), + por_pivn.eq(self.p._ready_o & ~p_valid_i) + ] + + # store result of processing in combinatorial temporary + self.m.d.comb += nmoperator.eq(result, self.data_r) + + # if 
not in stall condition, update the temporary register + with self.m.If(self.p.ready_o): # not stalled + self.m.d.sync += nmoperator.eq(r_data, result) # update buffer + + # data pass-through conditions + with self.m.If(npnn): + data_o = self._postprocess(result) # XXX TBD, does nothing right now + self.m.d.sync += [self.n.valid_o.eq(p_valid_i), # valid if p_valid + nmoperator.eq(self.n.data_o, data_o), # update out + ] + # buffer flush conditions (NOTE: can override data passthru conditions) + with self.m.If(nir_por_n): # not stalled + # Flush the [already processed] buffer to the output port. + data_o = self._postprocess(r_data) # XXX TBD, does nothing right now + self.m.d.sync += [self.n.valid_o.eq(1), # reg empty + nmoperator.eq(self.n.data_o, data_o), # flush + ] + # output ready conditions + self.m.d.sync += self.p._ready_o.eq(nir_novn | por_pivn) + + return self.m + + +class MaskNoDelayCancellable(ControlBase): + """ Mask-activated Cancellable pipeline (that does not respect "ready") + + Based on (identical behaviour to) SimpleHandshake. + TODO: decide whether to merge *into* SimpleHandshake. + + Argument: stage. see Stage API above + + stage-1 p.valid_i >>in stage n.valid_o out>> stage+1 + stage-1 p.ready_o <>in stage n.data_o out>> stage+1 + | | + +--process->--^ + """ + def __init__(self, stage, maskwid, in_multi=None, stage_ctl=False): + ControlBase.__init__(self, stage, in_multi, stage_ctl, maskwid) + + def elaborate(self, platform): + self.m = m = ControlBase.elaborate(self, platform) + + # store result of processing in combinatorial temporary + result = _spec(self.stage.ospec, "r_tmp") + m.d.comb += nmoperator.eq(result, self.data_r) + + # establish if the data should be passed on. cancellation is + # a global signal. + # XXX EXCEPTIONAL CIRCUMSTANCES: inspection of the data payload + # is NOT "normal" for the Stage API. 
+ p_valid_i = Signal(reset_less=True) + #print ("self.p.data_i", self.p.data_i) + maskedout = Signal(len(self.p.mask_i), reset_less=True) + m.d.comb += maskedout.eq(self.p.mask_i & ~self.p.stop_i) + m.d.comb += p_valid_i.eq(maskedout.bool()) + + # if idmask nonzero, mask gets passed on (and register set). + # register is left as-is if idmask is zero, but out-mask is set to zero + # note however: only the *uncancelled* mask bits get passed on + m.d.sync += self.n.valid_o.eq(p_valid_i) + m.d.sync += self.n.mask_o.eq(Mux(p_valid_i, maskedout, 0)) + with m.If(p_valid_i): + data_o = self._postprocess(result) # XXX TBD, does nothing right now + m.d.sync += nmoperator.eq(self.n.data_o, data_o) # update output + + # output valid if + # input always "ready" + #m.d.comb += self.p._ready_o.eq(self.n.ready_i_test) + m.d.comb += self.p._ready_o.eq(Const(1)) + + # always pass on stop (as combinatorial: single signal) + m.d.comb += self.n.stop_o.eq(self.p.stop_i) + + return self.m + + +class MaskCancellable(ControlBase): + """ Mask-activated Cancellable pipeline + + Arguments: + + * stage. see Stage API above + * maskwid - sets up cancellation capability (mask and stop). + * in_multi + * stage_ctl + * dynamic - allows switching from sync to combinatorial (passthrough) + USE WITH CARE. will need the entire pipe to be quiescent + before switching, otherwise data WILL be destroyed. 
+ + stage-1 p.valid_i >>in stage n.valid_o out>> stage+1 + stage-1 p.ready_o <>in stage n.data_o out>> stage+1 + | | + +--process->--^ + """ + def __init__(self, stage, maskwid, in_multi=None, stage_ctl=False, + dynamic=False): + ControlBase.__init__(self, stage, in_multi, stage_ctl, maskwid) + self.dynamic = dynamic + if dynamic: + self.latchmode = Signal() + else: + self.latchmode = Const(1) + + def elaborate(self, platform): + self.m = m = ControlBase.elaborate(self, platform) + + mask_r = Signal(len(self.p.mask_i), reset_less=True) + data_r = _spec(self.stage.ospec, "data_r") + m.d.comb += nmoperator.eq(data_r, self._postprocess(self.data_r)) + + with m.If(self.latchmode): + r_busy = Signal() + r_latch = _spec(self.stage.ospec, "r_latch") + + # establish if the data should be passed on. cancellation is + # a global signal. + p_valid_i = Signal(reset_less=True) + #print ("self.p.data_i", self.p.data_i) + maskedout = Signal(len(self.p.mask_i), reset_less=True) + m.d.comb += maskedout.eq(self.p.mask_i & ~self.p.stop_i) + + # establish some combinatorial temporaries + n_ready_i = Signal(reset_less=True, name="n_i_rdy_data") + p_valid_i_p_ready_o = Signal(reset_less=True) + m.d.comb += [p_valid_i.eq(self.p.valid_i_test & maskedout.bool()), + n_ready_i.eq(self.n.ready_i_test), + p_valid_i_p_ready_o.eq(p_valid_i & self.p.ready_o), + ] + + # if idmask nonzero, mask gets passed on (and register set). 
+ # register is left as-is if idmask is zero, but out-mask is set to + # zero + # note however: only the *uncancelled* mask bits get passed on + m.d.sync += mask_r.eq(Mux(p_valid_i, maskedout, 0)) + m.d.comb += self.n.mask_o.eq(mask_r) + + # always pass on stop (as combinatorial: single signal) + m.d.comb += self.n.stop_o.eq(self.p.stop_i) + + stor = Signal(reset_less=True) + m.d.comb += stor.eq(p_valid_i_p_ready_o | n_ready_i) + with m.If(stor): + # store result of processing in combinatorial temporary + m.d.sync += nmoperator.eq(r_latch, data_r) + + # previous valid and ready + with m.If(p_valid_i_p_ready_o): + m.d.sync += r_busy.eq(1) # output valid + # previous invalid or not ready, however next is accepting + with m.Elif(n_ready_i): + m.d.sync += r_busy.eq(0) # ...so set output invalid + + # output set combinatorially from latch + m.d.comb += nmoperator.eq(self.n.data_o, r_latch) + + m.d.comb += self.n.valid_o.eq(r_busy) + # if next is ready, so is previous + m.d.comb += self.p._ready_o.eq(n_ready_i) + + with m.Else(): + # pass everything straight through. p connected to n: data, + # valid, mask, everything. this is "effectively" just a + # StageChain: MaskCancellable is doing "nothing" except + # combinatorially passing everything through + # (except now it's *dynamically selectable* whether to do that) + m.d.comb += self.n.valid_o.eq(self.p.valid_i_test) + m.d.comb += self.p._ready_o.eq(self.n.ready_i_test) + m.d.comb += self.n.stop_o.eq(self.p.stop_i) + m.d.comb += self.n.mask_o.eq(self.p.mask_i) + m.d.comb += nmoperator.eq(self.n.data_o, data_r) + + return self.m + + +class SimpleHandshake(ControlBase): + """ simple handshake control. data and strobe signals travel in sync. + implements the protocol used by Wishbone and AXI4. + + Argument: stage. 
see Stage API above + + stage-1 p.valid_i >>in stage n.valid_o out>> stage+1 + stage-1 p.ready_o <>in stage n.data_o out>> stage+1 + | | + +--process->--^ + Truth Table + + Inputs Temporary Output Data + ------- ---------- ----- ---- + P P N N PiV& ~NiR& N P + i o i o PoR NoV o o + V R R V V R + + ------- - - - - + 0 0 0 0 0 0 >0 0 reg + 0 0 0 1 0 1 >1 0 reg + 0 0 1 0 0 0 0 1 process(data_i) + 0 0 1 1 0 0 0 1 process(data_i) + ------- - - - - + 0 1 0 0 0 0 >0 0 reg + 0 1 0 1 0 1 >1 0 reg + 0 1 1 0 0 0 0 1 process(data_i) + 0 1 1 1 0 0 0 1 process(data_i) + ------- - - - - + 1 0 0 0 0 0 >0 0 reg + 1 0 0 1 0 1 >1 0 reg + 1 0 1 0 0 0 0 1 process(data_i) + 1 0 1 1 0 0 0 1 process(data_i) + ------- - - - - + 1 1 0 0 1 0 1 0 process(data_i) + 1 1 0 1 1 1 1 0 process(data_i) + 1 1 1 0 1 0 1 1 process(data_i) + 1 1 1 1 1 0 1 1 process(data_i) + ------- - - - - + """ + + def elaborate(self, platform): + self.m = m = ControlBase.elaborate(self, platform) + + r_busy = Signal() + result = _spec(self.stage.ospec, "r_tmp") + + # establish some combinatorial temporaries + n_ready_i = Signal(reset_less=True, name="n_i_rdy_data") + p_valid_i_p_ready_o = Signal(reset_less=True) + p_valid_i = Signal(reset_less=True) + m.d.comb += [p_valid_i.eq(self.p.valid_i_test), + n_ready_i.eq(self.n.ready_i_test), + p_valid_i_p_ready_o.eq(p_valid_i & self.p.ready_o), + ] + + # store result of processing in combinatorial temporary + m.d.comb += nmoperator.eq(result, self.data_r) + + # previous valid and ready + with m.If(p_valid_i_p_ready_o): + data_o = self._postprocess(result) # XXX TBD, does nothing right now + m.d.sync += [r_busy.eq(1), # output valid + nmoperator.eq(self.n.data_o, data_o), # update output + ] + # previous invalid or not ready, however next is accepting + with m.Elif(n_ready_i): + data_o = self._postprocess(result) # XXX TBD, does nothing right now + m.d.sync += [nmoperator.eq(self.n.data_o, data_o)] + # TODO: could still send data here (if there was any) + #m.d.sync += 
self.n.valid_o.eq(0) # ...so set output invalid + m.d.sync += r_busy.eq(0) # ...so set output invalid + + m.d.comb += self.n.valid_o.eq(r_busy) + # if next is ready, so is previous + m.d.comb += self.p._ready_o.eq(n_ready_i) + + return self.m + + +class UnbufferedPipeline(ControlBase): + """ A simple pipeline stage with single-clock synchronisation + and two-way valid/ready synchronised signalling. + + Note that a stall in one stage will result in the entire pipeline + chain stalling. + + Also that unlike BufferedHandshake, the valid/ready signalling does NOT + travel synchronously with the data: the valid/ready signalling + combines in a *combinatorial* fashion. Therefore, a long pipeline + chain will lengthen propagation delays. + + Argument: stage. see Stage API, above + + stage-1 p.valid_i >>in stage n.valid_o out>> stage+1 + stage-1 p.ready_o <>in stage n.data_o out>> stage+1 + | | + r_data result + | | + +--process ->-+ + + Attributes: + ----------- + p.data_i : StageInput, shaped according to ispec + The pipeline input + p.data_o : StageOutput, shaped according to ospec + The pipeline output + r_data : input_shape according to ispec + A temporary (buffered) copy of a prior (valid) input. + This is HELD if the output is not ready. It is updated + SYNCHRONOUSLY. + result: output_shape according to ospec + The output of the combinatorial logic. it is updated + COMBINATORIALLY (no clock dependence). 
+ + Truth Table + + Inputs Temp Output Data + ------- - ----- ---- + P P N N ~NiR& N P + i o i o NoV o o + V R R V V R + + ------- - - - + 0 0 0 0 0 0 1 reg + 0 0 0 1 1 1 0 reg + 0 0 1 0 0 0 1 reg + 0 0 1 1 0 0 1 reg + ------- - - - + 0 1 0 0 0 0 1 reg + 0 1 0 1 1 1 0 reg + 0 1 1 0 0 0 1 reg + 0 1 1 1 0 0 1 reg + ------- - - - + 1 0 0 0 0 1 1 reg + 1 0 0 1 1 1 0 reg + 1 0 1 0 0 1 1 reg + 1 0 1 1 0 1 1 reg + ------- - - - + 1 1 0 0 0 1 1 process(data_i) + 1 1 0 1 1 1 0 process(data_i) + 1 1 1 0 0 1 1 process(data_i) + 1 1 1 1 0 1 1 process(data_i) + ------- - - - + + Note: PoR is *NOT* involved in the above decision-making. + """ + + def elaborate(self, platform): + self.m = m = ControlBase.elaborate(self, platform) + + data_valid = Signal() # is data valid or not + r_data = _spec(self.stage.ospec, "r_tmp") # output type + + # some temporaries + p_valid_i = Signal(reset_less=True) + pv = Signal(reset_less=True) + buf_full = Signal(reset_less=True) + m.d.comb += p_valid_i.eq(self.p.valid_i_test) + m.d.comb += pv.eq(self.p.valid_i & self.p.ready_o) + m.d.comb += buf_full.eq(~self.n.ready_i_test & data_valid) + + m.d.comb += self.n.valid_o.eq(data_valid) + m.d.comb += self.p._ready_o.eq(~data_valid | self.n.ready_i_test) + m.d.sync += data_valid.eq(p_valid_i | buf_full) + + with m.If(pv): + m.d.sync += nmoperator.eq(r_data, self.data_r) + data_o = self._postprocess(r_data) # XXX TBD, does nothing right now + m.d.comb += nmoperator.eq(self.n.data_o, data_o) + + return self.m + + +class UnbufferedPipeline2(ControlBase): + """ A simple pipeline stage with single-clock synchronisation + and two-way valid/ready synchronised signalling. + + Note that a stall in one stage will result in the entire pipeline + chain stalling. + + Also that unlike BufferedHandshake, the valid/ready signalling does NOT + travel synchronously with the data: the valid/ready signalling + combines in a *combinatorial* fashion. Therefore, a long pipeline + chain will lengthen propagation delays. 
+ + Argument: stage. see Stage API, above + + stage-1 p.valid_i >>in stage n.valid_o out>> stage+1 + stage-1 p.ready_o <>in stage n.data_o out>> stage+1 + | | | + +- process-> buf <-+ + Attributes: + ----------- + p.data_i : StageInput, shaped according to ispec + The pipeline input + p.data_o : StageOutput, shaped according to ospec + The pipeline output + buf : output_shape according to ospec + A temporary (buffered) copy of a valid output + This is HELD if the output is not ready. It is updated + SYNCHRONOUSLY. + + Inputs Temp Output Data + ------- - ----- + P P N N ~NiR& N P (buf_full) + i o i o NoV o o + V R R V V R + + ------- - - - + 0 0 0 0 0 0 1 process(data_i) + 0 0 0 1 1 1 0 reg (odata, unchanged) + 0 0 1 0 0 0 1 process(data_i) + 0 0 1 1 0 0 1 process(data_i) + ------- - - - + 0 1 0 0 0 0 1 process(data_i) + 0 1 0 1 1 1 0 reg (odata, unchanged) + 0 1 1 0 0 0 1 process(data_i) + 0 1 1 1 0 0 1 process(data_i) + ------- - - - + 1 0 0 0 0 1 1 process(data_i) + 1 0 0 1 1 1 0 reg (odata, unchanged) + 1 0 1 0 0 1 1 process(data_i) + 1 0 1 1 0 1 1 process(data_i) + ------- - - - + 1 1 0 0 0 1 1 process(data_i) + 1 1 0 1 1 1 0 reg (odata, unchanged) + 1 1 1 0 0 1 1 process(data_i) + 1 1 1 1 0 1 1 process(data_i) + ------- - - - + + Note: PoR is *NOT* involved in the above decision-making. 
+ """ + + def elaborate(self, platform): + self.m = m = ControlBase.elaborate(self, platform) + + buf_full = Signal() # is data valid or not + buf = _spec(self.stage.ospec, "r_tmp") # output type + + # some temporaries + p_valid_i = Signal(reset_less=True) + m.d.comb += p_valid_i.eq(self.p.valid_i_test) + + m.d.comb += self.n.valid_o.eq(buf_full | p_valid_i) + m.d.comb += self.p._ready_o.eq(~buf_full) + m.d.sync += buf_full.eq(~self.n.ready_i_test & self.n.valid_o) + + data_o = Mux(buf_full, buf, self.data_r) + data_o = self._postprocess(data_o) # XXX TBD, does nothing right now + m.d.comb += nmoperator.eq(self.n.data_o, data_o) + m.d.sync += nmoperator.eq(buf, self.n.data_o) + + return self.m + + +class PassThroughHandshake(ControlBase): + """ A control block that delays by one clock cycle. + + Inputs Temporary Output Data + ------- ------------------ ----- ---- + P P N N PiV& PiV| NiR| pvr N P (pvr) + i o i o PoR ~PoR ~NoV o o + V R R V V R + + ------- - - - - - - + 0 0 0 0 0 1 1 0 1 1 odata (unchanged) + 0 0 0 1 0 1 0 0 1 0 odata (unchanged) + 0 0 1 0 0 1 1 0 1 1 odata (unchanged) + 0 0 1 1 0 1 1 0 1 1 odata (unchanged) + ------- - - - - - - + 0 1 0 0 0 0 1 0 0 1 odata (unchanged) + 0 1 0 1 0 0 0 0 0 0 odata (unchanged) + 0 1 1 0 0 0 1 0 0 1 odata (unchanged) + 0 1 1 1 0 0 1 0 0 1 odata (unchanged) + ------- - - - - - - + 1 0 0 0 0 1 1 1 1 1 process(in) + 1 0 0 1 0 1 0 0 1 0 odata (unchanged) + 1 0 1 0 0 1 1 1 1 1 process(in) + 1 0 1 1 0 1 1 1 1 1 process(in) + ------- - - - - - - + 1 1 0 0 1 1 1 1 1 1 process(in) + 1 1 0 1 1 1 0 0 1 0 odata (unchanged) + 1 1 1 0 1 1 1 1 1 1 process(in) + 1 1 1 1 1 1 1 1 1 1 process(in) + ------- - - - - - - + + """ + + def elaborate(self, platform): + self.m = m = ControlBase.elaborate(self, platform) + + r_data = _spec(self.stage.ospec, "r_tmp") # output type + + # temporaries + p_valid_i = Signal(reset_less=True) + pvr = Signal(reset_less=True) + m.d.comb += p_valid_i.eq(self.p.valid_i_test) + m.d.comb += pvr.eq(p_valid_i & 
self.p.ready_o) + + m.d.comb += self.p.ready_o.eq(~self.n.valid_o | self.n.ready_i_test) + m.d.sync += self.n.valid_o.eq(p_valid_i | ~self.p.ready_o) + + odata = Mux(pvr, self.data_r, r_data) + m.d.sync += nmoperator.eq(r_data, odata) + r_data = self._postprocess(r_data) # XXX TBD, does nothing right now + m.d.comb += nmoperator.eq(self.n.data_o, r_data) + + return m + + +class RegisterPipeline(UnbufferedPipeline): + """ A pipeline stage that delays by one clock cycle, creating a + sync'd latch out of data_o and valid_o as an indirect byproduct + of using PassThroughStage + """ + def __init__(self, iospecfn): + UnbufferedPipeline.__init__(self, PassThroughStage(iospecfn)) + + +class FIFOControl(ControlBase): + """ FIFO Control. Uses Queue to store data, coincidentally + happens to have same valid/ready signalling as Stage API. + + data_i -> fifo.din -> FIFO -> fifo.dout -> data_o + """ + def __init__(self, depth, stage, in_multi=None, stage_ctl=False, + fwft=True, pipe=False): + """ FIFO Control + + * :depth: number of entries in the FIFO + * :stage: data processing block + * :fwft: first word fall-thru mode (non-fwft introduces delay) + * :pipe: specifies pipe mode. + + when fwft = True it indicates that transfers may occur + combinatorially through stage processing in the same clock cycle. + This requires that the Stage be a Moore FSM: + https://en.wikipedia.org/wiki/Moore_machine + + when fwft = False it indicates that all output signals are + produced only from internal registers or memory, i.e. that the + Stage is a Mealy FSM: + https://en.wikipedia.org/wiki/Mealy_machine + + data is processed (and located) as follows: + + self.p self.stage temp fn temp fn temp fp self.n + data_i->process()->result->cat->din.FIFO.dout->cat(data_o) + + yes, really: cat produces a Cat() which can be assigned to. 
+ this is how the FIFO gets de-catted without needing a de-cat + function + """ + self.fwft = fwft + self.pipe = pipe + self.fdepth = depth + ControlBase.__init__(self, stage, in_multi, stage_ctl) + + def elaborate(self, platform): + self.m = m = ControlBase.elaborate(self, platform) + + # make a FIFO with a signal of equal width to the data_o. + (fwidth, _) = nmoperator.shape(self.n.data_o) + fifo = Queue(fwidth, self.fdepth, fwft=self.fwft, pipe=self.pipe) + m.submodules.fifo = fifo + + def processfn(data_i): + # store result of processing in combinatorial temporary + result = _spec(self.stage.ospec, "r_temp") + m.d.comb += nmoperator.eq(result, self.process(data_i)) + return nmoperator.cat(result) + + ## prev: make the FIFO (Queue object) "look" like a PrevControl... + m.submodules.fp = fp = PrevControl() + fp.valid_i, fp._ready_o, fp.data_i = fifo.we, fifo.writable, fifo.din + m.d.comb += fp._connect_in(self.p, fn=processfn) + + # next: make the FIFO (Queue object) "look" like a NextControl... + m.submodules.fn = fn = NextControl() + fn.valid_o, fn.ready_i, fn.data_o = fifo.readable, fifo.re, fifo.dout + connections = fn._connect_out(self.n, fn=nmoperator.cat) + valid_eq, ready_eq, data_o = connections + + # ok ok so we can't just do the ready/valid eqs straight: + # first 2 from connections are the ready/valid, 3rd is data. + if self.fwft: + m.d.comb += [valid_eq, ready_eq] # combinatorial on next ready/valid + else: + m.d.sync += [valid_eq, ready_eq] # non-fwft mode needs sync + data_o = self._postprocess(data_o) # XXX TBD, does nothing right now + m.d.comb += data_o + + return m + + +# aka "RegStage". 
+# NOTE(review): the three classes below REDEFINE (shadow) the hand-coded
+# UnbufferedPipeline / PassThroughHandshake / BufferedHandshake classes
+# declared earlier in this same module, replacing them with FIFOControl
+# (Queue-backed) equivalents.  only these later definitions survive at
+# import time; the earlier implementations become dead code.  the comments
+# ("aka ...") suggest this is deliberate -- confirm before relying on it.
+class UnbufferedPipeline(FIFOControl):
+    def __init__(self, stage, in_multi=None, stage_ctl=False):
+        # depth-1 FIFO, first-word-fall-through, non-pipe mode
+        FIFOControl.__init__(self, 1, stage, in_multi, stage_ctl,
+                             fwft=True, pipe=False)
+
+# aka "BreakReadyStage" XXX had to set fwft=True to get it to work
+class PassThroughHandshake(FIFOControl):
+    def __init__(self, stage, in_multi=None, stage_ctl=False):
+        # depth-1 FIFO, fwft, pipe mode (ready/valid registered)
+        FIFOControl.__init__(self, 1, stage, in_multi, stage_ctl,
+                             fwft=True, pipe=True)
+
+# this is *probably* BufferedHandshake, although test #997 now succeeds.
+class BufferedHandshake(FIFOControl):
+    def __init__(self, stage, in_multi=None, stage_ctl=False):
+        # depth-2 FIFO: the extra entry provides the "buffer" slot that
+        # absorbs one stall cycle, as the hand-coded version did
+        FIFOControl.__init__(self, 2, stage, in_multi, stage_ctl,
+                             fwft=True, pipe=False)
+
+
+"""
+# this is *probably* SimpleHandshake (note: memory cell size=0)
+class SimpleHandshake(FIFOControl):
+    def __init__(self, stage, in_multi=None, stage_ctl=False):
+        FIFOControl.__init__(self, 0, stage, in_multi, stage_ctl,
+                             fwft=True, pipe=False)
+"""
diff --git a/src/nmutil/stageapi.py b/src/nmutil/stageapi.py
new file mode 100644
index 0000000..b709abd
--- /dev/null
+++ b/src/nmutil/stageapi.py
@@ -0,0 +1,280 @@
+""" Stage API
+
+    Associated development bugs:
+    * http://bugs.libre-riscv.org/show_bug.cgi?id=148
+    * http://bugs.libre-riscv.org/show_bug.cgi?id=64
+    * http://bugs.libre-riscv.org/show_bug.cgi?id=57
+
+    Stage API:
+    ---------
+
+    stage requires compliance with a strict API that may be
+    implemented in several means, including as a static class.
+
+    Stages do not HOLD data, and they definitely do not contain
+    signalling (ready/valid).  They do however specify the FORMAT
+    of the incoming and outgoing data, and they provide a means to
+    PROCESS that data (from incoming format to outgoing format).
+
+    Stage Blocks really should be combinatorial blocks (Moore FSMs).
+    It would be ok to have input come in from sync'd sources
+    (clock-driven, Mealy FSMs), however by doing so they would no longer
+    be deterministic, and chaining such blocks with such side-effects
+    together could result in unexpected, unpredictable, unreproducible
+    behaviour.
+
+    So this is generally to be avoided, unless you know what you are doing.
+    https://en.wikipedia.org/wiki/Moore_machine
+    https://en.wikipedia.org/wiki/Mealy_machine
+
+    the methods of a stage instance must be as follows:
+
+    * ispec() - Input data format specification.  Takes a bit of explaining.
+                The requirements are: something that eventually derives from
+                nmigen Value must be returned *OR* an iterator or iterable
+                or sequence (list, tuple etc.) or generator must *yield*
+                thing(s) that (eventually) derive from the nmigen Value class.
+
+                Complex to state, very simple in practice:
+                see test_buf_pipe.py for over 25 worked examples.
+
+    * ospec() - Output data format specification.
+                format requirements identical to ispec.
+
+    * process(i) - Optional function for processing ispec-formatted data.
+                returns a combinatorial block of a result that
+                may be assigned to the output, by way of the "nmoperator.eq"
+                function.  Note that what is returned here can be
+                extremely flexible.  Even a dictionary can be returned
+                as long as it has fields that match precisely with the
+                Record into which its values are intended to be assigned.
+                Again: see example unit tests for details.
+
+    * setup(m, i) - Optional function for setting up submodules.
+                may be used for more complex stages, to link
+                the input (i) to submodules.  must take responsibility
+                for adding those submodules to the module (m).
+                the submodules must be combinatorial blocks and
+                must have their inputs and output linked combinatorially.
+ + Both StageCls (for use with non-static classes) and Stage (for use + by static classes) are abstract classes from which, for convenience + and as a courtesy to other developers, anything conforming to the + Stage API may *choose* to derive. See Liskov Substitution Principle: + https://en.wikipedia.org/wiki/Liskov_substitution_principle + + StageChain: + ---------- + + A useful combinatorial wrapper around stages that chains them together + and then presents a Stage-API-conformant interface. By presenting + the same API as the stages it wraps, it can clearly be used recursively. + + StageHelper: + ---------- + + A convenience wrapper around a Stage-API-compliant "thing" which + complies with the Stage API and provides mandatory versions of + all the optional bits. +""" + +from nmigen import Elaboratable +from abc import ABCMeta, abstractmethod +import inspect + +from nmutil import nmoperator + + +def _spec(fn, name=None): + """ useful function that determines if "fn" has an argument "name". + if so, fn(name) is called otherwise fn() is called. + + means that ispec and ospec can be declared with *or without* + a name argument. normally it would be necessary to have + "ispec(name=None)" to achieve the same effect. + """ + if name is None: + return fn() + varnames = dict(inspect.getmembers(fn.__code__))['co_varnames'] + if 'name' in varnames: + return fn(name=name) + return fn() + + +class StageCls(metaclass=ABCMeta): + """ Class-based "Stage" API. requires instantiation (after derivation) + + see "Stage API" above.. Note: python does *not* require derivation + from this class. All that is required is that the pipelines *have* + the functions listed in this class. Derivation from this class + is therefore merely a "courtesy" to maintainers. 
+ """ + @abstractmethod + def ispec(self): pass # REQUIRED + @abstractmethod + def ospec(self): pass # REQUIRED + #@abstractmethod + #def setup(self, m, i): pass # OPTIONAL + #@abstractmethod + #def process(self, i): pass # OPTIONAL + + +class Stage(metaclass=ABCMeta): + """ Static "Stage" API. does not require instantiation (after derivation) + + see "Stage API" above. Note: python does *not* require derivation + from this class. All that is required is that the pipelines *have* + the functions listed in this class. Derivation from this class + is therefore merely a "courtesy" to maintainers. + """ + @staticmethod + @abstractmethod + def ispec(): pass + + @staticmethod + @abstractmethod + def ospec(): pass + + #@staticmethod + #@abstractmethod + #def setup(m, i): pass + + #@staticmethod + #@abstractmethod + #def process(i): pass + + +class StageHelper(Stage): + """ a convenience wrapper around something that is Stage-API-compliant. + (that "something" may be a static class, for example). 
+ + StageHelper happens to also be compliant with the Stage API, + it differs from the stage that it wraps in that all the "optional" + functions are provided (hence the designation "convenience wrapper") + """ + def __init__(self, stage): + self.stage = stage + self._ispecfn = None + self._ospecfn = None + if stage is not None: + self.set_specs(self, self) + + def ospec(self, name=None): + assert self._ospecfn is not None + return _spec(self._ospecfn, name) + + def ispec(self, name=None): + assert self._ispecfn is not None + return _spec(self._ispecfn, name) + + def set_specs(self, p, n): + """ sets up the ispecfn and ospecfn for getting input and output data + """ + if hasattr(p, "stage"): + p = p.stage + if hasattr(n, "stage"): + n = n.stage + self._ispecfn = p.ispec + self._ospecfn = n.ospec + + def new_specs(self, name): + """ allocates new ispec and ospec pair + """ + return (_spec(self.ispec, "%s_i" % name), + _spec(self.ospec, "%s_o" % name)) + + def process(self, i): + if self.stage and hasattr(self.stage, "process"): + return self.stage.process(i) + return i + + def setup(self, m, i): + if self.stage is not None and hasattr(self.stage, "setup"): + self.stage.setup(m, i) + + def _postprocess(self, i): # XXX DISABLED + return i # RETURNS INPUT + if hasattr(self.stage, "postprocess"): + return self.stage.postprocess(i) + return i + + +class StageChain(StageHelper): + """ pass in a list of stages (combinatorial blocks), and they will + automatically be chained together via their input and output specs + into a combinatorial chain, to create one giant combinatorial + block. + + the end result conforms to the exact same Stage API. + + * input to this class will be the input of the first stage + * output of first stage goes into input of second + * output of second goes into input into third + * ... (etc. etc.) 
+        * the output of this class will be the output of the last stage
+
+        NOTE: whilst this is very similar to ControlBase.connect(), it is
+        *really* important to appreciate that StageChain is pure
+        combinatorial and bypasses (does not involve, at all, ready/valid
+        signalling OF ANY KIND).
+
+        ControlBase.connect on the other hand respects, connects, and uses
+        ready/valid signalling.
+
+        Arguments:
+
+        * :chain: a chain of combinatorial blocks conforming to the Stage API
+                  NOTE: StageChain.ispec and ospec have to have something
+                  to return (beginning and end specs of the chain),
+                  therefore the chain argument must be non-zero length
+
+        * :specallocate: if set, new input and output data will be allocated
+                         and connected (eq'd) to each chained Stage.
+                         in some cases if this is not done, the nmigen warning
+                         "driving from two sources, module is being flattened"
+                         will be issued.
+
+        NOTE: DO NOT use StageChain with combinatorial blocks that have
+        side-effects (state-based / clock-based input) or conditional
+        (inter-chain) dependencies, unless you really know what you are doing.
+ """ + def __init__(self, chain, specallocate=False): + assert len(chain) > 0, "stage chain must be non-zero length" + self.chain = chain + StageHelper.__init__(self, None) + if specallocate: + self.setup = self._sa_setup + else: + self.setup = self._na_setup + self.set_specs(self.chain[0], self.chain[-1]) + + def _sa_setup(self, m, i): + for (idx, c) in enumerate(self.chain): + if hasattr(c, "setup"): + c.setup(m, i) # stage may have some module stuff + ofn = self.chain[idx].ospec # last assignment survives + cname = 'chainin%d' % idx + o = _spec(ofn, cname) + if isinstance(o, Elaboratable): + setattr(m.submodules, cname, o) + m.d.comb += nmoperator.eq(o, c.process(i)) # process input into "o" + if idx == len(self.chain)-1: + break + ifn = self.chain[idx+1].ispec # new input on next loop + i = _spec(ifn, 'chainin%d' % (idx+1)) + m.d.comb += nmoperator.eq(i, o) # assign to next input + self.o = o + return self.o # last loop is the output + + def _na_setup(self, m, i): + for (idx, c) in enumerate(self.chain): + if hasattr(c, "setup"): + c.setup(m, i) # stage may have some module stuff + i = o = c.process(i) # store input into "o" + self.o = o + return self.o # last loop is the output + + def process(self, i): + return self.o # conform to Stage API: return last-loop output + + diff --git a/src/nmutil/test/__init__.py b/src/nmutil/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/nmutil/test/example_buf_pipe.py b/src/nmutil/test/example_buf_pipe.py new file mode 100644 index 0000000..61e9b13 --- /dev/null +++ b/src/nmutil/test/example_buf_pipe.py @@ -0,0 +1,103 @@ +""" Pipeline and BufferedHandshake examples +""" + +from nmutil.nmoperator import eq +from nmutil.iocontrol import (PrevControl, NextControl) +from nmutil.singlepipe import (PrevControl, NextControl, ControlBase, + StageCls, Stage, StageChain, + BufferedHandshake, UnbufferedPipeline) + +from nmigen import Signal, Module +from nmigen.cli import verilog, rtlil + + +class 
ExampleAddStage(StageCls): + """ an example of how to use the buffered pipeline, as a class instance + """ + + def ispec(self): + """ returns a tuple of input signals which will be the incoming data + """ + return (Signal(16), Signal(16)) + + def ospec(self): + """ returns an output signal which will happen to contain the sum + of the two inputs + """ + return Signal(16) + + def process(self, i): + """ process the input data (sums the values in the tuple) and returns it + """ + return i[0] + i[1] + + +class ExampleBufPipeAdd(BufferedHandshake): + """ an example of how to use the buffered pipeline, using a class instance + """ + + def __init__(self): + addstage = ExampleAddStage() + BufferedHandshake.__init__(self, addstage) + + +class ExampleStage(Stage): + """ an example of how to use the buffered pipeline, in a static class + fashion + """ + + def ispec(): + return Signal(16, name="example_input_signal") + + def ospec(): + return Signal(16, name="example_output_signal") + + def process(i): + """ process the input data and returns it (adds 1) + """ + return i + 1 + + +class ExampleStageCls(StageCls): + """ an example of how to use the buffered pipeline, in a static class + fashion + """ + + def ispec(self): + return Signal(16, name="example_input_signal") + + def ospec(self): + return Signal(16, name="example_output_signal") + + def process(self, i): + """ process the input data and returns it (adds 1) + """ + return i + 1 + + +class ExampleBufPipe(BufferedHandshake): + """ an example of how to use the buffered pipeline. + """ + + def __init__(self): + BufferedHandshake.__init__(self, ExampleStage) + + +class ExamplePipeline(UnbufferedPipeline): + """ an example of how to use the unbuffered pipeline. 
+ """ + + def __init__(self): + UnbufferedPipeline.__init__(self, ExampleStage) + + +if __name__ == '__main__': + dut = ExampleBufPipe() + vl = rtlil.convert(dut, ports=dut.ports()) + with open("test_bufpipe.il", "w") as f: + f.write(vl) + + dut = ExamplePipeline() + vl = rtlil.convert(dut, ports=dut.ports()) + with open("test_combpipe.il", "w") as f: + f.write(vl) diff --git a/src/nmutil/test/test_buf_pipe.py b/src/nmutil/test/test_buf_pipe.py new file mode 100644 index 0000000..f0bacbb --- /dev/null +++ b/src/nmutil/test/test_buf_pipe.py @@ -0,0 +1,1563 @@ +""" Unit tests for Buffered and Unbuffered pipelines + + contains useful worked examples of how to use the Pipeline API, + including: + + * Combinatorial Stage "Chaining" + * class-based data stages + * nmigen module-based data stages + * special nmigen module-based data stage, where the stage *is* the module + * Record-based data stages + * static-class data stages + * multi-stage pipelines (and how to connect them) + * how to *use* the pipelines (see Test5) - how to get data in and out + +""" + +from nmigen import Module, Signal, Mux, Const, Elaboratable +from nmigen.hdl.rec import Record +from nmigen.compat.sim import run_simulation +from nmigen.cli import verilog, rtlil + +from nmutil.test.example_buf_pipe import ExampleBufPipe, ExampleBufPipeAdd +from nmutil.test.example_buf_pipe import ExamplePipeline, UnbufferedPipeline +from nmutil.test.example_buf_pipe import ExampleStageCls +from nmutil.iocontrol import PrevControl, NextControl +from nmutil.stageapi import StageChain, StageCls +from nmutil.singlepipe import ControlBase +from nmutil.singlepipe import UnbufferedPipeline2 +from nmutil.singlepipe import SimpleHandshake +from nmutil.singlepipe import BufferedHandshake +from nmutil.singlepipe import PassThroughHandshake +from nmutil.singlepipe import PassThroughStage +from nmutil.singlepipe import FIFOControl +from nmutil.singlepipe import RecordObject +from nmutil.singlepipe import MaskCancellable + +from 
from random import randint, seed

#seed(4)


def check_o_n_valid(dut, val):
    """Assert that the next-stage 'valid' output equals *val*.

    Simulation-generator helper: yields dut.n.valid_o to the simulator,
    receives the sampled value back, and asserts it matches.
    """
    o_n_valid = yield dut.n.valid_o
    assert o_n_valid == val


def check_o_n_valid2(dut, val):
    """Same check as check_o_n_valid.

    Kept as a separate name because tbench2 uses it; the original was a
    verbatim copy-paste of check_o_n_valid, so delegate instead of
    duplicating the code.
    """
    yield from check_o_n_valid(dut, val)


def tbench(dut):
    """Hand-written testbench for a single-register buffered pipe.

    Drives data into dut.p (previous/input side) and watches dut.n
    (next/output side), checking that valid_o appears one clock after
    valid_i, and that buffered values drain out correctly after a stall.
    """
    #yield dut.i_p_rst.eq(1)
    yield dut.n.ready_i.eq(0)
    #yield dut.p.ready_o.eq(0)
    yield
    yield
    #yield dut.i_p_rst.eq(0)
    yield dut.n.ready_i.eq(1)
    yield dut.p.data_i.eq(5)
    yield dut.p.valid_i.eq(1)
    yield

    yield dut.p.data_i.eq(7)
    yield from check_o_n_valid(dut, 0)  # effects of i_p_valid delayed
    yield
    yield from check_o_n_valid(dut, 1)  # ok *now* i_p_valid effect is felt

    yield dut.p.data_i.eq(2)
    yield
    # begin going into "stall" (next stage says not-ready)
    yield dut.n.ready_i.eq(0)
    yield dut.p.data_i.eq(9)
    yield
    yield dut.p.valid_i.eq(0)
    yield dut.p.data_i.eq(12)
    yield
    yield dut.p.data_i.eq(32)
    yield dut.n.ready_i.eq(1)
    yield
    yield from check_o_n_valid(dut, 1)  # buffer still needs to output
    yield
    yield from check_o_n_valid(dut, 1)  # buffer still needs to output
    yield
    yield from check_o_n_valid(dut, 0)  # buffer outputted, *now* we're done.
    yield
+ yield + + +def tbench2(dut): + #yield dut.p.i_rst.eq(1) + yield dut.n.ready_i.eq(0) + #yield dut.p.ready_o.eq(0) + yield + yield + #yield dut.p.i_rst.eq(0) + yield dut.n.ready_i.eq(1) + yield dut.p.data_i.eq(5) + yield dut.p.valid_i.eq(1) + yield + + yield dut.p.data_i.eq(7) + yield from check_o_n_valid2(dut, 0) # effects of i_p_valid delayed 2 clocks + yield + yield from check_o_n_valid2(dut, 0) # effects of i_p_valid delayed 2 clocks + + yield dut.p.data_i.eq(2) + yield + yield from check_o_n_valid2(dut, 1) # ok *now* i_p_valid effect is felt + yield dut.n.ready_i.eq(0) # begin going into "stall" (next stage says ready) + yield dut.p.data_i.eq(9) + yield + yield dut.p.valid_i.eq(0) + yield dut.p.data_i.eq(12) + yield + yield dut.p.data_i.eq(32) + yield dut.n.ready_i.eq(1) + yield + yield from check_o_n_valid2(dut, 1) # buffer still needs to output + yield + yield from check_o_n_valid2(dut, 1) # buffer still needs to output + yield + yield from check_o_n_valid2(dut, 1) # buffer still needs to output + yield + yield from check_o_n_valid2(dut, 0) # buffer outputted, *now* we're done. 
class Test3:
    """Random-pacing send/receive driver for a single-data-word pipeline.

    Feeds sequential values 1..num_tests into dut.p with randomised gaps,
    pulls results out of dut.n with randomised stalls, and checks each
    output via resultfn.  ``num_tests`` is a module-level global.
    """

    def __init__(self, dut, resultfn):
        self.dut = dut
        self.resultfn = resultfn
        self.data = []
        for i in range(num_tests):
            #data.append(randint(0, (1 << 16) - 1))
            self.data.append(i + 1)
        self.i = 0  # index of next value to send
        self.o = 0  # index of next value expected out

    def send(self):
        """Generator: drive valid_i/data_i with random pacing."""
        while self.o != len(self.data):
            send_range = randint(0, 3)
            for j in range(randint(1, 10)):
                if send_range == 0:
                    send = True
                else:
                    send = randint(0, send_range) != 0
                o_p_ready = yield self.dut.p.ready_o
                if not o_p_ready:
                    # pipeline not accepting input: wait a cycle
                    yield
                    continue
                if send and self.i != len(self.data):
                    yield self.dut.p.valid_i.eq(1)
                    yield self.dut.p.data_i.eq(self.data[self.i])
                    self.i += 1
                else:
                    yield self.dut.p.valid_i.eq(0)
                yield

    def rcv(self):
        """Generator: randomly stall ready_i and check each output."""
        while self.o != len(self.data):
            ready_range = randint(0, 3)
            for j in range(randint(1, 10)):
                # the level driven onto ready_i (original misleadingly
                # called this "stall": True means ready, not stalled)
                ready = randint(0, ready_range) != 0
                yield self.dut.n.ready_i.eq(ready)
                yield
                o_n_valid = yield self.dut.n.valid_o
                i_n_ready = yield self.dut.n.ready_i_test
                if not o_n_valid or not i_n_ready:
                    continue
                data_o = yield self.dut.n.data_o
                self.resultfn(data_o, self.data[self.o], self.i, self.o)
                self.o += 1
                if self.o == len(self.data):
                    break


def resultfn_3(data_o, expected, i, o):
    """Check the pipeline (an add-1 stage) produced expected+1.

    i/o send/receive indices are included in the failure message.
    """
    assert data_o == expected + 1, \
        "%d-%d data %x not match %x\n" \
        % (i, o, data_o, expected)


def data_placeholder():
    """Return num_tests PlaceHolder objects with random 16-bit src1/src2.

    NOTE(review): the original used ``1<<16-1`` which, by operator
    precedence, is ``1 << 15`` (32768), not the clearly-intended 16-bit
    maximum; fixed to ``(1 << 16) - 1`` (65535).
    """
    data = []
    for i in range(num_tests):
        d = PlaceHolder()
        d.src1 = randint(0, (1 << 16) - 1)
        d.src2 = randint(0, (1 << 16) - 1)
        data.append(d)
    return data


def data_dict():
    """Return num_tests {'src1','src2'} dicts with random 16-bit values
    (same ``(1 << 16) - 1`` precedence fix as data_placeholder)."""
    data = []
    for i in range(num_tests):
        data.append({'src1': randint(0, (1 << 16) - 1),
                     'src2': randint(0, (1 << 16) - 1)})
    return data


class Test5:
    """Random-pacing driver for pipelines taking (src1, src2) operands.

    data may be supplied explicitly; otherwise num_tests random 16-bit
    pairs are generated (precedence fix applied as above).
    """

    def __init__(self, dut, resultfn, data=None, stage_ctl=False):
        self.dut = dut
        self.resultfn = resultfn
        self.stage_ctl = stage_ctl
        if data:
            self.data = data
        else:
            self.data = []
            for i in range(num_tests):
                self.data.append((randint(0, (1 << 16) - 1),
                                  randint(0, (1 << 16) - 1)))
        self.i = 0  # index of next operand pair to send
        self.o = 0  # index of next result expected out
def send(self): + while self.o != len(self.data): + send_range = randint(0, 3) + for j in range(randint(1,10)): + if send_range == 0: + send = True + else: + send = randint(0, send_range) != 0 + #send = True + o_p_ready = yield self.dut.p.ready_o + if not o_p_ready: + yield + continue + if send and self.i != len(self.data): + yield self.dut.p.valid_i.eq(1) + for v in self.dut.set_input(self.data[self.i]): + yield v + self.i += 1 + else: + yield self.dut.p.valid_i.eq(0) + yield + + def rcv(self): + while self.o != len(self.data): + stall_range = randint(0, 3) + for j in range(randint(1,10)): + ready = randint(0, stall_range) != 0 + #ready = True + yield self.dut.n.ready_i.eq(ready) + yield + o_n_valid = yield self.dut.n.valid_o + i_n_ready = yield self.dut.n.ready_i_test + if not o_n_valid or not i_n_ready: + continue + if isinstance(self.dut.n.data_o, Record): + data_o = {} + dod = self.dut.n.data_o + for k, v in dod.fields.items(): + data_o[k] = yield v + else: + data_o = yield self.dut.n.data_o + self.resultfn(data_o, self.data[self.o], self.i, self.o) + self.o += 1 + if self.o == len(self.data): + break + +class TestMask: + def __init__(self, dut, resultfn, maskwid, data=None, stage_ctl=False, + latching=False): + self.dut = dut + self.resultfn = resultfn + self.stage_ctl = stage_ctl + self.maskwid = maskwid + self.latching = latching + self.latchmode = 0 + if data: + self.data = data + else: + self.data = [] + for i in range(num_tests): + self.data.append((randint(0, 1<<16-1), randint(0, 1<<16-1))) + self.i = 0 + self.o = 0 + + def send(self): + while self.o != len(self.data): + send_range = randint(0, 3) + for j in range(randint(1,10)): + if send_range == 0: + send = True + else: + send = randint(0, send_range) != 0 + #send = True + o_p_ready = yield self.dut.p.ready_o + if not o_p_ready: + yield + continue + + if self.latching: + latchtest = randint(0, 3) == 0 + if latchtest: + yield self.dut.p.valid_i.eq(0) + yield self.dut.p.mask_i.eq(0) + # wait for 
pipeline to flush, then invert state + for i in range(10): + yield + self.latchmode = 1 - self.latchmode + yield self.dut.latchmode.eq(self.latchmode) + mode = yield self.dut.latchmode + print ("latching", mode) + + if send and self.i != len(self.data): + print ("send", self.i, self.data[self.i]) + yield self.dut.p.valid_i.eq(1) + yield self.dut.p.mask_i.eq(1<