split out normtopack to separate module

[ieee754fpu.git] / src / add / nmigen_add_experiment.py
diff --git a/src/add/nmigen_add_experiment.py b/src/add/nmigen_add_experiment.py

index c96d5a18293473c6be11de692a99c056ea02ab6b..f828a67ae0142d16884d34552ce7b9e87c031c65 100644 (file)
--- a/src/add/nmigen_add_experiment.py
+++ b/src/add/nmigen_add_experiment.py
@@ -9,287 +9,21 @@ from math import log
  
  from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  from fpbase import MultiShiftRMerge, Trigger
  
  from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  from fpbase import MultiShiftRMerge, Trigger
-from singlepipe import (ControlBase, StageChain, UnbufferedPipeline)
+from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
+                        PassThroughStage)
  from multipipe import CombMuxOutPipe
  from multipipe import PriorityCombMuxInPipe
  
  from multipipe import CombMuxOutPipe
  from multipipe import PriorityCombMuxInPipe
  
-#from fpbase import FPNumShiftMultiRight
-
-
-class FPState(FPBase):
-    def __init__(self, state_from):
-        self.state_from = state_from
-
-    def set_inputs(self, inputs):
-        self.inputs = inputs
-        for k,v in inputs.items():
-            setattr(self, k, v)
-
-    def set_outputs(self, outputs):
-        self.outputs = outputs
-        for k,v in outputs.items():
-            setattr(self, k, v)
-
-
-class FPGetSyncOpsMod:
-    def __init__(self, width, num_ops=2):
-        self.width = width
-        self.num_ops = num_ops
-        inops = []
-        outops = []
-        for i in range(num_ops):
-            inops.append(Signal(width, reset_less=True))
-            outops.append(Signal(width, reset_less=True))
-        self.in_op = inops
-        self.out_op = outops
-        self.stb = Signal(num_ops)
-        self.ack = Signal()
-        self.ready = Signal(reset_less=True)
-        self.out_decode = Signal(reset_less=True)
-
-    def elaborate(self, platform):
-        m = Module()
-        m.d.comb += self.ready.eq(self.stb == Const(-1, (self.num_ops, False)))
-        m.d.comb += self.out_decode.eq(self.ack & self.ready)
-        with m.If(self.out_decode):
-            for i in range(self.num_ops):
-                m.d.comb += [
-                        self.out_op[i].eq(self.in_op[i]),
-                ]
-        return m
-
-    def ports(self):
-        return self.in_op + self.out_op + [self.stb, self.ack]
-
-
-class FPOps(Trigger):
-    def __init__(self, width, num_ops):
-        Trigger.__init__(self)
-        self.width = width
-        self.num_ops = num_ops
-
-        res = []
-        for i in range(num_ops):
-            res.append(Signal(width))
-        self.v  = Array(res)
-
-    def ports(self):
-        res = []
-        for i in range(self.num_ops):
-            res.append(self.v[i])
-        res.append(self.ack)
-        res.append(self.stb)
-        return res
-
-
-class InputGroup:
-    def __init__(self, width, num_ops=2, num_rows=4):
-        self.width = width
-        self.num_ops = num_ops
-        self.num_rows = num_rows
-        self.mmax = int(log(self.num_rows) / log(2))
-        self.rs = []
-        self.mid = Signal(self.mmax, reset_less=True) # multiplex id
-        for i in range(num_rows):
-            self.rs.append(FPGetSyncOpsMod(width, num_ops))
-        self.rs = Array(self.rs)
-
-        self.out_op = FPOps(width, num_ops)
-
-    def elaborate(self, platform):
-        m = Module()
-
-        pe = PriorityEncoder(self.num_rows)
-        m.submodules.selector = pe
-        m.submodules.out_op = self.out_op
-        m.submodules += self.rs
-
-        # connect priority encoder
-        in_ready = []
-        for i in range(self.num_rows):
-            in_ready.append(self.rs[i].ready)
-        m.d.comb += pe.i.eq(Cat(*in_ready))
-
-        active = Signal(reset_less=True)
-        out_en = Signal(reset_less=True)
-        m.d.comb += active.eq(~pe.n) # encoder active
-        m.d.comb += out_en.eq(active & self.out_op.trigger)
-
-        # encoder active: ack relevant input, record MID, pass output
-        with m.If(out_en):
-            rs = self.rs[pe.o]
-            m.d.sync += self.mid.eq(pe.o)
-            m.d.sync += rs.ack.eq(0)
-            m.d.sync += self.out_op.stb.eq(0)
-            for j in range(self.num_ops):
-                m.d.sync += self.out_op.v[j].eq(rs.out_op[j])
-        with m.Else():
-            m.d.sync += self.out_op.stb.eq(1)
-            # acks all default to zero
-            for i in range(self.num_rows):
-                m.d.sync += self.rs[i].ack.eq(1)
-
-        return m
-
-    def ports(self):
-        res = []
-        for i in range(self.num_rows):
-            inop = self.rs[i]
-            res += inop.in_op + [inop.stb]
-        return self.out_op.ports() + res + [self.mid]
-
-
-class FPGetOpMod:
-    def __init__(self, width):
-        self.in_op = FPOp(width)
-        self.out_op = Signal(width)
-        self.out_decode = Signal(reset_less=True)
-
-    def elaborate(self, platform):
-        m = Module()
-        m.d.comb += self.out_decode.eq((self.in_op.ack) & (self.in_op.stb))
-        m.submodules.get_op_in = self.in_op
-        #m.submodules.get_op_out = self.out_op
-        with m.If(self.out_decode):
-            m.d.comb += [
-                self.out_op.eq(self.in_op.v),
-            ]
-        return m
-
-
-class FPGetOp(FPState):
-    """ gets operand
-    """
-
-    def __init__(self, in_state, out_state, in_op, width):
-        FPState.__init__(self, in_state)
-        self.out_state = out_state
-        self.mod = FPGetOpMod(width)
-        self.in_op = in_op
-        self.out_op = Signal(width)
-        self.out_decode = Signal(reset_less=True)
-
-    def setup(self, m, in_op):
-        """ links module to inputs and outputs
-        """
-        setattr(m.submodules, self.state_from, self.mod)
-        m.d.comb += self.mod.in_op.eq(in_op)
-        m.d.comb += self.out_decode.eq(self.mod.out_decode)
-
-    def action(self, m):
-        with m.If(self.out_decode):
-            m.next = self.out_state
-            m.d.sync += [
-                self.in_op.ack.eq(0),
-                self.out_op.eq(self.mod.out_op)
-            ]
-        with m.Else():
-            m.d.sync += self.in_op.ack.eq(1)
-
-
-class FPNumBase2Ops:
-
-    def __init__(self, width, id_wid, m_extra=True):
-        self.a = FPNumBase(width, m_extra)
-        self.b = FPNumBase(width, m_extra)
-        self.mid = Signal(id_wid, reset_less=True)
-
-    def eq(self, i):
-        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
-
-    def ports(self):
-        return [self.a, self.b, self.mid]
-
-
-class FPADDBaseData:
-
-    def __init__(self, width, id_wid):
-        self.width = width
-        self.id_wid = id_wid
-        self.a  = Signal(width)
-        self.b  = Signal(width)
-        self.mid = Signal(id_wid, reset_less=True)
-
-    def eq(self, i):
-        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
-
-    def ports(self):
-        return [self.a, self.b, self.mid]
-
-
-class FPGet2OpMod(Trigger):
-    def __init__(self, width, id_wid):
-        Trigger.__init__(self)
-        self.width = width
-        self.id_wid = id_wid
-        self.i = self.ispec()
-        self.o = self.ospec()
-
-    def ispec(self):
-        return FPADDBaseData(self.width, self.id_wid)
-
-    def ospec(self):
-        return FPADDBaseData(self.width, self.id_wid)
-
-    def process(self, i):
-        return self.o
-
-    def elaborate(self, platform):
-        m = Trigger.elaborate(self, platform)
-        with m.If(self.trigger):
-            m.d.comb += [
-                self.o.eq(self.i),
-            ]
-        return m
-
-
-class FPGet2Op(FPState):
-    """ gets operands
-    """
-
-    def __init__(self, in_state, out_state, width, id_wid):
-        FPState.__init__(self, in_state)
-        self.out_state = out_state
-        self.mod = FPGet2OpMod(width, id_wid)
-        self.o = self.mod.ospec()
-        self.in_stb = Signal(reset_less=True)
-        self.out_ack = Signal(reset_less=True)
-        self.out_decode = Signal(reset_less=True)
-
-    def setup(self, m, i, in_stb, in_ack):
-        """ links module to inputs and outputs
-        """
-        m.submodules.get_ops = self.mod
-        m.d.comb += self.mod.i.eq(i)
-        m.d.comb += self.mod.stb.eq(in_stb)
-        m.d.comb += self.out_ack.eq(self.mod.ack)
-        m.d.comb += self.out_decode.eq(self.mod.trigger)
-        m.d.comb += in_ack.eq(self.mod.ack)
-
-    def action(self, m):
-        with m.If(self.out_decode):
-            m.next = self.out_state
-            m.d.sync += [
-                self.mod.ack.eq(0),
-                self.o.eq(self.mod.o),
-            ]
-        with m.Else():
-            m.d.sync += self.mod.ack.eq(1)
-
-
-class FPSCData:
-
-    def __init__(self, width, id_wid):
-        self.a = FPNumBase(width, True)
-        self.b = FPNumBase(width, True)
-        self.z = FPNumOut(width, False)
-        self.oz = Signal(width, reset_less=True)
-        self.out_do_z = Signal(reset_less=True)
-        self.mid = Signal(id_wid, reset_less=True)
-
-    def eq(self, i):
-        return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
-                self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
+from fpbase import FPState, FPID
+from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData,                              FPGet2OpMod, FPGet2Op)
+from fpcommon.denorm import (FPSCData, FPAddDeNormMod, FPAddDeNorm)
+from fpcommon.postcalc import FPAddStage1Data
+from fpcommon.postnormalise import (FPNorm1Data, FPNorm1ModSingle,
+                            FPNorm1ModMulti, FPNorm1Single, FPNorm1Multi)
+from fpcommon.roundz import (FPRoundData, FPRoundMod, FPRound)
+from fpcommon.corrections import (FPCorrectionsMod, FPCorrections)
+from fpcommon.pack import (FPPackData, FPPackMod, FPPack)
+from fpcommon.normtopack import FPNormToPack
  
  
  class FPAddSpecialCasesMod:
  
  
  class FPAddSpecialCasesMod:
@@ -412,21 +146,6 @@ class FPAddSpecialCasesMod:
          return m
  
  
          return m
  
  
-class FPID:
-    def __init__(self, id_wid):
-        self.id_wid = id_wid
-        if self.id_wid:
-            self.in_mid = Signal(id_wid, reset_less=True)
-            self.out_mid = Signal(id_wid, reset_less=True)
-        else:
-            self.in_mid = None
-            self.out_mid = None
-
-    def idsync(self, m):
-        if self.id_wid is not None:
-            m.d.sync += self.out_mid.eq(self.in_mid)
-
-
  class FPAddSpecialCases(FPState):
      """ special cases: NaNs, infs, zeros, denormalised
          NOTE: some of these are unique to add.  see "Special Operations"
  class FPAddSpecialCases(FPState):
      """ special cases: NaNs, infs, zeros, denormalised
          NOTE: some of these are unique to add.  see "Special Operations"
@@ -462,114 +181,43 @@ class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
  
      def __init__(self, width, id_wid):
          FPState.__init__(self, "special_cases")
  
      def __init__(self, width, id_wid):
          FPState.__init__(self, "special_cases")
-        self.smod = FPAddSpecialCasesMod(width, id_wid)
-        self.dmod = FPAddDeNormMod(width, id_wid)
+        self.width = width
+        self.id_wid = id_wid
          UnbufferedPipeline.__init__(self, self) # pipe is its own stage
          UnbufferedPipeline.__init__(self, self) # pipe is its own stage
-        self.o = self.ospec()
+        self.out = self.ospec()
  
      def ispec(self):
  
      def ispec(self):
-        return self.smod.ispec()
+        return FPADDBaseData(self.width, self.id_wid) # SpecialCases ispec
  
      def ospec(self):
  
      def ospec(self):
-        return self.dmod.ospec()
+        return FPSCData(self.width, self.id_wid) # DeNorm ospec
  
      def setup(self, m, i):
          """ links module to inputs and outputs
          """
  
      def setup(self, m, i):
          """ links module to inputs and outputs
          """
-        # these only needed for break-out (early-out)
-        # out_z = self.smod.ospec()
-        # out_do_z = Signal(reset_less=True)
-        self.smod.setup(m, i)
-        self.dmod.setup(m, self.smod.o)
-        #m.d.comb += out_do_z.eq(self.smod.o.out_do_z)
+        smod = FPAddSpecialCasesMod(self.width, self.id_wid)
+        dmod = FPAddDeNormMod(self.width, self.id_wid)
+
+        chain = StageChain([smod, dmod])
+        chain.setup(m, i)
  
  
-        # out_do_z=True, only needed for early-out (split pipeline)
-        #m.d.sync += out_z.z.v.eq(self.smod.o.z.v) # only take output
-        #m.d.sync += out_z.mid.eq(self.smod.o.mid)  # (and mid)
+        # only needed for break-out (early-out)
+        # self.out_do_z = smod.o.out_do_z
  
  
-        # out_do_z=False
-        m.d.comb += self.o.eq(self.dmod.o)
+        self.o = dmod.o
  
      def process(self, i):
          return self.o
  
      def action(self, m):
  
      def process(self, i):
          return self.o
  
      def action(self, m):
+        # for break-out (early-out)
          #with m.If(self.out_do_z):
          #    m.next = "put_z"
          #with m.Else():
          #with m.If(self.out_do_z):
          #    m.next = "put_z"
          #with m.Else():
+            m.d.sync += self.out.eq(self.process(None))
              m.next = "align"
  
  
              m.next = "align"
  
  
-class FPAddDeNormMod(FPState):
-
-    def __init__(self, width, id_wid):
-        self.width = width
-        self.id_wid = id_wid
-        self.i = self.ispec()
-        self.o = self.ospec()
-
-    def ispec(self):
-        return FPSCData(self.width, self.id_wid)
-
-    def ospec(self):
-        return FPSCData(self.width, self.id_wid)
-
-    def setup(self, m, i):
-        """ links module to inputs and outputs
-        """
-        m.submodules.denormalise = self
-        m.d.comb += self.i.eq(i)
-
-    def elaborate(self, platform):
-        m = Module()
-        m.submodules.denorm_in_a = self.i.a
-        m.submodules.denorm_in_b = self.i.b
-        m.submodules.denorm_out_a = self.o.a
-        m.submodules.denorm_out_b = self.o.b
-
-        with m.If(~self.i.out_do_z):
-            # XXX hmmm, don't like repeating identical code
-            m.d.comb += self.o.a.eq(self.i.a)
-            with m.If(self.i.a.exp_n127):
-                m.d.comb += self.o.a.e.eq(self.i.a.N126) # limit a exponent
-            with m.Else():
-                m.d.comb += self.o.a.m[-1].eq(1) # set top mantissa bit
-
-            m.d.comb += self.o.b.eq(self.i.b)
-            with m.If(self.i.b.exp_n127):
-                m.d.comb += self.o.b.e.eq(self.i.b.N126) # limit a exponent
-            with m.Else():
-                m.d.comb += self.o.b.m[-1].eq(1) # set top mantissa bit
-
-        m.d.comb += self.o.mid.eq(self.i.mid)
-        m.d.comb += self.o.z.eq(self.i.z)
-        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
-        m.d.comb += self.o.oz.eq(self.i.oz)
-
-        return m
-
-
-class FPAddDeNorm(FPState):
-
-    def __init__(self, width, id_wid):
-        FPState.__init__(self, "denormalise")
-        self.mod = FPAddDeNormMod(width)
-        self.out_a = FPNumBase(width)
-        self.out_b = FPNumBase(width)
-
-    def setup(self, m, i):
-        """ links module to inputs and outputs
-        """
-        self.mod.setup(m, i)
-
-        m.d.sync += self.out_a.eq(self.mod.out_a)
-        m.d.sync += self.out_b.eq(self.mod.out_b)
-
-    def action(self, m):
-        # Denormalised Number checks
-        m.next = "align"
-
-
  class FPAddAlignMultiMod(FPState):
  
      def __init__(self, width):
  class FPAddAlignMultiMod(FPState):
  
      def __init__(self, width):
@@ -627,8 +275,6 @@ class FPAddAlignMulti(FPState):
          m.submodules.align = self.mod
          m.d.comb += self.mod.in_a.eq(in_a)
          m.d.comb += self.mod.in_b.eq(in_b)
          m.submodules.align = self.mod
          m.d.comb += self.mod.in_a.eq(in_a)
          m.d.comb += self.mod.in_b.eq(in_b)
-        #m.d.comb += self.out_a.eq(self.mod.out_a)
-        #m.d.comb += self.out_b.eq(self.mod.out_b)
          m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
          m.d.sync += self.out_a.eq(self.mod.out_a)
          m.d.sync += self.out_b.eq(self.mod.out_b)
          m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
          m.d.sync += self.out_a.eq(self.mod.out_a)
          m.d.sync += self.out_b.eq(self.mod.out_b)
@@ -780,7 +426,6 @@ class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
  
      def ispec(self):
          return FPSCData(self.width, self.id_wid)
  
      def ispec(self):
          return FPSCData(self.width, self.id_wid)
-        #return FPNumBase2Ops(self.width, self.id_wid) # AlignSingle ispec
  
      def ospec(self):
          return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec
  
      def ospec(self):
          return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec
@@ -797,12 +442,13 @@ class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
          chain = StageChain([mod, a0mod, a1mod])
          chain.setup(m, i)
  
          chain = StageChain([mod, a0mod, a1mod])
          chain.setup(m, i)
  
-        m.d.comb += self.a1o.eq(a1mod.o)
+        self.o = a1mod.o
  
      def process(self, i):
  
      def process(self, i):
-        return self.a1o
+        return self.o
  
      def action(self, m):
  
      def action(self, m):
+        m.d.sync += self.a1o.eq(self.process(None))
          m.next = "normalise_1"
  
  
          m.next = "normalise_1"
  
  
@@ -909,21 +555,6 @@ class FPAddStage0(FPState):
          m.next = "add_1"
  
  
          m.next = "add_1"
  
  
-class FPAddStage1Data:
-
-    def __init__(self, width, id_wid):
-        self.z = FPNumBase(width, False)
-        self.out_do_z = Signal(reset_less=True)
-        self.oz = Signal(width, reset_less=True)
-        self.of = Overflow()
-        self.mid = Signal(id_wid, reset_less=True)
-
-    def eq(self, i):
-        return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
-                self.of.eq(i.of), self.mid.eq(i.mid)]
-
-
-
  class FPAddStage1Mod(FPState):
      """ Second stage of add: preparation for normalisation.
          detects when tot sum is too big (tot[27] is kinda a carry bit)
  class FPAddStage1Mod(FPState):
      """ Second stage of add: preparation for normalisation.
          detects when tot sum is too big (tot[27] is kinda a carry bit)
@@ -954,10 +585,6 @@ class FPAddStage1Mod(FPState):
  
      def elaborate(self, platform):
          m = Module()
  
      def elaborate(self, platform):
          m = Module()
-        #m.submodules.norm1_in_overflow = self.in_of
-        #m.submodules.norm1_out_overflow = self.out_of
-        #m.submodules.norm1_in_z = self.in_z
-        #m.submodules.norm1_out_z = self.out_z
          m.d.comb += self.o.z.eq(self.i.z)
          # tot[-1] (MSB) gets set when the sum overflows. shift result down
          with m.If(~self.i.out_do_z):
          m.d.comb += self.o.z.eq(self.i.z)
          # tot[-1] (MSB) gets set when the sum overflows. shift result down
          with m.If(~self.i.out_do_z):
@@ -1011,629 +638,63 @@ class FPAddStage1(FPState):
          m.next = "normalise_1"
  
  
          m.next = "normalise_1"
  
  
-class FPNormaliseModSingle:
  
  
-    def __init__(self, width):
-        self.width = width
-        self.in_z = self.ispec()
-        self.out_z = self.ospec()
+class FPPutZ(FPState):
  
  
-    def ispec(self):
-        return FPNumBase(self.width, False)
+    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
+        FPState.__init__(self, state)
+        if to_state is None:
+            to_state = "get_ops"
+        self.to_state = to_state
+        self.in_z = in_z
+        self.out_z = out_z
+        self.in_mid = in_mid
+        self.out_mid = out_mid
  
  
-    def ospec(self):
-        return FPNumBase(self.width, False)
+    def action(self, m):
+        if self.in_mid is not None:
+            m.d.sync += self.out_mid.eq(self.in_mid)
+        m.d.sync += [
+          self.out_z.z.v.eq(self.in_z)
+        ]
+        with m.If(self.out_z.z.stb & self.out_z.z.ack):
+            m.d.sync += self.out_z.z.stb.eq(0)
+            m.next = self.to_state
+        with m.Else():
+            m.d.sync += self.out_z.z.stb.eq(1)
  
  
-    def setup(self, m, i):
-        """ links module to inputs and outputs
-        """
-        m.submodules.normalise = self
-        m.d.comb += self.i.eq(i)
  
  
-    def elaborate(self, platform):
-        m = Module()
+class FPPutZIdx(FPState):
  
  
-        mwid = self.out_z.m_width+2
-        pe = PriorityEncoder(mwid)
-        m.submodules.norm_pe = pe
+    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
+        FPState.__init__(self, state)
+        if to_state is None:
+            to_state = "get_ops"
+        self.to_state = to_state
+        self.in_z = in_z
+        self.out_zs = out_zs
+        self.in_mid = in_mid
  
  
-        m.submodules.norm1_out_z = self.out_z
-        m.submodules.norm1_in_z = self.in_z
+    def action(self, m):
+        outz_stb = Signal(reset_less=True)
+        outz_ack = Signal(reset_less=True)
+        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
+                     outz_ack.eq(self.out_zs[self.in_mid].ack),
+                    ]
+        m.d.sync += [
+          self.out_zs[self.in_mid].v.eq(self.in_z.v)
+        ]
+        with m.If(outz_stb & outz_ack):
+            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
+            m.next = self.to_state
+        with m.Else():
+            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
  
  
-        in_z = FPNumBase(self.width, False)
-        in_of = Overflow()
-        m.submodules.norm1_insel_z = in_z
-        m.submodules.norm1_insel_overflow = in_of
  
  
-        espec = (len(in_z.e), True)
-        ediff_n126 = Signal(espec, reset_less=True)
-        msr = MultiShiftRMerge(mwid, espec)
-        m.submodules.multishift_r = msr
-
-        m.d.comb += in_z.eq(self.in_z)
-        m.d.comb += in_of.eq(self.in_of)
-        # initialise out from in (overridden below)
-        m.d.comb += self.out_z.eq(in_z)
-        m.d.comb += self.out_of.eq(in_of)
-        # normalisation decrease condition
-        decrease = Signal(reset_less=True)
-        m.d.comb += decrease.eq(in_z.m_msbzero)
-        # decrease exponent
-        with m.If(decrease):
-            # *sigh* not entirely obvious: count leading zeros (clz)
-            # with a PriorityEncoder: to find from the MSB
-            # we reverse the order of the bits.
-            temp_m = Signal(mwid, reset_less=True)
-            temp_s = Signal(mwid+1, reset_less=True)
-            clz = Signal((len(in_z.e), True), reset_less=True)
-            m.d.comb += [
-                # cat round and guard bits back into the mantissa
-                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
-                pe.i.eq(temp_m[::-1]),          # inverted
-                clz.eq(pe.o),                   # count zeros from MSB down
-                temp_s.eq(temp_m << clz),       # shift mantissa UP
-                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
-                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
-            ]
-
-        return m
-
-class FPNorm1Data:
-
-    def __init__(self, width, id_wid):
-        self.roundz = Signal(reset_less=True)
-        self.z = FPNumBase(width, False)
-        self.out_do_z = Signal(reset_less=True)
-        self.oz = Signal(width, reset_less=True)
-        self.mid = Signal(id_wid, reset_less=True)
-
-    def eq(self, i):
-        return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
-                self.roundz.eq(i.roundz), self.mid.eq(i.mid)]
-
-
-class FPNorm1ModSingle:
-
-    def __init__(self, width, id_wid):
-        self.width = width
-        self.id_wid = id_wid
-        self.i = self.ispec()
-        self.o = self.ospec()
-
-    def ispec(self):
-        return FPAddStage1Data(self.width, self.id_wid)
-
-    def ospec(self):
-        return FPNorm1Data(self.width, self.id_wid)
-
-    def setup(self, m, i):
-        """ links module to inputs and outputs
-        """
-        m.submodules.normalise_1 = self
-        m.d.comb += self.i.eq(i)
-
-    def process(self, i):
-        return self.o
-
-    def elaborate(self, platform):
-        m = Module()
-
-        mwid = self.o.z.m_width+2
-        pe = PriorityEncoder(mwid)
-        m.submodules.norm_pe = pe
-
-        of = Overflow()
-        m.d.comb += self.o.roundz.eq(of.roundz)
-
-        m.submodules.norm1_out_z = self.o.z
-        m.submodules.norm1_out_overflow = of
-        m.submodules.norm1_in_z = self.i.z
-        m.submodules.norm1_in_overflow = self.i.of
-
-        i = self.ispec()
-        m.submodules.norm1_insel_z = i.z
-        m.submodules.norm1_insel_overflow = i.of
-
-        espec = (len(i.z.e), True)
-        ediff_n126 = Signal(espec, reset_less=True)
-        msr = MultiShiftRMerge(mwid, espec)
-        m.submodules.multishift_r = msr
-
-        m.d.comb += i.eq(self.i)
-        # initialise out from in (overridden below)
-        m.d.comb += self.o.z.eq(i.z)
-        m.d.comb += of.eq(i.of)
-        # normalisation increase/decrease conditions
-        decrease = Signal(reset_less=True)
-        increase = Signal(reset_less=True)
-        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
-        m.d.comb += increase.eq(i.z.exp_lt_n126)
-        # decrease exponent
-        with m.If(~self.i.out_do_z):
-            with m.If(decrease):
-                # *sigh* not entirely obvious: count leading zeros (clz)
-                # with a PriorityEncoder: to find from the MSB
-                # we reverse the order of the bits.
-                temp_m = Signal(mwid, reset_less=True)
-                temp_s = Signal(mwid+1, reset_less=True)
-                clz = Signal((len(i.z.e), True), reset_less=True)
-                # make sure that the amount to decrease by does NOT
-                # go below the minimum non-INF/NaN exponent
-                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
-                             i.z.exp_sub_n126)
-                m.d.comb += [
-                    # cat round and guard bits back into the mantissa
-                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
-                    pe.i.eq(temp_m[::-1]),          # inverted
-                    clz.eq(limclz),                 # count zeros from MSB down
-                    temp_s.eq(temp_m << clz),       # shift mantissa UP
-                    self.o.z.e.eq(i.z.e - clz),  # DECREASE exponent
-                    self.o.z.m.eq(temp_s[2:]),    # exclude bits 0&1
-                    of.m0.eq(temp_s[2]),          # copy of mantissa[0]
-                    # overflow in bits 0..1: got shifted too (leave sticky)
-                    of.guard.eq(temp_s[1]),       # guard
-                    of.round_bit.eq(temp_s[0]),   # round
-                ]
-            # increase exponent
-            with m.Elif(increase):
-                temp_m = Signal(mwid+1, reset_less=True)
-                m.d.comb += [
-                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
-                                  i.z.m)),
-                    ediff_n126.eq(i.z.N126 - i.z.e),
-                    # connect multi-shifter to inp/out mantissa (and ediff)
-                    msr.inp.eq(temp_m),
-                    msr.diff.eq(ediff_n126),
-                    self.o.z.m.eq(msr.m[3:]),
-                    of.m0.eq(temp_s[3]),   # copy of mantissa[0]
-                    # overflow in bits 0..1: got shifted too (leave sticky)
-                    of.guard.eq(temp_s[2]),     # guard
-                    of.round_bit.eq(temp_s[1]), # round
-                    of.sticky.eq(temp_s[0]),    # sticky
-                    self.o.z.e.eq(i.z.e + ediff_n126),
-                ]
-
-        m.d.comb += self.o.mid.eq(self.i.mid)
-        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
-        m.d.comb += self.o.oz.eq(self.i.oz)
-
-        return m
-
-
-class FPNorm1ModMulti:
-
-    def __init__(self, width, single_cycle=True):
-        self.width = width
-        self.in_select = Signal(reset_less=True)
-        self.in_z = FPNumBase(width, False)
-        self.in_of = Overflow()
-        self.temp_z = FPNumBase(width, False)
-        self.temp_of = Overflow()
-        self.out_z = FPNumBase(width, False)
-        self.out_of = Overflow()
-
-    def elaborate(self, platform):
-        m = Module()
-
-        m.submodules.norm1_out_z = self.out_z
-        m.submodules.norm1_out_overflow = self.out_of
-        m.submodules.norm1_temp_z = self.temp_z
-        m.submodules.norm1_temp_of = self.temp_of
-        m.submodules.norm1_in_z = self.in_z
-        m.submodules.norm1_in_overflow = self.in_of
-
-        in_z = FPNumBase(self.width, False)
-        in_of = Overflow()
-        m.submodules.norm1_insel_z = in_z
-        m.submodules.norm1_insel_overflow = in_of
-
-        # select which of temp or in z/of to use
-        with m.If(self.in_select):
-            m.d.comb += in_z.eq(self.in_z)
-            m.d.comb += in_of.eq(self.in_of)
-        with m.Else():
-            m.d.comb += in_z.eq(self.temp_z)
-            m.d.comb += in_of.eq(self.temp_of)
-        # initialise out from in (overridden below)
-        m.d.comb += self.out_z.eq(in_z)
-        m.d.comb += self.out_of.eq(in_of)
-        # normalisation increase/decrease conditions
-        decrease = Signal(reset_less=True)
-        increase = Signal(reset_less=True)
-        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
-        m.d.comb += increase.eq(in_z.exp_lt_n126)
-        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
-        # decrease exponent
-        with m.If(decrease):
-            m.d.comb += [
-                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
-                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
-                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
-                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
-                self.out_of.round_bit.eq(0),        # reset round bit
-                self.out_of.m0.eq(in_of.guard),
-            ]
-        # increase exponent
-        with m.Elif(increase):
-            m.d.comb += [
-                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
-                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
-                self.out_of.guard.eq(in_z.m[0]),
-                self.out_of.m0.eq(in_z.m[1]),
-                self.out_of.round_bit.eq(in_of.guard),
-                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
-            ]
-
-        return m
-
-
-class FPNorm1Single(FPState):
-
-    def __init__(self, width, id_wid, single_cycle=True):
-        FPState.__init__(self, "normalise_1")
-        self.mod = FPNorm1ModSingle(width)
-        self.o = self.ospec()
-        self.out_z = FPNumBase(width, False)
-        self.out_roundz = Signal(reset_less=True)
-
-    def ispec(self):
-        return self.mod.ispec()
-
-    def ospec(self):
-        return self.mod.ospec()
-
-    def setup(self, m, i):
-        """ links module to inputs and outputs
-        """
-        self.mod.setup(m, i)
-
-    def action(self, m):
-        m.next = "round"
-
-
-class FPNorm1Multi(FPState):
-
-    def __init__(self, width, id_wid):
-        FPState.__init__(self, "normalise_1")
-        self.mod = FPNorm1ModMulti(width)
-        self.stb = Signal(reset_less=True)
-        self.ack = Signal(reset=0, reset_less=True)
-        self.out_norm = Signal(reset_less=True)
-        self.in_accept = Signal(reset_less=True)
-        self.temp_z = FPNumBase(width)
-        self.temp_of = Overflow()
-        self.out_z = FPNumBase(width)
-        self.out_roundz = Signal(reset_less=True)
-
-    def setup(self, m, in_z, in_of, norm_stb):
-        """ links module to inputs and outputs
-        """
-        self.mod.setup(m, in_z, in_of, norm_stb,
-                       self.in_accept, self.temp_z, self.temp_of,
-                       self.out_z, self.out_norm)
-
-        m.d.comb += self.stb.eq(norm_stb)
-        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state
-
-    def action(self, m):
-        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
-        m.d.sync += self.temp_of.eq(self.mod.out_of)
-        m.d.sync += self.temp_z.eq(self.out_z)
-        with m.If(self.out_norm):
-            with m.If(self.in_accept):
-                m.d.sync += [
-                    self.ack.eq(1),
-                ]
-            with m.Else():
-                m.d.sync += self.ack.eq(0)
-        with m.Else():
-            # normalisation not required (or done).
-            m.next = "round"
-            m.d.sync += self.ack.eq(1)
-            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
-
-
-class FPNormToPack(FPState, UnbufferedPipeline):
-
-    def __init__(self, width, id_wid):
-        FPState.__init__(self, "normalise_1")
-        self.id_wid = id_wid
-        self.width = width
-        UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
-
-    def ispec(self):
-        return FPAddStage1Data(self.width, self.id_wid) # Norm1ModSingle ispec
-
-    def ospec(self):
-        return FPPackData(self.width, self.id_wid) # FPPackMod ospec
-
-    def setup(self, m, i):
-        """ links module to inputs and outputs
-        """
-
-        # Normalisation, Rounding Corrections, Pack - in a chain
-        nmod = FPNorm1ModSingle(self.width, self.id_wid)
-        rmod = FPRoundMod(self.width, self.id_wid)
-        cmod = FPCorrectionsMod(self.width, self.id_wid)
-        pmod = FPPackMod(self.width, self.id_wid)
-        chain = StageChain([nmod, rmod, cmod, pmod])
-        chain.setup(m, i)
-        self.out_z = pmod.ospec()
-
-        m.d.comb += self.out_z.mid.eq(pmod.o.mid)
-        m.d.comb += self.out_z.z.eq(pmod.o.z) # outputs packed result
-
-    def process(self, i):
-        return self.out_z
-
-    def action(self, m):
-        m.next = "pack_put_z"
-
-
-class FPRoundData:
-
-    def __init__(self, width, id_wid):
-        self.z = FPNumBase(width, False)
-        self.out_do_z = Signal(reset_less=True)
-        self.oz = Signal(width, reset_less=True)
-        self.mid = Signal(id_wid, reset_less=True)
-
-    def eq(self, i):
-        return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
-                self.mid.eq(i.mid)]
-
-
-class FPRoundMod:
-
-    def __init__(self, width, id_wid):
-        self.width = width
-        self.id_wid = id_wid
-        self.i = self.ispec()
-        self.out_z = self.ospec()
-
-    def ispec(self):
-        return FPNorm1Data(self.width, self.id_wid)
-
-    def ospec(self):
-        return FPRoundData(self.width, self.id_wid)
-
-    def process(self, i):
-        return self.out_z
-
-    def setup(self, m, i):
-        m.submodules.roundz = self
-        m.d.comb += self.i.eq(i)
-
-    def elaborate(self, platform):
-        m = Module()
-        m.d.comb += self.out_z.eq(self.i) # copies mid, z, out_do_z
-        with m.If(~self.i.out_do_z):
-            with m.If(self.i.roundz):
-                m.d.comb += self.out_z.z.m.eq(self.i.z.m + 1) # mantissa up
-                with m.If(self.i.z.m == self.i.z.m1s): # all 1s
-                    m.d.comb += self.out_z.z.e.eq(self.i.z.e + 1) # exponent up
-
-        return m
-
-
-class FPRound(FPState):
-
-    def __init__(self, width, id_wid):
-        FPState.__init__(self, "round")
-        self.mod = FPRoundMod(width)
-        self.out_z = self.ospec()
-
-    def ispec(self):
-        return self.mod.ispec()
-
-    def ospec(self):
-        return self.mod.ospec()
-
-    def setup(self, m, i):
-        """ links module to inputs and outputs
-        """
-        self.mod.setup(m, i)
-
-        self.idsync(m)
-        m.d.sync += self.out_z.eq(self.mod.out_z)
-        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)
-
-    def action(self, m):
-        m.next = "corrections"
-
-
-class FPCorrectionsMod:
-
-    def __init__(self, width, id_wid):
-        self.width = width
-        self.id_wid = id_wid
-        self.i = self.ispec()
-        self.out_z = self.ospec()
-
-    def ispec(self):
-        return FPRoundData(self.width, self.id_wid)
-
-    def ospec(self):
-        return FPRoundData(self.width, self.id_wid)
-
-    def process(self, i):
-        return self.out_z
-
-    def setup(self, m, i):
-        """ links module to inputs and outputs
-        """
-        m.submodules.corrections = self
-        m.d.comb += self.i.eq(i)
-
-    def elaborate(self, platform):
-        m = Module()
-        m.submodules.corr_in_z = self.i.z
-        m.submodules.corr_out_z = self.out_z.z
-        m.d.comb += self.out_z.eq(self.i) # copies mid, z, out_do_z
-        with m.If(~self.i.out_do_z):
-            with m.If(self.i.z.is_denormalised):
-                m.d.comb += self.out_z.z.e.eq(self.i.z.N127)
-        return m
-
-
-class FPCorrections(FPState):
-
-    def __init__(self, width, id_wid):
-        FPState.__init__(self, "corrections")
-        self.mod = FPCorrectionsMod(width)
-        self.out_z = self.ospec()
-
-    def ispec(self):
-        return self.mod.ispec()
-
-    def ospec(self):
-        return self.mod.ospec()
-
-    def setup(self, m, in_z):
-        """ links module to inputs and outputs
-        """
-        self.mod.setup(m, in_z)
-
-        m.d.sync += self.out_z.eq(self.mod.out_z)
-        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)
-
-    def action(self, m):
-        m.next = "pack"
-
-
-class FPPackData:
-
-    def __init__(self, width, id_wid):
-        self.z = Signal(width, reset_less=True)
-        self.mid = Signal(id_wid, reset_less=True)
-
-    def eq(self, i):
-        return [self.z.eq(i.z), self.mid.eq(i.mid)]
-
-    def ports(self):
-        return [self.z, self.mid]
-
-
-class FPPackMod:
-
-    def __init__(self, width, id_wid):
-        self.width = width
-        self.id_wid = id_wid
-        self.i = self.ispec()
-        self.o = self.ospec()
-
-    def ispec(self):
-        return FPRoundData(self.width, self.id_wid)
-
-    def ospec(self):
-        return FPPackData(self.width, self.id_wid)
-
-    def process(self, i):
-        return self.o
-
-    def setup(self, m, in_z):
-        """ links module to inputs and outputs
-        """
-        m.submodules.pack = self
-        m.d.comb += self.i.eq(in_z)
-
-    def elaborate(self, platform):
-        m = Module()
-        z = FPNumOut(self.width, False)
-        m.submodules.pack_in_z = self.i.z
-        m.submodules.pack_out_z = z
-        m.d.comb += self.o.mid.eq(self.i.mid)
-        with m.If(~self.i.out_do_z):
-            with m.If(self.i.z.is_overflowed):
-                m.d.comb += z.inf(self.i.z.s)
-            with m.Else():
-                m.d.comb += z.create(self.i.z.s, self.i.z.e, self.i.z.m)
-        with m.Else():
-            m.d.comb += z.v.eq(self.i.oz)
-        m.d.comb += self.o.z.eq(z.v)
-        return m
-
-
-class FPPack(FPState):
-
-    def __init__(self, width, id_wid):
-        FPState.__init__(self, "pack")
-        self.mod = FPPackMod(width)
-        self.out_z = self.ospec()
-
-    def ispec(self):
-        return self.mod.ispec()
-
-    def ospec(self):
-        return self.mod.ospec()
-
-    def setup(self, m, in_z):
-        """ links module to inputs and outputs
-        """
-        self.mod.setup(m, in_z)
-
-        m.d.sync += self.out_z.v.eq(self.mod.out_z.v)
-        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)
-
-    def action(self, m):
-        m.next = "pack_put_z"
-
-
-class FPPutZ(FPState):
-
-    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
-        FPState.__init__(self, state)
-        if to_state is None:
-            to_state = "get_ops"
-        self.to_state = to_state
-        self.in_z = in_z
-        self.out_z = out_z
-        self.in_mid = in_mid
-        self.out_mid = out_mid
-
-    def action(self, m):
-        if self.in_mid is not None:
-            m.d.sync += self.out_mid.eq(self.in_mid)
-        m.d.sync += [
-          self.out_z.z.v.eq(self.in_z)
-        ]
-        with m.If(self.out_z.z.stb & self.out_z.z.ack):
-            m.d.sync += self.out_z.z.stb.eq(0)
-            m.next = self.to_state
-        with m.Else():
-            m.d.sync += self.out_z.z.stb.eq(1)
-
-
-class FPPutZIdx(FPState):
-
-    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
-        FPState.__init__(self, state)
-        if to_state is None:
-            to_state = "get_ops"
-        self.to_state = to_state
-        self.in_z = in_z
-        self.out_zs = out_zs
-        self.in_mid = in_mid
-
-    def action(self, m):
-        outz_stb = Signal(reset_less=True)
-        outz_ack = Signal(reset_less=True)
-        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
-                     outz_ack.eq(self.out_zs[self.in_mid].ack),
-                    ]
-        m.d.sync += [
-          self.out_zs[self.in_mid].v.eq(self.in_z.v)
-        ]
-        with m.If(outz_stb & outz_ack):
-            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
-            m.next = self.to_state
-        with m.Else():
-            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
-
-class FPOpData:
-    def __init__(self, width, id_wid):
-        self.z = FPOp(width)
-        self.mid = Signal(id_wid, reset_less=True)
+class FPOpData:
+    def __init__(self, width, id_wid):
+        self.z = FPOp(width)
+        self.mid = Signal(id_wid, reset_less=True)
  
      def eq(self, i):
          return [self.z.eq(i.z), self.mid.eq(i.mid)]
  
      def eq(self, i):
          return [self.z.eq(i.z), self.mid.eq(i.mid)]
@@ -1696,9 +757,10 @@ class FPADDBaseMod:
  
          get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                        self.width))
  
          get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                        self.width))
-        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
+        get.setup(m, self.i)
          a = get.out_op1
          b = get.out_op2
          a = get.out_op1
          b = get.out_op2
+        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)
  
          sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
          sc.setup(m, a, b, self.in_mid)
  
          sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
          sc.setup(m, a, b, self.in_mid)
@@ -1743,18 +805,20 @@ class FPADDBaseMod:
  
      def get_compact_fragment(self, m, platform=None):
  
  
      def get_compact_fragment(self, m, platform=None):
  
-        get = self.add_state(FPGet2Op("get_ops", "special_cases",
-                                      self.width, self.id_wid))
-        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
  
  
-        sc = self.add_state(FPAddSpecialCasesDeNorm(self.width, self.id_wid))
-        sc.setup(m, get.o)
+        get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
+        sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
+        alm = FPAddAlignSingleAdd(self.width, self.id_wid)
+        n1 = FPNormToPack(self.width, self.id_wid)
+
+        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)
  
  
-        alm = self.add_state(FPAddAlignSingleAdd(self.width, self.id_wid))
-        alm.setup(m, sc.o)
+        chainlist = [get, sc, alm, n1]
+        chain = StageChain(chainlist, specallocate=True)
+        chain.setup(m, self.i)
  
  
-        n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
-        n1.setup(m, alm.a1o)
+        for mod in chainlist:
+            sc = self.add_state(mod)
  
          ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                                      n1.out_z.mid, self.o.mid))
  
          ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                                      n1.out_z.mid, self.o.mid))
@@ -1872,59 +936,30 @@ class FPADDBasePipe(ControlBase):
          return m
  
  
          return m
  
  
-class FPAddInPassThruStage:
-    def __init__(self, width, id_wid):
-        self.width, self.id_wid = width, id_wid
-    def ispec(self): return FPADDBaseData(self.width, self.id_wid)
-    def ospec(self): return self.ispec()
-    def process(self, i): return i
-
-
  class FPADDInMuxPipe(PriorityCombMuxInPipe):
  class FPADDInMuxPipe(PriorityCombMuxInPipe):
-    def __init__(self, width, id_width, num_rows):
+    def __init__(self, width, id_wid, num_rows):
          self.num_rows = num_rows
          self.num_rows = num_rows
-        stage = FPAddInPassThruStage(width, id_width)
+        def iospec(): return FPADDBaseData(width, id_wid)
+        stage = PassThroughStage(iospec)
          PriorityCombMuxInPipe.__init__(self, stage, p_len=self.num_rows)
  
          PriorityCombMuxInPipe.__init__(self, stage, p_len=self.num_rows)
  
-    def ports(self):
-        res = []
-        for i in range(len(self.p)):
-            res += [self.p[i].i_valid, self.p[i].o_ready] + \
-                    self.p[i].i_data.ports()
-        res += [self.n.i_ready, self.n.o_valid] + \
-                self.n.o_data.ports()
-        return res
-
-
-
-
-class FPAddOutPassThruStage:
-    def __init__(self, width, id_wid):
-        self.width, self.id_wid = width, id_wid
-    def ispec(self): return FPPackData(self.width, self.id_wid)
-    def ospec(self): return self.ispec()
-    def process(self, i): return i
-
  
  class FPADDMuxOutPipe(CombMuxOutPipe):
      def __init__(self, width, id_wid, num_rows):
          self.num_rows = num_rows
  
  class FPADDMuxOutPipe(CombMuxOutPipe):
      def __init__(self, width, id_wid, num_rows):
          self.num_rows = num_rows
-        stage = FPAddOutPassThruStage(width, id_wid)
+        def iospec(): return FPPackData(width, id_wid)
+        stage = PassThroughStage(iospec)
          CombMuxOutPipe.__init__(self, stage, n_len=self.num_rows)
  
          CombMuxOutPipe.__init__(self, stage, n_len=self.num_rows)
  
-    def ports(self):
-        res = [self.p.i_valid, self.p.o_ready] + \
-                self.p.i_data.ports()
-        for i in range(len(self.n)):
-            res += [self.n[i].i_ready, self.n[i].o_valid] + \
-                    self.n[i].o_data.ports()
-        return res
-
  
  class FPADDMuxInOut:
      """ Reservation-Station version of FPADD pipeline.
  
  
  class FPADDMuxInOut:
      """ Reservation-Station version of FPADD pipeline.
  
-        fan-in on
+        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
+        * 3-stage adder pipeline
+        * fan-out on outputs (an array of FPPackData: z,mid)
+
+        Fan-in and Fan-out are combinatorial.
      """
      def __init__(self, width, id_wid, num_rows):
          self.num_rows = num_rows
      """
      def __init__(self, width, id_wid, num_rows):
          self.num_rows = num_rows
@@ -1951,39 +986,6 @@ class FPADDMuxInOut:
          return self._ports
  
  
          return self._ports
  
  
-class ResArray:
-    def __init__(self, width, id_wid):
-        self.width = width
-        self.id_wid = id_wid
-        res = []
-        for i in range(rs_sz):
-            out_z = FPOp(width)
-            out_z.name = "out_z_%d" % i
-            res.append(out_z)
-        self.res = Array(res)
-        self.in_z = FPOp(width)
-        self.in_mid = Signal(self.id_wid, reset_less=True)
-
-    def setup(self, m, in_z, in_mid):
-        m.d.comb += [self.in_z.eq(in_z),
-                     self.in_mid.eq(in_mid)]
-
-    def get_fragment(self, platform=None):
-        """ creates the HDL code-fragment for FPAdd
-        """
-        m = Module()
-        m.submodules.res_in_z = self.in_z
-        m.submodules += self.res
-
-        return m
-
-    def ports(self):
-        res = []
-        for z in self.res:
-            res += z.ports()
-        return res
-
-
  class FPADD(FPID):
      """ FPADD: stages as follows:
  
  class FPADD(FPID):
      """ FPADD: stages as follows: