start converting hardfloat-verilog fmac to nmigen
author Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Sat, 10 Aug 2019 06:29:09 +0000 (07:29 +0100)
committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Sat, 10 Aug 2019 06:29:09 +0000 (07:29 +0100)
src/ieee754/fpdiv/mulAddRecFN.py [new file with mode: 0644]
src/nmutil/multipipe.py
src/nmutil/test/test_inout_feedback_pipe.py

diff --git a/src/ieee754/fpdiv/mulAddRecFN.py b/src/ieee754/fpdiv/mulAddRecFN.py
new file mode 100644 (file)
index 0000000..f355fa4
--- /dev/null
@@ -0,0 +1,527 @@
+"""
+/*============================================================================
+
+This Verilog source file is part of the Berkeley HardFloat IEEE Floating-Point
+Arithmetic Package, Release 1, by John R. Hauser.
+
+Copyright 2019 The Regents of the University of California.  All rights
+reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+`include "HardFloat_consts.vi"
+`include "HardFloat_specialize.vi"
+
+"""
+
+from nmigen import Elaboratable, Cat, Const, Mux, Module, Signal
+from nmutil.concurrentunit import num_bits
+
+#/*----------------------------------------------------------------------------
+#*----------------------------------------------------------------------------*/
+
+class mulAddRecFNToRaw_preMul(Elaboratable):
+    def __init__(self, expWidth=3, sigWidth=3):
+        # inputs
+        self.control = Signal(floatControlWidth, reset_less=True)
+        self.op = Signal(2, reset_less=True)
+        self.a = Signal(expWidth + sigWidth + 1, reset_less=True)
+        self.b = Signal(expWidth + sigWidth + 1, reset_less=True)
+        self.c = Signal(expWidth + sigWidth + 1, reset_less=True)
+        self.roundingMode = Signal(3, reset_less=True)
+
+        # outputs
+        self.mulAddA = Signal(sigWidth, reset_less=True)
+        self.mulAddB = Signal(sigWidth, reset_less=True)
+        self.mulAddC = Signal(sigWidth*2, reset_less=True)
+        self.intermed_compactState = Signal(6, reset_less=True)
+        self.intermed_sExp = Signal(expWidth + 2, reset_less=True)
+        wid = num_bits(sigWidth + 1)
+        self.intermed_CDom_CAlignDist = Signal(wid, reset_less=True)
+        self.intermed_highAlignedSigC = Signal((sigWidth + 2), reset_less=True)
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        #/*-------------------------------------------------------------------
+        #*--------------------------------------------------------------------*/
+        prodWidth = sigWidth*2;
+        sigSumWidth = sigWidth + prodWidth + 3;
+        #/*-------------------------------------------------------------------
+        #*-------------------------------------------------------------------*/
+        isNaNA = Signal(reset_less=True)
+        isInfA = Signal(reset_less=True)
+        isZeroA = Signal(reset_less=True)
+        signA = Signal(reset_less=True)
+
+        sExpA = Signal((expWidth + 2, True), reset_less=True)
+        sigA = Signal(sigWidth+1, reset_less=True)
+        m.submodules.recFNToRawFN_a = rf = recFNToRawFN(expWidth, sigWidth)
+        comb += [(a, isNaNA, isInfA, isZeroA, signA, sExpA, sigA)]
+
+        isSigNaNA = Signal(reset_less=True)
+        m.submodules.isSigNaN_a = nan_a = isSigNaNRecFN(expWidth, sigWidth)
+        comb += [(a, isSigNaNA)]
+
+        isNaNB = Signal(reset_less=True)
+        isInfB = Signal(reset_less=True)
+        isZeroB = Signal(reset_less=True)
+        signB = Signal(reset_less=True)
+
+        sExpB = Signal((expWidth + 2, True), reset_less=True)
+        sigB = Signal(sigWidth+1, reset_less=True)
+        m.submodules.recFNToRawFN_b = rf = recFNToRawFN(expWidth, sigWidth)
+        comb += [(b, isNaNB, isInfB, isZeroB, signB, sExpB, sigB)]
+
+        isSigNaNB = Signal(reset_less=True)
+        m.submodules.isSigNaN_b = nan_b = isSigNaNRecFN(expWidth, sigWidth)
+        comb += [(b, isSigNaNB)]
+
+        isNaNC = Signal(reset_less=True)
+        isInfC = Signal(reset_less=True)
+        isZeroC = Signal(reset_less=True)
+        signC = Signal(reset_less=True)
+
+        sExpC = Signal((expWidth + 2, True), reset_less=True)
+        sigC = Signal(sigWidth+1, reset_less=True)
+        m.submodules.recFNToRawFN_c = rf = recFNToRawFN(expWidth, sigWidth)
+        comb += [(c, isNaNC, isInfC, isZeroC, signC, sExpC, sigC)]
+
+        isSigNaNC = Signal(reset_less=True)
+        m.submodules.isSigNaN_c = nan_c = isSigNaNRecFN(expWidth, sigWidth)
+        comb += [(c, isSigNaNC)]
+
+        #/*-------------------------------------------------------------------
+        #*-------------------------------------------------------------------*/
+        signProd = Signal(reset_less=True)
+        sExpAlignedProd = Signal((expWidth + 3, True), reset_less=True)
+        doSubMags = Signal(reset_less=True)
+        opSignC = Signal(reset_less=True)
+        roundingMode_min = Signal(reset_less=True)
+
+        comb += signProd.eq(signA ^ signB ^ op[1])
+        comb += sExpAlignedProd.eq(sExpA + sExpB + \
+                                    (-(1<<expWidth) + sigWidth + 3))
+        comb += doSubMags.eq(signProd ^ signC ^ op[0])
+        comb += opSignC.eq(signProd ^ doSubMags)
+        comb += roundingMode_min.eq(roundingMode == ROUND_MIN)
+
+        #/*-------------------------------------------------------------------
+        #*-------------------------------------------------------------------*/
+        sNatCAlignDist = Signal((expWidth + 3, True), reset_less=True)
+        posNatCAlignDist = Signal(expWidth + 2, reset_less=True)
+        isMinCAlign = Signal(reset_less=True)
+        CIsDominant = Signal(reset_less=True)
+        sExpSum = Signal((expWidth + 2, True), reset_less=True)
+        CAlignDist = Signal(num_bits(sigSumWidth), reset_less=True)
+        extComplSigC = Signal((sigSumWidth + 3, True), reset_less=True)
+        mainAlignedSigC = Signal(sigSumWidth + 2, reset_less=True)
+
+        CGrainAlign = (sigSumWidth - sigWidth - 1) & 3;
+        grainAlignedSigC = Signal(sigWidth+CGrainAlign + 1, reset_less=True)
+        reduced4SigC = Signal((sigWidth+CGrainAlign)/4 + 1, reset_less=True)
+        m.submodules.compressBy4_sigC = compressBy4(sigWidth + 1 + CGrainAlign)
+        comb += (grainAlignedSigC, reduced4SigC)
+        CExtraMaskHiBound = (sigSumWidth - 1)/4;
+        CExtraMaskLoBound = (sigSumWidth - sigWidth - 1)/4;
+        CExtraMask = Signal(CExtraMaskHiBound - CExtraMaskLoBound,
+                            reset_less=True)
+        m.submodules.lowMask_CExtraMask = lowMaskHiLo(clog2(sigSumWidth) - 2,
+                                                      CExtraMaskHiBound, 
+                                                      CExtraMaskLoBound))
+        comb += (CAlignDist[(clog2(sigSumWidth) - 1):2], CExtraMask);
+        reduced4CExtra = Signal(reset_less=True)
+        alignedSigC = Signal(sigSumWidth, reset_less=True)
+
+        comb += [
+            sNatCAlignDist.eq(sExpAlignedProd - sExpC),
+            posNatCAlignDist.eq(sNatCAlignDist[:expWidth + 2]),
+            isMinCAlign.eq(isZeroA | isZeroB | (sNatCAlignDist < 0))
+            CIsDominant.eq(~isZeroC & \
+                           (isMinCAlign | (posNatCAlignDist <= sigWidth)))
+            sExpSum.eq(Mux(CIsDominant, sExpC, sExpAlignedProd - sigWidth)),
+            CAlignDist.eq(Mux(isMinCAlign, 0,
+                              Mux((posNatCAlignDist < sigSumWidth - 1),
+                                  posNatCAlignDist[:num_bits(sigSumWidth)],
+                                  sigSumWidth - 1))
+            # XXX check! {doSubMags ? ~sigC : sigC,
+            #            {(sigSumWidth - sigWidth + 2){doSubMags}}};
+            extComplSigC.eq(Cat((sigSumWidth - sigWidth + 2){doSubMags}},
+                                Mux(doSubMags, ~sigC, sigC)))
+            # XXX check!  nmigen doesn't have >>> operator, only >>
+            mainAlignedSigC.eq(extComplSigC >>> CAlignDist)
+        localparam CGrainAlign = (sigSumWidth - sigWidth - 1) & 3;
+        wire [(sigWidth + CGrainAlign):0] grainAlignedSigC = sigC<<CGrainAlign;
+        wire [(sigWidth + CGrainAlign)/4:0] reduced4SigC;
+        compressBy4#(sigWidth + 1 + CGrainAlign)
+            compressBy4_sigC(grainAlignedSigC, reduced4SigC);
+        localparam CExtraMaskHiBound = (sigSumWidth - 1)/4;
+        localparam CExtraMaskLoBound = (sigSumWidth - sigWidth - 1)/4;
+        wire [(CExtraMaskHiBound - CExtraMaskLoBound - 1):0] CExtraMask;
+        lowMaskHiLo#(clog2(sigSumWidth) - 2, CExtraMaskHiBound, CExtraMaskLoBound)
+            lowMask_CExtraMask(CAlignDist[(clog2(sigSumWidth) - 1):2], CExtraMask);
+        wire reduced4CExtra = |(reduced4SigC & CExtraMask);
+        wire [(sigSumWidth - 1):0] alignedSigC =
+            {mainAlignedSigC>>3,
+             doSubMags ? (&mainAlignedSigC[2:0]) && !reduced4CExtra
+                 : (|mainAlignedSigC[2:0]) || reduced4CExtra};
+        /*------------------------------------------------------------------------
+        *------------------------------------------------------------------------*/
+        wire isNaNAOrB = isNaNA || isNaNB;
+        wire isNaNAny = isNaNAOrB || isNaNC;
+        wire isInfAOrB = isInfA || isInfB;
+        wire invalidProd = (isInfA && isZeroB) || (isZeroA && isInfB);
+        wire notSigNaN_invalidExc =
+            invalidProd || (!isNaNAOrB && isInfAOrB && isInfC && doSubMags);
+        wire invalidExc =
+            isSigNaNA || isSigNaNB || isSigNaNC || notSigNaN_invalidExc;
+        wire notNaN_addZeros = (isZeroA || isZeroB) && isZeroC;
+        wire specialCase = isNaNAny || isInfAOrB || isInfC || notNaN_addZeros;
+        wire specialNotNaN_signOut =
+            (isInfAOrB && signProd) || (isInfC && opSignC)
+                || (notNaN_addZeros && !roundingMode_min && signProd && opSignC)
+                || (notNaN_addZeros && roundingMode_min && (signProd || opSignC));
+    `ifdef HardFloat_propagateNaNPayloads
+        wire signNaN;
+        wire [(sigWidth - 2):0] fractNaN;
+        propagateFloatNaN_mulAdd#(sigWidth)
+            propagateNaN(
+                control,
+                op,
+                isNaNA,
+                signA,
+                sigA[(sigWidth - 2):0],
+                isNaNB,
+                signB,
+                sigB[(sigWidth - 2):0],
+                invalidProd,
+                isNaNC,
+                signC,
+                sigC[(sigWidth - 2):0],
+                signNaN,
+                fractNaN
+            );
+        wire isNaNOut = isNaNAny || notSigNaN_invalidExc;
+        wire special_signOut =
+            isNaNAny || notSigNaN_invalidExc ? signNaN : specialNotNaN_signOut;
+    `else
+        wire special_signOut = specialNotNaN_signOut;
+    `endif
+        /*------------------------------------------------------------------------
+        *------------------------------------------------------------------------*/
+        assign mulAddA = sigA;
+        assign mulAddB = sigB;
+        assign mulAddC = alignedSigC[prodWidth:1];
+        assign intermed_compactState =
+            {specialCase,
+             invalidExc          || (!specialCase && signProd      ),
+    `ifdef HardFloat_propagateNaNPayloads
+             isNaNOut            || (!specialCase && doSubMags     ),
+    `else
+             isNaNAny            || (!specialCase && doSubMags     ),
+    `endif
+             isInfAOrB || isInfC || (!specialCase && CIsDominant   ),
+             notNaN_addZeros     || (!specialCase && alignedSigC[0]),
+             special_signOut};
+        assign intermed_sExp = sExpSum;
+        assign intermed_CDom_CAlignDist = CAlignDist[(clog2(sigWidth + 1) - 1):0];
+        assign intermed_highAlignedSigC =
+    `ifdef HardFloat_propagateNaNPayloads
+             isNaNOut ? fractNaN :
+    `endif
+              alignedSigC[(sigSumWidth - 1):(prodWidth + 1)];
+
+
+/*----------------------------------------------------------------------------
+*----------------------------------------------------------------------------*/
+
+module
+    mulAddRecFNToRaw_postMul#(parameter expWidth = 3, parameter sigWidth = 3) (
+        intermed_compactState,
+        intermed_sExp,
+        intermed_CDom_CAlignDist,
+        intermed_highAlignedSigC,
+        mulAddResult,
+        roundingMode,
+        invalidExc,
+        out_isNaN,
+        out_isInf,
+        out_isZero,
+        out_sign,
+        out_sExp,
+        out_sig
+    );
+`include "HardFloat_localFuncs.vi"
+    input [5:0] intermed_compactState;
+    input signed [(expWidth + 1):0] intermed_sExp;
+    input [(clog2(sigWidth + 1) - 1):0] intermed_CDom_CAlignDist;
+    input [(sigWidth + 1):0] intermed_highAlignedSigC;
+    input [sigWidth*2:0] mulAddResult;
+    input [2:0] roundingMode;
+    output invalidExc;
+    output out_isNaN;
+    output out_isInf;
+    output out_isZero;
+    output out_sign;
+    output signed [(expWidth + 1):0] out_sExp;
+    output [(sigWidth + 2):0] out_sig;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    localparam prodWidth = sigWidth*2;
+    localparam sigSumWidth = sigWidth + prodWidth + 3;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    wire specialCase     = intermed_compactState[5];
+    assign invalidExc    = specialCase && intermed_compactState[4];
+    assign out_isNaN     = specialCase && intermed_compactState[3];
+    assign out_isInf     = specialCase && intermed_compactState[2];
+    wire notNaN_addZeros = specialCase && intermed_compactState[1];
+    wire signProd        = intermed_compactState[4];
+    wire doSubMags       = intermed_compactState[3];
+    wire CIsDominant     = intermed_compactState[2];
+    wire bit0AlignedSigC = intermed_compactState[1];
+    wire special_signOut = intermed_compactState[0];
+`ifdef HardFloat_propagateNaNPayloads
+    wire [(sigWidth - 2):0] fractNaN = intermed_highAlignedSigC;
+`endif
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    wire opSignC = signProd ^ doSubMags;
+    wire [(sigWidth + 1):0] incHighAlignedSigC = intermed_highAlignedSigC + 1;
+    wire [(sigSumWidth - 1):0] sigSum =
+        {mulAddResult[prodWidth] ? incHighAlignedSigC
+             : intermed_highAlignedSigC,
+         mulAddResult[(prodWidth - 1):0],
+         bit0AlignedSigC};
+    wire roundingMode_min = (roundingMode == `round_min);
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    wire CDom_sign = opSignC;
+    wire signed [(expWidth + 1):0] CDom_sExp = intermed_sExp - doSubMags;
+    wire [(sigWidth*2 + 1):0] CDom_absSigSum =
+        doSubMags ? ~sigSum[(sigSumWidth - 1):(sigWidth + 1)]
+            : {1'b0, intermed_highAlignedSigC[(sigWidth + 1):sigWidth],
+                   sigSum[(sigSumWidth - 3):(sigWidth + 2)]};
+    wire CDom_absSigSumExtra =
+        doSubMags ? !(&sigSum[sigWidth:1]) : |sigSum[(sigWidth + 1):1];
+    wire [(sigWidth + 4):0] CDom_mainSig =
+        (CDom_absSigSum<<intermed_CDom_CAlignDist)>>(sigWidth - 3);
+    wire [((sigWidth | 3) - 1):0] CDom_grainAlignedLowSig =
+        CDom_absSigSum[(sigWidth - 1):0]<<(~sigWidth & 3);
+    wire [sigWidth/4:0] CDom_reduced4LowSig;
+    compressBy4#(sigWidth | 3)
+        compressBy4_CDom_absSigSum(
+            CDom_grainAlignedLowSig, CDom_reduced4LowSig);
+    wire [(sigWidth/4 - 1):0] CDom_sigExtraMask;
+    lowMaskLoHi#(clog2(sigWidth + 1) - 2, 0, sigWidth/4)
+        lowMask_CDom_sigExtraMask(
+            intermed_CDom_CAlignDist[(clog2(sigWidth + 1) - 1):2],
+            CDom_sigExtraMask
+        );
+    wire CDom_reduced4SigExtra = |(CDom_reduced4LowSig & CDom_sigExtraMask);
+    wire [(sigWidth + 2):0] CDom_sig =
+        {CDom_mainSig>>3,
+         (|CDom_mainSig[2:0]) || CDom_reduced4SigExtra || CDom_absSigSumExtra};
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    wire notCDom_signSigSum = sigSum[prodWidth + 3];
+    wire [(prodWidth + 2):0] notCDom_absSigSum =
+        notCDom_signSigSum ? ~sigSum[(prodWidth + 2):0]
+            : sigSum[(prodWidth + 2):0] + doSubMags;
+    wire [(prodWidth + 2)/2:0] notCDom_reduced2AbsSigSum;
+    compressBy2#(prodWidth + 3)
+        compressBy2_notCDom_absSigSum(
+            notCDom_absSigSum, notCDom_reduced2AbsSigSum);
+    wire [(clog2(prodWidth + 4) - 2):0] notCDom_normDistReduced2;
+    countLeadingZeros#((prodWidth + 2)/2 + 1, clog2(prodWidth + 4) - 1)
+        countLeadingZeros_notCDom(
+            notCDom_reduced2AbsSigSum, notCDom_normDistReduced2);
+    wire [(clog2(prodWidth + 4) - 1):0] notCDom_nearNormDist =
+        notCDom_normDistReduced2<<1;
+    wire signed [(expWidth + 1):0] notCDom_sExp =
+        intermed_sExp - notCDom_nearNormDist;
+    wire [(sigWidth + 4):0] notCDom_mainSig =
+        ({1'b0, notCDom_absSigSum}<<notCDom_nearNormDist)>>(sigWidth - 1);
+    wire [(((sigWidth/2 + 1) | 1) - 1):0] CDom_grainAlignedLowReduced2Sig =
+        notCDom_reduced2AbsSigSum[sigWidth/2:0]<<((sigWidth/2) & 1);
+    wire [(sigWidth + 2)/4:0] notCDom_reduced4AbsSigSum;
+    compressBy2#((sigWidth/2 + 1) | 1)
+        compressBy2_notCDom_reduced2AbsSigSum(
+            CDom_grainAlignedLowReduced2Sig, notCDom_reduced4AbsSigSum);
+    wire [((sigWidth + 2)/4 - 1):0] notCDom_sigExtraMask;
+    lowMaskLoHi#(clog2(prodWidth + 4) - 2, 0, (sigWidth + 2)/4)
+        lowMask_notCDom_sigExtraMask(
+            notCDom_normDistReduced2[(clog2(prodWidth + 4) - 2):1],
+            notCDom_sigExtraMask
+        );
+    wire notCDom_reduced4SigExtra =
+        |(notCDom_reduced4AbsSigSum & notCDom_sigExtraMask);
+    wire [(sigWidth + 2):0] notCDom_sig =
+        {notCDom_mainSig>>3,
+         (|notCDom_mainSig[2:0]) || notCDom_reduced4SigExtra};
+    wire notCDom_completeCancellation =
+        (notCDom_sig[(sigWidth + 2):(sigWidth + 1)] == 0);
+    wire notCDom_sign =
+        notCDom_completeCancellation ? roundingMode_min
+            : signProd ^ notCDom_signSigSum;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    assign out_isZero =
+        notNaN_addZeros || (!CIsDominant && notCDom_completeCancellation);
+    assign out_sign =
+           ( specialCase                 && special_signOut)
+        || (!specialCase &&  CIsDominant && CDom_sign      )
+        || (!specialCase && !CIsDominant && notCDom_sign   );
+    assign out_sExp = CIsDominant ? CDom_sExp : notCDom_sExp;
+`ifdef HardFloat_propagateNaNPayloads
+    assign out_sig =
+        out_isNaN ? {1'b1, fractNaN, 2'b00}
+            : CIsDominant ? CDom_sig : notCDom_sig;
+`else
+    assign out_sig = CIsDominant ? CDom_sig : notCDom_sig;
+`endif
+
+endmodule
+
+/*----------------------------------------------------------------------------
+*----------------------------------------------------------------------------*/
+
+module
+    mulAddRecFNToRaw#(parameter expWidth = 3, parameter sigWidth = 3) (
+        input [(`floatControlWidth - 1):0] control,
+        input [1:0] op,
+        input [(expWidth + sigWidth):0] a,
+        input [(expWidth + sigWidth):0] b,
+        input [(expWidth + sigWidth):0] c,
+        input [2:0] roundingMode,
+        output invalidExc,
+        output out_isNaN,
+        output out_isInf,
+        output out_isZero,
+        output out_sign,
+        output signed [(expWidth + 1):0] out_sExp,
+        output [(sigWidth + 2):0] out_sig
+    );
+`include "HardFloat_localFuncs.vi"
+
+    wire [(sigWidth - 1):0] mulAddA, mulAddB;
+    wire [(sigWidth*2 - 1):0] mulAddC;
+    wire [5:0] intermed_compactState;
+    wire signed [(expWidth + 1):0] intermed_sExp;
+    wire [(clog2(sigWidth + 1) - 1):0] intermed_CDom_CAlignDist;
+    wire [(sigWidth + 1):0] intermed_highAlignedSigC;
+    mulAddRecFNToRaw_preMul#(expWidth, sigWidth)
+        mulAddToRaw_preMul(
+            control,
+            op,
+            a,
+            b,
+            c,
+            roundingMode,
+            mulAddA,
+            mulAddB,
+            mulAddC,
+            intermed_compactState,
+            intermed_sExp,
+            intermed_CDom_CAlignDist,
+            intermed_highAlignedSigC
+        );
+    wire [sigWidth*2:0] mulAddResult = mulAddA * mulAddB + mulAddC;
+    mulAddRecFNToRaw_postMul#(expWidth, sigWidth)
+        mulAddToRaw_postMul(
+            intermed_compactState,
+            intermed_sExp,
+            intermed_CDom_CAlignDist,
+            intermed_highAlignedSigC,
+            mulAddResult,
+            roundingMode,
+            invalidExc,
+            out_isNaN,
+            out_isInf,
+            out_isZero,
+            out_sign,
+            out_sExp,
+            out_sig
+        );
+
+endmodule
+
+/*----------------------------------------------------------------------------
+*----------------------------------------------------------------------------*/
+
+module
+    mulAddRecFN#(parameter expWidth = 3, parameter sigWidth = 3) (
+        input [(`floatControlWidth - 1):0] control,
+        input [1:0] op,
+        input [(expWidth + sigWidth):0] a,
+        input [(expWidth + sigWidth):0] b,
+        input [(expWidth + sigWidth):0] c,
+        input [2:0] roundingMode,
+        output [(expWidth + sigWidth):0] out,
+        output [4:0] exceptionFlags
+    );
+
+    wire invalidExc, out_isNaN, out_isInf, out_isZero, out_sign;
+    wire signed [(expWidth + 1):0] out_sExp;
+    wire [(sigWidth + 2):0] out_sig;
+    mulAddRecFNToRaw#(expWidth, sigWidth)
+        mulAddRecFNToRaw(
+            control,
+            op,
+            a,
+            b,
+            c,
+            roundingMode,
+            invalidExc,
+            out_isNaN,
+            out_isInf,
+            out_isZero,
+            out_sign,
+            out_sExp,
+            out_sig
+        );
+    roundRawFNToRecFN#(expWidth, sigWidth, 0)
+        roundRawOut(
+            control,
+            invalidExc,
+            1'b0,
+            out_isNaN,
+            out_isInf,
+            out_isZero,
+            out_sign,
+            out_sExp,
+            out_sig,
+            roundingMode,
+            out,
+            exceptionFlags
+        );
+
+endmodule
+
diff --git a/src/nmutil/multipipe.py b/src/nmutil/multipipe.py
index a5655c35cc904de1370a0b3e0cec565d6651707e..725afc16552bba44a8f597c680c7c84ae6465541 100644 (file)
@@ -114,7 +114,7 @@ class MultiOutControlBase(Elaboratable):
             nmaskwid = maskwid * n_len # fan-out mode
 
         # set up input and output IO ACK (prev/next ready/valid)
-        self.p = PrevControl(in_multi, maskwid=nmaskwid) 
+        self.p = PrevControl(in_multi, maskwid=nmaskwid)
         n = []
         for i in range(n_len):
             n.append(NextControl(maskwid=maskwid))
@@ -226,8 +226,8 @@ class CombMultiOutPipeline(MultiOutControlBase):
         if self.maskwid:
             if self.routemask: # straight "routing" mode - treat like data
                 m.d.comb += self.n[muxid].stop_o.eq(self.p.stop_i)
-                #with m.If(pv):
-                m.d.comb += self.n[muxid].mask_o.eq(self.p.mask_i)
+                with m.If(pv):
+                    m.d.comb += self.n[muxid].mask_o.eq(self.p.mask_i)
             else:
                 ml = [] # accumulate output masks
                 ms = [] # accumulate output stops
@@ -311,7 +311,10 @@ class CombMultiInPipeline(MultiInControlBase):
             m.d.comb += n_ready_in[i].eq(1)
             m.d.comb += p_valid_i[i].eq(0)
             m.d.comb += self.p[i].ready_o.eq(0)
-        m.d.comb += p_valid_i[mid].eq(self.p_mux.active)
+        p = self.p[mid]
+        maskedout = Signal(reset_less=True)
+        m.d.comb += maskedout.eq(p.mask_i & ~p.stop_i)
+        m.d.comb += p_valid_i[mid].eq(maskedout & self.p_mux.active)
         m.d.comb += self.p[mid].ready_o.eq(~data_valid[mid] | self.n.ready_i)
         m.d.comb += n_ready_in[mid].eq(nirn & data_valid[mid])
         anyvalid = Signal(i, reset_less=True)
@@ -321,19 +324,21 @@ class CombMultiInPipeline(MultiInControlBase):
         anyvalid = Cat(*av)
         m.d.comb += self.n.valid_o.eq(anyvalid.bool())
         m.d.comb += data_valid[mid].eq(p_valid_i[mid] | \
-                                    (n_ready_in[mid] & data_valid[mid]))
+                                    (n_ready_in[mid] ))
 
         if self.routemask:
+            # XXX hack - fixes loop
+            m.d.comb += eq(self.n.stop_o, self.p[0].stop_i)
             for i in range(p_len):
                 p = self.p[i]
                 vr = Signal(reset_less=True)
                 maskedout = Signal(reset_less=True)
                 m.d.comb += maskedout.eq(p.mask_i & ~p.stop_i)
                 m.d.comb += vr.eq(maskedout.bool() & p.valid_i & p.ready_o)
+                #m.d.comb += vr.eq(p.valid_i & p.ready_o)
                 with m.If(vr):
                     m.d.comb += eq(self.n.mask_o, self.p[i].mask_i)
                     m.d.comb += eq(r_data[i], self.p[i].data_i)
-                    m.d.comb += eq(self.n.stop_o, self.p[i].stop_i)
         else:
             ml = [] # accumulate output masks
             ms = [] # accumulate output stops
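diff --git a/src/nmutil/test/test_inout_feedback_pipe.py b/src/nmutil/test/test_inout_feedback_pipe.py
index aea582dd69d39121d5f60c8fdf167dcbe6a21dc6..8cb29801a03c972d02605e3b8fea194245945fe2 100644 (file)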
@@ -276,15 +276,23 @@ def test1():
     with open("test_inoutmux_feedback_pipe.il", "w") as f:
         f.write(vl)
 
-    tlen = 3
+    return
+
+    tlen = 100
 
     test = InputTest(dut, tlen)
-    run_simulation(dut, [test.rcv(0), test.rcv(1),
+    run_simulation(dut, [test.rcv(0), #test.rcv(1),
                          #test.rcv(3), test.rcv(2),
-                         test.send(0), test.send(1),
+                         test.send(0), #test.send(1),
                          #test.send(3), test.send(2),
                         ],
                    vcd_name="test_inoutmux_feedback_pipe.vcd")
 
+
 if __name__ == '__main__':
+    #from cProfile import Profile
+    #p = Profile()
+    #p.enable()
     test1()
+    #p.disable()
+    #p.print_stats()