From: Jacob Lifshay Date: Thu, 4 Jun 2020 00:26:04 +0000 (-0700) Subject: move mulAddRecFN.py and nmigen_div_experiment.py to unused dir X-Git-Tag: ls180-24jan2020~55 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=28eff03dbc3202a06bdfaa54910e89e02fbf1cf5;p=ieee754fpu.git move mulAddRecFN.py and nmigen_div_experiment.py to unused dir --- diff --git a/src/ieee754/fpdiv/mulAddRecFN.py b/src/ieee754/fpdiv/mulAddRecFN.py deleted file mode 100644 index 3432ccc1..00000000 --- a/src/ieee754/fpdiv/mulAddRecFN.py +++ /dev/null @@ -1,574 +0,0 @@ -""" -/*============================================================================ - -This Verilog source file is part of the Berkeley HardFloat IEEE Floating-Point -Arithmetic Package, Release 1, by John R. Hauser. - -Copyright 2019 The Regents of the University of California. All rights -reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions, and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions, and the following disclaimer in the documentation - and/or other materials provided with the distribution. - - 3. Neither the name of the University nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY -EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE -DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY -DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -=============================================================================*/ - -`include "HardFloat_consts.vi" -`include "HardFloat_specialize.vi" - -""" - -from nmigen import Elaboratable, Cat, Const, Mux, Module, Signal, Repl -from nmutil.concurrentunit import num_bits - -#/*---------------------------------------------------------------------------- -#*----------------------------------------------------------------------------*/ - -class mulAddRecFNToRaw_preMul(Elaboratable): - def __init__(self, expWidth=3, sigWidth=3): - # inputs - self.control = Signal(floatControlWidth, reset_less=True) - self.op = Signal(2, reset_less=True) - self.a = Signal(expWidth + sigWidth + 1, reset_less=True) - self.b = Signal(expWidth + sigWidth + 1, reset_less=True) - self.c = Signal(expWidth + sigWidth + 1, reset_less=True) - self.roundingMode = Signal(3, reset_less=True) - - # outputs - self.mulAddA = Signal(sigWidth, reset_less=True) - self.mulAddB = Signal(sigWidth, reset_less=True) - self.mulAddC = Signal(sigWidth*2, reset_less=True) - self.intermed_compactState = Signal(6, reset_less=True) - self.intermed_sExp = Signal(expWidth + 2, reset_less=True) - wid = num_bits(sigWidth + 1) - self.intermed_CDom_CAlignDist = Signal(wid, reset_less=True) - self.intermed_highAlignedSigC = Signal((sigWidth + 2), reset_less=True) - - def elaborate(self, platform): - m = Module() - comb = m.d.comb - - #/*------------------------------------------------------------------- - #*--------------------------------------------------------------------*/ - prodWidth = sigWidth*2; - sigSumWidth = sigWidth + prodWidth + 3; - #/*------------------------------------------------------------------- - #*-------------------------------------------------------------------*/ - isNaNA = Signal(reset_less=True) - isInfA = Signal(reset_less=True) - isZeroA = Signal(reset_less=True) - signA = Signal(reset_less=True) - - sExpA = Signal((expWidth + 2, True), reset_less=True) - sigA = Signal(sigWidth+1, reset_less=True) - m.submodules.recFNToRawFN_a = rf = recFNToRawFN(expWidth, sigWidth) - comb += [(a, isNaNA, isInfA, isZeroA, signA, sExpA, sigA)] - - isSigNaNA = Signal(reset_less=True) - m.submodules.isSigNaN_a = nan_a = isSigNaNRecFN(expWidth, sigWidth) - comb += [(a, isSigNaNA)] - - isNaNB = Signal(reset_less=True) - isInfB = Signal(reset_less=True) - isZeroB = Signal(reset_less=True) - signB = Signal(reset_less=True) - - sExpB = Signal((expWidth + 2, True), reset_less=True) - sigB = Signal(sigWidth+1, reset_less=True) - m.submodules.recFNToRawFN_b = rf = recFNToRawFN(expWidth, sigWidth) - comb += [(b, isNaNB, isInfB, isZeroB, signB, sExpB, sigB)] - - isSigNaNB = Signal(reset_less=True) - m.submodules.isSigNaN_b = nan_b = isSigNaNRecFN(expWidth, sigWidth) - comb += [(b, isSigNaNB)] - - isNaNC = Signal(reset_less=True) - isInfC = Signal(reset_less=True) - isZeroC = Signal(reset_less=True) - signC = Signal(reset_less=True) - - sExpC = Signal((expWidth + 2, True), reset_less=True) - sigC = Signal(sigWidth+1, reset_less=True) - m.submodules.recFNToRawFN_c = rf = recFNToRawFN(expWidth, sigWidth) - comb += [(c, isNaNC, isInfC, isZeroC, signC, sExpC, sigC)] - - isSigNaNC = Signal(reset_less=True) - m.submodules.isSigNaN_c = nan_c = isSigNaNRecFN(expWidth, sigWidth) - comb += [(c, isSigNaNC)] - - #/*------------------------------------------------------------------- - #*-------------------------------------------------------------------*/ - signProd = Signal(reset_less=True) - sExpAlignedProd = Signal((expWidth + 3, True), reset_less=True) - doSubMags = Signal(reset_less=True) - opSignC = Signal(reset_less=True) - roundingMode_min = Signal(reset_less=True) - - comb += signProd.eq(signA ^ signB ^ op[1]) - comb += sExpAlignedProd.eq(sExpA + sExpB + \ - (-(1<> CAlignDist), - grainAlignedSigC.eq(sigC<>3)), - ] - - #/*------------------------------------------------------------------- - #*-------------------------------------------------------------------*/ - isNaNAOrB = Signal(reset_less=True) - isNaNAny = Signal(reset_less=True) - isInfAOrB = Signal(reset_less=True) - invalidProd = Signal(reset_less=True) - notSigNaN_invalidExc = Signal(reset_less=True) - invalidExc = Signal(reset_less=True) - notNaN_addZeros = Signal(reset_less=True) - specialCase = Signal(reset_less=True) - specialNotNaN_signOut = Signal(reset_less=True) - comb += [ - isNaNAOrB.eq(isNaNA | isNaNB), - isNaNAny.eq(isNaNAOrB | isNaNC), - isInfAOrB.eq(isInfA | isInfB), - invalidProd.eq((isInfA & isZeroB) | (isZeroA & isInfB)), - notSigNaN_invalidExc.eq( - invalidProd | (~isNaNAOrB & isInfAOrB & isInfC & doSubMags)), - invalidExc.eq( - isSigNaNA | isSigNaNB | isSigNaNC | notSigNaN_invalidExc), - notNaN_addZeros.eq((isZeroA | isZeroB) & isZeroC), - specialCase.eq(isNaNAny | isInfAOrB | isInfC | notNaN_addZeros), - specialNotNaN_signOut.eq( - (isInfAOrB & signProd) | (isInfC & opSignC) - | (notNaN_addZeros & ~roundingMode_min & signProd & opSignC) - | (notNaN_addZeros & roundingMode_min & (signProd | opSignC))) - ] - - special_signOut = specialNotNaN_signOut; - #/*------------------------------------------------------------------- - # *-------------------------------------------------------------------*/ - comb += self.mulAddA.eq(sigA) - comb += self.mulAddB.eq(sigB) - comb += self.mulAddC.eq(alignedSigC[1:prodWidth+1]) - comb += self.intermed_compactState.eq(Cat( - special_signOut, - notNaN_addZeros | (~specialCase & alignedSigC[0]), - isInfAOrB | isInfC | (~specialCase & CIsDominant ), - isNaNAny | (~specialCase & doSubMags ), - invalidExc | (~specialCase & signProd ), - specialCase,)) - comb += self.intermed_sExp.eq(sExpSum) - comb += self.intermed_CDom_CAlignDist( - CAlignDist[:clog2(sigWidth + 1)]) - comb += self.intermed_highAlignedSigC.eq( - alignedSigC[(sigSumWidth - 1):(prodWidth + 1)]) - - return m - -#/*------------------------------------------------------------------------ -#*------------------------------------------------------------------------*/ - -class mulAddRecFNToRaw_postMul(Elaboratable): - - def __init__(self, expWidth=3, sigWidth=3): - # inputs - self.intermed_compactState = Signal(6, reset_less=True) - self.intermed_sExp = Signal(expWidth + 2, reset_less=True) - wid = num_bits(sigWidth + 1) - self.intermed_CDom_CAlignDist = Signal(wid, reset_less=True) - self.intermed_highAlignedSigC = Signal((sigWidth + 2), reset_less=True) - self.mulAddResult = Signal(sigWidth*2, reset_less=True) - self.roundingMode = Signal(3, reset_less=True) - - # outputs - self.invalidExc = Signal(reset_less=True) - self.out_isNaN = Signal(reset_less=True) - self.out_isInf = Signal(reset_less=True) - self.out_isZero = Signal(reset_less=True) - self.out_sign = Signal(reset_less=True) - self.out_sExp = Signal((expWidth + 2, True), reset_less=True) - self.out_sig = Signal(sigWidth + 3, reset_less=True) - - def elaborate(self, platform): - m = Module() - comb = m.d.comb - - #/*------------------------------------------------------------------- - #*-------------------------------------------------------------------*/ - prodWidth = sigWidth*2; - sigSumWidth = sigWidth + prodWidth + 3; - - #/*------------------------------------------------------------------- - #*-------------------------------------------------------------------*/ - specialCase = Signal(reset_less=True) - invalidExc = Signal(reset_less=True) - out_isNaN = Signal(reset_less=True) - out_isInf = Signal(reset_less=True) - notNaN_addZeros = Signal(reset_less=True) - signProd = Signal(reset_less=True) - doSubMags = Signal(reset_less=True) - CIsDominant = Signal(reset_less=True) - bit0AlignedSigC = Signal(reset_less=True) - special_signOut = Signal(reset_less=True) - comb += [ - specialCase .eq( intermed_compactState[5] ), - invalidExc .eq( specialCase & intermed_compactState[4] ), - out_isNaN .eq( specialCase & intermed_compactState[3] ), - out_isInf .eq( specialCase & intermed_compactState[2] ), - notNaN_addZeros .eq( specialCase & intermed_compactState[1] ), - signProd .eq( intermed_compactState[4] ), - doSubMags .eq( intermed_compactState[3] ), - CIsDominant .eq( intermed_compactState[2] ), - bit0AlignedSigC .eq( intermed_compactState[1] ), - special_signOut .eq( intermed_compactState[0] ), - ] - - #/*------------------------------------------------------------------- - #*-------------------------------------------------------------------*/ - opSignC = Signal(reset_less=True) - incHighAlignedSigC = Signal(sigWidth + 3, reset_less=True) - sigSum = Signal(sigSumWidth, reset_less=True) - roundingMode_min = Signal(reset_less=True) - - comb += [\ - opSignC.eq(signProd ^ doSubMags), - incHighAlignedSigC.eq(intermed_highAlignedSigC + 1), - sigSum.eq(Cat(bit0AlignedSigC, - mulAddResult[(prodWidth - 1):0], - Mux(mulAddResult[prodWidth], - incHighAlignedSigC, - intermed_highAlignedSigC))), - roundingMode_min.eq(roundingMode == ROUND_MIN), - ] - - #/*------------------------------------------------------------------- - #*-------------------------------------------------------------------*/ - CDom_sign = Signal(reset_less=True) - CDom_sExp = Signal((expWidth + 2, True), reset_less=True) - CDom_absSigSum = Signal(prodWidth+2, reset_less=True) - CDom_absSigSumExtra = Signal(reset_less=True) - CDom_mainSig = Signal(sigWidth+5, reset_less=True) - CDom_grainAlignedLowSig = Signal(sigWidth | 3, reset_less=True) - CDom_reduced4LowSig = Signal(sigWidth/4+1, reset_less=True) - CDom_sigExtraMask = Signal(sigWidth/4, reset_less=True) - - lowMask_CDom_sigExtraMask = lm - m.submodules.lm = lm = lowMaskLoHi(clog2(sigWidth + 1) - 2, 0, - sigWidth/4) - CDom_reduced4SigExtra = Signal(reset_less=True) - CDom_sig = Signal(sigWidth+3, reset_less=True) - - comb += [\ - CDom_sign.eq(opSignC), - CDom_sExp.eq(intermed_sExp - doSubMags), - CDom_absSigSum.eq(Mux(doSubMags, - ~sigSum[sigWidth+1:sigSumWidth], - Cat(sigSum[sigWidth+2 : sigSumWidth - 2], - intermed_highAlignedSigC[(sigWidth + 1):sigWidth], - 0b0))), - CDom_absSigSumExtra.eq(Mux(doSubMags, - ~(sigSum[1:sigWidth+1].all())), - sigSum[1:sigWidth + 2].bool())), - CDom_mainSig.eq( - (CDom_absSigSum<>(sigWidth - 3)), - CDom_grainAlignedLowSig.eq( - CDom_absSigSum[(sigWidth - 1):0]<<(~sigWidth & 3)), - CDom_reduced4LowSig.eq(compressBy4_CDom_absSigSum.out), - compressBy4_CDom_absSigSum.inp.eq(CDom_grainAlignedLowSig), - lowMask_CDom_sigExtraMask.inp.eq( - intermed_CDom_CAlignDist[2:clog2(sigWidth + 1)]), - CDom_sigExtraMask.eq(lowMask_CDom_sigExtraMask.out), - CDom_reduced4SigExtra.eq( - (CDom_reduced4LowSig & CDom_sigExtraMask).bool()), - CDom_sig.eq(Cat((CDom_mainSig[:3]).bool() | - CDom_reduced4SigExtra | - CDom_absSigSumExtra, - CDom_mainSig>>3)), - ] - - #/*------------------------------------------------------------------- - #*-------------------------------------------------------------------*/ - notCDom_signSigSum = Signal(reset_less=True) - notCDom_absSigSum = Signal(prodWidth + 3, reset_less=True) - notCDom_reduced2AbsSigSum = Signal((prodWidth+2)//2+1, reset_less=True) - m.submodules.cb2 = compressBy2_notCDom_absSigSum = \ - compressBy2(prodWidth + 3) - notCDom_normDistReduced2 = Signal(clog2(prodWidth+4) - 1, - reset_less=True) - m.submodules.clz = countLeadingZeros_notCDom = \ - countLeadingZeros((prodWidth + 2)/2 + 1, - clog2(prodWidth + 4) - 1) - notCDom_nearNormDist = Signal(clog2(prodWidth + 4), reset_less=True) - notCDom_sExp = Signal((expWidth + 2, True), reset_less=True) - notCDom_mainSig = Signal(sigWidth + 5, reset_less=True) - sw = (sigWidth/2 + 1) | 1 - CDom_grainAlignedLowReduced2Sig = Signal(sw, reset_less=True) - notCDom_reduced4AbsSigSum = Signal((sigWidth + 2)//4+1, reset_less=True) - m.submodules.cb2r = compressBy2_notCDom_reduced2AbsSigSum = \ - compressBy2(sw) - sw = (sigWidth + 2)//4 - notCDom_sigExtraMask = Signal(sw, reset_less=True) - m.submodules.lms = lowMask_notCDom_sigExtraMask = \ - lowMaskLoHi(clog2(prodWidth + 4) - 2, 0, sw) - notCDom_reduced4SigExtra = Signal(reset_less=True) - notCDom_sig = Signal(sigWidth+3, reset_less=True) - notCDom_completeCancellation = Signal(reset_less=True) - notCDom_sign = Signal(reset_less=True) - - comb += [\ - notCDom_signSigSum.eq(sigSum[prodWidth + 3]), - notCDom_absSigSum.eq(Mux(notCDom_signSigSum, - ~sigSum[:prodWidth + 3], - sigSum[:prodWidth + 3] + doSubMags)), - compressBy2_notCDom_absSigSum.inp.eq(notCDom_absSigSum), - notCDom_reduced2AbsSigSum.eq(compressBy2_notCDom_absSigSum.out), - countLeadingZeros_notCDom.inp.eq(notCDom_reduced2AbsSigSum), - notCDom_normDistReduced2.out.eq(countLeadingZeros_notCDom), - notCDom_nearNormDist.eq(notCDom_normDistReduced2<<1), - notCDom_sExp.eq(intermed_sExp - notCDom_nearNormDist), - notCDom_mainSig.eq((Cat(notCDom_absSigSum, 0)<< - notCDom_nearNormDist)>>(sigWidth - 1)), - CDom_grainAlignedLowReduced2Sig.eq( - notCDom_reduced2AbsSigSum[sigWidth/2:0]<<((sigWidth/2) & 1)), - compressBy2_notCDom_reduced2AbsSigSum.inp.eq( - CDom_grainAlignedLowReduced2Sig), - compressBy2_notCDom_reduced2AbsSigSum.eq( - notCDom_reduced4AbsSigSum.out), - lowMask_notCDom_sigExtraMask.inp.eq( - notCDom_normDistReduced2[1:clog2(prodWidth + 4) - 1]), - notCDom_sigExtraMask.eq(lowMask_notCDom_sigExtraMask.out), - notCDom_reduced4SigExtra.eq( - (notCDom_reduced4AbsSigSum & notCDom_sigExtraMask).bool()), - notCDom_sig.eq(Cat( - notCDom_mainSig[:3].bool() | notCDom_reduced4SigExtra, - notCDom_mainSig>>3)), - notCDom_completeCancellation.eq( - notCDom_sig[(sigWidth + 1):(sigWidth + 3)] == 0), - notCDom_sign.eq(Mux(notCDom_completeCancellation, - roundingMode_min, - signProd ^ notCDom_signSigSum)), - ] - - #/*------------------------------------------------------------------- - #*-------------------------------------------------------------------*/ - comb += [\ - self.out_isZero.eq( notNaN_addZeros | \ - (~CIsDominant & notCDom_completeCancellation)), - out_sign.eq((specialCase & special_signOut) \ - | (~specialCase & CIsDominant & CDom_sign ) \ - | (~specialCase & ~CIsDominant & notCDom_sign )), - out_sExp.eq(Mux(CIsDominant, CDom_sExp, notCDom_sExp)), - out_sig.eq(Mux(CIsDominant, CDom_sig, notCDom_sig)), - ] - - return m - -#/*------------------------------------------------------------------------ -#*------------------------------------------------------------------------*/ - -class mulAddRecFNToRaw(Elaboratable): - def __init__(expWidth=3, sigWidth=3): - self.control = Signal(floatControlWidth, reset_less=True) - self.op = Signal(2, reset_less=True) - self.a = Signal(expWidth + sigWidth + 1, reset_less=True) - self.b = Signal(expWidth + sigWidth + 1, reset_less=True) - self.c = Signal(expWidth + sigWidth + 1, reset_less=True) - self.roundingMode = Signal(3, reset_less=True) - - # output - self.invalidExc = Signal(reset_less=True) - self.out_isNaN = Signal(reset_less=True) - self.out_isInf = Signal(reset_less=True) - self.out_isZero = Signal(reset_less=True) - self.out_sign = Signal(reset_less=True) - self.out_sExp = Signal((expWidth + 2, True), reset_less=True) - self.out_sig = Signal(sigWidth + 3, reset_less=True) - - def elaborate(self, platform): - m = Module() - comb = m.d.comb - - mulAddA = Signal(sigWidth, reset_less=True) - mulAddB = Signal(sigWidth, reset_less=True) - mulAddC = Signal(sigWidth*2, reset_less=True) - intermed_compactState = Signal(6, reset_less=True) - intermed_sExp = Signal(expWidth + 2, reset_less=True) - wid = num_bits(sigWidth + 1) - intermed_CDom_CAlignDist = Signal(wid, reset_less=True) - intermed_highAlignedSigC = Signal((sigWidth + 2), reset_less=True) - - m.submodules.mar = mulAddToRaw_preMul = \ - mulAddRecFNToRaw_preMul(expWidth, sigWidth) - - comb += [\ - mulAddToRaw_preMul.control.eq(self.control), - mulAddToRaw_preMul.op.eq(self.op), - mulAddToRaw_preMul.a.eq(self.a), - mulAddToRaw_preMul.b.eq(self.b), - mulAddToRaw_preMul.roundingMode.eq(self.roundingMode), - mulAddA.eq(mulAddToRaw_preMul.mulAddA), - mulAddB.eq(mulAddToRaw_preMul.mulAddB), - mulAddC.eq(mulAddToRaw_preMul.mulAddC), - intermed_compactState.eq(mulAddToRaw_preMul.intermed_compactState), - intermed_sExp.eq(mulAddToRaw_preMul.intermed_sExp), - intermed_CDom_CAlignDist.eq( - mulAddToRaw_preMul.intermed_CDom_CAlignDist), - intermed_highAlignedSigC.eq( - mulAddToRaw_preMul.intermed_highAlignedSigC), - ] - - mulAddResult = Signal(sigWidth*2+1, reset_less=True) - comb += mulAddResult.eq(mulAddA * mulAddB + mulAddC) - - m.submodules.marp = mulAddToRaw_postMul = \ - mulAddRecFNToRaw_postMul(expWidth, sigWidth) - - comb += [\ - mulAddRecFNToRaw_postMul.intermed_compactState.eq( - intermed_compactState), - mulAddRecFNToRaw_postMul.intermed_sExp.eq(intermed_sExp), - mulAddRecFNToRaw_postMul.intermed_CDom_CAlignDist.eq( - intermed_CDom_CAlignDist), - mulAddRecFNToRaw_postMul.intermed_highAlignedSigC.eq( - intermed_highAlignedSigC), - mulAddRecFNToRaw_postMul.mulAddResult.eq(mulAddResult), - mulAddRecFNToRaw_postMul.roundingMode.eq(roundingMode), - - invalidExc.eq(mulAddRecFNToRaw_postMul.invalidExc), - out_isNaN.eq(mulAddRecFNToRaw_postMul.out_isNaN), - out_isInf.eq(mulAddRecFNToRaw_postMul.out_isInf), - out_isZero.eq(mulAddRecFNToRaw_postMul.out_isZero), - out_sign.eq(mulAddRecFNToRaw_postMul.out_sign), - out_sExp.eq(mulAddRecFNToRaw_postMul.out_sExp), - out_sig.eq(mulAddRecFNToRaw_postMul.out_sig), - ] - - return m - -""" -XXX TODO? - -/*---------------------------------------------------------------------------- -*----------------------------------------------------------------------------*/ - -module - mulAddRecFN#(parameter expWidth = 3, parameter sigWidth = 3) ( - input [(`floatControlWidth - 1):0] control, - input [1:0] op, - input [(expWidth + sigWidth):0] a, - input [(expWidth + sigWidth):0] b, - input [(expWidth + sigWidth):0] c, - input [2:0] roundingMode, - output [(expWidth + sigWidth):0] out, - output [4:0] exceptionFlags - ); - - wire invalidExc, out_isNaN, out_isInf, out_isZero, out_sign; - wire signed [(expWidth + 1):0] out_sExp; - wire [(sigWidth + 2):0] out_sig; - mulAddRecFNToRaw#(expWidth, sigWidth) - mulAddRecFNToRaw( - control, - op, - a, - b, - c, - roundingMode, - invalidExc, - out_isNaN, - out_isInf, - out_isZero, - out_sign, - out_sExp, - out_sig - ); - roundRawFNToRecFN#(expWidth, sigWidth, 0) - roundRawOut( - control, - invalidExc, - 1'b0, - out_isNaN, - out_isInf, - out_isZero, - out_sign, - out_sExp, - out_sig, - roundingMode, - out, - exceptionFlags - ); - -endmodule -""" - diff --git a/src/ieee754/fpdiv/nmigen_div_experiment.py b/src/ieee754/fpdiv/nmigen_div_experiment.py deleted file mode 100644 index 32431e26..00000000 --- a/src/ieee754/fpdiv/nmigen_div_experiment.py +++ /dev/null @@ -1,256 +0,0 @@ -# IEEE Floating Point Divider (Single Precision) -# Copyright (C) Jonathan P Dawson 2013 -# 2013-12-12 - -from nmigen import Module, Signal, Const, Cat, Elaboratable -from nmigen.cli import main, verilog - -from ieee754.fpcommon.fpbase import (FPNumIn, FPNumOut, FPOpIn, - FPOpOut, Overflow, FPBase, FPState, - FPNumBaseRecord) -from nmutil.nmoperator import eq - - -class Div: - def __init__(self, width): - self.width = width - self.quot = Signal(width) # quotient - self.dor = Signal(width) # divisor - self.dend = Signal(width) # dividend - self.rem = Signal(width) # remainder - self.count = Signal(7) # loop count - - self.czero = Const(0, width) - - def reset(self, m): - m.d.sync += [ - self.quot.eq(self.czero), - self.rem.eq(self.czero), - self.count.eq(Const(0, 7)) - ] - - -class FPDIV(FPBase, Elaboratable): - - def __init__(self, width): - FPBase.__init__(self) - self.width = width - - self.in_a = FPOpIn(width) - self.in_b = FPOpIn(width) - self.out_z = FPOpOut(width) - self.in_a.data_i = Signal(width) - self.in_b.data_i = Signal(width) - self.out_z.data_o = Signal(width) - - self.states = [] - - def add_state(self, state): - self.states.append(state) - return state - - def elaborate(self, platform=None): - """ creates the HDL code-fragment for FPDiv - """ - m = Module() - - # Latches - a = FPNumBaseRecord(self.width, False) - b = FPNumBaseRecord(self.width, False) - z = FPNumBaseRecord(self.width, False) - a = FPNumIn(None, a) - b = FPNumIn(None, b) - z = FPNumOut(z) - - div = Div(a.m_width*2 + 3) # double the mantissa width plus g/r/sticky - - of = Overflow() - m.submodules.in_a = a - m.submodules.in_b = b - m.submodules.z = z - #m.submodules.of = of - - print ("a.v", a.v, self.in_a.v) - m.d.comb += a.v.eq(self.in_a.v) - m.d.comb += b.v.eq(self.in_b.v) - - with m.FSM() as fsm: - - # ****** - # gets operand a - - with m.State("get_a"): - res = self.get_op(m, self.in_a, a, "get_b") - m.d.sync += eq([a, self.in_a.ready_o], res) - - # ****** - # gets operand b - - with m.State("get_b"): - res = self.get_op(m, self.in_b, b, "special_cases") - m.d.sync += eq([b, self.in_b.ready_o], res) - - # ****** - # special cases: NaNs, infs, zeros, denormalised - # NOTE: some of these are unique to div. see "Special Operations" - # https://steve.hollasch.net/cgindex/coding/ieeefloat.html - - with m.State("special_cases"): - - # if a is NaN or b is NaN return NaN - with m.If(a.is_nan | b.is_nan): - m.next = "put_z" - m.d.sync += z.nan(1) - - # if a is Inf and b is Inf return NaN - with m.Elif(a.is_inf & b.is_inf): - m.next = "put_z" - m.d.sync += z.nan(1) - - # if a is inf return inf (or NaN if b is zero) - with m.Elif(a.is_inf): - m.next = "put_z" - m.d.sync += z.inf(a.s ^ b.s) - - # if b is inf return zero - with m.Elif(b.is_inf): - m.next = "put_z" - m.d.sync += z.zero(a.s ^ b.s) - - # if a is zero return zero (or NaN if b is zero) - with m.Elif(a.is_zero): - m.next = "put_z" - # if b is zero return NaN - with m.If(b.is_zero): - m.d.sync += z.nan(1) - with m.Else(): - m.d.sync += z.zero(a.s ^ b.s) - - # if b is zero return Inf - with m.Elif(b.is_zero): - m.next = "put_z" - m.d.sync += z.inf(a.s ^ b.s) - - # Denormalised Number checks - with m.Else(): - m.next = "normalise_a" - self.denormalise(m, a) - self.denormalise(m, b) - - # ****** - # normalise_a - - with m.State("normalise_a"): - self.op_normalise(m, a, "normalise_b") - - # ****** - # normalise_b - - with m.State("normalise_b"): - self.op_normalise(m, b, "divide_0") - - # ****** - # First stage of divide. initialise state - - with m.State("divide_0"): - m.next = "divide_1" - m.d.sync += [ - z.s.eq(a.s ^ b.s), # sign - z.e.eq(a.e - b.e), # exponent - div.dend.eq(a.m<<(a.m_width+3)), # 3 bits for g/r/sticky - div.dor.eq(b.m), - ] - div.reset(m) - - # ****** - # Second stage of divide. - - with m.State("divide_1"): - m.next = "divide_2" - m.d.sync += [ - div.quot.eq(div.quot << 1), - div.rem.eq(Cat(div.dend[-1], div.rem[0:])), - div.dend.eq(div.dend << 1), - ] - - # ****** - # Third stage of divide. - # This stage ends by jumping out to divide_3 - # However it defaults to jumping to divide_1 (which comes back here) - - with m.State("divide_2"): - with m.If(div.rem >= div.dor): - m.d.sync += [ - div.quot[0].eq(1), - div.rem.eq(div.rem - div.dor), - ] - with m.If(div.count == div.width-2): - m.next = "divide_3" - with m.Else(): - m.next = "divide_1" - m.d.sync += [ - div.count.eq(div.count + 1), - ] - - # ****** - # Fourth stage of divide. - - with m.State("divide_3"): - m.next = "normalise_1" - m.d.sync += [ - z.m.eq(div.quot[3:]), - of.guard.eq(div.quot[2]), - of.round_bit.eq(div.quot[1]), - of.sticky.eq(div.quot[0] | (div.rem != 0)) - ] - - # ****** - # First stage of normalisation. - - with m.State("normalise_1"): - self.normalise_1(m, z, of, "normalise_2") - - # ****** - # Second stage of normalisation. - - with m.State("normalise_2"): - self.normalise_2(m, z, of, "round") - - # ****** - # rounding stage - - with m.State("round"): - self.roundz(m, z, of.roundz) - m.next = "corrections" - - # ****** - # correction stage - - with m.State("corrections"): - self.corrections(m, z, "pack") - - # ****** - # pack stage - - with m.State("pack"): - self.pack(m, z, "put_z") - - # ****** - # put_z stage - - with m.State("put_z"): - self.put_z(m, z, self.out_z, "get_a") - - return m - - -if __name__ == "__main__": - alu = FPDIV(width=32) - main(alu, ports=alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports()) - - - # works... but don't use, just do "python fname.py convert -t v" - #print (verilog.convert(alu, ports=[ - # ports=alu.in_a.ports() + \ - # alu.in_b.ports() + \ - # alu.out_z.ports()) diff --git a/src/ieee754/fpdiv/test/test_div.py b/src/ieee754/fpdiv/test/test_div.py deleted file mode 100644 index 0670900b..00000000 --- a/src/ieee754/fpdiv/test/test_div.py +++ /dev/null @@ -1,48 +0,0 @@ -import sys -from random import randint -from random import seed -from operator import truediv - -from nmigen import Module, Signal -from nmigen.compat.sim import run_simulation - -from ieee754.fpdiv.nmigen_div_experiment import FPDIV - -from ieee754.fpcommon.test.unit_test_single import (get_mantissa, - get_exponent, get_sign, is_nan, - is_inf, is_pos_inf, is_neg_inf, - match, get_case, check_case, run_fpunit, - run_edge_cases, run_corner_cases) - - -def testbench(dut): - yield from check_case(dut, 0x80000000, 0x00000000, 0xffc00000) - yield from check_case(dut, 0x00000000, 0x80000000, 0xffc00000) - yield from check_case(dut, 0x0002b017, 0xff3807ab, 0x80000000) - yield from check_case(dut, 0x40000000, 0x3F800000, 0x40000000) - yield from check_case(dut, 0x3F800000, 0x40000000, 0x3F000000) - yield from check_case(dut, 0x3F800000, 0x40400000, 0x3EAAAAAB) - yield from check_case(dut, 0x40400000, 0x41F80000, 0x3DC6318C) - yield from check_case(dut, 0x41F9EB4D, 0x429A4C70, 0x3ECF52B2) - yield from check_case(dut, 0x7F7FFFFE, 0x70033181, 0x4EF9C4C8) - yield from check_case(dut, 0x7F7FFFFE, 0x70000001, 0x4EFFFFFC) - yield from check_case(dut, 0x7F7FFCFF, 0x70200201, 0x4ECCC7D5) - yield from check_case(dut, 0x70200201, 0x7F7FFCFF, 0x302003E2) - - count = 0 - - #regression tests - stimulus_a = [0xbf9b1e94, 0x34082401, 0x5e8ef81, 0x5c75da81, 0x2b017] - stimulus_b = [0xc038ed3a, 0xb328cd45, 0x114f3db, 0x2f642a39, 0xff3807ab] - yield from run_fpunit(dut, stimulus_a, stimulus_b, truediv, get_case) - count += len(stimulus_a) - print (count, "vectors passed") - - yield from run_corner_cases(dut, count, truediv, get_case) - yield from run_edge_cases(dut, count, truediv, get_case) - - -if __name__ == '__main__': - dut = FPDIV(width=32) - run_simulation(dut, testbench(dut), vcd_name="test_div.vcd") - diff --git a/src/ieee754/fpdiv/test/test_div64.py b/src/ieee754/fpdiv/test/test_div64.py deleted file mode 100644 index 16162445..00000000 --- a/src/ieee754/fpdiv/test/test_div64.py +++ /dev/null @@ -1,35 +0,0 @@ -from nmigen import Module, Signal -from nmigen.compat.sim import run_simulation -from operator import truediv - -from ieee754.fpdiv.nmigen_div_experiment import FPDIV - -from ieee754.fpcommon.test.unit_test_double import (get_mantissa, - get_exponent, get_sign, is_nan, - is_inf, is_pos_inf, is_neg_inf, - match, get_case, check_case, run_fpunit, - run_edge_cases, run_corner_cases) - -def testbench(dut): - yield from check_case(dut, 0x4008000000000000, 0x3FF0000000000000, - 0x4008000000000000) - yield from check_case(dut, 0x3FF0000000000000, 0x4008000000000000, - 0x3FD5555555555555) - - count = 0 - - #regression tests - #stimulus_a = [0xbf9b1e94, 0x34082401, 0x5e8ef81, 0x5c75da81, 0x2b017] - #stimulus_b = [0xc038ed3a, 0xb328cd45, 0x114f3db, 0x2f642a39, 0xff3807ab] - #yield from run_fpunit(dut, stimulus_a, stimulus_b, truediv, get_case) - #count += len(stimulus_a) - #print (count, "vectors passed") - - yield from run_corner_cases(dut, count, truediv, get_case) - yield from run_edge_cases(dut, count, truediv, get_case) - - -if __name__ == '__main__': - dut = FPDIV(width=64) - run_simulation(dut, testbench(dut), vcd_name="test_div64.vcd") - diff --git a/src/ieee754/unused/__init__.py b/src/ieee754/unused/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/ieee754/unused/fpdiv/__init__.py b/src/ieee754/unused/fpdiv/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/ieee754/unused/fpdiv/mulAddRecFN.py b/src/ieee754/unused/fpdiv/mulAddRecFN.py new file mode 100644 index 00000000..3432ccc1 --- /dev/null +++ b/src/ieee754/unused/fpdiv/mulAddRecFN.py @@ -0,0 +1,574 @@ +""" +/*============================================================================ + +This Verilog source file is part of the Berkeley HardFloat IEEE Floating-Point +Arithmetic Package, Release 1, by John R. Hauser. + +Copyright 2019 The Regents of the University of California. All rights +reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +`include "HardFloat_consts.vi" +`include "HardFloat_specialize.vi" + +""" + +from nmigen import Elaboratable, Cat, Const, Mux, Module, Signal, Repl +from nmutil.concurrentunit import num_bits + +#/*---------------------------------------------------------------------------- +#*----------------------------------------------------------------------------*/ + +class mulAddRecFNToRaw_preMul(Elaboratable): + def __init__(self, expWidth=3, sigWidth=3): + # inputs + self.control = Signal(floatControlWidth, reset_less=True) + self.op = Signal(2, reset_less=True) + self.a = Signal(expWidth + sigWidth + 1, reset_less=True) + self.b = Signal(expWidth + sigWidth + 1, reset_less=True) + self.c = Signal(expWidth + sigWidth + 1, reset_less=True) + self.roundingMode = Signal(3, reset_less=True) + + # outputs + self.mulAddA = Signal(sigWidth, reset_less=True) + self.mulAddB = Signal(sigWidth, reset_less=True) + self.mulAddC = Signal(sigWidth*2, reset_less=True) + self.intermed_compactState = Signal(6, reset_less=True) + self.intermed_sExp = Signal(expWidth + 2, reset_less=True) + wid = num_bits(sigWidth + 1) + self.intermed_CDom_CAlignDist = Signal(wid, reset_less=True) + self.intermed_highAlignedSigC = Signal((sigWidth + 2), reset_less=True) + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + + #/*------------------------------------------------------------------- + #*--------------------------------------------------------------------*/ + prodWidth = sigWidth*2; + sigSumWidth = sigWidth + prodWidth + 3; + #/*------------------------------------------------------------------- + #*-------------------------------------------------------------------*/ + isNaNA = Signal(reset_less=True) + isInfA = Signal(reset_less=True) + isZeroA = Signal(reset_less=True) + signA = Signal(reset_less=True) + + sExpA = Signal((expWidth + 2, True), reset_less=True) + sigA = Signal(sigWidth+1, reset_less=True) + m.submodules.recFNToRawFN_a = rf = recFNToRawFN(expWidth, sigWidth) + comb += [(a, isNaNA, isInfA, isZeroA, signA, sExpA, sigA)] + + isSigNaNA = Signal(reset_less=True) + m.submodules.isSigNaN_a = nan_a = isSigNaNRecFN(expWidth, sigWidth) + comb += [(a, isSigNaNA)] + + isNaNB = Signal(reset_less=True) + isInfB = Signal(reset_less=True) + isZeroB = Signal(reset_less=True) + signB = Signal(reset_less=True) + + sExpB = Signal((expWidth + 2, True), reset_less=True) + sigB = Signal(sigWidth+1, reset_less=True) + m.submodules.recFNToRawFN_b = rf = recFNToRawFN(expWidth, sigWidth) + comb += [(b, isNaNB, isInfB, isZeroB, signB, sExpB, sigB)] + + isSigNaNB = Signal(reset_less=True) + m.submodules.isSigNaN_b = nan_b = isSigNaNRecFN(expWidth, sigWidth) + comb += [(b, isSigNaNB)] + + isNaNC = Signal(reset_less=True) + isInfC = Signal(reset_less=True) + isZeroC = Signal(reset_less=True) + signC = Signal(reset_less=True) + + sExpC = Signal((expWidth + 2, True), reset_less=True) + sigC = Signal(sigWidth+1, reset_less=True) + m.submodules.recFNToRawFN_c = rf = recFNToRawFN(expWidth, sigWidth) + comb += [(c, isNaNC, isInfC, isZeroC, signC, sExpC, sigC)] + + isSigNaNC = Signal(reset_less=True) + m.submodules.isSigNaN_c = nan_c = isSigNaNRecFN(expWidth, sigWidth) + comb += [(c, isSigNaNC)] + + #/*------------------------------------------------------------------- + #*-------------------------------------------------------------------*/ + signProd = Signal(reset_less=True) + sExpAlignedProd = Signal((expWidth + 3, True), reset_less=True) + doSubMags = Signal(reset_less=True) + opSignC = Signal(reset_less=True) + roundingMode_min = Signal(reset_less=True) + + comb += signProd.eq(signA ^ signB ^ op[1]) + comb += sExpAlignedProd.eq(sExpA + sExpB + \ + (-(1<> CAlignDist), + grainAlignedSigC.eq(sigC<>3)), + ] + + #/*------------------------------------------------------------------- + #*-------------------------------------------------------------------*/ + isNaNAOrB = Signal(reset_less=True) + isNaNAny = Signal(reset_less=True) + isInfAOrB = Signal(reset_less=True) + invalidProd = Signal(reset_less=True) + notSigNaN_invalidExc = Signal(reset_less=True) + invalidExc = Signal(reset_less=True) + notNaN_addZeros = Signal(reset_less=True) + specialCase = Signal(reset_less=True) + specialNotNaN_signOut = Signal(reset_less=True) + comb += [ + isNaNAOrB.eq(isNaNA | isNaNB), + isNaNAny.eq(isNaNAOrB | isNaNC), + isInfAOrB.eq(isInfA | isInfB), + invalidProd.eq((isInfA & isZeroB) | (isZeroA & isInfB)), + notSigNaN_invalidExc.eq( + invalidProd | (~isNaNAOrB & isInfAOrB & isInfC & doSubMags)), + invalidExc.eq( + isSigNaNA | isSigNaNB | isSigNaNC | notSigNaN_invalidExc), + notNaN_addZeros.eq((isZeroA | isZeroB) & isZeroC), + specialCase.eq(isNaNAny | isInfAOrB | isInfC | notNaN_addZeros), + specialNotNaN_signOut.eq( + (isInfAOrB & signProd) | (isInfC & opSignC) + | (notNaN_addZeros & ~roundingMode_min & signProd & opSignC) + | (notNaN_addZeros & roundingMode_min & (signProd | opSignC))) + ] + + special_signOut = specialNotNaN_signOut; + #/*------------------------------------------------------------------- + # *-------------------------------------------------------------------*/ + comb += self.mulAddA.eq(sigA) + comb += self.mulAddB.eq(sigB) + comb += self.mulAddC.eq(alignedSigC[1:prodWidth+1]) + comb += self.intermed_compactState.eq(Cat( + special_signOut, + notNaN_addZeros | (~specialCase & alignedSigC[0]), + isInfAOrB | isInfC | (~specialCase & CIsDominant ), + isNaNAny | (~specialCase & doSubMags ), + invalidExc | (~specialCase & signProd ), + specialCase,)) + comb += self.intermed_sExp.eq(sExpSum) + comb += self.intermed_CDom_CAlignDist( + CAlignDist[:clog2(sigWidth + 1)]) + comb += self.intermed_highAlignedSigC.eq( + alignedSigC[(sigSumWidth - 1):(prodWidth + 1)]) + + return m + +#/*------------------------------------------------------------------------ +#*------------------------------------------------------------------------*/ + +class mulAddRecFNToRaw_postMul(Elaboratable): + + def __init__(self, expWidth=3, sigWidth=3): + # inputs + self.intermed_compactState = Signal(6, reset_less=True) + self.intermed_sExp = Signal(expWidth + 2, reset_less=True) + wid = num_bits(sigWidth + 1) + self.intermed_CDom_CAlignDist = Signal(wid, reset_less=True) + self.intermed_highAlignedSigC = Signal((sigWidth + 2), reset_less=True) + self.mulAddResult = Signal(sigWidth*2, reset_less=True) + self.roundingMode = Signal(3, reset_less=True) + + # outputs + self.invalidExc = Signal(reset_less=True) + self.out_isNaN = Signal(reset_less=True) + self.out_isInf = Signal(reset_less=True) + self.out_isZero = Signal(reset_less=True) + self.out_sign = Signal(reset_less=True) + self.out_sExp = Signal((expWidth + 2, True), reset_less=True) + self.out_sig = Signal(sigWidth + 3, reset_less=True) + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + + #/*------------------------------------------------------------------- + #*-------------------------------------------------------------------*/ + prodWidth = sigWidth*2; + sigSumWidth = sigWidth + prodWidth + 3; + + #/*------------------------------------------------------------------- + #*-------------------------------------------------------------------*/ + specialCase = Signal(reset_less=True) + invalidExc = Signal(reset_less=True) + out_isNaN = Signal(reset_less=True) + out_isInf = Signal(reset_less=True) + notNaN_addZeros = Signal(reset_less=True) + signProd = Signal(reset_less=True) + doSubMags = Signal(reset_less=True) + CIsDominant = Signal(reset_less=True) + bit0AlignedSigC = Signal(reset_less=True) + special_signOut = Signal(reset_less=True) + comb += [ + specialCase .eq( intermed_compactState[5] ), + invalidExc .eq( specialCase & intermed_compactState[4] ), + out_isNaN .eq( specialCase & intermed_compactState[3] ), + out_isInf .eq( specialCase & intermed_compactState[2] ), + notNaN_addZeros .eq( specialCase & intermed_compactState[1] ), + signProd .eq( intermed_compactState[4] ), + doSubMags .eq( intermed_compactState[3] ), + CIsDominant .eq( intermed_compactState[2] ), + bit0AlignedSigC .eq( intermed_compactState[1] ), + special_signOut .eq( intermed_compactState[0] ), + ] + + #/*------------------------------------------------------------------- + #*-------------------------------------------------------------------*/ + opSignC = Signal(reset_less=True) + incHighAlignedSigC = Signal(sigWidth + 3, reset_less=True) + sigSum = Signal(sigSumWidth, reset_less=True) + roundingMode_min = Signal(reset_less=True) + + comb += [\ + opSignC.eq(signProd ^ doSubMags), + incHighAlignedSigC.eq(intermed_highAlignedSigC + 1), + sigSum.eq(Cat(bit0AlignedSigC, + mulAddResult[(prodWidth - 1):0], + Mux(mulAddResult[prodWidth], + incHighAlignedSigC, + intermed_highAlignedSigC))), + roundingMode_min.eq(roundingMode == ROUND_MIN), + ] + + #/*------------------------------------------------------------------- + #*-------------------------------------------------------------------*/ + CDom_sign = Signal(reset_less=True) + CDom_sExp = Signal((expWidth + 2, True), reset_less=True) + CDom_absSigSum = Signal(prodWidth+2, reset_less=True) + CDom_absSigSumExtra = Signal(reset_less=True) + CDom_mainSig = Signal(sigWidth+5, reset_less=True) + CDom_grainAlignedLowSig = Signal(sigWidth | 3, reset_less=True) + CDom_reduced4LowSig = Signal(sigWidth/4+1, reset_less=True) + CDom_sigExtraMask = Signal(sigWidth/4, reset_less=True) + + lowMask_CDom_sigExtraMask = lm + m.submodules.lm = lm = lowMaskLoHi(clog2(sigWidth + 1) - 2, 0, + sigWidth/4) + CDom_reduced4SigExtra = Signal(reset_less=True) + CDom_sig = Signal(sigWidth+3, reset_less=True) + + comb += [\ + CDom_sign.eq(opSignC), + CDom_sExp.eq(intermed_sExp - doSubMags), + CDom_absSigSum.eq(Mux(doSubMags, + ~sigSum[sigWidth+1:sigSumWidth], + Cat(sigSum[sigWidth+2 : sigSumWidth - 2], + intermed_highAlignedSigC[(sigWidth + 1):sigWidth], + 0b0))), + CDom_absSigSumExtra.eq(Mux(doSubMags, + ~(sigSum[1:sigWidth+1].all())), + sigSum[1:sigWidth + 2].bool())), + CDom_mainSig.eq( + (CDom_absSigSum<>(sigWidth - 3)), + CDom_grainAlignedLowSig.eq( + CDom_absSigSum[(sigWidth - 1):0]<<(~sigWidth & 3)), + CDom_reduced4LowSig.eq(compressBy4_CDom_absSigSum.out), + compressBy4_CDom_absSigSum.inp.eq(CDom_grainAlignedLowSig), + lowMask_CDom_sigExtraMask.inp.eq( + intermed_CDom_CAlignDist[2:clog2(sigWidth + 1)]), + CDom_sigExtraMask.eq(lowMask_CDom_sigExtraMask.out), + CDom_reduced4SigExtra.eq( + (CDom_reduced4LowSig & CDom_sigExtraMask).bool()), + CDom_sig.eq(Cat((CDom_mainSig[:3]).bool() | + CDom_reduced4SigExtra | + CDom_absSigSumExtra, + CDom_mainSig>>3)), + ] + + #/*------------------------------------------------------------------- + #*-------------------------------------------------------------------*/ + notCDom_signSigSum = Signal(reset_less=True) + notCDom_absSigSum = Signal(prodWidth + 3, reset_less=True) + notCDom_reduced2AbsSigSum = Signal((prodWidth+2)//2+1, reset_less=True) + m.submodules.cb2 = compressBy2_notCDom_absSigSum = \ + compressBy2(prodWidth + 3) + notCDom_normDistReduced2 = Signal(clog2(prodWidth+4) - 1, + reset_less=True) + m.submodules.clz = countLeadingZeros_notCDom = \ + countLeadingZeros((prodWidth + 2)/2 + 1, + clog2(prodWidth + 4) - 1) + notCDom_nearNormDist = Signal(clog2(prodWidth + 4), reset_less=True) + notCDom_sExp = Signal((expWidth + 2, True), reset_less=True) + notCDom_mainSig = Signal(sigWidth + 5, reset_less=True) + sw = (sigWidth/2 + 1) | 1 + CDom_grainAlignedLowReduced2Sig = Signal(sw, reset_less=True) + notCDom_reduced4AbsSigSum = Signal((sigWidth + 2)//4+1, reset_less=True) + m.submodules.cb2r = compressBy2_notCDom_reduced2AbsSigSum = \ + compressBy2(sw) + sw = (sigWidth + 2)//4 + notCDom_sigExtraMask = Signal(sw, reset_less=True) + m.submodules.lms = lowMask_notCDom_sigExtraMask = \ + lowMaskLoHi(clog2(prodWidth + 4) - 2, 0, sw) + notCDom_reduced4SigExtra = Signal(reset_less=True) + notCDom_sig = Signal(sigWidth+3, reset_less=True) + notCDom_completeCancellation = Signal(reset_less=True) + notCDom_sign = Signal(reset_less=True) + + comb += [\ + notCDom_signSigSum.eq(sigSum[prodWidth + 3]), + notCDom_absSigSum.eq(Mux(notCDom_signSigSum, + ~sigSum[:prodWidth + 3], + sigSum[:prodWidth + 3] + doSubMags)), + compressBy2_notCDom_absSigSum.inp.eq(notCDom_absSigSum), + notCDom_reduced2AbsSigSum.eq(compressBy2_notCDom_absSigSum.out), + countLeadingZeros_notCDom.inp.eq(notCDom_reduced2AbsSigSum), + notCDom_normDistReduced2.out.eq(countLeadingZeros_notCDom), + notCDom_nearNormDist.eq(notCDom_normDistReduced2<<1), + notCDom_sExp.eq(intermed_sExp - notCDom_nearNormDist), + notCDom_mainSig.eq((Cat(notCDom_absSigSum, 0)<< + notCDom_nearNormDist)>>(sigWidth - 1)), + CDom_grainAlignedLowReduced2Sig.eq( + notCDom_reduced2AbsSigSum[sigWidth/2:0]<<((sigWidth/2) & 1)), + compressBy2_notCDom_reduced2AbsSigSum.inp.eq( + CDom_grainAlignedLowReduced2Sig), + compressBy2_notCDom_reduced2AbsSigSum.eq( + notCDom_reduced4AbsSigSum.out), + lowMask_notCDom_sigExtraMask.inp.eq( + notCDom_normDistReduced2[1:clog2(prodWidth + 4) - 1]), + notCDom_sigExtraMask.eq(lowMask_notCDom_sigExtraMask.out), + notCDom_reduced4SigExtra.eq( + (notCDom_reduced4AbsSigSum & notCDom_sigExtraMask).bool()), + notCDom_sig.eq(Cat( + notCDom_mainSig[:3].bool() | notCDom_reduced4SigExtra, + notCDom_mainSig>>3)), + notCDom_completeCancellation.eq( + notCDom_sig[(sigWidth + 1):(sigWidth + 3)] == 0), + notCDom_sign.eq(Mux(notCDom_completeCancellation, + roundingMode_min, + signProd ^ notCDom_signSigSum)), + ] + + #/*------------------------------------------------------------------- + #*-------------------------------------------------------------------*/ + comb += [\ + self.out_isZero.eq( notNaN_addZeros | \ + (~CIsDominant & notCDom_completeCancellation)), + out_sign.eq((specialCase & special_signOut) \ + | (~specialCase & CIsDominant & CDom_sign ) \ + | (~specialCase & ~CIsDominant & notCDom_sign )), + out_sExp.eq(Mux(CIsDominant, CDom_sExp, notCDom_sExp)), + out_sig.eq(Mux(CIsDominant, CDom_sig, notCDom_sig)), + ] + + return m + +#/*------------------------------------------------------------------------ +#*------------------------------------------------------------------------*/ + +class mulAddRecFNToRaw(Elaboratable): + def __init__(expWidth=3, sigWidth=3): + self.control = Signal(floatControlWidth, reset_less=True) + self.op = Signal(2, reset_less=True) + self.a = Signal(expWidth + sigWidth + 1, reset_less=True) + self.b = Signal(expWidth + sigWidth + 1, reset_less=True) + self.c = Signal(expWidth + sigWidth + 1, reset_less=True) + self.roundingMode = Signal(3, reset_less=True) + + # output + self.invalidExc = Signal(reset_less=True) + self.out_isNaN = Signal(reset_less=True) + self.out_isInf = Signal(reset_less=True) + self.out_isZero = Signal(reset_less=True) + self.out_sign = Signal(reset_less=True) + self.out_sExp = Signal((expWidth + 2, True), reset_less=True) + self.out_sig = Signal(sigWidth + 3, reset_less=True) + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + + mulAddA = Signal(sigWidth, reset_less=True) + mulAddB = Signal(sigWidth, reset_less=True) + mulAddC = Signal(sigWidth*2, reset_less=True) + intermed_compactState = Signal(6, reset_less=True) + intermed_sExp = Signal(expWidth + 2, reset_less=True) + wid = num_bits(sigWidth + 1) + intermed_CDom_CAlignDist = Signal(wid, reset_less=True) + intermed_highAlignedSigC = Signal((sigWidth + 2), reset_less=True) + + m.submodules.mar = mulAddToRaw_preMul = \ + mulAddRecFNToRaw_preMul(expWidth, sigWidth) + + comb += [\ + mulAddToRaw_preMul.control.eq(self.control), + mulAddToRaw_preMul.op.eq(self.op), + mulAddToRaw_preMul.a.eq(self.a), + mulAddToRaw_preMul.b.eq(self.b), + mulAddToRaw_preMul.roundingMode.eq(self.roundingMode), + mulAddA.eq(mulAddToRaw_preMul.mulAddA), + mulAddB.eq(mulAddToRaw_preMul.mulAddB), + mulAddC.eq(mulAddToRaw_preMul.mulAddC), + intermed_compactState.eq(mulAddToRaw_preMul.intermed_compactState), + intermed_sExp.eq(mulAddToRaw_preMul.intermed_sExp), + intermed_CDom_CAlignDist.eq( + mulAddToRaw_preMul.intermed_CDom_CAlignDist), + intermed_highAlignedSigC.eq( + mulAddToRaw_preMul.intermed_highAlignedSigC), + ] + + mulAddResult = Signal(sigWidth*2+1, reset_less=True) + comb += mulAddResult.eq(mulAddA * mulAddB + mulAddC) + + m.submodules.marp = mulAddToRaw_postMul = \ + mulAddRecFNToRaw_postMul(expWidth, sigWidth) + + comb += [\ + mulAddRecFNToRaw_postMul.intermed_compactState.eq( + intermed_compactState), + mulAddRecFNToRaw_postMul.intermed_sExp.eq(intermed_sExp), + mulAddRecFNToRaw_postMul.intermed_CDom_CAlignDist.eq( + intermed_CDom_CAlignDist), + mulAddRecFNToRaw_postMul.intermed_highAlignedSigC.eq( + intermed_highAlignedSigC), + mulAddRecFNToRaw_postMul.mulAddResult.eq(mulAddResult), + mulAddRecFNToRaw_postMul.roundingMode.eq(roundingMode), + + invalidExc.eq(mulAddRecFNToRaw_postMul.invalidExc), + out_isNaN.eq(mulAddRecFNToRaw_postMul.out_isNaN), + out_isInf.eq(mulAddRecFNToRaw_postMul.out_isInf), + out_isZero.eq(mulAddRecFNToRaw_postMul.out_isZero), + out_sign.eq(mulAddRecFNToRaw_postMul.out_sign), + out_sExp.eq(mulAddRecFNToRaw_postMul.out_sExp), + out_sig.eq(mulAddRecFNToRaw_postMul.out_sig), + ] + + return m + +""" +XXX TODO? + +/*---------------------------------------------------------------------------- +*----------------------------------------------------------------------------*/ + +module + mulAddRecFN#(parameter expWidth = 3, parameter sigWidth = 3) ( + input [(`floatControlWidth - 1):0] control, + input [1:0] op, + input [(expWidth + sigWidth):0] a, + input [(expWidth + sigWidth):0] b, + input [(expWidth + sigWidth):0] c, + input [2:0] roundingMode, + output [(expWidth + sigWidth):0] out, + output [4:0] exceptionFlags + ); + + wire invalidExc, out_isNaN, out_isInf, out_isZero, out_sign; + wire signed [(expWidth + 1):0] out_sExp; + wire [(sigWidth + 2):0] out_sig; + mulAddRecFNToRaw#(expWidth, sigWidth) + mulAddRecFNToRaw( + control, + op, + a, + b, + c, + roundingMode, + invalidExc, + out_isNaN, + out_isInf, + out_isZero, + out_sign, + out_sExp, + out_sig + ); + roundRawFNToRecFN#(expWidth, sigWidth, 0) + roundRawOut( + control, + invalidExc, + 1'b0, + out_isNaN, + out_isInf, + out_isZero, + out_sign, + out_sExp, + out_sig, + roundingMode, + out, + exceptionFlags + ); + +endmodule +""" + diff --git a/src/ieee754/unused/fpdiv/nmigen_div_experiment.py b/src/ieee754/unused/fpdiv/nmigen_div_experiment.py new file mode 100644 index 00000000..32431e26 --- /dev/null +++ b/src/ieee754/unused/fpdiv/nmigen_div_experiment.py @@ -0,0 +1,256 @@ +# IEEE Floating Point Divider (Single Precision) +# Copyright (C) Jonathan P Dawson 2013 +# 2013-12-12 + +from nmigen import Module, Signal, Const, Cat, Elaboratable +from nmigen.cli import main, verilog + +from ieee754.fpcommon.fpbase import (FPNumIn, FPNumOut, FPOpIn, + FPOpOut, Overflow, FPBase, FPState, + FPNumBaseRecord) +from nmutil.nmoperator import eq + + +class Div: + def __init__(self, width): + self.width = width + self.quot = Signal(width) # quotient + self.dor = Signal(width) # divisor + self.dend = Signal(width) # dividend + self.rem = Signal(width) # remainder + self.count = Signal(7) # loop count + + self.czero = Const(0, width) + + def reset(self, m): + m.d.sync += [ + self.quot.eq(self.czero), + self.rem.eq(self.czero), + self.count.eq(Const(0, 7)) + ] + + +class FPDIV(FPBase, Elaboratable): + + def __init__(self, width): + FPBase.__init__(self) + self.width = width + + self.in_a = FPOpIn(width) + self.in_b = FPOpIn(width) + self.out_z = FPOpOut(width) + self.in_a.data_i = Signal(width) + self.in_b.data_i = Signal(width) + self.out_z.data_o = Signal(width) + + self.states = [] + + def add_state(self, state): + self.states.append(state) + return state + + def elaborate(self, platform=None): + """ creates the HDL code-fragment for FPDiv + """ + m = Module() + + # Latches + a = FPNumBaseRecord(self.width, False) + b = FPNumBaseRecord(self.width, False) + z = FPNumBaseRecord(self.width, False) + a = FPNumIn(None, a) + b = FPNumIn(None, b) + z = FPNumOut(z) + + div = Div(a.m_width*2 + 3) # double the mantissa width plus g/r/sticky + + of = Overflow() + m.submodules.in_a = a + m.submodules.in_b = b + m.submodules.z = z + #m.submodules.of = of + + print ("a.v", a.v, self.in_a.v) + m.d.comb += a.v.eq(self.in_a.v) + m.d.comb += b.v.eq(self.in_b.v) + + with m.FSM() as fsm: + + # ****** + # gets operand a + + with m.State("get_a"): + res = self.get_op(m, self.in_a, a, "get_b") + m.d.sync += eq([a, self.in_a.ready_o], res) + + # ****** + # gets operand b + + with m.State("get_b"): + res = self.get_op(m, self.in_b, b, "special_cases") + m.d.sync += eq([b, self.in_b.ready_o], res) + + # ****** + # special cases: NaNs, infs, zeros, denormalised + # NOTE: some of these are unique to div. see "Special Operations" + # https://steve.hollasch.net/cgindex/coding/ieeefloat.html + + with m.State("special_cases"): + + # if a is NaN or b is NaN return NaN + with m.If(a.is_nan | b.is_nan): + m.next = "put_z" + m.d.sync += z.nan(1) + + # if a is Inf and b is Inf return NaN + with m.Elif(a.is_inf & b.is_inf): + m.next = "put_z" + m.d.sync += z.nan(1) + + # if a is inf return inf (or NaN if b is zero) + with m.Elif(a.is_inf): + m.next = "put_z" + m.d.sync += z.inf(a.s ^ b.s) + + # if b is inf return zero + with m.Elif(b.is_inf): + m.next = "put_z" + m.d.sync += z.zero(a.s ^ b.s) + + # if a is zero return zero (or NaN if b is zero) + with m.Elif(a.is_zero): + m.next = "put_z" + # if b is zero return NaN + with m.If(b.is_zero): + m.d.sync += z.nan(1) + with m.Else(): + m.d.sync += z.zero(a.s ^ b.s) + + # if b is zero return Inf + with m.Elif(b.is_zero): + m.next = "put_z" + m.d.sync += z.inf(a.s ^ b.s) + + # Denormalised Number checks + with m.Else(): + m.next = "normalise_a" + self.denormalise(m, a) + self.denormalise(m, b) + + # ****** + # normalise_a + + with m.State("normalise_a"): + self.op_normalise(m, a, "normalise_b") + + # ****** + # normalise_b + + with m.State("normalise_b"): + self.op_normalise(m, b, "divide_0") + + # ****** + # First stage of divide. initialise state + + with m.State("divide_0"): + m.next = "divide_1" + m.d.sync += [ + z.s.eq(a.s ^ b.s), # sign + z.e.eq(a.e - b.e), # exponent + div.dend.eq(a.m<<(a.m_width+3)), # 3 bits for g/r/sticky + div.dor.eq(b.m), + ] + div.reset(m) + + # ****** + # Second stage of divide. + + with m.State("divide_1"): + m.next = "divide_2" + m.d.sync += [ + div.quot.eq(div.quot << 1), + div.rem.eq(Cat(div.dend[-1], div.rem[0:])), + div.dend.eq(div.dend << 1), + ] + + # ****** + # Third stage of divide. + # This stage ends by jumping out to divide_3 + # However it defaults to jumping to divide_1 (which comes back here) + + with m.State("divide_2"): + with m.If(div.rem >= div.dor): + m.d.sync += [ + div.quot[0].eq(1), + div.rem.eq(div.rem - div.dor), + ] + with m.If(div.count == div.width-2): + m.next = "divide_3" + with m.Else(): + m.next = "divide_1" + m.d.sync += [ + div.count.eq(div.count + 1), + ] + + # ****** + # Fourth stage of divide. + + with m.State("divide_3"): + m.next = "normalise_1" + m.d.sync += [ + z.m.eq(div.quot[3:]), + of.guard.eq(div.quot[2]), + of.round_bit.eq(div.quot[1]), + of.sticky.eq(div.quot[0] | (div.rem != 0)) + ] + + # ****** + # First stage of normalisation. + + with m.State("normalise_1"): + self.normalise_1(m, z, of, "normalise_2") + + # ****** + # Second stage of normalisation. + + with m.State("normalise_2"): + self.normalise_2(m, z, of, "round") + + # ****** + # rounding stage + + with m.State("round"): + self.roundz(m, z, of.roundz) + m.next = "corrections" + + # ****** + # correction stage + + with m.State("corrections"): + self.corrections(m, z, "pack") + + # ****** + # pack stage + + with m.State("pack"): + self.pack(m, z, "put_z") + + # ****** + # put_z stage + + with m.State("put_z"): + self.put_z(m, z, self.out_z, "get_a") + + return m + + +if __name__ == "__main__": + alu = FPDIV(width=32) + main(alu, ports=alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports()) + + + # works... but don't use, just do "python fname.py convert -t v" + #print (verilog.convert(alu, ports=[ + # ports=alu.in_a.ports() + \ + # alu.in_b.ports() + \ + # alu.out_z.ports()) diff --git a/src/ieee754/unused/fpdiv/test/__init__.py b/src/ieee754/unused/fpdiv/test/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/ieee754/unused/fpdiv/test/test_div.py b/src/ieee754/unused/fpdiv/test/test_div.py new file mode 100644 index 00000000..35caeb72 --- /dev/null +++ b/src/ieee754/unused/fpdiv/test/test_div.py @@ -0,0 +1,48 @@ +import sys +from random import randint +from random import seed +from operator import truediv + +from nmigen import Module, Signal +from nmigen.compat.sim import run_simulation + +from ieee754.unused.fpdiv.nmigen_div_experiment import FPDIV + +from ieee754.fpcommon.test.unit_test_single import (get_mantissa, + get_exponent, get_sign, is_nan, + is_inf, is_pos_inf, is_neg_inf, + match, get_case, check_case, run_fpunit, + run_edge_cases, run_corner_cases) + + +def tstbench(dut): + yield from check_case(dut, 0x80000000, 0x00000000, 0xffc00000) + yield from check_case(dut, 0x00000000, 0x80000000, 0xffc00000) + yield from check_case(dut, 0x0002b017, 0xff3807ab, 0x80000000) + yield from check_case(dut, 0x40000000, 0x3F800000, 0x40000000) + yield from check_case(dut, 0x3F800000, 0x40000000, 0x3F000000) + yield from check_case(dut, 0x3F800000, 0x40400000, 0x3EAAAAAB) + yield from check_case(dut, 0x40400000, 0x41F80000, 0x3DC6318C) + yield from check_case(dut, 0x41F9EB4D, 0x429A4C70, 0x3ECF52B2) + yield from check_case(dut, 0x7F7FFFFE, 0x70033181, 0x4EF9C4C8) + yield from check_case(dut, 0x7F7FFFFE, 0x70000001, 0x4EFFFFFC) + yield from check_case(dut, 0x7F7FFCFF, 0x70200201, 0x4ECCC7D5) + yield from check_case(dut, 0x70200201, 0x7F7FFCFF, 0x302003E2) + + count = 0 + + #regression tests + stimulus_a = [0xbf9b1e94, 0x34082401, 0x5e8ef81, 0x5c75da81, 0x2b017] + stimulus_b = [0xc038ed3a, 0xb328cd45, 0x114f3db, 0x2f642a39, 0xff3807ab] + yield from run_fpunit(dut, stimulus_a, stimulus_b, truediv, get_case) + count += len(stimulus_a) + print (count, "vectors passed") + + yield from run_corner_cases(dut, count, truediv, get_case) + yield from run_edge_cases(dut, count, truediv, get_case) + + +if __name__ == '__main__': + dut = FPDIV(width=32) + run_simulation(dut, tstbench(dut), vcd_name="test_div.vcd") + diff --git a/src/ieee754/unused/fpdiv/test/test_div64.py b/src/ieee754/unused/fpdiv/test/test_div64.py new file mode 100644 index 00000000..e3c812fe --- /dev/null +++ b/src/ieee754/unused/fpdiv/test/test_div64.py @@ -0,0 +1,35 @@ +from nmigen import Module, Signal +from nmigen.compat.sim import run_simulation +from operator import truediv + +from ieee754.unused.fpdiv.nmigen_div_experiment import FPDIV + +from ieee754.fpcommon.test.unit_test_double import (get_mantissa, + get_exponent, get_sign, is_nan, + is_inf, is_pos_inf, is_neg_inf, + match, get_case, check_case, run_fpunit, + run_edge_cases, run_corner_cases) + +def tstbench(dut): + yield from check_case(dut, 0x4008000000000000, 0x3FF0000000000000, + 0x4008000000000000) + yield from check_case(dut, 0x3FF0000000000000, 0x4008000000000000, + 0x3FD5555555555555) + + count = 0 + + #regression tests + #stimulus_a = [0xbf9b1e94, 0x34082401, 0x5e8ef81, 0x5c75da81, 0x2b017] + #stimulus_b = [0xc038ed3a, 0xb328cd45, 0x114f3db, 0x2f642a39, 0xff3807ab] + #yield from run_fpunit(dut, stimulus_a, stimulus_b, truediv, get_case) + #count += len(stimulus_a) + #print (count, "vectors passed") + + yield from run_corner_cases(dut, count, truediv, get_case) + yield from run_edge_cases(dut, count, truediv, get_case) + + +if __name__ == '__main__': + dut = FPDIV(width=64) + run_simulation(dut, tstbench(dut), vcd_name="test_div64.vcd") +