From c852018bf9103cf85b816b75ba8933c95f6fbf17 Mon Sep 17 00:00:00 2001
From: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Date: Fri, 3 May 2019 04:47:45 +0100
Subject: [PATCH] fix multiply bit-width

---
 src/ieee754/fpcommon/denorm.py        | 17 ++++++++--------
 src/ieee754/fpcommon/postnormalise.py |  2 +-
 src/ieee754/fpmul/mul0.py             |  7 ++-----
 src/ieee754/fpmul/mulstages.py        |  2 +-
 src/ieee754/fpmul/specialcases.py     | 28 ++++++++-------------------
 src/ieee754/fpmul/test/test_mul.py    |  5 +++--
 6 files changed, 24 insertions(+), 37 deletions(-)

diff --git a/src/ieee754/fpcommon/denorm.py b/src/ieee754/fpcommon/denorm.py
index cf54b0f3..2653e335 100644
--- a/src/ieee754/fpcommon/denorm.py
+++ b/src/ieee754/fpcommon/denorm.py
@@ -2,7 +2,7 @@
 # Copyright (C) Jonathan P Dawson 2013
 # 2013-12-12
 
-from nmigen import Module, Signal
+from nmigen import Module, Signal, Elaboratable
 from nmigen.cli import main, verilog
 from math import log
 
@@ -12,9 +12,9 @@ from ieee754.fpcommon.fpbase import FPState
 
 class FPSCData:
 
-    def __init__(self, width, id_wid):
-        self.a = FPNumBase(width, True)
-        self.b = FPNumBase(width, True)
+    def __init__(self, width, id_wid, m_extra=True):
+        self.a = FPNumBase(width, m_extra)
+        self.b = FPNumBase(width, m_extra)
         self.z = FPNumOut(width, False)
         self.oz = Signal(width, reset_less=True)
         self.out_do_z = Signal(reset_less=True)
@@ -33,19 +33,20 @@ class FPSCData:
                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 
 
-class FPAddDeNormMod(FPState):
+class FPAddDeNormMod(FPState, Elaboratable):
 
-    def __init__(self, width, id_wid):
+    def __init__(self, width, id_wid, m_extra=True):
         self.width = width
         self.id_wid = id_wid
+        self.m_extra = m_extra
         self.i = self.ispec()
         self.o = self.ospec()
 
     def ispec(self):
-        return FPSCData(self.width, self.id_wid)
+        return FPSCData(self.width, self.id_wid, self.m_extra)
 
     def ospec(self):
-        return FPSCData(self.width, self.id_wid)
+        return FPSCData(self.width, self.id_wid, self.m_extra)
 
     def process(self, i):
         return self.o
diff --git a/src/ieee754/fpcommon/postnormalise.py b/src/ieee754/fpcommon/postnormalise.py
index 48124184..4ab6639c 100644
--- a/src/ieee754/fpcommon/postnormalise.py
+++ b/src/ieee754/fpcommon/postnormalise.py
@@ -71,7 +71,7 @@ class FPNorm1ModSingle(Elaboratable):
 
         espec = (len(i.z.e), True)
         ediff_n126 = Signal(espec, reset_less=True)
-        msr = MultiShiftRMerge(mwid, espec)
+        msr = MultiShiftRMerge(mwid+2, espec)
         m.submodules.multishift_r = msr
 
         m.d.comb += i.eq(self.i)
diff --git a/src/ieee754/fpmul/mul0.py b/src/ieee754/fpmul/mul0.py
index bc264216..b488c95c 100644
--- a/src/ieee754/fpmul/mul0.py
+++ b/src/ieee754/fpmul/mul0.py
@@ -34,7 +34,7 @@ class FPMulStage0Mod(Elaboratable):
         self.o = self.ospec()
 
     def ispec(self):
-        return FPSCData(self.width, self.id_wid)
+        return FPSCData(self.width, self.id_wid, False)
 
     def ospec(self):
         return FPMulStage0Data(self.width, self.id_wid)
@@ -55,12 +55,9 @@ class FPMulStage0Mod(Elaboratable):
         m.submodules.mul0_out_z = self.o.z
 
         # store intermediate tests (and zero-extended mantissas)
-        seq = Signal(reset_less=True)
-        mge = Signal(reset_less=True)
         am0 = Signal(len(self.i.a.m)+1, reset_less=True)
         bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
-        m.d.comb += [seq.eq(self.i.a.s == self.i.b.s),
-                     mge.eq(self.i.a.m >= self.i.b.m),
+        m.d.comb += [
                      am0.eq(Cat(self.i.a.m, 0)),
                      bm0.eq(Cat(self.i.b.m, 0))
                     ]
diff --git a/src/ieee754/fpmul/mulstages.py b/src/ieee754/fpmul/mulstages.py
index adf68d38..e07b05aa 100644
--- a/src/ieee754/fpmul/mulstages.py
+++ b/src/ieee754/fpmul/mulstages.py
@@ -22,7 +22,7 @@ class FPMulStages(FPState, SimpleHandshake):
         self.m1o = self.ospec()
 
     def ispec(self):
-        return FPSCData(self.width, self.id_wid)
+        return FPSCData(self.width, self.id_wid, False)
 
     def ospec(self):
         return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec
diff --git a/src/ieee754/fpmul/specialcases.py b/src/ieee754/fpmul/specialcases.py
index 92dd75af..57b6f168 100644
--- a/src/ieee754/fpmul/specialcases.py
+++ b/src/ieee754/fpmul/specialcases.py
@@ -1,6 +1,6 @@
 # IEEE Floating Point Multiplier 
 
-from nmigen import Module, Signal, Cat, Const
+from nmigen import Module, Signal, Cat, Const, Elaboratable
 from nmigen.cli import main, verilog
 from math import log
 
@@ -12,7 +12,7 @@ from ieee754.fpcommon.getop import FPADDBaseData
 from ieee754.fpcommon.denorm import (FPSCData, FPAddDeNormMod)
 
 
-class FPMulSpecialCasesMod:
+class FPMulSpecialCasesMod(Elaboratable):
     """ special cases: NaNs, infs, zeros, denormalised
         see "Special Operations"
         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
@@ -28,7 +28,7 @@ class FPMulSpecialCasesMod:
         return FPADDBaseData(self.width, self.id_wid)
 
     def ospec(self):
-        return FPSCData(self.width, self.id_wid)
+        return FPSCData(self.width, self.id_wid, False)
 
     def setup(self, m, i):
         """ links module to inputs and outputs
@@ -45,8 +45,8 @@ class FPMulSpecialCasesMod:
         m.submodules.sc_out_z = self.o.z
 
         # decode: XXX really should move to separate stage
-        a1 = FPNumDecode(None, self.width)
-        b1 = FPNumDecode(None, self.width)
+        a1 = FPNumDecode(None, self.width, False)
+        b1 = FPNumDecode(None, self.width, False)
         m.submodules.sc_decode_a = a1
         m.submodules.sc_decode_b = b1
         m.d.comb += [a1.v.eq(self.i.a),
@@ -55,18 +55,6 @@ class FPMulSpecialCasesMod:
                      self.o.b.eq(b1)
                     ]
 
-        s_nomatch = Signal(reset_less=True)
-        m.d.comb += s_nomatch.eq(a1.s != b1.s)
-
-        m_match = Signal(reset_less=True)
-        m.d.comb += m_match.eq(a1.m == b1.m)
-
-        e_match = Signal(reset_less=True)
-        m.d.comb += e_match.eq(a1.e == b1.e)
-
-        aeqmb = Signal(reset_less=True)
-        m.d.comb += aeqmb.eq(s_nomatch & m_match & e_match)
-
         obz = Signal(reset_less=True)
         m.d.comb += obz.eq(a1.is_zero & b1.is_zero)
 
@@ -120,7 +108,7 @@ class FPMulSpecialCases(FPState):
 
     def __init__(self, width, id_wid):
         FPState.__init__(self, "special_cases")
-        self.mod = FPAddSpecialCasesMod(width)
+        self.mod = FPMulSpecialCasesMod(width, id_wid)
         self.out_z = self.mod.ospec()
         self.out_do_z = Signal(reset_less=True)
 
@@ -154,13 +142,13 @@ class FPMulSpecialCasesDeNorm(FPState, SimpleHandshake):
         return FPADDBaseData(self.width, self.id_wid) # SpecialCases ispec
 
     def ospec(self):
-        return FPSCData(self.width, self.id_wid) # DeNorm ospec
+        return FPSCData(self.width, self.id_wid, False) # DeNorm ospec
 
     def setup(self, m, i):
         """ links module to inputs and outputs
         """
         smod = FPMulSpecialCasesMod(self.width, self.id_wid)
-        dmod = FPAddDeNormMod(self.width, self.id_wid)
+        dmod = FPAddDeNormMod(self.width, self.id_wid, False)
 
         chain = StageChain([smod, dmod])
         chain.setup(m, i)
diff --git a/src/ieee754/fpmul/test/test_mul.py b/src/ieee754/fpmul/test/test_mul.py
index f58a9f66..72ec4a0a 100644
--- a/src/ieee754/fpmul/test/test_mul.py
+++ b/src/ieee754/fpmul/test/test_mul.py
@@ -22,9 +22,10 @@ def tbench(dut, maxcount, num_loops):
     count = 0
 
     #regression tests
-    stimulus_a = [0xba57711a, 0xbf9b1e94, 0x34082401, 0x5e8ef81,
+
+    stimulus_a = [0xa4504d7, 0xba57711a, 0xbf9b1e94, 0x34082401, 0x5e8ef81,
                   0x5c75da81, 0x2b017]
-    stimulus_b = [0xee1818c5, 0xc038ed3a, 0xb328cd45, 0x114f3db,
+    stimulus_b = [0xb4658540, 0xee1818c5, 0xc038ed3a, 0xb328cd45, 0x114f3db,
                   0x2f642a39, 0xff3807ab]
     yield from run_fpunit(dut, stimulus_a, stimulus_b, mul, get_case)
     count += len(stimulus_a)
-- 
2.30.2