From 6e672a1a16c00f8d870b50b3de9686bc935db44f Mon Sep 17 00:00:00 2001
From: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Date: Thu, 18 Jul 2019 14:30:01 +0100
Subject: [PATCH] add larger uint32 and uint64 to fp16 conversion

---
 src/ieee754/fcvt/pipeline.py                | 33 +++++++++++-----
 src/ieee754/fcvt/test/test_fcvt_int_pipe.py | 44 ++++++++++++++++-----
 src/ieee754/fpcommon/test/fpmux.py          | 12 ++++++
 3 files changed, 71 insertions(+), 18 deletions(-)

diff --git a/src/ieee754/fcvt/pipeline.py b/src/ieee754/fcvt/pipeline.py
index 28b5e0e2..941d1aeb 100644
--- a/src/ieee754/fcvt/pipeline.py
+++ b/src/ieee754/fcvt/pipeline.py
@@ -49,7 +49,7 @@ class FPCVTIntToFloatMod(Elaboratable):
     def setup(self, m, i):
         """ links module to inputs and outputs
         """
-        m.submodules.upconvert = self
+        m.submodules.intconvert = self
         m.d.comb += self.i.eq(i)
 
     def process(self, i):
@@ -68,8 +68,9 @@ class FPCVTIntToFloatMod(Elaboratable):
         print("z1", z1.width, z1.rmw, z1.e_width, z1.e_start, z1.e_end)
 
         me = self.in_pspec.width
-        ms = self.o.z.rmw - me
-        print("ms-me", ms, me, self.o.z.rmw)
+        mz = self.o.z.rmw
+        ms = mz - me
+        print("ms-me", ms, me, mz)
 
         # 3 extra bits for guard/round/sticky
         msb = FPMSBHigh(me+3, z1.e_width)
@@ -82,15 +83,29 @@ class FPCVTIntToFloatMod(Elaboratable):
         # conversion can mostly be done manually...
         zo = self.o.z
         m.d.comb += zo.s.eq(0)  # unsigned for now
-        m.d.comb += zo.e.eq(msb.e_out)
-        m.d.comb += zo.m[ms:].eq(msb.m_out[3:])
+        if ms < 0:
+            # larger int to smaller FP (uint32/64 -> fp16 most likely)
+            m.d.comb += zo.e.eq(msb.e_out-1)
+            m.d.comb += zo.m[ms-1:].eq(msb.m_out[-mz-1:])
+        else:
+            # smaller int to larger FP
+            m.d.comb += zo.e.eq(msb.e_out)
+            m.d.comb += zo.m[ms:].eq(msb.m_out[3:])
         m.d.comb += zo.create(zo.s, zo.e, zo.m) # ... here
 
         # initialise rounding (but only activate if needed)
-        m.d.comb += self.o.of.guard.eq(msb.m_out[2])
-        m.d.comb += self.o.of.round_bit.eq(msb.m_out[1])
-        m.d.comb += self.o.of.sticky.eq(msb.m_out[1])
-        m.d.comb += self.o.of.m0.eq(msb.m_out[3])
+        if ms < 0:
+            # larger int to smaller FP (uint32/64 -> fp16 most likely)
+            m.d.comb += self.o.of.guard.eq(msb.m_out[-mz-2])
+            m.d.comb += self.o.of.round_bit.eq(msb.m_out[-mz-3])
+            m.d.comb += self.o.of.sticky.eq(msb.m_out[:-mz-3].bool())
+            m.d.comb += self.o.of.m0.eq(msb.m_out[-mz-1])
+        else:
+            # smaller int to larger FP
+            m.d.comb += self.o.of.guard.eq(msb.m_out[2])
+            m.d.comb += self.o.of.round_bit.eq(msb.m_out[1])
+            m.d.comb += self.o.of.sticky.eq(msb.m_out[:1].bool())
+            m.d.comb += self.o.of.m0.eq(msb.m_out[3])
 
         # special cases active by default
         m.d.comb += self.o.out_do_z.eq(1)
diff --git a/src/ieee754/fcvt/test/test_fcvt_int_pipe.py b/src/ieee754/fcvt/test/test_fcvt_int_pipe.py
index 1b407dcf..fc44458e 100644
--- a/src/ieee754/fcvt/test/test_fcvt_int_pipe.py
+++ b/src/ieee754/fcvt/test/test_fcvt_int_pipe.py
@@ -13,30 +13,56 @@ def to_uint16(x):
 def to_uint32(x):
     return x
 
+def to_uint64(x):
+    return x
+
 def fcvt_64(x):
     return sfpy.float.ui32_to_f64(x)
 
 def fcvt_32(x):
     return sfpy.float.ui32_to_f32(x)
 
-def test_int_pipe_fp16_32():
+def fcvt_16(x):
+    return sfpy.float.ui32_to_f16(x)
+
+def test_int_pipe_ui16_f32():
+    # XXX softfloat-3 has no ui16_to_xxx functions, so the ui32_to_xxx
+    # reference (fcvt_32) is used instead.  should be fine.
     dut = FPCVTIntMuxInOut(16, 32, 4)
-    runfp(dut, 16, "test_fcvt_int_pipe_fp16_32", to_uint16, fcvt_32, True,
+    runfp(dut, 16, "test_fcvt_int_pipe_ui16_f32", to_uint16, fcvt_32, True,
           n_vals=100)
 
-def test_int_pipe_fp16_64():
+def test_int_pipe_ui16_f64():
     dut = FPCVTIntMuxInOut(16, 64, 4)
-    runfp(dut, 16, "test_fcvt_int_pipe_fp16_64", to_uint16, fcvt_64, True,
+    runfp(dut, 16, "test_fcvt_int_pipe_ui16_f64", to_uint16, fcvt_64, True,
           n_vals=100)
 
-def test_int_pipe_fp32_64():
+def test_int_pipe_ui32_f64():
     dut = FPCVTIntMuxInOut(32, 64, 4)
-    runfp(dut, 32, "test_fcvt_int_pipe_fp32_64", to_uint32, fcvt_64, True,
+    runfp(dut, 32, "test_fcvt_int_pipe_ui32_f64", to_uint32, fcvt_64, True,
+          n_vals=100)
+
+def test_int_pipe_ui64_f16():
+    # restrict the random inputs to 17 bits: fp16 tops out at 65504, so
+    # full-width random values would almost always convert to Inf, which
+    # is not entirely pointless to test but exercises little else
+    dut = FPCVTIntMuxInOut(64, 16, 4)
+    runfp(dut, 17, "test_fcvt_int_pipe_ui64_f16", to_uint64, fcvt_16, True,
+          n_vals=100)
+
+def test_int_pipe_ui32_f16():
+    # restrict the random inputs to 17 bits: fp16 tops out at 65504, so
+    # full-width random values would almost always convert to Inf, which
+    # is not entirely pointless to test but exercises little else
+    dut = FPCVTIntMuxInOut(32, 16, 4)
+    runfp(dut, 17, "test_fcvt_int_pipe_ui32_f16", to_uint32, fcvt_16, True,
           n_vals=100)
 
 if __name__ == '__main__':
     for i in range(200):
-        test_int_pipe_fp16_32()
-        test_int_pipe_fp16_64()
-        test_int_pipe_fp32_64()
+        test_int_pipe_ui32_f16()
+        test_int_pipe_ui64_f16()
+        test_int_pipe_ui16_f32()
+        test_int_pipe_ui16_f64()
+        test_int_pipe_ui32_f64()
 
diff --git a/src/ieee754/fpcommon/test/fpmux.py b/src/ieee754/fpcommon/test/fpmux.py
index f520a639..c15b09b5 100644
--- a/src/ieee754/fpcommon/test/fpmux.py
+++ b/src/ieee754/fpcommon/test/fpmux.py
@@ -146,6 +146,18 @@ def create_random(num_rows, width, single_op=False, n_vals=10):
                 #op1 = 0x3340f2a7
                 #op1 = 0x33D6BF95
                 #op1 = 0x9885020648d8c0e8
+                #op1 = 0xc26b
+                #op1 = 3
+
+                #op1 = 0x3a66
+                #op1 = 0x5299
+                #op1 = 0xe0eb
+                #op1 = 0x3954
+                #op1 = 0x4dea
+                #op1 = 0x65eb
+
+                #op1 = 0x1841
+
                 vals.append((op1,))
             else:
                 op1 = randint(0, (1<<width)-1)
-- 
2.30.2