From 5d259a597708be1910636bc7a3bc688db9207b3d Mon Sep 17 00:00:00 2001
From: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Date: Fri, 26 Jul 2019 11:56:27 +0100
Subject: [PATCH] add first version test fp to int convert

---
 src/ieee754/fcvt/pipeline.py                  |  81 +++++++++---
 src/ieee754/fcvt/test/test_fcvt_f2int_pipe.py | 120 ++++++++++++++++++
 src/ieee754/fpcommon/test/fpmux.py            |  11 +-
 3 files changed, 195 insertions(+), 17 deletions(-)
 create mode 100644 src/ieee754/fcvt/test/test_fcvt_f2int_pipe.py

diff --git a/src/ieee754/fcvt/pipeline.py b/src/ieee754/fcvt/pipeline.py
index 7a8e7679..04855b1f 100644
--- a/src/ieee754/fcvt/pipeline.py
+++ b/src/ieee754/fcvt/pipeline.py
@@ -5,18 +5,19 @@
 import sys
 import functools
 
-from nmigen import Module, Signal, Cat, Const, Elaboratable
+from nmigen import Module, Signal, Cat, Const, Mux, Elaboratable
 from nmigen.cli import main, verilog
 
 from nmutil.singlepipe import ControlBase
 from nmutil.concurrentunit import ReservationStations, num_bits
 
+from ieee754.fpcommon.fpbase import Overflow
 from ieee754.fpcommon.getop import FPADDBaseData
 from ieee754.fpcommon.pack import FPPackData
 from ieee754.fpcommon.normtopack import FPNormToPack
 from ieee754.fpcommon.postcalc import FPAddStage1Data
 from ieee754.fpcommon.msbhigh import FPMSBHigh
-from ieee754.fpcommon.fpbase import MultiShiftRMerge
+from ieee754.fpcommon.exphigh import FPEXPHigh
 
 
 from nmigen import Module, Signal, Elaboratable
@@ -83,21 +84,21 @@ class FPCVTFloatToIntMod(Elaboratable):
         m.submodules.sc_decode_a = a1 = FPNumDecode(None, a1)
         m.d.comb += a1.v.eq(self.i.a)
         z1 = self.o.z
-        mz = len(self.o.z)
+        mz = len(z1)
         print("z1", mz)
 
         me = a1.rmw
         ms = mz - me
         print("ms-me", ms, me)
 
-        espec = (len(a1.e_width), True)
+        espec = (a1.e_width, True)
         ediff_intwid = Signal(espec, reset_less=True)
 
         # conversion can mostly be done manually...
-        m.d.comb += self.o.z.s.eq(a1.s)
-        m.d.comb += self.o.z.e.eq(a1.e)
-        m.d.comb += self.o.z.m[ms:].eq(a1.m)
-        m.d.comb += self.o.z.create(a1.s, a1.e, self.o.z.m) # ... here
+        #m.d.comb += self.o.z.s.eq(a1.s)
+        #m.d.comb += self.o.z.e.eq(a1.e)
+        #m.d.comb += self.o.z.m[ms:].eq(a1.m)
+        #m.d.comb += self.o.z.create(a1.s, a1.e, self.o.z.m) # ... here
 
         signed = Signal(reset_less=True)
         m.d.comb += signed.eq(self.i.ctx.op[0])
@@ -120,15 +121,30 @@ class FPCVTFloatToIntMod(Elaboratable):
             with m.Else(): # positive FP, so positive overrun (max INT)
                 m.d.comb += self.o.z.eq((1<<(mz)-1))
 
-        # ok exp should be in range: shift it...
+        # ok exp should be in range: shift and round it
         with m.Else():
-            mantissa = Signal(z1, reset_less=True)
-            l = [0] * ms + [1] + a1.m
-            m.d.comb += mantissa.eq(Cat(*l) >> a1.e)
+            mantissa = Signal(mz, reset_less=True)
+            l = [0] * ms + [1] + [a1.m]
+            m.d.comb += mantissa.eq(Cat(*l))
             m.d.comb += self.o.z.eq(mantissa)
 
+            # shift
+            msr = FPEXPHigh(mz+3, espec[0])
+            m.submodules.norm_exp = msr
+            m.d.comb += [msr.m_in[3:].eq(mantissa),
+                         msr.e_in.eq(a1.e),
+                         msr.ediff.eq(Mux(signed, mz-1, mz))
+                        ]
+
+            of = Overflow()
+            m.d.comb += of.guard.eq(msr.m_out[2])
+            m.d.comb += of.round_bit.eq(msr.m_out[1])
+            m.d.comb += of.sticky.eq(msr.m_out[0])
+            m.d.comb += of.m0.eq(msr.m_out[3])
+            m.d.comb += self.o.z.eq(msr.m_out[3:])
+
         # copy the context (muxid, operator)
-        m.d.comb += self.o.oz.eq(self.o.z.v)
+        #m.d.comb += self.o.oz.eq(self.o.z.v)
         m.d.comb += self.o.ctx.eq(self.i.ctx)
 
         return m
@@ -466,6 +482,23 @@ class FPCVTConvertDeNorm(FPState, SimpleHandshake):
         self.out = self.ospec(None)
 
 
+class FPCVTFtoIntBasePipe(ControlBase):
+    def __init__(self, modkls, e_extra, in_pspec, out_pspec):
+        ControlBase.__init__(self)
+        self.pipe1 = FPCVTConvertDeNorm(modkls, in_pspec, out_pspec)
+        #self.pipe2 = FPNormToPack(out_pspec, e_extra=e_extra)
+        # (pipe2 disabled: e_extra is currently unused, only pipe1 is chained)
+        #self._eqs = self.connect([self.pipe1, self.pipe2])
+        self._eqs = self.connect([self.pipe1, ])
+
+    def elaborate(self, platform):
+        m = ControlBase.elaborate(self, platform)
+        m.submodules.down = self.pipe1
+        #m.submodules.normpack = self.pipe2
+        m.d.comb += self._eqs
+        return m
+
+
 class FPCVTBasePipe(ControlBase):
     def __init__(self, modkls, e_extra, in_pspec, out_pspec):
         ControlBase.__init__(self)
@@ -493,7 +526,7 @@ class FPCVTMuxInOutBase(ReservationStations):
     """
 
     def __init__(self, modkls, e_extra, in_width, out_width,
-                       num_rows, op_wid=0):
+                       num_rows, op_wid=0, pkls=FPCVTBasePipe):
         self.op_wid = op_wid
         self.id_wid = num_bits(in_width)
         self.out_id_wid = num_bits(out_width)
@@ -501,7 +534,7 @@ class FPCVTMuxInOutBase(ReservationStations):
         self.in_pspec = PipelineSpec(in_width, self.id_wid, self.op_wid)
         self.out_pspec = PipelineSpec(out_width, self.out_id_wid, op_wid)
 
-        self.alu = FPCVTBasePipe(modkls, e_extra, self.in_pspec, self.out_pspec)
+        self.alu = pkls(modkls, e_extra, self.in_pspec, self.out_pspec)
         ReservationStations.__init__(self, num_rows)
 
     def i_specfn(self):
@@ -529,3 +562,21 @@ muxfactoryinput = [("FPCVTDownMuxInOut", FPCVTDownConvertMod, True, ),
 for (name, kls, e_extra) in muxfactoryinput:
     fn = functools.partial(getkls, kls, e_extra)
     setattr(sys.modules[__name__], name, fn)
+
+
+class FPCVTF2IntMuxInOut(FPCVTMuxInOutBase):
+    """ Reservation-Station version of the FP-to-INT FPCVT pipeline.
+
+        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
+        * single FPCVTFloatToIntMod stage (via FPCVTFtoIntBasePipe)
+        * fan-out on outputs (an array of FPPackData: z,mid)
+
+        Fan-in and Fan-out are combinatorial.
+    """
+
+    def __init__(self, in_width, out_width, num_rows, op_wid=0):
+        FPCVTMuxInOutBase.__init__(self, FPCVTFloatToIntMod, False,
+                                         in_width, out_width,
+                                         num_rows, op_wid,
+                                         pkls=FPCVTFtoIntBasePipe)
+
diff --git a/src/ieee754/fcvt/test/test_fcvt_f2int_pipe.py b/src/ieee754/fcvt/test/test_fcvt_f2int_pipe.py
new file mode 100644
index 00000000..592e5af0
--- /dev/null
+++ b/src/ieee754/fcvt/test/test_fcvt_f2int_pipe.py
@@ -0,0 +1,120 @@
+""" test of FPCVTF2IntMuxInOut (FP to INT conversion pipeline)
+"""
+
+from ieee754.fcvt.pipeline import (FPCVTF2IntMuxInOut,)
+from ieee754.fpcommon.test.fpmux import runfp
+
+import sfpy
+from sfpy import Float64, Float32, Float16
+
+def fcvt_f64_ui32(x):
+    return sfpy.float.f64_to_ui32(x)
+
+def fcvt_i16_f32(x):
+    print ("fcvt i16_f32", hex(x))
+    return sfpy.float.i32_to_f32(x) # XXX no i16_to_f32, it's ok though
+
+def fcvt_i32_f32(x):
+    print ("fcvt i32_f32", hex(x))
+    return sfpy.float.i32_to_f32(x)
+
+def fcvt_i32_f64(x):
+    print ("fcvt i32_f64", hex(x))
+    return sfpy.float.i32_to_f64(x)
+
+def fcvt_f32_ui32(x):
+    return sfpy.float.f32_to_ui32(x)
+
+def fcvt_64_to_32(x): # NOTE: int to fp reference (ui64 -> f32)
+    return sfpy.float.ui64_to_f32(x)
+
+def fcvt_f16_ui32(x):
+    return sfpy.float.f16_to_ui32(x)
+
+######################
+# signed int to fp (XXX FPCVTIntMuxInOut, to_int* are not imported here)
+######################
+
+def test_int_pipe_i16_f32():
+    # XXX softfloat-3 doesn't have i16_to_xxx so use ui32 instead.
+    # should be fine.
+    dut = FPCVTIntMuxInOut(16, 32, 4, op_wid=1)
+    runfp(dut, 16, "test_fcvt_int_pipe_i16_f32", to_int16, fcvt_i16_f32, True,
+          n_vals=100, opcode=0x1)
+
+def test_int_pipe_i32_f64():
+    dut = FPCVTIntMuxInOut(32, 64, 4, op_wid=1)
+    runfp(dut, 32, "test_fcvt_int_pipe_i32_f64", to_int32, fcvt_i32_f64, True,
+          n_vals=100, opcode=0x1)
+
+def test_int_pipe_i32_f32():
+    dut = FPCVTIntMuxInOut(32, 32, 4, op_wid=1)
+    runfp(dut, 32, "test_fcvt_int_pipe_i32_f32", to_int32, fcvt_i32_f32, True,
+          n_vals=100, opcode=0x1)
+
+######################
+# fp to unsigned int 
+######################
+
+def test_int_pipe_f16_ui16():
+    # XXX softfloat-3 doesn't have xxx_to_ui16 so use f16_to_ui32 instead:
+    # the DUT is therefore built with a 32-bit output. should be fine.
+    dut = FPCVTF2IntMuxInOut(16, 32, 4, op_wid=1)
+    runfp(dut, 16, "test_fcvt_f2int_pipe_f16_ui16", Float16, fcvt_f16_ui32,
+                True, n_vals=100)
+
+def test_int_pipe_ui16_f64():
+    dut = FPCVTIntMuxInOut(16, 64, 4, op_wid=1)
+    runfp(dut, 16, "test_fcvt_int_pipe_ui16_f64", to_uint16, fcvt_64, True,
+          n_vals=100)
+
+def test_int_pipe_f32_ui32():
+    dut = FPCVTF2IntMuxInOut(32, 32, 4, op_wid=1)
+    runfp(dut, 32, "test_fcvt_f2int_pipe_f32_ui32", Float32, fcvt_f32_ui32,
+                    True, n_vals=100)
+
+def test_int_pipe_ui32_f64():
+    dut = FPCVTIntMuxInOut(32, 64, 4, op_wid=1)
+    runfp(dut, 32, "test_fcvt_int_pipe_ui32_64", to_uint32, fcvt_64, True,
+          n_vals=100)
+
+def test_int_pipe_ui64_f32():
+    # ok, doing 33 bits here because it's pretty pointless (not entirely)
+    # to do random numbers statistically likely 99.999% of the time to be
+    # converted to Inf
+    dut = FPCVTIntMuxInOut(64, 32, 4, op_wid=1)
+    runfp(dut, 33, "test_fcvt_int_pipe_ui64_32", to_uint64, fcvt_64_to_32, True,
+          n_vals=100)
+
+def test_int_pipe_ui64_f16():
+    # ok, doing 17 bits here because it's pretty pointless (not entirely)
+    # to do random numbers statistically likely 99.999% of the time to be
+    # converted to Inf
+    dut = FPCVTIntMuxInOut(64, 16, 4, op_wid=1)
+    runfp(dut, 17, "test_fcvt_int_pipe_ui64_16", to_uint64, fcvt_16, True,
+          n_vals=100)
+
+def test_int_pipe_ui32_f16():
+    # ok, doing 17 bits here because it's pretty pointless (not entirely)
+    # to do random numbers statistically likely 99.999% of the time to be
+    # converted to Inf
+    dut = FPCVTIntMuxInOut(32, 16, 4, op_wid=1)
+    runfp(dut, 17, "test_fcvt_int_pipe_ui32_16", to_uint32, fcvt_16, True,
+          n_vals=100)
+
+if __name__ == '__main__':
+    for i in range(200):
+        test_int_pipe_f16_ui16()
+        test_int_pipe_f32_ui32()
+        continue # XXX everything below is unreachable (int-to-fp leftovers)
+        test_int_pipe_i32_f32()
+        test_int_pipe_i16_f32()
+        test_int_pipe_i32_f64()
+        continue
+        test_int_pipe_ui16_f32() # XXX not defined in this file
+        test_int_pipe_ui64_f32()
+        test_int_pipe_ui32_f16()
+        test_int_pipe_ui64_f16()
+        test_int_pipe_ui16_f64()
+        test_int_pipe_ui32_f64()
+
diff --git a/src/ieee754/fpcommon/test/fpmux.py b/src/ieee754/fpcommon/test/fpmux.py
index ef50f81d..cf638c96 100644
--- a/src/ieee754/fpcommon/test/fpmux.py
+++ b/src/ieee754/fpcommon/test/fpmux.py
@@ -38,7 +38,10 @@ class MuxInOut:
                     #print ("test", hex(op1), hex(op2))
                     res = self.fpop(self.fpkls(op1), self.fpkls(op2))
                     self.di[muxid][i] = (op1, op2)
-                self.do[muxid].append(res.bits)
+                if hasattr(res, "bits"):
+                    self.do[muxid].append(res.bits)
+                else:
+                    self.do[muxid].append(res) # for FP to INT
 
     def send(self, muxid):
         for i in range(self.tlen):
@@ -63,7 +66,11 @@ class MuxInOut:
             if self.single_op:
                 fop1 = self.fpkls(op1)
                 res = self.fpop(fop1)
-                print ("send", muxid, i, hex(op1), hex(res.bits),
+                if hasattr(res, "bits"):
+                    r = res.bits
+                else:
+                    r = res
+                print ("send", muxid, i, hex(op1), hex(r),
                                fop1, res)
             else:
                 fop1 = self.fpkls(op1)
-- 
2.30.2