From 65e9d4ede5860dfea323b709d1372fb23c75c3f9 Mon Sep 17 00:00:00 2001
From: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Date: Thu, 4 Jul 2019 09:34:03 +0100
Subject: [PATCH] add fcvt first version

---
 src/ieee754/fcvt/pipeline.py            | 197 ++++++++++++++++++++++++
 src/ieee754/fcvt/test/test_fcvt_pipe.py |  22 +++
 src/ieee754/fpcommon/test/fpmux.py      |  31 ++--
 3 files changed, 240 insertions(+), 10 deletions(-)
 create mode 100644 src/ieee754/fcvt/pipeline.py
 create mode 100644 src/ieee754/fcvt/test/test_fcvt_pipe.py

diff --git a/src/ieee754/fcvt/pipeline.py b/src/ieee754/fcvt/pipeline.py
new file mode 100644
index 00000000..35b92db9
--- /dev/null
+++ b/src/ieee754/fcvt/pipeline.py
@@ -0,0 +1,197 @@
+# IEEE Floating Point Conversion (FCVT) pipeline
+# Copyright (C) Jonathan P Dawson 2013
+# 2013-12-12
+
+from nmigen import Module
+from nmigen.cli import main, verilog
+
+from nmutil.singlepipe import ControlBase
+from nmutil.concurrentunit import ReservationStations, num_bits
+
+from ieee754.fpcommon.getop import FPADDBaseData
+from ieee754.fpcommon.denorm import FPSCData
+from ieee754.fpcommon.pack import FPPackData
+from ieee754.fpcommon.normtopack import FPNormToPack
+from ieee754.fpcommon.postcalc import FPAddStage1Data
+
+from nmigen import Module, Signal, Elaboratable
+from math import log
+
+from ieee754.fpcommon.fpbase import FPNumIn, FPNumOut, FPNumBaseRecord
+from ieee754.fpcommon.fpbase import FPState, FPNumBase
+from ieee754.fpcommon.getop import FPPipeContext
+
+from nmigen import Module, Signal, Cat, Const, Elaboratable
+
+from ieee754.fpcommon.fpbase import FPNumDecode, FPNumBaseRecord
+from nmutil.singlepipe import SimpleHandshake, StageChain
+
+from ieee754.fpcommon.fpbase import FPState, FPID
+from ieee754.fpcommon.getop import FPADDBaseData
+
+
+class FPCVTSpecialCasesMod(Elaboratable):
+    """ FCVT special cases: NaNs, infs, zeros, denormalised
+        see "Special Operations"
+        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
+    """
+
+    def __init__(self, in_width, out_width, pspec):
+        self.in_width = in_width
+        self.out_width = out_width
+        self.pspec = pspec
+        self.i = self.ispec()
+        self.o = self.ospec()
+
+    def ispec(self):
+        return FPADDBaseData(self.in_width, self.pspec)
+
+    def ospec(self):
+        # result is in the *output* format: z.rmw / z.fp size the narrowing
+        return FPAddStage1Data(self.out_width, self.pspec)
+
+    def setup(self, m, i):
+        """ links module to inputs and outputs
+        """
+        m.submodules.specialcases = self
+        m.d.comb += self.i.eq(i)
+
+    def process(self, i):
+        return self.o
+
+    def elaborate(self, platform):
+        m = Module()
+
+        # decode: XXX really should move to separate stage
+        a1 = FPNumBaseRecord(self.in_width, False)
+        m.submodules.sc_decode_a = a1 = FPNumDecode(None, a1)
+        m.d.comb += [a1.v.eq(self.i.a),
+                     self.o.a.eq(a1)]
+
+        # intermediaries (z1, the result record, supplies exponent limits)
+        z1 = self.o.z
+        exp_sub_n126 = Signal((a1.e_width, True), reset_less=True)
+        exp_gt127 = Signal(reset_less=True)
+        m.d.comb += exp_sub_n126.eq(a1.e - z1.fp.N126)
+        m.d.comb += exp_gt127.eq(a1.e > z1.fp.P127)
+
+        # if a zero, return zero (signed)
+        with m.If(a1.exp_n127):
+            m.d.comb += self.o.z.zero(a1.s)
+
+        # if a's exponent is below z's minimum exponent (-126)
+        with m.Elif(exp_sub_n126 < 0):
+            m.d.comb += self.o.z.create(a1.s, a1.e, a1.m[-self.o.z.rmw:])
+            m.d.comb += self.o.of.guard.eq(a1.m[-self.o.z.rmw-1])
+            m.d.comb += self.o.of.round.eq(a1.m[-self.o.z.rmw-2])
+            # sticky: OR of all the bits *below* the round bit
+            m.d.comb += self.o.of.sticky.eq(a1.m[:-self.o.z.rmw-2] != 0)
+
+        # if a is inf return inf
+        with m.Elif(a1.is_inf):
+            m.d.comb += self.o.z.inf(a1.s)
+
+        # if a is NaN return NaN
+        with m.Elif(a1.is_nan):
+            m.d.comb += self.o.z.nan(a1.s)
+
+        # if a's exponent is greater than 127, return inf
+        with m.Elif(exp_gt127):
+            m.d.comb += self.o.z.inf(a1.s)
+
+        # ok after all that, anything else should fit fine (whew)
+        with m.Else():
+            m.d.comb += self.o.z.create(a1.s, a1.e, a1.m[-self.o.z.rmw:])
+
+        # copy the context (muxid, operator)
+        m.d.comb += self.o.ctx.eq(self.i.ctx)
+
+        return m
+
+
+class FPCVTSpecialCases(FPState):
+    """ state wrapper for special cases: NaNs, infs, zeros, denormalised
+        NOTE(review): calls below do not match FPCVTSpecialCasesMod's API """
+
+    def __init__(self, width, id_wid):
+        FPState.__init__(self, "special_cases")
+        self.mod = FPCVTSpecialCasesMod(width)  # NOTE(review): Mod takes 3 args
+        self.out_z = self.mod.ospec()
+        self.out_do_z = Signal(reset_less=True)
+
+    def setup(self, m, i):
+        """ links module to inputs and outputs; registers result and context
+        """
+        self.mod.setup(m, i, self.out_do_z)  # NOTE(review): setup is (m, i)
+        m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
+        m.d.sync += self.out_z.ctx.eq(self.mod.o.ctx)  # (and context)
+
+    def action(self, m):
+        self.idsync(m)
+        with m.If(self.out_do_z):
+            m.next = "put_z"
+        with m.Else():
+            m.next = "denormalise"
+
+
+class FPCVTSpecialCasesDeNorm(FPState, SimpleHandshake):
+    """ pipeline stage wrapping FPCVTSpecialCasesMod in a SimpleHandshake
+    """
+
+    def __init__(self, in_width, out_width, pspec):
+        FPState.__init__(self, "special_cases")
+        self.in_width, self.out_width = in_width, out_width
+        self.pspec = pspec
+        sc = FPCVTSpecialCasesMod(in_width, out_width, pspec)
+        SimpleHandshake.__init__(self, sc)
+        self.out = self.ospec()
+
+
+class FPCVTBasePipe(ControlBase):  # FCVT pipe: special-cases -> norm/pack
+    def __init__(self, in_width, out_width, in_pspec, out_pspec):
+        ControlBase.__init__(self)
+        self.pipe1 = FPCVTSpecialCasesDeNorm(in_width, out_width, in_pspec)
+        self.pipe2 = FPNormToPack(out_width, out_pspec)
+        # link pipe1 to pipe2; the result is added to comb in elaborate()
+        self._eqs = self.connect([self.pipe1, self.pipe2])
+
+    def elaborate(self, platform):
+        m = ControlBase.elaborate(self, platform)
+        m.submodules.scnorm = self.pipe1
+        m.submodules.normpack = self.pipe2
+        m.d.comb += self._eqs  # whatever connect() returned in __init__
+        return m
+
+
+class FPCVTMuxInOut(ReservationStations):
+    """ Reservation-Station version of FPCVT pipeline.
+
+        * fan-in on inputs (an array of FPADDBaseData; only a is converted)
+        * 2-stage conversion pipeline (special cases, then norm-to-pack)
+        * fan-out on outputs (an array of FPPackData: z,mid)
+
+        Fan-in and Fan-out are combinatorial.
+    """
+    def __init__(self, in_width, out_width, num_rows, op_wid=0):
+        self.in_width = in_width
+        self.out_width = out_width
+        self.op_wid = op_wid
+        self.id_wid = num_bits(in_width)
+        self.out_id_wid = num_bits(out_width)
+
+        # separate pipeline specs for the input side and the output side
+        self.in_pspec = {'id_wid': self.id_wid, 'op_wid': self.op_wid}
+        self.out_pspec = {'id_wid': self.out_id_wid, 'op_wid': self.op_wid}
+
+        # the ALU is the conversion pipe, which is sized by both widths
+        self.alu = FPCVTBasePipe(in_width, out_width,
+                                 self.in_pspec, self.out_pspec)
+        ReservationStations.__init__(self, num_rows)
+
+    def i_specfn(self):
+        """ input spec: FPADDBaseData at the input width """
+        return FPADDBaseData(self.in_width, self.in_pspec)
+
+    def o_specfn(self):
+        """ output spec: FPPackData at the output width """
+        return FPPackData(self.out_width, self.out_pspec)
diff --git a/src/ieee754/fcvt/test/test_fcvt_pipe.py b/src/ieee754/fcvt/test/test_fcvt_pipe.py
new file mode 100644
index 00000000..667c2136
--- /dev/null
+++ b/src/ieee754/fcvt/test/test_fcvt_pipe.py
@@ -0,0 +1,22 @@
+""" test of FPCVTMuxInOut: single-operand FP down-conversion, with the
+    narrower sfpy type supplying the reference result """
+
+from ieee754.fcvt.pipeline import (FPCVTMuxInOut,)
+from ieee754.fpcommon.test.fpmux import runfp
+
+from sfpy import Float64, Float32, Float16
+
+def fcvt_32_16(x):
+    return Float16(x)
+
+def test_pipe_fp32_16():
+    dut = FPCVTMuxInOut(32, 16, 4)
+    runfp(dut, 32, "test_fcvt_pipe_fp32_16", Float32, fcvt_32_16, single_op=True)
+
+def test_pipe_fp64():
+    dut = FPCVTMuxInOut(64, 32, 4)
+    runfp(dut, 64, "test_fcvt_pipe_fp64", Float64, Float32, single_op=True)
+
+if __name__ == '__main__':
+    test_pipe_fp32_16()
+
diff --git a/src/ieee754/fpcommon/test/fpmux.py b/src/ieee754/fpcommon/test/fpmux.py
index 239c822d..f32cb65c 100644
--- a/src/ieee754/fpcommon/test/fpmux.py
+++ b/src/ieee754/fpcommon/test/fpmux.py
@@ -11,10 +11,11 @@ from nmigen.cli import verilog, rtlil
 
 
 class InputTest:
-    def __init__(self, dut, width, fpkls, fpop):
+    def __init__(self, dut, width, fpkls, fpop, single_op=False):
         self.dut = dut
         self.fpkls = fpkls
         self.fpop = fpop
+        self.single_op = single_op
         self.di = {}
         self.do = {}
         self.tlen = 10
@@ -35,8 +36,12 @@ class InputTest:
                 #op2 = 0xb4658540 # expect 0x8016147c
                 #op1 = 0x40900000
                 #op2 = 0x40200000
-                res = self.fpop(self.fpkls(op1), self.fpkls(op2))
-                self.di[muxid][i] = (op1, op2)
+                if self.single_op:
+                    res = self.fpop(self.fpkls(op1))
+                    self.di[muxid][i] = (op1, op2)
+                else:
+                    res = self.fpop(self.fpkls(op1), self.fpkls(op2))
+                    self.di[muxid][i] = (op1, op2)
                 self.do[muxid].append(res.bits)
 
     def send(self, muxid):
@@ -53,11 +58,17 @@ class InputTest:
                 yield
                 o_p_ready = yield rs.ready_o
 
-            fop1 = self.fpkls(op1)
-            fop2 = self.fpkls(op2)
-            res = self.fpop(fop1, fop2)
-            print ("send", muxid, i, hex(op1), hex(op2), hex(res.bits),
-                           fop1, fop2, res)
+            if self.single_op:
+                fop1 = self.fpkls(op1)
+                res = self.fpop(fop1)
+                print ("send", muxid, i, hex(op1), hex(res.bits),
+                               fop1, res)
+            else:
+                fop1 = self.fpkls(op1)
+                fop2 = self.fpkls(op2)
+                res = self.fpop(fop1, fop2)
+                print ("send", muxid, i, hex(op1), hex(op2), hex(res.bits),
+                               fop1, fop2, res)
 
             yield rs.valid_i.eq(0)
             # wait random period of time before queueing another value
@@ -114,12 +125,12 @@ class InputTest:
         print ("recv ended", muxid)
 
 
-def runfp(dut, width, name, fpkls, fpop):
+def runfp(dut, width, name, fpkls, fpop, single_op=False):
     vl = rtlil.convert(dut, ports=dut.ports())
     with open("%s.il" % name, "w") as f:
         f.write(vl)
 
-    test = InputTest(dut, width, fpkls, fpop)
+    test = InputTest(dut, width, fpkls, fpop, single_op)
     run_simulation(dut, [test.rcv(1), test.rcv(0),
                          test.rcv(3), test.rcv(2),
                          test.send(0), test.send(1),
-- 
2.30.2