From: Michael Nolan <mtnolan2640@gmail.com>
Date: Sun, 26 Jan 2020 20:08:36 +0000 (-0500)
Subject: Add FSGNJ Pipeline stage and overall FSGNJ pipeline
X-Git-Tag: ls180-24jan2020~340
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=d1f38b108b1bef1d98c31326e5f80ba0a1b057a5;p=ieee754fpu.git

Add FSGNJ Pipeline stage and overall FSGNJ pipeline

My goal here was to create a very bare-bones pipeline stage for the
purposes of implementing the FSGNJ functionality later. This code
should be able to at least be imported and the FSGNJMuxInOut class
constructed without errors.
---

diff --git a/src/ieee754/fsgnj/fsgnj.py b/src/ieee754/fsgnj/fsgnj.py
new file mode 100644
index 00000000..fa21d39f
--- /dev/null
+++ b/src/ieee754/fsgnj/fsgnj.py
@@ -0,0 +1,136 @@
+# IEEE Floating Point Sign Injection (FSGNJ)
+# Copyright (C) 2019 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
+
+from nmigen import Module, Signal, Cat, Mux
+from nmigen.cli import main, verilog
+
+from nmutil.pipemodbase import PipeModBase
+from ieee754.fpcommon.basedata import FPBaseData
+from ieee754.fpcommon.postcalc import FPPostCalcData
+from ieee754.fpcommon.msbhigh import FPMSBHigh
+
+from ieee754.fpcommon.fpbase import FPNumBaseRecord
+
+
+class FSGNJPipeMod(PipeModBase):
+    """ FP Sign injection - replaces operand A's sign bit with one generated from operand B
+
+        self.i.ctx.op & 0x3 == 0x0 : Copy sign bit from operand B
+        self.i.ctx.op & 0x3 == 0x1 : Copy inverted sign bit from operand B
+        self.i.ctx.op & 0x3 == 0x2 : Sign bit is A's sign XOR B's sign
+    """
+    def __init__(self, in_pspec):
+        self.in_pspec = in_pspec  # kept: ispec/ospec/elaborate read it
+        super().__init__(in_pspec, "fsgnj")
+
+    def ispec(self):  # input data layout, built from in_pspec
+        return FPBaseData(self.in_pspec)
+
+    def ospec(self):  # output data layout, built from the same in_pspec
+        return FPBaseData(self.in_pspec)
+
+    def elaborate(self, platform):  # combinatorial logic driving self.o
+        m = Module()
+        comb = m.d.comb
+
+        #m.submodules.sc_out_z = self.o.z
+        # NOTE(review): nothing below reads operand B; only self.i.a and op[0]
+        # decode: XXX really should move to separate stage
+        z1 = self.o.z
+        a = self.i.a
+        print("z1", z1.width, z1.rmw, z1.e_width, z1.e_start, z1.e_end)  # debug
+
+        me = self.in_pspec.width  # width taken from the pipeline spec
+        mz = z1.rmw
+        ms = mz - me
+        print("ms-me", ms, me, mz)  # debug
+
+        # 3 extra bits for guard/round/sticky
+        msb = FPMSBHigh(me+3, z1.e_width)
+        m.submodules.norm_msb = msb
+
+        # signed or unsigned: taken from bit 0 of the operator context
+        signed = Signal(reset_less=True)
+        comb += signed.eq(self.i.ctx.op[0])
+
+        # mantissa (one less bit if signed), and sign
+        mantissa = Signal(me, reset_less=True)
+        sign = Signal(reset_less=True)
+
+        # detect signed/unsigned.  key case: -ve numbers need inversion
+        # to +ve because the FP sign says if it's -ve or not.
+        comb += sign.eq(Mux(signed, a[-1], 0)) # sign in top bit of a
+        comb += mantissa.eq(Mux(signed,
+                                Mux(sign, -a,  # negate input if sign -ve
+                                           a), # leave as-is
+                                a))            # unsigned, use full a
+
+        # set input from full INT
+        comb += msb.m_in.eq(Cat(0, 0, 0, mantissa)) # g/r/s + input
+        comb += msb.e_in.eq(me)                     # exp = int width
+
+        # to do with FP16... not yet resolved why
+        alternative = ms < 0  # plain Python bool, decided at elaboration time
+
+        if alternative:
+            comb += z1.e.eq(msb.e_out-1)
+            mmsb = msb.m_out[-mz-1:]
+            if mz == 16:
+                # larger int to smaller FP (uint32/64 -> fp16 most likely)
+                comb += z1.m[ms-1:].eq(mmsb)
+            else: # 32? XXX weirdness...
+                comb += z1.m.eq(mmsb)
+        else:
+            # smaller int to larger FP
+            comb += z1.e.eq(msb.e_out)
+            comb += z1.m[ms:].eq(msb.m_out[3:])
+
+        # XXX there is some weirdness involving the sign looping back
+        # see graphviz output
+        # http://bugs.libre-riscv.org/show_bug.cgi?id=135
+        comb += z1.s.eq(sign)
+        comb += z1.create(sign, z1.e, z1.m) # ... here
+
+        # note: post-normalisation actually appears to be capable of
+        # detecting overflow to infinity (FPPackMod).  so it's ok to
+        # drop the bits into the mantissa (with a fixed exponent),
+        # do some rounding (which might result in exceeding the
+        # range of the target FP by re-increasing the exponent),
+        # and basically *not* have to do any kind of range-checking
+        # here: just set up guard/round/sticky, drop the INT into the
+        # mantissa, and away we go.  XXX TODO: see if FPNormaliseMod
+        # is even necessary.  it probably isn't
+
+        # initialise rounding (but only activate if needed)
+        if alternative:
+            # larger int to smaller FP (uint32/64 -> fp16 most likely)
+            comb += self.o.of.guard.eq(msb.m_out[-mz-2])
+            comb += self.o.of.round_bit.eq(msb.m_out[-mz-3])
+            comb += self.o.of.sticky.eq(msb.m_out[:-mz-3].bool())
+            comb += self.o.of.m0.eq(msb.m_out[-mz-1])
+        else:
+            # smaller int to larger FP
+            comb += self.o.of.guard.eq(msb.m_out[2])
+            comb += self.o.of.round_bit.eq(msb.m_out[1])
+            comb += self.o.of.sticky.eq(msb.m_out[:1].bool())
+            comb += self.o.of.m0.eq(msb.m_out[3])
+
+        a_nonzero = Signal(reset_less=True)
+        comb += a_nonzero.eq(~a.bool())  # NOTE(review): high when a == 0
+
+        # prepare zero
+        z_zero = FPNumBaseRecord(z1.width, False, name="z_zero")
+        comb += z_zero.zero(0)
+
+        # special cases?
+        comb += self.o.out_do_z.eq(a_nonzero)
+
+        # detect zero
+        comb += self.o.oz.eq(Mux(a_nonzero, z1.v, z_zero.v))
+
+        # copy the context (muxid, operator)
+        comb += self.o.ctx.eq(self.i.ctx)
+
+        return m
+
+
diff --git a/src/ieee754/fsgnj/pipeline.py b/src/ieee754/fsgnj/pipeline.py
new file mode 100644
index 00000000..a6e717e8
--- /dev/null
+++ b/src/ieee754/fsgnj/pipeline.py
@@ -0,0 +1,73 @@
+"""IEEE754 Floating Point Sign Injection (FSGNJ) pipeline
+
+Copyright (C) 2019 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
+
+"""
+
+import sys
+import functools
+
+from nmutil.singlepipe import ControlBase
+from nmutil.concurrentunit import ReservationStations, num_bits
+
+from ieee754.fpcommon.normtopack import FPNormToPack
+from ieee754.pipeline import PipelineSpec, DynamicPipe
+
+from ieee754.fsgnj.fsgnj import FSGNJPipeMod
+
+
+# not used, yet.  Signal is imported so that SignedOp() is constructible.
+from nmigen import Signal
+class SignedOp:  # holds a single "signed" flag signal
+    def __init__(self):
+        self.signed = Signal(reset_less=True)  # default-shaped Signal
+
+    def eq(self, i):  # returns a one-element list assigning i to signed
+        return [self.signed.eq(i)]
+
+
+class FSGNJStage(DynamicPipe):
+    """ DynamicPipe wrapper around a single FSGNJPipeMod stage
+    """
+
+    def __init__(self, in_pspec):
+        sc = FSGNJPipeMod(in_pspec)
+        in_pspec.stage = sc  # note: sets an attribute on the caller's pspec
+        super().__init__(in_pspec)  # presumably picks the stage up from pspec
+
+
+class FSGNJBasePipe(ControlBase):  # pipeline made of one FSGNJStage
+    def __init__(self, pspec):
+        ControlBase.__init__(self)
+        self.pipe1 = FSGNJStage(pspec)
+        self._eqs = self.connect([self.pipe1, ])  # saved for elaborate()
+
+    def elaborate(self, platform):
+        m = ControlBase.elaborate(self, platform)
+        m.submodules.fsgnj = self.pipe1
+        m.d.comb += self._eqs  # apply what connect() returned in __init__
+        return m
+
+
+class FSGNJMuxInOut(ReservationStations):
+    """ Reservation-Station version of FSGNJ pipeline.
+
+        * fan-in on inputs (an array of FPBaseData: a,b,mid)
+        * sign-injection pipeline (alu)
+        * fan-out on outputs (an array of FPPackData: z,mid)
+
+        Fan-in and Fan-out are combinatorial.
+    """
+
+    def __init__(self, in_width, num_rows, op_wid=2):
+        self.op_wid = op_wid
+        self.id_wid = num_bits(num_rows)  # id width derived from row count
+
+        self.in_pspec = PipelineSpec(in_width, self.id_wid, self.op_wid)
+
+        self.alu = FSGNJBasePipe(self.in_pspec)  # set before base __init__
+        ReservationStations.__init__(self, num_rows)
+
+
+
+