From: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Date: Tue, 16 Jul 2019 16:30:35 +0000 (+0100)
Subject: adjust FPMSBHigh for use in FPNorm: make it possible to shift in the LSB
X-Git-Tag: ls180-24jan2020~821
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e7d748726058f570771daec18c74ac2cc8c5c3c6;p=ieee754fpu.git

adjust FPMSBHigh for use in FPNorm: make it possible to shift in the LSB
---

diff --git a/src/ieee754/fcvt/test/test_fcvt_pipe_32_16.py b/src/ieee754/fcvt/test/test_fcvt_pipe_32_16.py
index 3c58d4ac..844c744a 100644
--- a/src/ieee754/fcvt/test/test_fcvt_pipe_32_16.py
+++ b/src/ieee754/fcvt/test/test_fcvt_pipe_32_16.py
@@ -1,7 +1,7 @@
 """ test of FPCVTMuxInOut
 """
 
-from ieee754.fcvt.pipeline import (FPCVTMuxInOut,)
+from ieee754.fcvt.pipeline import (FPCVTDownMuxInOut,)
 from ieee754.fpcommon.test.case_gen import run_pipe_fp
 from ieee754.fpcommon.test import unit_test_single
 from ieee754.fcvt.test.fcvt_data_32_16 import regressions
@@ -12,7 +12,7 @@ def fcvt_16(x):
     return Float16(x)
 
 def test_pipe_fp32_16():
-    dut = FPCVTMuxInOut(32, 16, 4)
+    dut = FPCVTDownMuxInOut(32, 16, 4)
     run_pipe_fp(dut, 32, "fcvt", unit_test_single, Float32,
                 regressions, fcvt_16, 10, True)
 
diff --git a/src/ieee754/fpcommon/msbhigh.py b/src/ieee754/fpcommon/msbhigh.py
index 94e2fe70..3a29935a 100644
--- a/src/ieee754/fpcommon/msbhigh.py
+++ b/src/ieee754/fpcommon/msbhigh.py
@@ -1,7 +1,7 @@
 """ module for adjusting a mantissa and exponent so that the MSB is always 1
 """
 
-from nmigen import Module, Signal, Elaboratable
+from nmigen import Module, Signal, Mux, Elaboratable
 from nmigen.lib.coding import PriorityEncoder
 
 
@@ -15,14 +15,18 @@ class FPMSBHigh(Elaboratable):
 
         * exponent is signed
         * mantissa is unsigned.
+        * loprop: propagates the low bit (LSB) on the shift
+        * limclz: if set, adds a signed limclz Signal that caps the shift.
 
         examples:
         exp = -30, mantissa = 0b00011 - output: -33, 0b11000
         exp =   2, mantissa = 0b01111 - output:   1, 0b11110
     """
-    def __init__(self, m_width, e_width):
+    def __init__(self, m_width, e_width, limclz=False, loprop=False):
         self.m_width = m_width
         self.e_width = e_width
+        self.loprop = loprop
+        self.limclz = limclz and Signal((e_width, True), reset_less=True)
 
         self.m_in = Signal(m_width, reset_less=True)
         self.e_in = Signal((e_width, True), reset_less=True)
@@ -37,16 +41,36 @@ class FPMSBHigh(Elaboratable):
         m.submodules.pe = pe
 
         # *sigh* not entirely obvious: count leading zeros (clz)
-        # with a PriorityEncoder: to find from the MSB
-        # we reverse the order of the bits.
-        temp = Signal(mwid, reset_less=True)
+        # with a PriorityEncoder.  to find from the MSB
+        # we reverse the order of the bits.  it would be better if PE
+        # took a "reverse" argument.
+
         clz = Signal((len(self.e_out), True), reset_less=True)
+        temp = Signal(mwid, reset_less=True)
+        if self.loprop:
+            temp_r = Signal(mwid, reset_less=True)
+            with m.If(self.m_in[0]):
+                # propagate low bit: when the LSB is set, build a mask of
+                # clz ones for the vacated bits (no direct nmigen op found)
+                m.d.comb += temp_r.eq((self.m_in[0] << clz) - 1)
+
+        # limclz sets a limit (set by the exponent) on how far M can be shifted
+        # this can be used to ensure that near-zero numbers don't then have
+        # to be shifted *back* (e < -126 in the case of FP32 for example)
+        if self.limclz is not False:
+            limclz = Mux(self.limclz > pe.o, pe.o, self.limclz)
+        else:
+            limclz = pe.o
+
         m.d.comb += [
-            pe.i.eq(self.m_in[::-1]),       # inverted
-            clz.eq(pe.o),                 # count zeros from MSB down
+            pe.i.eq(self.m_in[::-1]),     # inverted
+            clz.eq(limclz),          # count zeros from MSB down
             temp.eq((self.m_in << clz)),  # shift mantissa UP
             self.e_out.eq(self.e_in - clz), # DECREASE exponent
-            self.m_out.eq(temp),
         ]
+        if self.loprop:
+            m.d.comb += self.m_out.eq(temp | temp_r)
+        else:
+            m.d.comb += self.m_out.eq(temp),
 
         return m
diff --git a/src/ieee754/fpcommon/postnormalise.py b/src/ieee754/fpcommon/postnormalise.py
index 2fb36796..5bfb447c 100644
--- a/src/ieee754/fpcommon/postnormalise.py
+++ b/src/ieee754/fpcommon/postnormalise.py
@@ -3,7 +3,6 @@
 # 2013-12-12
 
 from nmigen import Module, Signal, Cat, Mux, Elaboratable
-from nmigen.lib.coding import PriorityEncoder
 from nmigen.cli import main, verilog
 from math import log
 
@@ -12,6 +11,7 @@ from ieee754.fpcommon.fpbase import (Overflow, OverflowMod,
 from ieee754.fpcommon.fpbase import MultiShiftRMerge
 from ieee754.fpcommon.fpbase import FPState
 from ieee754.fpcommon.getop import FPPipeContext
+from ieee754.fpcommon.msbhigh import FPMSBHigh
 from .postcalc import FPAddStage1Data
 
 
@@ -58,10 +58,6 @@ class FPNorm1ModSingle(Elaboratable):
     def elaborate(self, platform):
         m = Module()
 
-        mwid = self.o.z.m_width+2
-        pe = PriorityEncoder(mwid)
-        m.submodules.norm_pe = pe
-
         of = OverflowMod("norm1of_")
 
         #m.submodules.norm1_out_z = self.o.z
@@ -78,10 +74,15 @@ class FPNorm1ModSingle(Elaboratable):
         #m.submodules.norm1_insel_overflow = iof = OverflowMod("iof")
 
         espec = (len(insel_z.e), True)
+        mwid = self.o.z.m_width+2
+
         ediff_n126 = Signal(espec, reset_less=True)
         msr = MultiShiftRMerge(mwid+2, espec)
         m.submodules.multishift_r = msr
 
+        msb = FPMSBHigh(mwid, espec[0], True)
+        m.submodules.norm_msb = msb
+
         m.d.comb += i.eq(self.i)
         # initialise out from in (overridden below)
         m.d.comb += self.o.z.eq(insel_z)
@@ -94,34 +95,21 @@ class FPNorm1ModSingle(Elaboratable):
         # decrease exponent
         with m.If(~self.i.out_do_z):
             with m.If(decrease):
-                # *sigh* not entirely obvious: count leading zeros (clz)
-                # with a PriorityEncoder: to find from the MSB
-                # we reverse the order of the bits.
-                temp_m = Signal(mwid+1, reset_less=True)
-                temp_r = Signal(mwid+2, reset_less=True) # mask
-                temp_s = Signal(mwid+2, reset_less=True)
-                clz = Signal((len(insel_z.e), True), reset_less=True)
                 # make sure that the amount to decrease by does NOT
                 # go below the minimum non-INF/NaN exponent
-                limclz = Mux(insel_z.exp_sub_n126 > pe.o, pe.o,
-                             insel_z.exp_sub_n126)
-                with m.If(temp_m[0]):
-                    # propagate low bit: do an ASL basically, except
-                    # i can't work out how to do it in nmigen sigh
-                    m.d.comb += temp_r.eq((temp_m[0] << clz) -1)
+                temp_m = Signal(mwid+1, reset_less=True)
+                m.d.comb += msb.limclz.eq(insel_z.exp_sub_n126)
                 m.d.comb += [
                     # cat round and guard bits back into the mantissa
-                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
+                    msb.m_in.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                   insel_z.m)),
-                    pe.i.eq(temp_m[::-1]),          # inverted
-                    clz.eq(limclz),                 # count zeros from MSB down
-                    temp_s.eq((temp_m << clz) | temp_r), # shift mantissa UP
-                    self.o.z.e.eq(insel_z.e - clz),  # DECREASE exponent
-                    self.o.z.m.eq(temp_s[3:]),    # exclude bits 0&1
-                    of.m0.eq(temp_s[3]),          # copy of mantissa[0]
+                    msb.e_in.eq(insel_z.e),
+                    self.o.z.e.eq(msb.e_out),
+                    self.o.z.m.eq(msb.m_out[3:]),    # exclude bits 0&1
+                    of.m0.eq(msb.m_out[3]),          # copy of mantissa[0]
                     # overflow in bits 0..1: got shifted too (leave sticky)
-                    of.guard.eq(temp_s[2]),       # guard
-                    of.round_bit.eq(temp_s[1]),   # round
+                    of.guard.eq(msb.m_out[2]),       # guard
+                    of.round_bit.eq(msb.m_out[1]),   # round
                 ]
             # increase exponent
             with m.Elif(increase):