adjust FPMSBHigh for use in FPNorm: make it possible to shift in the LSB (loprop) and to limit the shift amount (limclz)

author Luke Kenneth Casson Leighton <lkcl@lkcl.net>

Tue, 16 Jul 2019 16:30:35 +0000 (17:30 +0100)

committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>

Tue, 16 Jul 2019 16:30:35 +0000 (17:30 +0100)
author Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Tue, 16 Jul 2019 16:30:35 +0000 (17:30 +0100)
committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Tue, 16 Jul 2019 16:30:35 +0000 (17:30 +0100)
diff --git a/src/ieee754/fcvt/test/test_fcvt_pipe_32_16.py b/src/ieee754/fcvt/test/test_fcvt_pipe_32_16.py

index 3c58d4ac8a7e7fcc2dbd83bf48ab58b700bd61d8..844c744a8ff2b62cc393c8c7ef003fb45df320d1 100644 (file)
--- a/src/ieee754/fcvt/test/test_fcvt_pipe_32_16.py
+++ b/src/ieee754/fcvt/test/test_fcvt_pipe_32_16.py
@@ -1,7 +1,7 @@
  """ test of FPCVTMuxInOut
  """
  
-from ieee754.fcvt.pipeline import (FPCVTMuxInOut,)
+from ieee754.fcvt.pipeline import (FPCVTDownMuxInOut,)
  from ieee754.fpcommon.test.case_gen import run_pipe_fp
  from ieee754.fpcommon.test import unit_test_single
  from ieee754.fcvt.test.fcvt_data_32_16 import regressions
@@ -12,7 +12,7 @@ def fcvt_16(x):
      return Float16(x)
  
  def test_pipe_fp32_16():
-    dut = FPCVTMuxInOut(32, 16, 4)
+    dut = FPCVTDownMuxInOut(32, 16, 4)
      run_pipe_fp(dut, 32, "fcvt", unit_test_single, Float32,
                  regressions, fcvt_16, 10, True)
  
diff --git a/src/ieee754/fpcommon/msbhigh.py b/src/ieee754/fpcommon/msbhigh.py

index 94e2fe7041e60ceac91fa7e893b7c0f3d8d8867c..3a29935a725f8caf6262cb4a536cad7a712aa683 100644 (file)
--- a/src/ieee754/fpcommon/msbhigh.py
+++ b/src/ieee754/fpcommon/msbhigh.py
@@ -1,7 +1,7 @@
  """ module for adjusting a mantissa and exponent so that the MSB is always 1
  """
  
-from nmigen import Module, Signal, Elaboratable
+from nmigen import Module, Signal, Mux, Elaboratable
  from nmigen.lib.coding import PriorityEncoder
  
  
@@ -15,14 +15,18 @@ class FPMSBHigh(Elaboratable):
  
          * exponent is signed
          * mantissa is unsigned.
+        * loprop: propagates the low bit (LSB) on the shift
+        * limclz: use this to limit the amount of shifting.
  
          examples:
          exp = -30, mantissa = 0b00011 - output: -33, 0b11000
          exp =   2, mantissa = 0b01111 - output:   1, 0b11110
      """
-    def __init__(self, m_width, e_width):
+    def __init__(self, m_width, e_width, limclz=False, loprop=False):
          self.m_width = m_width
          self.e_width = e_width
+        self.loprop = loprop
+        self.limclz = limclz and Signal((e_width, True), reset_less=True)
  
          self.m_in = Signal(m_width, reset_less=True)
          self.e_in = Signal((e_width, True), reset_less=True)
@@ -37,16 +41,36 @@ class FPMSBHigh(Elaboratable):
          m.submodules.pe = pe
  
          # *sigh* not entirely obvious: count leading zeros (clz)
-        # with a PriorityEncoder: to find from the MSB
-        # we reverse the order of the bits.
-        temp = Signal(mwid, reset_less=True)
+        # with a PriorityEncoder.  to find from the MSB
+        # we reverse the order of the bits.  it would be better if PE
+        # took a "reverse" argument.
+
          clz = Signal((len(self.e_out), True), reset_less=True)
+        temp = Signal(mwid, reset_less=True)
+        if self.loprop:
+            temp_r = Signal(mwid, reset_less=True)
+            with m.If(self.m_in[0]):
+                # propagate low bit: do an ASL basically, except
+                # i can't work out how to do it in nmigen sigh
+                m.d.comb += temp_r.eq((self.m_in[0] << clz) - 1)
+
+        # limclz sets a limit (set by the exponent) on how far M can be shifted
+        # this can be used to ensure that near-zero numbers don't then have
+        # to be shifted *back* (e < -126 in the case of FP32 for example)
+        if self.limclz is not False:
+            limclz = Mux(self.limclz > pe.o, pe.o, self.limclz)
+        else:
+            limclz = pe.o
+
          m.d.comb += [
-            pe.i.eq(self.m_in[::-1]),       # inverted
-            clz.eq(pe.o),                 # count zeros from MSB down
+            pe.i.eq(self.m_in[::-1]),     # inverted
+            clz.eq(limclz),          # count zeros from MSB down
              temp.eq((self.m_in << clz)),  # shift mantissa UP
              self.e_out.eq(self.e_in - clz), # DECREASE exponent
-            self.m_out.eq(temp),
          ]
+        if self.loprop:
+            m.d.comb += self.m_out.eq(temp | temp_r)
+        else:
+            m.d.comb += self.m_out.eq(temp),
  
          return m
diff --git a/src/ieee754/fpcommon/postnormalise.py b/src/ieee754/fpcommon/postnormalise.py

index 2fb36796d05bd9bcad79ea701b7f6759532ec24f..5bfb447c958129c450e3968bb78d2f3d527442c6 100644 (file)
--- a/src/ieee754/fpcommon/postnormalise.py
+++ b/src/ieee754/fpcommon/postnormalise.py
@@ -3,7 +3,6 @@
  # 2013-12-12
  
  from nmigen import Module, Signal, Cat, Mux, Elaboratable
-from nmigen.lib.coding import PriorityEncoder
  from nmigen.cli import main, verilog
  from math import log
  
@@ -12,6 +11,7 @@ from ieee754.fpcommon.fpbase import (Overflow, OverflowMod,
  from ieee754.fpcommon.fpbase import MultiShiftRMerge
  from ieee754.fpcommon.fpbase import FPState
  from ieee754.fpcommon.getop import FPPipeContext
+from ieee754.fpcommon.msbhigh import FPMSBHigh
  from .postcalc import FPAddStage1Data
  
  
@@ -58,10 +58,6 @@ class FPNorm1ModSingle(Elaboratable):
      def elaborate(self, platform):
          m = Module()
  
-        mwid = self.o.z.m_width+2
-        pe = PriorityEncoder(mwid)
-        m.submodules.norm_pe = pe
-
          of = OverflowMod("norm1of_")
  
          #m.submodules.norm1_out_z = self.o.z
@@ -78,10 +74,15 @@ class FPNorm1ModSingle(Elaboratable):
          #m.submodules.norm1_insel_overflow = iof = OverflowMod("iof")
  
          espec = (len(insel_z.e), True)
+        mwid = self.o.z.m_width+2
+
          ediff_n126 = Signal(espec, reset_less=True)
          msr = MultiShiftRMerge(mwid+2, espec)
          m.submodules.multishift_r = msr
  
+        msb = FPMSBHigh(mwid, espec[0], True)
+        m.submodules.norm_msb = msb
+
          m.d.comb += i.eq(self.i)
          # initialise out from in (overridden below)
          m.d.comb += self.o.z.eq(insel_z)
@@ -94,34 +95,21 @@ class FPNorm1ModSingle(Elaboratable):
          # decrease exponent
          with m.If(~self.i.out_do_z):
              with m.If(decrease):
-                # *sigh* not entirely obvious: count leading zeros (clz)
-                # with a PriorityEncoder: to find from the MSB
-                # we reverse the order of the bits.
-                temp_m = Signal(mwid+1, reset_less=True)
-                temp_r = Signal(mwid+2, reset_less=True) # mask
-                temp_s = Signal(mwid+2, reset_less=True)
-                clz = Signal((len(insel_z.e), True), reset_less=True)
                  # make sure that the amount to decrease by does NOT
                  # go below the minimum non-INF/NaN exponent
-                limclz = Mux(insel_z.exp_sub_n126 > pe.o, pe.o,
-                             insel_z.exp_sub_n126)
-                with m.If(temp_m[0]):
-                    # propagate low bit: do an ASL basically, except
-                    # i can't work out how to do it in nmigen sigh
-                    m.d.comb += temp_r.eq((temp_m[0] << clz) -1)
+                temp_m = Signal(mwid+1, reset_less=True)
+                m.d.comb += msb.limclz.eq(insel_z.exp_sub_n126)
                  m.d.comb += [
                      # cat round and guard bits back into the mantissa
-                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
+                    msb.m_in.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                    insel_z.m)),
-                    pe.i.eq(temp_m[::-1]),          # inverted
-                    clz.eq(limclz),                 # count zeros from MSB down
-                    temp_s.eq((temp_m << clz) | temp_r), # shift mantissa UP
-                    self.o.z.e.eq(insel_z.e - clz),  # DECREASE exponent
-                    self.o.z.m.eq(temp_s[3:]),    # exclude bits 0&1
-                    of.m0.eq(temp_s[3]),          # copy of mantissa[0]
+                    msb.e_in.eq(insel_z.e),
+                    self.o.z.e.eq(msb.e_out),
+                    self.o.z.m.eq(msb.m_out[3:]),    # exclude bits 0&1
+                    of.m0.eq(msb.m_out[3]),          # copy of mantissa[0]
                      # overflow in bits 0..1: got shifted too (leave sticky)
-                    of.guard.eq(temp_s[2]),       # guard
-                    of.round_bit.eq(temp_s[1]),   # round
+                    of.guard.eq(msb.m_out[2]),       # guard
+                    of.round_bit.eq(msb.m_out[1]),   # round
                  ]
              # increase exponent
              with m.Elif(increase):
author	Luke Kenneth Casson Leighton <lkcl@lkcl.net>
	Tue, 16 Jul 2019 16:30:35 +0000 (17:30 +0100)
committer	Luke Kenneth Casson Leighton <lkcl@lkcl.net>
	Tue, 16 Jul 2019 16:30:35 +0000 (17:30 +0100)
src/ieee754/fcvt/test/test_fcvt_pipe_32_16.py		patch | blob | history
src/ieee754/fpcommon/msbhigh.py		patch | blob | history
src/ieee754/fpcommon/postnormalise.py		patch | blob | history