From: Luke Kenneth Casson Leighton Date: Tue, 16 Jul 2019 16:30:35 +0000 (+0100) Subject: adjust FPMSBHigh for use in FPNorm: make it possible to shift in the LSB X-Git-Tag: ls180-24jan2020~821 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e7d748726058f570771daec18c74ac2cc8c5c3c6;p=ieee754fpu.git adjust FPMSBHigh for use in FPNorm: make it possible to shift in the LSB --- diff --git a/src/ieee754/fcvt/test/test_fcvt_pipe_32_16.py b/src/ieee754/fcvt/test/test_fcvt_pipe_32_16.py index 3c58d4ac..844c744a 100644 --- a/src/ieee754/fcvt/test/test_fcvt_pipe_32_16.py +++ b/src/ieee754/fcvt/test/test_fcvt_pipe_32_16.py @@ -1,7 +1,7 @@ """ test of FPCVTMuxInOut """ -from ieee754.fcvt.pipeline import (FPCVTMuxInOut,) +from ieee754.fcvt.pipeline import (FPCVTDownMuxInOut,) from ieee754.fpcommon.test.case_gen import run_pipe_fp from ieee754.fpcommon.test import unit_test_single from ieee754.fcvt.test.fcvt_data_32_16 import regressions @@ -12,7 +12,7 @@ def fcvt_16(x): return Float16(x) def test_pipe_fp32_16(): - dut = FPCVTMuxInOut(32, 16, 4) + dut = FPCVTDownMuxInOut(32, 16, 4) run_pipe_fp(dut, 32, "fcvt", unit_test_single, Float32, regressions, fcvt_16, 10, True) diff --git a/src/ieee754/fpcommon/msbhigh.py b/src/ieee754/fpcommon/msbhigh.py index 94e2fe70..3a29935a 100644 --- a/src/ieee754/fpcommon/msbhigh.py +++ b/src/ieee754/fpcommon/msbhigh.py @@ -1,7 +1,7 @@ """ module for adjusting a mantissa and exponent so that the MSB is always 1 """ -from nmigen import Module, Signal, Elaboratable +from nmigen import Module, Signal, Mux, Elaboratable from nmigen.lib.coding import PriorityEncoder @@ -15,14 +15,18 @@ class FPMSBHigh(Elaboratable): * exponent is signed * mantissa is unsigned. + * loprop: propagates the low bit (LSB) on the shift + * limclz: use this to limit the amount of shifting. examples: exp = -30, mantissa = 0b00011 - output: -33, 0b11000 exp = 2, mantissa = 0b01111 - output: 1, 0b11110 """ - def __init__(self, m_width, e_width): + def __init__(self, m_width, e_width, limclz=False, loprop=False): self.m_width = m_width self.e_width = e_width + self.loprop = loprop + self.limclz = limclz and Signal((e_width, True), reset_less=True) self.m_in = Signal(m_width, reset_less=True) self.e_in = Signal((e_width, True), reset_less=True) @@ -37,16 +41,36 @@ class FPMSBHigh(Elaboratable): m.submodules.pe = pe # *sigh* not entirely obvious: count leading zeros (clz) - # with a PriorityEncoder: to find from the MSB - # we reverse the order of the bits. - temp = Signal(mwid, reset_less=True) + # with a PriorityEncoder. to find from the MSB + # we reverse the order of the bits. it would be better if PE + # took a "reverse" argument. + clz = Signal((len(self.e_out), True), reset_less=True) + temp = Signal(mwid, reset_less=True) + if self.loprop: + temp_r = Signal(mwid, reset_less=True) + with m.If(self.m_in[0]): + # propagate low bit: do an ASL basically, except + # i can't work out how to do it in nmigen sigh + m.d.comb += temp_r.eq((self.m_in[0] << clz) - 1) + + # limclz sets a limit (set by the exponent) on how far M can be shifted + # this can be used to ensure that near-zero numbers don't then have + # to be shifted *back* (e < -126 in the case of FP32 for example) + if self.limclz is not False: + limclz = Mux(self.limclz > pe.o, pe.o, self.limclz) + else: + limclz = pe.o + m.d.comb += [ - pe.i.eq(self.m_in[::-1]), # inverted - clz.eq(pe.o), # count zeros from MSB down + pe.i.eq(self.m_in[::-1]), # inverted + clz.eq(limclz), # count zeros from MSB down temp.eq((self.m_in << clz)), # shift mantissa UP self.e_out.eq(self.e_in - clz), # DECREASE exponent - self.m_out.eq(temp), ] + if self.loprop: + m.d.comb += self.m_out.eq(temp | temp_r) + else: + m.d.comb += self.m_out.eq(temp), return m diff --git a/src/ieee754/fpcommon/postnormalise.py b/src/ieee754/fpcommon/postnormalise.py index 2fb36796..5bfb447c 100644 --- a/src/ieee754/fpcommon/postnormalise.py +++ b/src/ieee754/fpcommon/postnormalise.py @@ -3,7 +3,6 @@ # 2013-12-12 from nmigen import Module, Signal, Cat, Mux, Elaboratable -from nmigen.lib.coding import PriorityEncoder from nmigen.cli import main, verilog from math import log @@ -12,6 +11,7 @@ from ieee754.fpcommon.fpbase import (Overflow, OverflowMod, from ieee754.fpcommon.fpbase import MultiShiftRMerge from ieee754.fpcommon.fpbase import FPState from ieee754.fpcommon.getop import FPPipeContext +from ieee754.fpcommon.msbhigh import FPMSBHigh from .postcalc import FPAddStage1Data @@ -58,10 +58,6 @@ class FPNorm1ModSingle(Elaboratable): def elaborate(self, platform): m = Module() - mwid = self.o.z.m_width+2 - pe = PriorityEncoder(mwid) - m.submodules.norm_pe = pe - of = OverflowMod("norm1of_") #m.submodules.norm1_out_z = self.o.z @@ -78,10 +74,15 @@ class FPNorm1ModSingle(Elaboratable): #m.submodules.norm1_insel_overflow = iof = OverflowMod("iof") espec = (len(insel_z.e), True) + mwid = self.o.z.m_width+2 + ediff_n126 = Signal(espec, reset_less=True) msr = MultiShiftRMerge(mwid+2, espec) m.submodules.multishift_r = msr + msb = FPMSBHigh(mwid, espec[0], True) + m.submodules.norm_msb = msb + m.d.comb += i.eq(self.i) # initialise out from in (overridden below) m.d.comb += self.o.z.eq(insel_z) @@ -94,34 +95,21 @@ class FPNorm1ModSingle(Elaboratable): # decrease exponent with m.If(~self.i.out_do_z): with m.If(decrease): - # *sigh* not entirely obvious: count leading zeros (clz) - # with a PriorityEncoder: to find from the MSB - # we reverse the order of the bits. - temp_m = Signal(mwid+1, reset_less=True) - temp_r = Signal(mwid+2, reset_less=True) # mask - temp_s = Signal(mwid+2, reset_less=True) - clz = Signal((len(insel_z.e), True), reset_less=True) # make sure that the amount to decrease by does NOT # go below the minimum non-INF/NaN exponent - limclz = Mux(insel_z.exp_sub_n126 > pe.o, pe.o, - insel_z.exp_sub_n126) - with m.If(temp_m[0]): - # propagate low bit: do an ASL basically, except - # i can't work out how to do it in nmigen sigh - m.d.comb += temp_r.eq((temp_m[0] << clz) -1) + temp_m = Signal(mwid+1, reset_less=True) + m.d.comb += msb.limclz.eq(insel_z.exp_sub_n126) m.d.comb += [ # cat round and guard bits back into the mantissa - temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard, + msb.m_in.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard, insel_z.m)), - pe.i.eq(temp_m[::-1]), # inverted - clz.eq(limclz), # count zeros from MSB down - temp_s.eq((temp_m << clz) | temp_r), # shift mantissa UP - self.o.z.e.eq(insel_z.e - clz), # DECREASE exponent - self.o.z.m.eq(temp_s[3:]), # exclude bits 0&1 - of.m0.eq(temp_s[3]), # copy of mantissa[0] + msb.e_in.eq(insel_z.e), + self.o.z.e.eq(msb.e_out), + self.o.z.m.eq(msb.m_out[3:]), # exclude bits 0&1 + of.m0.eq(msb.m_out[3]), # copy of mantissa[0] # overflow in bits 0..1: got shifted too (leave sticky) - of.guard.eq(temp_s[2]), # guard - of.round_bit.eq(temp_s[1]), # round + of.guard.eq(msb.m_out[2]), # guard + of.round_bit.eq(msb.m_out[1]), # round ] # increase exponent with m.Elif(increase):