src/ieee754/fpcommon/fpbase.py

   1 """IEEE754 Floating Point Library
   2
   3 Copyright (C) 2019 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
   4 Copyright (C) 2019,2021 Jake Lifshay
   5
   6 """
   7
   8
   9 from nmigen import Signal, Cat, Const, Mux, Module, Elaboratable, Array, Value
  10 from math import log
  11 from operator import or_
  12 from functools import reduce
  13
  14 from nmutil.singlepipe import PrevControl, NextControl
  15 from nmutil.pipeline import ObjectProxy
  16 import unittest
  17 import math
  18 import enum
  19
# smtlib2 support is optional: record whether RoundingModeEnum could be
# imported so that FPRoundingMode only defines to_smtlib2 when it is usable.
try:
    from nmigen.hdl.smtlib2 import RoundingModeEnum
    _HAVE_SMTLIB2 = True
except ImportError:
    _HAVE_SMTLIB2 = False

# value so FPRoundingMode.to_smtlib2 can detect when no default is supplied
# (a unique sentinel object, so that None remains usable as a real default)
_raise_err = object()
  28
  29
  30 class FPRoundingMode(enum.Enum):
  31     # matches the FPSCR.RN field values, but includes some extra
  32     # values (>= 0b100) used in miscellaneous instructions.
  33
  34     # naming matches smtlib2 names, doc strings are the OpenPower ISA
  35     # specification's names (v3.1 section 7.3.2.6 --
  36     # matches values in section 4.3.6).
  37     RNE = 0b00
  38     """Round to Nearest Even
  39
  40     Rounds to the nearest representable floating-point number, ties are
  41     rounded to the number with the even mantissa. Treats +-Infinity as if
  42     it were a normalized floating-point number when deciding which number
  43     is closer when rounding. See IEEE754 spec. for details.
  44     """
  45
  46     ROUND_NEAREST_TIES_TO_EVEN = RNE
  47     DEFAULT = RNE
  48
  49     RTZ = 0b01
  50     """Round towards Zero
  51
  52     If the result is exactly representable as a floating-point number, return
  53     that, otherwise return the nearest representable floating-point value
  54     with magnitude smaller than the exact answer.
  55     """
  56
  57     ROUND_TOWARDS_ZERO = RTZ
  58
  59     RTP = 0b10
  60     """Round towards +Infinity
  61
  62     If the result is exactly representable as a floating-point number, return
  63     that, otherwise return the nearest representable floating-point value
  64     that is numerically greater than the exact answer. This can round up to
  65     +Infinity.
  66     """
  67
  68     ROUND_TOWARDS_POSITIVE = RTP
  69
  70     RTN = 0b11
  71     """Round towards -Infinity
  72
  73     If the result is exactly representable as a floating-point number, return
  74     that, otherwise return the nearest representable floating-point value
  75     that is numerically less than the exact answer. This can round down to
  76     -Infinity.
  77     """
  78
  79     ROUND_TOWARDS_NEGATIVE = RTN
  80
  81     RNA = 0b100
  82     """Round to Nearest Away
  83
  84     Rounds to the nearest representable floating-point number, ties are
  85     rounded to the number with the maximum magnitude. Treats +-Infinity as if
  86     it were a normalized floating-point number when deciding which number
  87     is closer when rounding. See IEEE754 spec. for details.
  88     """
  89
  90     ROUND_NEAREST_TIES_TO_AWAY = RNA
  91
  92     RTOP = 0b101
  93     """Round to Odd, unsigned zeros are Positive
  94
  95     Not in smtlib2.
  96
  97     If the result is exactly representable as a floating-point number, return
  98     that, otherwise return the nearest representable floating-point value
  99     that has an odd mantissa.
 100
 101     If the result is zero but with otherwise undetermined sign
 102     (e.g. `1.0 - 1.0`), the sign is positive.
 103
 104     This rounding mode is used for instructions with Round To Odd enabled,
 105     and `FPSCR.RN != RTN`.
 106
 107     This is useful to avoid double-rounding errors when doing arithmetic in a
 108     larger type (e.g. f128) but where the answer should be a smaller type
 109     (e.g. f80).
 110     """
 111
 112     ROUND_TO_ODD_UNSIGNED_ZEROS_ARE_POSITIVE = RTOP
 113
 114     RTON = 0b110
 115     """Round to Odd, unsigned zeros are Negative
 116
 117     Not in smtlib2.
 118
 119     If the result is exactly representable as a floating-point number, return
 120     that, otherwise return the nearest representable floating-point value
 121     that has an odd mantissa.
 122
 123     If the result is zero but with otherwise undetermined sign
 124     (e.g. `1.0 - 1.0`), the sign is negative.
 125
 126     This rounding mode is used for instructions with Round To Odd enabled,
 127     and `FPSCR.RN == RTN`.
 128
 129     This is useful to avoid double-rounding errors when doing arithmetic in a
 130     larger type (e.g. f128) but where the answer should be a smaller type
 131     (e.g. f80).
 132     """
 133
 134     ROUND_TO_ODD_UNSIGNED_ZEROS_ARE_NEGATIVE = RTON
 135
 136     @staticmethod
 137     def make_array(f):
 138         l = [None] * len(FPRoundingMode)
 139         for rm in FPRoundingMode:
 140             l[rm.value] = f(rm)
 141         return Array(l)
 142
 143     def overflow_rounds_to_inf(self, sign):
 144         """returns true if an overflow should round to `inf`,
 145         false if it should round to `max_normal`
 146         """
 147         not_sign = ~sign if isinstance(sign, Value) else not sign
 148         if self is FPRoundingMode.RNE:
 149             return True
 150         elif self is FPRoundingMode.RTZ:
 151             return False
 152         elif self is FPRoundingMode.RTP:
 153             return not_sign
 154         elif self is FPRoundingMode.RTN:
 155             return sign
 156         elif self is FPRoundingMode.RNA:
 157             return True
 158         elif self is FPRoundingMode.RTOP:
 159             return False
 160         else:
 161             assert self is FPRoundingMode.RTON
 162             return False
 163
 164     def underflow_rounds_to_zero(self, sign):
 165         """returns true if an underflow should round to `zero`,
 166         false if it should round to `min_denormal`
 167         """
 168         not_sign = ~sign if isinstance(sign, Value) else not sign
 169         if self is FPRoundingMode.RNE:
 170             return True
 171         elif self is FPRoundingMode.RTZ:
 172             return True
 173         elif self is FPRoundingMode.RTP:
 174             return sign
 175         elif self is FPRoundingMode.RTN:
 176             return not_sign
 177         elif self is FPRoundingMode.RNA:
 178             return True
 179         elif self is FPRoundingMode.RTOP:
 180             return False
 181         else:
 182             assert self is FPRoundingMode.RTON
 183             return False
 184
 185     def zero_sign(self):
 186         """which sign an exact zero result should have when it isn't
 187         otherwise determined, e.g. for `1.0 - 1.0`.
 188         """
 189         if self is FPRoundingMode.RNE:
 190             return False
 191         elif self is FPRoundingMode.RTZ:
 192             return False
 193         elif self is FPRoundingMode.RTP:
 194             return False
 195         elif self is FPRoundingMode.RTN:
 196             return True
 197         elif self is FPRoundingMode.RNA:
 198             return False
 199         elif self is FPRoundingMode.RTOP:
 200             return False
 201         else:
 202             assert self is FPRoundingMode.RTON
 203             return True
 204
 205     if _HAVE_SMTLIB2:
 206         def to_smtlib2(self, default=_raise_err):
 207             """return the corresponding smtlib2 rounding mode for `self`. If
 208             there is no corresponding smtlib2 rounding mode, then return
 209             `default` if specified, else raise `ValueError`.
 210             """
 211             if self is FPRoundingMode.RNE:
 212                 return RoundingModeEnum.RNE
 213             elif self is FPRoundingMode.RTZ:
 214                 return RoundingModeEnum.RTZ
 215             elif self is FPRoundingMode.RTP:
 216                 return RoundingModeEnum.RTP
 217             elif self is FPRoundingMode.RTN:
 218                 return RoundingModeEnum.RTN
 219             elif self is FPRoundingMode.RNA:
 220                 return RoundingModeEnum.RNA
 221             else:
 222                 assert self in (FPRoundingMode.RTOP, FPRoundingMode.RTON)
 223                 if default is _raise_err:
 224                     raise ValueError(
 225                         "no corresponding smtlib2 rounding mode", self)
 226                 return default
 227
 228
 229
 230
 231 class FPFormat:
 232     """ Class describing binary floating-point formats based on IEEE 754.
 233
 234     :attribute e_width: the number of bits in the exponent field.
 235     :attribute m_width: the number of bits stored in the mantissa
 236         field.
 237     :attribute has_int_bit: if the FP format has an explicit integer bit (like
 238         the x87 80-bit format). The bit is considered part of the mantissa.
 239     :attribute has_sign: if the FP format has a sign bit. (Some Vulkan
 240         Image/Buffer formats are FP numbers without a sign bit.)
 241     """
 242
 243     def __init__(self,
 244                  e_width,
 245                  m_width,
 246                  has_int_bit=False,
 247                  has_sign=True):
 248         """ Create ``FPFormat`` instance. """
 249         self.e_width = e_width
 250         self.m_width = m_width
 251         self.has_int_bit = has_int_bit
 252         self.has_sign = has_sign
 253
 254     def __eq__(self, other):
 255         """ Check for equality. """
 256         if not isinstance(other, FPFormat):
 257             return NotImplemented
 258         return (self.e_width == other.e_width
 259                 and self.m_width == other.m_width
 260                 and self.has_int_bit == other.has_int_bit
 261                 and self.has_sign == other.has_sign)
 262
 263     @staticmethod
 264     def standard(width):
 265         """ Get standard IEEE 754-2008 format.
 266
 267         :param width: bit-width of requested format.
 268         :returns: the requested ``FPFormat`` instance.
 269         """
 270         if width == 16:
 271             return FPFormat(5, 10)
 272         if width == 32:
 273             return FPFormat(8, 23)
 274         if width == 64:
 275             return FPFormat(11, 52)
 276         if width == 128:
 277             return FPFormat(15, 112)
 278         if width > 128 and width % 32 == 0:
 279             if width > 1000000:  # arbitrary upper limit
 280                 raise ValueError("width too big")
 281             e_width = round(4 * math.log2(width)) - 13
 282             return FPFormat(e_width, width - 1 - e_width)
 283         raise ValueError("width must be the bit-width of a valid IEEE"
 284                          " 754-2008 binary format")
 285
 286     def __repr__(self):
 287         """ Get repr. """
 288         try:
 289             if self == self.standard(self.width):
 290                 return f"FPFormat.standard({self.width})"
 291         except ValueError:
 292             pass
 293         retval = f"FPFormat({self.e_width}, {self.m_width}"
 294         if self.has_int_bit is not False:
 295             retval += f", {self.has_int_bit}"
 296         if self.has_sign is not True:
 297             retval += f", {self.has_sign}"
 298         return retval + ")"
 299
 300     def get_sign_field(self, x):
 301         """ returns the sign bit of its input number, x
 302             (assumes FPFormat is set to signed - has_sign=True)
 303         """
 304         return x >> (self.e_width + self.m_width)
 305
 306     def get_exponent_field(self, x):
 307         """ returns the raw exponent of its input number, x (no bias subtracted)
 308         """
 309         x = ((x >> self.m_width) & self.exponent_inf_nan)
 310         return x
 311
 312     def get_exponent(self, x):
 313         """ returns the exponent of its input number, x
 314         """
 315         return self.get_exponent_field(x) - self.exponent_bias
 316
 317     def get_mantissa_field(self, x):
 318         """ returns the mantissa of its input number, x
 319         """
 320         return x & self.mantissa_mask
 321
 322     def is_zero(self, x):
 323         """ returns true if x is +/- zero
 324         """
 325         return (self.get_exponent(x) == self.e_sub) & \
 326             (self.get_mantissa_field(x) == 0)
 327
 328     def is_subnormal(self, x):
 329         """ returns true if x is subnormal (exp at minimum)
 330         """
 331         return (self.get_exponent(x) == self.e_sub) & \
 332             (self.get_mantissa_field(x) != 0)
 333
 334     def is_inf(self, x):
 335         """ returns true if x is infinite
 336         """
 337         return (self.get_exponent(x) == self.e_max) & \
 338             (self.get_mantissa_field(x) == 0)
 339
 340     def is_nan(self, x):
 341         """ returns true if x is a nan (quiet or signalling)
 342         """
 343         return (self.get_exponent(x) == self.e_max) & \
 344             (self.get_mantissa_field(x) != 0)
 345
 346     def is_quiet_nan(self, x):
 347         """ returns true if x is a quiet nan
 348         """
 349         highbit = 1 << (self.m_width - 1)
 350         return (self.get_exponent(x) == self.e_max) & \
 351             (self.get_mantissa_field(x) != 0) & \
 352             (self.get_mantissa_field(x) & highbit != 0)
 353
 354     def is_nan_signaling(self, x):
 355         """ returns true if x is a signalling nan
 356         """
 357         highbit = 1 << (self.m_width - 1)
 358         return (self.get_exponent(x) == self.e_max) & \
 359             (self.get_mantissa_field(x) != 0) & \
 360             (self.get_mantissa_field(x) & highbit) == 0
 361
 362     @property
 363     def width(self):
 364         """ Get the total number of bits in the FP format. """
 365         return self.has_sign + self.e_width + self.m_width
 366
 367     @property
 368     def mantissa_mask(self):
 369         """ Get a mantissa mask based on the mantissa width """
 370         return (1 << self.m_width) - 1
 371
 372     @property
 373     def exponent_inf_nan(self):
 374         """ Get the value of the exponent field designating infinity/NaN. """
 375         return (1 << self.e_width) - 1
 376
 377     @property
 378     def e_max(self):
 379         """ get the maximum exponent (minus bias)
 380         """
 381         return self.exponent_inf_nan - self.exponent_bias
 382
 383     @property
 384     def e_sub(self):
 385         return self.exponent_denormal_zero - self.exponent_bias
 386     @property
 387     def exponent_denormal_zero(self):
 388         """ Get the value of the exponent field designating denormal/zero. """
 389         return 0
 390
 391     @property
 392     def exponent_min_normal(self):
 393         """ Get the minimum value of the exponent field for normal numbers. """
 394         return 1
 395
 396     @property
 397     def exponent_max_normal(self):
 398         """ Get the maximum value of the exponent field for normal numbers. """
 399         return self.exponent_inf_nan - 1
 400
 401     @property
 402     def exponent_bias(self):
 403         """ Get the exponent bias. """
 404         return (1 << (self.e_width - 1)) - 1
 405
 406     @property
 407     def fraction_width(self):
 408         """ Get the number of mantissa bits that are fraction bits. """
 409         return self.m_width - self.has_int_bit
 410
 411
 412 class TestFPFormat(unittest.TestCase):
 413     """ very quick test for FPFormat
 414     """
 415
 416     def test_fpformat_fp64(self):
 417         f64 = FPFormat.standard(64)
 418         from sfpy import Float64
 419         x = Float64(1.0).bits
 420         print (hex(x))
 421
 422         self.assertEqual(f64.get_exponent(x), 0)
 423         x = Float64(2.0).bits
 424         print (hex(x))
 425         self.assertEqual(f64.get_exponent(x), 1)
 426
 427         x = Float64(1.5).bits
 428         m = f64.get_mantissa_field(x)
 429         print (hex(x), hex(m))
 430         self.assertEqual(m, 0x8000000000000)
 431
 432         s = f64.get_sign_field(x)
 433         print (hex(x), hex(s))
 434         self.assertEqual(s, 0)
 435
 436         x = Float64(-1.5).bits
 437         s = f64.get_sign_field(x)
 438         print (hex(x), hex(s))
 439         self.assertEqual(s, 1)
 440
 441     def test_fpformat_fp32(self):
 442         f32 = FPFormat.standard(32)
 443         from sfpy import Float32
 444         x = Float32(1.0).bits
 445         print (hex(x))
 446
 447         self.assertEqual(f32.get_exponent(x), 0)
 448         x = Float32(2.0).bits
 449         print (hex(x))
 450         self.assertEqual(f32.get_exponent(x), 1)
 451
 452         x = Float32(1.5).bits
 453         m = f32.get_mantissa_field(x)
 454         print (hex(x), hex(m))
 455         self.assertEqual(m, 0x400000)
 456
 457         # NaN test
 458         x = Float32(-1.0).sqrt()
 459         x = x.bits
 460         i = f32.is_nan(x)
 461         print (hex(x), "nan", f32.get_exponent(x), f32.e_max,
 462                f32.get_mantissa_field(x), i)
 463         self.assertEqual(i, True)
 464
 465         # Inf test
 466         x = Float32(1e36) * Float32(1e36) * Float32(1e36)
 467         x = x.bits
 468         i = f32.is_inf(x)
 469         print (hex(x), "inf", f32.get_exponent(x), f32.e_max,
 470                f32.get_mantissa_field(x), i)
 471         self.assertEqual(i, True)
 472
 473         # subnormal
 474         x = Float32(1e-41)
 475         x = x.bits
 476         i = f32.is_subnormal(x)
 477         print (hex(x), "sub", f32.get_exponent(x), f32.e_max,
 478                f32.get_mantissa_field(x), i)
 479         self.assertEqual(i, True)
 480
 481         x = Float32(0.0)
 482         x = x.bits
 483         i = f32.is_subnormal(x)
 484         print (hex(x), "sub", f32.get_exponent(x), f32.e_max,
 485                f32.get_mantissa_field(x), i)
 486         self.assertEqual(i, False)
 487
 488         # zero
 489         i = f32.is_zero(x)
 490         print (hex(x), "zero", f32.get_exponent(x), f32.e_max,
 491                f32.get_mantissa_field(x), i)
 492         self.assertEqual(i, True)
 493
 494
 495 class MultiShiftR:
 496
 497     def __init__(self, width):
 498         self.width = width
 499         self.smax = int(log(width) / log(2))
 500         self.i = Signal(width, reset_less=True)
 501         self.s = Signal(self.smax, reset_less=True)
 502         self.o = Signal(width, reset_less=True)
 503
 504     def elaborate(self, platform):
 505         m = Module()
 506         m.d.comb += self.o.eq(self.i >> self.s)
 507         return m
 508
 509
 510 class MultiShift:
 511     """ Generates variable-length single-cycle shifter from a series
 512         of conditional tests on each bit of the left/right shift operand.
 513         Each bit tested produces output shifted by that number of bits,
 514         in a binary fashion: bit 1 if set shifts by 1 bit, bit 2 if set
 515         shifts by 2 bits, each partial result cascading to the next Mux.
 516
 517         Could be adapted to do arithmetic shift by taking copies of the
 518         MSB instead of zeros.
 519     """
 520
 521     def __init__(self, width):
 522         self.width = width
 523         self.smax = int(log(width) / log(2))
 524
 525     def lshift(self, op, s):
 526         res = op << s
 527         return res[:len(op)]
 528
 529     def rshift(self, op, s):
 530         res = op >> s
 531         return res[:len(op)]
 532
 533
 534 class FPNumBaseRecord:
 535     """ Floating-point Base Number Class.
 536
 537     This class is designed to be passed around in other data structures
 538     (between pipelines and between stages).  Its "friend" is FPNumBase,
 539     which is a *module*.  The reason for the discernment is because
 540     nmigen modules that are not added to submodules results in the
 541     irritating "Elaboration" warning.  Despite not *needing* FPNumBase
 542     in many cases to be added as a submodule (because it is just data)
 543     this was not possible to solve without splitting out the data from
 544     the module.
 545     """
 546
 547     def __init__(self, width, m_extra=True, e_extra=False, name=None):
 548         if name is None:
 549             name = ""
 550             # assert false, "missing name"
 551         else:
 552             name += "_"
 553         self.width = width
 554         m_width = {16: 11, 32: 24, 64: 53}[width]  # 1 extra bit (overflow)
 555         e_width = {16: 7,  32: 10, 64: 13}[width]  # 2 extra bits (overflow)
 556         e_max = 1 << (e_width-3)
 557         self.rmw = m_width - 1  # real mantissa width (not including extras)
 558         self.e_max = e_max
 559         if m_extra:
 560             # mantissa extra bits (top,guard,round)
 561             self.m_extra = 3
 562             m_width += self.m_extra
 563         else:
 564             self.m_extra = 0
 565         if e_extra:
 566             self.e_extra = 6  # enough to cover FP64 when converting to FP16
 567             e_width += self.e_extra
 568         else:
 569             self.e_extra = 0
 570         # print (m_width, e_width, e_max, self.rmw, self.m_extra)
 571         self.m_width = m_width
 572         self.e_width = e_width
 573         self.e_start = self.rmw
 574         self.e_end = self.rmw + self.e_width - 2  # for decoding
 575
 576         self.v = Signal(width, reset_less=True,
 577                         name=name+"v")  # Latched copy of value
 578         self.m = Signal(m_width, reset_less=True, name=name+"m")  # Mantissa
 579         self.e = Signal((e_width, True),
 580                         reset_less=True, name=name+"e")  # exp+2 bits, signed
 581         self.s = Signal(reset_less=True, name=name+"s")  # Sign bit
 582
 583         self.fp = self
 584         self.drop_in(self)
 585
 586     def drop_in(self, fp):
 587         fp.s = self.s
 588         fp.e = self.e
 589         fp.m = self.m
 590         fp.v = self.v
 591         fp.rmw = self.rmw
 592         fp.width = self.width
 593         fp.e_width = self.e_width
 594         fp.e_max = self.e_max
 595         fp.m_width = self.m_width
 596         fp.e_start = self.e_start
 597         fp.e_end = self.e_end
 598         fp.m_extra = self.m_extra
 599
 600         m_width = self.m_width
 601         e_max = self.e_max
 602         e_width = self.e_width
 603
 604         self.mzero = Const(0, (m_width, False))
 605         m_msb = 1 << (self.m_width-2)
 606         self.msb1 = Const(m_msb, (m_width, False))
 607         self.m1s = Const(-1, (m_width, False))
 608         self.P128 = Const(e_max, (e_width, True))
 609         self.P127 = Const(e_max-1, (e_width, True))
 610         self.N127 = Const(-(e_max-1), (e_width, True))
 611         self.N126 = Const(-(e_max-2), (e_width, True))
 612
 613     def create(self, s, e, m):
 614         """ creates a value from sign / exponent / mantissa
 615
 616             bias is added here, to the exponent.
 617
 618             NOTE: order is important, because e_start/e_end can be
 619             a bit too long (overwriting s).
 620         """
 621         return [
 622           self.v[0:self.e_start].eq(m),        # mantissa
 623           self.v[self.e_start:self.e_end].eq(e + self.fp.P127),  # (add bias)
 624           self.v[-1].eq(s),          # sign
 625         ]
 626
 627     def _nan(self, s):
 628         return (s, self.fp.P128, 1 << (self.e_start-1))
 629
 630     def _inf(self, s):
 631         return (s, self.fp.P128, 0)
 632
 633     def _zero(self, s):
 634         return (s, self.fp.N127, 0)
 635
 636     def nan(self, s):
 637         return self.create(*self._nan(s))
 638
 639     def quieted_nan(self, other):
 640         assert isinstance(other, FPNumBaseRecord)
 641         assert self.width == other.width
 642         return self.create(other.s, self.fp.P128,
 643                            other.v[0:self.e_start] | (1 << (self.e_start - 1)))
 644
 645     def inf(self, s):
 646         return self.create(*self._inf(s))
 647
 648     def max_normal(self, s):
 649         return self.create(s, self.fp.P127, ~0)
 650
 651     def min_denormal(self, s):
 652         return self.create(s, self.fp.N127, 1)
 653
 654     def zero(self, s):
 655         return self.create(*self._zero(s))
 656
 657     def create2(self, s, e, m):
 658         """ creates a value from sign / exponent / mantissa
 659
 660             bias is added here, to the exponent
 661         """
 662         e = e + self.P127  # exp (add on bias)
 663         return Cat(m[0:self.e_start],
 664                    e[0:self.e_end-self.e_start],
 665                    s)
 666
 667     def nan2(self, s):
 668         return self.create2(s, self.P128, self.msb1)
 669
 670     def inf2(self, s):
 671         return self.create2(s, self.P128, self.mzero)
 672
 673     def zero2(self, s):
 674         return self.create2(s, self.N127, self.mzero)
 675
 676     def __iter__(self):
 677         yield self.s
 678         yield self.e
 679         yield self.m
 680
 681     def eq(self, inp):
 682         return [self.s.eq(inp.s), self.e.eq(inp.e), self.m.eq(inp.m)]
 683
 684
 685 class FPNumBase(FPNumBaseRecord, Elaboratable):
 686     """ Floating-point Base Number Class
 687     """
 688
 689     def __init__(self, fp):
 690         fp.drop_in(self)
 691         self.fp = fp
 692         e_width = fp.e_width
 693
 694         self.is_nan = Signal(reset_less=True)
 695         self.is_zero = Signal(reset_less=True)
 696         self.is_inf = Signal(reset_less=True)
 697         self.is_overflowed = Signal(reset_less=True)
 698         self.is_denormalised = Signal(reset_less=True)
 699         self.exp_128 = Signal(reset_less=True)
 700         self.exp_sub_n126 = Signal((e_width, True), reset_less=True)
 701         self.exp_lt_n126 = Signal(reset_less=True)
 702         self.exp_zero = Signal(reset_less=True)
 703         self.exp_gt_n126 = Signal(reset_less=True)
 704         self.exp_gt127 = Signal(reset_less=True)
 705         self.exp_n127 = Signal(reset_less=True)
 706         self.exp_n126 = Signal(reset_less=True)
 707         self.m_zero = Signal(reset_less=True)
 708         self.m_msbzero = Signal(reset_less=True)
 709
 710     def elaborate(self, platform):
 711         m = Module()
 712         m.d.comb += self.is_nan.eq(self._is_nan())
 713         m.d.comb += self.is_zero.eq(self._is_zero())
 714         m.d.comb += self.is_inf.eq(self._is_inf())
 715         m.d.comb += self.is_overflowed.eq(self._is_overflowed())
 716         m.d.comb += self.is_denormalised.eq(self._is_denormalised())
 717         m.d.comb += self.exp_128.eq(self.e == self.fp.P128)
 718         m.d.comb += self.exp_sub_n126.eq(self.e - self.fp.N126)
 719         m.d.comb += self.exp_gt_n126.eq(self.exp_sub_n126 > 0)
 720         m.d.comb += self.exp_lt_n126.eq(self.exp_sub_n126 < 0)
 721         m.d.comb += self.exp_zero.eq(self.e == 0)
 722         m.d.comb += self.exp_gt127.eq(self.e > self.fp.P127)
 723         m.d.comb += self.exp_n127.eq(self.e == self.fp.N127)
 724         m.d.comb += self.exp_n126.eq(self.e == self.fp.N126)
 725         m.d.comb += self.m_zero.eq(self.m == self.fp.mzero)
 726         m.d.comb += self.m_msbzero.eq(self.m[self.fp.e_start] == 0)
 727
 728         return m
 729
 730     def _is_nan(self):
 731         return (self.exp_128) & (~self.m_zero)
 732
 733     def _is_inf(self):
 734         return (self.exp_128) & (self.m_zero)
 735
 736     def _is_zero(self):
 737         return (self.exp_n127) & (self.m_zero)
 738
 739     def _is_overflowed(self):
 740         return self.exp_gt127
 741
 742     def _is_denormalised(self):
 743         # XXX NOT to be used for "official" quiet NaN tests!
 744         # particularly when the MSB has been extended
 745         return (self.exp_n126) & (self.m_msbzero)
 746
 747
 748 class FPNumOut(FPNumBase):
 749     """ Floating-point Number Class
 750
 751         Contains signals for an incoming copy of the value, decoded into
 752         sign / exponent / mantissa.
 753         Also contains encoding functions, creation and recognition of
 754         zero, NaN and inf (all signed)
 755
 756         Four extra bits are included in the mantissa: the top bit
 757         (m[-1]) is effectively a carry-overflow.  The other three are
 758         guard (m[2]), round (m[1]), and sticky (m[0])
 759     """
 760
 761     def __init__(self, fp):
 762         FPNumBase.__init__(self, fp)
 763
 764     def elaborate(self, platform):
 765         m = FPNumBase.elaborate(self, platform)
 766
 767         return m
 768
 769
 770 class MultiShiftRMerge(Elaboratable):
 771     """ shifts down (right) and merges lower bits into m[0].
 772         m[0] is the "sticky" bit, basically
 773     """
 774
 775     def __init__(self, width, s_max=None):
 776         if s_max is None:
 777             s_max = int(log(width) / log(2))
 778         self.smax = s_max
 779         self.m = Signal(width, reset_less=True)
 780         self.inp = Signal(width, reset_less=True)
 781         self.diff = Signal(s_max, reset_less=True)
 782         self.width = width
 783
 784     def elaborate(self, platform):
 785         m = Module()
 786
 787         rs = Signal(self.width, reset_less=True)
 788         m_mask = Signal(self.width, reset_less=True)
 789         smask = Signal(self.width, reset_less=True)
 790         stickybit = Signal(reset_less=True)
 791         # XXX GRR frickin nuisance https://github.com/nmigen/nmigen/issues/302
 792         maxslen = Signal(self.smax[0], reset_less=True)
 793         maxsleni = Signal(self.smax[0], reset_less=True)
 794
 795         sm = MultiShift(self.width-1)
 796         m0s = Const(0, self.width-1)
 797         mw = Const(self.width-1, len(self.diff))
 798         m.d.comb += [maxslen.eq(Mux(self.diff > mw, mw, self.diff)),
 799                      maxsleni.eq(Mux(self.diff > mw, 0, mw-self.diff)),
 800                      ]
 801
 802         m.d.comb += [
 803                 # shift mantissa by maxslen, mask by inverse
 804                 rs.eq(sm.rshift(self.inp[1:], maxslen)),
 805                 m_mask.eq(sm.rshift(~m0s, maxsleni)),
 806                 smask.eq(self.inp[1:] & m_mask),
 807                 # sticky bit combines all mask (and mantissa low bit)
 808                 stickybit.eq(smask.bool() | self.inp[0]),
 809                 # mantissa result contains m[0] already.
 810                 self.m.eq(Cat(stickybit, rs))
 811            ]
 812         return m
 813
 814
 815 class FPNumShift(FPNumBase, Elaboratable):
 816     """ Floating-point Number Class for shifting
 817     """
 818
 819     def __init__(self, mainm, op, inv, width, m_extra=True):
 820         FPNumBase.__init__(self, width, m_extra)
 821         self.latch_in = Signal()
 822         self.mainm = mainm
 823         self.inv = inv
 824         self.op = op
 825
 826     def elaborate(self, platform):
 827         m = FPNumBase.elaborate(self, platform)
 828
 829         m.d.comb += self.s.eq(op.s)
 830         m.d.comb += self.e.eq(op.e)
 831         m.d.comb += self.m.eq(op.m)
 832
 833         with self.mainm.State("align"):
 834             with m.If(self.e < self.inv.e):
 835                 m.d.sync += self.shift_down()
 836
 837         return m
 838
 839     def shift_down(self, inp):
 840         """ shifts a mantissa down by one. exponent is increased to compensate
 841
 842             accuracy is lost as a result in the mantissa however there are 3
 843             guard bits (the latter of which is the "sticky" bit)
 844         """
 845         return [self.e.eq(inp.e + 1),
 846                 self.m.eq(Cat(inp.m[0] | inp.m[1], inp.m[2:], 0))
 847                 ]
 848
 849     def shift_down_multi(self, diff):
 850         """ shifts a mantissa down. exponent is increased to compensate
 851
 852             accuracy is lost as a result in the mantissa however there are 3
 853             guard bits (the latter of which is the "sticky" bit)
 854
 855             this code works by variable-shifting the mantissa by up to
 856             its maximum bit-length: no point doing more (it'll still be
 857             zero).
 858
 859             the sticky bit is computed by shifting a batch of 1s by
 860             the same amount, which will introduce zeros.  it's then
 861             inverted and used as a mask to get the LSBs of the mantissa.
 862             those are then |'d into the sticky bit.
 863         """
 864         sm = MultiShift(self.width)
 865         mw = Const(self.m_width-1, len(diff))
 866         maxslen = Mux(diff > mw, mw, diff)
 867         rs = sm.rshift(self.m[1:], maxslen)
 868         maxsleni = mw - maxslen
 869         m_mask = sm.rshift(self.m1s[1:], maxsleni)  # shift and invert
 870
 871         stickybits = reduce(or_, self.m[1:] & m_mask) | self.m[0]
 872         return [self.e.eq(self.e + diff),
 873                 self.m.eq(Cat(stickybits, rs))
 874                 ]
 875
 876     def shift_up_multi(self, diff):
 877         """ shifts a mantissa up. exponent is decreased to compensate
 878         """
 879         sm = MultiShift(self.width)
 880         mw = Const(self.m_width, len(diff))
 881         maxslen = Mux(diff > mw, mw, diff)
 882
 883         return [self.e.eq(self.e - diff),
 884                 self.m.eq(sm.lshift(self.m, maxslen))
 885                 ]
 886
 887
 888 class FPNumDecode(FPNumBase):
 889     """ Floating-point Number Class
 890
 891         Contains signals for an incoming copy of the value, decoded into
 892         sign / exponent / mantissa.
 893         Also contains encoding functions, creation and recognition of
 894         zero, NaN and inf (all signed)
 895
 896         Four extra bits are included in the mantissa: the top bit
 897         (m[-1]) is effectively a carry-overflow.  The other three are
 898         guard (m[2]), round (m[1]), and sticky (m[0])
 899     """
 900
 901     def __init__(self, op, fp):
 902         FPNumBase.__init__(self, fp)
 903         self.op = op
 904
 905     def elaborate(self, platform):
 906         m = FPNumBase.elaborate(self, platform)
 907
 908         m.d.comb += self.decode(self.v)
 909
 910         return m
 911
 912     def decode(self, v):
 913         """ decodes a latched value into sign / exponent / mantissa
 914
 915             bias is subtracted here, from the exponent.  exponent
 916             is extended to 10 bits so that subtract 127 is done on
 917             a 10-bit number
 918         """
 919         args = [0] * self.m_extra + [v[0:self.e_start]]  # pad with extra zeros
 920         #print ("decode", self.e_end)
 921         return [self.m.eq(Cat(*args)),  # mantissa
 922                 self.e.eq(v[self.e_start:self.e_end] - self.fp.P127),  # exp
 923                 self.s.eq(v[-1]),                 # sign
 924                 ]
 925
 926
 927 class FPNumIn(FPNumBase):
 928     """ Floating-point Number Class
 929
 930         Contains signals for an incoming copy of the value, decoded into
 931         sign / exponent / mantissa.
 932         Also contains encoding functions, creation and recognition of
 933         zero, NaN and inf (all signed)
 934
 935         Four extra bits are included in the mantissa: the top bit
 936         (m[-1]) is effectively a carry-overflow.  The other three are
 937         guard (m[2]), round (m[1]), and sticky (m[0])
 938     """
 939
 940     def __init__(self, op, fp):
 941         FPNumBase.__init__(self, fp)
 942         self.latch_in = Signal()
 943         self.op = op
 944
 945     def decode2(self, m):
 946         """ decodes a latched value into sign / exponent / mantissa
 947
 948             bias is subtracted here, from the exponent.  exponent
 949             is extended to 10 bits so that subtract 127 is done on
 950             a 10-bit number
 951         """
 952         v = self.v
 953         args = [0] * self.m_extra + [v[0:self.e_start]]  # pad with extra zeros
 954         #print ("decode", self.e_end)
 955         res = ObjectProxy(m, pipemode=False)
 956         res.m = Cat(*args)                             # mantissa
 957         res.e = v[self.e_start:self.e_end] - self.fp.P127  # exp
 958         res.s = v[-1]                                  # sign
 959         return res
 960
 961     def decode(self, v):
 962         """ decodes a latched value into sign / exponent / mantissa
 963
 964             bias is subtracted here, from the exponent.  exponent
 965             is extended to 10 bits so that subtract 127 is done on
 966             a 10-bit number
 967         """
 968         args = [0] * self.m_extra + [v[0:self.e_start]]  # pad with extra zeros
 969         #print ("decode", self.e_end)
 970         return [self.m.eq(Cat(*args)),  # mantissa
 971                 self.e.eq(v[self.e_start:self.e_end] - self.P127),  # exp
 972                 self.s.eq(v[-1]),                 # sign
 973                 ]
 974
 975     def shift_down(self, inp):
 976         """ shifts a mantissa down by one. exponent is increased to compensate
 977
 978             accuracy is lost as a result in the mantissa however there are 3
 979             guard bits (the latter of which is the "sticky" bit)
 980         """
 981         return [self.e.eq(inp.e + 1),
 982                 self.m.eq(Cat(inp.m[0] | inp.m[1], inp.m[2:], 0))
 983                 ]
 984
 985     def shift_down_multi(self, diff, inp=None):
 986         """ shifts a mantissa down. exponent is increased to compensate
 987
 988             accuracy is lost as a result in the mantissa however there are 3
 989             guard bits (the latter of which is the "sticky" bit)
 990
 991             this code works by variable-shifting the mantissa by up to
 992             its maximum bit-length: no point doing more (it'll still be
 993             zero).
 994
 995             the sticky bit is computed by shifting a batch of 1s by
 996             the same amount, which will introduce zeros.  it's then
 997             inverted and used as a mask to get the LSBs of the mantissa.
 998             those are then |'d into the sticky bit.
 999         """
1000         if inp is None:
1001             inp = self
1002         sm = MultiShift(self.width)
1003         mw = Const(self.m_width-1, len(diff))
1004         maxslen = Mux(diff > mw, mw, diff)
1005         rs = sm.rshift(inp.m[1:], maxslen)
1006         maxsleni = mw - maxslen
1007         m_mask = sm.rshift(self.m1s[1:], maxsleni)  # shift and invert
1008
1009         #stickybit = reduce(or_, inp.m[1:] & m_mask) | inp.m[0]
1010         stickybit = (inp.m[1:] & m_mask).bool() | inp.m[0]
1011         return [self.e.eq(inp.e + diff),
1012                 self.m.eq(Cat(stickybit, rs))
1013                 ]
1014
1015     def shift_up_multi(self, diff):
1016         """ shifts a mantissa up. exponent is decreased to compensate
1017         """
1018         sm = MultiShift(self.width)
1019         mw = Const(self.m_width, len(diff))
1020         maxslen = Mux(diff > mw, mw, diff)
1021
1022         return [self.e.eq(self.e - diff),
1023                 self.m.eq(sm.lshift(self.m, maxslen))
1024                 ]
1025
1026
class Trigger(Elaboratable):
    """ stb / ack pair with a combinatorial "trigger" output

        trigger is high whenever both stb and ack are high.
    """

    def __init__(self):
        self.stb = Signal(reset=0)
        self.ack = Signal()
        self.trigger = Signal(reset_less=True)

    def elaborate(self, platform):
        module = Module()
        both_set = self.stb & self.ack
        module.d.comb += self.trigger.eq(both_set)
        return module

    def eq(self, inp):
        """ returns assignments copying stb and ack from inp
        """
        copy_stb = self.stb.eq(inp.stb)
        copy_ack = self.ack.eq(inp.ack)
        return [copy_stb, copy_ack]

    def ports(self):
        """ returns the list [stb, ack]
        """
        return [self.stb, self.ack]
1046
1047
class FPOpIn(PrevControl):
    """ PrevControl carrying a floating-point operand of a given width

        NOTE(review): chain_inv / chain_from use self.stb and self.ack,
        which are not defined in this class -- confirm PrevControl
        provides them.
    """

    def __init__(self, width):
        PrevControl.__init__(self)
        self.width = width

    @property
    def v(self):
        """ the operand value: an alias for data_i
        """
        return self.data_i

    def chain_inv(self, in_op, extra=None):
        """ chains from in_op, sending back the *inverse* of self.ack

            extra, when given, is ANDed into the incoming stb.
            returns a list of assignments.
        """
        stb = in_op.stb if extra is None else in_op.stb & extra
        recv_value = self.v.eq(in_op.v)
        recv_stb = self.stb.eq(stb)
        send_ack = in_op.ack.eq(~self.ack)
        return [recv_value, recv_stb, send_ack]

    def chain_from(self, in_op, extra=None):
        """ chains from in_op, sending back self.ack unmodified

            extra, when given, is ANDed into the incoming stb.
            returns a list of assignments.
        """
        stb = in_op.stb if extra is None else in_op.stb & extra
        recv_value = self.v.eq(in_op.v)
        recv_stb = self.stb.eq(stb)
        send_ack = in_op.ack.eq(self.ack)
        return [recv_value, recv_stb, send_ack]
1074
1075
class FPOpOut(NextControl):
    """ NextControl carrying a floating-point result of a given width

        NOTE(review): chain_inv / chain_from use self.stb and self.ack,
        which are not defined in this class -- confirm NextControl
        provides them.
    """

    def __init__(self, width):
        NextControl.__init__(self)
        self.width = width

    @property
    def v(self):
        """ the result value: an alias for data_o
        """
        return self.data_o

    def chain_inv(self, in_op, extra=None):
        """ chains from in_op, sending back the *inverse* of self.ack

            extra, when given, is ANDed into the incoming stb.
            returns a list of assignments.
        """
        stb = in_op.stb if extra is None else in_op.stb & extra
        recv_value = self.v.eq(in_op.v)
        recv_stb = self.stb.eq(stb)
        send_ack = in_op.ack.eq(~self.ack)
        return [recv_value, recv_stb, send_ack]

    def chain_from(self, in_op, extra=None):
        """ chains from in_op, sending back self.ack unmodified

            extra, when given, is ANDed into the incoming stb.
            returns a list of assignments.
        """
        stb = in_op.stb if extra is None else in_op.stb & extra
        recv_value = self.v.eq(in_op.v)
        recv_stb = self.stb.eq(stb)
        send_ack = in_op.ack.eq(self.ack)
        return [recv_value, recv_stb, send_ack]
1102
1103
class Overflow:
    """ rounding state: guard / round / sticky bits, mantissa bit 0,
        sign, rounding mode and the 5-bit fpflags field, plus the
        per-rounding-mode "round up?" equations.
    """
    # TODO: change FFLAGS to be FPSCR's status flags
    FFLAGS_NV = Const(0b10000, 5)  # invalid operation
    FFLAGS_DZ = Const(0b01000, 5)  # divide by zero
    FFLAGS_OF = Const(0b00100, 5)  # overflow
    FFLAGS_UF = Const(0b00010, 5)  # underflow
    FFLAGS_NX = Const(0b00001, 5)  # inexact

    def __init__(self, name=None):
        # name is used as a prefix on every signal name
        prefix = "" if name is None else name
        self.guard = Signal(reset_less=True, name=prefix+"guard")  # tot[2]
        self.round_bit = Signal(reset_less=True, name=prefix+"round")  # tot[1]
        self.sticky = Signal(reset_less=True, name=prefix+"sticky")  # tot[0]
        self.m0 = Signal(reset_less=True, name=prefix+"m0")  # mantissa bit 0
        self.fpflags = Signal(5, reset_less=True, name=prefix+"fflags")

        self.sign = Signal(reset_less=True, name=prefix+"sign")
        """sign bit -- 1 means negative, 0 means positive"""

        self.rm = Signal(FPRoundingMode, name=prefix+"rm",
                         reset=FPRoundingMode.DEFAULT)
        """rounding mode"""

    def __iter__(self):
        yield from (self.guard, self.round_bit, self.sticky, self.m0,
                    self.fpflags, self.sign, self.rm)

    def eq(self, inp):
        """ returns assignments copying every field from inp
        """
        fields = ("guard", "round_bit", "sticky", "m0",
                  "fpflags", "sign", "rm")
        return [getattr(self, field).eq(getattr(inp, field))
                for field in fields]

    @property
    def roundz_rne(self):
        """true if the mantissa should be rounded up for `rm == RNE`

        assumes the rounding mode is `ROUND_NEAREST_TIES_TO_EVEN`
        """
        # guard set, and either above the half-way point (round/sticky)
        # or exactly half-way with an odd mantissa (m0)
        tie_break = self.round_bit | self.sticky | self.m0
        return self.guard & tie_break

    @property
    def roundz_rna(self):
        """true if the mantissa should be rounded up for `rm == RNA`

        assumes the rounding mode is `ROUND_NEAREST_TIES_TO_AWAY`
        """
        return self.guard

    @property
    def roundz_rtn(self):
        """true if the mantissa should be rounded up for `rm == RTN`

        assumes the rounding mode is `ROUND_TOWARDS_NEGATIVE`
        """
        any_extra = self.guard | self.round_bit | self.sticky
        return self.sign & any_extra

    @property
    def roundz_rto(self):
        """true if the mantissa should be rounded up for `rm in (RTOP, RTON)`

        assumes the rounding mode is `ROUND_TO_ODD_UNSIGNED_ZEROS_ARE_POSITIVE`
        or `ROUND_TO_ODD_UNSIGNED_ZEROS_ARE_NEGATIVE`
        """
        any_extra = self.guard | self.round_bit | self.sticky
        return ~self.m0 & any_extra

    @property
    def roundz_rtp(self):
        """true if the mantissa should be rounded up for `rm == RTP`

        assumes the rounding mode is `ROUND_TOWARDS_POSITIVE`
        """
        any_extra = self.guard | self.round_bit | self.sticky
        return ~self.sign & any_extra

    @property
    def roundz_rtz(self):
        """true if the mantissa should be rounded up for `rm == RTZ`

        assumes the rounding mode is `ROUND_TOWARDS_ZERO`: never rounds up
        """
        return False

    @property
    def roundz(self):
        """true if the mantissa should be rounded up for the current rounding
        mode `self.rm`
        """
        by_mode = {
            FPRoundingMode.RNA: self.roundz_rna,
            FPRoundingMode.RNE: self.roundz_rne,
            FPRoundingMode.RTN: self.roundz_rtn,
            FPRoundingMode.RTOP: self.roundz_rto,
            FPRoundingMode.RTON: self.roundz_rto,
            FPRoundingMode.RTP: self.roundz_rtp,
            FPRoundingMode.RTZ: self.roundz_rtz,
        }
        # array of per-mode results, indexed by the rounding-mode signal
        choices = FPRoundingMode.make_array(lambda mode: by_mode[mode])
        return choices[self.rm]
1211
1212
class OverflowMod(Elaboratable, Overflow):
    """ Overflow as a module: adds a roundz_out signal that is driven
        combinatorially from the Overflow.roundz property.
    """

    def __init__(self, name=None):
        Overflow.__init__(self, name)
        if name is None:
            name = ""
        self.roundz_out = Signal(reset_less=True, name=name+"roundz_out")

    def __iter__(self):
        yield from Overflow.__iter__(self)
        yield self.roundz_out

    def eq(self, inp):
        """ returns assignments copying roundz_out and every Overflow
            field from inp (inp must therefore have a roundz_out)
        """
        # inp has to be forwarded: Overflow.eq takes it as a required
        # argument, so omitting it raised TypeError on every call
        return [self.roundz_out.eq(inp.roundz_out)] + Overflow.eq(self, inp)

    def elaborate(self, platform):
        m = Module()
        m.d.comb += self.roundz_out.eq(self.roundz) # roundz is a property
        return m
1231
1232
class FPBase:
    """ IEEE754 Floating Point Base Class

        a collection of FSM-state helper functions for FP manipulation:
        fetching operands, denormalisation, normalisation, rounding,
        corrections, packing and output of the result.  each takes the
        module (m) being built and adds statements to it.
    """

    def get_op(self, m, op, v, next_state):
        """ moves to next_state when both op.ready_o and op.valid_i_test
            are set.  the returned ack signal is ZERO in that case and
            ONE otherwise.

            returns [decoded operand (from v.decode2), ack]
        """
        decoded = v.decode2(m)
        ack = Signal()  # local name is kept: it becomes the signal's name
        handshake = (op.ready_o) & (op.valid_i_test)
        with m.If(handshake):
            m.next = next_state
            # op is latched in from FPNumIn class on same ack/stb
            m.d.comb += ack.eq(0)
        with m.Else():
            m.d.comb += ack.eq(1)
        return [decoded, ack]

    def denormalise(self, m, a):
        """ denormalises a number.  this is probably the wrong name for
            this function.

            when a.exp_n127 is set the exponent is set to N126 and the
            mantissa is left alone; otherwise the top mantissa bit (the
            implicit 1) is set.

            both cases *effectively multiply the number stored by 2*,
            which has to be taken into account when extracting the result.
        """
        with m.If(a.exp_n127):
            # limit a exponent
            m.d.sync += a.e.eq(a.fp.N126)
        with m.Else():
            # set top mantissa bit
            m.d.sync += a.m[-1].eq(1)

    def op_normalise(self, m, op, next_state):
        """ operand normalisation

            NOTE: just like "align", this one keeps going round every clock
                  until the top bit of the mantissa is set, then moves to
                  next_state
        """
        top_bit_clear = (op.m[-1] == 0)
        with m.If(top_bit_clear):
            m.d.sync += op.e.eq(op.e - 1)   # DECREASE exponent
            m.d.sync += op.m.eq(op.m << 1)  # shift mantissa UP
        with m.Else():
            m.next = next_state

    def normalise_1(self, m, z, of, next_state):
        """ first stage normalisation

            while the top mantissa bit is clear and the exponent is above
            N126, the mantissa is shifted up by one per clock, pulling
            the guard / round bits of "of" in at the bottom.

            NOTE: just like "align", this one keeps going round every clock
                  until the result's exponent is within acceptable "range"
            NOTE: the weirdness of reassigning guard and round is due to
                  the extra mantissa bits coming from tot[0..2]
        """
        needs_shift_up = (z.m[-1] == 0) & (z.e > z.fp.N126)
        with m.If(needs_shift_up):
            # order matters: the z.m[0] assignment comes after (and so
            # takes priority over) bit 0 of the whole-mantissa shift
            shift_up = [
                z.e.eq(z.e - 1),            # DECREASE exponent
                z.m.eq(z.m << 1),           # shift mantissa UP
                z.m[0].eq(of.guard),        # steal guard bit (was tot[2])
                of.guard.eq(of.round_bit),  # steal round_bit (was tot[1])
                of.round_bit.eq(0),         # reset round bit
                of.m0.eq(of.guard),
            ]
            m.d.sync += shift_up
        with m.Else():
            m.next = next_state

    def normalise_2(self, m, z, of, next_state):
        """ second stage normalisation

            while the exponent is below N126, the mantissa is shifted
            down by one per clock, pushing its low bit through guard and
            round and accumulating into sticky.

            NOTE: just like "align", this one keeps going round every clock
                  until the result's exponent is within acceptable "range"
            NOTE: the weirdness of reassigning guard and round is due to
                  the extra mantissa bits coming from tot[0..2]
        """
        exponent_too_small = z.e < z.fp.N126
        with m.If(exponent_too_small):
            shift_down = [
                z.e.eq(z.e + 1),   # INCREASE exponent
                z.m.eq(z.m >> 1),  # shift mantissa DOWN
                of.guard.eq(z.m[0]),
                of.m0.eq(z.m[1]),
                of.round_bit.eq(of.guard),
                of.sticky.eq(of.sticky | of.round_bit)
            ]
            m.d.sync += shift_down
        with m.Else():
            m.next = next_state

    def roundz(self, m, z, roundz):
        """ performs rounding on the output: when roundz is set the
            mantissa is incremented, and the exponent too if the mantissa
            was all 1s.  TODO: different kinds of rounding
        """
        with m.If(roundz):
            # mantissa rounds up
            m.d.sync += z.m.eq(z.m + 1)
            mantissa_all_ones = z.m == z.fp.m1s
            with m.If(mantissa_all_ones):
                # exponent rounds up
                m.d.sync += z.e.eq(z.e + 1)

    def corrections(self, m, z, next_state):
        """ denormalisation and sign-bug corrections

            always moves to next_state; additionally sets the exponent
            to N127 when z.is_denormalised.
        """
        m.next = next_state
        with m.If(z.is_denormalised):
            # denormalised, correct exponent to zero
            m.d.sync += z.e.eq(z.fp.N127)

    def pack(self, m, z, next_state):
        """ packs the result into the output (detects overflow->Inf)

            always moves to next_state.
        """
        m.next = next_state
        with m.If(z.is_overflowed):
            # if overflow occurs, return inf
            m.d.sync += z.inf(z.s)
        with m.Else():
            m.d.sync += z.create(z.s, z.e, z.m)

    def put_z(self, m, z, out_z, next_state):
        """ put_z: stores the result in the output.  raises valid_o and
            waits for ready_i_test to be set before moving to next_state.
            valid_o is reset back to zero when that occurs.
        """
        m.d.sync += out_z.v.eq(z.v)
        transfer_done = out_z.valid_o & out_z.ready_i_test
        with m.If(transfer_done):
            m.d.sync += out_z.valid_o.eq(0)
            m.next = next_state
        with m.Else():
            m.d.sync += out_z.valid_o.eq(1)
1363
1364
class FPState(FPBase):
    """ FPBase with a state_from attribute, plus inputs / outputs
        dictionaries whose entries are also exposed as attributes.
    """

    def __init__(self, state_from):
        self.state_from = state_from

    def set_inputs(self, inputs):
        """ records the inputs dict and sets each entry as an attribute
        """
        self.inputs = inputs
        for name in inputs:
            setattr(self, name, inputs[name])

    def set_outputs(self, outputs):
        """ records the outputs dict and sets each entry as an attribute
        """
        self.outputs = outputs
        for name in outputs:
            setattr(self, name, outputs[name])
1378
1379
class FPID:
    """ optional pair of ID signals (in_mid / out_mid) of width id_wid

        when id_wid is falsy (None or 0) no signals are created: both
        in_mid and out_mid are None and idsync() does nothing.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """ adds a sync assignment of in_mid to out_mid on module m,
            if the ID signals exist
        """
        # same truthiness test as __init__: "is not None" let id_wid == 0
        # through, where out_mid is None and .eq() raised AttributeError
        if self.id_wid:
            m.d.sync += self.out_mid.eq(self.in_mid)
1393
1394
# when executed directly as a script, hand control to unittest's
# command-line runner
if __name__ == '__main__':
    unittest.main()