src/add/fpbase.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Signal, Cat, Const, Mux, Module
   6 from math import log
   7 from operator import or_
   8 from functools import reduce
   9
  10 from pipeline import ObjectProxy
  11
  12
  13 class MultiShiftR:
  14
  15     def __init__(self, width):
  16         self.width = width
  17         self.smax = int(log(width) / log(2))
  18         self.i = Signal(width, reset_less=True)
  19         self.s = Signal(self.smax, reset_less=True)
  20         self.o = Signal(width, reset_less=True)
  21
  22     def elaborate(self, platform):
  23         m = Module()
  24         m.d.comb += self.o.eq(self.i >> self.s)
  25         return m
  26
  27
  28 class MultiShift:
  29     """ Generates variable-length single-cycle shifter from a series
  30         of conditional tests on each bit of the left/right shift operand.
  31         Each bit tested produces output shifted by that number of bits,
  32         in a binary fashion: bit 1 if set shifts by 1 bit, bit 2 if set
  33         shifts by 2 bits, each partial result cascading to the next Mux.
  34
  35         Could be adapted to do arithmetic shift by taking copies of the
  36         MSB instead of zeros.
  37     """
  38
  39     def __init__(self, width):
  40         self.width = width
  41         self.smax = int(log(width) / log(2))
  42
  43     def lshift(self, op, s):
  44         res = op << s
  45         return res[:len(op)]
  46         res = op
  47         for i in range(self.smax):
  48             zeros = [0] * (1<<i)
  49             res = Mux(s & (1<<i), Cat(zeros, res[0:-(1<<i)]), res)
  50         return res
  51
  52     def rshift(self, op, s):
  53         res = op >> s
  54         return res[:len(op)]
  55         res = op
  56         for i in range(self.smax):
  57             zeros = [0] * (1<<i)
  58             res = Mux(s & (1<<i), Cat(res[(1<<i):], zeros), res)
  59         return res
  60
  61
  62 class FPNumBase:
  63     """ Floating-point Base Number Class
  64     """
  65     def __init__(self, width, m_extra=True):
  66         self.width = width
  67         m_width = {16: 11, 32: 24, 64: 53}[width] # 1 extra bit (overflow)
  68         e_width = {16: 7,  32: 10, 64: 13}[width] # 2 extra bits (overflow)
  69         e_max = 1<<(e_width-3)
  70         self.rmw = m_width # real mantissa width (not including extras)
  71         self.e_max = e_max
  72         if m_extra:
  73             # mantissa extra bits (top,guard,round)
  74             self.m_extra = 3
  75             m_width += self.m_extra
  76         else:
  77             self.m_extra = 0
  78         #print (m_width, e_width, e_max, self.rmw, self.m_extra)
  79         self.m_width = m_width
  80         self.e_width = e_width
  81         self.e_start = self.rmw - 1
  82         self.e_end = self.rmw + self.e_width - 3 # for decoding
  83
  84         self.v = Signal(width, reset_less=True)      # Latched copy of value
  85         self.m = Signal(m_width, reset_less=True)    # Mantissa
  86         self.e = Signal((e_width, True), reset_less=True) # Exponent: IEEE754exp+2 bits, signed
  87         self.s = Signal(reset_less=True)           # Sign bit
  88
  89         self.mzero = Const(0, (m_width, False))
  90         m_msb = 1<<(self.m_width-2)
  91         self.msb1 = Const(m_msb, (m_width, False))
  92         self.m1s = Const(-1, (m_width, False))
  93         self.P128 = Const(e_max, (e_width, True))
  94         self.P127 = Const(e_max-1, (e_width, True))
  95         self.N127 = Const(-(e_max-1), (e_width, True))
  96         self.N126 = Const(-(e_max-2), (e_width, True))
  97
  98         self.is_nan = Signal(reset_less=True)
  99         self.is_zero = Signal(reset_less=True)
 100         self.is_inf = Signal(reset_less=True)
 101         self.is_overflowed = Signal(reset_less=True)
 102         self.is_denormalised = Signal(reset_less=True)
 103         self.exp_128 = Signal(reset_less=True)
 104         self.exp_sub_n126 = Signal((e_width, True), reset_less=True)
 105         self.exp_lt_n126 = Signal(reset_less=True)
 106         self.exp_gt_n126 = Signal(reset_less=True)
 107         self.exp_gt127 = Signal(reset_less=True)
 108         self.exp_n127 = Signal(reset_less=True)
 109         self.exp_n126 = Signal(reset_less=True)
 110         self.m_zero = Signal(reset_less=True)
 111         self.m_msbzero = Signal(reset_less=True)
 112
 113     def elaborate(self, platform):
 114         m = Module()
 115         m.d.comb += self.is_nan.eq(self._is_nan())
 116         m.d.comb += self.is_zero.eq(self._is_zero())
 117         m.d.comb += self.is_inf.eq(self._is_inf())
 118         m.d.comb += self.is_overflowed.eq(self._is_overflowed())
 119         m.d.comb += self.is_denormalised.eq(self._is_denormalised())
 120         m.d.comb += self.exp_128.eq(self.e == self.P128)
 121         m.d.comb += self.exp_sub_n126.eq(self.e - self.N126)
 122         m.d.comb += self.exp_gt_n126.eq(self.exp_sub_n126 > 0)
 123         m.d.comb += self.exp_lt_n126.eq(self.exp_sub_n126 < 0)
 124         m.d.comb += self.exp_gt127.eq(self.e > self.P127)
 125         m.d.comb += self.exp_n127.eq(self.e == self.N127)
 126         m.d.comb += self.exp_n126.eq(self.e == self.N126)
 127         m.d.comb += self.m_zero.eq(self.m == self.mzero)
 128         m.d.comb += self.m_msbzero.eq(self.m[self.e_start] == 0)
 129
 130         return m
 131
 132     def _is_nan(self):
 133         return (self.exp_128) & (~self.m_zero)
 134
 135     def _is_inf(self):
 136         return (self.exp_128) & (self.m_zero)
 137
 138     def _is_zero(self):
 139         return (self.exp_n127) & (self.m_zero)
 140
 141     def _is_overflowed(self):
 142         return self.exp_gt127
 143
 144     def _is_denormalised(self):
 145         return (self.exp_n126) & (self.m_msbzero)
 146
 147     def eq(self, inp):
 148         return [self.s.eq(inp.s), self.e.eq(inp.e), self.m.eq(inp.m)]
 149
 150
 151 class FPNumOut(FPNumBase):
 152     """ Floating-point Number Class
 153
 154         Contains signals for an incoming copy of the value, decoded into
 155         sign / exponent / mantissa.
 156         Also contains encoding functions, creation and recognition of
 157         zero, NaN and inf (all signed)
 158
 159         Four extra bits are included in the mantissa: the top bit
 160         (m[-1]) is effectively a carry-overflow.  The other three are
 161         guard (m[2]), round (m[1]), and sticky (m[0])
 162     """
 163     def __init__(self, width, m_extra=True):
 164         FPNumBase.__init__(self, width, m_extra)
 165
 166     def elaborate(self, platform):
 167         m = FPNumBase.elaborate(self, platform)
 168
 169         return m
 170
 171     def create(self, s, e, m):
 172         """ creates a value from sign / exponent / mantissa
 173
 174             bias is added here, to the exponent
 175         """
 176         return [
 177           self.v[-1].eq(s),          # sign
 178           self.v[self.e_start:self.e_end].eq(e + self.P127), # exp (add on bias)
 179           self.v[0:self.e_start].eq(m)         # mantissa
 180         ]
 181
 182     def nan(self, s):
 183         return self.create(s, self.P128, 1<<(self.e_start-1))
 184
 185     def inf(self, s):
 186         return self.create(s, self.P128, 0)
 187
 188     def zero(self, s):
 189         return self.create(s, self.N127, 0)
 190
 191     def create2(self, s, e, m):
 192         """ creates a value from sign / exponent / mantissa
 193
 194             bias is added here, to the exponent
 195         """
 196         e = e + self.P127 # exp (add on bias)
 197         return Cat(m[0:self.e_start],
 198                    e[0:self.e_end-self.e_start],
 199                    s)
 200
 201     def nan2(self, s):
 202         return self.create2(s, self.P128, self.msb1)
 203
 204     def inf2(self, s):
 205         return self.create2(s, self.P128, self.mzero)
 206
 207     def zero2(self, s):
 208         return self.create2(s, self.N127, self.mzero)
 209
 210
 211 class MultiShiftRMerge:
 212     """ shifts down (right) and merges lower bits into m[0].
 213         m[0] is the "sticky" bit, basically
 214     """
 215     def __init__(self, width, s_max=None):
 216         if s_max is None:
 217             s_max = int(log(width) / log(2))
 218         self.smax = s_max
 219         self.m = Signal(width, reset_less=True)
 220         self.inp = Signal(width, reset_less=True)
 221         self.diff = Signal(s_max, reset_less=True)
 222         self.width = width
 223
 224     def elaborate(self, platform):
 225         m = Module()
 226
 227         rs = Signal(self.width, reset_less=True)
 228         m_mask = Signal(self.width, reset_less=True)
 229         smask = Signal(self.width, reset_less=True)
 230         stickybit = Signal(reset_less=True)
 231         maxslen = Signal(self.smax, reset_less=True)
 232         maxsleni = Signal(self.smax, reset_less=True)
 233
 234         sm = MultiShift(self.width-1)
 235         m0s = Const(0, self.width-1)
 236         mw = Const(self.width-1, len(self.diff))
 237         m.d.comb += [maxslen.eq(Mux(self.diff > mw, mw, self.diff)),
 238                      maxsleni.eq(Mux(self.diff > mw, 0, mw-self.diff)),
 239                     ]
 240
 241         m.d.comb += [
 242                 # shift mantissa by maxslen, mask by inverse
 243                 rs.eq(sm.rshift(self.inp[1:], maxslen)),
 244                 m_mask.eq(sm.rshift(~m0s, maxsleni)),
 245                 smask.eq(self.inp[1:] & m_mask),
 246                 # sticky bit combines all mask (and mantissa low bit)
 247                 stickybit.eq(smask.bool() | self.inp[0]),
 248                 # mantissa result contains m[0] already.
 249                 self.m.eq(Cat(stickybit, rs))
 250            ]
 251         return m
 252
 253
 254 class FPNumShift(FPNumBase):
 255     """ Floating-point Number Class for shifting
 256     """
 257     def __init__(self, mainm, op, inv, width, m_extra=True):
 258         FPNumBase.__init__(self, width, m_extra)
 259         self.latch_in = Signal()
 260         self.mainm = mainm
 261         self.inv = inv
 262         self.op = op
 263
 264     def elaborate(self, platform):
 265         m = FPNumBase.elaborate(self, platform)
 266
 267         m.d.comb += self.s.eq(op.s)
 268         m.d.comb += self.e.eq(op.e)
 269         m.d.comb += self.m.eq(op.m)
 270
 271         with self.mainm.State("align"):
 272             with m.If(self.e < self.inv.e):
 273                 m.d.sync += self.shift_down()
 274
 275         return m
 276
 277     def shift_down(self, inp):
 278         """ shifts a mantissa down by one. exponent is increased to compensate
 279
 280             accuracy is lost as a result in the mantissa however there are 3
 281             guard bits (the latter of which is the "sticky" bit)
 282         """
 283         return [self.e.eq(inp.e + 1),
 284                 self.m.eq(Cat(inp.m[0] | inp.m[1], inp.m[2:], 0))
 285                ]
 286
 287     def shift_down_multi(self, diff):
 288         """ shifts a mantissa down. exponent is increased to compensate
 289
 290             accuracy is lost as a result in the mantissa however there are 3
 291             guard bits (the latter of which is the "sticky" bit)
 292
 293             this code works by variable-shifting the mantissa by up to
 294             its maximum bit-length: no point doing more (it'll still be
 295             zero).
 296
 297             the sticky bit is computed by shifting a batch of 1s by
 298             the same amount, which will introduce zeros.  it's then
 299             inverted and used as a mask to get the LSBs of the mantissa.
 300             those are then |'d into the sticky bit.
 301         """
 302         sm = MultiShift(self.width)
 303         mw = Const(self.m_width-1, len(diff))
 304         maxslen = Mux(diff > mw, mw, diff)
 305         rs = sm.rshift(self.m[1:], maxslen)
 306         maxsleni = mw - maxslen
 307         m_mask = sm.rshift(self.m1s[1:], maxsleni) # shift and invert
 308
 309         stickybits = reduce(or_, self.m[1:] & m_mask) | self.m[0]
 310         return [self.e.eq(self.e + diff),
 311                 self.m.eq(Cat(stickybits, rs))
 312                ]
 313
 314     def shift_up_multi(self, diff):
 315         """ shifts a mantissa up. exponent is decreased to compensate
 316         """
 317         sm = MultiShift(self.width)
 318         mw = Const(self.m_width, len(diff))
 319         maxslen = Mux(diff > mw, mw, diff)
 320
 321         return [self.e.eq(self.e - diff),
 322                 self.m.eq(sm.lshift(self.m, maxslen))
 323                ]
 324
 325
 326 class FPNumDecode(FPNumBase):
 327     """ Floating-point Number Class
 328
 329         Contains signals for an incoming copy of the value, decoded into
 330         sign / exponent / mantissa.
 331         Also contains encoding functions, creation and recognition of
 332         zero, NaN and inf (all signed)
 333
 334         Four extra bits are included in the mantissa: the top bit
 335         (m[-1]) is effectively a carry-overflow.  The other three are
 336         guard (m[2]), round (m[1]), and sticky (m[0])
 337     """
 338     def __init__(self, op, width, m_extra=True):
 339         FPNumBase.__init__(self, width, m_extra)
 340         self.op = op
 341
 342     def elaborate(self, platform):
 343         m = FPNumBase.elaborate(self, platform)
 344
 345         m.d.comb += self.decode(self.v)
 346
 347         return m
 348
 349     def decode(self, v):
 350         """ decodes a latched value into sign / exponent / mantissa
 351
 352             bias is subtracted here, from the exponent.  exponent
 353             is extended to 10 bits so that subtract 127 is done on
 354             a 10-bit number
 355         """
 356         args = [0] * self.m_extra + [v[0:self.e_start]] # pad with extra zeros
 357         #print ("decode", self.e_end)
 358         return [self.m.eq(Cat(*args)), # mantissa
 359                 self.e.eq(v[self.e_start:self.e_end] - self.P127), # exp
 360                 self.s.eq(v[-1]),                 # sign
 361                 ]
 362
 363 class FPNumIn(FPNumBase):
 364     """ Floating-point Number Class
 365
 366         Contains signals for an incoming copy of the value, decoded into
 367         sign / exponent / mantissa.
 368         Also contains encoding functions, creation and recognition of
 369         zero, NaN and inf (all signed)
 370
 371         Four extra bits are included in the mantissa: the top bit
 372         (m[-1]) is effectively a carry-overflow.  The other three are
 373         guard (m[2]), round (m[1]), and sticky (m[0])
 374     """
 375     def __init__(self, op, width, m_extra=True):
 376         FPNumBase.__init__(self, width, m_extra)
 377         self.latch_in = Signal()
 378         self.op = op
 379
 380     def decode2(self, m):
 381         """ decodes a latched value into sign / exponent / mantissa
 382
 383             bias is subtracted here, from the exponent.  exponent
 384             is extended to 10 bits so that subtract 127 is done on
 385             a 10-bit number
 386         """
 387         v = self.v
 388         args = [0] * self.m_extra + [v[0:self.e_start]] # pad with extra zeros
 389         #print ("decode", self.e_end)
 390         res = ObjectProxy(m, pipemode=False)
 391         res.m = Cat(*args)                             # mantissa
 392         res.e = v[self.e_start:self.e_end] - self.P127 # exp
 393         res.s = v[-1]                                  # sign
 394         return res
 395
 396     def decode(self, v):
 397         """ decodes a latched value into sign / exponent / mantissa
 398
 399             bias is subtracted here, from the exponent.  exponent
 400             is extended to 10 bits so that subtract 127 is done on
 401             a 10-bit number
 402         """
 403         args = [0] * self.m_extra + [v[0:self.e_start]] # pad with extra zeros
 404         #print ("decode", self.e_end)
 405         return [self.m.eq(Cat(*args)), # mantissa
 406                 self.e.eq(v[self.e_start:self.e_end] - self.P127), # exp
 407                 self.s.eq(v[-1]),                 # sign
 408                 ]
 409
 410     def shift_down(self, inp):
 411         """ shifts a mantissa down by one. exponent is increased to compensate
 412
 413             accuracy is lost as a result in the mantissa however there are 3
 414             guard bits (the latter of which is the "sticky" bit)
 415         """
 416         return [self.e.eq(inp.e + 1),
 417                 self.m.eq(Cat(inp.m[0] | inp.m[1], inp.m[2:], 0))
 418                ]
 419
 420     def shift_down_multi(self, diff, inp=None):
 421         """ shifts a mantissa down. exponent is increased to compensate
 422
 423             accuracy is lost as a result in the mantissa however there are 3
 424             guard bits (the latter of which is the "sticky" bit)
 425
 426             this code works by variable-shifting the mantissa by up to
 427             its maximum bit-length: no point doing more (it'll still be
 428             zero).
 429
 430             the sticky bit is computed by shifting a batch of 1s by
 431             the same amount, which will introduce zeros.  it's then
 432             inverted and used as a mask to get the LSBs of the mantissa.
 433             those are then |'d into the sticky bit.
 434         """
 435         if inp is None:
 436             inp = self
 437         sm = MultiShift(self.width)
 438         mw = Const(self.m_width-1, len(diff))
 439         maxslen = Mux(diff > mw, mw, diff)
 440         rs = sm.rshift(inp.m[1:], maxslen)
 441         maxsleni = mw - maxslen
 442         m_mask = sm.rshift(self.m1s[1:], maxsleni) # shift and invert
 443
 444         #stickybit = reduce(or_, inp.m[1:] & m_mask) | inp.m[0]
 445         stickybit = (inp.m[1:] & m_mask).bool() | inp.m[0]
 446         return [self.e.eq(inp.e + diff),
 447                 self.m.eq(Cat(stickybit, rs))
 448                ]
 449
 450     def shift_up_multi(self, diff):
 451         """ shifts a mantissa up. exponent is decreased to compensate
 452         """
 453         sm = MultiShift(self.width)
 454         mw = Const(self.m_width, len(diff))
 455         maxslen = Mux(diff > mw, mw, diff)
 456
 457         return [self.e.eq(self.e - diff),
 458                 self.m.eq(sm.lshift(self.m, maxslen))
 459                ]
 460
 461 class Trigger:
 462     def __init__(self):
 463
 464         self.stb = Signal(reset=0)
 465         self.ack = Signal()
 466         self.trigger = Signal(reset_less=True)
 467
 468     def elaborate(self, platform):
 469         m = Module()
 470         m.d.comb += self.trigger.eq(self.stb & self.ack)
 471         return m
 472
 473     def eq(self, inp):
 474         return [self.stb.eq(inp.stb),
 475                 self.ack.eq(inp.ack)
 476                ]
 477
 478     def ports(self):
 479         return [self.stb, self.ack]
 480
 481
 482 class FPOp(Trigger):
 483     def __init__(self, width):
 484         Trigger.__init__(self)
 485         self.width = width
 486
 487         self.v   = Signal(width)
 488
 489     def chain_inv(self, in_op, extra=None):
 490         stb = in_op.stb
 491         if extra is not None:
 492             stb = stb & extra
 493         return [self.v.eq(in_op.v),          # receive value
 494                 self.stb.eq(stb),      # receive STB
 495                 in_op.ack.eq(~self.ack), # send ACK
 496                ]
 497
 498     def chain_from(self, in_op, extra=None):
 499         stb = in_op.stb
 500         if extra is not None:
 501             stb = stb & extra
 502         return [self.v.eq(in_op.v),          # receive value
 503                 self.stb.eq(stb),      # receive STB
 504                 in_op.ack.eq(self.ack), # send ACK
 505                ]
 506
 507     def eq(self, inp):
 508         return [self.v.eq(inp.v),
 509                 self.stb.eq(inp.stb),
 510                 self.ack.eq(inp.ack)
 511                ]
 512
 513     def ports(self):
 514         return [self.v, self.stb, self.ack]
 515
 516
 517 class Overflow:
 518     def __init__(self):
 519         self.guard = Signal(reset_less=True)     # tot[2]
 520         self.round_bit = Signal(reset_less=True) # tot[1]
 521         self.sticky = Signal(reset_less=True)    # tot[0]
 522         self.m0 = Signal(reset_less=True)        # mantissa zero bit
 523
 524         self.roundz = Signal(reset_less=True)
 525
 526     def eq(self, inp):
 527         return [self.guard.eq(inp.guard),
 528                 self.round_bit.eq(inp.round_bit),
 529                 self.sticky.eq(inp.sticky),
 530                 self.m0.eq(inp.m0)]
 531
 532     def elaborate(self, platform):
 533         m = Module()
 534         m.d.comb += self.roundz.eq(self.guard & \
 535                                    (self.round_bit | self.sticky | self.m0))
 536         return m
 537
 538
 539 class FPBase:
 540     """ IEEE754 Floating Point Base Class
 541
 542         contains common functions for FP manipulation, such as
 543         extracting and packing operands, normalisation, denormalisation,
 544         rounding etc.
 545     """
 546
 547     def get_op(self, m, op, v, next_state):
 548         """ this function moves to the next state and copies the operand
 549             when both stb and ack are 1.
 550             acknowledgement is sent by setting ack to ZERO.
 551         """
 552         res = v.decode2(m)
 553         ack = Signal()
 554         with m.If((op.ack) & (op.stb)):
 555             m.next = next_state
 556             # op is latched in from FPNumIn class on same ack/stb
 557             m.d.comb += ack.eq(0)
 558         with m.Else():
 559             m.d.comb += ack.eq(1)
 560         return [res, ack]
 561
 562     def denormalise(self, m, a):
 563         """ denormalises a number.  this is probably the wrong name for
 564             this function.  for normalised numbers (exponent != minimum)
 565             one *extra* bit (the implicit 1) is added *back in*.
 566             for denormalised numbers, the mantissa is left alone
 567             and the exponent increased by 1.
 568
 569             both cases *effectively multiply the number stored by 2*,
 570             which has to be taken into account when extracting the result.
 571         """
 572         with m.If(a.exp_n127):
 573             m.d.sync += a.e.eq(a.N126) # limit a exponent
 574         with m.Else():
 575             m.d.sync += a.m[-1].eq(1) # set top mantissa bit
 576
 577     def op_normalise(self, m, op, next_state):
 578         """ operand normalisation
 579             NOTE: just like "align", this one keeps going round every clock
 580                   until the result's exponent is within acceptable "range"
 581         """
 582         with m.If((op.m[-1] == 0)): # check last bit of mantissa
 583             m.d.sync +=[
 584                 op.e.eq(op.e - 1),  # DECREASE exponent
 585                 op.m.eq(op.m << 1), # shift mantissa UP
 586             ]
 587         with m.Else():
 588             m.next = next_state
 589
 590     def normalise_1(self, m, z, of, next_state):
 591         """ first stage normalisation
 592
 593             NOTE: just like "align", this one keeps going round every clock
 594                   until the result's exponent is within acceptable "range"
 595             NOTE: the weirdness of reassigning guard and round is due to
 596                   the extra mantissa bits coming from tot[0..2]
 597         """
 598         with m.If((z.m[-1] == 0) & (z.e > z.N126)):
 599             m.d.sync += [
 600                 z.e.eq(z.e - 1),  # DECREASE exponent
 601                 z.m.eq(z.m << 1), # shift mantissa UP
 602                 z.m[0].eq(of.guard),       # steal guard bit (was tot[2])
 603                 of.guard.eq(of.round_bit), # steal round_bit (was tot[1])
 604                 of.round_bit.eq(0),        # reset round bit
 605                 of.m0.eq(of.guard),
 606             ]
 607         with m.Else():
 608             m.next = next_state
 609
 610     def normalise_2(self, m, z, of, next_state):
 611         """ second stage normalisation
 612
 613             NOTE: just like "align", this one keeps going round every clock
 614                   until the result's exponent is within acceptable "range"
 615             NOTE: the weirdness of reassigning guard and round is due to
 616                   the extra mantissa bits coming from tot[0..2]
 617         """
 618         with m.If(z.e < z.N126):
 619             m.d.sync +=[
 620                 z.e.eq(z.e + 1),  # INCREASE exponent
 621                 z.m.eq(z.m >> 1), # shift mantissa DOWN
 622                 of.guard.eq(z.m[0]),
 623                 of.m0.eq(z.m[1]),
 624                 of.round_bit.eq(of.guard),
 625                 of.sticky.eq(of.sticky | of.round_bit)
 626             ]
 627         with m.Else():
 628             m.next = next_state
 629
 630     def roundz(self, m, z, roundz):
 631         """ performs rounding on the output.  TODO: different kinds of rounding
 632         """
 633         with m.If(roundz):
 634             m.d.sync += z.m.eq(z.m + 1) # mantissa rounds up
 635             with m.If(z.m == z.m1s): # all 1s
 636                 m.d.sync += z.e.eq(z.e + 1) # exponent rounds up
 637
 638     def corrections(self, m, z, next_state):
 639         """ denormalisation and sign-bug corrections
 640         """
 641         m.next = next_state
 642         # denormalised, correct exponent to zero
 643         with m.If(z.is_denormalised):
 644             m.d.sync += z.e.eq(z.N127)
 645
 646     def pack(self, m, z, next_state):
 647         """ packs the result into the output (detects overflow->Inf)
 648         """
 649         m.next = next_state
 650         # if overflow occurs, return inf
 651         with m.If(z.is_overflowed):
 652             m.d.sync += z.inf(z.s)
 653         with m.Else():
 654             m.d.sync += z.create(z.s, z.e, z.m)
 655
 656     def put_z(self, m, z, out_z, next_state):
 657         """ put_z: stores the result in the output.  raises stb and waits
 658             for ack to be set to 1 before moving to the next state.
 659             resets stb back to zero when that occurs, as acknowledgement.
 660         """
 661         m.d.sync += [
 662           out_z.v.eq(z.v)
 663         ]
 664         with m.If(out_z.stb & out_z.ack):
 665             m.d.sync += out_z.stb.eq(0)
 666             m.next = next_state
 667         with m.Else():
 668             m.d.sync += out_z.stb.eq(1)
 669
 670
 671 class FPState(FPBase):
 672     def __init__(self, state_from):
 673         self.state_from = state_from
 674
 675     def set_inputs(self, inputs):
 676         self.inputs = inputs
 677         for k,v in inputs.items():
 678             setattr(self, k, v)
 679
 680     def set_outputs(self, outputs):
 681         self.outputs = outputs
 682         for k,v in outputs.items():
 683             setattr(self, k, v)
 684
 685
 686 class FPID:
 687     def __init__(self, id_wid):
 688         self.id_wid = id_wid
 689         if self.id_wid:
 690             self.in_mid = Signal(id_wid, reset_less=True)
 691             self.out_mid = Signal(id_wid, reset_less=True)
 692         else:
 693             self.in_mid = None
 694             self.out_mid = None
 695
 696     def idsync(self, m):
 697         if self.id_wid is not None:
 698             m.d.sync += self.out_mid.eq(self.in_mid)
 699
 700