1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Signal
, Cat
, Const
, Mux
, Module
, Elaboratable
7 from operator
import or_
8 from functools
import reduce
10 from nmutil
.singlepipe
import PrevControl
, NextControl
11 from nmutil
.pipeline
import ObjectProxy
16 """ Class describing binary floating-point formats based on IEEE 754.
18 :attribute e_width: the number of bits in the exponent field.
19 :attribute m_width: the number of bits stored in the mantissa
21 :attribute has_int_bit: if the FP format has an explicit integer bit (like
22 the x87 80-bit format). The bit is considered part of the mantissa.
23 :attribute has_sign: if the FP format has a sign bit. (Some Vulkan
24 Image/Buffer formats are FP numbers without a sign bit.)
32 """ Create ``FPFormat`` instance. """
33 self
.e_width
= e_width
34 self
.m_width
= m_width
35 self
.has_int_bit
= has_int_bit
36 self
.has_sign
= has_sign
38 def __eq__(self
, other
):
39 """ Check for equality. """
40 if not isinstance(other
, FPFormat
):
42 return (self
.e_width
== other
.e_width
and
43 self
.m_width
== other
.m_width
and
44 self
.has_int_bit
== other
.has_int_bit
and
45 self
.has_sign
== other
.has_sign
)
49 """ Get standard IEEE 754-2008 format.
51 :param width: bit-width of requested format.
52 :returns: the requested ``FPFormat`` instance.
54 if not instanceof(width
, int):
57 return FPFormat(5, 10)
59 return FPFormat(8, 23)
61 return FPFormat(11, 52)
63 return FPFormat(15, 112)
64 if width
> 128 and width
% 32 == 0:
65 if width
> 1000000: # arbitrary upper limit
66 raise ValueError("width too big")
67 e_width
= round(4 * math
.log2(width
)) - 13
68 return FPFormat(e_width
, width
- 1 - e_width
)
69 raise ValueError("width must be the bit-width of a valid IEEE"
70 " 754-2008 binary format")
75 if self
== self
.standard(self
.width
):
76 return f
"FPFormat.standard({self.width})"
79 retval
= f
"FPFormat({self.e_width}, {self.m_width}"
80 if self
.has_int_bit
is not False:
81 retval
+= f
", {self.has_int_bit}"
82 if self
.has_sign
is not True:
83 retval
+= f
", {self.has_sign}"
88 """ Get the total number of bits in the FP format. """
89 return self
.has_sign
+ self
.e_width
+ self
.m_width
def exponent_inf_nan(self) -> int:
    """ Get the value of the exponent field designating infinity/NaN.

    :returns: ``(1 << self.e_width) - 1``, i.e. the exponent field with
        every one of its ``e_width`` bits set.
    """
    # NOTE(review): sibling code reads this name without calling it, so it
    # is presumably wrapped in @property outside the lines visible here --
    # confirm the decorator is still present above this definition.
    return (1 << self.e_width) - 1
97 def exponent_denormal_zero(self
):
98 """ Get the value of the exponent field designating denormal/zero. """
102 def exponent_min_normal(self
):
103 """ Get the minimum value of the exponent field for normal numbers. """
def exponent_max_normal(self) -> int:
    """ Get the maximum value of the exponent field for normal numbers.

    :returns: one less than ``self.exponent_inf_nan``.
    """
    # ``exponent_inf_nan`` is read as a plain attribute (no call), exactly
    # as in the original statement.
    # NOTE(review): this presumes it is exposed as a property/attribute on
    # the instance -- confirm against the enclosing class.
    return self.exponent_inf_nan - 1
def exponent_bias(self) -> int:
    """ Get the exponent bias.

    :returns: ``(1 << (self.e_width - 1)) - 1``; for example 127 when
        ``e_width`` is 8 and 1023 when ``e_width`` is 11.
    """
    half_range = 1 << (self.e_width - 1)
    return half_range - 1
def fraction_width(self) -> int:
    """ Get the number of mantissa bits that are fraction bits.

    :returns: ``self.m_width - self.has_int_bit``.
    """
    # ``has_int_bit`` takes part in integer arithmetic directly: a bool
    # True subtracts 1 and False subtracts 0.
    return self.m_width - self.has_int_bit
124 def __init__(self
, width
):
126 self
.smax
= int(log(width
) / log(2))
127 self
.i
= Signal(width
, reset_less
=True)
128 self
.s
= Signal(self
.smax
, reset_less
=True)
129 self
.o
= Signal(width
, reset_less
=True)
131 def elaborate(self
, platform
):
133 m
.d
.comb
+= self
.o
.eq(self
.i
>> self
.s
)
138 """ Generates variable-length single-cycle shifter from a series
139 of conditional tests on each bit of the left/right shift operand.
140 Each bit tested produces output shifted by that number of bits,
141 in a binary fashion: bit 1 if set shifts by 1 bit, bit 2 if set
142 shifts by 2 bits, each partial result cascading to the next Mux.
144 Could be adapted to do arithmetic shift by taking copies of the
145 MSB instead of zeros.
148 def __init__(self
, width
):
150 self
.smax
= int(log(width
) / log(2))
152 def lshift(self
, op
, s
):
156 def rshift(self
, op
, s
):
161 class FPNumBaseRecord
:
162 """ Floating-point Base Number Class
165 def __init__(self
, width
, m_extra
=True, e_extra
=False):
167 m_width
= {16: 11, 32: 24, 64: 53}[width
] # 1 extra bit (overflow)
168 e_width
= {16: 7, 32: 10, 64: 13}[width
] # 2 extra bits (overflow)
169 e_max
= 1 << (e_width
-3)
170 self
.rmw
= m_width
- 1 # real mantissa width (not including extras)
173 # mantissa extra bits (top,guard,round)
175 m_width
+= self
.m_extra
179 self
.e_extra
= 6 # enough to cover FP64 when converting to FP16
180 e_width
+= self
.e_extra
183 # print (m_width, e_width, e_max, self.rmw, self.m_extra)
184 self
.m_width
= m_width
185 self
.e_width
= e_width
186 self
.e_start
= self
.rmw
187 self
.e_end
= self
.rmw
+ self
.e_width
- 2 # for decoding
189 self
.v
= Signal(width
, reset_less
=True) # Latched copy of value
190 self
.m
= Signal(m_width
, reset_less
=True) # Mantissa
191 self
.e
= Signal((e_width
, True), reset_less
=True) # exp+2 bits, signed
192 self
.s
= Signal(reset_less
=True) # Sign bit
197 def drop_in(self
, fp
):
203 fp
.width
= self
.width
204 fp
.e_width
= self
.e_width
205 fp
.e_max
= self
.e_max
206 fp
.m_width
= self
.m_width
207 fp
.e_start
= self
.e_start
208 fp
.e_end
= self
.e_end
209 fp
.m_extra
= self
.m_extra
211 m_width
= self
.m_width
213 e_width
= self
.e_width
215 self
.mzero
= Const(0, (m_width
, False))
216 m_msb
= 1 << (self
.m_width
-2)
217 self
.msb1
= Const(m_msb
, (m_width
, False))
218 self
.m1s
= Const(-1, (m_width
, False))
219 self
.P128
= Const(e_max
, (e_width
, True))
220 self
.P127
= Const(e_max
-1, (e_width
, True))
221 self
.N127
= Const(-(e_max
-1), (e_width
, True))
222 self
.N126
= Const(-(e_max
-2), (e_width
, True))
224 def create(self
, s
, e
, m
):
225 """ creates a value from sign / exponent / mantissa
227 bias is added here, to the exponent.
229 NOTE: order is important, because e_start/e_end can be
230 a bit too long (overwriting s).
233 self
.v
[0:self
.e_start
].eq(m
), # mantissa
234 self
.v
[self
.e_start
:self
.e_end
].eq(e
+ self
.fp
.P127
), # (add bias)
235 self
.v
[-1].eq(s
), # sign
239 return (s
, self
.fp
.P128
, 1 << (self
.e_start
-1))
242 return (s
, self
.fp
.P128
, 0)
245 return (s
, self
.fp
.N127
, 0)
248 return self
.create(*self
._nan
(s
))
251 return self
.create(*self
._inf
(s
))
254 return self
.create(*self
._zero
(s
))
256 def create2(self
, s
, e
, m
):
257 """ creates a value from sign / exponent / mantissa
259 bias is added here, to the exponent
261 e
= e
+ self
.P127
# exp (add on bias)
262 return Cat(m
[0:self
.e_start
],
263 e
[0:self
.e_end
-self
.e_start
],
267 return self
.create2(s
, self
.P128
, self
.msb1
)
270 return self
.create2(s
, self
.P128
, self
.mzero
)
273 return self
.create2(s
, self
.N127
, self
.mzero
)
281 return [self
.s
.eq(inp
.s
), self
.e
.eq(inp
.e
), self
.m
.eq(inp
.m
)]
284 class FPNumBase(FPNumBaseRecord
, Elaboratable
):
285 """ Floating-point Base Number Class
288 def __init__(self
, fp
):
293 self
.is_nan
= Signal(reset_less
=True)
294 self
.is_zero
= Signal(reset_less
=True)
295 self
.is_inf
= Signal(reset_less
=True)
296 self
.is_overflowed
= Signal(reset_less
=True)
297 self
.is_denormalised
= Signal(reset_less
=True)
298 self
.exp_128
= Signal(reset_less
=True)
299 self
.exp_sub_n126
= Signal((e_width
, True), reset_less
=True)
300 self
.exp_lt_n126
= Signal(reset_less
=True)
301 self
.exp_zero
= Signal(reset_less
=True)
302 self
.exp_gt_n126
= Signal(reset_less
=True)
303 self
.exp_gt127
= Signal(reset_less
=True)
304 self
.exp_n127
= Signal(reset_less
=True)
305 self
.exp_n126
= Signal(reset_less
=True)
306 self
.m_zero
= Signal(reset_less
=True)
307 self
.m_msbzero
= Signal(reset_less
=True)
309 def elaborate(self
, platform
):
311 m
.d
.comb
+= self
.is_nan
.eq(self
._is
_nan
())
312 m
.d
.comb
+= self
.is_zero
.eq(self
._is
_zero
())
313 m
.d
.comb
+= self
.is_inf
.eq(self
._is
_inf
())
314 m
.d
.comb
+= self
.is_overflowed
.eq(self
._is
_overflowed
())
315 m
.d
.comb
+= self
.is_denormalised
.eq(self
._is
_denormalised
())
316 m
.d
.comb
+= self
.exp_128
.eq(self
.e
== self
.fp
.P128
)
317 m
.d
.comb
+= self
.exp_sub_n126
.eq(self
.e
- self
.fp
.N126
)
318 m
.d
.comb
+= self
.exp_gt_n126
.eq(self
.exp_sub_n126
> 0)
319 m
.d
.comb
+= self
.exp_lt_n126
.eq(self
.exp_sub_n126
< 0)
320 m
.d
.comb
+= self
.exp_zero
.eq(self
.e
== 0)
321 m
.d
.comb
+= self
.exp_gt127
.eq(self
.e
> self
.fp
.P127
)
322 m
.d
.comb
+= self
.exp_n127
.eq(self
.e
== self
.fp
.N127
)
323 m
.d
.comb
+= self
.exp_n126
.eq(self
.e
== self
.fp
.N126
)
324 m
.d
.comb
+= self
.m_zero
.eq(self
.m
== self
.fp
.mzero
)
325 m
.d
.comb
+= self
.m_msbzero
.eq(self
.m
[self
.fp
.e_start
] == 0)
330 return (self
.exp_128
) & (~self
.m_zero
)
333 return (self
.exp_128
) & (self
.m_zero
)
336 return (self
.exp_n127
) & (self
.m_zero
)
338 def _is_overflowed(self
):
339 return self
.exp_gt127
341 def _is_denormalised(self
):
342 return (self
.exp_n126
) & (self
.m_msbzero
)
345 class FPNumOut(FPNumBase
):
346 """ Floating-point Number Class
348 Contains signals for an incoming copy of the value, decoded into
349 sign / exponent / mantissa.
350 Also contains encoding functions, creation and recognition of
351 zero, NaN and inf (all signed)
353 Four extra bits are included in the mantissa: the top bit
354 (m[-1]) is effectively a carry-overflow. The other three are
355 guard (m[2]), round (m[1]), and sticky (m[0])
358 def __init__(self
, fp
):
359 FPNumBase
.__init
__(self
, fp
)
361 def elaborate(self
, platform
):
362 m
= FPNumBase
.elaborate(self
, platform
)
367 class MultiShiftRMerge(Elaboratable
):
368 """ shifts down (right) and merges lower bits into m[0].
369 m[0] is the "sticky" bit, basically
372 def __init__(self
, width
, s_max
=None):
374 s_max
= int(log(width
) / log(2))
376 self
.m
= Signal(width
, reset_less
=True)
377 self
.inp
= Signal(width
, reset_less
=True)
378 self
.diff
= Signal(s_max
, reset_less
=True)
381 def elaborate(self
, platform
):
384 rs
= Signal(self
.width
, reset_less
=True)
385 m_mask
= Signal(self
.width
, reset_less
=True)
386 smask
= Signal(self
.width
, reset_less
=True)
387 stickybit
= Signal(reset_less
=True)
388 maxslen
= Signal(self
.smax
, reset_less
=True)
389 maxsleni
= Signal(self
.smax
, reset_less
=True)
391 sm
= MultiShift(self
.width
-1)
392 m0s
= Const(0, self
.width
-1)
393 mw
= Const(self
.width
-1, len(self
.diff
))
394 m
.d
.comb
+= [maxslen
.eq(Mux(self
.diff
> mw
, mw
, self
.diff
)),
395 maxsleni
.eq(Mux(self
.diff
> mw
, 0, mw
-self
.diff
)),
399 # shift mantissa by maxslen, mask by inverse
400 rs
.eq(sm
.rshift(self
.inp
[1:], maxslen
)),
401 m_mask
.eq(sm
.rshift(~m0s
, maxsleni
)),
402 smask
.eq(self
.inp
[1:] & m_mask
),
403 # sticky bit combines all mask (and mantissa low bit)
404 stickybit
.eq(smask
.bool() | self
.inp
[0]),
405 # mantissa result contains m[0] already.
406 self
.m
.eq(Cat(stickybit
, rs
))
411 class FPNumShift(FPNumBase
, Elaboratable
):
412 """ Floating-point Number Class for shifting
415 def __init__(self
, mainm
, op
, inv
, width
, m_extra
=True):
416 FPNumBase
.__init
__(self
, width
, m_extra
)
417 self
.latch_in
= Signal()
422 def elaborate(self
, platform
):
423 m
= FPNumBase
.elaborate(self
, platform
)
425 m
.d
.comb
+= self
.s
.eq(op
.s
)
426 m
.d
.comb
+= self
.e
.eq(op
.e
)
427 m
.d
.comb
+= self
.m
.eq(op
.m
)
429 with self
.mainm
.State("align"):
430 with m
.If(self
.e
< self
.inv
.e
):
431 m
.d
.sync
+= self
.shift_down()
435 def shift_down(self
, inp
):
436 """ shifts a mantissa down by one. exponent is increased to compensate
438 accuracy is lost as a result in the mantissa however there are 3
439 guard bits (the latter of which is the "sticky" bit)
441 return [self
.e
.eq(inp
.e
+ 1),
442 self
.m
.eq(Cat(inp
.m
[0] | inp
.m
[1], inp
.m
[2:], 0))
445 def shift_down_multi(self
, diff
):
446 """ shifts a mantissa down. exponent is increased to compensate
448 accuracy is lost as a result in the mantissa however there are 3
449 guard bits (the latter of which is the "sticky" bit)
451 this code works by variable-shifting the mantissa by up to
452 its maximum bit-length: no point doing more (it'll still be
455 the sticky bit is computed by shifting a batch of 1s by
456 the same amount, which will introduce zeros. it's then
457 inverted and used as a mask to get the LSBs of the mantissa.
458 those are then |'d into the sticky bit.
460 sm
= MultiShift(self
.width
)
461 mw
= Const(self
.m_width
-1, len(diff
))
462 maxslen
= Mux(diff
> mw
, mw
, diff
)
463 rs
= sm
.rshift(self
.m
[1:], maxslen
)
464 maxsleni
= mw
- maxslen
465 m_mask
= sm
.rshift(self
.m1s
[1:], maxsleni
) # shift and invert
467 stickybits
= reduce(or_
, self
.m
[1:] & m_mask
) | self
.m
[0]
468 return [self
.e
.eq(self
.e
+ diff
),
469 self
.m
.eq(Cat(stickybits
, rs
))
472 def shift_up_multi(self
, diff
):
473 """ shifts a mantissa up. exponent is decreased to compensate
475 sm
= MultiShift(self
.width
)
476 mw
= Const(self
.m_width
, len(diff
))
477 maxslen
= Mux(diff
> mw
, mw
, diff
)
479 return [self
.e
.eq(self
.e
- diff
),
480 self
.m
.eq(sm
.lshift(self
.m
, maxslen
))
484 class FPNumDecode(FPNumBase
):
485 """ Floating-point Number Class
487 Contains signals for an incoming copy of the value, decoded into
488 sign / exponent / mantissa.
489 Also contains encoding functions, creation and recognition of
490 zero, NaN and inf (all signed)
492 Four extra bits are included in the mantissa: the top bit
493 (m[-1]) is effectively a carry-overflow. The other three are
494 guard (m[2]), round (m[1]), and sticky (m[0])
497 def __init__(self
, op
, fp
):
498 FPNumBase
.__init
__(self
, fp
)
501 def elaborate(self
, platform
):
502 m
= FPNumBase
.elaborate(self
, platform
)
504 m
.d
.comb
+= self
.decode(self
.v
)
509 """ decodes a latched value into sign / exponent / mantissa
511 bias is subtracted here, from the exponent. exponent
512 is extended to 10 bits so that subtract 127 is done on
515 args
= [0] * self
.m_extra
+ [v
[0:self
.e_start
]] # pad with extra zeros
516 #print ("decode", self.e_end)
517 return [self
.m
.eq(Cat(*args
)), # mantissa
518 self
.e
.eq(v
[self
.e_start
:self
.e_end
] - self
.fp
.P127
), # exp
519 self
.s
.eq(v
[-1]), # sign
523 class FPNumIn(FPNumBase
):
524 """ Floating-point Number Class
526 Contains signals for an incoming copy of the value, decoded into
527 sign / exponent / mantissa.
528 Also contains encoding functions, creation and recognition of
529 zero, NaN and inf (all signed)
531 Four extra bits are included in the mantissa: the top bit
532 (m[-1]) is effectively a carry-overflow. The other three are
533 guard (m[2]), round (m[1]), and sticky (m[0])
536 def __init__(self
, op
, fp
):
537 FPNumBase
.__init
__(self
, fp
)
538 self
.latch_in
= Signal()
541 def decode2(self
, m
):
542 """ decodes a latched value into sign / exponent / mantissa
544 bias is subtracted here, from the exponent. exponent
545 is extended to 10 bits so that subtract 127 is done on
549 args
= [0] * self
.m_extra
+ [v
[0:self
.e_start
]] # pad with extra zeros
550 #print ("decode", self.e_end)
551 res
= ObjectProxy(m
, pipemode
=False)
552 res
.m
= Cat(*args
) # mantissa
553 res
.e
= v
[self
.e_start
:self
.e_end
] - self
.fp
.P127
# exp
558 """ decodes a latched value into sign / exponent / mantissa
560 bias is subtracted here, from the exponent. exponent
561 is extended to 10 bits so that subtract 127 is done on
564 args
= [0] * self
.m_extra
+ [v
[0:self
.e_start
]] # pad with extra zeros
565 #print ("decode", self.e_end)
566 return [self
.m
.eq(Cat(*args
)), # mantissa
567 self
.e
.eq(v
[self
.e_start
:self
.e_end
] - self
.P127
), # exp
568 self
.s
.eq(v
[-1]), # sign
571 def shift_down(self
, inp
):
572 """ shifts a mantissa down by one. exponent is increased to compensate
574 accuracy is lost as a result in the mantissa however there are 3
575 guard bits (the latter of which is the "sticky" bit)
577 return [self
.e
.eq(inp
.e
+ 1),
578 self
.m
.eq(Cat(inp
.m
[0] | inp
.m
[1], inp
.m
[2:], 0))
581 def shift_down_multi(self
, diff
, inp
=None):
582 """ shifts a mantissa down. exponent is increased to compensate
584 accuracy is lost as a result in the mantissa however there are 3
585 guard bits (the latter of which is the "sticky" bit)
587 this code works by variable-shifting the mantissa by up to
588 its maximum bit-length: no point doing more (it'll still be
591 the sticky bit is computed by shifting a batch of 1s by
592 the same amount, which will introduce zeros. it's then
593 inverted and used as a mask to get the LSBs of the mantissa.
594 those are then |'d into the sticky bit.
598 sm
= MultiShift(self
.width
)
599 mw
= Const(self
.m_width
-1, len(diff
))
600 maxslen
= Mux(diff
> mw
, mw
, diff
)
601 rs
= sm
.rshift(inp
.m
[1:], maxslen
)
602 maxsleni
= mw
- maxslen
603 m_mask
= sm
.rshift(self
.m1s
[1:], maxsleni
) # shift and invert
605 #stickybit = reduce(or_, inp.m[1:] & m_mask) | inp.m[0]
606 stickybit
= (inp
.m
[1:] & m_mask
).bool() | inp
.m
[0]
607 return [self
.e
.eq(inp
.e
+ diff
),
608 self
.m
.eq(Cat(stickybit
, rs
))
611 def shift_up_multi(self
, diff
):
612 """ shifts a mantissa up. exponent is decreased to compensate
614 sm
= MultiShift(self
.width
)
615 mw
= Const(self
.m_width
, len(diff
))
616 maxslen
= Mux(diff
> mw
, mw
, diff
)
618 return [self
.e
.eq(self
.e
- diff
),
619 self
.m
.eq(sm
.lshift(self
.m
, maxslen
))
623 class Trigger(Elaboratable
):
626 self
.stb
= Signal(reset
=0)
628 self
.trigger
= Signal(reset_less
=True)
630 def elaborate(self
, platform
):
632 m
.d
.comb
+= self
.trigger
.eq(self
.stb
& self
.ack
)
636 return [self
.stb
.eq(inp
.stb
),
641 return [self
.stb
, self
.ack
]
644 class FPOpIn(PrevControl
):
645 def __init__(self
, width
):
646 PrevControl
.__init
__(self
)
653 def chain_inv(self
, in_op
, extra
=None):
655 if extra
is not None:
657 return [self
.v
.eq(in_op
.v
), # receive value
658 self
.stb
.eq(stb
), # receive STB
659 in_op
.ack
.eq(~self
.ack
), # send ACK
662 def chain_from(self
, in_op
, extra
=None):
664 if extra
is not None:
666 return [self
.v
.eq(in_op
.v
), # receive value
667 self
.stb
.eq(stb
), # receive STB
668 in_op
.ack
.eq(self
.ack
), # send ACK
672 class FPOpOut(NextControl
):
673 def __init__(self
, width
):
674 NextControl
.__init
__(self
)
681 def chain_inv(self
, in_op
, extra
=None):
683 if extra
is not None:
685 return [self
.v
.eq(in_op
.v
), # receive value
686 self
.stb
.eq(stb
), # receive STB
687 in_op
.ack
.eq(~self
.ack
), # send ACK
690 def chain_from(self
, in_op
, extra
=None):
692 if extra
is not None:
694 return [self
.v
.eq(in_op
.v
), # receive value
695 self
.stb
.eq(stb
), # receive STB
696 in_op
.ack
.eq(self
.ack
), # send ACK
700 class Overflow
: # (Elaboratable):
701 def __init__(self
, name
=None):
704 self
.guard
= Signal(reset_less
=True, name
=name
+"guard") # tot[2]
705 self
.round_bit
= Signal(reset_less
=True, name
=name
+"round") # tot[1]
706 self
.sticky
= Signal(reset_less
=True, name
=name
+"sticky") # tot[0]
707 self
.m0
= Signal(reset_less
=True, name
=name
+"m0") # mantissa bit 0
709 #self.roundz = Signal(reset_less=True)
718 return [self
.guard
.eq(inp
.guard
),
719 self
.round_bit
.eq(inp
.round_bit
),
720 self
.sticky
.eq(inp
.sticky
),
725 return self
.guard
& (self
.round_bit | self
.sticky | self
.m0
)
728 class OverflowMod(Elaboratable
, Overflow
):
729 def __init__(self
, name
=None):
730 Overflow
.__init
__(self
, name
)
733 self
.roundz_out
= Signal(reset_less
=True, name
=name
+"roundz_out")
736 yield from Overflow
.__iter
__(self
)
737 yield self
.roundz_out
740 return [self
.roundz_out
.eq(inp
.roundz_out
)] + Overflow
.eq(self
)
742 def elaborate(self
, platform
):
744 m
.d
.comb
+= self
.roundz_out
.eq(self
.roundz
)
749 """ IEEE754 Floating Point Base Class
751 contains common functions for FP manipulation, such as
752 extracting and packing operands, normalisation, denormalisation,
756 def get_op(self
, m
, op
, v
, next_state
):
757 """ this function moves to the next state and copies the operand
758 when both stb and ack are 1.
759 acknowledgement is sent by setting ack to ZERO.
763 with m
.If((op
.ready_o
) & (op
.valid_i_test
)):
765 # op is latched in from FPNumIn class on same ack/stb
766 m
.d
.comb
+= ack
.eq(0)
768 m
.d
.comb
+= ack
.eq(1)
771 def denormalise(self
, m
, a
):
772 """ denormalises a number. this is probably the wrong name for
773 this function. for normalised numbers (exponent != minimum)
774 one *extra* bit (the implicit 1) is added *back in*.
775 for denormalised numbers, the mantissa is left alone
776 and the exponent increased by 1.
778 both cases *effectively multiply the number stored by 2*,
779 which has to be taken into account when extracting the result.
781 with m
.If(a
.exp_n127
):
782 m
.d
.sync
+= a
.e
.eq(a
.fp
.N126
) # limit a exponent
784 m
.d
.sync
+= a
.m
[-1].eq(1) # set top mantissa bit
786 def op_normalise(self
, m
, op
, next_state
):
787 """ operand normalisation
788 NOTE: just like "align", this one keeps going round every clock
789 until the result's exponent is within acceptable "range"
791 with m
.If((op
.m
[-1] == 0)): # check last bit of mantissa
793 op
.e
.eq(op
.e
- 1), # DECREASE exponent
794 op
.m
.eq(op
.m
<< 1), # shift mantissa UP
799 def normalise_1(self
, m
, z
, of
, next_state
):
800 """ first stage normalisation
802 NOTE: just like "align", this one keeps going round every clock
803 until the result's exponent is within acceptable "range"
804 NOTE: the weirdness of reassigning guard and round is due to
805 the extra mantissa bits coming from tot[0..2]
807 with m
.If((z
.m
[-1] == 0) & (z
.e
> z
.fp
.N126
)):
809 z
.e
.eq(z
.e
- 1), # DECREASE exponent
810 z
.m
.eq(z
.m
<< 1), # shift mantissa UP
811 z
.m
[0].eq(of
.guard
), # steal guard bit (was tot[2])
812 of
.guard
.eq(of
.round_bit
), # steal round_bit (was tot[1])
813 of
.round_bit
.eq(0), # reset round bit
819 def normalise_2(self
, m
, z
, of
, next_state
):
820 """ second stage normalisation
822 NOTE: just like "align", this one keeps going round every clock
823 until the result's exponent is within acceptable "range"
824 NOTE: the weirdness of reassigning guard and round is due to
825 the extra mantissa bits coming from tot[0..2]
827 with m
.If(z
.e
< z
.fp
.N126
):
829 z
.e
.eq(z
.e
+ 1), # INCREASE exponent
830 z
.m
.eq(z
.m
>> 1), # shift mantissa DOWN
833 of
.round_bit
.eq(of
.guard
),
834 of
.sticky
.eq(of
.sticky | of
.round_bit
)
839 def roundz(self
, m
, z
, roundz
):
840 """ performs rounding on the output. TODO: different kinds of rounding
843 m
.d
.sync
+= z
.m
.eq(z
.m
+ 1) # mantissa rounds up
844 with m
.If(z
.m
== z
.fp
.m1s
): # all 1s
845 m
.d
.sync
+= z
.e
.eq(z
.e
+ 1) # exponent rounds up
847 def corrections(self
, m
, z
, next_state
):
848 """ denormalisation and sign-bug corrections
851 # denormalised, correct exponent to zero
852 with m
.If(z
.is_denormalised
):
853 m
.d
.sync
+= z
.e
.eq(z
.fp
.N127
)
855 def pack(self
, m
, z
, next_state
):
856 """ packs the result into the output (detects overflow->Inf)
859 # if overflow occurs, return inf
860 with m
.If(z
.is_overflowed
):
861 m
.d
.sync
+= z
.inf(z
.s
)
863 m
.d
.sync
+= z
.create(z
.s
, z
.e
, z
.m
)
865 def put_z(self
, m
, z
, out_z
, next_state
):
866 """ put_z: stores the result in the output. raises stb and waits
867 for ack to be set to 1 before moving to the next state.
868 resets stb back to zero when that occurs, as acknowledgement.
873 with m
.If(out_z
.valid_o
& out_z
.ready_i_test
):
874 m
.d
.sync
+= out_z
.valid_o
.eq(0)
877 m
.d
.sync
+= out_z
.valid_o
.eq(1)
880 class FPState(FPBase
):
881 def __init__(self
, state_from
):
882 self
.state_from
= state_from
884 def set_inputs(self
, inputs
):
886 for k
, v
in inputs
.items():
889 def set_outputs(self
, outputs
):
890 self
.outputs
= outputs
891 for k
, v
in outputs
.items():
896 def __init__(self
, id_wid
):
899 self
.in_mid
= Signal(id_wid
, reset_less
=True)
900 self
.out_mid
= Signal(id_wid
, reset_less
=True)
906 if self
.id_wid
is not None:
907 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)