1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Signal
, Cat
, Const
, Mux
, Module
7 from operator
import or_
8 from functools
import reduce
12 def __init__(self
, width
):
14 self
.smax
= int(log(width
) / log(2))
15 self
.i
= Signal(width
, reset_less
=True)
16 self
.s
= Signal(self
.smax
, reset_less
=True)
17 self
.o
= Signal(width
, reset_less
=True)
19 def elaborate(self
, platform
):
21 m
.d
.comb
+= self
.o
.eq(self
.i
>> self
.s
)
26 """ Generates variable-length single-cycle shifter from a series
27 of conditional tests on each bit of the left/right shift operand.
28 Each bit tested produces output shifted by that number of bits,
29 in a binary fashion: bit 1 if set shifts by 1 bit, bit 2 if set
30 shifts by 2 bits, each partial result cascading to the next Mux.
32 Could be adapted to do arithmetic shift by taking copies of the
36 def __init__(self
, width
):
38 self
.smax
= int(log(width
) / log(2))
40 def lshift(self
, op
, s
):
44 for i
in range(self
.smax
):
46 res
= Mux(s
& (1<<i
), Cat(zeros
, res
[0:-(1<<i
)]), res
)
49 def rshift(self
, op
, s
):
53 for i
in range(self
.smax
):
55 res
= Mux(s
& (1<<i
), Cat(res
[(1<<i
):], zeros
), res
)
60 """ Floating-point Number Class, variable-width TODO (currently 32-bit)
62 Contains signals for an incoming copy of the value, decoded into
63 sign / exponent / mantissa.
64 Also contains encoding functions, creation and recognition of
65 zero, NaN and inf (all signed)
67 Four extra bits are included in the mantissa: the top bit
68 (m[-1]) is effectively a carry-overflow. The other three are
69 guard (m[2]), round (m[1]), and sticky (m[0])
71 def __init__(self
, width
, m_extra
=True):
73 m_width
= {16: 11, 32: 24, 64: 53}[width
] # 1 extra bit (overflow)
74 e_width
= {16: 7, 32: 10, 64: 13}[width
] # 2 extra bits (overflow)
75 e_max
= 1<<(e_width
-3)
76 self
.rmw
= m_width
# real mantissa width (not including extras)
79 # mantissa extra bits (top,guard,round)
81 m_width
+= self
.m_extra
84 #print (m_width, e_width, e_max, self.rmw, self.m_extra)
85 self
.m_width
= m_width
86 self
.e_width
= e_width
87 self
.e_start
= self
.rmw
- 1
88 self
.e_end
= self
.rmw
+ self
.e_width
- 3 # for decoding
90 self
.v
= Signal(width
, reset_less
=True) # Latched copy of value
91 self
.m
= Signal(m_width
, reset_less
=True) # Mantissa
92 self
.e
= Signal((e_width
, True), reset_less
=True) # Exponent: IEEE754exp+2 bits, signed
93 self
.s
= Signal(reset_less
=True) # Sign bit
95 self
.mzero
= Const(0, (m_width
, False))
96 self
.m1s
= Const(-1, (m_width
, False))
97 self
.P128
= Const(e_max
, (e_width
, True))
98 self
.P127
= Const(e_max
-1, (e_width
, True))
99 self
.N127
= Const(-(e_max
-1), (e_width
, True))
100 self
.N126
= Const(-(e_max
-2), (e_width
, True))
102 self
.is_nan
= Signal(reset_less
=True)
103 self
.is_zero
= Signal(reset_less
=True)
104 self
.is_inf
= Signal(reset_less
=True)
105 self
.is_overflowed
= Signal(reset_less
=True)
106 self
.is_denormalised
= Signal(reset_less
=True)
107 self
.exp_128
= Signal(reset_less
=True)
109 def elaborate(self
, platform
):
111 m
.d
.comb
+= self
.is_nan
.eq(self
._is
_nan
())
112 m
.d
.comb
+= self
.is_zero
.eq(self
._is
_zero
())
113 m
.d
.comb
+= self
.is_inf
.eq(self
._is
_inf
())
114 m
.d
.comb
+= self
.is_overflowed
.eq(self
._is
_overflowed
())
115 m
.d
.comb
+= self
.is_denormalised
.eq(self
._is
_denormalised
())
116 m
.d
.comb
+= self
.exp_128
.eq(self
.e
== self
.P128
)
121 """ decodes a latched value into sign / exponent / mantissa
123 bias is subtracted here, from the exponent. exponent
124 is extended to 10 bits so that subtract 127 is done on
127 args
= [0] * self
.m_extra
+ [v
[0:self
.e_start
]] # pad with extra zeros
128 #print ("decode", self.e_end)
129 return [self
.m
.eq(Cat(*args
)), # mantissa
130 self
.e
.eq(v
[self
.e_start
:self
.e_end
] - self
.P127
), # exp
131 self
.s
.eq(v
[-1]), # sign
134 def create(self
, s
, e
, m
):
135 """ creates a value from sign / exponent / mantissa
137 bias is added here, to the exponent
140 self
.v
[-1].eq(s
), # sign
141 self
.v
[self
.e_start
:self
.e_end
].eq(e
+ self
.P127
), # exp (add on bias)
142 self
.v
[0:self
.e_start
].eq(m
) # mantissa
def shift_down(self):
    """ moves the mantissa one bit position down and adds one to the
        exponent so that the represented value stays (roughly) the same.

        the bit that would otherwise fall off the bottom is not thrown
        away: old bits 0 and 1 are OR-ed together and become the new
        bit 0 (the "sticky" position), bits 2 and above follow it, and a
        constant 0 is appended as the last Cat() argument.

        returns a two-element list: the exponent assignment followed by
        the mantissa assignment.  nothing is assigned inside this method.
    """
    exponent_update = self.e.eq(self.e + 1)

    # build the replacement mantissa piece by piece, in Cat() order
    sticky = self.m[0] | self.m[1]
    upper_bits = self.m[2:]
    mantissa_update = self.m.eq(Cat(sticky, upper_bits, 0))

    return [exponent_update, mantissa_update]
155 def shift_down_multi(self
, diff
):
156 """ shifts a mantissa down. exponent is increased to compensate
158 accuracy is lost as a result in the mantissa however there are 3
159 guard bits (the latter of which is the "sticky" bit)
161 this code works by variable-shifting the mantissa by up to
162 its maximum bit-length: no point doing more (it'll still be
165 the sticky bit is computed by shifting a batch of 1s by
166 the same amount, which will introduce zeros. it's then
167 inverted and used as a mask to get the LSBs of the mantissa.
168 those are then |'d into the sticky bit.
170 sm
= MultiShift(self
.width
)
171 mw
= Const(self
.m_width
-1, len(diff
))
172 maxslen
= Mux(diff
> mw
, mw
, diff
)
173 rs
= sm
.rshift(self
.m
[1:], maxslen
)
174 maxsleni
= mw
- maxslen
175 m_mask
= sm
.rshift(self
.m1s
[1:], maxsleni
) # shift and invert
177 stickybits
= reduce(or_
, self
.m
[1:] & m_mask
) | self
.m
[0]
178 return [self
.e
.eq(self
.e
+ diff
),
179 self
.m
.eq(Cat(stickybits
, rs
))
def shift_up_multi(self, diff):
    """ moves the mantissa up by a variable number of bit positions and
        subtracts the same amount from the exponent.

        diff: the requested shift amount.  len(diff) is used as the
              width of the clamping constant.

        the amount handed to the shifter is limited to self.m_width;
        the exponent, however, is reduced by the full unclamped diff.

        returns a two-element list: the exponent assignment followed by
        the mantissa assignment.  nothing is assigned inside this method.
    """
    shifter = MultiShift(self.width)
    limit = Const(self.m_width, len(diff))
    # clamp: if diff exceeds the mantissa width, shift by the width instead
    clamped = Mux(diff > limit, limit, diff)

    exponent_update = self.e.eq(self.e - diff)
    mantissa_update = self.m.eq(shifter.lshift(self.m, clamped))
    return [exponent_update, mantissa_update]
194 return self
.create(s
, self
.P128
, 1<<(self
.e_start
-1))
197 return self
.create(s
, self
.P128
, 0)
200 return self
.create(s
, self
.N127
, 0)
203 return (self
.e
== self
.P128
) & (self
.m
!= 0)
206 return (self
.e
== self
.P128
) & (self
.m
== 0)
209 return (self
.e
== self
.N127
) & (self
.m
== self
.mzero
)
def _is_overflowed(self):
    """ overflow test: true when the exponent (self.e) is strictly
        greater than the self.P127 constant.
    """
    exponent, ceiling = self.e, self.P127
    return exponent > ceiling
def _is_denormalised(self):
    """ denormal test: true when the exponent (self.e) equals the
        self.N126 constant AND the mantissa bit at index self.e_start
        is zero.
    """
    at_lowest_exponent = self.e == self.N126
    marker_bit_clear = self.m[self.e_start] == 0
    return at_lowest_exponent & marker_bit_clear
219 def __init__(self
, width
):
222 self
.v
= Signal(width
)
227 return [self
.v
, self
.stb
, self
.ack
]
232 self
.guard
= Signal(reset_less
=True) # tot[2]
233 self
.round_bit
= Signal(reset_less
=True) # tot[1]
234 self
.sticky
= Signal(reset_less
=True) # tot[0]
235 self
.m0
= Signal(reset_less
=True) # mantissa zero bit
237 self
.roundz
= Signal(reset_less
=True)
239 def elaborate(self
, platform
):
241 m
.d
.comb
+= self
.roundz
.eq(self
.guard
& \
242 (self
.round_bit | self
.sticky | self
.m0
))
247 """ IEEE754 Floating Point Base Class
249 contains common functions for FP manipulation, such as
250 extracting and packing operands, normalisation, denormalisation,
254 def get_op(self
, m
, op
, v
, next_state
):
255 """ this function moves to the next state and copies the operand
256 when both stb and ack are 1.
257 acknowledgement is sent by setting ack to ZERO.
259 with m
.If((op
.ack
) & (op
.stb
)):
266 m
.d
.sync
+= op
.ack
.eq(1)
268 def denormalise(self
, m
, a
):
269 """ denormalises a number. this is probably the wrong name for
270 this function. for normalised numbers (exponent != minimum)
271 one *extra* bit (the implicit 1) is added *back in*.
272 for denormalised numbers, the mantissa is left alone
273 and the exponent increased by 1.
275 both cases *effectively multiply the number stored by 2*,
276 which has to be taken into account when extracting the result.
278 with m
.If(a
.e
== a
.N127
):
279 m
.d
.sync
+= a
.e
.eq(a
.N126
) # limit a exponent
281 m
.d
.sync
+= a
.m
[-1].eq(1) # set top mantissa bit
283 def op_normalise(self
, m
, op
, next_state
):
284 """ operand normalisation
285 NOTE: just like "align", this one keeps going round every clock
286 until the result's exponent is within acceptable "range"
288 with m
.If((op
.m
[-1] == 0)): # check last bit of mantissa
290 op
.e
.eq(op
.e
- 1), # DECREASE exponent
291 op
.m
.eq(op
.m
<< 1), # shift mantissa UP
296 def normalise_1(self
, m
, z
, of
, next_state
):
297 """ first stage normalisation
299 NOTE: just like "align", this one keeps going round every clock
300 until the result's exponent is within acceptable "range"
301 NOTE: the weirdness of reassigning guard and round is due to
302 the extra mantissa bits coming from tot[0..2]
304 with m
.If((z
.m
[-1] == 0) & (z
.e
> z
.N126
)):
306 z
.e
.eq(z
.e
- 1), # DECREASE exponent
307 z
.m
.eq(z
.m
<< 1), # shift mantissa UP
308 z
.m
[0].eq(of
.guard
), # steal guard bit (was tot[2])
309 of
.guard
.eq(of
.round_bit
), # steal round_bit (was tot[1])
310 of
.round_bit
.eq(0), # reset round bit
316 def normalise_2(self
, m
, z
, of
, next_state
):
317 """ second stage normalisation
319 NOTE: just like "align", this one keeps going round every clock
320 until the result's exponent is within acceptable "range"
321 NOTE: the weirdness of reassigning guard and round is due to
322 the extra mantissa bits coming from tot[0..2]
324 with m
.If(z
.e
< z
.N126
):
326 z
.e
.eq(z
.e
+ 1), # INCREASE exponent
327 z
.m
.eq(z
.m
>> 1), # shift mantissa DOWN
330 of
.round_bit
.eq(of
.guard
),
331 of
.sticky
.eq(of
.sticky | of
.round_bit
)
336 def roundz(self
, m
, z
, of
, next_state
):
337 """ performs rounding on the output. TODO: different kinds of rounding
340 with m
.If(of
.roundz
):
341 m
.d
.sync
+= z
.m
.eq(z
.m
+ 1) # mantissa rounds up
342 with m
.If(z
.m
== z
.m1s
): # all 1s
343 m
.d
.sync
+= z
.e
.eq(z
.e
+ 1) # exponent rounds up
345 def corrections(self
, m
, z
, next_state
):
346 """ denormalisation and sign-bug corrections
349 # denormalised, correct exponent to zero
350 with m
.If(z
.is_denormalised
):
351 m
.d
.sync
+= z
.e
.eq(z
.N127
)
353 def pack(self
, m
, z
, next_state
):
354 """ packs the result into the output (detects overflow->Inf)
357 # if overflow occurs, return inf
358 with m
.If(z
.is_overflowed
):
359 m
.d
.sync
+= z
.inf(z
.s
)
361 m
.d
.sync
+= z
.create(z
.s
, z
.e
, z
.m
)
363 def put_z(self
, m
, z
, out_z
, next_state
):
364 """ put_z: stores the result in the output. raises stb and waits
365 for ack to be set to 1 before moving to the next state.
366 resets stb back to zero when that occurs, as acknowledgement.
371 with m
.If(out_z
.stb
& out_z
.ack
):
372 m
.d
.sync
+= out_z
.stb
.eq(0)
375 m
.d
.sync
+= out_z
.stb
.eq(1)