# IEEE Floating Point Adder (Single Precision)
# Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Const
6 from nmigen
.cli
import main
, verilog
10 """ Floating-point Number Class, variable-width TODO (currently 32-bit)
12 Contains signals for an incoming copy of the value, decoded into
13 sign / exponent / mantissa.
14 Also contains encoding functions, creation and recognition of
15 zero, NaN and inf (all signed)
17 Four extra bits are included in the mantissa: the top bit
18 (m[-1]) is effectively a carry-overflow. The other three are
19 guard (m[2]), round (m[1]), and sticky (m[0])
21 def __init__(self
, width
, m_width
=None):
24 m_width
= width
- 5 # mantissa extra bits (top,guard,round)
25 self
.v
= Signal(width
) # Latched copy of value
26 self
.m
= Signal(m_width
) # Mantissa
27 self
.e
= Signal((10, True)) # Exponent: 10 bits, signed
28 self
.s
= Signal() # Sign bit
30 self
.mzero
= Const(0, (m_width
, False))
31 self
.m1s
= Const(-1, (m_width
, False))
32 self
.P128
= Const(128, (10, True))
33 self
.P127
= Const(127, (10, True))
34 self
.N127
= Const(-127, (10, True))
35 self
.N126
= Const(-126, (10, True))
38 """ decodes a latched value into sign / exponent / mantissa
40 bias is subtracted here, from the exponent. exponent
41 is extended to 10 bits so that subtract 127 is done on
44 return [self
.m
.eq(Cat(0, 0, 0, v
[0:23])), # mantissa
45 self
.e
.eq(v
[23:31] - self
.P127
), # exp (minus bias)
46 self
.s
.eq(v
[31]), # sign
49 def create(self
, s
, e
, m
):
50 """ creates a value from sign / exponent / mantissa
52 bias is added here, to the exponent
55 self
.v
[31].eq(s
), # sign
56 self
.v
[23:31].eq(e
+ self
.P127
), # exp (add on bias)
57 self
.v
[0:23].eq(m
) # mantissa
61 """ shifts a mantissa down by one. exponent is increased to compensate
63 accuracy is lost as a result in the mantissa however there are 3
64 guard bits (the latter of which is the "sticky" bit)
66 return [self
.e
.eq(self
.e
+ 1),
67 self
.m
.eq(Cat(self
.m
[0] | self
.m
[1], self
.m
[2:], 0))
71 return self
.create(s
, self
.P128
, 1<<22)
74 return self
.create(s
, self
.P128
, 0)
77 return self
.create(s
, self
.N127
, 0)
80 return (self
.e
== self
.P128
) & (self
.m
!= 0)
83 return (self
.e
== self
.P128
) & (self
.m
== 0)
86 return (self
.e
== self
.N127
) & (self
.m
== self
.mzero
)
def is_overflowed(self):
    """ tests whether the exponent has gone above the P127 limit

        returns the result of the comparison "self.e > self.P127".
        nothing is assigned here: the caller decides what to do with
        the result.
    """
    return self.e > self.P127
def is_denormalised(self):
    """ tests for a denormalised number

        true when the exponent equals N126 *and* mantissa bit 23 is clear.
        the two conditions are combined with "&" (not "and"), so the
        operands are always both evaluated.
    """
    # NOTE(review): the bit index is hard-coded to 23, which is the top
    # bit only when the mantissa is 24 bits wide - confirm this is what
    # is wanted for every mantissa width this gets called on.
    exponent_at_minimum = (self.e == self.N126)
    top_bit_clear = (self.m[23] == 0)
    return exponent_at_minimum & top_bit_clear
96 def __init__(self
, width
):
99 self
.v
= Signal(width
)
104 return [self
.v
, self
.stb
, self
.ack
]
109 self
.guard
= Signal() # tot[2]
110 self
.round_bit
= Signal() # tot[1]
111 self
.sticky
= Signal() # tot[0]
115 def __init__(self
, width
):
118 self
.in_a
= FPOp(width
)
119 self
.in_b
= FPOp(width
)
120 self
.out_z
= FPOp(width
)
122 def get_op(self
, m
, op
, v
, next_state
):
123 """ this function moves to the next state and copies the operand
124 when both stb and ack are 1.
125 acknowledgement is sent by setting ack to ZERO.
127 with m
.If((op
.ack
) & (op
.stb
)):
134 m
.d
.sync
+= op
.ack
.eq(1)
136 def denormalise(self
, m
, a
):
137 """ denormalises a number
139 with m
.If(a
.e
== a
.N127
):
140 m
.d
.sync
+= a
.e
.eq(-126) # limit a exponent
142 m
.d
.sync
+= a
.m
[-1].eq(1) # set top mantissa bit
144 def normalise_1(self
, m
, z
, of
, next_state
):
145 """ first stage normalisation
147 NOTE: just like "align", this one keeps going round every clock
148 until the result's exponent is within acceptable "range"
149 NOTE: the weirdness of reassigning guard and round is due to
150 the extra mantissa bits coming from tot[0..2]
152 with m
.If((z
.m
[-1] == 0) & (z
.e
> z
.N126
)):
154 z
.e
.eq(z
.e
- 1), # DECREASE exponent
155 z
.m
.eq(z
.m
<< 1), # shift mantissa UP
156 z
.m
[0].eq(of
.guard
), # steal guard bit (was tot[2])
157 of
.guard
.eq(of
.round_bit
), # steal round_bit (was tot[1])
158 of
.round_bit
.eq(0), # reset round bit
163 def normalise_2(self
, m
, z
, of
, next_state
):
164 """ second stage normalisation
166 NOTE: just like "align", this one keeps going round every clock
167 until the result's exponent is within acceptable "range"
168 NOTE: the weirdness of reassigning guard and round is due to
169 the extra mantissa bits coming from tot[0..2]
171 with m
.If(z
.e
< z
.N126
):
173 z
.e
.eq(z
.e
+ 1), # INCREASE exponent
174 z
.m
.eq(z
.m
>> 1), # shift mantissa DOWN
176 of
.round_bit
.eq(of
.guard
),
177 of
.sticky
.eq(of
.sticky | of
.round_bit
)
182 def roundz(self
, m
, z
, of
, next_state
):
183 """ performs rounding on the output. TODO: different kinds of rounding
186 with m
.If(of
.guard
& (of
.round_bit | of
.sticky | z
.m
[0])):
187 m
.d
.sync
+= z
.m
.eq(z
.m
+ 1) # mantissa rounds up
188 with m
.If(z
.m
== z
.m1s
): # all 1s
189 m
.d
.sync
+= z
.e
.eq(z
.e
+ 1) # exponent rounds up
191 def corrections(self
, m
, z
, next_state
):
192 """ denormalisation and sign-bug corrections
195 # denormalised, correct exponent to zero
196 with m
.If(z
.is_denormalised()):
197 m
.d
.sync
+= z
.m
.eq(-127)
198 # FIX SIGN BUG: -a + a = +0.
199 with m
.If((z
.e
== z
.N126
) & (z
.m
[0:] == 0)):
200 m
.d
.sync
+= z
.s
.eq(0)
202 def pack(self
, m
, z
, next_state
):
203 """ packs the result into the output (detects overflow->Inf)
206 # if overflow occurs, return inf
207 with m
.If(z
.is_overflowed()):
210 m
.d
.sync
+= z
.create(z
.s
, z
.e
, z
.m
)
212 def put_z(self
, m
, z
, out_z
, next_state
):
213 """ put_z: stores the result in the output. raises stb and waits
214 for ack to be set to 1 before moving to the next state.
215 resets stb back to zero when that occurs, as acknowledgement.
221 with m
.If(out_z
.stb
& out_z
.ack
):
222 m
.d
.sync
+= out_z
.stb
.eq(0)
225 def get_fragment(self
, platform
=None):
226 """ creates the HDL code-fragment for FPAdd
231 a
= FPNum(self
.width
)
232 b
= FPNum(self
.width
)
233 z
= FPNum(self
.width
, 24)
235 tot
= Signal(28) # sticky/round/guard bits, 23 result, 1 overflow
244 with m
.State("get_a"):
245 self
.get_op(m
, self
.in_a
, a
, "get_b")
250 with m
.State("get_b"):
251 self
.get_op(m
, self
.in_b
, b
, "special_cases")
254 # special cases: NaNs, infs, zeros, denormalised
255 # NOTE: some of these are unique to add. see "Special Operations"
256 # https://steve.hollasch.net/cgindex/coding/ieeefloat.html
258 with m
.State("special_cases"):
260 # if a is NaN or b is NaN return NaN
261 with m
.If(a
.is_nan() | b
.is_nan()):
265 # if a is inf return inf (or NaN)
266 with m
.Elif(a
.is_inf()):
268 m
.d
.sync
+= z
.inf(a
.s
)
269 # if a is inf and signs don't match return NaN
270 with m
.If((b
.e
== b
.P128
) & (a
.s
!= b
.s
)):
271 m
.d
.sync
+= z
.nan(b
.s
)
273 # if b is inf return inf
274 with m
.Elif(b
.is_inf()):
276 m
.d
.sync
+= z
.inf(b
.s
)
278 # if a is zero and b zero return signed-a/b
279 with m
.Elif(a
.is_zero() & b
.is_zero()):
281 m
.d
.sync
+= z
.create(a
.s
& b
.s
, b
.e
[0:8], b
.m
[3:-1])
283 # if a is zero return b
284 with m
.Elif(a
.is_zero()):
286 m
.d
.sync
+= z
.create(b
.s
, b
.e
[0:8], b
.m
[3:-1])
288 # if b is zero return a
289 with m
.Elif(b
.is_zero()):
291 m
.d
.sync
+= z
.create(a
.s
, a
.e
[0:8], a
.m
[3:-1])
293 # Denormalised Number checks
296 self
.denormalise(m
, a
)
297 self
.denormalise(m
, b
)
300 # align. NOTE: this does *not* do single-cycle multi-shifting,
301 # it *STAYS* in the align state until the exponents match
303 with m
.State("align"):
304 # exponent of a greater than b: increment b exp, shift b mant
305 with m
.If(a
.e
> b
.e
):
306 m
.d
.sync
+= b
.shift_down()
307 # exponent of b greater than a: increment a exp, shift a mant
308 with m
.Elif(a
.e
< b
.e
):
309 m
.d
.sync
+= a
.shift_down()
310 # exponents equal: move to next stage.
315 # First stage of add. covers same-sign (add) and subtract
316 # special-casing when mantissas are greater or equal, to
317 # give greatest accuracy.
319 with m
.State("add_0"):
321 m
.d
.sync
+= z
.e
.eq(a
.e
)
322 # same-sign (both negative or both positive) add mantissas
323 with m
.If(a
.s
== b
.s
):
328 # a mantissa greater than b, use a
329 with m
.Elif(a
.m
>= b
.m
):
334 # b mantissa greater than a, use b
342 # Second stage of add: preparation for normalisation.
343 # detects when tot sum is too big (tot[27] is kinda a carry bit)
345 with m
.State("add_1"):
346 m
.next
= "normalise_1"
347 # tot[27] gets set when the sum overflows. shift result down
352 of
.round_bit
.eq(tot
[2]),
353 of
.sticky
.eq(tot
[1] | tot
[0]),
361 of
.round_bit
.eq(tot
[1]),
366 # First stage of normalisation.
368 with m
.State("normalise_1"):
369 self
.normalise_1(m
, z
, of
, "normalise_2")
372 # Second stage of normalisation.
374 with m
.State("normalise_2"):
375 self
.normalise_2(m
, z
, of
, "round")
380 with m
.State("round"):
381 self
.roundz(m
, z
, of
, "corrections")
386 with m
.State("corrections"):
387 self
.corrections(m
, z
, "pack")
392 with m
.State("pack"):
393 self
.pack(m
, z
, "put_z")
398 with m
.State("put_z"):
399 self
.put_z(m
, z
, self
.out_z
, "get_a")
if __name__ == "__main__":
    # Script entry point: build a 32-bit wide FPADD and hand it to the
    # nmigen command-line driver, exposing the ports of both input
    # operands and of the output operand.
    alu = FPADD(width=32)
    main(alu, ports=alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports())
409 # works... but don't use, just do "python fname.py convert -t v"
410 #print (verilog.convert(alu, ports=[
411 # ports=alu.in_a.ports() + \
412 # alu.in_b.ports() + \