1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Signal
, Cat
, Const
, Mux
, Module
7 from operator
import or_
8 from functools
import reduce
12 def __init__(self
, width
):
14 self
.smax
= int(log(width
) / log(2))
15 self
.i
= Signal(width
)
16 self
.s
= Signal(self
.smax
)
17 self
.o
= Signal(width
)
19 def elaborate(self
, platform
):
21 m
.d
.comb
+= self
.o
.eq(self
.i
>> self
.s
)
26 """ Generates variable-length single-cycle shifter from a series
27 of conditional tests on each bit of the left/right shift operand.
28 Each bit tested produces output shifted by that number of bits,
29 in a binary fashion: bit 1 if set shifts by 1 bit, bit 2 if set
30 shifts by 2 bits, each partial result cascading to the next Mux.
32 Could be adapted to do arithmetic shift by taking copies of the
36 def __init__(self
, width
):
38 self
.smax
= int(log(width
) / log(2))
40 def lshift(self
, op
, s
):
44 for i
in range(self
.smax
):
46 res
= Mux(s
& (1<<i
), Cat(zeros
, res
[0:-(1<<i
)]), res
)
49 def rshift(self
, op
, s
):
53 for i
in range(self
.smax
):
55 res
= Mux(s
& (1<<i
), Cat(res
[(1<<i
):], zeros
), res
)
60 """ Floating-point Number Class, variable-width TODO (currently 32-bit)
62 Contains signals for an incoming copy of the value, decoded into
63 sign / exponent / mantissa.
64 Also contains encoding functions, creation and recognition of
65 zero, NaN and inf (all signed)
67 Four extra bits are included in the mantissa: the top bit
68 (m[-1]) is effectively a carry-overflow. The other three are
69 guard (m[2]), round (m[1]), and sticky (m[0])
71 def __init__(self
, width
, m_extra
=True):
73 m_width
= {32: 24, 64: 53}[width
]
74 e_width
= {32: 10, 64: 13}[width
]
75 e_max
= 1<<(e_width
-3)
76 self
.rmw
= m_width
# real mantissa width (not including extras)
79 # mantissa extra bits (top,guard,round)
81 m_width
+= self
.m_extra
84 #print (m_width, e_width, e_max, self.rmw, self.m_extra)
85 self
.m_width
= m_width
86 self
.e_width
= e_width
87 self
.e_start
= self
.rmw
- 1
88 self
.e_end
= self
.rmw
+ self
.e_width
- 3 # for decoding
90 self
.v
= Signal(width
) # Latched copy of value
91 self
.m
= Signal(m_width
) # Mantissa
92 self
.e
= Signal((e_width
, True)) # Exponent: 10 bits, signed
93 self
.s
= Signal() # Sign bit
95 self
.mzero
= Const(0, (m_width
, False))
96 self
.m1s
= Const(-1, (m_width
, False))
97 self
.P128
= Const(e_max
, (e_width
, True))
98 self
.P127
= Const(e_max
-1, (e_width
, True))
99 self
.N127
= Const(-(e_max
-1), (e_width
, True))
100 self
.N126
= Const(-(e_max
-2), (e_width
, True))
103 """ decodes a latched value into sign / exponent / mantissa
105 bias is subtracted here, from the exponent. exponent
106 is extended to 10 bits so that subtract 127 is done on
109 args
= [0] * self
.m_extra
+ [v
[0:self
.e_start
]] # pad with extra zeros
110 #print ("decode", self.e_end)
111 return [self
.m
.eq(Cat(*args
)), # mantissa
112 self
.e
.eq(v
[self
.e_start
:self
.e_end
] - self
.P127
), # exp
113 self
.s
.eq(v
[-1]), # sign
116 def create(self
, s
, e
, m
):
117 """ creates a value from sign / exponent / mantissa
119 bias is added here, to the exponent
122 self
.v
[-1].eq(s
), # sign
123 self
.v
[self
.e_start
:self
.e_end
].eq(e
+ self
.P127
), # exp (add on bias)
124 self
.v
[0:self
.e_start
].eq(m
) # mantissa
127 def shift_down(self
):
128 """ shifts a mantissa down by one. exponent is increased to compensate
130 accuracy is lost as a result in the mantissa however there are 3
131 guard bits (the latter of which is the "sticky" bit)
133 return [self
.e
.eq(self
.e
+ 1),
134 self
.m
.eq(Cat(self
.m
[0] | self
.m
[1], self
.m
[2:], 0))
137 def shift_down_multi(self
, diff
):
138 """ shifts a mantissa down. exponent is increased to compensate
140 accuracy is lost as a result in the mantissa however there are 3
141 guard bits (the latter of which is the "sticky" bit)
143 this code works by variable-shifting the mantissa by up to
144 its maximum bit-length: no point doing more (it'll still be
147 the sticky bit is computed by shifting a batch of 1s by
148 the same amount, which will introduce zeros. it's then
149 inverted and used as a mask to get the LSBs of the mantissa.
150 those are then |'d into the sticky bit.
152 sm
= MultiShift(self
.width
)
153 mw
= Const(self
.m_width
-1, len(diff
))
154 maxslen
= Mux(diff
> mw
, mw
, diff
)
155 rs
= sm
.rshift(self
.m
[1:], maxslen
)
156 maxsleni
= mw
- maxslen
157 m_mask
= sm
.rshift(self
.m1s
[1:], maxsleni
) # shift and invert
159 stickybits
= reduce(or_
, self
.m
[1:] & m_mask
) | self
.m
[0]
160 return [self
.e
.eq(self
.e
+ diff
),
161 self
.m
.eq(Cat(stickybits
, rs
))
165 return self
.create(s
, self
.P128
, 1<<(self
.e_start
-1))
168 return self
.create(s
, self
.P128
, 0)
171 return self
.create(s
, self
.N127
, 0)
174 return (self
.e
== self
.P128
) & (self
.m
!= 0)
177 return (self
.e
== self
.P128
) & (self
.m
== 0)
180 return (self
.e
== self
.N127
) & (self
.m
== self
.mzero
)
182 def is_overflowed(self
):
183 return (self
.e
> self
.P127
)
185 def is_denormalised(self
):
186 return (self
.e
== self
.N126
) & (self
.m
[self
.e_start
] == 0)
190 def __init__(self
, width
):
193 self
.v
= Signal(width
)
198 return [self
.v
, self
.stb
, self
.ack
]
203 self
.guard
= Signal() # tot[2]
204 self
.round_bit
= Signal() # tot[1]
205 self
.sticky
= Signal() # tot[0]
209 """ IEEE754 Floating Point Base Class
211 contains common functions for FP manipulation, such as
212 extracting and packing operands, normalisation, denormalisation,
216 def get_op(self
, m
, op
, v
, next_state
):
217 """ this function moves to the next state and copies the operand
218 when both stb and ack are 1.
219 acknowledgement is sent by setting ack to ZERO.
221 with m
.If((op
.ack
) & (op
.stb
)):
228 m
.d
.sync
+= op
.ack
.eq(1)
230 def denormalise(self
, m
, a
):
231 """ denormalises a number. this is probably the wrong name for
232 this function. for normalised numbers (exponent != minimum)
233 one *extra* bit (the implicit 1) is added *back in*.
234 for denormalised numbers, the mantissa is left alone
235 and the exponent increased by 1.
237 both cases *effectively multiply the number stored by 2*,
238 which has to be taken into account when extracting the result.
240 with m
.If(a
.e
== a
.N127
):
241 m
.d
.sync
+= a
.e
.eq(a
.N126
) # limit a exponent
243 m
.d
.sync
+= a
.m
[-1].eq(1) # set top mantissa bit
245 def op_normalise(self
, m
, op
, next_state
):
246 """ operand normalisation
247 NOTE: just like "align", this one keeps going round every clock
248 until the result's exponent is within acceptable "range"
250 with m
.If((op
.m
[-1] == 0)): # check last bit of mantissa
252 op
.e
.eq(op
.e
- 1), # DECREASE exponent
253 op
.m
.eq(op
.m
<< 1), # shift mantissa UP
258 def normalise_1(self
, m
, z
, of
, next_state
):
259 """ first stage normalisation
261 NOTE: just like "align", this one keeps going round every clock
262 until the result's exponent is within acceptable "range"
263 NOTE: the weirdness of reassigning guard and round is due to
264 the extra mantissa bits coming from tot[0..2]
266 with m
.If((z
.m
[-1] == 0) & (z
.e
> z
.N126
)):
268 z
.e
.eq(z
.e
- 1), # DECREASE exponent
269 z
.m
.eq(z
.m
<< 1), # shift mantissa UP
270 z
.m
[0].eq(of
.guard
), # steal guard bit (was tot[2])
271 of
.guard
.eq(of
.round_bit
), # steal round_bit (was tot[1])
272 of
.round_bit
.eq(0), # reset round bit
277 def normalise_2(self
, m
, z
, of
, next_state
):
278 """ second stage normalisation
280 NOTE: just like "align", this one keeps going round every clock
281 until the result's exponent is within acceptable "range"
282 NOTE: the weirdness of reassigning guard and round is due to
283 the extra mantissa bits coming from tot[0..2]
285 with m
.If(z
.e
< z
.N126
):
287 z
.e
.eq(z
.e
+ 1), # INCREASE exponent
288 z
.m
.eq(z
.m
>> 1), # shift mantissa DOWN
290 of
.round_bit
.eq(of
.guard
),
291 of
.sticky
.eq(of
.sticky | of
.round_bit
)
296 def roundz(self
, m
, z
, of
, next_state
):
297 """ performs rounding on the output. TODO: different kinds of rounding
300 with m
.If(of
.guard
& (of
.round_bit | of
.sticky | z
.m
[0])):
301 m
.d
.sync
+= z
.m
.eq(z
.m
+ 1) # mantissa rounds up
302 with m
.If(z
.m
== z
.m1s
): # all 1s
303 m
.d
.sync
+= z
.e
.eq(z
.e
+ 1) # exponent rounds up
305 def corrections(self
, m
, z
, next_state
):
306 """ denormalisation and sign-bug corrections
309 # denormalised, correct exponent to zero
310 with m
.If(z
.is_denormalised()):
311 m
.d
.sync
+= z
.m
.eq(z
.N127
)
312 # FIX SIGN BUG: -a + a = +0.
313 with m
.If((z
.e
== z
.N126
) & (z
.m
[0:] == 0)):
314 m
.d
.sync
+= z
.s
.eq(0)
316 def pack(self
, m
, z
, next_state
):
317 """ packs the result into the output (detects overflow->Inf)
320 # if overflow occurs, return inf
321 with m
.If(z
.is_overflowed()):
324 m
.d
.sync
+= z
.create(z
.s
, z
.e
, z
.m
)
326 def put_z(self
, m
, z
, out_z
, next_state
):
327 """ put_z: stores the result in the output. raises stb and waits
328 for ack to be set to 1 before moving to the next state.
329 resets stb back to zero when that occurs, as acknowledgement.
335 with m
.If(out_z
.stb
& out_z
.ack
):
336 m
.d
.sync
+= out_z
.stb
.eq(0)