1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
9 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
10 from fpbase
import MultiShiftRMerge
11 #from fpbase import FPNumShiftMultiRight
class FPState(FPBase):
    """Base class for one named state of the adder's state machine.

    ``state_from`` is the name under which the state is registered with
    the FSM (it is what ``m.State(...)`` is opened with).
    """

    def __init__(self, state_from):
        self.state_from = state_from

    def set_inputs(self, inputs):
        """Record *inputs* (a dict) and expose each entry as an attribute.

        Mirrors set_outputs: the dict itself is kept as ``self.inputs``.
        """
        self.inputs = inputs
        # NOTE(review): the loop body was absent in the reviewed text;
        # binding each key as an attribute is the only reading consistent
        # with the method name -- confirm against the upstream file.
        for name, value in inputs.items():
            setattr(self, name, value)

    def set_outputs(self, outputs):
        """Record *outputs* (a dict) and expose each entry as an attribute."""
        self.outputs = outputs
        # NOTE(review): loop body restored, see set_inputs.
        for name, value in outputs.items():
            setattr(self, name, value)
29 def __init__(self
, width
):
30 self
.in_op
= FPOp(width
)
31 self
.out_op
= FPNumIn(self
.in_op
, width
)
32 self
.out_decode
= Signal(reset_less
=True)
34 def elaborate(self
, platform
):
36 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
37 #m.submodules.get_op_in = self.in_op
38 m
.submodules
.get_op_out
= self
.out_op
39 with m
.If(self
.out_decode
):
41 self
.out_op
.decode(self
.in_op
.v
),
46 class FPGetOp(FPState
):
50 def __init__(self
, in_state
, out_state
, in_op
, width
):
51 FPState
.__init
__(self
, in_state
)
52 self
.out_state
= out_state
53 self
.mod
= FPGetOpMod(width
)
55 self
.out_op
= FPNumIn(in_op
, width
)
56 self
.out_decode
= Signal(reset_less
=True)
58 def setup(self
, m
, in_op
):
59 """ links module to inputs and outputs
61 setattr(m
.submodules
, self
.state_from
, self
.mod
)
62 m
.d
.comb
+= self
.mod
.in_op
.copy(in_op
)
63 m
.d
.comb
+= self
.out_op
.v
.eq(self
.mod
.out_op
.v
)
64 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
67 with m
.If(self
.out_decode
):
68 m
.next
= self
.out_state
71 self
.out_op
.copy(self
.mod
.out_op
)
74 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
77 class FPAddSpecialCasesMod
:
78 """ special cases: NaNs, infs, zeros, denormalised
79 NOTE: some of these are unique to add. see "Special Operations"
80 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
83 def __init__(self
, width
):
84 self
.in_a
= FPNumBase(width
)
85 self
.in_b
= FPNumBase(width
)
86 self
.out_z
= FPNumOut(width
, False)
87 self
.out_do_z
= Signal(reset_less
=True)
89 def elaborate(self
, platform
):
92 m
.submodules
.sc_in_a
= self
.in_a
93 m
.submodules
.sc_in_b
= self
.in_b
94 m
.submodules
.sc_out_z
= self
.out_z
97 m
.d
.comb
+= s_nomatch
.eq(self
.in_a
.s
!= self
.in_b
.s
)
100 m
.d
.comb
+= m_match
.eq(self
.in_a
.m
== self
.in_b
.m
)
102 # if a is NaN or b is NaN return NaN
103 with m
.If(self
.in_a
.is_nan | self
.in_b
.is_nan
):
104 m
.d
.comb
+= self
.out_do_z
.eq(1)
105 m
.d
.comb
+= self
.out_z
.nan(0)
107 # XXX WEIRDNESS for FP16 non-canonical NaN handling
110 ## if a is zero and b is NaN return -b
111 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
112 # m.d.comb += self.out_do_z.eq(1)
113 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
115 ## if b is zero and a is NaN return -a
116 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
117 # m.d.comb += self.out_do_z.eq(1)
118 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
120 ## if a is -zero and b is NaN return -b
121 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
122 # m.d.comb += self.out_do_z.eq(1)
123 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
125 ## if b is -zero and a is NaN return -a
126 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
127 # m.d.comb += self.out_do_z.eq(1)
128 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
130 # if a is inf return inf (or NaN)
131 with m
.Elif(self
.in_a
.is_inf
):
132 m
.d
.comb
+= self
.out_do_z
.eq(1)
133 m
.d
.comb
+= self
.out_z
.inf(self
.in_a
.s
)
134 # if a is inf and signs don't match return NaN
135 with m
.If(self
.in_b
.exp_128
& s_nomatch
):
136 m
.d
.comb
+= self
.out_z
.nan(0)
138 # if b is inf return inf
139 with m
.Elif(self
.in_b
.is_inf
):
140 m
.d
.comb
+= self
.out_do_z
.eq(1)
141 m
.d
.comb
+= self
.out_z
.inf(self
.in_b
.s
)
143 # if a is zero and b zero return signed-a/b
144 with m
.Elif(self
.in_a
.is_zero
& self
.in_b
.is_zero
):
145 m
.d
.comb
+= self
.out_do_z
.eq(1)
146 m
.d
.comb
+= self
.out_z
.create(self
.in_a
.s
& self
.in_b
.s
,
150 # if a is zero return b
151 with m
.Elif(self
.in_a
.is_zero
):
152 m
.d
.comb
+= self
.out_do_z
.eq(1)
153 m
.d
.comb
+= self
.out_z
.create(self
.in_b
.s
, self
.in_b
.e
,
156 # if b is zero return a
157 with m
.Elif(self
.in_b
.is_zero
):
158 m
.d
.comb
+= self
.out_do_z
.eq(1)
159 m
.d
.comb
+= self
.out_z
.create(self
.in_a
.s
, self
.in_a
.e
,
162 # if a equal to -b return zero (+ve zero)
163 with m
.Elif(s_nomatch
& m_match
& (self
.in_a
.e
== self
.in_b
.e
)):
164 m
.d
.comb
+= self
.out_do_z
.eq(1)
165 m
.d
.comb
+= self
.out_z
.zero(0)
167 # Denormalised Number checks
169 m
.d
.comb
+= self
.out_do_z
.eq(0)
def __init__(self, id_wid):
    """Create the identifier signals carried alongside an operation.

    id_wid: bit-width of the identifier signals.

    ``in_mid`` receives the identifier from the previous stage and
    ``out_mid`` is the registered copy (it is assigned from ``in_mid`` in
    the sync domain elsewhere in this class).
    """
    # kept so that owners can forward the width to the stages they build
    self.id_wid = id_wid
    # The signals are sized by the identifier width, not the FP width:
    # neither ``width`` nor a bare ``reset_less`` exists in this scope,
    # and reset_less must be passed by keyword (the second positional
    # argument of Signal is not reset_less).
    self.in_mid = Signal(id_wid, reset_less=True)
    self.out_mid = Signal(id_wid, reset_less=True)
186 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
189 class FPAddSpecialCases(FPState
, FPID
):
190 """ special cases: NaNs, infs, zeros, denormalised
191 NOTE: some of these are unique to add. see "Special Operations"
192 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
195 def __init__(self
, width
, id_wid
):
196 FPState
.__init
__(self
, "special_cases")
197 FPID
.__init
__(self
, id_wid
)
198 self
.mod
= FPAddSpecialCasesMod(width
)
199 self
.out_z
= FPNumOut(width
, False)
200 self
.out_do_z
= Signal(reset_less
=True)
202 def setup(self
, m
, in_a
, in_b
, in_mid
):
203 """ links module to inputs and outputs
205 m
.submodules
.specialcases
= self
.mod
206 m
.d
.comb
+= self
.mod
.in_a
.copy(in_a
)
207 m
.d
.comb
+= self
.mod
.in_b
.copy(in_b
)
208 #m.d.comb += self.out_z.v.eq(self.mod.out_z.v)
209 m
.d
.comb
+= self
.out_do_z
.eq(self
.mod
.out_do_z
)
211 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
215 with m
.If(self
.out_do_z
):
216 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
219 m
.next
= "denormalise"
class FPAddDeNormMod(FPState):
    """Combinatorial de-normalisation fix-up for both adder operands.

    Each operand is copied from input to output; then, depending on the
    operand's ``exp_n127`` flag, either its exponent is forced to ``N126``
    or the top bit of its mantissa is set.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def elaborate(self, platform):
        """Build and return the nMigen module for this stage."""
        m = Module()
        m.submodules.denorm_in_a = self.in_a
        m.submodules.denorm_in_b = self.in_b
        m.submodules.denorm_out_a = self.out_a
        m.submodules.denorm_out_b = self.out_b
        # same treatment for both operands
        self._denormalise(m, self.in_a, self.out_a)
        self._denormalise(m, self.in_b, self.out_b)
        return m

    @staticmethod
    def _denormalise(m, num_in, num_out):
        """Copy num_in to num_out, then apply the exponent/mantissa fix."""
        m.d.comb += num_out.copy(num_in)
        with m.If(num_in.exp_n127):
            # limit this operand's exponent
            m.d.comb += num_out.e.eq(num_in.N126)
        # NOTE(review): the Else was not present in the reviewed text; it
        # is restored because setting the top mantissa bit for an operand
        # flagged exp_n127 would contradict the exponent-limit branch.
        # Confirm against the upstream file.
        with m.Else():
            # set top mantissa bit
            m.d.comb += num_out.m[-1].eq(1)
252 class FPAddDeNorm(FPState
, FPID
):
254 def __init__(self
, width
, id_wid
):
255 FPState
.__init
__(self
, "denormalise")
256 FPID
.__init
__(self
, id_wid
)
257 self
.mod
= FPAddDeNormMod(width
)
258 self
.out_a
= FPNumBase(width
)
259 self
.out_b
= FPNumBase(width
)
261 def setup(self
, m
, in_a
, in_b
, in_mid
):
262 """ links module to inputs and outputs
264 m
.submodules
.denormalise
= self
.mod
265 m
.d
.comb
+= self
.mod
.in_a
.copy(in_a
)
266 m
.d
.comb
+= self
.mod
.in_b
.copy(in_b
)
268 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
272 # Denormalised Number checks
274 m
.d
.sync
+= self
.out_a
.copy(self
.mod
.out_a
)
275 m
.d
.sync
+= self
.out_b
.copy(self
.mod
.out_b
)
278 class FPAddAlignMultiMod(FPState
):
280 def __init__(self
, width
):
281 self
.in_a
= FPNumBase(width
)
282 self
.in_b
= FPNumBase(width
)
283 self
.out_a
= FPNumIn(None, width
)
284 self
.out_b
= FPNumIn(None, width
)
285 self
.exp_eq
= Signal(reset_less
=True)
287 def elaborate(self
, platform
):
288 # This one however (single-cycle) will do the shift
293 m
.submodules
.align_in_a
= self
.in_a
294 m
.submodules
.align_in_b
= self
.in_b
295 m
.submodules
.align_out_a
= self
.out_a
296 m
.submodules
.align_out_b
= self
.out_b
298 # NOTE: this does *not* do single-cycle multi-shifting,
299 # it *STAYS* in the align state until exponents match
301 # exponent of a greater than b: shift b down
302 m
.d
.comb
+= self
.exp_eq
.eq(0)
303 m
.d
.comb
+= self
.out_a
.copy(self
.in_a
)
304 m
.d
.comb
+= self
.out_b
.copy(self
.in_b
)
305 agtb
= Signal(reset_less
=True)
306 altb
= Signal(reset_less
=True)
307 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
308 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
310 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
311 # exponent of b greater than a: shift a down
313 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
314 # exponents equal: move to next stage.
316 m
.d
.comb
+= self
.exp_eq
.eq(1)
320 class FPAddAlignMulti(FPState
, FPID
):
322 def __init__(self
, width
, id_wid
):
323 FPID
.__init
__(self
, id_wid
)
324 FPState
.__init
__(self
, "align")
325 self
.mod
= FPAddAlignMultiMod(width
)
326 self
.out_a
= FPNumIn(None, width
)
327 self
.out_b
= FPNumIn(None, width
)
328 self
.exp_eq
= Signal(reset_less
=True)
330 def setup(self
, m
, in_a
, in_b
, in_mid
):
331 """ links module to inputs and outputs
333 m
.submodules
.align
= self
.mod
334 m
.d
.comb
+= self
.mod
.in_a
.copy(in_a
)
335 m
.d
.comb
+= self
.mod
.in_b
.copy(in_b
)
336 #m.d.comb += self.out_a.copy(self.mod.out_a)
337 #m.d.comb += self.out_b.copy(self.mod.out_b)
338 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
340 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
344 m
.d
.sync
+= self
.out_a
.copy(self
.mod
.out_a
)
345 m
.d
.sync
+= self
.out_b
.copy(self
.mod
.out_b
)
346 with m
.If(self
.exp_eq
):
350 class FPAddAlignSingleMod
:
def __init__(self, width):
    """Create the operand ports of the single-cycle aligner.

    width: passed to FPNumBase / FPNumIn for every port.
    """
    # elaborate() builds its temporaries from self.width, so it has to be
    # stored here
    self.width = width
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
359 def elaborate(self
, platform
):
360 """ Aligns A against B or B against A, depending on which has the
361 greater exponent. This is done in a *single* cycle using
362 variable-width bit-shift
364 the shifter used here is quite expensive in terms of gates.
365 Mux A or B in (and out) into temporaries, as only one of them
366 needs to be aligned against the other
370 m
.submodules
.align_in_a
= self
.in_a
371 m
.submodules
.align_in_b
= self
.in_b
372 m
.submodules
.align_out_a
= self
.out_a
373 m
.submodules
.align_out_b
= self
.out_b
375 # temporary (muxed) input and output to be shifted
376 t_inp
= FPNumBase(self
.width
)
377 t_out
= FPNumIn(None, self
.width
)
378 espec
= (len(self
.in_a
.e
), True)
379 msr
= MultiShiftRMerge(self
.in_a
.m_width
, espec
)
380 m
.submodules
.align_t_in
= t_inp
381 m
.submodules
.align_t_out
= t_out
382 m
.submodules
.multishift_r
= msr
384 ediff
= Signal(espec
, reset_less
=True)
385 ediffr
= Signal(espec
, reset_less
=True)
386 tdiff
= Signal(espec
, reset_less
=True)
387 elz
= Signal(reset_less
=True)
388 egz
= Signal(reset_less
=True)
390 # connect multi-shifter to t_inp/out mantissa (and tdiff)
391 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
392 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
393 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
394 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
395 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
397 m
.d
.comb
+= ediff
.eq(self
.in_a
.e
- self
.in_b
.e
)
398 m
.d
.comb
+= ediffr
.eq(self
.in_b
.e
- self
.in_a
.e
)
399 m
.d
.comb
+= elz
.eq(self
.in_a
.e
< self
.in_b
.e
)
400 m
.d
.comb
+= egz
.eq(self
.in_a
.e
> self
.in_b
.e
)
402 # default: A-exp == B-exp, A and B untouched (fall through)
403 m
.d
.comb
+= self
.out_a
.copy(self
.in_a
)
404 m
.d
.comb
+= self
.out_b
.copy(self
.in_b
)
405 # only one shifter (muxed)
406 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
407 # exponent of a greater than b: shift b down
409 m
.d
.comb
+= [t_inp
.copy(self
.in_b
),
411 self
.out_b
.copy(t_out
),
412 self
.out_b
.s
.eq(self
.in_b
.s
), # whoops forgot sign
414 # exponent of b greater than a: shift a down
416 m
.d
.comb
+= [t_inp
.copy(self
.in_a
),
418 self
.out_a
.copy(t_out
),
419 self
.out_a
.s
.eq(self
.in_a
.s
), # whoops forgot sign
424 class FPAddAlignSingle(FPState
, FPID
):
426 def __init__(self
, width
, id_wid
):
427 FPState
.__init
__(self
, "align")
428 FPID
.__init
__(self
, id_wid
)
429 self
.mod
= FPAddAlignSingleMod(width
)
430 self
.out_a
= FPNumIn(None, width
)
431 self
.out_b
= FPNumIn(None, width
)
433 def setup(self
, m
, in_a
, in_b
, in_mid
):
434 """ links module to inputs and outputs
436 m
.submodules
.align
= self
.mod
437 m
.d
.comb
+= self
.mod
.in_a
.copy(in_a
)
438 m
.d
.comb
+= self
.mod
.in_b
.copy(in_b
)
440 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
444 # NOTE: could be done as comb
445 m
.d
.sync
+= self
.out_a
.copy(self
.mod
.out_a
)
446 m
.d
.sync
+= self
.out_b
.copy(self
.mod
.out_b
)
450 class FPAddStage0Mod
:
452 def __init__(self
, width
):
453 self
.in_a
= FPNumBase(width
)
454 self
.in_b
= FPNumBase(width
)
455 self
.in_z
= FPNumBase(width
, False)
456 self
.out_z
= FPNumBase(width
, False)
457 self
.out_tot
= Signal(self
.out_z
.m_width
+ 4, reset_less
=True)
459 def elaborate(self
, platform
):
461 m
.submodules
.add0_in_a
= self
.in_a
462 m
.submodules
.add0_in_b
= self
.in_b
463 m
.submodules
.add0_out_z
= self
.out_z
465 m
.d
.comb
+= self
.out_z
.e
.eq(self
.in_a
.e
)
467 # store intermediate tests (and zero-extended mantissas)
468 seq
= Signal(reset_less
=True)
469 mge
= Signal(reset_less
=True)
470 am0
= Signal(len(self
.in_a
.m
)+1, reset_less
=True)
471 bm0
= Signal(len(self
.in_b
.m
)+1, reset_less
=True)
472 m
.d
.comb
+= [seq
.eq(self
.in_a
.s
== self
.in_b
.s
),
473 mge
.eq(self
.in_a
.m
>= self
.in_b
.m
),
474 am0
.eq(Cat(self
.in_a
.m
, 0)),
475 bm0
.eq(Cat(self
.in_b
.m
, 0))
477 # same-sign (both negative or both positive) add mantissas
480 self
.out_tot
.eq(am0
+ bm0
),
481 self
.out_z
.s
.eq(self
.in_a
.s
)
483 # a mantissa greater than b, use a
486 self
.out_tot
.eq(am0
- bm0
),
487 self
.out_z
.s
.eq(self
.in_a
.s
)
489 # b mantissa greater than a, use b
492 self
.out_tot
.eq(bm0
- am0
),
493 self
.out_z
.s
.eq(self
.in_b
.s
)
498 class FPAddStage0(FPState
, FPID
):
499 """ First stage of add. covers same-sign (add) and subtract
500 special-casing when mantissas are greater or equal, to
501 give greatest accuracy.
504 def __init__(self
, width
, id_wid
):
505 FPState
.__init
__(self
, "add_0")
506 FPID
.__init
__(self
, id_wid
)
507 self
.mod
= FPAddStage0Mod(width
)
508 self
.out_z
= FPNumBase(width
, False)
509 self
.out_tot
= Signal(self
.out_z
.m_width
+ 4, reset_less
=True)
511 def setup(self
, m
, in_a
, in_b
, in_mid
):
512 """ links module to inputs and outputs
514 m
.submodules
.add0
= self
.mod
515 m
.d
.comb
+= self
.mod
.in_a
.copy(in_a
)
516 m
.d
.comb
+= self
.mod
.in_b
.copy(in_b
)
518 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
522 # NOTE: these could be done as combinatorial (merge add0+add1)
523 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
524 m
.d
.sync
+= self
.out_tot
.eq(self
.mod
.out_tot
)
528 class FPAddStage1Mod(FPState
):
529 """ Second stage of add: preparation for normalisation.
530 detects when tot sum is too big (tot[27] is kinda a carry bit)
533 def __init__(self
, width
):
534 self
.out_norm
= Signal(reset_less
=True)
535 self
.in_z
= FPNumBase(width
, False)
536 self
.in_tot
= Signal(self
.in_z
.m_width
+ 4, reset_less
=True)
537 self
.out_z
= FPNumBase(width
, False)
538 self
.out_of
= Overflow()
540 def elaborate(self
, platform
):
542 #m.submodules.norm1_in_overflow = self.in_of
543 #m.submodules.norm1_out_overflow = self.out_of
544 #m.submodules.norm1_in_z = self.in_z
545 #m.submodules.norm1_out_z = self.out_z
546 m
.d
.comb
+= self
.out_z
.copy(self
.in_z
)
547 # tot[27] gets set when the sum overflows. shift result down
548 with m
.If(self
.in_tot
[-1]):
550 self
.out_z
.m
.eq(self
.in_tot
[4:]),
551 self
.out_of
.m0
.eq(self
.in_tot
[4]),
552 self
.out_of
.guard
.eq(self
.in_tot
[3]),
553 self
.out_of
.round_bit
.eq(self
.in_tot
[2]),
554 self
.out_of
.sticky
.eq(self
.in_tot
[1] | self
.in_tot
[0]),
555 self
.out_z
.e
.eq(self
.in_z
.e
+ 1)
560 self
.out_z
.m
.eq(self
.in_tot
[3:]),
561 self
.out_of
.m0
.eq(self
.in_tot
[3]),
562 self
.out_of
.guard
.eq(self
.in_tot
[2]),
563 self
.out_of
.round_bit
.eq(self
.in_tot
[1]),
564 self
.out_of
.sticky
.eq(self
.in_tot
[0])
569 class FPAddStage1(FPState
):
571 def __init__(self
, width
):
572 FPState
.__init
__(self
, "add_1")
573 self
.mod
= FPAddStage1Mod(width
)
574 self
.out_z
= FPNumBase(width
, False)
575 self
.out_of
= Overflow()
576 self
.norm_stb
= Signal()
578 def setup(self
, m
, in_tot
, in_z
):
579 """ links module to inputs and outputs
581 m
.submodules
.add1
= self
.mod
583 m
.d
.comb
+= self
.mod
.in_z
.copy(in_z
)
584 m
.d
.comb
+= self
.mod
.in_tot
.eq(in_tot
)
586 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
589 m
.submodules
.add1_out_overflow
= self
.out_of
590 m
.d
.sync
+= self
.out_of
.copy(self
.mod
.out_of
)
591 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
592 m
.d
.sync
+= self
.norm_stb
.eq(1)
593 m
.next
= "normalise_1"
596 class FPNorm1ModSingle
:
def __init__(self, width):
    """Create the ports of the single-cycle normalisation module.

    width: passed to FPNumBase for every number port.

    Two candidate inputs exist (in_z/in_of and temp_z/temp_of);
    ``in_select`` chooses between them in elaborate().
    """
    # elaborate() creates its muxed input number from self.width
    self.width = width
    self.in_select = Signal(reset_less=True)
    self.out_norm = Signal(reset_less=True)
    self.in_z = FPNumBase(width, False)
    self.in_of = Overflow()
    self.temp_z = FPNumBase(width, False)
    self.temp_of = Overflow()
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
609 def elaborate(self
, platform
):
612 mwid
= self
.out_z
.m_width
+2
613 pe
= PriorityEncoder(mwid
)
614 m
.submodules
.norm_pe
= pe
616 m
.submodules
.norm1_out_z
= self
.out_z
617 m
.submodules
.norm1_out_overflow
= self
.out_of
618 m
.submodules
.norm1_temp_z
= self
.temp_z
619 m
.submodules
.norm1_temp_of
= self
.temp_of
620 m
.submodules
.norm1_in_z
= self
.in_z
621 m
.submodules
.norm1_in_overflow
= self
.in_of
623 in_z
= FPNumBase(self
.width
, False)
625 m
.submodules
.norm1_insel_z
= in_z
626 m
.submodules
.norm1_insel_overflow
= in_of
628 espec
= (len(in_z
.e
), True)
629 ediff_n126
= Signal(espec
, reset_less
=True)
630 msr
= MultiShiftRMerge(mwid
, espec
)
631 m
.submodules
.multishift_r
= msr
633 # select which of temp or in z/of to use
634 with m
.If(self
.in_select
):
635 m
.d
.comb
+= in_z
.copy(self
.in_z
)
636 m
.d
.comb
+= in_of
.copy(self
.in_of
)
638 m
.d
.comb
+= in_z
.copy(self
.temp_z
)
639 m
.d
.comb
+= in_of
.copy(self
.temp_of
)
640 # initialise out from in (overridden below)
641 m
.d
.comb
+= self
.out_z
.copy(in_z
)
642 m
.d
.comb
+= self
.out_of
.copy(in_of
)
643 # normalisation increase/decrease conditions
644 decrease
= Signal(reset_less
=True)
645 increase
= Signal(reset_less
=True)
646 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
647 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
648 m
.d
.comb
+= self
.out_norm
.eq(0) # loop-end condition
651 # *sigh* not entirely obvious: count leading zeros (clz)
652 # with a PriorityEncoder: to find from the MSB
653 # we reverse the order of the bits.
654 temp_m
= Signal(mwid
, reset_less
=True)
655 temp_s
= Signal(mwid
+1, reset_less
=True)
656 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
657 # make sure that the amount to decrease by does NOT
658 # go below the minimum non-INF/NaN exponent
659 limclz
= Mux(in_z
.exp_sub_n126
> pe
.o
, pe
.o
,
662 # cat round and guard bits back into the mantissa
663 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
664 pe
.i
.eq(temp_m
[::-1]), # inverted
665 clz
.eq(limclz
), # count zeros from MSB down
666 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
667 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
668 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
669 self
.out_of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
670 # overflow in bits 0..1: got shifted too (leave sticky)
671 self
.out_of
.guard
.eq(temp_s
[1]), # guard
672 self
.out_of
.round_bit
.eq(temp_s
[0]), # round
675 with m
.Elif(increase
):
676 temp_m
= Signal(mwid
+1, reset_less
=True)
678 temp_m
.eq(Cat(in_of
.sticky
, in_of
.round_bit
, in_of
.guard
,
680 ediff_n126
.eq(in_z
.N126
- in_z
.e
),
681 # connect multi-shifter to inp/out mantissa (and ediff)
683 msr
.diff
.eq(ediff_n126
),
684 self
.out_z
.m
.eq(msr
.m
[3:]),
685 self
.out_of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
686 # overflow in bits 0..1: got shifted too (leave sticky)
687 self
.out_of
.guard
.eq(temp_s
[2]), # guard
688 self
.out_of
.round_bit
.eq(temp_s
[1]), # round
689 self
.out_of
.sticky
.eq(temp_s
[0]), # sticky
690 self
.out_z
.e
.eq(in_z
.e
+ ediff_n126
),
696 class FPNorm1ModMulti
:
def __init__(self, width, single_cycle=True):
    """Create the ports of the multi-cycle normalisation module.

    width: passed to FPNumBase for every number port.
    single_cycle: accepted for signature compatibility; not read here.

    Two candidate inputs exist (in_z/in_of and temp_z/temp_of);
    ``in_select`` chooses between them in elaborate().
    """
    # elaborate() creates its muxed input number from self.width
    self.width = width
    self.in_select = Signal(reset_less=True)
    self.out_norm = Signal(reset_less=True)
    self.in_z = FPNumBase(width, False)
    self.in_of = Overflow()
    self.temp_z = FPNumBase(width, False)
    self.temp_of = Overflow()
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
709 def elaborate(self
, platform
):
712 m
.submodules
.norm1_out_z
= self
.out_z
713 m
.submodules
.norm1_out_overflow
= self
.out_of
714 m
.submodules
.norm1_temp_z
= self
.temp_z
715 m
.submodules
.norm1_temp_of
= self
.temp_of
716 m
.submodules
.norm1_in_z
= self
.in_z
717 m
.submodules
.norm1_in_overflow
= self
.in_of
719 in_z
= FPNumBase(self
.width
, False)
721 m
.submodules
.norm1_insel_z
= in_z
722 m
.submodules
.norm1_insel_overflow
= in_of
724 # select which of temp or in z/of to use
725 with m
.If(self
.in_select
):
726 m
.d
.comb
+= in_z
.copy(self
.in_z
)
727 m
.d
.comb
+= in_of
.copy(self
.in_of
)
729 m
.d
.comb
+= in_z
.copy(self
.temp_z
)
730 m
.d
.comb
+= in_of
.copy(self
.temp_of
)
731 # initialise out from in (overridden below)
732 m
.d
.comb
+= self
.out_z
.copy(in_z
)
733 m
.d
.comb
+= self
.out_of
.copy(in_of
)
734 # normalisation increase/decrease conditions
735 decrease
= Signal(reset_less
=True)
736 increase
= Signal(reset_less
=True)
737 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
738 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
739 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
743 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
744 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
745 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
746 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
747 self
.out_of
.round_bit
.eq(0), # reset round bit
748 self
.out_of
.m0
.eq(in_of
.guard
),
751 with m
.Elif(increase
):
753 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
754 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
755 self
.out_of
.guard
.eq(in_z
.m
[0]),
756 self
.out_of
.m0
.eq(in_z
.m
[1]),
757 self
.out_of
.round_bit
.eq(in_of
.guard
),
758 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
764 class FPNorm1(FPState
):
766 def __init__(self
, width
, single_cycle
=True):
767 FPState
.__init
__(self
, "normalise_1")
769 self
.mod
= FPNorm1ModSingle(width
)
771 self
.mod
= FPNorm1ModMulti(width
)
772 self
.stb
= Signal(reset_less
=True)
773 self
.ack
= Signal(reset
=0, reset_less
=True)
774 self
.out_norm
= Signal(reset_less
=True)
775 self
.in_accept
= Signal(reset_less
=True)
776 self
.temp_z
= FPNumBase(width
)
777 self
.temp_of
= Overflow()
778 self
.out_z
= FPNumBase(width
)
779 self
.out_roundz
= Signal(reset_less
=True)
781 def setup(self
, m
, in_z
, in_of
, norm_stb
):
782 """ links module to inputs and outputs
784 m
.submodules
.normalise_1
= self
.mod
786 m
.d
.comb
+= self
.mod
.in_z
.copy(in_z
)
787 m
.d
.comb
+= self
.mod
.in_of
.copy(in_of
)
789 m
.d
.comb
+= self
.mod
.in_select
.eq(self
.in_accept
)
790 m
.d
.comb
+= self
.mod
.temp_z
.copy(self
.temp_z
)
791 m
.d
.comb
+= self
.mod
.temp_of
.copy(self
.temp_of
)
793 m
.d
.comb
+= self
.out_z
.copy(self
.mod
.out_z
)
794 m
.d
.comb
+= self
.out_norm
.eq(self
.mod
.out_norm
)
796 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
797 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
801 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
802 m
.d
.sync
+= self
.temp_of
.copy(self
.mod
.out_of
)
803 m
.d
.sync
+= self
.temp_z
.copy(self
.out_z
)
804 with m
.If(self
.out_norm
):
805 with m
.If(self
.in_accept
):
810 m
.d
.sync
+= self
.ack
.eq(0)
812 # normalisation not required (or done).
814 m
.d
.sync
+= self
.ack
.eq(1)
815 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
820 def __init__(self
, width
):
821 self
.in_roundz
= Signal(reset_less
=True)
822 self
.in_z
= FPNumBase(width
, False)
823 self
.out_z
= FPNumBase(width
, False)
def elaborate(self, platform):
    """Build and return the combinatorial rounding logic.

    out_z defaults to a copy of in_z.  When in_roundz is set the mantissa
    is incremented, and if the mantissa was already all ones the exponent
    is incremented as well.
    """
    m = Module()
    m.d.comb += self.out_z.copy(self.in_z)
    with m.If(self.in_roundz):
        # mantissa rounds up
        m.d.comb += self.out_z.m.eq(self.in_z.m + 1)
        # all 1s: the increment carries out of the mantissa, so the
        # exponent goes up (only meaningful while rounding, hence nested)
        with m.If(self.in_z.m == self.in_z.m1s):
            m.d.comb += self.out_z.e.eq(self.in_z.e + 1)
    return m
835 class FPRound(FPState
):
837 def __init__(self
, width
):
838 FPState
.__init
__(self
, "round")
839 self
.mod
= FPRoundMod(width
)
840 self
.out_z
= FPNumBase(width
)
842 def setup(self
, m
, in_z
, roundz
):
843 """ links module to inputs and outputs
845 m
.submodules
.roundz
= self
.mod
847 m
.d
.comb
+= self
.mod
.in_z
.copy(in_z
)
848 m
.d
.comb
+= self
.mod
.in_roundz
.eq(roundz
)
851 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
852 m
.next
= "corrections"
class FPCorrectionsMod:
    """Combinatorial exponent correction applied before packing.

    out_z is a copy of in_z, except that when in_z reports
    ``is_denormalised`` the output exponent is replaced by ``N127``.
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def elaborate(self, platform):
        """Build and return the nMigen module for this stage."""
        m = Module()
        m.submodules.corr_in_z = self.in_z
        m.submodules.corr_out_z = self.out_z
        # default: pass the number through unchanged
        m.d.comb += self.out_z.copy(self.in_z)
        # override only the exponent for denormalised values
        with m.If(self.in_z.is_denormalised):
            m.d.comb += self.out_z.e.eq(self.in_z.N127)
        return m
871 class FPCorrections(FPState
):
873 def __init__(self
, width
):
874 FPState
.__init
__(self
, "corrections")
875 self
.mod
= FPCorrectionsMod(width
)
876 self
.out_z
= FPNumBase(width
)
878 def setup(self
, m
, in_z
):
879 """ links module to inputs and outputs
881 m
.submodules
.corrections
= self
.mod
882 m
.d
.comb
+= self
.mod
.in_z
.copy(in_z
)
885 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
891 def __init__(self
, width
):
892 self
.in_z
= FPNumOut(width
, False)
893 self
.out_z
= FPNumOut(width
, False)
def elaborate(self, platform):
    """Build and return the combinatorial packing logic.

    When in_z reports ``is_overflowed`` the output is set via
    ``out_z.inf`` with in_z's sign; otherwise it is set via
    ``out_z.create`` from in_z's sign, exponent and mantissa.
    """
    m = Module()
    m.submodules.pack_in_z = self.in_z
    with m.If(self.in_z.is_overflowed):
        m.d.comb += self.out_z.inf(self.in_z.s)
    # NOTE(review): the Else was not present in the reviewed text.  It is
    # restored because an unconditional create() placed after the If
    # would take priority and make the overflow branch dead.
    with m.Else():
        m.d.comb += self.out_z.create(self.in_z.s, self.in_z.e,
                                      self.in_z.m)
    return m
905 class FPPack(FPState
):
907 def __init__(self
, width
):
908 FPState
.__init
__(self
, "pack")
909 self
.mod
= FPPackMod(width
)
910 self
.out_z
= FPNumOut(width
, False)
912 def setup(self
, m
, in_z
):
913 """ links module to inputs and outputs
915 m
.submodules
.pack
= self
.mod
916 m
.d
.comb
+= self
.mod
.in_z
.copy(in_z
)
919 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
920 m
.next
= "pack_put_z"
923 class FPPutZ(FPState
):
925 def __init__(self
, state
, in_z
, out_z
):
926 FPState
.__init
__(self
, state
)
932 self
.out_z
.v
.eq(self
.in_z
.v
)
934 with m
.If(self
.out_z
.stb
& self
.out_z
.ack
):
935 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
938 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
def __init__(self, width, id_wid=None, single_cycle=False):
    """ IEEE754 FP Add

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
    """
    FPID.__init__(self, id_wid)
    # read back when the individual stages are constructed
    self.width = width
    self.single_cycle = single_cycle

    # operand inputs and result output
    self.in_a = FPOp(width)
    self.in_b = FPOp(width)
    self.out_z = FPOp(width)

    # FSM states, filled in by add_state()
    self.states = []
def add_state(self, state):
    """Register *state* with the adder and hand it back.

    The state is appended to ``self.states``.  It is returned so that
    callers can write ``s = self.add_state(SomeState(...))`` and then
    continue configuring ``s``.
    """
    self.states.append(state)
    return state
964 def get_fragment(self
, platform
=None):
965 """ creates the HDL code-fragment for FPAdd
968 m
.submodules
.in_a
= self
.in_a
969 m
.submodules
.in_b
= self
.in_b
970 m
.submodules
.out_z
= self
.out_z
972 geta
= self
.add_state(FPGetOp("get_a", "get_b",
973 self
.in_a
, self
.width
))
974 geta
.setup(m
, self
.in_a
)
977 getb
= self
.add_state(FPGetOp("get_b", "special_cases",
978 self
.in_b
, self
.width
))
979 getb
.setup(m
, self
.in_b
)
982 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
983 sc
.setup(m
, a
, b
, self
.in_mid
)
985 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
986 dn
.setup(m
, a
, b
, sc
.in_mid
)
988 if self
.single_cycle
:
989 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
990 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
992 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
993 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
995 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
996 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
998 add1
= self
.add_state(FPAddStage1(self
.width
))
999 add1
.setup(m
, add0
.out_tot
, add0
.out_z
)
1001 n1
= self
.add_state(FPNorm1(self
.width
))
1002 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
)
1004 rn
= self
.add_state(FPRound(self
.width
))
1005 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
)
1007 cor
= self
.add_state(FPCorrections(self
.width
))
1008 cor
.setup(m
, rn
.out_z
)
1010 pa
= self
.add_state(FPPack(self
.width
))
1011 pa
.setup(m
, cor
.out_z
)
1013 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
))
1015 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
))
1017 with m
.FSM() as fsm
:
1019 for state
in self
.states
:
1020 with m
.State(state
.state_from
):
if __name__ == "__main__":
    # Build a 32-bit single-cycle adder and hand it to the nMigen CLI,
    # exposing the two operand ports and the result port.
    alu = FPADD(width=32, single_cycle=True)
    alu_ports = alu.in_a.ports()
    alu_ports = alu_ports + alu.in_b.ports()
    alu_ports = alu_ports + alu.out_z.ports()
    main(alu, ports=alu_ports)
1031 # works... but don't use, just do "python fname.py convert -t v"
1032 #print (verilog.convert(alu, ports=[
1033 # ports=alu.in_a.ports() + \
1034 # alu.in_b.ports() + \
1035 # alu.out_z.ports())