542b18218a1f1dab0a1870b6e3b9ea1d753cc027
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
9 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
10 from fpbase
import MultiShiftRMerge
, Trigger
11 #from fpbase import FPNumShiftMultiRight
class FPState(FPBase):
    """ Base class for one named state of the adder state machine.

        Stores the state name and offers helpers that publish a dictionary
        of named inputs / outputs as attributes of the instance.
    """

    def __init__(self, state_from):
        # name under which this state is registered / selected
        self.state_from = state_from

    def set_inputs(self, inputs):
        """ records the inputs dict and exposes each entry as an attribute
        """
        # NOTE(review): "self.inputs = ..." and both loop bodies were absent
        # from the reviewed text; restored to mirror set_outputs -- confirm.
        self.inputs = inputs
        for k, v in inputs.items():
            setattr(self, k, v)

    def set_outputs(self, outputs):
        """ records the outputs dict and exposes each entry as an attribute
        """
        self.outputs = outputs
        for k, v in outputs.items():
            setattr(self, k, v)
class FPGetSyncOpsMod:
    """ Passes num_ops operands from in_op to out_op, combinatorially,
        while ack is set and every bit of stb is set.
    """

    def __init__(self, width, num_ops=2):
        # NOTE(review): width attribute, the two list initialisers, the
        # in_op/out_op assignments and ack were missing from the reviewed
        # text; they are required by elaborate() and ports() below.
        self.width = width
        self.num_ops = num_ops
        inops = []
        outops = []
        for i in range(num_ops):
            inops.append(Signal(width, reset_less=True))
            outops.append(Signal(width, reset_less=True))
        self.in_op = inops
        self.out_op = outops
        self.stb = Signal(num_ops)      # one strobe bit per operand
        self.ack = Signal()
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        # stb is high only when *all* per-operand strobes are high:
        # Const(-1, (num_ops, False)) is the all-ones unsigned pattern
        stb = Signal(reset_less=True)
        m.d.comb += stb.eq(self.stb == Const(-1, (self.num_ops, False)))
        m.d.comb += self.out_decode.eq(self.ack & stb)
        with m.If(self.out_decode):
            for i in range(self.num_ops):
                m.d.comb += [
                    self.out_op[i].eq(self.in_op[i]),
                ]
        return m

    def ports(self):
        """ list of signals forming this module's external interface
        """
        return self.in_op + self.out_op + [self.stb, self.ack]
# NOTE(review): the "class" line was missing from the reviewed text; the
# name is taken from FPGetOp, which instantiates FPGetOpMod(width).
class FPGetOpMod:
    """ Copies the value of one operand port to out_op while the port's
        ack and stb are both set.
    """

    def __init__(self, width):
        self.in_op = FPOp(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        # decode is signalled when the handshake (ack & stb) completes
        m.d.comb += self.out_decode.eq((self.in_op.ack) & (self.in_op.stb))
        m.submodules.get_op_in = self.in_op
        #m.submodules.get_op_out = self.out_op
        with m.If(self.out_decode):
            m.d.comb += [
                self.out_op.eq(self.in_op.v),
            ]
        return m
class FPGetOp(FPState):
    """ State that fetches a single operand through FPGetOpMod and then
        moves on to out_state.
    """

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGetOpMod(width)
        # NOTE(review): assignment missing from the reviewed text; action()
        # drives self.in_op.ack so the attribute must exist.
        self.in_op = in_op
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """ links module to inputs and outputs
        """
        # the submodule is registered under this state's own name
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += self.mod.in_op.copy(in_op)
        #m.d.comb += self.out_op.eq(self.mod.out_op)
        m.d.comb += self.out_decode.eq(self.mod.out_decode)

    def action(self, m):
        """ on decode: drop ack, capture the operand, go to out_state;
            otherwise keep ack raised
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += [
                self.in_op.ack.eq(0),
                self.out_op.eq(self.mod.out_op)
            ]
        with m.Else():
            m.d.sync += self.in_op.ack.eq(1)
class FPGet2OpMod(Trigger):
    """ Decodes two raw operand values into FPNumIn form whenever the
        inherited trigger signal is set.
    """

    def __init__(self, width):
        Trigger.__init__(self)
        self.in_op1 = Signal(width, reset_less=True)
        self.in_op2 = Signal(width, reset_less=True)
        self.out_op1 = FPNumIn(None, width)
        self.out_op2 = FPNumIn(None, width)

    def elaborate(self, platform):
        # start from the Module built by Trigger, then add the decoders
        m = Trigger.elaborate(self, platform)
        #m.submodules.get_op_in = self.in_op
        m.submodules.get_op1_out = self.out_op1
        m.submodules.get_op2_out = self.out_op2
        with m.If(self.trigger):
            m.d.comb += [
                self.out_op1.decode(self.in_op1),
                self.out_op2.decode(self.in_op2),
            ]
        return m
class FPGet2Op(FPState):
    """ State that fetches both operands at once through FPGet2OpMod and
        then moves on to out_state.
    """

    def __init__(self, in_state, out_state, in_op1, in_op2, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGet2OpMod(width)
        # NOTE(review): these two assignments were missing from the reviewed
        # text (two-line gap in the numbering); presumed -- confirm.
        self.in_op1 = in_op1
        self.in_op2 = in_op2
        self.out_op1 = FPNumIn(None, width)
        self.out_op2 = FPNumIn(None, width)
        self.in_stb = Signal(reset_less=True)
        self.out_ack = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op1, in_op2, in_stb, in_ack):
        """ links module to inputs and outputs
        """
        m.submodules.get_ops = self.mod
        m.d.comb += self.mod.in_op1.eq(in_op1)
        m.d.comb += self.mod.in_op2.eq(in_op2)
        m.d.comb += self.mod.stb.eq(in_stb)
        m.d.comb += self.out_ack.eq(self.mod.ack)
        m.d.comb += self.out_decode.eq(self.mod.trigger)
        # the caller-supplied ack mirrors the module's ack
        m.d.comb += in_ack.eq(self.mod.ack)

    def action(self, m):
        """ on decode: capture both decoded operands and go to out_state;
            otherwise keep the module's ack raised
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += [
                # NOTE(review): this entry was missing from the reviewed
                # text; restored by analogy with FPGetOp.action -- confirm.
                self.mod.ack.eq(0),
                #self.out_op1.v.eq(self.mod.out_op1.v),
                #self.out_op2.v.eq(self.mod.out_op2.v),
                self.out_op1.copy(self.mod.out_op1),
                self.out_op2.copy(self.mod.out_op2)
            ]
        with m.Else():
            m.d.sync += self.mod.ack.eq(1)
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Purely combinatorial: when a special case matches, out_do_z is set
        and out_z carries the result; otherwise out_do_z is cleared.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, out_do_z):
        """ links module to inputs and outputs
        """
        m.submodules.specialcases = self
        m.d.comb += self.in_a.copy(in_a)
        m.d.comb += self.in_b.copy(in_b)
        m.d.comb += out_do_z.eq(self.out_do_z)

    def elaborate(self, platform):
        m = Module()

        m.submodules.sc_in_a = self.in_a
        m.submodules.sc_in_b = self.in_b
        m.submodules.sc_out_z = self.out_z

        # NOTE(review): the two Signal() constructions below were missing
        # from the reviewed text; constructor arguments are presumed.
        s_nomatch = Signal()    # operand signs differ
        m.d.comb += s_nomatch.eq(self.in_a.s != self.in_b.s)

        m_match = Signal()      # operand mantissas are equal
        m.d.comb += m_match.eq(self.in_a.m == self.in_b.m)

        # if a is NaN or b is NaN return NaN
        with m.If(self.in_a.is_nan | self.in_b.is_nan):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.nan(0)

        # XXX WEIRDNESS for FP16 non-canonical NaN handling

        ## if a is zero and b is NaN return -b
        #with m.If(a.is_zero & (a.s==0) & b.is_nan):
        #    m.d.comb += self.out_do_z.eq(1)
        #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))

        ## if b is zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
        #    m.d.comb += self.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))

        ## if a is -zero and b is NaN return -b
        #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
        #    m.d.comb += self.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))

        ## if b is -zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
        #    m.d.comb += self.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))

        # if a is inf return inf (or NaN)
        with m.Elif(self.in_a.is_inf):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.inf(self.in_a.s)
            # if a is inf and signs don't match return NaN
            with m.If(self.in_b.exp_128 & s_nomatch):
                m.d.comb += self.out_z.nan(0)

        # if b is inf return inf
        with m.Elif(self.in_b.is_inf):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.inf(self.in_b.s)

        # NOTE(review): in the three create() calls below the trailing
        # arguments were missing from the reviewed text.  The exponent and
        # the m[3:-1] mantissa slice are presumed -- verify against fpbase.

        # if a is zero and b zero return signed-a/b
        with m.Elif(self.in_a.is_zero & self.in_b.is_zero):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.create(self.in_a.s & self.in_b.s,
                                          self.in_b.e,
                                          self.in_b.m[3:-1])

        # if a is zero return b
        with m.Elif(self.in_a.is_zero):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.create(self.in_b.s, self.in_b.e,
                                          self.in_b.m[3:-1])

        # if b is zero return a
        with m.Elif(self.in_b.is_zero):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.create(self.in_a.s, self.in_a.e,
                                          self.in_a.m[3:-1])

        # if a equal to -b return zero (+ve zero)
        with m.Elif(s_nomatch & m_match & (self.in_a.e == self.in_b.e)):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.zero(0)

        # Denormalised Number checks
        with m.Else():
            # no special case: the normal add pipeline must run
            m.d.comb += self.out_do_z.eq(0)

        return m
# NOTE(review): the "class" line, the id_wid bookkeeping, the None branch
# and the "def idsync" line were missing from the reviewed text.  The class
# name comes from the FPID.__init__(self, id_wid) calls in the state classes
# and the method name from the self.idsync(m) call sites.
class FPID:
    """ Mixin carrying an optional identifier alongside the data.

        When id_wid is None no signals are created and in_mid / out_mid
        are None; callers test "in_mid is not None" before wiring.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid is not None:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """ registers a synchronous copy of the incoming id to the outgoing
            id; does nothing when ids are disabled
        """
        if self.id_wid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
class FPAddSpecialCases(FPState, FPID):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        State wrapper around FPAddSpecialCasesMod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        FPID.__init__(self, id_wid)
        self.mod = FPAddSpecialCasesMod(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_a, in_b, self.out_do_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ special case found: latch the result; else carry on to
            denormalise
        """
        self.idsync(m)
        with m.If(self.out_do_z):
            m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
            # NOTE(review): target state missing from the reviewed text;
            # "put_z" is presumed -- confirm against the FSM definition.
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
class FPAddSpecialCasesDeNorm(FPState, FPID):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Runs the special-case check (smod) and the denormalisation step
        (dmod) from the same inputs in one state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        FPID.__init__(self, id_wid)
        self.smod = FPAddSpecialCasesMod(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

        self.dmod = FPAddDeNormMod(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        self.smod.setup(m, in_a, in_b, self.out_do_z)
        self.dmod.setup(m, in_a, in_b)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ special case found: latch the result; else latch the
            denormalised operands
        """
        self.idsync(m)
        with m.If(self.out_do_z):
            m.d.sync += self.out_z.v.eq(self.smod.out_z.v) # only take output
            # NOTE(review): both target states below were missing from the
            # reviewed text; "put_z" / "align" are presumed -- confirm.
            m.next = "put_z"
        with m.Else():
            m.next = "align"
            m.d.sync += self.out_a.copy(self.dmod.out_a)
            m.d.sync += self.out_b.copy(self.dmod.out_b)
class FPAddDeNormMod(FPState):
    """ Combinatorial denormalisation of both operands: an operand whose
        exp_n127 flag is set has its exponent replaced by N126, any other
        operand gets its top mantissa bit set.
    """

    def __init__(self, width):
        # NOTE(review): FPState.__init__ is not called here in the reviewed
        # text, so state_from is never set on this object.
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.denormalise = self
        m.d.comb += self.in_a.copy(in_a)
        m.d.comb += self.in_b.copy(in_b)

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.in_a
        m.submodules.denorm_in_b = self.in_b
        m.submodules.denorm_out_a = self.out_a
        m.submodules.denorm_out_b = self.out_b
        # hmmm, don't like repeating identical code
        m.d.comb += self.out_a.copy(self.in_a)
        with m.If(self.in_a.exp_n127):
            m.d.comb += self.out_a.e.eq(self.in_a.N126) # limit a exponent
        with m.Else():
            m.d.comb += self.out_a.m[-1].eq(1) # set top mantissa bit

        m.d.comb += self.out_b.copy(self.in_b)
        with m.If(self.in_b.exp_n127):
            m.d.comb += self.out_b.e.eq(self.in_b.N126) # limit b exponent
        with m.Else():
            m.d.comb += self.out_b.m[-1].eq(1) # set top mantissa bit

        return m
class FPAddDeNorm(FPState, FPID):
    """ State wrapper around FPAddDeNormMod: registers the denormalised
        operands.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        FPID.__init__(self, id_wid)
        self.mod = FPAddDeNormMod(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_a, in_b)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        # Denormalised Number checks
        # NOTE(review): target state missing from the reviewed text;
        # "align" is presumed -- confirm.
        m.next = "align"
        m.d.sync += self.out_a.copy(self.mod.out_a)
        m.d.sync += self.out_b.copy(self.mod.out_b)
class FPAddAlignMultiMod(FPState):
    """ One step of multi-cycle exponent alignment: the operand with the
        smaller exponent is passed through shift_down; exp_eq is raised
        once the exponents are equal.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        #       it *STAYS* in the align state until exponents match

        # defaults: pass both operands through, exponents not yet equal
        m.d.comb += self.exp_eq.eq(0)
        m.d.comb += self.out_a.copy(self.in_a)
        m.d.comb += self.out_b.copy(self.in_b)
        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
        m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
        # exponent of a greater than b: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: move to next stage.
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)
        return m
class FPAddAlignMulti(FPState, FPID):
    """ State wrapper around FPAddAlignMultiMod: registers the module's
        outputs every cycle and leaves the state once exp_eq is set.
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        m.submodules.align = self.mod
        m.d.comb += self.mod.in_a.copy(in_a)
        m.d.comb += self.mod.in_b.copy(in_b)
        #m.d.comb += self.out_a.copy(self.mod.out_a)
        #m.d.comb += self.out_b.copy(self.mod.out_b)
        m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        m.d.sync += self.out_a.copy(self.mod.out_a)
        m.d.sync += self.out_b.copy(self.mod.out_b)
        with m.If(self.exp_eq):
            # NOTE(review): target state missing from the reviewed text;
            # "add_0" is presumed -- confirm.
            m.next = "add_0"
class FPAddAlignSingleMod:
    """ Single-cycle exponent alignment using one shared multi-bit
        right-shifter.
    """

    def __init__(self, width):
        # elaborate() reads self.width (assignment restored; it was
        # missing from the reviewed text)
        self.width = width
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.align = self
        m.d.comb += self.in_a.copy(in_a)
        m.d.comb += self.in_b.copy(in_b)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(self.in_a.e), True)    # signed, exponent-sized
        msr = MultiShiftRMerge(self.in_a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)      # a.e - b.e
        ediffr = Signal(espec, reset_less=True)     # b.e - a.e
        tdiff = Signal(espec, reset_less=True)      # shift amount in use
        elz = Signal(reset_less=True)               # a.e < b.e
        egz = Signal(reset_less=True)               # a.e > b.e

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += msr.inp.eq(t_inp.m)
        m.d.comb += msr.diff.eq(tdiff)
        m.d.comb += t_out.m.eq(msr.m)
        m.d.comb += t_out.e.eq(t_inp.e + tdiff)
        m.d.comb += t_out.s.eq(t_inp.s)

        m.d.comb += ediff.eq(self.in_a.e - self.in_b.e)
        m.d.comb += ediffr.eq(self.in_b.e - self.in_a.e)
        m.d.comb += elz.eq(self.in_a.e < self.in_b.e)
        m.d.comb += egz.eq(self.in_a.e > self.in_b.e)

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += self.out_a.copy(self.in_a)
        m.d.comb += self.out_b.copy(self.in_b)
        # only one shifter (muxed)
        #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
        # NOTE(review): the two branch headers and the tdiff assignments
        # were missing from the reviewed text; restored from the branch
        # comments and the otherwise-unused ediff/ediffr/egz/elz signals.
        # exponent of a greater than b: shift b down
        with m.If(egz):
            m.d.comb += [t_inp.copy(self.in_b),
                         tdiff.eq(ediff),
                         self.out_b.copy(t_out),
                         self.out_b.s.eq(self.in_b.s), # whoops forgot sign
                        ]
        # exponent of b greater than a: shift a down
        with m.Elif(elz):
            m.d.comb += [t_inp.copy(self.in_a),
                         tdiff.eq(ediffr),
                         self.out_a.copy(t_out),
                         self.out_a.s.eq(self.in_a.s), # whoops forgot sign
                        ]
        return m
class FPAddAlignSingle(FPState, FPID):
    """ State wrapper around FPAddAlignSingleMod: registers the aligned
        operands.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        FPID.__init__(self, id_wid)
        self.mod = FPAddAlignSingleMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_a, in_b)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        # NOTE: could be done as comb
        m.d.sync += self.out_a.copy(self.mod.out_a)
        m.d.sync += self.out_b.copy(self.mod.out_b)
        # NOTE(review): target state missing from the reviewed text;
        # "add_0" is presumed -- confirm.
        m.next = "add_0"
class FPAddAlignSingleAdd(FPState, FPID):
    """ Chains alignment, add stage 0 and add stage 1 combinatorially and
        registers only the stage-1 result.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        FPID.__init__(self, id_wid)
        self.mod = FPAddAlignSingleMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

        self.a0mod = FPAddStage0Mod(width)
        # constructed once: the reviewed text built a second, identical
        # FPNumBase straight after out_tot and discarded the first
        self.a0_out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.a0_out_z.m_width + 4, reset_less=True)

        self.a1mod = FPAddStage1Mod(width)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        # align -> add0 -> add1, all joined with comb assignments
        self.mod.setup(m, in_a, in_b)
        m.d.comb += self.out_a.copy(self.mod.out_a)
        m.d.comb += self.out_b.copy(self.mod.out_b)

        self.a0mod.setup(m, self.out_a, self.out_b)
        m.d.comb += self.a0_out_z.copy(self.a0mod.out_z)
        m.d.comb += self.out_tot.eq(self.a0mod.out_tot)

        self.a1mod.setup(m, self.out_tot, self.a0_out_z)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        m.d.sync += self.out_of.copy(self.a1mod.out_of)
        m.d.sync += self.out_z.copy(self.a1mod.out_z)
        m.next = "normalise_1"
class FPAddStage0Mod:
    """ Adds or subtracts the two aligned mantissas, depending on the
        operand signs, into out_tot and selects the result sign.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self
        m.d.comb += self.in_a.copy(in_a)
        m.d.comb += self.in_b.copy(in_b)

    def elaborate(self, platform):
        m = Module()
        m.submodules.add0_in_a = self.in_a
        m.submodules.add0_in_b = self.in_b
        m.submodules.add0_out_z = self.out_z

        # result exponent is taken from a
        m.d.comb += self.out_z.e.eq(self.in_a.e)

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)   # signs equal
        mge = Signal(reset_less=True)   # a mantissa >= b mantissa
        am0 = Signal(len(self.in_a.m)+1, reset_less=True)
        bm0 = Signal(len(self.in_b.m)+1, reset_less=True)
        m.d.comb += [seq.eq(self.in_a.s == self.in_b.s),
                     mge.eq(self.in_a.m >= self.in_b.m),
                     am0.eq(Cat(self.in_a.m, 0)),
                     bm0.eq(Cat(self.in_b.m, 0))
                    ]
        # same-sign (both negative or both positive) add mantissas
        with m.If(seq):
            m.d.comb += [
                self.out_tot.eq(am0 + bm0),
                self.out_z.s.eq(self.in_a.s)
            ]
        # a mantissa greater than b, use a
        with m.Elif(mge):
            m.d.comb += [
                self.out_tot.eq(am0 - bm0),
                self.out_z.s.eq(self.in_a.s)
            ]
        # b mantissa greater than a, use b
        with m.Else():
            m.d.comb += [
                self.out_tot.eq(bm0 - am0),
                self.out_z.s.eq(self.in_b.s)
            ]
        return m
class FPAddStage0(FPState, FPID):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        FPID.__init__(self, id_wid)
        self.mod = FPAddStage0Mod(width)
        self.out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_a, in_b)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.out_z.copy(self.mod.out_z)
        m.d.sync += self.out_tot.eq(self.mod.out_tot)
        # NOTE(review): target state missing from the reviewed text;
        # "add_1" is presumed -- confirm.
        m.next = "add_1"
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (tot[27] is kinda a carry bit)
    """

    def __init__(self, width):
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_tot = Signal(self.in_z.m_width + 4, reset_less=True)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def setup(self, m, in_tot, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.out_of

        m.d.comb += self.in_z.copy(in_z)
        m.d.comb += self.in_tot.eq(in_tot)

    def elaborate(self, platform):
        m = Module()
        #m.submodules.norm1_in_overflow = self.in_of
        #m.submodules.norm1_out_overflow = self.out_of
        #m.submodules.norm1_in_z = self.in_z
        #m.submodules.norm1_out_z = self.out_z
        m.d.comb += self.out_z.copy(self.in_z)
        # tot[27] gets set when the sum overflows. shift result down
        with m.If(self.in_tot[-1]):
            m.d.comb += [
                self.out_z.m.eq(self.in_tot[4:]),
                self.out_of.m0.eq(self.in_tot[4]),
                self.out_of.guard.eq(self.in_tot[3]),
                self.out_of.round_bit.eq(self.in_tot[2]),
                # the two lowest bits are merged into the sticky bit
                self.out_of.sticky.eq(self.in_tot[1] | self.in_tot[0]),
                self.out_z.e.eq(self.in_z.e + 1)
            ]
        # tot[27] zero case
        with m.Else():
            m.d.comb += [
                self.out_z.m.eq(self.in_tot[3:]),
                self.out_of.m0.eq(self.in_tot[3]),
                self.out_of.guard.eq(self.in_tot[2]),
                self.out_of.round_bit.eq(self.in_tot[1]),
                self.out_of.sticky.eq(self.in_tot[0])
            ]
        return m
class FPAddStage1(FPState, FPID):
    """ State wrapper around FPAddStage1Mod: registers the result and the
        overflow bits and raises norm_stb for the normalisation state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        FPID.__init__(self, id_wid)
        self.mod = FPAddStage1Mod(width)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, in_tot, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_tot, in_z)

        m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        m.d.sync += self.out_of.copy(self.mod.out_of)
        m.d.sync += self.out_z.copy(self.mod.out_z)
        m.d.sync += self.norm_stb.eq(1)
        m.next = "normalise_1"
class FPNorm1ModSingle:
    """ Single-cycle normalisation: either shifts the mantissa up (and
        decreases the exponent) by a count-leading-zeros amount, or shifts
        it down (and increases the exponent) through the multi-shifter.
    """

    def __init__(self, width):
        # elaborate() reads self.width (assignment restored; it was
        # missing from the reviewed text)
        self.width = width
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def setup(self, m, in_z, in_of, out_z):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self

        m.d.comb += self.in_z.copy(in_z)
        m.d.comb += self.in_of.copy(in_of)

        m.d.comb += out_z.copy(self.out_z)

    def elaborate(self, platform):
        m = Module()

        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.copy(self.in_z)
        m.d.comb += in_of.copy(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.copy(in_z)
        m.d.comb += self.out_of.copy(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            # make sure that the amount to decrease by does NOT
            # go below the minimum non-INF/NaN exponent
            # NOTE(review): the Mux's third operand was missing from the
            # reviewed text; in_z.exp_sub_n126 is presumed -- confirm.
            limclz = Mux(in_z.exp_sub_n126 > pe.o, pe.o,
                         in_z.exp_sub_n126)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(limclz),                 # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
                self.out_of.m0.eq(temp_s[2]),   # copy of mantissa[0]
                # overflow in bits 0..1: got shifted too (leave sticky)
                self.out_of.guard.eq(temp_s[1]),     # guard
                self.out_of.round_bit.eq(temp_s[0]), # round
            ]
        # increase exponent
        with m.Elif(increase):
            temp_m = Signal(mwid+1, reset_less=True)
            # NOTE(review): temp_m is mwid+1 bits wide while the shifter was
            # built with mwid -- check MultiShiftRMerge for truncation.
            m.d.comb += [
                temp_m.eq(Cat(in_of.sticky, in_of.round_bit, in_of.guard,
                              in_z.m)),
                ediff_n126.eq(in_z.N126 - in_z.e),
                # connect multi-shifter to inp/out mantissa (and ediff)
                msr.inp.eq(temp_m),
                msr.diff.eq(ediff_n126),
                self.out_z.m.eq(msr.m[3:]),
                # FIX: the overflow bits are taken from the shifter output.
                # The reviewed text read temp_s here, which is only driven
                # in the "decrease" branch and is therefore always zero in
                # this branch.
                self.out_of.m0.eq(msr.m[3]),   # copy of mantissa[0]
                # overflow in bits 0..1: got shifted too (leave sticky)
                self.out_of.guard.eq(msr.m[2]),     # guard
                self.out_of.round_bit.eq(msr.m[1]), # round
                self.out_of.sticky.eq(msr.m[0]),    # sticky
                self.out_z.e.eq(in_z.e + ediff_n126),
            ]

        return m
class FPNorm1ModMulti:
    """ One step of multi-cycle normalisation: moves the mantissa by a
        single bit per evaluation.  out_norm stays set while another step
        is still needed.
    """

    def __init__(self, width, single_cycle=True):
        # elaborate() reads self.width (assignment restored; it was
        # missing from the reviewed text)
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def setup(self, m, in_z, in_of, norm_stb, in_select, temp_z, temp_of,
              out_z, out_norm):
        """ links module to inputs and outputs

            NOTE(review): added.  FPNorm1Multi.setup calls self.mod.setup
            with exactly these nine positional arguments, but the reviewed
            text defined no setup on this class.  The wiring mirrors
            FPNorm1ModSingle.setup; norm_stb is accepted for call
            compatibility and not used here -- confirm intended wiring.
        """
        m.submodules.normalise_1 = self

        m.d.comb += self.in_z.copy(in_z)
        m.d.comb += self.in_of.copy(in_of)

        m.d.comb += self.in_select.eq(in_select)
        m.d.comb += self.temp_z.copy(temp_z)
        m.d.comb += self.temp_of.copy(temp_of)

        m.d.comb += out_z.copy(self.out_z)
        m.d.comb += out_norm.eq(self.out_norm)

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.copy(self.in_z)
            m.d.comb += in_of.copy(self.in_of)
        with m.Else():
            m.d.comb += in_z.copy(self.temp_z)
            m.d.comb += in_of.copy(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.copy(in_z)
        m.d.comb += self.out_of.copy(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
class FPNorm1Single(FPState, FPID):
    """ State wrapper around FPNorm1ModSingle.  out_z is driven
        combinatorially by the module; only roundz is registered here.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModSingle(width)
        self.out_norm = Signal(reset_less=True)
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, self.out_z)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
        # NOTE(review): target state missing from the reviewed text;
        # "round" is presumed -- confirm.
        m.next = "round"
class FPNorm1Multi(FPState, FPID):
    """ State wrapper around FPNorm1ModMulti: remains in this state,
        feeding the module's previous result back in through temp_z /
        temp_of, until the module's out_norm drops.
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb, in_mid):
        """ links module to inputs and outputs
        """
        # NOTE(review): relies on FPNorm1ModMulti providing a setup()
        # accepting these nine arguments -- verify that it exists.
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        # fresh input is accepted only when strobed and not yet acked
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        m.d.sync += self.temp_of.copy(self.mod.out_of)
        m.d.sync += self.temp_z.copy(self.out_z)
        # NOTE(review): the inner ack.eq(1), both Else headers and the
        # "round" target were missing from the reviewed text -- confirm.
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += [
                    self.ack.eq(1),
                ]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
class FPNormToPack(FPState, FPID):
    """ Chains normalisation, rounding, corrections and packing
        combinatorially, registering only the packed value.
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        # setup() reads self.width (assignment restored; it was missing
        # from the reviewed text)
        self.width = width

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs
        """

        # Normalisation (chained to input in_z+in_of)
        nmod = FPNorm1ModSingle(self.width)
        n_out_z = FPNumBase(self.width)
        n_out_roundz = Signal(reset_less=True)
        nmod.setup(m, in_z, in_of, n_out_z)

        # Rounding (chained to normalisation)
        rmod = FPRoundMod(self.width)
        r_out_z = FPNumBase(self.width)
        rmod.setup(m, n_out_z, n_out_roundz)
        m.d.comb += n_out_roundz.eq(nmod.out_of.roundz)
        m.d.comb += r_out_z.copy(rmod.out_z)

        # Corrections (chained to rounding)
        cmod = FPCorrectionsMod(self.width)
        c_out_z = FPNumBase(self.width)
        cmod.setup(m, r_out_z)
        m.d.comb += c_out_z.copy(cmod.out_z)

        # Pack (chained to corrections)
        # pmod and out_z are kept on self because action() reads them
        self.pmod = FPPackMod(self.width)
        self.out_z = FPNumBase(self.width)
        self.pmod.setup(m, c_out_z)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m) # copies incoming ID to outgoing
        m.d.sync += self.out_z.v.eq(self.pmod.out_z.v) # outputs packed result
        m.next = "pack_put_z"
def __init__(self, width):
    """ declares the rounding module's ports

        * in_roundz: 1-bit flag; elaborate() adds 1 to the mantissa
          when it is set
        * in_z / out_z: number before / after rounding
    """
    # input and output numbers are constructed with the same arguments
    num_args = (width, False)
    self.in_roundz = Signal(reset_less=True)
    self.in_z = FPNumBase(*num_args)
    self.out_z = FPNumBase(*num_args)
def setup(self, m, in_z, roundz):
    """ attaches this module as m.submodules.roundz and links its inputs
    """
    m.submodules.roundz = self

    # both inputs are driven in the comb domain, number first
    m.d.comb += [self.in_z.copy(in_z),
                 self.in_roundz.eq(roundz)]
1091 def elaborate(self
, platform
):
1093 m
.d
.comb
+= self
.out_z
.copy(self
.in_z
)
1094 with m
.If(self
.in_roundz
):
1095 m
.d
.comb
+= self
.out_z
.m
.eq(self
.in_z
.m
+ 1) # mantissa rounds up
1096 with m
.If(self
.in_z
.m
== self
.in_z
.m1s
): # all 1s
1097 m
.d
.comb
+= self
.out_z
.e
.eq(self
.in_z
.e
+ 1) # exponent up
1101 class FPRound(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ "round" FSM state: wraps an FPRoundMod plus an output number

        * width: passed to FPRoundMod and FPNumBase
        * id_wid: passed to FPID
    """
    # base classes are initialised explicitly, state name first
    FPState.__init__(self, "round")
    FPID.__init__(self, id_wid)

    self.mod = FPRoundMod(width)    # does the actual rounding
    self.out_z = FPNumBase(width)   # written from mod.out_z in action()
def setup(self, m, in_z, roundz, in_mid):
    """ hands in_z + roundz to the rounding module and links the id
    """
    self.mod.setup(m, in_z, roundz)

    # id is optional: skip the link when it is not in use
    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
1117 def action(self
, m
):
1119 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
1120 m
.next
= "corrections"
1123 class FPCorrectionsMod
:
def __init__(self, width):
    """ declares the corrections module's input and output numbers
    """
    # both ports are FPNumOut instances built with identical arguments
    num_args = (width, False)
    self.in_z = FPNumOut(*num_args)
    self.out_z = FPNumOut(*num_args)
def setup(self, m, in_z):
    """ attaches this module as m.submodules.corrections and links in_z
    """
    m.submodules.corrections = self
    link_in = self.in_z.copy(in_z)  # comb copy of the caller's number
    m.d.comb += link_in
1135 def elaborate(self
, platform
):
1137 m
.submodules
.corr_in_z
= self
.in_z
1138 m
.submodules
.corr_out_z
= self
.out_z
1139 m
.d
.comb
+= self
.out_z
.copy(self
.in_z
)
1140 with m
.If(self
.in_z
.is_denormalised
):
1141 m
.d
.comb
+= self
.out_z
.e
.eq(self
.in_z
.N127
)
1145 class FPCorrections(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ "corrections" FSM state: wraps an FPCorrectionsMod

        * width: passed to FPCorrectionsMod and FPNumBase
        * id_wid: passed to FPID
    """
    # base classes are initialised explicitly, state name first
    FPState.__init__(self, "corrections")
    FPID.__init__(self, id_wid)

    self.mod = FPCorrectionsMod(width)  # does the actual correction
    self.out_z = FPNumBase(width)       # written from mod.out_z in action()
def setup(self, m, in_z, in_mid):
    """ hands in_z to the corrections module and links the id
    """
    self.mod.setup(m, in_z)

    # id is optional: skip the link when it is not in use
    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
1160 def action(self
, m
):
1162 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
def __init__(self, width):
    """ declares the pack module's input and output numbers
    """
    # both ports are FPNumOut instances built with identical arguments
    num_args = (width, False)
    self.in_z = FPNumOut(*num_args)
    self.out_z = FPNumOut(*num_args)
def setup(self, m, in_z):
    """ attaches this module as m.submodules.pack and links in_z
    """
    m.submodules.pack = self
    link_in = self.in_z.copy(in_z)  # comb copy of the caller's number
    m.d.comb += link_in
1178 def elaborate(self
, platform
):
1180 m
.submodules
.pack_in_z
= self
.in_z
1181 with m
.If(self
.in_z
.is_overflowed
):
1182 m
.d
.comb
+= self
.out_z
.inf(self
.in_z
.s
)
1184 m
.d
.comb
+= self
.out_z
.create(self
.in_z
.s
, self
.in_z
.e
, self
.in_z
.m
)
1188 class FPPack(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ "pack" FSM state: wraps an FPPackMod plus an output number

        * width: passed to FPPackMod and FPNumOut
        * id_wid: passed to FPID
    """
    # base classes are initialised explicitly, state name first
    FPState.__init__(self, "pack")
    FPID.__init__(self, id_wid)

    self.mod = FPPackMod(width)           # does the actual packing
    self.out_z = FPNumOut(width, False)   # out_z.v is written in action()
def setup(self, m, in_z, in_mid):
    """ hands in_z to the pack module and links the id
    """
    self.mod.setup(m, in_z)

    # id is optional: skip the link when it is not in use
    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
1203 def action(self
, m
):
1205 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1206 m
.next
= "pack_put_z"
1209 class FPPutZ(FPState
):
1211 def __init__(self
, state
, in_z
, out_z
, in_mid
, out_mid
, to_state
=None):
1212 FPState
.__init
__(self
, state
)
1213 if to_state
is None:
1214 to_state
= "get_ops"
1215 self
.to_state
= to_state
1218 self
.in_mid
= in_mid
1219 self
.out_mid
= out_mid
1221 def action(self
, m
):
1222 if self
.in_mid
is not None:
1223 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
1225 self
.out_z
.v
.eq(self
.in_z
.v
)
1227 with m
.If(self
.out_z
.stb
& self
.out_z
.ack
):
1228 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1229 m
.next
= self
.to_state
1231 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1234 class FPPutZIdx(FPState
):
1236 def __init__(self
, state
, in_z
, out_zs
, in_mid
, to_state
=None):
1237 FPState
.__init
__(self
, state
)
1238 if to_state
is None:
1239 to_state
= "get_ops"
1240 self
.to_state
= to_state
1242 self
.out_zs
= out_zs
1243 self
.in_mid
= in_mid
1245 def action(self
, m
):
1246 outz_stb
= Signal(reset_less
=True)
1247 outz_ack
= Signal(reset_less
=True)
1248 m
.d
.comb
+= [outz_stb
.eq(self
.out_zs
[self
.in_mid
].stb
),
1249 outz_ack
.eq(self
.out_zs
[self
.in_mid
].ack
),
1252 self
.out_zs
[self
.in_mid
].v
.eq(self
.in_z
.v
)
1254 with m
.If(outz_stb
& outz_ack
):
1255 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(0)
1256 m
.next
= self
.to_state
1258 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(1)
1261 class FPADDBaseMod(FPID
):
1263 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1266 * width: bit-width of IEEE754. supported: 16, 32, 64
1267 * id_wid: an identifier that is sync-connected to the input
1268 * single_cycle: True indicates each stage to complete in 1 clock
1269 * compact: True indicates a reduced number of stages
1271 FPID
.__init
__(self
, id_wid
)
1273 self
.single_cycle
= single_cycle
1274 self
.compact
= compact
1276 self
.in_t
= Trigger()
1277 self
.in_a
= Signal(width
)
1278 self
.in_b
= Signal(width
)
1279 self
.out_z
= FPOp(width
)
1283 def add_state(self
, state
):
1284 self
.states
.append(state
)
1287 def get_fragment(self
, platform
=None):
1288 """ creates the HDL code-fragment for FPAdd
1291 m
.submodules
.out_z
= self
.out_z
1292 m
.submodules
.in_t
= self
.in_t
1294 self
.get_compact_fragment(m
, platform
)
1296 self
.get_longer_fragment(m
, platform
)
1298 with m
.FSM() as fsm
:
1300 for state
in self
.states
:
1301 with m
.State(state
.state_from
):
1306 def get_longer_fragment(self
, m
, platform
=None):
1308 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1309 self
.in_a
, self
.in_b
, self
.width
))
1310 get
.setup(m
, self
.in_a
, self
.in_b
, self
.in_t
.stb
, self
.in_t
.ack
)
1314 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1315 sc
.setup(m
, a
, b
, self
.in_mid
)
1317 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1318 dn
.setup(m
, a
, b
, sc
.in_mid
)
1320 if self
.single_cycle
:
1321 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1322 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1324 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1325 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1327 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1328 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1330 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1331 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1333 if self
.single_cycle
:
1334 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1335 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1337 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1338 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1340 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1341 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1343 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1344 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1346 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1347 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1349 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1350 pa
.in_mid
, self
.out_mid
))
1352 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1353 pa
.in_mid
, self
.out_mid
))
1355 def get_compact_fragment(self
, m
, platform
=None):
1357 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1358 self
.in_a
, self
.in_b
, self
.width
))
1359 get
.setup(m
, self
.in_a
, self
.in_b
, self
.in_t
.stb
, self
.in_t
.ack
)
1363 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1364 sc
.setup(m
, a
, b
, self
.in_mid
)
1366 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1367 alm
.setup(m
, sc
.out_a
, sc
.out_b
, sc
.in_mid
)
1369 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1370 n1
.setup(m
, alm
.out_z
, alm
.out_of
, alm
.in_mid
)
1372 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
, self
.out_z
,
1373 n1
.in_mid
, self
.out_mid
))
1375 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1376 sc
.in_mid
, self
.out_mid
))
1379 class FPADDBase(FPState
, FPID
):
1381 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1384 * width: bit-width of IEEE754. supported: 16, 32, 64
1385 * id_wid: an identifier that is sync-connected to the input
1386 * single_cycle: True indicates each stage to complete in 1 clock
1388 FPID
.__init
__(self
, id_wid
)
1389 FPState
.__init
__(self
, "fpadd")
1391 self
.single_cycle
= single_cycle
1392 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1394 self
.in_t
= Trigger()
1395 self
.in_a
= Signal(width
)
1396 self
.in_b
= Signal(width
)
1397 #self.out_z = FPOp(width)
1399 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1400 self
.in_accept
= Signal(reset_less
=True)
1401 self
.add_stb
= Signal(reset_less
=True)
1402 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1404 def setup(self
, m
, a
, b
, add_stb
, in_mid
, out_z
, out_mid
):
1406 self
.out_mid
= out_mid
1407 m
.d
.comb
+= [self
.in_a
.eq(a
),
1409 self
.mod
.in_a
.eq(self
.in_a
),
1410 self
.mod
.in_b
.eq(self
.in_b
),
1411 self
.in_mid
.eq(in_mid
),
1412 self
.mod
.in_mid
.eq(self
.in_mid
),
1413 self
.z_done
.eq(self
.mod
.out_z
.trigger
),
1414 #self.add_stb.eq(add_stb),
1415 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1416 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1417 self
.out_mid
.eq(self
.mod
.out_mid
),
1418 self
.out_z
.v
.eq(self
.mod
.out_z
.v
),
1419 self
.out_z
.stb
.eq(self
.mod
.out_z
.stb
),
1420 self
.mod
.out_z
.ack
.eq(self
.out_z
.ack
),
1423 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1424 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1425 m
.d
.sync
+= self
.out_z
.ack
.eq(0) # likewise
1426 #m.d.sync += self.in_t.stb.eq(0)
1428 m
.submodules
.fpadd
= self
.mod
1430 def action(self
, m
):
1432 # in_accept is set on incoming strobe HIGH and ack LOW.
1433 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1435 #with m.If(self.in_t.ack):
1436 # m.d.sync += self.in_t.stb.eq(0)
1437 with m
.If(~self
.z_done
):
1438 # not done: test for accepting an incoming operand pair
1439 with m
.If(self
.in_accept
):
1441 self
.add_ack
.eq(1), # acknowledge receipt...
1442 self
.in_t
.stb
.eq(1), # initiate add
1445 m
.d
.sync
+= [self
.add_ack
.eq(0),
1446 self
.in_t
.stb
.eq(0),
1447 self
.out_z
.ack
.eq(1),
1450 # done: acknowledge, and write out id and value
1451 m
.d
.sync
+= [self
.add_ack
.eq(1),
1458 if self
.in_mid
is not None:
1459 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1462 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1464 # move to output state on detecting z ack
1465 with m
.If(self
.out_z
.trigger
):
1466 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1469 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1472 def __init__(self
, width
, id_wid
):
1474 self
.id_wid
= id_wid
1476 for i
in range(rs_sz
):
1478 out_z
.name
= "out_z_%d" % i
1480 self
.res
= Array(res
)
1481 self
.in_z
= FPOp(width
)
1482 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
1484 def setup(self
, m
, in_z
, in_mid
):
1485 m
.d
.comb
+= [self
.in_z
.copy(in_z
),
1486 self
.in_mid
.eq(in_mid
)]
1488 def get_fragment(self
, platform
=None):
1489 """ creates the HDL code-fragment for FPAdd
1492 m
.submodules
.res_in_z
= self
.in_z
1493 m
.submodules
+= self
.res
1505 """ FPADD: stages as follows:
1511 FPAddBase---> FPAddBaseMod
1513 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1515 FPAddBase is tricky: it is both a stage and *has* stages.
1516 Connection to FPAddBaseMod therefore requires an in stb/ack
1517 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1518 needs to be the thing that raises the incoming stb.
1521 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1524 * width: bit-width of IEEE754. supported: 16, 32, 64
1525 * id_wid: an identifier that is sync-connected to the input
1526 * single_cycle: True indicates each stage to complete in 1 clock
1529 self
.id_wid
= id_wid
1530 self
.single_cycle
= single_cycle
1532 #self.out_z = FPOp(width)
1533 self
.ids
= FPID(id_wid
)
1536 for i
in range(rs_sz
):
1539 in_a
.name
= "in_a_%d" % i
1540 in_b
.name
= "in_b_%d" % i
1541 rs
.append((in_a
, in_b
))
1545 for i
in range(rs_sz
):
1547 out_z
.name
= "out_z_%d" % i
1549 self
.res
= Array(res
)
1553 def add_state(self
, state
):
1554 self
.states
.append(state
)
1557 def get_fragment(self
, platform
=None):
1558 """ creates the HDL code-fragment for FPAdd
1561 m
.submodules
+= self
.rs
1563 in_a
= self
.rs
[0][0]
1564 in_b
= self
.rs
[0][1]
1566 out_z
= FPOp(self
.width
)
1567 out_mid
= Signal(self
.id_wid
, reset_less
=True)
1568 m
.submodules
.out_z
= out_z
1570 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1575 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1580 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1581 ab
= self
.add_state(ab
)
1582 ab
.setup(m
, a
, b
, getb
.out_decode
, self
.ids
.in_mid
,
1585 pz
= self
.add_state(FPPutZIdx("put_z", ab
.out_z
, self
.res
,
1588 with m
.FSM() as fsm
:
1590 for state
in self
.states
:
1591 with m
.State(state
.state_from
):
1597 if __name__
== "__main__":
1599 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1600 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1601 alu
.rs
[0][1].ports() + \
1602 alu
.res
[0].ports() + \
1603 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1605 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1606 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1607 alu
.in_t
.ports() + \
1608 alu
.out_z
.ports() + \
1609 [alu
.in_mid
, alu
.out_mid
])
1612 # works... but don't use, just do "python fname.py convert -t v"
1613 #print (verilog.convert(alu, ports=[
1614 # ports=alu.in_a.ports() + \
1615 # alu.in_b.ports() + \
1616 # alu.out_z.ports())