7646cc4ea7feda225a3803d41b5998b5aa82e0b1
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from singlepipe
import (ControlBase
, StageChain
, UnbufferedPipeline
,
14 from multipipe
import CombMuxOutPipe
15 from multipipe
import PriorityCombMuxInPipe
17 #from fpbase import FPNumShiftMultiRight
20 class FPState(FPBase
):
def __init__(self, state_from):
    """Remember which state this object stands for.

    state_from: state identifier; subclasses in this file pass string
                names such as "special_cases", "align" or "add_0".
    """
    self.state_from = state_from
24 def set_inputs(self
, inputs
):
26 for k
,v
in inputs
.items():
29 def set_outputs(self
, outputs
):
30 self
.outputs
= outputs
31 for k
,v
in outputs
.items():
def __init__(self, width):
    """Create the ports used to fetch one operand.

    width: bit-width handed to FPOp and used for the out_op Signal.
    """
    # incoming operand; elaborate() reads its ack, stb and v members
    self.in_op = FPOp(width)
    # receives in_op.v inside the out_decode branch of elaborate()
    self.out_op = Signal(width)
    # driven combinatorially with (in_op.ack & in_op.stb)
    self.out_decode = Signal(reset_less=True)
41 def elaborate(self
, platform
):
43 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
44 m
.submodules
.get_op_in
= self
.in_op
45 #m.submodules.get_op_out = self.out_op
46 with m
.If(self
.out_decode
):
48 self
.out_op
.eq(self
.in_op
.v
),
53 class FPGetOp(FPState
):
57 def __init__(self
, in_state
, out_state
, in_op
, width
):
58 FPState
.__init
__(self
, in_state
)
59 self
.out_state
= out_state
60 self
.mod
= FPGetOpMod(width
)
62 self
.out_op
= Signal(width
)
63 self
.out_decode
= Signal(reset_less
=True)
65 def setup(self
, m
, in_op
):
66 """ links module to inputs and outputs
68 setattr(m
.submodules
, self
.state_from
, self
.mod
)
69 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
70 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
73 with m
.If(self
.out_decode
):
74 m
.next
= self
.out_state
77 self
.out_op
.eq(self
.mod
.out_op
)
80 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """Bundle two FPNumBase operands together with an id field.

    width:   bit-width passed to both FPNumBase operands
    id_wid:  bit-width of the mid Signal
    m_extra: forwarded unchanged as the second FPNumBase argument
             (NOTE(review): presumably selects extra mantissa bits --
             confirm against fpbase.FPNumBase)
    """
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)
    # id that is copied alongside a and b by this record's eq()
    self.mid = Signal(id_wid, reset_less=True)
91 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
94 return [self
.a
, self
.b
, self
.mid
]
99 def __init__(self
, width
, id_wid
):
102 self
.a
= Signal(width
)
103 self
.b
= Signal(width
)
104 self
.mid
= Signal(id_wid
, reset_less
=True)
107 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
110 return [self
.a
, self
.b
, self
.mid
]
113 class FPGet2OpMod(Trigger
):
114 def __init__(self
, width
, id_wid
):
115 Trigger
.__init
__(self
)
118 self
.i
= self
.ispec()
119 self
.o
= self
.ospec()
122 return FPADDBaseData(self
.width
, self
.id_wid
)
125 return FPADDBaseData(self
.width
, self
.id_wid
)
127 def process(self
, i
):
130 def elaborate(self
, platform
):
131 m
= Trigger
.elaborate(self
, platform
)
132 with m
.If(self
.trigger
):
139 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """Create the state that fetches both operands at once.

    in_state:  identifier of this state (passed to FPState.__init__)
    out_state: identifier of the state to move to once out_decode is set
    width:     operand bit-width, forwarded to FPGet2OpMod
    id_wid:    id-field bit-width, forwarded to FPGet2OpMod
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state
    self.mod = FPGet2OpMod(width, id_wid)
    # local record with the same layout as the module's output spec
    self.o = self.mod.ospec()
    self.in_stb = Signal(reset_less=True)
    # mirrors self.mod.ack (wired in setup)
    self.out_ack = Signal(reset_less=True)
    # mirrors self.mod.trigger (wired in setup)
    self.out_decode = Signal(reset_less=True)
def setup(self, m, i, in_stb, in_ack):
    """ links module to inputs and outputs

        m:      module under construction; self.mod is registered on it
                as the submodule "get_ops"
        i:      input record, copied combinatorially into self.mod.i
        in_stb: strobe signal, forwarded to self.mod.stb
        in_ack: acknowledge signal, driven from self.mod.ack
    """
    m.submodules.get_ops = self.mod
    m.d.comb += self.mod.i.eq(i)
    m.d.comb += self.mod.stb.eq(in_stb)
    # the module's ack is exposed twice: locally and to the caller
    m.d.comb += self.out_ack.eq(self.mod.ack)
    m.d.comb += self.out_decode.eq(self.mod.trigger)
    m.d.comb += in_ack.eq(self.mod.ack)
163 with m
.If(self
.out_decode
):
164 m
.next
= self
.out_state
167 self
.o
.eq(self
.mod
.o
),
170 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid):
    """Create the record carried out of the special-cases stage.

    width:  floating-point bit-width of the operands and of oz
    id_wid: bit-width of the mid Signal
    """
    self.a = FPNumBase(width, True)
    self.b = FPNumBase(width, True)
    self.z = FPNumOut(width, False)
    # packed result; FPAddSpecialCasesMod copies z.v into it
    self.oz = Signal(width, reset_less=True)
    # set to 1 by FPAddSpecialCasesMod when a special case matched,
    # 0 when a and b are passed through for further processing
    self.out_do_z = Signal(reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
184 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
185 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
188 class FPAddSpecialCasesMod
:
189 """ special cases: NaNs, infs, zeros, denormalised
190 NOTE: some of these are unique to add. see "Special Operations"
191 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
194 def __init__(self
, width
, id_wid
):
197 self
.i
= self
.ispec()
198 self
.o
= self
.ospec()
201 return FPADDBaseData(self
.width
, self
.id_wid
)
204 return FPSCData(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ links module to inputs and outputs

        m: module under construction; this object is registered on it
           as the submodule "specialcases"
        i: input record, copied combinatorially into self.i
    """
    m.submodules.specialcases = self
    m.d.comb += self.i.eq(i)
212 def process(self
, i
):
215 def elaborate(self
, platform
):
218 m
.submodules
.sc_out_z
= self
.o
.z
220 # decode: XXX really should move to separate stage
221 a1
= FPNumIn(None, self
.width
)
222 b1
= FPNumIn(None, self
.width
)
223 m
.submodules
.sc_decode_a
= a1
224 m
.submodules
.sc_decode_b
= b1
225 m
.d
.comb
+= [a1
.decode(self
.i
.a
),
230 m
.d
.comb
+= s_nomatch
.eq(a1
.s
!= b1
.s
)
233 m
.d
.comb
+= m_match
.eq(a1
.m
== b1
.m
)
235 # if a is NaN or b is NaN return NaN
236 with m
.If(a1
.is_nan | b1
.is_nan
):
237 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
238 m
.d
.comb
+= self
.o
.z
.nan(0)
240 # XXX WEIRDNESS for FP16 non-canonical NaN handling
243 ## if a is zero and b is NaN return -b
244 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
245 # m.d.comb += self.o.out_do_z.eq(1)
246 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
248 ## if b is zero and a is NaN return -a
249 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
250 # m.d.comb += self.o.out_do_z.eq(1)
251 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
253 ## if a is -zero and b is NaN return -b
254 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
255 # m.d.comb += self.o.out_do_z.eq(1)
256 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
258 ## if b is -zero and a is NaN return -a
259 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
260 # m.d.comb += self.o.out_do_z.eq(1)
261 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
263 # if a is inf return inf (or NaN)
264 with m
.Elif(a1
.is_inf
):
265 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
266 m
.d
.comb
+= self
.o
.z
.inf(a1
.s
)
267 # if a is inf and signs don't match return NaN
268 with m
.If(b1
.exp_128
& s_nomatch
):
269 m
.d
.comb
+= self
.o
.z
.nan(0)
271 # if b is inf return inf
272 with m
.Elif(b1
.is_inf
):
273 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
274 m
.d
.comb
+= self
.o
.z
.inf(b1
.s
)
276 # if a is zero and b zero return signed-a/b
277 with m
.Elif(a1
.is_zero
& b1
.is_zero
):
278 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
279 m
.d
.comb
+= self
.o
.z
.create(a1
.s
& b1
.s
, b1
.e
, b1
.m
[3:-1])
281 # if a is zero return b
282 with m
.Elif(a1
.is_zero
):
283 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
284 m
.d
.comb
+= self
.o
.z
.create(b1
.s
, b1
.e
, b1
.m
[3:-1])
286 # if b is zero return a
287 with m
.Elif(b1
.is_zero
):
288 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
289 m
.d
.comb
+= self
.o
.z
.create(a1
.s
, a1
.e
, a1
.m
[3:-1])
291 # if a equal to -b return zero (+ve zero)
292 with m
.Elif(s_nomatch
& m_match
& (a1
.e
== b1
.e
)):
293 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
294 m
.d
.comb
+= self
.o
.z
.zero(0)
296 # Denormalised Number checks next, so pass a/b data through
298 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
299 m
.d
.comb
+= self
.o
.a
.eq(a1
)
300 m
.d
.comb
+= self
.o
.b
.eq(b1
)
302 m
.d
.comb
+= self
.o
.oz
.eq(self
.o
.z
.v
)
303 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
309 def __init__(self
, id_wid
):
312 self
.in_mid
= Signal(id_wid
, reset_less
=True)
313 self
.out_mid
= Signal(id_wid
, reset_less
=True)
319 if self
.id_wid
is not None:
320 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
323 class FPAddSpecialCases(FPState
):
324 """ special cases: NaNs, infs, zeros, denormalised
325 NOTE: some of these are unique to add. see "Special Operations"
326 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """Create the "special_cases" state and its combinatorial module.

    width:  floating-point bit-width handed to FPAddSpecialCasesMod
    id_wid: bit-width of the id field, also handed to the module
    """
    FPState.__init__(self, "special_cases")
    # FPAddSpecialCasesMod.__init__ takes (width, id_wid); leaving id_wid
    # out raised TypeError as soon as this state was constructed.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    # local record with the same layout as the module's output spec
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
def setup(self, m, i):
    """ links module to inputs and outputs

        m: module under construction
        i: input record handed on to the special-cases module

        The module's setup() accepts only (m, i) and publishes its
        results on its "o" record, so the do-z flag, the packed result
        and the id are all taken from self.mod.o here.
    """
    self.mod.setup(m, i)
    # expose the early-out flag combinatorially for this state's use
    m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
    m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v) # only take the output
    m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)
344 with m
.If(self
.out_do_z
):
347 m
.next
= "denormalise"
350 class FPAddSpecialCasesDeNorm(FPState
, UnbufferedPipeline
):
351 """ special cases: NaNs, infs, zeros, denormalised
352 NOTE: some of these are unique to add. see "Special Operations"
353 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """Create the combined special-cases + denormalise pipeline stage.

    width:  floating-point bit-width, forwarded to both sub-modules
    id_wid: id-field bit-width, forwarded to both sub-modules
    """
    FPState.__init__(self, "special_cases")
    # smod detects the special cases, dmod handles denormalised inputs;
    # setup() feeds smod's output into dmod
    self.smod = FPAddSpecialCasesMod(width, id_wid)
    self.dmod = FPAddDeNormMod(width, id_wid)
    UnbufferedPipeline.__init__(self, self) # pipe is its own stage
    self.o = self.ospec()
364 return self
.smod
.ispec()
367 return self
.dmod
.ospec()
369 def setup(self
, m
, i
):
370 """ links module to inputs and outputs
372 # these only needed for break-out (early-out)
373 # out_z = self.smod.ospec()
374 # out_do_z = Signal(reset_less=True)
375 self
.smod
.setup(m
, i
)
376 self
.dmod
.setup(m
, self
.smod
.o
)
377 #m.d.comb += out_do_z.eq(self.smod.o.out_do_z)
379 # out_do_z=True, only needed for early-out (split pipeline)
380 #m.d.sync += out_z.z.v.eq(self.smod.o.z.v) # only take output
381 #m.d.sync += out_z.mid.eq(self.smod.o.mid) # (and mid)
384 # XXX TODO: sync for state-based
385 m
.d
.comb
+= self
.o
.eq(self
.dmod
.o
)
387 def process(self
, i
):
391 #with m.If(self.out_do_z):
397 class FPAddDeNormMod(FPState
):
399 def __init__(self
, width
, id_wid
):
402 self
.i
= self
.ispec()
403 self
.o
= self
.ospec()
406 return FPSCData(self
.width
, self
.id_wid
)
409 return FPSCData(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ links module to inputs and outputs

        m: module under construction; this object is registered on it
           as the submodule "denormalise"
        i: input record, copied combinatorially into self.i
    """
    m.submodules.denormalise = self
    m.d.comb += self.i.eq(i)
417 def elaborate(self
, platform
):
419 m
.submodules
.denorm_in_a
= self
.i
.a
420 m
.submodules
.denorm_in_b
= self
.i
.b
421 m
.submodules
.denorm_out_a
= self
.o
.a
422 m
.submodules
.denorm_out_b
= self
.o
.b
424 with m
.If(~self
.i
.out_do_z
):
425 # XXX hmmm, don't like repeating identical code
426 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
427 with m
.If(self
.i
.a
.exp_n127
):
428 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
430 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
432 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
433 with m
.If(self
.i
.b
.exp_n127
):
434 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit b exponent
436 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
438 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
439 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
440 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
441 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
446 class FPAddDeNorm(FPState
):
def __init__(self, width, id_wid):
    """Create the "denormalise" state and its combinatorial module.

    width:  floating-point bit-width of the operands
    id_wid: bit-width of the id field, required by FPAddDeNormMod
    """
    FPState.__init__(self, "denormalise")
    # FPAddDeNormMod.__init__ takes (width, id_wid); the id_wid argument
    # was missing, which raised TypeError on construction.
    self.mod = FPAddDeNormMod(width, id_wid)
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
454 def setup(self
, m
, i
):
455 """ links module to inputs and outputs
459 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
460 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
463 # Denormalised Number checks
467 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """Create the ports of the multi-cycle alignment module.

    width: floating-point bit-width of the operands
    """
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # elaborate() defaults this to 0 and sets it to 1 in the
    # "exponents equal" case
    self.exp_eq = Signal(reset_less=True)
476 def elaborate(self
, platform
):
477 # This one however (single-cycle) will do the shift
482 m
.submodules
.align_in_a
= self
.in_a
483 m
.submodules
.align_in_b
= self
.in_b
484 m
.submodules
.align_out_a
= self
.out_a
485 m
.submodules
.align_out_b
= self
.out_b
487 # NOTE: this does *not* do single-cycle multi-shifting,
488 # it *STAYS* in the align state until exponents match
490 # exponent of a greater than b: shift b down
491 m
.d
.comb
+= self
.exp_eq
.eq(0)
492 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
493 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
494 agtb
= Signal(reset_less
=True)
495 altb
= Signal(reset_less
=True)
496 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
497 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
499 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
500 # exponent of b greater than a: shift a down
502 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
503 # exponents equal: move to next stage.
505 m
.d
.comb
+= self
.exp_eq
.eq(1)
509 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """Create the multi-cycle "align" state.

    width:  floating-point bit-width of the operands
    id_wid: accepted for a uniform constructor signature; not used here
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)
    # registered (sync) copies of the module outputs, see setup()
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # combinatorial copy of the module's exp_eq flag
    self.exp_eq = Signal(reset_less=True)
def setup(self, m, in_a, in_b):
    """ links module to inputs and outputs

        m:    module under construction; self.mod is registered on it
              as the submodule "align"
        in_a: first operand, copied combinatorially into self.mod.in_a
        in_b: second operand, copied combinatorially into self.mod.in_b
    """
    m.submodules.align = self.mod
    m.d.comb += self.mod.in_a.eq(in_a)
    m.d.comb += self.mod.in_b.eq(in_b)
    m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
    # outputs are latched on the clock, unlike the inputs and exp_eq
    m.d.sync += self.out_a.eq(self.mod.out_a)
    m.d.sync += self.out_b.eq(self.mod.out_b)
529 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """Create a record of two FPNumIn operands plus pass-through fields.

    width:  floating-point bit-width of the operands and of oz
    id_wid: bit-width of the mid Signal
    """
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    self.z = FPNumOut(width, False)
    # out_do_z, oz and mid are copied from input to output unchanged
    # by FPAddAlignSingleMod.elaborate()
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
544 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
545 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
548 class FPAddAlignSingleMod
:
550 def __init__(self
, width
, id_wid
):
553 self
.i
= self
.ispec()
554 self
.o
= self
.ospec()
557 return FPSCData(self
.width
, self
.id_wid
)
560 return FPNumIn2Ops(self
.width
, self
.id_wid
)
562 def process(self
, i
):
def setup(self, m, i):
    """ links module to inputs and outputs

        m: module under construction; this object is registered on it
           as the submodule "align"
        i: input record, copied combinatorially into self.i
    """
    m.submodules.align = self
    m.d.comb += self.i.eq(i)
571 def elaborate(self
, platform
):
572 """ Aligns A against B or B against A, depending on which has the
573 greater exponent. This is done in a *single* cycle using
574 variable-width bit-shift
576 the shifter used here is quite expensive in terms of gates.
577 Mux A or B in (and out) into temporaries, as only one of them
578 needs to be aligned against the other
582 m
.submodules
.align_in_a
= self
.i
.a
583 m
.submodules
.align_in_b
= self
.i
.b
584 m
.submodules
.align_out_a
= self
.o
.a
585 m
.submodules
.align_out_b
= self
.o
.b
587 # temporary (muxed) input and output to be shifted
588 t_inp
= FPNumBase(self
.width
)
589 t_out
= FPNumIn(None, self
.width
)
590 espec
= (len(self
.i
.a
.e
), True)
591 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
592 m
.submodules
.align_t_in
= t_inp
593 m
.submodules
.align_t_out
= t_out
594 m
.submodules
.multishift_r
= msr
596 ediff
= Signal(espec
, reset_less
=True)
597 ediffr
= Signal(espec
, reset_less
=True)
598 tdiff
= Signal(espec
, reset_less
=True)
599 elz
= Signal(reset_less
=True)
600 egz
= Signal(reset_less
=True)
602 # connect multi-shifter to t_inp/out mantissa (and tdiff)
603 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
604 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
605 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
606 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
607 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
609 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
610 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
611 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
612 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
614 # default: A-exp == B-exp, A and B untouched (fall through)
615 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
616 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
617 # only one shifter (muxed)
618 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
619 # exponent of a greater than b: shift b down
620 with m
.If(~self
.i
.out_do_z
):
622 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
625 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
627 # exponent of b greater than a: shift a down
629 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
632 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
635 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
636 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
637 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
638 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
643 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """Create the single-cycle "align" state.

    width:  floating-point bit-width of the operands
    id_wid: id-field bit-width, forwarded to FPAddAlignSingleMod
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignSingleMod(width, id_wid)
    # registered (sync) outputs, written in setup()
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
651 def setup(self
, m
, i
):
652 """ links module to inputs and outputs
656 # NOTE: could be done as comb
657 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
658 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
664 class FPAddAlignSingleAdd(FPState
, UnbufferedPipeline
):
666 def __init__(self
, width
, id_wid
):
667 FPState
.__init
__(self
, "align")
670 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
671 self
.a1o
= self
.ospec()
674 return FPSCData(self
.width
, self
.id_wid
)
677 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
679 def setup(self
, m
, i
):
680 """ links module to inputs and outputs
683 # chain AddAlignSingle, AddStage0 and AddStage1
684 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
685 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
686 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
688 chain
= StageChain([mod
, a0mod
, a1mod
])
693 def process(self
, i
):
697 m
.d
.sync
+= self
.a1o
.eq(self
.process(None))
698 m
.next
= "normalise_1"
701 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """Create the record produced by the first add stage.

    width:  floating-point bit-width of z and oz
    id_wid: bit-width of the mid Signal
    """
    self.z = FPNumBase(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    # mantissa sum/difference written by FPAddStage0Mod; sized four
    # bits wider than z's mantissa (z must exist before this line)
    self.tot = Signal(self.z.m_width + 4, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
711 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
712 self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
715 class FPAddStage0Mod
:
717 def __init__(self
, width
, id_wid
):
720 self
.i
= self
.ispec()
721 self
.o
= self
.ospec()
724 return FPSCData(self
.width
, self
.id_wid
)
727 return FPAddStage0Data(self
.width
, self
.id_wid
)
729 def process(self
, i
):
def setup(self, m, i):
    """ links module to inputs and outputs

        m: module under construction; this object is registered on it
           as the submodule "add0"
        i: input record, copied combinatorially into self.i
    """
    m.submodules.add0 = self
    m.d.comb += self.i.eq(i)
738 def elaborate(self
, platform
):
740 m
.submodules
.add0_in_a
= self
.i
.a
741 m
.submodules
.add0_in_b
= self
.i
.b
742 m
.submodules
.add0_out_z
= self
.o
.z
744 # store intermediate tests (and zero-extended mantissas)
745 seq
= Signal(reset_less
=True)
746 mge
= Signal(reset_less
=True)
747 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
748 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
749 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
750 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
751 am0
.eq(Cat(self
.i
.a
.m
, 0)),
752 bm0
.eq(Cat(self
.i
.b
.m
, 0))
754 # same-sign (both negative or both positive) add mantissas
755 with m
.If(~self
.i
.out_do_z
):
756 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
759 self
.o
.tot
.eq(am0
+ bm0
),
760 self
.o
.z
.s
.eq(self
.i
.a
.s
)
762 # a mantissa greater than b, use a
765 self
.o
.tot
.eq(am0
- bm0
),
766 self
.o
.z
.s
.eq(self
.i
.a
.s
)
768 # b mantissa greater than a, use b
771 self
.o
.tot
.eq(bm0
- am0
),
772 self
.o
.z
.s
.eq(self
.i
.b
.s
)
775 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
776 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
777 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
781 class FPAddStage0(FPState
):
782 """ First stage of add. covers same-sign (add) and subtract
783 special-casing when mantissas are greater or equal, to
784 give greatest accuracy.
def __init__(self, width, id_wid):
    """Create the "add_0" state and its combinatorial module.

    width:  floating-point bit-width of the operands
    id_wid: bit-width of the id field, required by FPAddStage0Mod
    """
    FPState.__init__(self, "add_0")
    # FPAddStage0Mod.__init__ takes (width, id_wid); the id_wid argument
    # was missing, which raised TypeError on construction.
    self.mod = FPAddStage0Mod(width, id_wid)
    # local record with the same layout as the module's output spec
    self.o = self.mod.ospec()
792 def setup(self
, m
, i
):
793 """ links module to inputs and outputs
797 # NOTE: these could be done as combinatorial (merge add0+add1)
798 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
804 class FPAddStage1Data
:
806 def __init__(self
, width
, id_wid
):
807 self
.z
= FPNumBase(width
, False)
808 self
.out_do_z
= Signal(reset_less
=True)
809 self
.oz
= Signal(width
, reset_less
=True)
811 self
.mid
= Signal(id_wid
, reset_less
=True)
814 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
815 self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
819 class FPAddStage1Mod(FPState
):
820 """ Second stage of add: preparation for normalisation.
821 detects when tot sum is too big (tot[27] is kinda a carry bit)
824 def __init__(self
, width
, id_wid
):
827 self
.i
= self
.ispec()
828 self
.o
= self
.ospec()
831 return FPAddStage0Data(self
.width
, self
.id_wid
)
834 return FPAddStage1Data(self
.width
, self
.id_wid
)
836 def process(self
, i
):
def setup(self, m, i):
    """ links module to inputs and outputs

        m: module under construction; this object is registered on it
           as the submodule "add1", and its output overflow record
           (self.o.of) as "add1_out_overflow"
        i: input record, copied combinatorially into self.i
    """
    m.submodules.add1 = self
    m.submodules.add1_out_overflow = self.o.of

    m.d.comb += self.i.eq(i)
847 def elaborate(self
, platform
):
849 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
850 # tot[-1] (MSB) gets set when the sum overflows. shift result down
851 with m
.If(~self
.i
.out_do_z
):
852 with m
.If(self
.i
.tot
[-1]):
854 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
855 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
856 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
857 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
858 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
859 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
861 # tot[-1] (MSB) zero case
864 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
865 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
866 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
867 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
868 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
871 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
872 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
873 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
878 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """Create the "add_1" state and its combinatorial module.

    width:  floating-point bit-width of the result
    id_wid: bit-width of the id field, required by FPAddStage1Mod
    """
    FPState.__init__(self, "add_1")
    # FPAddStage1Mod.__init__ takes (width, id_wid); the id_wid argument
    # was missing, which raised TypeError on construction.
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # strobe for the normalise stage: cleared in setup(), set in action()
    self.norm_stb = Signal()
887 def setup(self
, m
, i
):
888 """ links module to inputs and outputs
892 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
894 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
895 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
896 m
.d
.sync
+= self
.norm_stb
.eq(1)
899 m
.next
= "normalise_1"
902 class FPNormaliseModSingle
:
904 def __init__(self
, width
):
906 self
.in_z
= self
.ispec()
907 self
.out_z
= self
.ospec()
910 return FPNumBase(self
.width
, False)
913 return FPNumBase(self
.width
, False)
def setup(self, m, i):
    """ links module to inputs and outputs

        m: module under construction; this object is registered on it
           as the submodule "normalise"
        i: number to normalise, copied combinatorially into self.in_z
    """
    m.submodules.normalise = self
    # this class stores its input as in_z (there is no "i" attribute),
    # so the incoming value has to be connected to in_z
    m.d.comb += self.in_z.eq(i)
921 def elaborate(self
, platform
):
924 mwid
= self
.out_z
.m_width
+2
925 pe
= PriorityEncoder(mwid
)
926 m
.submodules
.norm_pe
= pe
928 m
.submodules
.norm1_out_z
= self
.out_z
929 m
.submodules
.norm1_in_z
= self
.in_z
931 in_z
= FPNumBase(self
.width
, False)
933 m
.submodules
.norm1_insel_z
= in_z
934 m
.submodules
.norm1_insel_overflow
= in_of
936 espec
= (len(in_z
.e
), True)
937 ediff_n126
= Signal(espec
, reset_less
=True)
938 msr
= MultiShiftRMerge(mwid
, espec
)
939 m
.submodules
.multishift_r
= msr
941 m
.d
.comb
+= in_z
.eq(self
.in_z
)
942 m
.d
.comb
+= in_of
.eq(self
.in_of
)
943 # initialise out from in (overridden below)
944 m
.d
.comb
+= self
.out_z
.eq(in_z
)
945 m
.d
.comb
+= self
.out_of
.eq(in_of
)
946 # normalisation decrease condition
947 decrease
= Signal(reset_less
=True)
948 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
951 # *sigh* not entirely obvious: count leading zeros (clz)
952 # with a PriorityEncoder: to find from the MSB
953 # we reverse the order of the bits.
954 temp_m
= Signal(mwid
, reset_less
=True)
955 temp_s
= Signal(mwid
+1, reset_less
=True)
956 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
958 # cat round and guard bits back into the mantissa
959 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
960 pe
.i
.eq(temp_m
[::-1]), # inverted
961 clz
.eq(pe
.o
), # count zeros from MSB down
962 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
963 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
964 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """Create the record produced by the normalisation stage.

    width:  floating-point bit-width of z and oz
    id_wid: bit-width of the mid Signal
    """
    # FPNorm1ModSingle drives this from its overflow record's roundz
    self.roundz = Signal(reset_less=True)
    self.z = FPNumBase(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
980 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
981 self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
984 class FPNorm1ModSingle
:
986 def __init__(self
, width
, id_wid
):
989 self
.i
= self
.ispec()
990 self
.o
= self
.ospec()
993 return FPAddStage1Data(self
.width
, self
.id_wid
)
996 return FPNorm1Data(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ links module to inputs and outputs

        m: module under construction; this object is registered on it
           as the submodule "normalise_1"
        i: input record, copied combinatorially into self.i
    """
    m.submodules.normalise_1 = self
    m.d.comb += self.i.eq(i)
1004 def process(self
, i
):
1007 def elaborate(self
, platform
):
1010 mwid
= self
.o
.z
.m_width
+2
1011 pe
= PriorityEncoder(mwid
)
1012 m
.submodules
.norm_pe
= pe
1015 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1017 m
.submodules
.norm1_out_z
= self
.o
.z
1018 m
.submodules
.norm1_out_overflow
= of
1019 m
.submodules
.norm1_in_z
= self
.i
.z
1020 m
.submodules
.norm1_in_overflow
= self
.i
.of
1023 m
.submodules
.norm1_insel_z
= i
.z
1024 m
.submodules
.norm1_insel_overflow
= i
.of
1026 espec
= (len(i
.z
.e
), True)
1027 ediff_n126
= Signal(espec
, reset_less
=True)
1028 msr
= MultiShiftRMerge(mwid
, espec
)
1029 m
.submodules
.multishift_r
= msr
1031 m
.d
.comb
+= i
.eq(self
.i
)
1032 # initialise out from in (overridden below)
1033 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1034 m
.d
.comb
+= of
.eq(i
.of
)
1035 # normalisation increase/decrease conditions
1036 decrease
= Signal(reset_less
=True)
1037 increase
= Signal(reset_less
=True)
1038 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1039 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1041 with m
.If(~self
.i
.out_do_z
):
1042 with m
.If(decrease
):
1043 # *sigh* not entirely obvious: count leading zeros (clz)
1044 # with a PriorityEncoder: to find from the MSB
1045 # we reverse the order of the bits.
1046 temp_m
= Signal(mwid
, reset_less
=True)
1047 temp_s
= Signal(mwid
+1, reset_less
=True)
1048 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1049 # make sure that the amount to decrease by does NOT
1050 # go below the minimum non-INF/NaN exponent
1051 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1054 # cat round and guard bits back into the mantissa
1055 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1056 pe
.i
.eq(temp_m
[::-1]), # inverted
1057 clz
.eq(limclz
), # count zeros from MSB down
1058 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1059 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1060 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1061 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1062 # overflow in bits 0..1: got shifted too (leave sticky)
1063 of
.guard
.eq(temp_s
[1]), # guard
1064 of
.round_bit
.eq(temp_s
[0]), # round
1067 with m
.Elif(increase
):
1068 temp_m
= Signal(mwid
+1, reset_less
=True)
1070 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1072 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1073 # connect multi-shifter to inp/out mantissa (and ediff)
1075 msr
.diff
.eq(ediff_n126
),
1076 self
.o
.z
.m
.eq(msr
.m
[3:]),
1077 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1078 # overflow in bits 0..1: got shifted too (leave sticky)
1079 of
.guard
.eq(temp_s
[2]), # guard
1080 of
.round_bit
.eq(temp_s
[1]), # round
1081 of
.sticky
.eq(temp_s
[0]), # sticky
1082 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1085 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1086 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
1087 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
1092 class FPNorm1ModMulti
:
1094 def __init__(self
, width
, single_cycle
=True):
1096 self
.in_select
= Signal(reset_less
=True)
1097 self
.in_z
= FPNumBase(width
, False)
1098 self
.in_of
= Overflow()
1099 self
.temp_z
= FPNumBase(width
, False)
1100 self
.temp_of
= Overflow()
1101 self
.out_z
= FPNumBase(width
, False)
1102 self
.out_of
= Overflow()
1104 def elaborate(self
, platform
):
1107 m
.submodules
.norm1_out_z
= self
.out_z
1108 m
.submodules
.norm1_out_overflow
= self
.out_of
1109 m
.submodules
.norm1_temp_z
= self
.temp_z
1110 m
.submodules
.norm1_temp_of
= self
.temp_of
1111 m
.submodules
.norm1_in_z
= self
.in_z
1112 m
.submodules
.norm1_in_overflow
= self
.in_of
1114 in_z
= FPNumBase(self
.width
, False)
1116 m
.submodules
.norm1_insel_z
= in_z
1117 m
.submodules
.norm1_insel_overflow
= in_of
1119 # select which of temp or in z/of to use
1120 with m
.If(self
.in_select
):
1121 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1122 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1124 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
1125 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
1126 # initialise out from in (overridden below)
1127 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1128 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1129 # normalisation increase/decrease conditions
1130 decrease
= Signal(reset_less
=True)
1131 increase
= Signal(reset_less
=True)
1132 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1133 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1134 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1136 with m
.If(decrease
):
1138 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1139 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1140 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1141 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1142 self
.out_of
.round_bit
.eq(0), # reset round bit
1143 self
.out_of
.m0
.eq(in_of
.guard
),
1146 with m
.Elif(increase
):
1148 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1149 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1150 self
.out_of
.guard
.eq(in_z
.m
[0]),
1151 self
.out_of
.m0
.eq(in_z
.m
[1]),
1152 self
.out_of
.round_bit
.eq(in_of
.guard
),
1153 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
1159 class FPNorm1Single(FPState
):
def __init__(self, width, id_wid, single_cycle=True):
    """Create the single-cycle "normalise_1" state.

    width:        floating-point bit-width of the result
    id_wid:       bit-width of the id field, required by FPNorm1ModSingle
    single_cycle: accepted for signature compatibility; not used here
    """
    FPState.__init__(self, "normalise_1")
    # FPNorm1ModSingle.__init__ takes (width, id_wid); the id_wid
    # argument was missing, which raised TypeError on construction.
    self.mod = FPNorm1ModSingle(width, id_wid)
    # must come after self.mod is assigned: ospec() delegates to it
    self.o = self.ospec()
    self.out_z = FPNumBase(width, False)
    self.out_roundz = Signal(reset_less=True)
1169 return self
.mod
.ispec()
1172 return self
.mod
.ospec()
def setup(self, m, i):
    """ links module to inputs and outputs

        m: module under construction
        i: input record; both arguments are handed straight to
           self.mod.setup()
    """
    self.mod.setup(m, i)
1179 def action(self
, m
):
1183 class FPNorm1Multi(FPState
):
def __init__(self, width, id_wid):
    """Create the multi-cycle "normalise_1" state.

    width:  floating-point bit-width of the result
    id_wid: accepted for a uniform constructor signature; not used here
    """
    FPState.__init__(self, "normalise_1")
    self.mod = FPNorm1ModMulti(width)
    # stb follows the norm_stb argument of setup(); ack is cleared in
    # setup() and driven again in action()
    self.stb = Signal(reset_less=True)
    self.ack = Signal(reset=0, reset_less=True)
    self.out_norm = Signal(reset_less=True)
    # set in action() to (~ack) & stb
    self.in_accept = Signal(reset_less=True)
    # temp_z / temp_of are written on the clock in action() from the
    # current outputs
    self.temp_z = FPNumBase(width)
    self.temp_of = Overflow()
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True)
1197 def setup(self
, m
, in_z
, in_of
, norm_stb
):
1198 """ links module to inputs and outputs
1200 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1201 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1202 self
.out_z
, self
.out_norm
)
1204 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1205 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1207 def action(self
, m
):
1208 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1209 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1210 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1211 with m
.If(self
.out_norm
):
1212 with m
.If(self
.in_accept
):
1217 m
.d
.sync
+= self
.ack
.eq(0)
1219 # normalisation not required (or done).
1221 m
.d
.sync
+= self
.ack
.eq(1)
1222 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
1225 class FPNormToPack(FPState
, UnbufferedPipeline
):
1227 def __init__(self
, width
, id_wid
):
1228 FPState
.__init
__(self
, "normalise_1")
1229 self
.id_wid
= id_wid
1231 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
1234 return FPAddStage1Data(self
.width
, self
.id_wid
) # Norm1ModSingle ispec
1237 return FPPackData(self
.width
, self
.id_wid
) # FPPackMod ospec
1239 def setup(self
, m
, i
):
1240 """ links module to inputs and outputs
1243 # Normalisation, Rounding Corrections, Pack - in a chain
1244 nmod
= FPNorm1ModSingle(self
.width
, self
.id_wid
)
1245 rmod
= FPRoundMod(self
.width
, self
.id_wid
)
1246 cmod
= FPCorrectionsMod(self
.width
, self
.id_wid
)
1247 pmod
= FPPackMod(self
.width
, self
.id_wid
)
1248 chain
= StageChain([nmod
, rmod
, cmod
, pmod
])
1250 self
.out_z
= pmod
.ospec()
1254 def process(self
, i
):
def action(self, m):
    """ FSM action: register the processed result into out_z, then
        move on to the "pack_put_z" state
    """
    result = self.process(None)
    latch = self.out_z.eq(result)
    m.d.sync += latch
    m.next = "pack_put_z"
def __init__(self, width, id_wid):
    """ data record with fields z, out_do_z, oz and mid

        NOTE(review): the class header is not visible here; the fields
        match what the round / corrections / pack modules read from
        their input (i.z, i.out_do_z, i.oz, i.mid) - confirm.

        * width: passed to FPNumBase, and used as the bit-width of oz
        * id_wid: bit-width of mid
    """
    self.z = FPNumBase(width, False)
    # consumers only modify / pack z when this flag is clear
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
1271 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
1277 def __init__(self
, width
, id_wid
):
1279 self
.id_wid
= id_wid
1280 self
.i
= self
.ispec()
1281 self
.out_z
= self
.ospec()
1284 return FPNorm1Data(self
.width
, self
.id_wid
)
1287 return FPRoundData(self
.width
, self
.id_wid
)
1289 def process(self
, i
):
1292 def setup(self
, m
, i
):
1293 m
.submodules
.roundz
= self
1294 m
.d
.comb
+= self
.i
.eq(i
)
1296 def elaborate(self
, platform
):
1298 m
.d
.comb
+= self
.out_z
.eq(self
.i
) # copies mid, z, out_do_z
1299 with m
.If(~self
.i
.out_do_z
):
1300 with m
.If(self
.i
.roundz
):
1301 m
.d
.comb
+= self
.out_z
.z
.m
.eq(self
.i
.z
.m
+ 1) # mantissa up
1302 with m
.If(self
.i
.z
.m
== self
.i
.z
.m1s
): # all 1s
1303 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.e
+ 1) # exponent up
1308 class FPRound(FPState
):
def __init__(self, width, id_wid):
    """ creates the "round" FSM state wrapping an FPRoundMod

        * width: passed to the rounding module
        * id_wid: passed to the rounding module
    """
    FPState.__init__(self, "round")
    # FPRoundMod is built with (width, id_wid) in FPNormToPack.setup;
    # previously only width was supplied here.
    self.mod = FPRoundMod(width, id_wid)
    # output record is whatever the wrapped module declares
    self.out_z = self.ospec()
1316 return self
.mod
.ispec()
1319 return self
.mod
.ospec()
1321 def setup(self
, m
, i
):
1322 """ links module to inputs and outputs
1324 self
.mod
.setup(m
, i
)
1327 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1328 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
def action(self, m):
    """ FSM action: unconditionally move on to the "corrections" state
    """
    m.next = "corrections"
1334 class FPCorrectionsMod
:
1336 def __init__(self
, width
, id_wid
):
1338 self
.id_wid
= id_wid
1339 self
.i
= self
.ispec()
1340 self
.out_z
= self
.ospec()
1343 return FPRoundData(self
.width
, self
.id_wid
)
1346 return FPRoundData(self
.width
, self
.id_wid
)
1348 def process(self
, i
):
1351 def setup(self
, m
, i
):
1352 """ links module to inputs and outputs
1354 m
.submodules
.corrections
= self
1355 m
.d
.comb
+= self
.i
.eq(i
)
1357 def elaborate(self
, platform
):
1359 m
.submodules
.corr_in_z
= self
.i
.z
1360 m
.submodules
.corr_out_z
= self
.out_z
.z
1361 m
.d
.comb
+= self
.out_z
.eq(self
.i
) # copies mid, z, out_do_z
1362 with m
.If(~self
.i
.out_do_z
):
1363 with m
.If(self
.i
.z
.is_denormalised
):
1364 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.N127
)
1368 class FPCorrections(FPState
):
def __init__(self, width, id_wid):
    """ creates the "corrections" FSM state wrapping an FPCorrectionsMod

        * width: passed to the corrections module
        * id_wid: passed to the corrections module
    """
    FPState.__init__(self, "corrections")
    # FPCorrectionsMod.__init__ takes (width, id_wid) with no default
    # for id_wid, so it must be supplied here.
    self.mod = FPCorrectionsMod(width, id_wid)
    # output record is whatever the wrapped module declares
    self.out_z = self.ospec()
1376 return self
.mod
.ispec()
1379 return self
.mod
.ospec()
1381 def setup(self
, m
, in_z
):
1382 """ links module to inputs and outputs
1384 self
.mod
.setup(m
, in_z
)
1386 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1387 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
1389 def action(self
, m
):
def __init__(self, width, id_wid):
    """ data record with fields z and mid

        NOTE(review): the class header is not visible here; z being a
        plain Signal suggests this is the packed-result record - confirm.

        * width: bit-width of z
        * id_wid: bit-width of mid
    """
    self.z = Signal(width, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
1400 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1403 return [self
.z
, self
.mid
]
1408 def __init__(self
, width
, id_wid
):
1410 self
.id_wid
= id_wid
1411 self
.i
= self
.ispec()
1412 self
.o
= self
.ospec()
1415 return FPRoundData(self
.width
, self
.id_wid
)
1418 return FPPackData(self
.width
, self
.id_wid
)
1420 def process(self
, i
):
1423 def setup(self
, m
, in_z
):
1424 """ links module to inputs and outputs
1426 m
.submodules
.pack
= self
1427 m
.d
.comb
+= self
.i
.eq(in_z
)
1429 def elaborate(self
, platform
):
1431 z
= FPNumOut(self
.width
, False)
1432 m
.submodules
.pack_in_z
= self
.i
.z
1433 m
.submodules
.pack_out_z
= z
1434 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1435 with m
.If(~self
.i
.out_do_z
):
1436 with m
.If(self
.i
.z
.is_overflowed
):
1437 m
.d
.comb
+= z
.inf(self
.i
.z
.s
)
1439 m
.d
.comb
+= z
.create(self
.i
.z
.s
, self
.i
.z
.e
, self
.i
.z
.m
)
1441 m
.d
.comb
+= z
.v
.eq(self
.i
.oz
)
1442 m
.d
.comb
+= self
.o
.z
.eq(z
.v
)
1446 class FPPack(FPState
):
def __init__(self, width, id_wid):
    """ creates the "pack" FSM state wrapping an FPPackMod

        * width: passed to the pack module
        * id_wid: passed to the pack module
    """
    FPState.__init__(self, "pack")
    # FPPackMod is built with (width, id_wid) in FPNormToPack.setup;
    # previously only width was supplied here.
    self.mod = FPPackMod(width, id_wid)
    # output record is whatever the wrapped module declares
    self.out_z = self.ospec()
1454 return self
.mod
.ispec()
1457 return self
.mod
.ospec()
1459 def setup(self
, m
, in_z
):
1460 """ links module to inputs and outputs
1462 self
.mod
.setup(m
, in_z
)
1464 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1465 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
def action(self, m):
    """ FSM action: unconditionally move on to the "pack_put_z" state
    """
    m.next = "pack_put_z"
1471 class FPPutZ(FPState
):
1473 def __init__(self
, state
, in_z
, out_z
, in_mid
, out_mid
, to_state
=None):
1474 FPState
.__init
__(self
, state
)
1475 if to_state
is None:
1476 to_state
= "get_ops"
1477 self
.to_state
= to_state
1480 self
.in_mid
= in_mid
1481 self
.out_mid
= out_mid
1483 def action(self
, m
):
1484 if self
.in_mid
is not None:
1485 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
1487 self
.out_z
.z
.v
.eq(self
.in_z
)
1489 with m
.If(self
.out_z
.z
.stb
& self
.out_z
.z
.ack
):
1490 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(0)
1491 m
.next
= self
.to_state
1493 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(1)
1496 class FPPutZIdx(FPState
):
1498 def __init__(self
, state
, in_z
, out_zs
, in_mid
, to_state
=None):
1499 FPState
.__init
__(self
, state
)
1500 if to_state
is None:
1501 to_state
= "get_ops"
1502 self
.to_state
= to_state
1504 self
.out_zs
= out_zs
1505 self
.in_mid
= in_mid
1507 def action(self
, m
):
1508 outz_stb
= Signal(reset_less
=True)
1509 outz_ack
= Signal(reset_less
=True)
1510 m
.d
.comb
+= [outz_stb
.eq(self
.out_zs
[self
.in_mid
].stb
),
1511 outz_ack
.eq(self
.out_zs
[self
.in_mid
].ack
),
1514 self
.out_zs
[self
.in_mid
].v
.eq(self
.in_z
.v
)
1516 with m
.If(outz_stb
& outz_ack
):
1517 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(0)
1518 m
.next
= self
.to_state
1520 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(1)
def __init__(self, width, id_wid):
    """ data record with fields z (an FPOp) and mid

        NOTE(review): the class header is not visible here - confirm
        which record class this constructor belongs to.

        * width: passed to FPOp
        * id_wid: bit-width of mid
    """
    self.z = FPOp(width)
    self.mid = Signal(id_wid, reset_less=True)
1528 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1531 return [self
.z
, self
.mid
]
1536 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1539 * width: bit-width of IEEE754. supported: 16, 32, 64
1540 * id_wid: an identifier that is sync-connected to the input
1541 * single_cycle: True indicates each stage to complete in 1 clock
1542 * compact: True indicates a reduced number of stages
1545 self
.id_wid
= id_wid
1546 self
.single_cycle
= single_cycle
1547 self
.compact
= compact
1549 self
.in_t
= Trigger()
1550 self
.i
= self
.ispec()
1551 self
.o
= self
.ospec()
1556 return FPADDBaseData(self
.width
, self
.id_wid
)
1559 return FPOpData(self
.width
, self
.id_wid
)
1561 def add_state(self
, state
):
1562 self
.states
.append(state
)
1565 def get_fragment(self
, platform
=None):
1566 """ creates the HDL code-fragment for FPAdd
1569 m
.submodules
.out_z
= self
.o
.z
1570 m
.submodules
.in_t
= self
.in_t
1572 self
.get_compact_fragment(m
, platform
)
1574 self
.get_longer_fragment(m
, platform
)
1576 with m
.FSM() as fsm
:
1578 for state
in self
.states
:
1579 with m
.State(state
.state_from
):
1584 def get_longer_fragment(self
, m
, platform
=None):
1586 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1588 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1592 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1593 sc
.setup(m
, a
, b
, self
.in_mid
)
1595 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1596 dn
.setup(m
, a
, b
, sc
.in_mid
)
1598 if self
.single_cycle
:
1599 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1600 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1602 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1603 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1605 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1606 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1608 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1609 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1611 if self
.single_cycle
:
1612 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1613 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1615 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1616 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1618 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1619 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1621 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1622 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1624 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1625 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1627 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1628 pa
.in_mid
, self
.out_mid
))
1630 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1631 pa
.in_mid
, self
.out_mid
))
1633 def get_compact_fragment(self
, m
, platform
=None):
1635 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1636 self
.width
, self
.id_wid
))
1637 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1639 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1642 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1645 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1646 n1
.setup(m
, alm
.a1o
)
1648 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1649 n1
.out_z
.mid
, self
.o
.mid
))
1651 #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
1652 # sc.o.mid, self.o.mid))
1655 class FPADDBase(FPState
):
1657 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1660 * width: bit-width of IEEE754. supported: 16, 32, 64
1661 * id_wid: an identifier that is sync-connected to the input
1662 * single_cycle: True indicates each stage to complete in 1 clock
1664 FPState
.__init
__(self
, "fpadd")
1666 self
.single_cycle
= single_cycle
1667 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1668 self
.o
= self
.ospec()
1670 self
.in_t
= Trigger()
1671 self
.i
= self
.ispec()
1673 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1674 self
.in_accept
= Signal(reset_less
=True)
1675 self
.add_stb
= Signal(reset_less
=True)
1676 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1679 return self
.mod
.ispec()
1682 return self
.mod
.ospec()
1684 def setup(self
, m
, i
, add_stb
, in_mid
):
1685 m
.d
.comb
+= [self
.i
.eq(i
),
1686 self
.mod
.i
.eq(self
.i
),
1687 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1688 #self.add_stb.eq(add_stb),
1689 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1690 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1691 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1692 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1693 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1694 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1697 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1698 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1699 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1700 #m.d.sync += self.in_t.stb.eq(0)
1702 m
.submodules
.fpadd
= self
.mod
1704 def action(self
, m
):
1706 # in_accept is set on incoming strobe HIGH and ack LOW.
1707 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1709 #with m.If(self.in_t.ack):
1710 # m.d.sync += self.in_t.stb.eq(0)
1711 with m
.If(~self
.z_done
):
1712 # not done: test for accepting an incoming operand pair
1713 with m
.If(self
.in_accept
):
1715 self
.add_ack
.eq(1), # acknowledge receipt...
1716 self
.in_t
.stb
.eq(1), # initiate add
1719 m
.d
.sync
+= [self
.add_ack
.eq(0),
1720 self
.in_t
.stb
.eq(0),
1724 # done: acknowledge, and write out id and value
1725 m
.d
.sync
+= [self
.add_ack
.eq(1),
1732 if self
.in_mid
is not None:
1733 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1736 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1738 # move to output state on detecting z ack
1739 with m
.If(self
.out_z
.trigger
):
1740 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1743 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1746 class FPADDBasePipe(ControlBase
):
1747 def __init__(self
, width
, id_wid
):
1748 ControlBase
.__init
__(self
)
1749 self
.pipe1
= FPAddSpecialCasesDeNorm(width
, id_wid
)
1750 self
.pipe2
= FPAddAlignSingleAdd(width
, id_wid
)
1751 self
.pipe3
= FPNormToPack(width
, id_wid
)
1753 self
._eqs
= self
.connect([self
.pipe1
, self
.pipe2
, self
.pipe3
])
1755 def elaborate(self
, platform
):
1757 m
.submodules
.scnorm
= self
.pipe1
1758 m
.submodules
.addalign
= self
.pipe2
1759 m
.submodules
.normpack
= self
.pipe3
1760 m
.d
.comb
+= self
._eqs
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ fan-in front-end: a PriorityCombMuxInPipe around a pass-through
        stage whose data spec is FPADDBaseData

        * width, id_wid: passed to FPADDBaseData for each spec instance
        * num_rows: stored on the instance, and given to the base class
          as p_len
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            # a fresh data record each time the stage asks for one
            return FPADDBaseData(width, id_wid)

        passthru = PassThroughStage(iospec)
        PriorityCombMuxInPipe.__init__(self, passthru,
                                       p_len=self.num_rows)
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ fan-out back-end: a CombMuxOutPipe around a pass-through stage
        whose data spec is FPPackData

        * width, id_wid: passed to FPPackData for each spec instance
        * num_rows: stored on the instance, and given to the base class
          as n_len
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            # a fresh data record each time the stage asks for one
            return FPPackData(width, id_wid)

        passthru = PassThroughStage(iospec)
        CombMuxOutPipe.__init__(self, passthru, n_len=self.num_rows)
1780 class FPADDMuxInOut
:
1781 """ Reservation-Station version of FPADD pipeline.
1783 * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
1784 * 3-stage adder pipeline
1785 * fan-out on outputs (an array of FPPackData: z,mid)
1787 Fan-in and Fan-out are combinatorial.
1789 def __init__(self
, width
, id_wid
, num_rows
):
1790 self
.num_rows
= num_rows
1791 self
.inpipe
= FPADDInMuxPipe(width
, id_wid
, num_rows
) # fan-in
1792 self
.fpadd
= FPADDBasePipe(width
, id_wid
) # add stage
1793 self
.outpipe
= FPADDMuxOutPipe(width
, id_wid
, num_rows
) # fan-out
1795 self
.p
= self
.inpipe
.p
# kinda annoying,
1796 self
.n
= self
.outpipe
.n
# use pipe in/out as this class in/out
1797 self
._ports
= self
.inpipe
.ports() + self
.outpipe
.ports()
1799 def elaborate(self
, platform
):
1801 m
.submodules
.inpipe
= self
.inpipe
1802 m
.submodules
.fpadd
= self
.fpadd
1803 m
.submodules
.outpipe
= self
.outpipe
1805 m
.d
.comb
+= self
.inpipe
.n
.connect_to_next(self
.fpadd
.p
)
1806 m
.d
.comb
+= self
.fpadd
.connect_to_next(self
.outpipe
)
1815 """ FPADD: stages as follows:
1821 FPAddBase---> FPAddBaseMod
1823 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1825 FPAddBase is tricky: it is both a stage and *has* stages.
1826 Connection to FPAddBaseMod therefore requires an in stb/ack
1827 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1828 needs to be the thing that raises the incoming stb.
1831 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1834 * width: bit-width of IEEE754. supported: 16, 32, 64
1835 * id_wid: an identifier that is sync-connected to the input
1836 * single_cycle: True indicates each stage to complete in 1 clock
1839 self
.id_wid
= id_wid
1840 self
.single_cycle
= single_cycle
1842 #self.out_z = FPOp(width)
1843 self
.ids
= FPID(id_wid
)
1846 for i
in range(rs_sz
):
1849 in_a
.name
= "in_a_%d" % i
1850 in_b
.name
= "in_b_%d" % i
1851 rs
.append((in_a
, in_b
))
1855 for i
in range(rs_sz
):
1857 out_z
.name
= "out_z_%d" % i
1859 self
.res
= Array(res
)
1863 def add_state(self
, state
):
1864 self
.states
.append(state
)
1867 def get_fragment(self
, platform
=None):
1868 """ creates the HDL code-fragment for FPAdd
1871 m
.submodules
+= self
.rs
1873 in_a
= self
.rs
[0][0]
1874 in_b
= self
.rs
[0][1]
1876 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1881 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1886 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1887 ab
= self
.add_state(ab
)
1888 abd
= ab
.ispec() # create an input spec object for FPADDBase
1889 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1890 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1893 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1896 with m
.FSM() as fsm
:
1898 for state
in self
.states
:
1899 with m
.State(state
.state_from
):
1905 if __name__
== "__main__":
1907 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1908 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1909 alu
.rs
[0][1].ports() + \
1910 alu
.res
[0].ports() + \
1911 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1913 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1914 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1915 alu
.in_t
.ports() + \
1916 alu
.out_z
.ports() + \
1917 [alu
.in_mid
, alu
.out_mid
])
1920 # works... but don't use, just do "python fname.py convert -t v"
1921 #print (verilog.convert(alu, ports=[
1922 # ports=alu.in_a.ports() + \
1923 # alu.in_b.ports() + \
1924 # alu.out_z.ports())