1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from example_buf_pipe
import StageChain
13 #from fpbase import FPNumShiftMultiRight
16 class FPState(FPBase
):
def __init__(self, state_from):
    """Remember the name of the FSM state this object represents.

    Args:
        state_from: state identifier; stored unchanged on the instance.
    """
    self.state_from = state_from
20 def set_inputs(self
, inputs
):
22 for k
,v
in inputs
.items():
25 def set_outputs(self
, outputs
):
26 self
.outputs
= outputs
27 for k
,v
in outputs
.items():
31 class FPGetSyncOpsMod
:
32 def __init__(self
, width
, num_ops
=2):
34 self
.num_ops
= num_ops
37 for i
in range(num_ops
):
38 inops
.append(Signal(width
, reset_less
=True))
39 outops
.append(Signal(width
, reset_less
=True))
42 self
.stb
= Signal(num_ops
)
44 self
.ready
= Signal(reset_less
=True)
45 self
.out_decode
= Signal(reset_less
=True)
47 def elaborate(self
, platform
):
49 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
50 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
51 with m
.If(self
.out_decode
):
52 for i
in range(self
.num_ops
):
54 self
.out_op
[i
].eq(self
.in_op
[i
]),
59 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
63 def __init__(self
, width
, num_ops
):
64 Trigger
.__init
__(self
)
66 self
.num_ops
= num_ops
69 for i
in range(num_ops
):
70 res
.append(Signal(width
))
75 for i
in range(self
.num_ops
):
83 def __init__(self
, width
, num_ops
=2, num_rows
=4):
85 self
.num_ops
= num_ops
86 self
.num_rows
= num_rows
87 self
.mmax
= int(log(self
.num_rows
) / log(2))
89 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
90 for i
in range(num_rows
):
91 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
92 self
.rs
= Array(self
.rs
)
94 self
.out_op
= FPOps(width
, num_ops
)
96 def elaborate(self
, platform
):
99 pe
= PriorityEncoder(self
.num_rows
)
100 m
.submodules
.selector
= pe
101 m
.submodules
.out_op
= self
.out_op
102 m
.submodules
+= self
.rs
104 # connect priority encoder
106 for i
in range(self
.num_rows
):
107 in_ready
.append(self
.rs
[i
].ready
)
108 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
110 active
= Signal(reset_less
=True)
111 out_en
= Signal(reset_less
=True)
112 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
113 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
115 # encoder active: ack relevant input, record MID, pass output
118 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
119 m
.d
.sync
+= rs
.ack
.eq(0)
120 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
121 for j
in range(self
.num_ops
):
122 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
124 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
125 # acks all default to zero
126 for i
in range(self
.num_rows
):
127 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
133 for i
in range(self
.num_rows
):
135 res
+= inop
.in_op
+ [inop
.stb
]
136 return self
.out_op
.ports() + res
+ [self
.mid
]
def __init__(self, width):
    """Create the operand input, the output value and the decode flag.

    Args:
        width: forwarded to FPOp and used as the shape of out_op.
    """
    # Incoming operand, wrapped in the project's FPOp helper.
    self.in_op = FPOp(width)
    # Value signal with the same width as the operand.
    self.out_op = Signal(width)
    # Flag signal created without a reset.
    self.out_decode = Signal(reset_less=True)
145 def elaborate(self
, platform
):
147 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
148 m
.submodules
.get_op_in
= self
.in_op
149 #m.submodules.get_op_out = self.out_op
150 with m
.If(self
.out_decode
):
152 self
.out_op
.eq(self
.in_op
.v
),
157 class FPGetOp(FPState
):
161 def __init__(self
, in_state
, out_state
, in_op
, width
):
162 FPState
.__init
__(self
, in_state
)
163 self
.out_state
= out_state
164 self
.mod
= FPGetOpMod(width
)
166 self
.out_op
= Signal(width
)
167 self
.out_decode
= Signal(reset_less
=True)
169 def setup(self
, m
, in_op
):
170 """ links module to inputs and outputs
172 setattr(m
.submodules
, self
.state_from
, self
.mod
)
173 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
174 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
188 def __init__(self
, width
, id_wid
):
189 Trigger
.__init
__(self
)
192 self
.i
= self
.ispec()
193 self
.o
= self
.ospec()
196 return FPADDBaseData(self
.width
, self
.id_wid
)
199 return FPNumBase2Ops(self
.width
, self
.id_wid
)
201 def elaborate(self
, platform
):
202 m
= Trigger
.elaborate(self
, platform
)
203 m
.submodules
.get_op1_out
= self
.o
.a
204 m
.submodules
.get_op2_out
= self
.o
.b
205 out_op1
= FPNumIn(None, self
.width
)
206 out_op2
= FPNumIn(None, self
.width
)
207 with m
.If(self
.trigger
):
209 out_op1
.decode(self
.i
.a
),
210 out_op2
.decode(self
.i
.b
),
211 self
.o
.a
.eq(out_op1
),
212 self
.o
.b
.eq(out_op2
),
213 self
.o
.mid
.eq(self
.i
.mid
)
218 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """Create the two-operand fetch state.

    Args:
        in_state: name handed to FPState.__init__ (kept as state_from).
        out_state: state name stored for the transition out of this state.
        width: forwarded to FPGet2OpMod.
        id_wid: forwarded to FPGet2OpMod.
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state
    # The module does the work; this state keeps its own copy of the
    # module's output record, built from the module's ospec().
    self.mod = FPGet2OpMod(width, id_wid)
    self.o = self.mod.ospec()
    # Handshake / status flags, all created without a reset.
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
231 def setup(self
, m
, i
, in_stb
, in_ack
):
232 """ links module to inputs and outputs
234 m
.submodules
.get_ops
= self
.mod
235 m
.d
.comb
+= self
.mod
.i
.eq(i
)
236 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
237 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
238 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
239 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
242 with m
.If(self
.out_decode
):
243 m
.next
= self
.out_state
246 self
.o
.eq(self
.mod
.o
),
249 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """Create the operand pair (a, b) and the accompanying mid signal.

    Args:
        width: forwarded to FPNumBase for both operands.
        id_wid: shape of the mid signal.
        m_extra: forwarded to FPNumBase for both operands (default True).
    """
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)
    # mid travels alongside the operands; created without a reset.
    self.mid = Signal(id_wid, reset_less=True)
260 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
263 class FPAddSpecialCasesMod
:
264 """ special cases: NaNs, infs, zeros, denormalised
265 NOTE: some of these are unique to add. see "Special Operations"
266 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
269 def __init__(self
, width
, id_wid
):
272 self
.i
= self
.ispec()
273 self
.o
= self
.ospec()
274 self
.out_do_z
= Signal(reset_less
=True)
277 return FPNumBase2Ops(self
.width
, self
.id_wid
)
280 return FPPackData(self
.width
, self
.id_wid
)
282 def setup(self
, m
, i
, out_do_z
):
283 """ links module to inputs and outputs
285 m
.submodules
.specialcases
= self
286 m
.d
.comb
+= self
.i
.eq(i
)
287 m
.d
.comb
+= out_do_z
.eq(self
.out_do_z
)
289 def elaborate(self
, platform
):
292 m
.submodules
.sc_in_a
= self
.i
.a
293 m
.submodules
.sc_in_b
= self
.i
.b
294 m
.submodules
.sc_out_z
= self
.o
.z
297 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
300 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
302 # if a is NaN or b is NaN return NaN
303 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
304 m
.d
.comb
+= self
.out_do_z
.eq(1)
305 m
.d
.comb
+= self
.o
.z
.nan(0)
307 # XXX WEIRDNESS for FP16 non-canonical NaN handling
310 ## if a is zero and b is NaN return -b
311 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
312 # m.d.comb += self.out_do_z.eq(1)
313 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
315 ## if b is zero and a is NaN return -a
316 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
317 # m.d.comb += self.out_do_z.eq(1)
318 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
320 ## if a is -zero and b is NaN return -b
321 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
322 # m.d.comb += self.out_do_z.eq(1)
323 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
325 ## if b is -zero and a is NaN return -a
326 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
327 # m.d.comb += self.out_do_z.eq(1)
328 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
330 # if a is inf return inf (or NaN)
331 with m
.Elif(self
.i
.a
.is_inf
):
332 m
.d
.comb
+= self
.out_do_z
.eq(1)
333 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
334 # if a is inf and signs don't match return NaN
335 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
336 m
.d
.comb
+= self
.o
.z
.nan(0)
338 # if b is inf return inf
339 with m
.Elif(self
.i
.b
.is_inf
):
340 m
.d
.comb
+= self
.out_do_z
.eq(1)
341 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
343 # if a is zero and b zero return signed-a/b
344 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
345 m
.d
.comb
+= self
.out_do_z
.eq(1)
346 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
350 # if a is zero return b
351 with m
.Elif(self
.i
.a
.is_zero
):
352 m
.d
.comb
+= self
.out_do_z
.eq(1)
353 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
356 # if b is zero return a
357 with m
.Elif(self
.i
.b
.is_zero
):
358 m
.d
.comb
+= self
.out_do_z
.eq(1)
359 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
362 # if a equal to -b return zero (+ve zero)
363 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
364 m
.d
.comb
+= self
.out_do_z
.eq(1)
365 m
.d
.comb
+= self
.o
.z
.zero(0)
367 # Denormalised Number checks
369 m
.d
.comb
+= self
.out_do_z
.eq(0)
371 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
377 def __init__(self
, id_wid
):
380 self
.in_mid
= Signal(id_wid
, reset_less
=True)
381 self
.out_mid
= Signal(id_wid
, reset_less
=True)
387 if self
.id_wid
is not None:
388 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
391 class FPAddSpecialCases(FPState
):
392 """ special cases: NaNs, infs, zeros, denormalised
393 NOTE: some of these are unique to add. see "Special Operations"
394 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """Create the "special_cases" state and the module that implements it.

    Args:
        width: forwarded to FPAddSpecialCasesMod.
        id_wid: forwarded to FPAddSpecialCasesMod.
    """
    FPState.__init__(self, "special_cases")
    # FPAddSpecialCasesMod.__init__ takes (width, id_wid) with no default
    # for id_wid; passing width alone raises TypeError at construction.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    # Local copy of the module's output record, built from its ospec().
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
403 def setup(self
, m
, i
):
404 """ links module to inputs and outputs
406 self
.mod
.setup(m
, i
, self
.out_do_z
)
407 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
408 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
412 with m
.If(self
.out_do_z
):
415 m
.next
= "denormalise"
418 class FPAddSpecialCasesDeNorm(FPState
):
419 """ special cases: NaNs, infs, zeros, denormalised
420 NOTE: some of these are unique to add. see "Special Operations"
421 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """Create the combined special-cases + denormalise state.

    The state is registered under the name "special_cases" and owns two
    modules: one FPAddSpecialCasesMod and one FPAddDeNormMod.

    Args:
        width: forwarded to both modules.
        id_wid: forwarded to both modules.
    """
    FPState.__init__(self, "special_cases")

    # Special-cases half: module, a record shaped by its ospec(), and the
    # flag that setup() wires to the module's out_do_z.
    self.smod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.smod.ospec()
    self.out_do_z = Signal(reset_less=True)

    # Denormalise half: module plus a record shaped by its ospec().
    self.dmod = FPAddDeNormMod(width, id_wid)
    self.o = self.dmod.ospec()
433 def setup(self
, m
, i
):
434 """ links module to inputs and outputs
436 self
.smod
.setup(m
, i
, self
.out_do_z
)
437 self
.dmod
.setup(m
, i
)
440 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
441 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.smod
.o
.mid
) # (and mid)
443 m
.d
.sync
+= self
.o
.eq(self
.dmod
.o
)
446 with m
.If(self
.out_do_z
):
452 class FPAddDeNormMod(FPState
):
454 def __init__(self
, width
, id_wid
):
457 self
.i
= self
.ispec()
458 self
.o
= self
.ospec()
461 return FPNumBase2Ops(self
.width
, self
.id_wid
)
464 return FPNumBase2Ops(self
.width
, self
.id_wid
)
466 def setup(self
, m
, i
):
467 """ links module to inputs and outputs
469 m
.submodules
.denormalise
= self
470 m
.d
.comb
+= self
.i
.eq(i
)
472 def elaborate(self
, platform
):
474 m
.submodules
.denorm_in_a
= self
.i
.a
475 m
.submodules
.denorm_in_b
= self
.i
.b
476 m
.submodules
.denorm_out_a
= self
.o
.a
477 m
.submodules
.denorm_out_b
= self
.o
.b
478 # hmmm, don't like repeating identical code
479 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
480 with m
.If(self
.i
.a
.exp_n127
):
481 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
483 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
485 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
486 with m
.If(self
.i
.b
.exp_n127
):
487 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
489 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
491 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
496 class FPAddDeNorm(FPState
):
def __init__(self, width, id_wid):
    """Create the "denormalise" state and the module that implements it.

    Args:
        width: forwarded to FPAddDeNormMod and to both FPNumBase outputs.
        id_wid: forwarded to FPAddDeNormMod.
    """
    FPState.__init__(self, "denormalise")
    # FPAddDeNormMod.__init__ takes (width, id_wid) with no default for
    # id_wid; passing width alone raises TypeError at construction.
    self.mod = FPAddDeNormMod(width, id_wid)
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
504 def setup(self
, m
, i
):
505 """ links module to inputs and outputs
509 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
510 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
513 # Denormalised Number checks
517 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """Create the input/output operand records and the exp_eq flag.

    Args:
        width: forwarded to every FPNumBase / FPNumIn created here.
    """
    # Inputs are plain FPNumBase records.
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    # Outputs are FPNumIn records constructed with None as first argument.
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # Driven to 1 by elaborate() in the "exponents equal" case.
    self.exp_eq = Signal(reset_less=True)
526 def elaborate(self
, platform
):
527 # This one however (single-cycle) will do the shift
532 m
.submodules
.align_in_a
= self
.in_a
533 m
.submodules
.align_in_b
= self
.in_b
534 m
.submodules
.align_out_a
= self
.out_a
535 m
.submodules
.align_out_b
= self
.out_b
537 # NOTE: this does *not* do single-cycle multi-shifting,
538 # it *STAYS* in the align state until exponents match
540 # exponent of a greater than b: shift b down
541 m
.d
.comb
+= self
.exp_eq
.eq(0)
542 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
543 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
544 agtb
= Signal(reset_less
=True)
545 altb
= Signal(reset_less
=True)
546 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
547 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
549 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
550 # exponent of b greater than a: shift a down
552 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
553 # exponents equal: move to next stage.
555 m
.d
.comb
+= self
.exp_eq
.eq(1)
559 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """Create the multi-cycle "align" state.

    Args:
        width: forwarded to FPAddAlignMultiMod and to both FPNumIn outputs.
        id_wid: accepted but not used by this constructor.
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)
    # Copies of the module outputs held by this state.
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # Mirrors the module's exp_eq (connected in setup()).
    self.exp_eq = Signal(reset_less=True)
568 def setup(self
, m
, in_a
, in_b
):
569 """ links module to inputs and outputs
571 m
.submodules
.align
= self
.mod
572 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
573 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
574 #m.d.comb += self.out_a.eq(self.mod.out_a)
575 #m.d.comb += self.out_b.eq(self.mod.out_b)
576 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
577 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
578 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
581 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """Create the operand pair (a, b) as FPNumIn records, plus mid.

    Args:
        width: forwarded to FPNumIn for both operands.
        id_wid: shape of the mid signal.
    """
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    # mid travels alongside the operands; created without a reset.
    self.mid = Signal(id_wid, reset_less=True)
593 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
596 class FPAddAlignSingleMod
:
598 def __init__(self
, width
, id_wid
):
601 self
.i
= self
.ispec()
602 self
.o
= self
.ospec()
605 return FPNumBase2Ops(self
.width
, self
.id_wid
)
608 return FPNumIn2Ops(self
.width
, self
.id_wid
)
610 def setup(self
, m
, i
):
611 """ links module to inputs and outputs
613 m
.submodules
.align
= self
614 m
.d
.comb
+= self
.i
.eq(i
)
616 def elaborate(self
, platform
):
617 """ Aligns A against B or B against A, depending on which has the
618 greater exponent. This is done in a *single* cycle using
619 variable-width bit-shift
621 the shifter used here is quite expensive in terms of gates.
622 Mux A or B in (and out) into temporaries, as only one of them
623 needs to be aligned against the other
627 m
.submodules
.align_in_a
= self
.i
.a
628 m
.submodules
.align_in_b
= self
.i
.b
629 m
.submodules
.align_out_a
= self
.o
.a
630 m
.submodules
.align_out_b
= self
.o
.b
632 # temporary (muxed) input and output to be shifted
633 t_inp
= FPNumBase(self
.width
)
634 t_out
= FPNumIn(None, self
.width
)
635 espec
= (len(self
.i
.a
.e
), True)
636 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
637 m
.submodules
.align_t_in
= t_inp
638 m
.submodules
.align_t_out
= t_out
639 m
.submodules
.multishift_r
= msr
641 ediff
= Signal(espec
, reset_less
=True)
642 ediffr
= Signal(espec
, reset_less
=True)
643 tdiff
= Signal(espec
, reset_less
=True)
644 elz
= Signal(reset_less
=True)
645 egz
= Signal(reset_less
=True)
647 # connect multi-shifter to t_inp/out mantissa (and tdiff)
648 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
649 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
650 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
651 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
652 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
654 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
655 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
656 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
657 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
659 # default: A-exp == B-exp, A and B untouched (fall through)
660 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
661 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
662 # only one shifter (muxed)
663 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
664 # exponent of a greater than b: shift b down
666 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
669 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
671 # exponent of b greater than a: shift a down
673 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
676 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
679 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
684 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """Create the single-cycle "align" state.

    Args:
        width: forwarded to FPAddAlignSingleMod and to both FPNumIn outputs.
        id_wid: forwarded to FPAddAlignSingleMod.
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignSingleMod(width, id_wid)
    # Output operands held by this state.
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
692 def setup(self
, m
, i
):
693 """ links module to inputs and outputs
697 # NOTE: could be done as comb
698 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
699 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
705 class FPAddAlignSingleAdd(FPState
):
707 def __init__(self
, width
, id_wid
):
708 FPState
.__init
__(self
, "align")
711 self
.a1o
= self
.ospec()
714 return FPNumBase2Ops(self
.width
, self
.id_wid
) # AlignSingle ispec
717 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
719 def setup(self
, m
, i
):
720 """ links module to inputs and outputs
722 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
725 m
.d
.comb
+= o
.eq(mod
.o
)
727 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
730 m
.d
.comb
+= a0o
.eq(a0mod
.o
)
732 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
734 self
.a1modo
= a1mod
.o
736 m
.d
.sync
+= self
.a1o
.eq(self
.a1modo
)
739 m
.next
= "normalise_1"
742 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """Create the stage-0 result record: z, the raw total, and mid.

    Args:
        width: forwarded to FPNumBase for z.
        id_wid: shape of the mid signal.
    """
    # z is built with False as FPNumBase's second argument.
    self.z = FPNumBase(width, False)
    # The total is four bits wider than z's mantissa width.
    self.tot = Signal(self.z.m_width + 4, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
750 return [self
.z
.eq(i
.z
), self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
753 class FPAddStage0Mod
:
755 def __init__(self
, width
, id_wid
):
758 self
.i
= self
.ispec()
759 self
.o
= self
.ospec()
762 return FPNumBase2Ops(self
.width
, self
.id_wid
)
765 return FPAddStage0Data(self
.width
, self
.id_wid
)
767 def setup(self
, m
, i
):
768 """ links module to inputs and outputs
770 m
.submodules
.add0
= self
771 m
.d
.comb
+= self
.i
.eq(i
)
773 def elaborate(self
, platform
):
775 m
.submodules
.add0_in_a
= self
.i
.a
776 m
.submodules
.add0_in_b
= self
.i
.b
777 m
.submodules
.add0_out_z
= self
.o
.z
779 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
780 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
782 # store intermediate tests (and zero-extended mantissas)
783 seq
= Signal(reset_less
=True)
784 mge
= Signal(reset_less
=True)
785 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
786 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
787 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
788 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
789 am0
.eq(Cat(self
.i
.a
.m
, 0)),
790 bm0
.eq(Cat(self
.i
.b
.m
, 0))
792 # same-sign (both negative or both positive) add mantissas
795 self
.o
.tot
.eq(am0
+ bm0
),
796 self
.o
.z
.s
.eq(self
.i
.a
.s
)
798 # a mantissa greater than b, use a
801 self
.o
.tot
.eq(am0
- bm0
),
802 self
.o
.z
.s
.eq(self
.i
.a
.s
)
804 # b mantissa greater than a, use b
807 self
.o
.tot
.eq(bm0
- am0
),
808 self
.o
.z
.s
.eq(self
.i
.b
.s
)
813 class FPAddStage0(FPState
):
814 """ First stage of add. covers same-sign (add) and subtract
815 special-casing when mantissas are greater or equal, to
816 give greatest accuracy.
def __init__(self, width, id_wid):
    """Create the "add_0" state and the module that implements it.

    Args:
        width: forwarded to FPAddStage0Mod.
        id_wid: forwarded to FPAddStage0Mod.
    """
    FPState.__init__(self, "add_0")
    # FPAddStage0Mod.__init__ takes (width, id_wid) with no default for
    # id_wid; passing width alone raises TypeError at construction.
    self.mod = FPAddStage0Mod(width, id_wid)
    # Local output record, shaped by the module's ospec().
    self.o = self.mod.ospec()
824 def setup(self
, m
, i
):
825 """ links module to inputs and outputs
829 # NOTE: these could be done as combinatorial (merge add0+add1)
830 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
836 class FPAddStage1Data
:
838 def __init__(self
, width
, id_wid
):
839 self
.z
= FPNumBase(width
, False)
841 self
.mid
= Signal(id_wid
, reset_less
=True)
844 return [self
.z
.eq(i
.z
), self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
848 class FPAddStage1Mod(FPState
):
849 """ Second stage of add: preparation for normalisation.
850 detects when tot sum is too big (tot[27] is kinda a carry bit)
853 def __init__(self
, width
, id_wid
):
856 self
.i
= self
.ispec()
857 self
.o
= self
.ospec()
860 return FPAddStage0Data(self
.width
, self
.id_wid
)
863 return FPAddStage1Data(self
.width
, self
.id_wid
)
865 def setup(self
, m
, i
):
866 """ links module to inputs and outputs
868 m
.submodules
.add1
= self
869 m
.submodules
.add1_out_overflow
= self
.o
.of
871 m
.d
.comb
+= self
.i
.eq(i
)
873 def elaborate(self
, platform
):
875 #m.submodules.norm1_in_overflow = self.in_of
876 #m.submodules.norm1_out_overflow = self.out_of
877 #m.submodules.norm1_in_z = self.in_z
878 #m.submodules.norm1_out_z = self.out_z
879 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
880 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
881 # tot[-1] (MSB) gets set when the sum overflows. shift result down
882 with m
.If(self
.i
.tot
[-1]):
884 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
885 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
886 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
887 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
888 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
889 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
891 # tot[-1] (MSB) zero case
894 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
895 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
896 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
897 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
898 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
903 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """Create the "add_1" state and the module that implements it.

    Args:
        width: forwarded to FPAddStage1Mod and to the out_z FPNumBase.
        id_wid: forwarded to FPAddStage1Mod.
    """
    FPState.__init__(self, "add_1")
    # FPAddStage1Mod.__init__ takes (width, id_wid) with no default for
    # id_wid; passing width alone raises TypeError at construction.
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # Unlike the other flags in this file, norm_stb keeps the default
    # reset behaviour (no reset_less=True).
    self.norm_stb = Signal()
912 def setup(self
, m
, i
):
913 """ links module to inputs and outputs
917 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
919 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
920 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
921 m
.d
.sync
+= self
.norm_stb
.eq(1)
924 m
.next
= "normalise_1"
927 class FPNormaliseModSingle
:
929 def __init__(self
, width
):
931 self
.in_z
= self
.ispec()
932 self
.out_z
= self
.ospec()
935 return FPNumBase(self
.width
, False)
938 return FPNumBase(self
.width
, False)
940 def setup(self
, m
, i
):
941 """ links module to inputs and outputs
943 m
.submodules
.normalise
= self
944 m
.d
.comb
+= self
.i
.eq(i
)
946 def elaborate(self
, platform
):
949 mwid
= self
.out_z
.m_width
+2
950 pe
= PriorityEncoder(mwid
)
951 m
.submodules
.norm_pe
= pe
953 m
.submodules
.norm1_out_z
= self
.out_z
954 m
.submodules
.norm1_in_z
= self
.in_z
956 in_z
= FPNumBase(self
.width
, False)
958 m
.submodules
.norm1_insel_z
= in_z
959 m
.submodules
.norm1_insel_overflow
= in_of
961 espec
= (len(in_z
.e
), True)
962 ediff_n126
= Signal(espec
, reset_less
=True)
963 msr
= MultiShiftRMerge(mwid
, espec
)
964 m
.submodules
.multishift_r
= msr
966 m
.d
.comb
+= in_z
.eq(self
.in_z
)
967 m
.d
.comb
+= in_of
.eq(self
.in_of
)
968 # initialise out from in (overridden below)
969 m
.d
.comb
+= self
.out_z
.eq(in_z
)
970 m
.d
.comb
+= self
.out_of
.eq(in_of
)
971 # normalisation decrease condition
972 decrease
= Signal(reset_less
=True)
973 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
976 # *sigh* not entirely obvious: count leading zeros (clz)
977 # with a PriorityEncoder: to find from the MSB
978 # we reverse the order of the bits.
979 temp_m
= Signal(mwid
, reset_less
=True)
980 temp_s
= Signal(mwid
+1, reset_less
=True)
981 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
983 # cat round and guard bits back into the mantissa
984 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
985 pe
.i
.eq(temp_m
[::-1]), # inverted
986 clz
.eq(pe
.o
), # count zeros from MSB down
987 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
988 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
989 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """Create the normalisation result record: roundz, z and mid.

    Args:
        width: forwarded to FPNumBase for z.
        id_wid: shape of the mid signal.
    """
    self.roundz = Signal(reset_less=True)
    # z is built with False as FPNumBase's second argument.
    self.z = FPNumBase(width, False)
    self.mid = Signal(id_wid, reset_less=True)
1002 return [self
.z
.eq(i
.z
), self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1005 class FPNorm1ModSingle
:
1007 def __init__(self
, width
, id_wid
):
1009 self
.id_wid
= id_wid
1010 self
.i
= self
.ispec()
1011 self
.o
= self
.ospec()
1014 return FPAddStage1Data(self
.width
, self
.id_wid
)
1017 return FPNorm1Data(self
.width
, self
.id_wid
)
1019 def setup(self
, m
, i
):
1020 """ links module to inputs and outputs
1022 m
.submodules
.normalise_1
= self
1023 m
.d
.comb
+= self
.i
.eq(i
)
1025 def process(self
, i
):
1028 def elaborate(self
, platform
):
1031 mwid
= self
.o
.z
.m_width
+2
1032 pe
= PriorityEncoder(mwid
)
1033 m
.submodules
.norm_pe
= pe
1036 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1038 m
.submodules
.norm1_out_z
= self
.o
.z
1039 m
.submodules
.norm1_out_overflow
= of
1040 m
.submodules
.norm1_in_z
= self
.i
.z
1041 m
.submodules
.norm1_in_overflow
= self
.i
.of
1044 m
.submodules
.norm1_insel_z
= i
.z
1045 m
.submodules
.norm1_insel_overflow
= i
.of
1047 espec
= (len(i
.z
.e
), True)
1048 ediff_n126
= Signal(espec
, reset_less
=True)
1049 msr
= MultiShiftRMerge(mwid
, espec
)
1050 m
.submodules
.multishift_r
= msr
1052 m
.d
.comb
+= i
.eq(self
.i
)
1053 # initialise out from in (overridden below)
1054 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1055 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1056 m
.d
.comb
+= of
.eq(i
.of
)
1057 # normalisation increase/decrease conditions
1058 decrease
= Signal(reset_less
=True)
1059 increase
= Signal(reset_less
=True)
1060 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1061 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1063 with m
.If(decrease
):
1064 # *sigh* not entirely obvious: count leading zeros (clz)
1065 # with a PriorityEncoder: to find from the MSB
1066 # we reverse the order of the bits.
1067 temp_m
= Signal(mwid
, reset_less
=True)
1068 temp_s
= Signal(mwid
+1, reset_less
=True)
1069 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1070 # make sure that the amount to decrease by does NOT
1071 # go below the minimum non-INF/NaN exponent
1072 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1075 # cat round and guard bits back into the mantissa
1076 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1077 pe
.i
.eq(temp_m
[::-1]), # inverted
1078 clz
.eq(limclz
), # count zeros from MSB down
1079 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1080 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1081 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1082 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1083 # overflow in bits 0..1: got shifted too (leave sticky)
1084 of
.guard
.eq(temp_s
[1]), # guard
1085 of
.round_bit
.eq(temp_s
[0]), # round
1088 with m
.Elif(increase
):
1089 temp_m
= Signal(mwid
+1, reset_less
=True)
1091 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1093 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1094 # connect multi-shifter to inp/out mantissa (and ediff)
1096 msr
.diff
.eq(ediff_n126
),
1097 self
.o
.z
.m
.eq(msr
.m
[3:]),
1098 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1099 # overflow in bits 0..1: got shifted too (leave sticky)
1100 of
.guard
.eq(temp_s
[2]), # guard
1101 of
.round_bit
.eq(temp_s
[1]), # round
1102 of
.sticky
.eq(temp_s
[0]), # sticky
1103 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1109 class FPNorm1ModMulti
:
1111 def __init__(self
, width
, single_cycle
=True):
1113 self
.in_select
= Signal(reset_less
=True)
1114 self
.in_z
= FPNumBase(width
, False)
1115 self
.in_of
= Overflow()
1116 self
.temp_z
= FPNumBase(width
, False)
1117 self
.temp_of
= Overflow()
1118 self
.out_z
= FPNumBase(width
, False)
1119 self
.out_of
= Overflow()
1121 def elaborate(self
, platform
):
1124 m
.submodules
.norm1_out_z
= self
.out_z
1125 m
.submodules
.norm1_out_overflow
= self
.out_of
1126 m
.submodules
.norm1_temp_z
= self
.temp_z
1127 m
.submodules
.norm1_temp_of
= self
.temp_of
1128 m
.submodules
.norm1_in_z
= self
.in_z
1129 m
.submodules
.norm1_in_overflow
= self
.in_of
1131 in_z
= FPNumBase(self
.width
, False)
1133 m
.submodules
.norm1_insel_z
= in_z
1134 m
.submodules
.norm1_insel_overflow
= in_of
1136 # select which of temp or in z/of to use
1137 with m
.If(self
.in_select
):
1138 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1139 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1141 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
1142 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
1143 # initialise out from in (overridden below)
1144 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1145 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1146 # normalisation increase/decrease conditions
1147 decrease
= Signal(reset_less
=True)
1148 increase
= Signal(reset_less
=True)
1149 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1150 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1151 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1153 with m
.If(decrease
):
1155 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1156 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1157 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1158 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1159 self
.out_of
.round_bit
.eq(0), # reset round bit
1160 self
.out_of
.m0
.eq(in_of
.guard
),
1163 with m
.Elif(increase
):
1165 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1166 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1167 self
.out_of
.guard
.eq(in_z
.m
[0]),
1168 self
.out_of
.m0
.eq(in_z
.m
[1]),
1169 self
.out_of
.round_bit
.eq(in_of
.guard
),
1170 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
1176 class FPNorm1Single(FPState
):
def __init__(self, width, id_wid, single_cycle=True):
    """Create the single-cycle "normalise_1" state.

    Args:
        width: forwarded to FPNorm1ModSingle and to the out_z FPNumBase.
        id_wid: forwarded to FPNorm1ModSingle.
        single_cycle: accepted but not used by this constructor.
    """
    FPState.__init__(self, "normalise_1")
    # FPNorm1ModSingle.__init__ takes (width, id_wid) with no default for
    # id_wid; passing width alone raises TypeError at construction.
    self.mod = FPNorm1ModSingle(width, id_wid)
    # self.ospec() delegates to self.mod.ospec(), so mod must exist first.
    self.o = self.ospec()
    self.out_z = FPNumBase(width, False)
    self.out_roundz = Signal(reset_less=True)
1186 return self
.mod
.ispec()
1189 return self
.mod
.ospec()
1191 def setup(self
, m
, i
):
1192 """ links module to inputs and outputs
1194 self
.mod
.setup(m
, i
)
1196 def action(self
, m
):
1200 class FPNorm1Multi(FPState
):
def __init__(self, width, id_wid):
    """Create the multi-cycle "normalise_1" state.

    Args:
        width: forwarded to FPNorm1ModMulti and to the FPNumBase records.
        id_wid: accepted but not used by this constructor.
    """
    FPState.__init__(self, "normalise_1")
    self.mod = FPNorm1ModMulti(width)

    # Handshake and loop-control flags.
    self.stb = Signal(reset_less=True)
    self.ack = Signal(reset=0, reset_less=True)
    self.out_norm = Signal(reset_less=True)
    self.in_accept = Signal(reset_less=True)

    # Values carried between cycles (written with sync in action()).
    self.temp_z = FPNumBase(width)
    self.temp_of = Overflow()

    # Results exposed by this state.
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True)
1214 def setup(self
, m
, in_z
, in_of
, norm_stb
):
1215 """ links module to inputs and outputs
1217 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1218 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1219 self
.out_z
, self
.out_norm
)
1221 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1222 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1224 def action(self
, m
):
1225 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1226 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1227 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1228 with m
.If(self
.out_norm
):
1229 with m
.If(self
.in_accept
):
1234 m
.d
.sync
+= self
.ack
.eq(0)
1236 # normalisation not required (or done).
1238 m
.d
.sync
+= self
.ack
.eq(1)
1239 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
# FPNormToPack: FSM state registered under the name "normalise_1".
# setup() creates FPNorm1ModSingle, FPRoundMod, FPCorrectionsMod and
# FPPackMod instances, places them in a StageChain, and copies the pack
# module's mid and packed value into self.out_z on the sync domain.
# action() sends the FSM to "pack_put_z".
1242 class FPNormToPack(FPState
):
# NOTE(review): self.width is read below but its assignment is not among
# the visible lines of __init__ - confirm it is set.
1244 def __init__(self
, width
, id_wid
):
1245 FPState
.__init
__(self
, "normalise_1")
1246 self
.id_wid
= id_wid
# input spec: same record type as the normalisation module's input
1250 return FPAddStage1Data(self
.width
, self
.id_wid
) # Norm1ModSingle ispec
# output spec: same record type as the pack module's output
1253 return FPPackData(self
.width
, self
.id_wid
) # FPPackMod ospec
1255 def setup(self
, m
, i
):
1256 """ links module to inputs and outputs
1259 # Normalisation, Rounding Corrections, Pack - in a chain
1260 nmod
= FPNorm1ModSingle(self
.width
, self
.id_wid
)
1261 rmod
= FPRoundMod(self
.width
, self
.id_wid
)
1262 cmod
= FPCorrectionsMod(self
.width
, self
.id_wid
)
1263 pmod
= FPPackMod(self
.width
, self
.id_wid
)
1264 chain
= StageChain([nmod
, rmod
, cmod
, pmod
])
# out_z is created from the pack module's output spec, then registered
1266 self
.out_z
= pmod
.ospec()
1268 m
.d
.sync
+= self
.out_z
.mid
.eq(pmod
.o
.mid
)
1269 m
.d
.sync
+= self
.out_z
.z
.v
.eq(pmod
.o
.z
.v
) # outputs packed result
# action: unconditional transition to the "pack_put_z" state
1271 def action(self
, m
):
1272 m
.next
= "pack_put_z"
# Data record holding a number z (FPNumBase) and a mid signal id_wid bits
# wide.  The class header is not visible in this view; presumably this is
# FPNorm1Data, the type returned by the rounding module's input spec.
1277 def __init__(self
, width
, id_wid
):
1278 self
.z
= FPNumBase(width
, False)
1279 self
.mid
= Signal(id_wid
, reset_less
=True)
# returns the list of assignments copying z and mid from i
1282 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
# Rounding module.  The class header is not visible in this view;
# presumably FPRoundMod, since setup() registers it as m.submodules.roundz.
# It keeps an input record (self.i) and an output record (self.out_z).
1287 def __init__(self
, width
, id_wid
):
1289 self
.id_wid
= id_wid
1290 self
.i
= self
.ispec()
1291 self
.out_z
= self
.ospec()
# input spec record
1294 return FPNorm1Data(self
.width
, self
.id_wid
)
# output spec record
1297 return FPRoundData(self
.width
, self
.id_wid
)
# process: body not visible in this view
1299 def process(self
, i
):
# setup: registers this module as a submodule of m and drives self.i
# combinatorially from i
1302 def setup(self
, m
, i
):
1303 m
.submodules
.roundz
= self
1304 m
.d
.comb
+= self
.i
.eq(i
)
# elaborate: out_z defaults to a copy of the input.  When i.roundz is set
# the mantissa is incremented; when the input mantissa equals z.m1s (all
# ones) the exponent is incremented as well.
# NOTE(review): presumably the second test is nested under the first -
# the original indentation is not recoverable from this view.
1306 def elaborate(self
, platform
):
1308 m
.d
.comb
+= self
.out_z
.eq(self
.i
)
1309 with m
.If(self
.i
.roundz
):
1310 m
.d
.comb
+= self
.out_z
.z
.m
.eq(self
.i
.z
.m
+ 1) # mantissa rounds up
1311 with m
.If(self
.i
.z
.m
== self
.i
.z
.m1s
): # all 1s
1312 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.e
+ 1) # exponent up
1316 class FPRound(FPState
):
def __init__(self, width, id_wid):
    """Create the "round" FSM state and the rounding module it wraps.

    * width: bit-width of the IEEE754 number being processed
    * id_wid: width of the mid (multiplex id) signal carried with it
    """
    FPState.__init__(self, "round")
    # The rounding module's constructor takes both width and id_wid
    # (FPNormToPack builds it the same way); passing width alone fails
    # at construction time.
    self.mod = FPRoundMod(width, id_wid)
    self.out_z = self.ospec()
# input spec: delegated to the wrapped rounding module
1324 return self
.mod
.ispec()
# output spec: delegated to the wrapped rounding module
1327 return self
.mod
.ospec()
# setup: forwards m and i to the module's setup, then registers the
# module's out_z (and a mid) into this state's out_z on the sync domain.
# NOTE(review): the last assignment reads self.mod.o.mid, but the visible
# lines of the rounding module's __init__ only create self.i and
# self.out_z - confirm that an 'o' attribute exists.
1329 def setup(self
, m
, i
):
1330 """ links module to inputs and outputs
1332 self
.mod
.setup(m
, i
)
1335 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1336 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
# action: unconditional transition to the "corrections" state
1338 def action(self
, m
):
1339 m
.next
= "corrections"
# FPCorrectionsMod: module that takes an FPRoundData record and produces
# an FPRoundData record (self.i -> self.out_z).
1342 class FPCorrectionsMod
:
1344 def __init__(self
, width
, id_wid
):
1346 self
.id_wid
= id_wid
1347 self
.i
= self
.ispec()
1348 self
.out_z
= self
.ospec()
# input spec record
1351 return FPRoundData(self
.width
, self
.id_wid
)
# output spec record (same type as the input)
1354 return FPRoundData(self
.width
, self
.id_wid
)
# process: body not visible in this view
1356 def process(self
, i
):
# setup: registers this module as m.submodules.corrections and drives
# self.i combinatorially from i
1359 def setup(self
, m
, i
):
1360 """ links module to inputs and outputs
1362 m
.submodules
.corrections
= self
1363 m
.d
.comb
+= self
.i
.eq(i
)
# elaborate: registers the input and output numbers as submodules, copies
# the input to out_z by default, and replaces the output exponent with
# i.z.N127 when the input reports is_denormalised.
1365 def elaborate(self
, platform
):
1367 m
.submodules
.corr_in_z
= self
.i
.z
1368 m
.submodules
.corr_out_z
= self
.out_z
.z
1369 m
.d
.comb
+= self
.out_z
.eq(self
.i
)
1370 with m
.If(self
.i
.z
.is_denormalised
):
1371 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.N127
)
1375 class FPCorrections(FPState
):
def __init__(self, width, id_wid):
    """Create the "corrections" FSM state and the module it wraps.

    * width: bit-width of the IEEE754 number being processed
    * id_wid: width of the mid (multiplex id) signal carried with it
    """
    FPState.__init__(self, "corrections")
    # FPCorrectionsMod.__init__ requires (width, id_wid); passing width
    # alone raises a TypeError before any hardware is described.
    self.mod = FPCorrectionsMod(width, id_wid)
    self.out_z = self.ospec()
1383 return self
.mod
.ispec()
1386 return self
.mod
.ospec()
def setup(self, m, in_z):
    """ links module to inputs and outputs

    * m: the module being built; receives the sync assignments
    * in_z: input record handed on to the corrections module
    """
    self.mod.setup(m, in_z)

    # NOTE(review): one original line at this position was not visible
    # during review and is assumed to have been blank - confirm.
    # FPCorrectionsMod publishes its result as `out_z` (it defines no `o`
    # attribute), so both registered copies are taken from there.
    m.d.sync += self.out_z.eq(self.mod.out_z)
    m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)
1396 def action(self
, m
):
# Data record holding a number z (FPNumOut) and a mid signal id_wid bits
# wide.  The class header is not visible in this view; presumably this is
# FPRoundData, the record type used by the rounding and corrections
# modules.
1402 def __init__(self
, width
, id_wid
):
1403 self
.z
= FPNumOut(width
, False)
1404 self
.mid
= Signal(id_wid
, reset_less
=True)
# returns the list of assignments copying z and mid from i
1407 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
# Pack module.  The class header is not visible in this view; presumably
# FPPackMod, since setup() registers it as m.submodules.pack.
# Input record is self.i, output record is self.o.
1412 def __init__(self
, width
, id_wid
):
1414 self
.id_wid
= id_wid
1415 self
.i
= self
.ispec()
1416 self
.o
= self
.ospec()
# input spec record
1419 return FPRoundData(self
.width
, self
.id_wid
)
# output spec record
1422 return FPPackData(self
.width
, self
.id_wid
)
# process: body not visible in this view
1424 def process(self
, i
):
# setup: registers this module as m.submodules.pack and drives self.i
# combinatorially from in_z
1427 def setup(self
, m
, in_z
):
1428 """ links module to inputs and outputs
1430 m
.submodules
.pack
= self
1431 m
.d
.comb
+= self
.i
.eq(in_z
)
# elaborate: passes mid straight through.  When the input number reports
# is_overflowed the output is set via inf() using the input sign; the
# remaining visible assignment builds the output from the input sign,
# exponent and mantissa via create().
# NOTE(review): the line separating the two cases is not visible in this
# view; presumably create() is the alternative branch.
1433 def elaborate(self
, platform
):
1435 m
.submodules
.pack_in_z
= self
.i
.z
1436 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1437 with m
.If(self
.i
.z
.is_overflowed
):
1438 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.z
.s
)
1440 m
.d
.comb
+= self
.o
.z
.create(self
.i
.z
.s
, self
.i
.z
.e
, self
.i
.z
.m
)
# Data record holding a number z (FPNumOut) and a mid signal id_wid bits
# wide.  The class header is not visible in this view; presumably this is
# FPPackData, the record type returned by the pack module's output spec.
1445 def __init__(self
, width
, id_wid
):
1446 self
.z
= FPNumOut(width
, False)
1447 self
.mid
= Signal(id_wid
, reset_less
=True)
# returns the list of assignments copying z and mid from i
1450 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1453 class FPPack(FPState
):
def __init__(self, width, id_wid):
    """Create the "pack" FSM state and the pack module it wraps.

    * width: bit-width of the IEEE754 number being processed
    * id_wid: width of the mid (multiplex id) signal carried with it
    """
    FPState.__init__(self, "pack")
    # The pack module's constructor takes both width and id_wid
    # (FPNormToPack builds it the same way); passing width alone fails
    # at construction time.
    self.mod = FPPackMod(width, id_wid)
    self.out_z = self.ospec()
1461 return self
.mod
.ispec()
1464 return self
.mod
.ospec()
def setup(self, m, in_z):
    """ links module to inputs and outputs

    * m: the module being built; receives the sync assignments
    * in_z: input record handed on to the pack module
    """
    self.mod.setup(m, in_z)

    # NOTE(review): one original line at this position was not visible
    # during review and is assumed to have been blank - confirm.
    # The pack module publishes its result record as `o`, and the record
    # keeps the packed value under `.z.v` (as FPNormToPack.setup reads
    # it); `self.mod.out_z` and `self.out_z.v` do not exist.
    m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
    m.d.sync += self.out_z.mid.eq(self.mod.o.mid)
1474 def action(self
, m
):
1475 m
.next
= "pack_put_z"
# FPPutZ: FSM state (name given by `state`) that presents a result on
# out_z and waits for it to be taken.  to_state defaults to "get_ops".
1478 class FPPutZ(FPState
):
# NOTE(review): action() reads self.in_z and self.out_z, whose
# assignments are not among the visible lines of __init__.
1480 def __init__(self
, state
, in_z
, out_z
, in_mid
, out_mid
, to_state
=None):
1481 FPState
.__init
__(self
, state
)
1482 if to_state
is None:
1483 to_state
= "get_ops"
1484 self
.to_state
= to_state
1487 self
.in_mid
= in_mid
1488 self
.out_mid
= out_mid
# action: when in_mid was supplied, out_mid is loaded from it on the sync
# domain; out_z.z.v is assigned from in_z.v.  Once out_z.z.stb and
# out_z.z.ack are both high, stb is cleared and the FSM moves to to_state;
# the remaining visible assignment raises stb (presumably the alternative
# branch - the separating line is not visible in this view).
1490 def action(self
, m
):
1491 if self
.in_mid
is not None:
1492 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
1494 self
.out_z
.z
.v
.eq(self
.in_z
.v
)
1496 with m
.If(self
.out_z
.z
.stb
& self
.out_z
.z
.ack
):
1497 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(0)
1498 m
.next
= self
.to_state
1500 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(1)
# FPPutZIdx: FSM state (name given by `state`) that presents a result on
# the entry of out_zs selected by in_mid.  to_state defaults to "get_ops".
1503 class FPPutZIdx(FPState
):
# NOTE(review): action() reads self.in_z, whose assignment is not among
# the visible lines of __init__.
1505 def __init__(self
, state
, in_z
, out_zs
, in_mid
, to_state
=None):
1506 FPState
.__init
__(self
, state
)
1507 if to_state
is None:
1508 to_state
= "get_ops"
1509 self
.to_state
= to_state
1511 self
.out_zs
= out_zs
1512 self
.in_mid
= in_mid
# action: outz_stb/outz_ack mirror the stb/ack of out_zs[in_mid]
# combinatorially, and the selected entry's v is assigned from in_z.v.
# Once both are high the selected stb is cleared and the FSM moves to
# to_state; the remaining visible assignment raises the selected stb
# (presumably the alternative branch - the separating line is not visible
# in this view).
1514 def action(self
, m
):
1515 outz_stb
= Signal(reset_less
=True)
1516 outz_ack
= Signal(reset_less
=True)
1517 m
.d
.comb
+= [outz_stb
.eq(self
.out_zs
[self
.in_mid
].stb
),
1518 outz_ack
.eq(self
.out_zs
[self
.in_mid
].ack
),
1521 self
.out_zs
[self
.in_mid
].v
.eq(self
.in_z
.v
)
1523 with m
.If(outz_stb
& outz_ack
):
1524 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(0)
1525 m
.next
= self
.to_state
1527 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(1)
# FPADDBaseData: input record for the adder - two operands a and b, each
# `width` bits wide, plus a mid signal id_wid bits wide.
1529 class FPADDBaseData
:
1531 def __init__(self
, width
, id_wid
):
1533 self
.id_wid
= id_wid
1534 self
.a
= Signal(width
)
1535 self
.b
= Signal(width
)
1536 self
.mid
= Signal(id_wid
, reset_less
=True)
# returns the list of assignments copying a, b and mid from i
1539 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
# Data record holding a result z (FPOp) and a mid signal id_wid bits wide.
# The class header is not visible in this view; presumably this is
# FPOpData, the record type returned by the adder module's output spec.
1543 def __init__(self
, width
, id_wid
):
1544 self
.z
= FPOp(width
)
1545 self
.mid
= Signal(id_wid
, reset_less
=True)
# returns the list of assignments copying z and mid from i
1548 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
# Adder module built as an FSM from a list of states.  The class header is
# not visible in this view; presumably FPADDBaseMod, the class that
# FPADDBase instantiates.  It stores the configuration flags, an input
# Trigger (in_t), and the input/output records i and o.
1553 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1556 * width: bit-width of IEEE754. supported: 16, 32, 64
1557 * id_wid: an identifier that is sync-connected to the input
1558 * single_cycle: True indicates each stage to complete in 1 clock
1559 * compact: True indicates a reduced number of stages
1562 self
.id_wid
= id_wid
1563 self
.single_cycle
= single_cycle
1564 self
.compact
= compact
1566 self
.in_t
= Trigger()
1567 self
.i
= self
.ispec()
1568 self
.o
= self
.ospec()
# input spec record
1573 return FPADDBaseData(self
.width
, self
.id_wid
)
# output spec record
1576 return FPOpData(self
.width
, self
.id_wid
)
# add_state: appends the state to self.states
1578 def add_state(self
, state
):
1579 self
.states
.append(state
)
# get_fragment: registers o.z and in_t as submodules, builds the states
# through get_compact_fragment or get_longer_fragment (the selecting lines
# are not visible in this view; presumably chosen by self.compact), then
# opens one FSM state per entry of self.states, keyed by state_from.
1582 def get_fragment(self
, platform
=None):
1583 """ creates the HDL code-fragment for FPAdd
1586 m
.submodules
.out_z
= self
.o
.z
1587 m
.submodules
.in_t
= self
.in_t
1589 self
.get_compact_fragment(m
, platform
)
1591 self
.get_longer_fragment(m
, platform
)
1593 with m
.FSM() as fsm
:
1595 for state
in self
.states
:
1596 with m
.State(state
.state_from
):
# get_longer_fragment: builds the one-state-per-stage pipeline: get_ops,
# special cases, denormalise, align, add stage 0, add stage 1, normalise,
# round, corrections, pack, and two FPPutZ output states ("pack_put_z" and
# "put_z").  single_cycle selects the Single or Multi variant of the align
# and normalise states.
# NOTE(review): this path reads a, b, self.in_mid, self.out_z and
# self.out_mid, none of which are defined in the visible lines, and
# several setup() calls pass more arguments than the setup(self, m, i)
# signatures shown for FPNorm1Single, FPRound and FPCorrections.  It looks
# out of step with the compact path - confirm before relying on it.
1601 def get_longer_fragment(self
, m
, platform
=None):
1603 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1605 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1609 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1610 sc
.setup(m
, a
, b
, self
.in_mid
)
1612 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1613 dn
.setup(m
, a
, b
, sc
.in_mid
)
1615 if self
.single_cycle
:
1616 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1617 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1619 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1620 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1622 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1623 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1625 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1626 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1628 if self
.single_cycle
:
1629 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1630 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1632 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1633 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1635 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1636 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1638 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1639 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1641 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1642 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1644 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1645 pa
.in_mid
, self
.out_mid
))
1647 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1648 pa
.in_mid
, self
.out_mid
))
# get_compact_fragment: builds the reduced pipeline: get_ops, a combined
# special-cases/denormalise state, a combined align/add state, the
# FPNormToPack state, and two FPPutZ output states.  "pack_put_z" outputs
# the FPNormToPack result; "put_z" outputs the special-cases result.
# NOTE(review): the setup() calls for sc and alm are not visible in this
# view.
1650 def get_compact_fragment(self
, m
, platform
=None):
1652 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1653 self
.width
, self
.id_wid
))
1654 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1656 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1659 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1662 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1663 n1
.setup(m
, alm
.a1o
)
1665 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1666 n1
.out_z
.mid
, self
.o
.mid
))
1668 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
.z
, self
.o
,
1669 sc
.o
.mid
, self
.o
.mid
))
# FPADDBase: FSM state registered under the name "fpadd" that wraps an
# FPADDBaseMod (self.mod) and bridges its input trigger and output
# handshake to the enclosing state machine.
1672 class FPADDBase(FPState
):
1674 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1677 * width: bit-width of IEEE754. supported: 16, 32, 64
1678 * id_wid: an identifier that is sync-connected to the input
1679 * single_cycle: True indicates each stage to complete in 1 clock
1681 FPState
.__init
__(self
, "fpadd")
1683 self
.single_cycle
= single_cycle
1684 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1685 self
.o
= self
.ospec()
1687 self
.in_t
= Trigger()
1688 self
.i
= self
.ispec()
1690 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1691 self
.in_accept
= Signal(reset_less
=True)
1692 self
.add_stb
= Signal(reset_less
=True)
1693 self
.add_ack
= Signal(reset
=0, reset_less
=True)
# input spec: delegated to the wrapped module
1696 return self
.mod
.ispec()
# output spec: delegated to the wrapped module
1699 return self
.mod
.ospec()
# setup: combinatorially wires i into self.i and on into the module,
# mirrors the module's in_t and o handshake signals onto this state's own,
# and takes z_done from the module's o.z.trigger.  add_stb is registered
# from the add_stb argument; add_ack and o.z.ack get synchronous defaults
# of 0.  The module is registered as m.submodules.fpadd.
# in_mid is accepted but not used in the visible lines.
1701 def setup(self
, m
, i
, add_stb
, in_mid
):
1702 m
.d
.comb
+= [self
.i
.eq(i
),
1703 self
.mod
.i
.eq(self
.i
),
1704 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1705 #self.add_stb.eq(add_stb),
1706 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1707 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1708 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1709 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1710 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1711 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1714 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1715 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1716 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1717 #m.d.sync += self.in_t.stb.eq(0)
1719 m
.submodules
.fpadd
= self
.mod
# action: in_accept is add_stb AND NOT add_ack.  While z_done is low an
# accepted operand pair sets add_ack and in_t.stb; the visible lines also
# clear both and, per the existing comment, acknowledge on completion.
# NOTE(review): the tail of this method reads self.in_mid, self.out_mid,
# self.out_z and self.mod.out_mid / self.mod.out_z, none of which are
# assigned in the visible lines of this class, and several lines of the
# method are not visible in this view - confirm whether that tail is live
# code.
1721 def action(self
, m
):
1723 # in_accept is set on incoming strobe HIGH and ack LOW.
1724 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1726 #with m.If(self.in_t.ack):
1727 # m.d.sync += self.in_t.stb.eq(0)
1728 with m
.If(~self
.z_done
):
1729 # not done: test for accepting an incoming operand pair
1730 with m
.If(self
.in_accept
):
1732 self
.add_ack
.eq(1), # acknowledge receipt...
1733 self
.in_t
.stb
.eq(1), # initiate add
1736 m
.d
.sync
+= [self
.add_ack
.eq(0),
1737 self
.in_t
.stb
.eq(0),
1741 # done: acknowledge, and write out id and value
1742 m
.d
.sync
+= [self
.add_ack
.eq(1),
1749 if self
.in_mid
is not None:
1750 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1753 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1755 # move to output state on detecting z ack
1756 with m
.If(self
.out_z
.trigger
):
1757 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1760 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
# Result-array holder: an Array of per-slot outputs (self.res), one input
# FPOp (self.in_z) and an in_mid signal id_wid bits wide.  The class
# header is not visible in this view.
# NOTE(review): the loop below iterates over rs_sz, which is not a
# parameter of the visible __init__ signature, and the lines creating
# each out_z and the res list are not visible - confirm.
1764 def __init__(self
, width
, id_wid
):
1766 self
.id_wid
= id_wid
1768 for i
in range(rs_sz
):
1770 out_z
.name
= "out_z_%d" % i
1772 self
.res
= Array(res
)
1773 self
.in_z
= FPOp(width
)
1774 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
# setup: drives self.in_z and self.in_mid combinatorially from the
# arguments
1776 def setup(self
, m
, in_z
, in_mid
):
1777 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1778 self
.in_mid
.eq(in_mid
)]
# get_fragment: registers in_z and the result array as submodules
1780 def get_fragment(self
, platform
=None):
1781 """ creates the HDL code-fragment for FPAdd
1784 m
.submodules
.res_in_z
= self
.in_z
1785 m
.submodules
+= self
.res
1797 """ FPADD: stages as follows:
1803 FPAddBase---> FPAddBaseMod
1805 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1807 FPAddBase is tricky: it is both a stage and *has* stages.
1808 Connection to FPAddBaseMod therefore requires an in stb/ack
1809 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1810 needs to be the thing that raises the incoming stb.
# Top-level adder with rs_sz operand slots and rs_sz result slots.  The
# class header is not visible in this view; presumably FPADD, the class
# instantiated in the __main__ section.  Operand pairs are kept as
# (in_a, in_b) tuples named in_a_<i>/in_b_<i>; results are named
# out_z_<i> and held in an Array.
1813 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1816 * width: bit-width of IEEE754. supported: 16, 32, 64
1817 * id_wid: an identifier that is sync-connected to the input
1818 * single_cycle: True indicates each stage to complete in 1 clock
1821 self
.id_wid
= id_wid
1822 self
.single_cycle
= single_cycle
1824 #self.out_z = FPOp(width)
1825 self
.ids
= FPID(id_wid
)
1828 for i
in range(rs_sz
):
1831 in_a
.name
= "in_a_%d" % i
1832 in_b
.name
= "in_b_%d" % i
1833 rs
.append((in_a
, in_b
))
1837 for i
in range(rs_sz
):
1839 out_z
.name
= "out_z_%d" % i
1841 self
.res
= Array(res
)
# add_state: appends the state to self.states
1845 def add_state(self
, state
):
1846 self
.states
.append(state
)
# get_fragment: uses operand slot 0 only (rs[0]), adds "get_a" and "get_b"
# operand-fetch states, an FPADDBase state fed through a registered input
# record (abd), and a "put_z" FPPutZIdx state writing into self.res, then
# opens one FSM state per entry of self.states, keyed by state_from.
# NOTE(review): the definitions of a, b and o used below are not visible
# in this view.
1849 def get_fragment(self
, platform
=None):
1850 """ creates the HDL code-fragment for FPAdd
1853 m
.submodules
+= self
.rs
1855 in_a
= self
.rs
[0][0]
1856 in_b
= self
.rs
[0][1]
1858 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1863 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1868 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1869 ab
= self
.add_state(ab
)
1870 abd
= ab
.ispec() # create an input spec object for FPADDBase
1871 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1872 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1875 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1878 with m
.FSM() as fsm
:
1880 for state
in self
.states
:
1881 with m
.State(state
.state_from
):
# Script entry point: hands an adder instance and its port list to
# nmigen's command-line main().
# NOTE(review): alu is assigned twice below (FPADD, then FPADDBase), each
# followed by a main() call; the lines choosing between the two are not
# visible in this view.  The FPADDBase variant lists alu.in_a, alu.in_b,
# alu.out_z, alu.in_mid and alu.out_mid, none of which are assigned in
# the visible lines of FPADDBase - confirm before enabling it.
1887 if __name__
== "__main__":
1889 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1890 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1891 alu
.rs
[0][1].ports() + \
1892 alu
.res
[0].ports() + \
1893 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1895 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1896 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1897 alu
.in_t
.ports() + \
1898 alu
.out_z
.ports() + \
1899 [alu
.in_mid
, alu
.out_mid
])
1902 # works... but don't use, just do "python fname.py convert -t v"
1903 #print (verilog.convert(alu, ports=[
1904 # ports=alu.in_a.ports() + \
1905 # alu.in_b.ports() + \
1906 # alu.out_z.ports())