1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 #from fpbase import FPNumShiftMultiRight
15 class FPState(FPBase
):
def __init__(self, state_from):
    """ records the state identifier handed to this object.

        state_from: stored unmodified as self.state_from
    """
    self.state_from = state_from
19 def set_inputs(self
, inputs
):
21 for k
,v
in inputs
.items():
24 def set_outputs(self
, outputs
):
25 self
.outputs
= outputs
26 for k
,v
in outputs
.items():
30 class FPGetSyncOpsMod
:
31 def __init__(self
, width
, num_ops
=2):
33 self
.num_ops
= num_ops
36 for i
in range(num_ops
):
37 inops
.append(Signal(width
, reset_less
=True))
38 outops
.append(Signal(width
, reset_less
=True))
41 self
.stb
= Signal(num_ops
)
43 self
.ready
= Signal(reset_less
=True)
44 self
.out_decode
= Signal(reset_less
=True)
46 def elaborate(self
, platform
):
48 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
49 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
50 with m
.If(self
.out_decode
):
51 for i
in range(self
.num_ops
):
53 self
.out_op
[i
].eq(self
.in_op
[i
]),
58 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
62 def __init__(self
, width
, num_ops
):
63 Trigger
.__init
__(self
)
65 self
.num_ops
= num_ops
68 for i
in range(num_ops
):
69 res
.append(Signal(width
))
74 for i
in range(self
.num_ops
):
82 def __init__(self
, width
, num_ops
=2, num_rows
=4):
84 self
.num_ops
= num_ops
85 self
.num_rows
= num_rows
86 self
.mmax
= int(log(self
.num_rows
) / log(2))
88 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
89 for i
in range(num_rows
):
90 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
91 self
.rs
= Array(self
.rs
)
93 self
.out_op
= FPOps(width
, num_ops
)
95 def elaborate(self
, platform
):
98 pe
= PriorityEncoder(self
.num_rows
)
99 m
.submodules
.selector
= pe
100 m
.submodules
.out_op
= self
.out_op
101 m
.submodules
+= self
.rs
103 # connect priority encoder
105 for i
in range(self
.num_rows
):
106 in_ready
.append(self
.rs
[i
].ready
)
107 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
109 active
= Signal(reset_less
=True)
110 out_en
= Signal(reset_less
=True)
111 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
112 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
114 # encoder active: ack relevant input, record MID, pass output
117 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
118 m
.d
.sync
+= rs
.ack
.eq(0)
119 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
120 for j
in range(self
.num_ops
):
121 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
123 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
124 # acks all default to zero
125 for i
in range(self
.num_rows
):
126 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
132 for i
in range(self
.num_rows
):
134 res
+= inop
.in_op
+ [inop
.stb
]
135 return self
.out_op
.ports() + res
+ [self
.mid
]
def __init__(self, width):
    """ creates the operand input, the operand output and the decode flag.

        width: passed to FPOp() for in_op, and used as the shape of the
               out_op Signal
    """
    self.in_op = FPOp(width)
    self.out_op = Signal(width)
    # single-bit flag, created without a reset value
    self.out_decode = Signal(reset_less=True)
144 def elaborate(self
, platform
):
146 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
147 m
.submodules
.get_op_in
= self
.in_op
148 #m.submodules.get_op_out = self.out_op
149 with m
.If(self
.out_decode
):
151 self
.out_op
.eq(self
.in_op
.v
),
156 class FPGetOp(FPState
):
160 def __init__(self
, in_state
, out_state
, in_op
, width
):
161 FPState
.__init
__(self
, in_state
)
162 self
.out_state
= out_state
163 self
.mod
= FPGetOpMod(width
)
165 self
.out_op
= Signal(width
)
166 self
.out_decode
= Signal(reset_less
=True)
def setup(self, m, in_op):
    """ links module to inputs and outputs

        m:     module under construction; self.mod is registered in
               m.submodules under the name held in self.state_from
        in_op: source assigned combinatorially to self.mod.in_op
    """
    fetch = self.mod
    setattr(m.submodules, self.state_from, fetch)
    m.d.comb += [
        fetch.in_op.eq(in_op),
        # mirror the module's decode flag onto this state object
        self.out_decode.eq(fetch.out_decode),
    ]
176 with m
.If(self
.out_decode
):
177 m
.next
= self
.out_state
179 self
.in_op
.ack
.eq(0),
180 self
.out_op
.eq(self
.mod
.out_op
)
183 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
186 class FPGet2OpMod(Trigger
):
187 def __init__(self
, width
, id_wid
):
188 Trigger
.__init
__(self
)
191 self
.i
= self
.ispec()
192 self
.o
= self
.ospec()
195 return FPADDBaseData(self
.width
, self
.id_wid
)
198 return FPNumBase2Ops(self
.width
, self
.id_wid
)
200 def elaborate(self
, platform
):
201 m
= Trigger
.elaborate(self
, platform
)
202 m
.submodules
.get_op1_out
= self
.o
.a
203 m
.submodules
.get_op2_out
= self
.o
.b
204 out_op1
= FPNumIn(None, self
.width
)
205 out_op2
= FPNumIn(None, self
.width
)
206 with m
.If(self
.trigger
):
208 out_op1
.decode(self
.i
.a
),
209 out_op2
.decode(self
.i
.b
),
210 self
.o
.a
.eq(out_op1
),
211 self
.o
.b
.eq(out_op2
),
216 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """ builds the two-operand fetch state.

        in_state:  forwarded to FPState.__init__
        out_state: stored as self.out_state
        width:     forwarded to FPGet2OpMod
        id_wid:    forwarded to FPGet2OpMod
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state

    # the module, plus a separate output object made from its ospec()
    ops_mod = FPGet2OpMod(width, id_wid)
    self.mod = ops_mod
    self.o = ops_mod.ospec()

    # handshake / decode flags (single-bit, no reset value)
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
def setup(self, m, i, in_stb, in_ack):
    """ links module to inputs and outputs

        m:      module under construction; self.mod is registered as
                m.submodules.get_ops
        i:      source assigned to self.mod.i
        in_stb: source assigned to self.mod.stb
        in_ack: signal that is driven from self.mod.ack
    """
    ops = self.mod
    m.submodules.get_ops = ops
    m.d.comb += [
        # inputs into the module
        ops.i.eq(i),
        ops.stb.eq(in_stb),
        # module status copied out onto this state and the caller's ack
        self.out_ack.eq(ops.ack),
        self.out_decode.eq(ops.trigger),
        in_ack.eq(ops.ack),
    ]
240 with m
.If(self
.out_decode
):
241 m
.next
= self
.out_state
244 self
.o
.eq(self
.mod
.o
),
247 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """ creates two FPNumBase operands plus an id signal.

        width:   forwarded to FPNumBase for both operands
        id_wid:  shape of the mid Signal
        m_extra: forwarded to FPNumBase as its second argument
    """
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)
    self.mid = Signal(id_wid, reset_less=True)
258 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
261 class FPAddSpecialCasesMod
:
262 """ special cases: NaNs, infs, zeros, denormalised
263 NOTE: some of these are unique to add. see "Special Operations"
264 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
267 def __init__(self
, width
, id_wid
):
270 self
.i
= self
.ispec()
271 self
.o
= self
.ospec()
272 self
.out_do_z
= Signal(reset_less
=True)
275 return FPNumBase2Ops(self
.width
, self
.id_wid
)
278 return FPPackData(self
.width
, self
.id_wid
)
def setup(self, m, i, out_do_z):
    """ links module to inputs and outputs

        m:        module under construction; this object is registered as
                  m.submodules.specialcases
        i:        source assigned to self.i
        out_do_z: signal that is driven from self.out_do_z
    """
    setattr(m.submodules, "specialcases", self)
    m.d.comb += [
        self.i.eq(i),
        out_do_z.eq(self.out_do_z),
    ]
287 def elaborate(self
, platform
):
290 m
.submodules
.sc_in_a
= self
.i
.a
291 m
.submodules
.sc_in_b
= self
.i
.b
292 m
.submodules
.sc_out_z
= self
.o
.z
295 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
298 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
300 # if a is NaN or b is NaN return NaN
301 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
302 m
.d
.comb
+= self
.out_do_z
.eq(1)
303 m
.d
.comb
+= self
.o
.z
.nan(0)
305 # XXX WEIRDNESS for FP16 non-canonical NaN handling
308 ## if a is zero and b is NaN return -b
309 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
310 # m.d.comb += self.out_do_z.eq(1)
311 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
313 ## if b is zero and a is NaN return -a
314 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
315 # m.d.comb += self.out_do_z.eq(1)
316 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
318 ## if a is -zero and b is NaN return -b
319 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
320 # m.d.comb += self.out_do_z.eq(1)
321 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
323 ## if b is -zero and a is NaN return -a
324 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
325 # m.d.comb += self.out_do_z.eq(1)
326 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
328 # if a is inf return inf (or NaN)
329 with m
.Elif(self
.i
.a
.is_inf
):
330 m
.d
.comb
+= self
.out_do_z
.eq(1)
331 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
332 # if a is inf and signs don't match return NaN
333 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
334 m
.d
.comb
+= self
.o
.z
.nan(0)
336 # if b is inf return inf
337 with m
.Elif(self
.i
.b
.is_inf
):
338 m
.d
.comb
+= self
.out_do_z
.eq(1)
339 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
341 # if a is zero and b zero return signed-a/b
342 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
343 m
.d
.comb
+= self
.out_do_z
.eq(1)
344 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
348 # if a is zero return b
349 with m
.Elif(self
.i
.a
.is_zero
):
350 m
.d
.comb
+= self
.out_do_z
.eq(1)
351 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
354 # if b is zero return a
355 with m
.Elif(self
.i
.b
.is_zero
):
356 m
.d
.comb
+= self
.out_do_z
.eq(1)
357 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
360 # if a equal to -b return zero (+ve zero)
361 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
362 m
.d
.comb
+= self
.out_do_z
.eq(1)
363 m
.d
.comb
+= self
.o
.z
.zero(0)
365 # Denormalised Number checks
367 m
.d
.comb
+= self
.out_do_z
.eq(0)
373 def __init__(self
, id_wid
):
376 self
.in_mid
= Signal(id_wid
, reset_less
=True)
377 self
.out_mid
= Signal(id_wid
, reset_less
=True)
383 if self
.id_wid
is not None:
384 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
387 class FPAddSpecialCases(FPState
, FPID
):
388 """ special cases: NaNs, infs, zeros, denormalised
389 NOTE: some of these are unique to add. see "Special Operations"
390 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ builds the "special_cases" state.

        width:  forwarded to FPAddSpecialCasesMod
        id_wid: forwarded to FPID.__init__ and FPAddSpecialCasesMod
    """
    FPState.__init__(self, "special_cases")
    FPID.__init__(self, id_wid)
    # FPAddSpecialCasesMod.__init__ takes (width, id_wid): passing width
    # alone raised TypeError as soon as this state was constructed.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
def setup(self, m, in_a, in_b, in_mid):
    """ links module to inputs and outputs

        m:          module under construction
        in_a, in_b: forwarded to self.mod.setup
        in_mid:     assigned to self.in_mid when that attribute is not None
    """
    # NOTE(review): FPAddSpecialCasesMod.setup is declared in this file as
    # setup(m, i, out_do_z); this call passes one more argument - confirm.
    self.mod.setup(m, in_a, in_b, self.out_do_z)

    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
409 with m
.If(self
.out_do_z
):
410 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
413 m
.next
= "denormalise"
416 class FPAddSpecialCasesDeNorm(FPState
, FPID
):
417 """ special cases: NaNs, infs, zeros, denormalised
418 NOTE: some of these are unique to add. see "Special Operations"
419 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ builds a state holding both a special-cases module and a
        denormalise module.

        width:  forwarded to both modules
        id_wid: forwarded to FPID.__init__ and to both modules
    """
    FPState.__init__(self, "special_cases")
    FPID.__init__(self, id_wid)

    # special-cases half: module, its output object, and the do_z flag
    special = FPAddSpecialCasesMod(width, id_wid)
    self.smod = special
    self.out_z = special.ospec()
    self.out_do_z = Signal(reset_less=True)

    # denormalise half: module and its output object
    denorm = FPAddDeNormMod(width, id_wid)
    self.dmod = denorm
    self.o = denorm.ospec()
def setup(self, m, i, in_mid):
    """ links module to inputs and outputs

        m:      module under construction
        i:      handed to both self.smod.setup and self.dmod.setup
        in_mid: assigned to self.in_mid when that attribute is not None
    """
    # both sub-modules are given the same input
    self.smod.setup(m, i, self.out_do_z)
    self.dmod.setup(m, i)

    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
442 with m
.If(self
.out_do_z
):
443 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
447 m
.d
.sync
+= self
.o
.a
.eq(self
.dmod
.o
.a
)
448 m
.d
.sync
+= self
.o
.b
.eq(self
.dmod
.o
.b
)
451 class FPAddDeNormMod(FPState
):
453 def __init__(self
, width
, id_wid
):
456 self
.i
= self
.ispec()
457 self
.o
= self
.ospec()
460 return FPNumBase2Ops(self
.width
, self
.id_wid
)
463 return FPNumBase2Ops(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ links module to inputs and outputs

        m: module under construction; this object is registered as
           m.submodules.denormalise
        i: source assigned combinatorially to self.i
    """
    setattr(m.submodules, "denormalise", self)
    m.d.comb += [self.i.eq(i)]
471 def elaborate(self
, platform
):
473 m
.submodules
.denorm_in_a
= self
.i
.a
474 m
.submodules
.denorm_in_b
= self
.i
.b
475 m
.submodules
.denorm_out_a
= self
.o
.a
476 m
.submodules
.denorm_out_b
= self
.o
.b
477 # hmmm, don't like repeating identical code
478 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
479 with m
.If(self
.i
.a
.exp_n127
):
480 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
482 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
484 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
485 with m
.If(self
.i
.b
.exp_n127
):
486 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
488 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
493 class FPAddDeNorm(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ builds the "denormalise" state.

        width:  forwarded to FPAddDeNormMod and to both FPNumBase outputs
        id_wid: forwarded to FPID.__init__ and FPAddDeNormMod
    """
    FPState.__init__(self, "denormalise")
    FPID.__init__(self, id_wid)
    # FPAddDeNormMod.__init__ takes (width, id_wid): passing width alone
    # raised TypeError as soon as this state was constructed.
    self.mod = FPAddDeNormMod(width, id_wid)
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
def setup(self, m, in_a, in_b, in_mid):
    """ links module to inputs and outputs

        m:          module under construction
        in_a, in_b: forwarded to self.mod.setup
        in_mid:     assigned to self.in_mid when that attribute is not None
    """
    # NOTE(review): FPAddDeNormMod.setup is declared in this file as
    # setup(m, i); this call passes one more argument - confirm.
    self.mod.setup(m, in_a, in_b)

    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
511 # Denormalised Number checks
513 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
514 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
517 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ creates the alignment module's operands and its exp_eq flag.

        width: forwarded to FPNumBase (inputs) and FPNumIn (outputs)
    """
    # inputs
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    # outputs
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # single-bit flag, created without a reset value
    self.exp_eq = Signal(reset_less=True)
526 def elaborate(self
, platform
):
527 # This one however (single-cycle) will do the shift
532 m
.submodules
.align_in_a
= self
.in_a
533 m
.submodules
.align_in_b
= self
.in_b
534 m
.submodules
.align_out_a
= self
.out_a
535 m
.submodules
.align_out_b
= self
.out_b
537 # NOTE: this does *not* do single-cycle multi-shifting,
538 # it *STAYS* in the align state until exponents match
540 # exponent of a greater than b: shift b down
541 m
.d
.comb
+= self
.exp_eq
.eq(0)
542 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
543 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
544 agtb
= Signal(reset_less
=True)
545 altb
= Signal(reset_less
=True)
546 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
547 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
549 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
550 # exponent of b greater than a: shift a down
552 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
553 # exponents equal: move to next stage.
555 m
.d
.comb
+= self
.exp_eq
.eq(1)
559 class FPAddAlignMulti(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ builds the "align" state around an FPAddAlignMultiMod.

        width:  forwarded to FPAddAlignMultiMod and both FPNumIn outputs
        id_wid: forwarded to FPID.__init__
    """
    # base-class initialisation: FPID first, then FPState
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "align")

    self.mod = FPAddAlignMultiMod(width)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    self.exp_eq = Signal(reset_less=True)
def setup(self, m, in_a, in_b, in_mid):
    """ links module to inputs and outputs

        m:          module under construction; self.mod is registered as
                    m.submodules.align
        in_a, in_b: sources assigned to self.mod.in_a / self.mod.in_b
        in_mid:     assigned to self.in_mid when that attribute is not None
    """
    aligner = self.mod
    m.submodules.align = aligner
    # only the operands and exp_eq are wired here: self.out_a and
    # self.out_b are not assigned combinatorially in this method
    m.d.comb += [
        aligner.in_a.eq(in_a),
        aligner.in_b.eq(in_b),
        self.exp_eq.eq(aligner.exp_eq),
    ]

    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
583 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
584 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
585 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """ creates two FPNumIn operands plus an id signal.

        width:  forwarded to FPNumIn for both operands
        id_wid: shape of the mid Signal
    """
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    self.mid = Signal(id_wid, reset_less=True)
597 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
600 class FPAddAlignSingleMod
:
602 def __init__(self
, width
, id_wid
):
605 self
.i
= self
.ispec()
606 self
.o
= self
.ospec()
609 return FPNumBase2Ops(self
.width
, self
.id_wid
)
612 return FPNumIn2Ops(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ links module to inputs and outputs

        m: module under construction; this object is registered as
           m.submodules.align
        i: source assigned combinatorially to self.i
    """
    setattr(m.submodules, "align", self)
    m.d.comb += [self.i.eq(i)]
620 def elaborate(self
, platform
):
621 """ Aligns A against B or B against A, depending on which has the
622 greater exponent. This is done in a *single* cycle using
623 variable-width bit-shift
625 the shifter used here is quite expensive in terms of gates.
626 Mux A or B in (and out) into temporaries, as only one of them
627 needs to be aligned against the other
631 m
.submodules
.align_in_a
= self
.i
.a
632 m
.submodules
.align_in_b
= self
.i
.b
633 m
.submodules
.align_out_a
= self
.o
.a
634 m
.submodules
.align_out_b
= self
.o
.b
636 # temporary (muxed) input and output to be shifted
637 t_inp
= FPNumBase(self
.width
)
638 t_out
= FPNumIn(None, self
.width
)
639 espec
= (len(self
.i
.a
.e
), True)
640 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
641 m
.submodules
.align_t_in
= t_inp
642 m
.submodules
.align_t_out
= t_out
643 m
.submodules
.multishift_r
= msr
645 ediff
= Signal(espec
, reset_less
=True)
646 ediffr
= Signal(espec
, reset_less
=True)
647 tdiff
= Signal(espec
, reset_less
=True)
648 elz
= Signal(reset_less
=True)
649 egz
= Signal(reset_less
=True)
651 # connect multi-shifter to t_inp/out mantissa (and tdiff)
652 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
653 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
654 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
655 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
656 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
658 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
659 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
660 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
661 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
663 # default: A-exp == B-exp, A and B untouched (fall through)
664 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
665 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
666 # only one shifter (muxed)
667 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
668 # exponent of a greater than b: shift b down
670 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
673 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
675 # exponent of b greater than a: shift a down
677 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
680 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
685 class FPAddAlignSingle(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ builds the "align" state around an FPAddAlignSingleMod.

        width:  forwarded to FPAddAlignSingleMod and both FPNumIn outputs
        id_wid: forwarded to FPID.__init__ and FPAddAlignSingleMod
    """
    FPState.__init__(self, "align")
    FPID.__init__(self, id_wid)

    self.mod = FPAddAlignSingleMod(width, id_wid)
    # outputs held by this state
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
def setup(self, m, in_a, in_b, in_mid):
    """ links module to inputs and outputs

        m:          module under construction
        in_a, in_b: forwarded to self.mod.setup
        in_mid:     assigned to self.in_mid when that attribute is not None
    """
    # NOTE(review): FPAddAlignSingleMod.setup is declared in this file as
    # setup(m, i); this call passes one more argument - confirm.
    self.mod.setup(m, in_a, in_b)

    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
703 # NOTE: could be done as comb
704 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
705 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
709 class FPAddAlignSingleAdd(FPState
, FPID
):
711 def __init__(self
, width
, id_wid
):
712 FPState
.__init
__(self
, "align")
713 FPID
.__init
__(self
, id_wid
)
716 self
.mod
= FPAddAlignSingleMod(width
, id_wid
)
717 self
.o
= self
.mod
.ospec()
719 self
.a1mod
= FPAddStage1Mod(width
, id_wid
)
720 self
.a1o
= self
.a1mod
.ospec()
722 def setup(self
, m
, i
, in_mid
):
723 """ links module to inputs and outputs
726 m
.d
.comb
+= self
.o
.eq(self
.mod
.o
)
728 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
729 a0mod
.setup(m
, self
.o
)
731 m
.d
.comb
+= a0o
.eq(a0mod
.o
)
733 self
.a1mod
.setup(m
, a0o
)
735 if self
.in_mid
is not None:
736 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
740 m
.d
.sync
+= self
.a1o
.eq(self
.a1mod
.o
)
741 m
.next
= "normalise_1"
744 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """ creates the result number, the running total and the id signal.

        width:  forwarded to FPNumBase (with False as second argument)
        id_wid: shape of the mid Signal
    """
    self.z = FPNumBase(width, False)
    # total is four bits wider than z's m_width
    tot_width = self.z.m_width + 4
    self.tot = Signal(tot_width, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
752 return [self
.z
.eq(i
.z
), self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
755 class FPAddStage0Mod
:
757 def __init__(self
, width
, id_wid
):
760 self
.i
= self
.ispec()
761 self
.o
= self
.ospec()
764 return FPNumBase2Ops(self
.width
, self
.id_wid
)
767 return FPAddStage0Data(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ links module to inputs and outputs

        m: module under construction; this object is registered as
           m.submodules.add0
        i: source assigned combinatorially to self.i
    """
    setattr(m.submodules, "add0", self)
    m.d.comb += [self.i.eq(i)]
775 def elaborate(self
, platform
):
777 m
.submodules
.add0_in_a
= self
.i
.a
778 m
.submodules
.add0_in_b
= self
.i
.b
779 m
.submodules
.add0_out_z
= self
.o
.z
781 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
783 # store intermediate tests (and zero-extended mantissas)
784 seq
= Signal(reset_less
=True)
785 mge
= Signal(reset_less
=True)
786 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
787 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
788 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
789 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
790 am0
.eq(Cat(self
.i
.a
.m
, 0)),
791 bm0
.eq(Cat(self
.i
.b
.m
, 0))
793 # same-sign (both negative or both positive) add mantissas
796 self
.o
.tot
.eq(am0
+ bm0
),
797 self
.o
.z
.s
.eq(self
.i
.a
.s
)
799 # a mantissa greater than b, use a
802 self
.o
.tot
.eq(am0
- bm0
),
803 self
.o
.z
.s
.eq(self
.i
.a
.s
)
805 # b mantissa greater than a, use b
808 self
.o
.tot
.eq(bm0
- am0
),
809 self
.o
.z
.s
.eq(self
.i
.b
.s
)
814 class FPAddStage0(FPState
, FPID
):
815 """ First stage of add. covers same-sign (add) and subtract
816 special-casing when mantissas are greater or equal, to
817 give greatest accuracy.
def __init__(self, width, id_wid):
    """ builds the "add_0" state.

        width:  forwarded to FPAddStage0Mod
        id_wid: forwarded to FPID.__init__ and FPAddStage0Mod
    """
    FPState.__init__(self, "add_0")
    FPID.__init__(self, id_wid)
    # FPAddStage0Mod.__init__ takes (width, id_wid): passing width alone
    # raised TypeError as soon as this state was constructed.
    self.mod = FPAddStage0Mod(width, id_wid)
    self.o = self.mod.ospec()
826 def setup(self
, m
, i
, in_mid
):
827 """ links module to inputs and outputs
830 if self
.in_mid
is not None:
831 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
835 # NOTE: these could be done as combinatorial (merge add0+add1)
836 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
840 class FPAddStage1Data
:
842 def __init__(self
, width
, id_wid
):
843 self
.z
= FPNumBase(width
, False)
845 self
.mid
= Signal(id_wid
, reset_less
=True)
848 return [self
.z
.eq(i
.z
), self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
852 class FPAddStage1Mod(FPState
):
853 """ Second stage of add: preparation for normalisation.
854 detects when tot sum is too big (tot[27] is kinda a carry bit)
857 def __init__(self
, width
, id_wid
):
860 self
.i
= self
.ispec()
861 self
.o
= self
.ospec()
864 return FPAddStage0Data(self
.width
, self
.id_wid
)
867 return FPAddStage1Data(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ links module to inputs and outputs

        m: module under construction; this object is registered as
           m.submodules.add1 and its output overflow object (self.o.of)
           as m.submodules.add1_out_overflow
        i: source assigned combinatorially to self.i
    """
    subs = m.submodules
    subs.add1 = self
    subs.add1_out_overflow = self.o.of

    m.d.comb += [self.i.eq(i)]
877 def elaborate(self
, platform
):
879 #m.submodules.norm1_in_overflow = self.in_of
880 #m.submodules.norm1_out_overflow = self.out_of
881 #m.submodules.norm1_in_z = self.in_z
882 #m.submodules.norm1_out_z = self.out_z
883 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
884 # tot[-1] (MSB) gets set when the sum overflows. shift result down
885 with m
.If(self
.i
.tot
[-1]):
887 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
888 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
889 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
890 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
891 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
892 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
894 # tot[-1] (MSB) zero case
897 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
898 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
899 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
900 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
901 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
906 class FPAddStage1(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ builds the "add_1" state.

        width:  forwarded to FPAddStage1Mod and to the out_z FPNumBase
        id_wid: forwarded to FPID.__init__ and FPAddStage1Mod
    """
    FPState.__init__(self, "add_1")
    FPID.__init__(self, id_wid)
    # FPAddStage1Mod.__init__ takes (width, id_wid): passing width alone
    # raised TypeError as soon as this state was constructed.
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    self.norm_stb = Signal()
916 def setup(self
, m
, i
, in_mid
):
917 """ links module to inputs and outputs
921 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
923 if self
.in_mid
is not None:
924 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
928 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
929 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
930 m
.d
.sync
+= self
.norm_stb
.eq(1)
931 m
.next
= "normalise_1"
934 class FPNormaliseModSingle
:
936 def __init__(self
, width
):
938 self
.in_z
= self
.ispec()
939 self
.out_z
= self
.ospec()
942 return FPNumBase(self
.width
, False)
945 return FPNumBase(self
.width
, False)
def setup(self, m, i):
    """ links module to inputs and outputs

        m: module under construction; this object is registered as
           m.submodules.normalise
        i: source assigned combinatorially to the module input, self.in_z
    """
    m.submodules.normalise = self
    # The input object created by the constructor is self.in_z (the same
    # name elaborate() reads); the previous target, self.i, is not an
    # attribute assigned by the constructor lines visible in this file.
    # NOTE(review): confirm no other code attaches a separate self.i.
    m.d.comb += self.in_z.eq(i)
953 def elaborate(self
, platform
):
956 mwid
= self
.out_z
.m_width
+2
957 pe
= PriorityEncoder(mwid
)
958 m
.submodules
.norm_pe
= pe
960 m
.submodules
.norm1_out_z
= self
.out_z
961 m
.submodules
.norm1_in_z
= self
.in_z
963 in_z
= FPNumBase(self
.width
, False)
965 m
.submodules
.norm1_insel_z
= in_z
966 m
.submodules
.norm1_insel_overflow
= in_of
968 espec
= (len(in_z
.e
), True)
969 ediff_n126
= Signal(espec
, reset_less
=True)
970 msr
= MultiShiftRMerge(mwid
, espec
)
971 m
.submodules
.multishift_r
= msr
973 m
.d
.comb
+= in_z
.eq(self
.in_z
)
974 m
.d
.comb
+= in_of
.eq(self
.in_of
)
975 # initialise out from in (overridden below)
976 m
.d
.comb
+= self
.out_z
.eq(in_z
)
977 m
.d
.comb
+= self
.out_of
.eq(in_of
)
978 # normalisation decrease condition
979 decrease
= Signal(reset_less
=True)
980 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
983 # *sigh* not entirely obvious: count leading zeros (clz)
984 # with a PriorityEncoder: to find from the MSB
985 # we reverse the order of the bits.
986 temp_m
= Signal(mwid
, reset_less
=True)
987 temp_s
= Signal(mwid
+1, reset_less
=True)
988 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
990 # cat round and guard bits back into the mantissa
991 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
992 pe
.i
.eq(temp_m
[::-1]), # inverted
993 clz
.eq(pe
.o
), # count zeros from MSB down
994 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
995 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
996 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """ creates the roundz flag, the result number and the id signal.

        width:  forwarded to FPNumBase (with False as second argument)
        id_wid: shape of the mid Signal
    """
    # single-bit flag, created without a reset value
    self.roundz = Signal(reset_less=True)
    self.z = FPNumBase(width, False)
    self.mid = Signal(id_wid, reset_less=True)
1009 return [self
.z
.eq(i
.z
), self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1012 class FPNorm1ModSingle
:
1014 def __init__(self
, width
, id_wid
):
1016 self
.id_wid
= id_wid
1017 self
.i
= self
.ispec()
1018 self
.o
= self
.ospec()
1021 return FPAddStage1Data(self
.width
, self
.id_wid
)
1024 return FPNorm1Data(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ links module to inputs and outputs

        m: module under construction; this object is registered as
           m.submodules.normalise_1
        i: source assigned combinatorially to self.i
    """
    setattr(m.submodules, "normalise_1", self)
    m.d.comb += [self.i.eq(i)]
1032 def elaborate(self
, platform
):
1035 mwid
= self
.o
.z
.m_width
+2
1036 pe
= PriorityEncoder(mwid
)
1037 m
.submodules
.norm_pe
= pe
1040 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1042 m
.submodules
.norm1_out_z
= self
.o
.z
1043 m
.submodules
.norm1_out_overflow
= of
1044 m
.submodules
.norm1_in_z
= self
.i
.z
1045 m
.submodules
.norm1_in_overflow
= self
.i
.of
1048 m
.submodules
.norm1_insel_z
= i
.z
1049 m
.submodules
.norm1_insel_overflow
= i
.of
1051 espec
= (len(i
.z
.e
), True)
1052 ediff_n126
= Signal(espec
, reset_less
=True)
1053 msr
= MultiShiftRMerge(mwid
, espec
)
1054 m
.submodules
.multishift_r
= msr
1056 m
.d
.comb
+= i
.eq(self
.i
)
1057 # initialise out from in (overridden below)
1058 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1059 m
.d
.comb
+= of
.eq(i
.of
)
1060 # normalisation increase/decrease conditions
1061 decrease
= Signal(reset_less
=True)
1062 increase
= Signal(reset_less
=True)
1063 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1064 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1066 with m
.If(decrease
):
1067 # *sigh* not entirely obvious: count leading zeros (clz)
1068 # with a PriorityEncoder: to find from the MSB
1069 # we reverse the order of the bits.
1070 temp_m
= Signal(mwid
, reset_less
=True)
1071 temp_s
= Signal(mwid
+1, reset_less
=True)
1072 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1073 # make sure that the amount to decrease by does NOT
1074 # go below the minimum non-INF/NaN exponent
1075 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1078 # cat round and guard bits back into the mantissa
1079 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1080 pe
.i
.eq(temp_m
[::-1]), # inverted
1081 clz
.eq(limclz
), # count zeros from MSB down
1082 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1083 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1084 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1085 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1086 # overflow in bits 0..1: got shifted too (leave sticky)
1087 of
.guard
.eq(temp_s
[1]), # guard
1088 of
.round_bit
.eq(temp_s
[0]), # round
1091 with m
.Elif(increase
):
1092 temp_m
= Signal(mwid
+1, reset_less
=True)
1094 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1096 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1097 # connect multi-shifter to inp/out mantissa (and ediff)
1099 msr
.diff
.eq(ediff_n126
),
1100 self
.o
.z
.m
.eq(msr
.m
[3:]),
1101 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1102 # overflow in bits 0..1: got shifted too (leave sticky)
1103 of
.guard
.eq(temp_s
[2]), # guard
1104 of
.round_bit
.eq(temp_s
[1]), # round
1105 of
.sticky
.eq(temp_s
[0]), # sticky
1106 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1112 class FPNorm1ModMulti
:
1114 def __init__(self
, width
, single_cycle
=True):
1116 self
.in_select
= Signal(reset_less
=True)
1117 self
.in_z
= FPNumBase(width
, False)
1118 self
.in_of
= Overflow()
1119 self
.temp_z
= FPNumBase(width
, False)
1120 self
.temp_of
= Overflow()
1121 self
.out_z
= FPNumBase(width
, False)
1122 self
.out_of
= Overflow()
1124 def elaborate(self
, platform
):
1127 m
.submodules
.norm1_out_z
= self
.out_z
1128 m
.submodules
.norm1_out_overflow
= self
.out_of
1129 m
.submodules
.norm1_temp_z
= self
.temp_z
1130 m
.submodules
.norm1_temp_of
= self
.temp_of
1131 m
.submodules
.norm1_in_z
= self
.in_z
1132 m
.submodules
.norm1_in_overflow
= self
.in_of
1134 in_z
= FPNumBase(self
.width
, False)
1136 m
.submodules
.norm1_insel_z
= in_z
1137 m
.submodules
.norm1_insel_overflow
= in_of
1139 # select which of temp or in z/of to use
1140 with m
.If(self
.in_select
):
1141 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1142 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1144 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
1145 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
1146 # initialise out from in (overridden below)
1147 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1148 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1149 # normalisation increase/decrease conditions
1150 decrease
= Signal(reset_less
=True)
1151 increase
= Signal(reset_less
=True)
1152 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1153 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1154 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1156 with m
.If(decrease
):
1158 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1159 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1160 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1161 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1162 self
.out_of
.round_bit
.eq(0), # reset round bit
1163 self
.out_of
.m0
.eq(in_of
.guard
),
1166 with m
.Elif(increase
):
1168 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1169 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1170 self
.out_of
.guard
.eq(in_z
.m
[0]),
1171 self
.out_of
.m0
.eq(in_z
.m
[1]),
1172 self
.out_of
.round_bit
.eq(in_of
.guard
),
1173 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
1179 class FPNorm1Single(FPState
, FPID
):
def __init__(self, width, id_wid, single_cycle=True):
    """ builds the "normalise_1" state around an FPNorm1ModSingle.

        width:        forwarded to FPNorm1ModSingle and the out_z FPNumBase
        id_wid:       forwarded to FPID.__init__ and FPNorm1ModSingle
        single_cycle: accepted for interface compatibility; not read here
    """
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "normalise_1")
    # FPNorm1ModSingle.__init__ takes (width, id_wid): passing width alone
    # raised TypeError as soon as this state was constructed.
    self.mod = FPNorm1ModSingle(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_roundz = Signal(reset_less=True)
def setup(self, m, i, in_mid):
    """ connects the normalisation module to its input and to out_z.

        when self.in_mid is not None it is additionally driven
        (combinatorially) from the in_mid argument.
    """
    self.mod.setup(m, i, self.out_z)

    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
1196 def action(self
, m
):
1198 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
1202 class FPNorm1Multi(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ sets up the "normalise_1" FSM state (multi-pass variant)

        * width: handed to the normalisation module and the FP numbers
        * id_wid: handed to FPID.__init__
    """
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "normalise_1")
    self.mod = FPNorm1ModMulti(width)
    self.stb = Signal(reset_less=True) # follows norm_stb (see setup)
    self.ack = Signal(reset=0, reset_less=True) # cleared by default in setup
    self.out_norm = Signal(reset_less=True) # handed to mod.setup; tested in action
    self.in_accept = Signal(reset_less=True) # (~ack) & stb, assigned in action
    self.temp_z = FPNumBase(width) # registered copy of out_z (action)
    self.temp_of = Overflow() # registered copy of mod.out_of (action)
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True) # latched from mod.out_of.roundz
def setup(self, m, in_z, in_of, norm_stb, in_mid):
    """ connects the multi-pass normalisation module.

        the module receives the incoming z/overflow/strobe followed by
        this state's own accept, temp and output objects.  stb mirrors
        norm_stb, ack defaults to zero every clock, and in_mid (when not
        None) is driven from the in_mid argument.
    """
    own_links = (self.in_accept, self.temp_z, self.temp_of,
                 self.out_z, self.out_norm)
    self.mod.setup(m, in_z, in_of, norm_stb, *own_links)

    m.d.comb += self.stb.eq(norm_stb)
    # sets to zero when not in normalise_1 state
    m.d.sync += self.ack.eq(0)

    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
1230 def action(self
, m
):
1232 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1233 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1234 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1235 with m
.If(self
.out_norm
):
1236 with m
.If(self
.in_accept
):
1241 m
.d
.sync
+= self
.ack
.eq(0)
1243 # normalisation not required (or done).
1245 m
.d
.sync
+= self
.ack
.eq(1)
1246 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
1249 class FPNormToPack(FPState
, FPID
):
1251 def __init__(self
, width
, id_wid
):
1252 FPID
.__init
__(self
, id_wid
)
1253 FPState
.__init
__(self
, "normalise_1")
1256 def setup(self
, m
, i
, in_mid
):
1257 """ links module to inputs and outputs
1260 # Normalisation (chained to input in_z+in_of)
1261 nmod
= FPNorm1ModSingle(self
.width
, self
.id_wid
)
1263 n_out
= nmod
.ospec()
1264 m
.d
.comb
+= n_out
.eq(nmod
.o
)
1266 # Rounding (chained to normalisation)
1267 rmod
= FPRoundMod(self
.width
, self
.id_wid
)
1268 rmod
.setup(m
, n_out
)
1269 r_out_z
= rmod
.ospec()
1270 m
.d
.comb
+= r_out_z
.eq(rmod
.out_z
)
1272 # Corrections (chained to rounding)
1273 cmod
= FPCorrectionsMod(self
.width
, self
.id_wid
)
1274 cmod
.setup(m
, r_out_z
)
1275 c_out_z
= cmod
.ospec()
1276 m
.d
.comb
+= c_out_z
.eq(cmod
.out_z
)
1278 # Pack (chained to corrections)
1279 self
.pmod
= FPPackMod(self
.width
, self
.id_wid
)
1280 self
.pmod
.setup(m
, c_out_z
)
1281 self
.out_z
= self
.pmod
.ospec()
1284 if self
.in_mid
is not None:
1285 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
def action(self, m):
    """ FSM action: forward the ID, register the packed value from the
        pack module, then move to state "pack_put_z".
    """
    # copies incoming ID to outgoing
    self.idsync(m)
    # outputs packed result
    packed = self.pmod.o.z.v
    m.d.sync += self.out_z.z.v.eq(packed)
    m.next = "pack_put_z"
def __init__(self, width, id_wid):
    """ data record holding a floating-point number and an ID

        * width: handed to FPNumBase
        * id_wid: shape of the mid Signal
    """
    self.z = FPNumBase(width, False)
    self.mid = Signal(id_wid, reset_less=True)
1300 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1305 def __init__(self
, width
, id_wid
):
1307 self
.id_wid
= id_wid
1308 self
.i
= self
.ispec()
1309 self
.out_z
= self
.ospec()
1312 return FPNorm1Data(self
.width
, self
.id_wid
)
1315 return FPRoundData(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ registers this module as submodule "roundz" of m and drives
        its input record (self.i) combinatorially from i.
    """
    m.submodules.roundz = self
    drive_input = self.i.eq(i)
    m.d.comb += drive_input
1321 def elaborate(self
, platform
):
1323 m
.d
.comb
+= self
.out_z
.eq(self
.i
)
1324 with m
.If(self
.i
.roundz
):
1325 m
.d
.comb
+= self
.out_z
.z
.m
.eq(self
.i
.z
.m
+ 1) # mantissa rounds up
1326 with m
.If(self
.i
.z
.m
== self
.i
.z
.m1s
): # all 1s
1327 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.e
+ 1) # exponent up
1331 class FPRound(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ sets up the "round" FSM state

        * width: handed to the rounding module
        * id_wid: handed to FPID.__init__ and to the rounding module
    """
    FPState.__init__(self, "round")
    FPID.__init__(self, id_wid)
    # FPRoundMod is constructed with (width, id_wid) elsewhere in this
    # file (FPNormToPack.setup); passing width alone raised a TypeError.
    self.mod = FPRoundMod(width, id_wid)
    self.out_z = self.mod.ospec()
def setup(self, m, i, in_mid):
    """ connects the rounding module to input i; when self.in_mid is
        not None it is driven (combinatorially) from in_mid.
    """
    self.mod.setup(m, i)

    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
1347 def action(self
, m
):
1349 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1350 m
.next
= "corrections"
1353 class FPCorrectionsMod
:
1355 def __init__(self
, width
, id_wid
):
1357 self
.id_wid
= id_wid
1358 self
.i
= self
.ispec()
1359 self
.out_z
= self
.ospec()
1362 return FPRoundData(self
.width
, self
.id_wid
)
1365 return FPRoundData(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ registers this module as submodule "corrections" of m and
        drives its input record (self.i) combinatorially from i.
    """
    m.submodules.corrections = self
    drive_input = self.i.eq(i)
    m.d.comb += drive_input
1373 def elaborate(self
, platform
):
1375 m
.submodules
.corr_in_z
= self
.i
.z
1376 m
.submodules
.corr_out_z
= self
.out_z
.z
1377 m
.d
.comb
+= self
.out_z
.eq(self
.i
)
1378 with m
.If(self
.i
.z
.is_denormalised
):
1379 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.N127
)
1383 class FPCorrections(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ sets up the "corrections" FSM state

        * width: handed to the corrections module
        * id_wid: handed to FPID.__init__ and to the corrections module
    """
    FPState.__init__(self, "corrections")
    FPID.__init__(self, id_wid)
    # FPCorrectionsMod.__init__ requires both width and id_wid; passing
    # width alone raised a TypeError.
    self.mod = FPCorrectionsMod(width, id_wid)
    self.out_z = self.ospec()
1392 return self
.mod
.ispec()
1395 return self
.mod
.ospec()
def setup(self, m, in_z, in_mid):
    """ connects the corrections module to in_z; when self.in_mid is
        not None it is driven (combinatorially) from in_mid.
    """
    self.mod.setup(m, in_z)
    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
1404 def action(self
, m
):
1406 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
def __init__(self, width, id_wid):
    """ data record holding an output floating-point number and an ID

        * width: handed to FPNumOut
        * id_wid: shape of the mid Signal
    """
    self.z = FPNumOut(width, False)
    self.mid = Signal(id_wid, reset_less=True)
1417 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1422 def __init__(self
, width
, id_wid
):
1424 self
.id_wid
= id_wid
1425 self
.i
= self
.ispec()
1426 self
.o
= self
.ospec()
1429 return FPRoundData(self
.width
, self
.id_wid
)
1432 return FPPackData(self
.width
, self
.id_wid
)
def setup(self, m, in_z):
    """ registers this module as submodule "pack" of m and drives its
        input record (self.i) combinatorially from in_z.
    """
    m.submodules.pack = self
    drive_input = self.i.eq(in_z)
    m.d.comb += drive_input
1440 def elaborate(self
, platform
):
1442 m
.submodules
.pack_in_z
= self
.i
.z
1443 with m
.If(self
.i
.z
.is_overflowed
):
1444 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.z
.s
)
1446 m
.d
.comb
+= self
.o
.z
.create(self
.i
.z
.s
, self
.i
.z
.e
, self
.i
.z
.m
)
def __init__(self, width, id_wid):
    """ data record holding an output floating-point number and an ID

        * width: handed to FPNumOut
        * id_wid: shape of the mid Signal
    """
    self.z = FPNumOut(width, False)
    self.mid = Signal(id_wid, reset_less=True)
1456 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1459 class FPPack(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ sets up the "pack" FSM state

        * width: handed to the pack module
        * id_wid: handed to FPID.__init__ and to the pack module
    """
    FPState.__init__(self, "pack")
    FPID.__init__(self, id_wid)
    # FPPackMod is constructed with (width, id_wid) elsewhere in this
    # file (FPNormToPack.setup); passing width alone raised a TypeError.
    self.mod = FPPackMod(width, id_wid)
    self.out_z = self.ospec()
1468 return self
.mod
.ispec()
1471 return self
.mod
.ospec()
def setup(self, m, in_z, in_mid):
    """ connects the pack module to in_z; when self.in_mid is not None
        it is driven (combinatorially) from in_mid.
    """
    self.mod.setup(m, in_z)
    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
1480 def action(self
, m
):
1482 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1483 m
.next
= "pack_put_z"
1486 class FPPutZ(FPState
):
1488 def __init__(self
, state
, in_z
, out_z
, in_mid
, out_mid
, to_state
=None):
1489 FPState
.__init
__(self
, state
)
1490 if to_state
is None:
1491 to_state
= "get_ops"
1492 self
.to_state
= to_state
1495 self
.in_mid
= in_mid
1496 self
.out_mid
= out_mid
1498 def action(self
, m
):
1499 if self
.in_mid
is not None:
1500 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
1502 self
.out_z
.z
.v
.eq(self
.in_z
.v
)
1504 with m
.If(self
.out_z
.z
.stb
& self
.out_z
.z
.ack
):
1505 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(0)
1506 m
.next
= self
.to_state
1508 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(1)
1511 class FPPutZIdx(FPState
):
1513 def __init__(self
, state
, in_z
, out_zs
, in_mid
, to_state
=None):
1514 FPState
.__init
__(self
, state
)
1515 if to_state
is None:
1516 to_state
= "get_ops"
1517 self
.to_state
= to_state
1519 self
.out_zs
= out_zs
1520 self
.in_mid
= in_mid
1522 def action(self
, m
):
1523 outz_stb
= Signal(reset_less
=True)
1524 outz_ack
= Signal(reset_less
=True)
1525 m
.d
.comb
+= [outz_stb
.eq(self
.out_zs
[self
.in_mid
].stb
),
1526 outz_ack
.eq(self
.out_zs
[self
.in_mid
].ack
),
1529 self
.out_zs
[self
.in_mid
].v
.eq(self
.in_z
.v
)
1531 with m
.If(outz_stb
& outz_ack
):
1532 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(0)
1533 m
.next
= self
.to_state
1535 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(1)
1537 class FPADDBaseData
:
1539 def __init__(self
, width
, id_wid
):
1541 self
.id_wid
= id_wid
1542 self
.a
= Signal(width
)
1543 self
.b
= Signal(width
)
1544 self
.mid
= Signal(id_wid
, reset_less
=True)
1547 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """ data record holding an FPOp result and an ID

        * width: handed to FPOp
        * id_wid: shape of the mid Signal
    """
    self.z = FPOp(width)
    self.mid = Signal(id_wid, reset_less=True)
1556 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1559 class FPADDBaseMod(FPID
):
1561 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1564 * width: bit-width of IEEE754. supported: 16, 32, 64
1565 * id_wid: an identifier that is sync-connected to the input
1566 * single_cycle: True indicates each stage to complete in 1 clock
1567 * compact: True indicates a reduced number of stages
1569 FPID
.__init
__(self
, id_wid
)
1571 self
.id_wid
= id_wid
1572 self
.single_cycle
= single_cycle
1573 self
.compact
= compact
1575 self
.in_t
= Trigger()
1576 self
.i
= self
.ispec()
1577 self
.o
= self
.ospec()
1582 return FPADDBaseData(self
.width
, self
.id_wid
)
1585 return FPOpData(self
.width
, self
.id_wid
)
1587 def add_state(self
, state
):
1588 self
.states
.append(state
)
1591 def get_fragment(self
, platform
=None):
1592 """ creates the HDL code-fragment for FPAdd
1595 m
.submodules
.out_z
= self
.o
.z
1596 m
.submodules
.in_t
= self
.in_t
1598 self
.get_compact_fragment(m
, platform
)
1600 self
.get_longer_fragment(m
, platform
)
1602 with m
.FSM() as fsm
:
1604 for state
in self
.states
:
1605 with m
.State(state
.state_from
):
1610 def get_longer_fragment(self
, m
, platform
=None):
1612 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1614 get
.setup(m
, self
.in_a
, self
.in_b
, self
.in_t
.stb
, self
.in_t
.ack
)
1618 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1619 sc
.setup(m
, a
, b
, self
.in_mid
)
1621 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1622 dn
.setup(m
, a
, b
, sc
.in_mid
)
1624 if self
.single_cycle
:
1625 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1626 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1628 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1629 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1631 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1632 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1634 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1635 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1637 if self
.single_cycle
:
1638 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1639 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1641 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1642 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1644 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1645 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1647 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1648 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1650 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1651 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1653 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1654 pa
.in_mid
, self
.out_mid
))
1656 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1657 pa
.in_mid
, self
.out_mid
))
def get_compact_fragment(self, m, platform=None):
    """ registers the reduced set of states and chains each one's
        setup() to the outputs of the state before it.

        states are added in this order: FPGet2Op, FPAddSpecialCasesDeNorm,
        FPAddAlignSingleAdd, FPNormToPack, then two FPPutZ output states
        ("pack_put_z" and "put_z").  relies on add_state() handing back
        the state it was given.
    """
    width, id_wid = self.width, self.id_wid

    fetch = self.add_state(FPGet2Op("get_ops", "special_cases",
                                    width, id_wid))
    fetch.setup(m, self.i, self.in_t.stb, self.in_t.ack)

    special = self.add_state(FPAddSpecialCasesDeNorm(width, id_wid))
    special.setup(m, fetch.o, self.in_mid)

    align_add = self.add_state(FPAddAlignSingleAdd(width, id_wid))
    align_add.setup(m, special.o, special.in_mid)

    norm_pack = self.add_state(FPNormToPack(width, id_wid))
    norm_pack.setup(m, align_add.a1o, align_add.in_mid)

    # output of the normal path
    self.add_state(FPPutZ("pack_put_z", norm_pack.out_z.z, self.o,
                          norm_pack.in_mid, self.out_mid))

    # output taken directly from the special-cases state
    self.add_state(FPPutZ("put_z", special.out_z.z, self.o,
                          special.in_mid, self.out_mid))
1681 class FPADDBase(FPState
, FPID
):
1683 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1686 * width: bit-width of IEEE754. supported: 16, 32, 64
1687 * id_wid: an identifier that is sync-connected to the input
1688 * single_cycle: True indicates each stage to complete in 1 clock
1690 FPID
.__init
__(self
, id_wid
)
1691 FPState
.__init
__(self
, "fpadd")
1693 self
.single_cycle
= single_cycle
1694 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1695 self
.o
= self
.ospec()
1697 self
.in_t
= Trigger()
1698 self
.i
= self
.ispec()
1700 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1701 self
.in_accept
= Signal(reset_less
=True)
1702 self
.add_stb
= Signal(reset_less
=True)
1703 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1706 return self
.mod
.ispec()
1709 return self
.mod
.ospec()
def setup(self, m, i, add_stb, in_mid):
    """ wires this state to the inner adder module (self.mod) and
        registers that module as submodule "fpadd" of m.

        combinatorial: operands and ID pass through this state into the
        module; the module's result, result strobe and ID are passed
        back out; the in_t and o.z handshakes are cross-connected.
        synchronous: add_stb is registered, while add_ack and o.z.ack
        default to zero every clock.
    """
    inner = self.mod

    # operands and ID: caller -> this state -> inner module
    m.d.comb += self.i.eq(i)
    m.d.comb += inner.i.eq(self.i)
    m.d.comb += self.in_mid.eq(in_mid)
    m.d.comb += inner.in_mid.eq(self.in_mid)

    # completion flag comes from the inner module's output trigger
    m.d.comb += self.z_done.eq(inner.o.z.trigger)
    #self.add_stb.eq(add_stb),

    # input handshake
    m.d.comb += inner.in_t.stb.eq(self.in_t.stb)
    m.d.comb += self.in_t.ack.eq(inner.in_t.ack)

    # result, ID and output handshake
    m.d.comb += self.o.mid.eq(inner.o.mid)
    m.d.comb += self.o.z.v.eq(inner.o.z.v)
    m.d.comb += self.o.z.stb.eq(inner.o.z.stb)
    m.d.comb += inner.o.z.ack.eq(self.o.z.ack)

    m.d.sync += [
        self.add_stb.eq(add_stb),
        self.add_ack.eq(0), # sets to zero when not in active state
        self.o.z.ack.eq(0), # likewise
    ]
    #m.d.sync += self.in_t.stb.eq(0)

    m.submodules.fpadd = inner
1733 def action(self
, m
):
1735 # in_accept is set on incoming strobe HIGH and ack LOW.
1736 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1738 #with m.If(self.in_t.ack):
1739 # m.d.sync += self.in_t.stb.eq(0)
1740 with m
.If(~self
.z_done
):
1741 # not done: test for accepting an incoming operand pair
1742 with m
.If(self
.in_accept
):
1744 self
.add_ack
.eq(1), # acknowledge receipt...
1745 self
.in_t
.stb
.eq(1), # initiate add
1748 m
.d
.sync
+= [self
.add_ack
.eq(0),
1749 self
.in_t
.stb
.eq(0),
1753 # done: acknowledge, and write out id and value
1754 m
.d
.sync
+= [self
.add_ack
.eq(1),
1761 if self
.in_mid
is not None:
1762 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1765 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1767 # move to output state on detecting z ack
1768 with m
.If(self
.out_z
.trigger
):
1769 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1772 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1776 def __init__(self
, width
, id_wid
):
1778 self
.id_wid
= id_wid
1780 for i
in range(rs_sz
):
1782 out_z
.name
= "out_z_%d" % i
1784 self
.res
= Array(res
)
1785 self
.in_z
= FPOp(width
)
1786 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
def setup(self, m, in_z, in_mid):
    """ drives the local in_z and in_mid (combinatorially) from the
        supplied in_z and in_mid.
    """
    m.d.comb += self.in_z.eq(in_z)
    m.d.comb += self.in_mid.eq(in_mid)
1792 def get_fragment(self
, platform
=None):
1793 """ creates the HDL code-fragment for FPAdd
1796 m
.submodules
.res_in_z
= self
.in_z
1797 m
.submodules
+= self
.res
1809 """ FPADD: stages as follows:
1815 FPAddBase---> FPAddBaseMod
1817 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1819 FPAddBase is tricky: it is both a stage and *has* stages.
1820 Connection to FPAddBaseMod therefore requires an in stb/ack
1821 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1822 needs to be the thing that raises the incoming stb.
1825 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1828 * width: bit-width of IEEE754. supported: 16, 32, 64
1829 * id_wid: an identifier that is sync-connected to the input
1830 * single_cycle: True indicates each stage to complete in 1 clock
1833 self
.id_wid
= id_wid
1834 self
.single_cycle
= single_cycle
1836 #self.out_z = FPOp(width)
1837 self
.ids
= FPID(id_wid
)
1840 for i
in range(rs_sz
):
1843 in_a
.name
= "in_a_%d" % i
1844 in_b
.name
= "in_b_%d" % i
1845 rs
.append((in_a
, in_b
))
1849 for i
in range(rs_sz
):
1851 out_z
.name
= "out_z_%d" % i
1853 self
.res
= Array(res
)
1857 def add_state(self
, state
):
1858 self
.states
.append(state
)
1861 def get_fragment(self
, platform
=None):
1862 """ creates the HDL code-fragment for FPAdd
1865 m
.submodules
+= self
.rs
1867 in_a
= self
.rs
[0][0]
1868 in_b
= self
.rs
[0][1]
1870 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1875 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1880 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1881 ab
= self
.add_state(ab
)
1882 abd
= ab
.ispec() # create an input spec object for FPADDBase
1883 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1884 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1887 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1890 with m
.FSM() as fsm
:
1892 for state
in self
.states
:
1893 with m
.State(state
.state_from
):
1899 if __name__
== "__main__":
1901 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1902 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1903 alu
.rs
[0][1].ports() + \
1904 alu
.res
[0].ports() + \
1905 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1907 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1908 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1909 alu
.in_t
.ports() + \
1910 alu
.out_z
.ports() + \
1911 [alu
.in_mid
, alu
.out_mid
])
1914 # works... but don't use, just do "python fname.py convert -t v"
1915 #print (verilog.convert(alu, ports=[
1916 # ports=alu.in_a.ports() + \
1917 # alu.in_b.ports() + \
1918 # alu.out_z.ports())