457a2fc7a965d39f0a0f8a05100d48dd3079ff53
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 #from fpbase import FPNumShiftMultiRight
15 class FPState(FPBase
):
def __init__(self, state_from):
    """ stores the identifier of this state

        state_from: kept verbatim on the instance.  subclasses in this
                    file pass strings such as "align" or "add_0"
    """
    self.state_from = state_from
19 def set_inputs(self
, inputs
):
21 for k
,v
in inputs
.items():
24 def set_outputs(self
, outputs
):
25 self
.outputs
= outputs
26 for k
,v
in outputs
.items():
30 class FPGetSyncOpsMod
:
31 def __init__(self
, width
, num_ops
=2):
33 self
.num_ops
= num_ops
36 for i
in range(num_ops
):
37 inops
.append(Signal(width
, reset_less
=True))
38 outops
.append(Signal(width
, reset_less
=True))
41 self
.stb
= Signal(num_ops
)
43 self
.ready
= Signal(reset_less
=True)
44 self
.out_decode
= Signal(reset_less
=True)
46 def elaborate(self
, platform
):
48 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
49 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
50 with m
.If(self
.out_decode
):
51 for i
in range(self
.num_ops
):
53 self
.out_op
[i
].eq(self
.in_op
[i
]),
58 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
62 def __init__(self
, width
, num_ops
):
63 Trigger
.__init
__(self
)
65 self
.num_ops
= num_ops
68 for i
in range(num_ops
):
69 res
.append(Signal(width
))
74 for i
in range(self
.num_ops
):
82 def __init__(self
, width
, num_ops
=2, num_rows
=4):
84 self
.num_ops
= num_ops
85 self
.num_rows
= num_rows
86 self
.mmax
= int(log(self
.num_rows
) / log(2))
88 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
89 for i
in range(num_rows
):
90 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
91 self
.rs
= Array(self
.rs
)
93 self
.out_op
= FPOps(width
, num_ops
)
95 def elaborate(self
, platform
):
98 pe
= PriorityEncoder(self
.num_rows
)
99 m
.submodules
.selector
= pe
100 m
.submodules
.out_op
= self
.out_op
101 m
.submodules
+= self
.rs
103 # connect priority encoder
105 for i
in range(self
.num_rows
):
106 in_ready
.append(self
.rs
[i
].ready
)
107 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
109 active
= Signal(reset_less
=True)
110 out_en
= Signal(reset_less
=True)
111 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
112 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
114 # encoder active: ack relevant input, record MID, pass output
117 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
118 m
.d
.sync
+= rs
.ack
.eq(0)
119 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
120 for j
in range(self
.num_ops
):
121 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
123 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
124 # acks all default to zero
125 for i
in range(self
.num_rows
):
126 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
132 for i
in range(self
.num_rows
):
134 res
+= inop
.in_op
+ [inop
.stb
]
135 return self
.out_op
.ports() + res
+ [self
.mid
]
def __init__(self, width):
    """ creates the operand input, the captured output and decode flag

        width: passed to FPOp for the input and used as the bit-width
               of the out_op Signal
    """
    self.in_op = FPOp(width)
    self.out_op = Signal(width)
    # driven in elaborate() from in_op.ack & in_op.stb
    self.out_decode = Signal(reset_less=True)
144 def elaborate(self
, platform
):
146 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
147 m
.submodules
.get_op_in
= self
.in_op
148 #m.submodules.get_op_out = self.out_op
149 with m
.If(self
.out_decode
):
151 self
.out_op
.eq(self
.in_op
.v
),
156 class FPGetOp(FPState
):
160 def __init__(self
, in_state
, out_state
, in_op
, width
):
161 FPState
.__init
__(self
, in_state
)
162 self
.out_state
= out_state
163 self
.mod
= FPGetOpMod(width
)
165 self
.out_op
= Signal(width
)
166 self
.out_decode
= Signal(reset_less
=True)
168 def setup(self
, m
, in_op
):
169 """ links module to inputs and outputs
171 setattr(m
.submodules
, self
.state_from
, self
.mod
)
172 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
173 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
176 with m
.If(self
.out_decode
):
177 m
.next
= self
.out_state
179 self
.in_op
.ack
.eq(0),
180 self
.out_op
.eq(self
.mod
.out_op
)
183 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
186 class FPGet2OpMod(Trigger
):
187 def __init__(self
, width
, id_wid
):
188 Trigger
.__init
__(self
)
191 self
.i
= self
.ispec()
192 self
.o
= self
.ospec()
195 return FPADDBaseData(self
.width
, self
.id_wid
)
198 return FPNumBase2Ops(self
.width
, self
.id_wid
)
200 def elaborate(self
, platform
):
201 m
= Trigger
.elaborate(self
, platform
)
202 m
.submodules
.get_op1_out
= self
.o
.a
203 m
.submodules
.get_op2_out
= self
.o
.b
204 out_op1
= FPNumIn(None, self
.width
)
205 out_op2
= FPNumIn(None, self
.width
)
206 with m
.If(self
.trigger
):
208 out_op1
.decode(self
.i
.a
),
209 out_op2
.decode(self
.i
.b
),
210 self
.o
.a
.eq(out_op1
),
211 self
.o
.b
.eq(out_op2
),
216 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """ creates the two-operand fetch state and its FPGet2OpMod

        in_state:  handed to FPState.__init__ as this state's identifier
        out_state: assigned to m.next once out_decode is asserted
        width:     forwarded to FPGet2OpMod
        id_wid:    forwarded to FPGet2OpMod
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state
    self.mod = FPGet2OpMod(width, id_wid)
    # output record with the same layout as the module's output
    self.o = self.mod.ospec()
    self.in_stb = Signal(reset_less=True)
    # setup() drives this combinatorially from self.mod.ack
    self.out_ack = Signal(reset_less=True)
    # setup() drives this combinatorially from self.mod.trigger
    self.out_decode = Signal(reset_less=True)
229 def setup(self
, m
, i
, in_stb
, in_ack
):
230 """ links module to inputs and outputs
232 m
.submodules
.get_ops
= self
.mod
233 m
.d
.comb
+= self
.mod
.i
.eq(i
)
234 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
235 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
236 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
237 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
240 with m
.If(self
.out_decode
):
241 m
.next
= self
.out_state
244 self
.o
.eq(self
.mod
.o
),
247 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """ creates a pair of FPNumBase operands plus a multiplexer id

        width:   forwarded to both FPNumBase instances
        id_wid:  bit-width of the mid Signal
        m_extra: forwarded unchanged to both FPNumBase instances
    """
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)
    self.mid = Signal(id_wid, reset_less=True)
258 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
261 class FPAddSpecialCasesMod
:
262 """ special cases: NaNs, infs, zeros, denormalised
263 NOTE: some of these are unique to add. see "Special Operations"
264 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
267 def __init__(self
, width
, id_wid
):
270 self
.i
= self
.ispec()
271 self
.o
= self
.ospec()
272 self
.out_do_z
= Signal(reset_less
=True)
275 return FPNumBase2Ops(self
.width
, self
.id_wid
)
278 return FPPackData(self
.width
, self
.id_wid
)
280 def setup(self
, m
, i
, out_do_z
):
281 """ links module to inputs and outputs
283 m
.submodules
.specialcases
= self
284 m
.d
.comb
+= self
.i
.eq(i
)
285 m
.d
.comb
+= out_do_z
.eq(self
.out_do_z
)
287 def elaborate(self
, platform
):
290 m
.submodules
.sc_in_a
= self
.i
.a
291 m
.submodules
.sc_in_b
= self
.i
.b
292 m
.submodules
.sc_out_z
= self
.o
.z
295 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
298 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
300 # if a is NaN or b is NaN return NaN
301 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
302 m
.d
.comb
+= self
.out_do_z
.eq(1)
303 m
.d
.comb
+= self
.o
.z
.nan(0)
305 # XXX WEIRDNESS for FP16 non-canonical NaN handling
308 ## if a is zero and b is NaN return -b
309 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
310 # m.d.comb += self.out_do_z.eq(1)
311 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
313 ## if b is zero and a is NaN return -a
314 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
315 # m.d.comb += self.out_do_z.eq(1)
316 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
318 ## if a is -zero and b is NaN return -b
319 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
320 # m.d.comb += self.out_do_z.eq(1)
321 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
323 ## if b is -zero and a is NaN return -a
324 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
325 # m.d.comb += self.out_do_z.eq(1)
326 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
328 # if a is inf return inf (or NaN)
329 with m
.Elif(self
.i
.a
.is_inf
):
330 m
.d
.comb
+= self
.out_do_z
.eq(1)
331 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
332 # if a is inf and signs don't match return NaN
333 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
334 m
.d
.comb
+= self
.o
.z
.nan(0)
336 # if b is inf return inf
337 with m
.Elif(self
.i
.b
.is_inf
):
338 m
.d
.comb
+= self
.out_do_z
.eq(1)
339 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
341 # if a is zero and b zero return signed-a/b
342 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
343 m
.d
.comb
+= self
.out_do_z
.eq(1)
344 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
348 # if a is zero return b
349 with m
.Elif(self
.i
.a
.is_zero
):
350 m
.d
.comb
+= self
.out_do_z
.eq(1)
351 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
354 # if b is zero return a
355 with m
.Elif(self
.i
.b
.is_zero
):
356 m
.d
.comb
+= self
.out_do_z
.eq(1)
357 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
360 # if a equal to -b return zero (+ve zero)
361 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
362 m
.d
.comb
+= self
.out_do_z
.eq(1)
363 m
.d
.comb
+= self
.o
.z
.zero(0)
365 # Denormalised Number checks
367 m
.d
.comb
+= self
.out_do_z
.eq(0)
373 def __init__(self
, id_wid
):
376 self
.in_mid
= Signal(id_wid
, reset_less
=True)
377 self
.out_mid
= Signal(id_wid
, reset_less
=True)
383 if self
.id_wid
is not None:
384 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
387 class FPAddSpecialCases(FPState
, FPID
):
388 """ special cases: NaNs, infs, zeros, denormalised
389 NOTE: some of these are unique to add. see "Special Operations"
390 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ creates the "special_cases" state and its combinatorial module

        width:  forwarded to FPAddSpecialCasesMod
        id_wid: forwarded to FPID.__init__ and FPAddSpecialCasesMod
    """
    FPState.__init__(self, "special_cases")
    FPID.__init__(self, id_wid)
    # FPAddSpecialCasesMod.__init__ takes (width, id_wid) with no
    # defaults: omitting id_wid raises TypeError on construction
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
400 def setup(self
, m
, in_a
, in_b
, in_mid
):
401 """ links module to inputs and outputs
403 self
.mod
.setup(m
, in_a
, in_b
, self
.out_do_z
)
404 if self
.in_mid
is not None:
405 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
409 with m
.If(self
.out_do_z
):
410 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
413 m
.next
= "denormalise"
416 class FPAddSpecialCasesDeNorm(FPState
, FPID
):
417 """ special cases: NaNs, infs, zeros, denormalised
418 NOTE: some of these are unique to add. see "Special Operations"
419 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ creates the combined special-cases and denormalise state

        width:  forwarded to both sub-modules
        id_wid: forwarded to FPID.__init__ and both sub-modules
    """
    FPState.__init__(self, "special_cases")
    FPID.__init__(self, id_wid)
    # special-cases module, with an output record of matching layout
    self.smod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.smod.ospec()
    # setup() passes this to smod.setup() as its out_do_z argument
    self.out_do_z = Signal(reset_less=True)
    # denormalise module, with an output record of matching layout
    self.dmod = FPAddDeNormMod(width, id_wid)
    self.o = self.dmod.ospec()
432 def setup(self
, m
, i
, in_mid
):
433 """ links module to inputs and outputs
435 self
.smod
.setup(m
, i
, self
.out_do_z
)
436 self
.dmod
.setup(m
, i
)
437 if self
.in_mid
is not None:
438 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
442 with m
.If(self
.out_do_z
):
443 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
447 m
.d
.sync
+= self
.o
.a
.eq(self
.dmod
.o
.a
)
448 m
.d
.sync
+= self
.o
.b
.eq(self
.dmod
.o
.b
)
451 class FPAddDeNormMod(FPState
):
453 def __init__(self
, width
, id_wid
):
456 self
.i
= self
.ispec()
457 self
.o
= self
.ospec()
460 return FPNumBase2Ops(self
.width
, self
.id_wid
)
463 return FPNumBase2Ops(self
.width
, self
.id_wid
)
465 def setup(self
, m
, i
):
466 """ links module to inputs and outputs
468 m
.submodules
.denormalise
= self
469 m
.d
.comb
+= self
.i
.eq(i
)
471 def elaborate(self
, platform
):
473 m
.submodules
.denorm_in_a
= self
.i
.a
474 m
.submodules
.denorm_in_b
= self
.i
.b
475 m
.submodules
.denorm_out_a
= self
.o
.a
476 m
.submodules
.denorm_out_b
= self
.o
.b
477 # hmmm, don't like repeating identical code
478 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
479 with m
.If(self
.i
.a
.exp_n127
):
480 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
482 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
484 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
485 with m
.If(self
.i
.b
.exp_n127
):
486 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit b exponent
488 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
493 class FPAddDeNorm(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ creates the "denormalise" state and its combinatorial module

        width:  forwarded to FPAddDeNormMod and both FPNumBase outputs
        id_wid: forwarded to FPID.__init__ and FPAddDeNormMod
    """
    FPState.__init__(self, "denormalise")
    FPID.__init__(self, id_wid)
    # FPAddDeNormMod.__init__ takes (width, id_wid) with no defaults:
    # omitting id_wid raises TypeError on construction
    self.mod = FPAddDeNormMod(width, id_wid)
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
502 def setup(self
, m
, in_a
, in_b
, in_mid
):
503 """ links module to inputs and outputs
505 self
.mod
.setup(m
, in_a
, in_b
)
506 if self
.in_mid
is not None:
507 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
511 # Denormalised Number checks
513 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
514 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
517 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ creates the operand inputs/outputs and the exponents-equal flag

        width: forwarded to the FPNumBase inputs and FPNumIn outputs
    """
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # elaborate() defaults this to 0 and also contains an assignment to 1
    self.exp_eq = Signal(reset_less=True)
526 def elaborate(self
, platform
):
527 # This one however (single-cycle) will do the shift
532 m
.submodules
.align_in_a
= self
.in_a
533 m
.submodules
.align_in_b
= self
.in_b
534 m
.submodules
.align_out_a
= self
.out_a
535 m
.submodules
.align_out_b
= self
.out_b
537 # NOTE: this does *not* do single-cycle multi-shifting,
538 # it *STAYS* in the align state until exponents match
540 # exponent of a greater than b: shift b down
541 m
.d
.comb
+= self
.exp_eq
.eq(0)
542 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
543 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
544 agtb
= Signal(reset_less
=True)
545 altb
= Signal(reset_less
=True)
546 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
547 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
549 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
550 # exponent of b greater than a: shift a down
552 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
553 # exponents equal: move to next stage.
555 m
.d
.comb
+= self
.exp_eq
.eq(1)
559 class FPAddAlignMulti(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ creates the "align" state wrapping an FPAddAlignMultiMod

        width:  forwarded to the module and the FPNumIn outputs
        id_wid: forwarded to FPID.__init__
    """
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)
    # loaded synchronously from self.mod.out_a / self.mod.out_b
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # setup() drives this combinatorially from self.mod.exp_eq
    self.exp_eq = Signal(reset_less=True)
569 def setup(self
, m
, in_a
, in_b
, in_mid
):
570 """ links module to inputs and outputs
572 m
.submodules
.align
= self
.mod
573 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
574 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
575 #m.d.comb += self.out_a.eq(self.mod.out_a)
576 #m.d.comb += self.out_b.eq(self.mod.out_b)
577 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
578 if self
.in_mid
is not None:
579 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
583 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
584 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
585 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """ creates a pair of FPNumIn operands plus a multiplexer id

        width:  forwarded to both FPNumIn instances
        id_wid: bit-width of the mid Signal
    """
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    self.mid = Signal(id_wid, reset_less=True)
597 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
600 class FPAddAlignSingleMod
:
602 def __init__(self
, width
, id_wid
):
605 self
.i
= self
.ispec()
606 self
.o
= self
.ospec()
609 return FPNumBase2Ops(self
.width
, self
.id_wid
)
612 return FPNumIn2Ops(self
.width
, self
.id_wid
)
614 def setup(self
, m
, i
):
615 """ links module to inputs and outputs
617 m
.submodules
.align
= self
618 m
.d
.comb
+= self
.i
.eq(i
)
620 def elaborate(self
, platform
):
621 """ Aligns A against B or B against A, depending on which has the
622 greater exponent. This is done in a *single* cycle using
623 variable-width bit-shift
625 the shifter used here is quite expensive in terms of gates.
626 Mux A or B in (and out) into temporaries, as only one of them
627 needs to be aligned against the other
631 m
.submodules
.align_in_a
= self
.i
.a
632 m
.submodules
.align_in_b
= self
.i
.b
633 m
.submodules
.align_out_a
= self
.o
.a
634 m
.submodules
.align_out_b
= self
.o
.b
636 # temporary (muxed) input and output to be shifted
637 t_inp
= FPNumBase(self
.width
)
638 t_out
= FPNumIn(None, self
.width
)
639 espec
= (len(self
.i
.a
.e
), True)
640 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
641 m
.submodules
.align_t_in
= t_inp
642 m
.submodules
.align_t_out
= t_out
643 m
.submodules
.multishift_r
= msr
645 ediff
= Signal(espec
, reset_less
=True)
646 ediffr
= Signal(espec
, reset_less
=True)
647 tdiff
= Signal(espec
, reset_less
=True)
648 elz
= Signal(reset_less
=True)
649 egz
= Signal(reset_less
=True)
651 # connect multi-shifter to t_inp/out mantissa (and tdiff)
652 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
653 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
654 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
655 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
656 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
658 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
659 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
660 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
661 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
663 # default: A-exp == B-exp, A and B untouched (fall through)
664 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
665 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
666 # only one shifter (muxed)
667 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
668 # exponent of a greater than b: shift b down
670 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
673 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
675 # exponent of b greater than a: shift a down
677 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
680 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
685 class FPAddAlignSingle(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ creates the "align" state wrapping an FPAddAlignSingleMod

        width:  forwarded to the module and the FPNumIn outputs
        id_wid: forwarded to FPID.__init__ and the module
    """
    FPState.__init__(self, "align")
    FPID.__init__(self, id_wid)
    self.mod = FPAddAlignSingleMod(width, id_wid)
    # loaded synchronously from self.mod.out_a / self.mod.out_b
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
694 def setup(self
, m
, in_a
, in_b
, in_mid
):
695 """ links module to inputs and outputs
697 self
.mod
.setup(m
, in_a
, in_b
)
698 if self
.in_mid
is not None:
699 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
703 # NOTE: could be done as comb
704 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
705 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
709 class FPAddAlignSingleAdd(FPState
, FPID
):
711 def __init__(self
, width
, id_wid
):
712 FPState
.__init
__(self
, "align")
713 FPID
.__init
__(self
, id_wid
)
716 self
.o
= self
.ispec()
717 self
.a1o
= self
.ospec()
720 return FPNumBase2Ops(self
.width
, self
.id_wid
) # AlignSingle ispec
723 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
725 def setup(self
, m
, i
, in_mid
):
726 """ links module to inputs and outputs
728 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
730 m
.d
.comb
+= self
.o
.eq(mod
.o
)
732 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
733 a0mod
.setup(m
, self
.o
)
735 m
.d
.comb
+= a0o
.eq(a0mod
.o
)
737 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
739 self
.a1modo
= a1mod
.o
741 if self
.in_mid
is not None:
742 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
746 m
.d
.sync
+= self
.a1o
.eq(self
.a1modo
)
747 m
.next
= "normalise_1"
750 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """ creates the add-stage-0 result record: z, tot and mid

        width:  forwarded to FPNumBase
        id_wid: bit-width of the mid Signal
    """
    # NOTE(review): second argument is presumably m_extra=False - confirm
    self.z = FPNumBase(width, False)
    # mantissa total: four bits wider than z's mantissa width
    self.tot = Signal(self.z.m_width + 4, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
758 return [self
.z
.eq(i
.z
), self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
761 class FPAddStage0Mod
:
763 def __init__(self
, width
, id_wid
):
766 self
.i
= self
.ispec()
767 self
.o
= self
.ospec()
770 return FPNumBase2Ops(self
.width
, self
.id_wid
)
773 return FPAddStage0Data(self
.width
, self
.id_wid
)
775 def setup(self
, m
, i
):
776 """ links module to inputs and outputs
778 m
.submodules
.add0
= self
779 m
.d
.comb
+= self
.i
.eq(i
)
781 def elaborate(self
, platform
):
783 m
.submodules
.add0_in_a
= self
.i
.a
784 m
.submodules
.add0_in_b
= self
.i
.b
785 m
.submodules
.add0_out_z
= self
.o
.z
787 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
789 # store intermediate tests (and zero-extended mantissas)
790 seq
= Signal(reset_less
=True)
791 mge
= Signal(reset_less
=True)
792 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
793 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
794 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
795 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
796 am0
.eq(Cat(self
.i
.a
.m
, 0)),
797 bm0
.eq(Cat(self
.i
.b
.m
, 0))
799 # same-sign (both negative or both positive) add mantissas
802 self
.o
.tot
.eq(am0
+ bm0
),
803 self
.o
.z
.s
.eq(self
.i
.a
.s
)
805 # a mantissa greater than b, use a
808 self
.o
.tot
.eq(am0
- bm0
),
809 self
.o
.z
.s
.eq(self
.i
.a
.s
)
811 # b mantissa greater than a, use b
814 self
.o
.tot
.eq(bm0
- am0
),
815 self
.o
.z
.s
.eq(self
.i
.b
.s
)
820 class FPAddStage0(FPState
, FPID
):
821 """ First stage of add. covers same-sign (add) and subtract
822 special-casing when mantissas are greater or equal, to
823 give greatest accuracy.
def __init__(self, width, id_wid):
    """ creates the "add_0" state and its combinatorial module

        width:  forwarded to FPAddStage0Mod
        id_wid: forwarded to FPID.__init__ and FPAddStage0Mod
    """
    FPState.__init__(self, "add_0")
    FPID.__init__(self, id_wid)
    # FPAddStage0Mod.__init__ takes (width, id_wid) with no defaults:
    # omitting id_wid raises TypeError on construction
    self.mod = FPAddStage0Mod(width, id_wid)
    self.o = self.mod.ospec()
832 def setup(self
, m
, i
, in_mid
):
833 """ links module to inputs and outputs
836 if self
.in_mid
is not None:
837 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
841 # NOTE: these could be done as combinatorial (merge add0+add1)
842 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
846 class FPAddStage1Data
:
848 def __init__(self
, width
, id_wid
):
849 self
.z
= FPNumBase(width
, False)
851 self
.mid
= Signal(id_wid
, reset_less
=True)
854 return [self
.z
.eq(i
.z
), self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
858 class FPAddStage1Mod(FPState
):
859 """ Second stage of add: preparation for normalisation.
860 detects when tot sum is too big (tot[27] is kinda a carry bit)
863 def __init__(self
, width
, id_wid
):
866 self
.i
= self
.ispec()
867 self
.o
= self
.ospec()
870 return FPAddStage0Data(self
.width
, self
.id_wid
)
873 return FPAddStage1Data(self
.width
, self
.id_wid
)
875 def setup(self
, m
, i
):
876 """ links module to inputs and outputs
878 m
.submodules
.add1
= self
879 m
.submodules
.add1_out_overflow
= self
.o
.of
881 m
.d
.comb
+= self
.i
.eq(i
)
883 def elaborate(self
, platform
):
885 #m.submodules.norm1_in_overflow = self.in_of
886 #m.submodules.norm1_out_overflow = self.out_of
887 #m.submodules.norm1_in_z = self.in_z
888 #m.submodules.norm1_out_z = self.out_z
889 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
890 # tot[-1] (MSB) gets set when the sum overflows. shift result down
891 with m
.If(self
.i
.tot
[-1]):
893 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
894 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
895 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
896 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
897 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
898 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
900 # tot[-1] (MSB) zero case
903 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
904 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
905 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
906 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
907 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
912 class FPAddStage1(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ creates the "add_1" state and its combinatorial module

        width:  forwarded to FPAddStage1Mod and the out_z FPNumBase
        id_wid: forwarded to FPID.__init__ and FPAddStage1Mod
    """
    FPState.__init__(self, "add_1")
    FPID.__init__(self, id_wid)
    # FPAddStage1Mod.__init__ takes (width, id_wid) with no defaults:
    # omitting id_wid raises TypeError on construction
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # setup() clears this every cycle; the state's action sets it to 1
    self.norm_stb = Signal()
922 def setup(self
, m
, i
, in_mid
):
923 """ links module to inputs and outputs
927 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
929 if self
.in_mid
is not None:
930 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
934 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
935 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
936 m
.d
.sync
+= self
.norm_stb
.eq(1)
937 m
.next
= "normalise_1"
940 class FPNormaliseModSingle
:
942 def __init__(self
, width
):
944 self
.in_z
= self
.ispec()
945 self
.out_z
= self
.ospec()
948 return FPNumBase(self
.width
, False)
951 return FPNumBase(self
.width
, False)
953 def setup(self
, m
, i
):
954 """ links module to inputs and outputs
956 m
.submodules
.normalise
= self
957 m
.d
.comb
+= self
.i
.eq(i
)
959 def elaborate(self
, platform
):
962 mwid
= self
.out_z
.m_width
+2
963 pe
= PriorityEncoder(mwid
)
964 m
.submodules
.norm_pe
= pe
966 m
.submodules
.norm1_out_z
= self
.out_z
967 m
.submodules
.norm1_in_z
= self
.in_z
969 in_z
= FPNumBase(self
.width
, False)
971 m
.submodules
.norm1_insel_z
= in_z
972 m
.submodules
.norm1_insel_overflow
= in_of
974 espec
= (len(in_z
.e
), True)
975 ediff_n126
= Signal(espec
, reset_less
=True)
976 msr
= MultiShiftRMerge(mwid
, espec
)
977 m
.submodules
.multishift_r
= msr
979 m
.d
.comb
+= in_z
.eq(self
.in_z
)
980 m
.d
.comb
+= in_of
.eq(self
.in_of
)
981 # initialise out from in (overridden below)
982 m
.d
.comb
+= self
.out_z
.eq(in_z
)
983 m
.d
.comb
+= self
.out_of
.eq(in_of
)
984 # normalisation decrease condition
985 decrease
= Signal(reset_less
=True)
986 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
989 # *sigh* not entirely obvious: count leading zeros (clz)
990 # with a PriorityEncoder: to find from the MSB
991 # we reverse the order of the bits.
992 temp_m
= Signal(mwid
, reset_less
=True)
993 temp_s
= Signal(mwid
+1, reset_less
=True)
994 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
996 # cat round and guard bits back into the mantissa
997 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
998 pe
.i
.eq(temp_m
[::-1]), # inverted
999 clz
.eq(pe
.o
), # count zeros from MSB down
1000 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1001 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
1002 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """ creates the normalisation result record: roundz, z and mid

        width:  forwarded to FPNumBase
        id_wid: bit-width of the mid Signal
    """
    self.roundz = Signal(reset_less=True)
    self.z = FPNumBase(width, False)
    self.mid = Signal(id_wid, reset_less=True)
1015 return [self
.z
.eq(i
.z
), self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1018 class FPNorm1ModSingle
:
1020 def __init__(self
, width
, id_wid
):
1022 self
.id_wid
= id_wid
1023 self
.i
= self
.ispec()
1024 self
.o
= self
.ospec()
1027 return FPAddStage1Data(self
.width
, self
.id_wid
)
1030 return FPNorm1Data(self
.width
, self
.id_wid
)
1032 def setup(self
, m
, i
):
1033 """ links module to inputs and outputs
1035 m
.submodules
.normalise_1
= self
1036 m
.d
.comb
+= self
.i
.eq(i
)
1038 def elaborate(self
, platform
):
1041 mwid
= self
.o
.z
.m_width
+2
1042 pe
= PriorityEncoder(mwid
)
1043 m
.submodules
.norm_pe
= pe
1046 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1048 m
.submodules
.norm1_out_z
= self
.o
.z
1049 m
.submodules
.norm1_out_overflow
= of
1050 m
.submodules
.norm1_in_z
= self
.i
.z
1051 m
.submodules
.norm1_in_overflow
= self
.i
.of
1054 m
.submodules
.norm1_insel_z
= i
.z
1055 m
.submodules
.norm1_insel_overflow
= i
.of
1057 espec
= (len(i
.z
.e
), True)
1058 ediff_n126
= Signal(espec
, reset_less
=True)
1059 msr
= MultiShiftRMerge(mwid
, espec
)
1060 m
.submodules
.multishift_r
= msr
1062 m
.d
.comb
+= i
.eq(self
.i
)
1063 # initialise out from in (overridden below)
1064 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1065 m
.d
.comb
+= of
.eq(i
.of
)
1066 # normalisation increase/decrease conditions
1067 decrease
= Signal(reset_less
=True)
1068 increase
= Signal(reset_less
=True)
1069 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1070 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1072 with m
.If(decrease
):
1073 # *sigh* not entirely obvious: count leading zeros (clz)
1074 # with a PriorityEncoder: to find from the MSB
1075 # we reverse the order of the bits.
1076 temp_m
= Signal(mwid
, reset_less
=True)
1077 temp_s
= Signal(mwid
+1, reset_less
=True)
1078 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1079 # make sure that the amount to decrease by does NOT
1080 # go below the minimum non-INF/NaN exponent
1081 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1084 # cat round and guard bits back into the mantissa
1085 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1086 pe
.i
.eq(temp_m
[::-1]), # inverted
1087 clz
.eq(limclz
), # count zeros from MSB down
1088 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1089 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1090 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1091 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1092 # overflow in bits 0..1: got shifted too (leave sticky)
1093 of
.guard
.eq(temp_s
[1]), # guard
1094 of
.round_bit
.eq(temp_s
[0]), # round
1097 with m
.Elif(increase
):
1098 temp_m
= Signal(mwid
+1, reset_less
=True)
1100 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1102 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1103 # connect multi-shifter to inp/out mantissa (and ediff)
1105 msr
.diff
.eq(ediff_n126
),
1106 self
.o
.z
.m
.eq(msr
.m
[3:]),
1107 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1108 # overflow in bits 0..1: got shifted too (leave sticky)
1109 of
.guard
.eq(temp_s
[2]), # guard
1110 of
.round_bit
.eq(temp_s
[1]), # round
1111 of
.sticky
.eq(temp_s
[0]), # sticky
1112 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1118 class FPNorm1ModMulti
:
1120 def __init__(self
, width
, single_cycle
=True):
1122 self
.in_select
= Signal(reset_less
=True)
1123 self
.in_z
= FPNumBase(width
, False)
1124 self
.in_of
= Overflow()
1125 self
.temp_z
= FPNumBase(width
, False)
1126 self
.temp_of
= Overflow()
1127 self
.out_z
= FPNumBase(width
, False)
1128 self
.out_of
= Overflow()
1130 def elaborate(self
, platform
):
1133 m
.submodules
.norm1_out_z
= self
.out_z
1134 m
.submodules
.norm1_out_overflow
= self
.out_of
1135 m
.submodules
.norm1_temp_z
= self
.temp_z
1136 m
.submodules
.norm1_temp_of
= self
.temp_of
1137 m
.submodules
.norm1_in_z
= self
.in_z
1138 m
.submodules
.norm1_in_overflow
= self
.in_of
1140 in_z
= FPNumBase(self
.width
, False)
1142 m
.submodules
.norm1_insel_z
= in_z
1143 m
.submodules
.norm1_insel_overflow
= in_of
1145 # select which of temp or in z/of to use
1146 with m
.If(self
.in_select
):
1147 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1148 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1150 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
1151 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
1152 # initialise out from in (overridden below)
1153 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1154 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1155 # normalisation increase/decrease conditions
1156 decrease
= Signal(reset_less
=True)
1157 increase
= Signal(reset_less
=True)
1158 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1159 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1160 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1162 with m
.If(decrease
):
1164 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1165 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1166 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1167 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1168 self
.out_of
.round_bit
.eq(0), # reset round bit
1169 self
.out_of
.m0
.eq(in_of
.guard
),
1172 with m
.Elif(increase
):
1174 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1175 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1176 self
.out_of
.guard
.eq(in_z
.m
[0]),
1177 self
.out_of
.m0
.eq(in_z
.m
[1]),
1178 self
.out_of
.round_bit
.eq(in_of
.guard
),
1179 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
1185 class FPNorm1Single(FPState
, FPID
):
1187 def __init__(self
, width
, id_wid
, single_cycle
=True):
1188 FPID
.__init
__(self
, id_wid
)
1189 FPState
.__init
__(self
, "normalise_1")
1190 self
.mod
= FPNorm1ModSingle(width
)
1191 self
.out_z
= FPNumBase(width
, False)
1192 self
.out_roundz
= Signal(reset_less
=True)
1194 def setup(self
, m
, i
, in_mid
):
1195 """ links module to inputs and outputs
1197 self
.mod
.setup(m
, i
, self
.out_z
)
1199 if self
.in_mid
is not None:
1200 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
1202 def action(self
, m
):
1204 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
1208 class FPNorm1Multi(FPState
, FPID
):
1210 def __init__(self
, width
, id_wid
):
1211 FPID
.__init
__(self
, id_wid
)
1212 FPState
.__init
__(self
, "normalise_1")
1213 self
.mod
= FPNorm1ModMulti(width
)
1214 self
.stb
= Signal(reset_less
=True)
1215 self
.ack
= Signal(reset
=0, reset_less
=True)
1216 self
.out_norm
= Signal(reset_less
=True)
1217 self
.in_accept
= Signal(reset_less
=True)
1218 self
.temp_z
= FPNumBase(width
)
1219 self
.temp_of
= Overflow()
1220 self
.out_z
= FPNumBase(width
)
1221 self
.out_roundz
= Signal(reset_less
=True)
1223 def setup(self
, m
, in_z
, in_of
, norm_stb
, in_mid
):
1224 """ links module to inputs and outputs
1226 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1227 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1228 self
.out_z
, self
.out_norm
)
1230 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1231 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1233 if self
.in_mid
is not None:
1234 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
1236 def action(self
, m
):
1238 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1239 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1240 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1241 with m
.If(self
.out_norm
):
1242 with m
.If(self
.in_accept
):
1247 m
.d
.sync
+= self
.ack
.eq(0)
1249 # normalisation not required (or done).
1251 m
.d
.sync
+= self
.ack
.eq(1)
1252 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
1255 class FPNormToPack(FPState
, FPID
):
1257 def __init__(self
, width
, id_wid
):
1258 FPID
.__init
__(self
, id_wid
)
1259 FPState
.__init
__(self
, "normalise_1")
1260 self
.id_wid
= id_wid
1264 return FPAddStage1Data(self
.width
, self
.id_wid
) # Norm1ModSingle ispec
1267 return FPPackData(self
.width
, self
.id_wid
) # FPPackMod ospec
1269 def setup(self
, m
, i
, in_mid
):
1270 """ links module to inputs and outputs
1273 # Normalisation (chained to input in_z+in_of)
1274 nmod
= FPNorm1ModSingle(self
.width
, self
.id_wid
)
1276 n_out
= nmod
.ospec()
1277 m
.d
.comb
+= n_out
.eq(nmod
.o
)
1279 # Rounding (chained to normalisation)
1280 rmod
= FPRoundMod(self
.width
, self
.id_wid
)
1281 rmod
.setup(m
, n_out
)
1282 r_out_z
= rmod
.ospec()
1283 m
.d
.comb
+= r_out_z
.eq(rmod
.out_z
)
1285 # Corrections (chained to rounding)
1286 cmod
= FPCorrectionsMod(self
.width
, self
.id_wid
)
1287 cmod
.setup(m
, r_out_z
)
1288 c_out_z
= cmod
.ospec()
1289 m
.d
.comb
+= c_out_z
.eq(cmod
.out_z
)
1291 # Pack (chained to corrections)
1292 self
.pmod
= FPPackMod(self
.width
, self
.id_wid
)
1293 self
.pmod
.setup(m
, c_out_z
)
1294 self
.out_z
= self
.pmod
.ospec()
1297 if self
.in_mid
is not None:
1298 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
1300 def action(self
, m
):
1301 self
.idsync(m
) # copies incoming ID to outgoing
1302 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.pmod
.o
.z
.v
) # outputs packed result
1303 m
.next
= "pack_put_z"
1308 def __init__(self
, width
, id_wid
):
1309 self
.z
= FPNumBase(width
, False)
1310 self
.mid
= Signal(id_wid
, reset_less
=True)
1313 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1318 def __init__(self
, width
, id_wid
):
1320 self
.id_wid
= id_wid
1321 self
.i
= self
.ispec()
1322 self
.out_z
= self
.ospec()
1325 return FPNorm1Data(self
.width
, self
.id_wid
)
1328 return FPRoundData(self
.width
, self
.id_wid
)
1330 def setup(self
, m
, i
):
1331 m
.submodules
.roundz
= self
1332 m
.d
.comb
+= self
.i
.eq(i
)
1334 def elaborate(self
, platform
):
1336 m
.d
.comb
+= self
.out_z
.eq(self
.i
)
1337 with m
.If(self
.i
.roundz
):
1338 m
.d
.comb
+= self
.out_z
.z
.m
.eq(self
.i
.z
.m
+ 1) # mantissa rounds up
1339 with m
.If(self
.i
.z
.m
== self
.i
.z
.m1s
): # all 1s
1340 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.e
+ 1) # exponent up
1344 class FPRound(FPState
, FPID
):
1346 def __init__(self
, width
, id_wid
):
1347 FPState
.__init
__(self
, "round")
1348 FPID
.__init
__(self
, id_wid
)
1349 self
.mod
= FPRoundMod(width
)
1350 self
.out_z
= self
.ospec()
1353 return self
.mod
.ispec()
1356 return self
.mod
.ospec()
1358 def setup(self
, m
, i
, in_mid
):
1359 """ links module to inputs and outputs
1361 self
.mod
.setup(m
, i
)
1363 if self
.in_mid
is not None:
1364 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
1366 def action(self
, m
):
1368 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1369 m
.next
= "corrections"
1372 class FPCorrectionsMod
:
1374 def __init__(self
, width
, id_wid
):
1376 self
.id_wid
= id_wid
1377 self
.i
= self
.ispec()
1378 self
.out_z
= self
.ospec()
1381 return FPRoundData(self
.width
, self
.id_wid
)
1384 return FPRoundData(self
.width
, self
.id_wid
)
1386 def setup(self
, m
, i
):
1387 """ links module to inputs and outputs
1389 m
.submodules
.corrections
= self
1390 m
.d
.comb
+= self
.i
.eq(i
)
1392 def elaborate(self
, platform
):
1394 m
.submodules
.corr_in_z
= self
.i
.z
1395 m
.submodules
.corr_out_z
= self
.out_z
.z
1396 m
.d
.comb
+= self
.out_z
.eq(self
.i
)
1397 with m
.If(self
.i
.z
.is_denormalised
):
1398 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.N127
)
1402 class FPCorrections(FPState
, FPID
):
1404 def __init__(self
, width
, id_wid
):
1405 FPState
.__init
__(self
, "corrections")
1406 FPID
.__init
__(self
, id_wid
)
1407 self
.mod
= FPCorrectionsMod(width
)
1408 self
.out_z
= self
.ospec()
1411 return self
.mod
.ispec()
1414 return self
.mod
.ospec()
1416 def setup(self
, m
, in_z
, in_mid
):
1417 """ links module to inputs and outputs
1419 self
.mod
.setup(m
, in_z
)
1420 if self
.in_mid
is not None:
1421 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
1423 def action(self
, m
):
1425 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1431 def __init__(self
, width
, id_wid
):
1432 self
.z
= FPNumOut(width
, False)
1433 self
.mid
= Signal(id_wid
, reset_less
=True)
1436 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1441 def __init__(self
, width
, id_wid
):
1443 self
.id_wid
= id_wid
1444 self
.i
= self
.ispec()
1445 self
.o
= self
.ospec()
1448 return FPRoundData(self
.width
, self
.id_wid
)
1451 return FPPackData(self
.width
, self
.id_wid
)
1453 def setup(self
, m
, in_z
):
1454 """ links module to inputs and outputs
1456 m
.submodules
.pack
= self
1457 m
.d
.comb
+= self
.i
.eq(in_z
)
1459 def elaborate(self
, platform
):
1461 m
.submodules
.pack_in_z
= self
.i
.z
1462 with m
.If(self
.i
.z
.is_overflowed
):
1463 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.z
.s
)
1465 m
.d
.comb
+= self
.o
.z
.create(self
.i
.z
.s
, self
.i
.z
.e
, self
.i
.z
.m
)
1470 def __init__(self
, width
, id_wid
):
1471 self
.z
= FPNumOut(width
, False)
1472 self
.mid
= Signal(id_wid
, reset_less
=True)
1475 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1478 class FPPack(FPState
, FPID
):
1480 def __init__(self
, width
, id_wid
):
1481 FPState
.__init
__(self
, "pack")
1482 FPID
.__init
__(self
, id_wid
)
1483 self
.mod
= FPPackMod(width
)
1484 self
.out_z
= self
.ospec()
1487 return self
.mod
.ispec()
1490 return self
.mod
.ospec()
1492 def setup(self
, m
, in_z
, in_mid
):
1493 """ links module to inputs and outputs
1495 self
.mod
.setup(m
, in_z
)
1496 if self
.in_mid
is not None:
1497 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
1499 def action(self
, m
):
1501 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1502 m
.next
= "pack_put_z"
1505 class FPPutZ(FPState
):
1507 def __init__(self
, state
, in_z
, out_z
, in_mid
, out_mid
, to_state
=None):
1508 FPState
.__init
__(self
, state
)
1509 if to_state
is None:
1510 to_state
= "get_ops"
1511 self
.to_state
= to_state
1514 self
.in_mid
= in_mid
1515 self
.out_mid
= out_mid
1517 def action(self
, m
):
1518 if self
.in_mid
is not None:
1519 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
1521 self
.out_z
.z
.v
.eq(self
.in_z
.v
)
1523 with m
.If(self
.out_z
.z
.stb
& self
.out_z
.z
.ack
):
1524 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(0)
1525 m
.next
= self
.to_state
1527 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(1)
1530 class FPPutZIdx(FPState
):
1532 def __init__(self
, state
, in_z
, out_zs
, in_mid
, to_state
=None):
1533 FPState
.__init
__(self
, state
)
1534 if to_state
is None:
1535 to_state
= "get_ops"
1536 self
.to_state
= to_state
1538 self
.out_zs
= out_zs
1539 self
.in_mid
= in_mid
1541 def action(self
, m
):
1542 outz_stb
= Signal(reset_less
=True)
1543 outz_ack
= Signal(reset_less
=True)
1544 m
.d
.comb
+= [outz_stb
.eq(self
.out_zs
[self
.in_mid
].stb
),
1545 outz_ack
.eq(self
.out_zs
[self
.in_mid
].ack
),
1548 self
.out_zs
[self
.in_mid
].v
.eq(self
.in_z
.v
)
1550 with m
.If(outz_stb
& outz_ack
):
1551 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(0)
1552 m
.next
= self
.to_state
1554 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(1)
1556 class FPADDBaseData
:
1558 def __init__(self
, width
, id_wid
):
1560 self
.id_wid
= id_wid
1561 self
.a
= Signal(width
)
1562 self
.b
= Signal(width
)
1563 self
.mid
= Signal(id_wid
, reset_less
=True)
1566 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
1570 def __init__(self
, width
, id_wid
):
1571 self
.z
= FPOp(width
)
1572 self
.mid
= Signal(id_wid
, reset_less
=True)
1575 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1578 class FPADDBaseMod(FPID
):
1580 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1583 * width: bit-width of IEEE754. supported: 16, 32, 64
1584 * id_wid: an identifier that is sync-connected to the input
1585 * single_cycle: True indicates each stage to complete in 1 clock
1586 * compact: True indicates a reduced number of stages
1588 FPID
.__init
__(self
, id_wid
)
1590 self
.id_wid
= id_wid
1591 self
.single_cycle
= single_cycle
1592 self
.compact
= compact
1594 self
.in_t
= Trigger()
1595 self
.i
= self
.ispec()
1596 self
.o
= self
.ospec()
1601 return FPADDBaseData(self
.width
, self
.id_wid
)
1604 return FPOpData(self
.width
, self
.id_wid
)
1606 def add_state(self
, state
):
1607 self
.states
.append(state
)
1610 def get_fragment(self
, platform
=None):
1611 """ creates the HDL code-fragment for FPAdd
1614 m
.submodules
.out_z
= self
.o
.z
1615 m
.submodules
.in_t
= self
.in_t
1617 self
.get_compact_fragment(m
, platform
)
1619 self
.get_longer_fragment(m
, platform
)
1621 with m
.FSM() as fsm
:
1623 for state
in self
.states
:
1624 with m
.State(state
.state_from
):
1629 def get_longer_fragment(self
, m
, platform
=None):
1631 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1633 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1637 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1638 sc
.setup(m
, a
, b
, self
.in_mid
)
1640 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1641 dn
.setup(m
, a
, b
, sc
.in_mid
)
1643 if self
.single_cycle
:
1644 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1645 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1647 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1648 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1650 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1651 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1653 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1654 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1656 if self
.single_cycle
:
1657 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1658 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1660 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1661 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1663 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1664 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1666 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1667 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1669 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1670 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1672 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1673 pa
.in_mid
, self
.out_mid
))
1675 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1676 pa
.in_mid
, self
.out_mid
))
1678 def get_compact_fragment(self
, m
, platform
=None):
1680 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1681 self
.width
, self
.id_wid
))
1682 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1684 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1685 sc
.setup(m
, get
.o
, self
.in_mid
)
1687 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1688 alm
.setup(m
, sc
.o
, sc
.in_mid
)
1690 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1691 n1
.setup(m
, alm
.a1o
, alm
.in_mid
)
1693 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1694 n1
.in_mid
, self
.out_mid
))
1696 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
.z
, self
.o
,
1697 sc
.in_mid
, self
.out_mid
))
1700 class FPADDBase(FPState
, FPID
):
1702 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1705 * width: bit-width of IEEE754. supported: 16, 32, 64
1706 * id_wid: an identifier that is sync-connected to the input
1707 * single_cycle: True indicates each stage to complete in 1 clock
1709 FPID
.__init
__(self
, id_wid
)
1710 FPState
.__init
__(self
, "fpadd")
1712 self
.single_cycle
= single_cycle
1713 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1714 self
.o
= self
.ospec()
1716 self
.in_t
= Trigger()
1717 self
.i
= self
.ispec()
1719 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1720 self
.in_accept
= Signal(reset_less
=True)
1721 self
.add_stb
= Signal(reset_less
=True)
1722 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1725 return self
.mod
.ispec()
1728 return self
.mod
.ospec()
1730 def setup(self
, m
, i
, add_stb
, in_mid
):
1731 m
.d
.comb
+= [self
.i
.eq(i
),
1732 self
.mod
.i
.eq(self
.i
),
1733 self
.in_mid
.eq(in_mid
),
1734 self
.mod
.in_mid
.eq(self
.in_mid
),
1735 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1736 #self.add_stb.eq(add_stb),
1737 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1738 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1739 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1740 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1741 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1742 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1745 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1746 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1747 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1748 #m.d.sync += self.in_t.stb.eq(0)
1750 m
.submodules
.fpadd
= self
.mod
1752 def action(self
, m
):
1754 # in_accept is set on incoming strobe HIGH and ack LOW.
1755 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1757 #with m.If(self.in_t.ack):
1758 # m.d.sync += self.in_t.stb.eq(0)
1759 with m
.If(~self
.z_done
):
1760 # not done: test for accepting an incoming operand pair
1761 with m
.If(self
.in_accept
):
1763 self
.add_ack
.eq(1), # acknowledge receipt...
1764 self
.in_t
.stb
.eq(1), # initiate add
1767 m
.d
.sync
+= [self
.add_ack
.eq(0),
1768 self
.in_t
.stb
.eq(0),
1772 # done: acknowledge, and write out id and value
1773 m
.d
.sync
+= [self
.add_ack
.eq(1),
1780 if self
.in_mid
is not None:
1781 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1784 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1786 # move to output state on detecting z ack
1787 with m
.If(self
.out_z
.trigger
):
1788 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1791 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1795 def __init__(self
, width
, id_wid
):
1797 self
.id_wid
= id_wid
1799 for i
in range(rs_sz
):
1801 out_z
.name
= "out_z_%d" % i
1803 self
.res
= Array(res
)
1804 self
.in_z
= FPOp(width
)
1805 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
1807 def setup(self
, m
, in_z
, in_mid
):
1808 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1809 self
.in_mid
.eq(in_mid
)]
1811 def get_fragment(self
, platform
=None):
1812 """ creates the HDL code-fragment for FPAdd
1815 m
.submodules
.res_in_z
= self
.in_z
1816 m
.submodules
+= self
.res
1828 """ FPADD: stages as follows:
1834 FPAddBase---> FPAddBaseMod
1836 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1838 FPAddBase is tricky: it is both a stage and *has* stages.
1839 Connection to FPAddBaseMod therefore requires an in stb/ack
1840 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1841 needs to be the thing that raises the incoming stb.
1844 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1847 * width: bit-width of IEEE754. supported: 16, 32, 64
1848 * id_wid: an identifier that is sync-connected to the input
1849 * single_cycle: True indicates each stage to complete in 1 clock
1852 self
.id_wid
= id_wid
1853 self
.single_cycle
= single_cycle
1855 #self.out_z = FPOp(width)
1856 self
.ids
= FPID(id_wid
)
1859 for i
in range(rs_sz
):
1862 in_a
.name
= "in_a_%d" % i
1863 in_b
.name
= "in_b_%d" % i
1864 rs
.append((in_a
, in_b
))
1868 for i
in range(rs_sz
):
1870 out_z
.name
= "out_z_%d" % i
1872 self
.res
= Array(res
)
1876 def add_state(self
, state
):
1877 self
.states
.append(state
)
1880 def get_fragment(self
, platform
=None):
1881 """ creates the HDL code-fragment for FPAdd
1884 m
.submodules
+= self
.rs
1886 in_a
= self
.rs
[0][0]
1887 in_b
= self
.rs
[0][1]
1889 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1894 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1899 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1900 ab
= self
.add_state(ab
)
1901 abd
= ab
.ispec() # create an input spec object for FPADDBase
1902 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1903 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1906 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1909 with m
.FSM() as fsm
:
1911 for state
in self
.states
:
1912 with m
.State(state
.state_from
):
1918 if __name__
== "__main__":
1920 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1921 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1922 alu
.rs
[0][1].ports() + \
1923 alu
.res
[0].ports() + \
1924 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1926 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1927 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1928 alu
.in_t
.ports() + \
1929 alu
.out_z
.ports() + \
1930 [alu
.in_mid
, alu
.out_mid
])
1933 # works... but don't use, just do "python fname.py convert -t v"
1934 #print (verilog.convert(alu, ports=[
1935 # ports=alu.in_a.ports() + \
1936 # alu.in_b.ports() + \
1937 # alu.out_z.ports())