1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 #from fpbase import FPNumShiftMultiRight
15 class FPState(FPBase
):
def __init__(self, state_from):
    """Remember which state this object stands for.

    state_from is stored unchanged as ``self.state_from``; subclasses in
    this file pass state-name strings such as "align" or "add_0".
    """
    self.state_from = state_from
19 def set_inputs(self
, inputs
):
21 for k
,v
in inputs
.items():
24 def set_outputs(self
, outputs
):
25 self
.outputs
= outputs
26 for k
,v
in outputs
.items():
30 class FPGetSyncOpsMod
:
31 def __init__(self
, width
, num_ops
=2):
33 self
.num_ops
= num_ops
36 for i
in range(num_ops
):
37 inops
.append(Signal(width
, reset_less
=True))
38 outops
.append(Signal(width
, reset_less
=True))
41 self
.stb
= Signal(num_ops
)
43 self
.ready
= Signal(reset_less
=True)
44 self
.out_decode
= Signal(reset_less
=True)
46 def elaborate(self
, platform
):
48 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
49 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
50 with m
.If(self
.out_decode
):
51 for i
in range(self
.num_ops
):
53 self
.out_op
[i
].eq(self
.in_op
[i
]),
58 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
62 def __init__(self
, width
, num_ops
):
63 Trigger
.__init
__(self
)
65 self
.num_ops
= num_ops
68 for i
in range(num_ops
):
69 res
.append(Signal(width
))
74 for i
in range(self
.num_ops
):
82 def __init__(self
, width
, num_ops
=2, num_rows
=4):
84 self
.num_ops
= num_ops
85 self
.num_rows
= num_rows
86 self
.mmax
= int(log(self
.num_rows
) / log(2))
88 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
89 for i
in range(num_rows
):
90 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
91 self
.rs
= Array(self
.rs
)
93 self
.out_op
= FPOps(width
, num_ops
)
95 def elaborate(self
, platform
):
98 pe
= PriorityEncoder(self
.num_rows
)
99 m
.submodules
.selector
= pe
100 m
.submodules
.out_op
= self
.out_op
101 m
.submodules
+= self
.rs
103 # connect priority encoder
105 for i
in range(self
.num_rows
):
106 in_ready
.append(self
.rs
[i
].ready
)
107 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
109 active
= Signal(reset_less
=True)
110 out_en
= Signal(reset_less
=True)
111 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
112 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
114 # encoder active: ack relevant input, record MID, pass output
117 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
118 m
.d
.sync
+= rs
.ack
.eq(0)
119 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
120 for j
in range(self
.num_ops
):
121 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
123 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
124 # acks all default to zero
125 for i
in range(self
.num_rows
):
126 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
132 for i
in range(self
.num_rows
):
134 res
+= inop
.in_op
+ [inop
.stb
]
135 return self
.out_op
.ports() + res
+ [self
.mid
]
def __init__(self, width):
    """Create the operand input, the operand output and the decode flag.

    width is passed to FPOp() for the incoming operand and is also the
    bit-width of the ``out_op`` Signal.
    """
    # incoming operand (an FPOp built for this width)
    self.in_op = FPOp(width)
    # outgoing operand value, ``width`` bits wide
    self.out_op = Signal(width)
    # single-bit flag, created without a reset
    self.out_decode = Signal(reset_less=True)
144 def elaborate(self
, platform
):
146 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
147 m
.submodules
.get_op_in
= self
.in_op
148 #m.submodules.get_op_out = self.out_op
149 with m
.If(self
.out_decode
):
151 self
.out_op
.eq(self
.in_op
.v
),
156 class FPGetOp(FPState
):
160 def __init__(self
, in_state
, out_state
, in_op
, width
):
161 FPState
.__init
__(self
, in_state
)
162 self
.out_state
= out_state
163 self
.mod
= FPGetOpMod(width
)
165 self
.out_op
= Signal(width
)
166 self
.out_decode
= Signal(reset_less
=True)
168 def setup(self
, m
, in_op
):
169 """ links module to inputs and outputs
171 setattr(m
.submodules
, self
.state_from
, self
.mod
)
172 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
173 #m.d.comb += self.out_op.eq(self.mod.out_op)
174 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
188 def __init__(self
, width
, id_wid
):
189 Trigger
.__init
__(self
)
192 self
.i
= self
.ispec()
193 self
.o
= self
.ospec()
196 return FPADDBaseData(self
.width
, self
.id_wid
)
199 return FPNumBase2Ops(self
.width
, self
.id_wid
)
201 def elaborate(self
, platform
):
202 m
= Trigger
.elaborate(self
, platform
)
203 #m.submodules.get_op_in = self.in_op
204 m
.submodules
.get_op1_out
= self
.o
.a
205 m
.submodules
.get_op2_out
= self
.o
.b
206 out_op1
= FPNumIn(None, self
.width
)
207 out_op2
= FPNumIn(None, self
.width
)
208 with m
.If(self
.trigger
):
210 out_op1
.decode(self
.i
.a
),
211 out_op2
.decode(self
.i
.b
),
212 self
.o
.a
.eq(out_op1
),
213 self
.o
.b
.eq(out_op2
),
218 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """Set up the two-operand fetch state.

    in_state is handed to FPState.__init__; out_state is kept on the
    instance as ``out_state``.  width and id_wid are forwarded to
    FPGet2OpMod, whose ospec() supplies this state's output record.
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state

    # worker module and an output record of the shape that module produces
    self.mod = FPGet2OpMod(width, id_wid)
    self.o = self.mod.ospec()

    # single-bit handshake/status signals, all created without a reset
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
231 def setup(self
, m
, i
, in_stb
, in_ack
):
232 """ links module to inputs and outputs
234 m
.submodules
.get_ops
= self
.mod
235 m
.d
.comb
+= self
.mod
.i
.eq(i
)
236 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
237 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
238 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
239 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
242 with m
.If(self
.out_decode
):
243 m
.next
= self
.out_state
246 self
.o
.eq(self
.mod
.o
),
249 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """Create a record of two FPNumBase operands plus an id signal.

    width and m_extra are passed positionally to FPNumBase for both
    operands; id_wid is the bit-width of the ``mid`` Signal.
    """
    # the two operands, built identically
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)
    # id travelling with the operands, created without a reset
    self.mid = Signal(id_wid, reset_less=True)
260 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
263 class FPAddSpecialCasesMod
:
264 """ special cases: NaNs, infs, zeros, denormalised
265 NOTE: some of these are unique to add. see "Special Operations"
266 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
269 def __init__(self
, width
, id_wid
):
272 self
.i
= self
.ispec()
273 self
.o
= self
.ospec()
274 self
.out_do_z
= Signal(reset_less
=True)
277 return FPNumBase2Ops(self
.width
, self
.id_wid
)
280 return FPPackData(self
.width
, self
.id_wid
)
282 def setup(self
, m
, i
, out_do_z
):
283 """ links module to inputs and outputs
285 m
.submodules
.specialcases
= self
286 m
.d
.comb
+= self
.i
.eq(i
)
287 m
.d
.comb
+= out_do_z
.eq(self
.out_do_z
)
289 def elaborate(self
, platform
):
292 m
.submodules
.sc_in_a
= self
.i
.a
293 m
.submodules
.sc_in_b
= self
.i
.b
294 m
.submodules
.sc_out_z
= self
.o
.z
297 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
300 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
302 # if a is NaN or b is NaN return NaN
303 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
304 m
.d
.comb
+= self
.out_do_z
.eq(1)
305 m
.d
.comb
+= self
.o
.z
.nan(0)
307 # XXX WEIRDNESS for FP16 non-canonical NaN handling
310 ## if a is zero and b is NaN return -b
311 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
312 # m.d.comb += self.out_do_z.eq(1)
313 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
315 ## if b is zero and a is NaN return -a
316 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
317 # m.d.comb += self.out_do_z.eq(1)
318 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
320 ## if a is -zero and b is NaN return -b
321 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
322 # m.d.comb += self.out_do_z.eq(1)
323 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
325 ## if b is -zero and a is NaN return -a
326 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
327 # m.d.comb += self.out_do_z.eq(1)
328 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
330 # if a is inf return inf (or NaN)
331 with m
.Elif(self
.i
.a
.is_inf
):
332 m
.d
.comb
+= self
.out_do_z
.eq(1)
333 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
334 # if a is inf and signs don't match return NaN
335 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
336 m
.d
.comb
+= self
.o
.z
.nan(0)
338 # if b is inf return inf
339 with m
.Elif(self
.i
.b
.is_inf
):
340 m
.d
.comb
+= self
.out_do_z
.eq(1)
341 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
343 # if a is zero and b zero return signed-a/b
344 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
345 m
.d
.comb
+= self
.out_do_z
.eq(1)
346 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
350 # if a is zero return b
351 with m
.Elif(self
.i
.a
.is_zero
):
352 m
.d
.comb
+= self
.out_do_z
.eq(1)
353 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
356 # if b is zero return a
357 with m
.Elif(self
.i
.b
.is_zero
):
358 m
.d
.comb
+= self
.out_do_z
.eq(1)
359 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
362 # if a equal to -b return zero (+ve zero)
363 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
364 m
.d
.comb
+= self
.out_do_z
.eq(1)
365 m
.d
.comb
+= self
.o
.z
.zero(0)
367 # Denormalised Number checks
369 m
.d
.comb
+= self
.out_do_z
.eq(0)
375 def __init__(self
, id_wid
):
378 self
.in_mid
= Signal(id_wid
, reset_less
=True)
379 self
.out_mid
= Signal(id_wid
, reset_less
=True)
385 if self
.id_wid
is not None:
386 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
389 class FPAddSpecialCases(FPState
, FPID
):
390 """ special cases: NaNs, infs, zeros, denormalised
391 NOTE: some of these are unique to add. see "Special Operations"
392 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """Set up the "special_cases" state.

    Initialises FPState with the state name "special_cases" and FPID
    with id_wid, creates the FPAddSpecialCasesMod worker module, and
    takes that module's ospec() as this state's ``out_z``.

    width and id_wid are both forwarded to FPAddSpecialCasesMod.
    """
    FPState.__init__(self, "special_cases")
    FPID.__init__(self, id_wid)
    # FPAddSpecialCasesMod.__init__ requires (width, id_wid) and has no
    # default for id_wid: passing width alone raised TypeError.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.mod.ospec()
    # single-bit flag, created without a reset
    self.out_do_z = Signal(reset_less=True)
402 def setup(self
, m
, in_a
, in_b
, in_mid
):
403 """ links module to inputs and outputs
405 self
.mod
.setup(m
, in_a
, in_b
, self
.out_do_z
)
406 if self
.in_mid
is not None:
407 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
411 with m
.If(self
.out_do_z
):
412 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
415 m
.next
= "denormalise"
418 class FPAddSpecialCasesDeNorm(FPState
, FPID
):
419 """ special cases: NaNs, infs, zeros, denormalised
420 NOTE: some of these are unique to add. see "Special Operations"
421 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
424 def __init__(self
, width
, id_wid
):
425 FPState
.__init
__(self
, "special_cases")
426 FPID
.__init
__(self
, id_wid
)
427 self
.smod
= FPAddSpecialCasesMod(width
, id_wid
)
428 self
.out_z
= self
.smod
.ospec()
429 self
.out_do_z
= Signal(reset_less
=True)
431 self
.dmod
= FPAddDeNormMod(width
, id_wid
)
432 self
.o
= self
.dmod
.ospec()
434 def setup(self
, m
, i
, in_mid
):
435 """ links module to inputs and outputs
437 self
.smod
.setup(m
, i
, self
.out_do_z
)
438 self
.dmod
.setup(m
, i
)
439 if self
.in_mid
is not None:
440 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
444 with m
.If(self
.out_do_z
):
445 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
449 m
.d
.sync
+= self
.o
.a
.eq(self
.dmod
.o
.a
)
450 m
.d
.sync
+= self
.o
.b
.eq(self
.dmod
.o
.b
)
453 class FPAddDeNormMod(FPState
):
455 def __init__(self
, width
, id_wid
):
458 self
.i
= self
.ispec()
459 self
.o
= self
.ospec()
462 return FPNumBase2Ops(self
.width
, self
.id_wid
)
465 return FPNumBase2Ops(self
.width
, self
.id_wid
)
467 def setup(self
, m
, i
):
468 """ links module to inputs and outputs
470 m
.submodules
.denormalise
= self
471 m
.d
.comb
+= self
.i
.eq(i
)
473 def elaborate(self
, platform
):
475 m
.submodules
.denorm_in_a
= self
.i
.a
476 m
.submodules
.denorm_in_b
= self
.i
.b
477 m
.submodules
.denorm_out_a
= self
.o
.a
478 m
.submodules
.denorm_out_b
= self
.o
.b
479 # hmmm, don't like repeating identical code
480 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
481 with m
.If(self
.i
.a
.exp_n127
):
482 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
484 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
486 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
487 with m
.If(self
.i
.b
.exp_n127
):
488 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
490 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
495 class FPAddDeNorm(FPState
, FPID
):
def __init__(self, width, id_wid):
    """Set up the "denormalise" state.

    Initialises FPState with the state name "denormalise" and FPID with
    id_wid, creates the FPAddDeNormMod worker module, and creates two
    FPNumBase outputs ``out_a`` and ``out_b`` for the given width.
    """
    FPState.__init__(self, "denormalise")
    FPID.__init__(self, id_wid)
    # FPAddDeNormMod.__init__ requires (width, id_wid) and has no default
    # for id_wid: passing width alone raised TypeError.
    self.mod = FPAddDeNormMod(width, id_wid)
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
504 def setup(self
, m
, in_a
, in_b
, in_mid
):
505 """ links module to inputs and outputs
507 self
.mod
.setup(m
, in_a
, in_b
)
508 if self
.in_mid
is not None:
509 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
513 # Denormalised Number checks
515 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
516 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
519 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """Create the align module's two inputs, two outputs and status flag.

    Inputs are FPNumBase(width); outputs are FPNumIn(None, width).
    ``exp_eq`` is a single-bit Signal created without a reset.
    """
    # operands coming in
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)

    # operands going out
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)

    self.exp_eq = Signal(reset_less=True)
528 def elaborate(self
, platform
):
529 # This one however (single-cycle) will do the shift
534 m
.submodules
.align_in_a
= self
.in_a
535 m
.submodules
.align_in_b
= self
.in_b
536 m
.submodules
.align_out_a
= self
.out_a
537 m
.submodules
.align_out_b
= self
.out_b
539 # NOTE: this does *not* do single-cycle multi-shifting,
540 # it *STAYS* in the align state until exponents match
542 # exponent of a greater than b: shift b down
543 m
.d
.comb
+= self
.exp_eq
.eq(0)
544 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
545 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
546 agtb
= Signal(reset_less
=True)
547 altb
= Signal(reset_less
=True)
548 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
549 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
551 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
552 # exponent of b greater than a: shift a down
554 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
555 # exponents equal: move to next stage.
557 m
.d
.comb
+= self
.exp_eq
.eq(1)
561 class FPAddAlignMulti(FPState
, FPID
):
def __init__(self, width, id_wid):
    """Set up the multi-cycle "align" state.

    Initialises FPID with id_wid and FPState with the state name
    "align" (in that order), creates the FPAddAlignMultiMod worker for
    the given width, and creates this state's own ``out_a``/``out_b``
    (FPNumIn) and ``exp_eq`` flag.
    """
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "align")

    self.mod = FPAddAlignMultiMod(width)

    # state-level copies of the outputs and of the exp_eq flag
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    self.exp_eq = Signal(reset_less=True)
571 def setup(self
, m
, in_a
, in_b
, in_mid
):
572 """ links module to inputs and outputs
574 m
.submodules
.align
= self
.mod
575 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
576 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
577 #m.d.comb += self.out_a.eq(self.mod.out_a)
578 #m.d.comb += self.out_b.eq(self.mod.out_b)
579 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
580 if self
.in_mid
is not None:
581 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
585 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
586 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
587 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """Create a record of two FPNumIn operands plus an id signal.

    Both operands are built as FPNumIn(None, width); id_wid is the
    bit-width of the ``mid`` Signal.
    """
    # the two operands, built identically
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    # id travelling with the operands, created without a reset
    self.mid = Signal(id_wid, reset_less=True)
599 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
602 class FPAddAlignSingleMod
:
604 def __init__(self
, width
, id_wid
):
607 self
.i
= self
.ispec()
608 self
.o
= self
.ospec()
611 return FPNumBase2Ops(self
.width
, self
.id_wid
)
614 return FPNumIn2Ops(self
.width
, self
.id_wid
)
616 def setup(self
, m
, i
):
617 """ links module to inputs and outputs
619 m
.submodules
.align
= self
620 m
.d
.comb
+= self
.i
.eq(i
)
622 def elaborate(self
, platform
):
623 """ Aligns A against B or B against A, depending on which has the
624 greater exponent. This is done in a *single* cycle using
625 variable-width bit-shift
627 the shifter used here is quite expensive in terms of gates.
628 Mux A or B in (and out) into temporaries, as only one of them
629 needs to be aligned against the other
633 m
.submodules
.align_in_a
= self
.i
.a
634 m
.submodules
.align_in_b
= self
.i
.b
635 m
.submodules
.align_out_a
= self
.o
.a
636 m
.submodules
.align_out_b
= self
.o
.b
638 # temporary (muxed) input and output to be shifted
639 t_inp
= FPNumBase(self
.width
)
640 t_out
= FPNumIn(None, self
.width
)
641 espec
= (len(self
.i
.a
.e
), True)
642 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
643 m
.submodules
.align_t_in
= t_inp
644 m
.submodules
.align_t_out
= t_out
645 m
.submodules
.multishift_r
= msr
647 ediff
= Signal(espec
, reset_less
=True)
648 ediffr
= Signal(espec
, reset_less
=True)
649 tdiff
= Signal(espec
, reset_less
=True)
650 elz
= Signal(reset_less
=True)
651 egz
= Signal(reset_less
=True)
653 # connect multi-shifter to t_inp/out mantissa (and tdiff)
654 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
655 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
656 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
657 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
658 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
660 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
661 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
662 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
663 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
665 # default: A-exp == B-exp, A and B untouched (fall through)
666 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
667 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
668 # only one shifter (muxed)
669 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
670 # exponent of a greater than b: shift b down
672 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
675 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
677 # exponent of b greater than a: shift a down
679 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
682 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
687 class FPAddAlignSingle(FPState
, FPID
):
def __init__(self, width, id_wid):
    """Set up the single-cycle "align" state.

    Initialises FPState with the state name "align" and FPID with
    id_wid, creates the FPAddAlignSingleMod worker (width, id_wid), and
    creates this state's ``out_a``/``out_b`` as FPNumIn(None, width).
    """
    FPState.__init__(self, "align")
    FPID.__init__(self, id_wid)

    self.mod = FPAddAlignSingleMod(width, id_wid)

    # state-level outputs
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
696 def setup(self
, m
, in_a
, in_b
, in_mid
):
697 """ links module to inputs and outputs
699 self
.mod
.setup(m
, in_a
, in_b
)
700 if self
.in_mid
is not None:
701 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
705 # NOTE: could be done as comb
706 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
707 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
711 class FPAddAlignSingleAdd(FPState
, FPID
):
713 def __init__(self
, width
, id_wid
):
714 FPState
.__init
__(self
, "align")
715 FPID
.__init
__(self
, id_wid
)
718 self
.mod
= FPAddAlignSingleMod(width
, id_wid
)
719 self
.o
= self
.mod
.ospec()
721 self
.a1mod
= FPAddStage1Mod(width
, id_wid
)
722 self
.a1o
= self
.a1mod
.ospec()
724 def setup(self
, m
, i
, in_mid
):
725 """ links module to inputs and outputs
728 m
.d
.comb
+= self
.o
.eq(self
.mod
.o
)
730 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
731 a0mod
.setup(m
, self
.o
)
733 m
.d
.comb
+= a0o
.eq(a0mod
.o
)
735 self
.a1mod
.setup(m
, a0o
)
737 if self
.in_mid
is not None:
738 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
742 m
.d
.sync
+= self
.a1o
.eq(self
.a1mod
.o
)
743 m
.next
= "normalise_1"
746 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """Create the add-stage-0 result record: z, tot and mid.

    ``z`` is FPNumBase(width, False).  ``tot`` is four bits wider than
    ``z.m_width``.  ``mid`` is id_wid bits wide.  Both Signals are
    created without a reset.
    """
    self.z = FPNumBase(width, False)
    # total: z's mantissa width plus 4 extra bits
    self.tot = Signal(self.z.m_width + 4, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
754 return [self
.z
.eq(i
.z
), self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
757 class FPAddStage0Mod
:
759 def __init__(self
, width
, id_wid
):
762 self
.i
= self
.ispec()
763 self
.o
= self
.ospec()
766 return FPNumBase2Ops(self
.width
, self
.id_wid
)
769 return FPAddStage0Data(self
.width
, self
.id_wid
)
771 def setup(self
, m
, i
):
772 """ links module to inputs and outputs
774 m
.submodules
.add0
= self
775 m
.d
.comb
+= self
.i
.eq(i
)
777 def elaborate(self
, platform
):
779 m
.submodules
.add0_in_a
= self
.i
.a
780 m
.submodules
.add0_in_b
= self
.i
.b
781 m
.submodules
.add0_out_z
= self
.o
.z
783 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
785 # store intermediate tests (and zero-extended mantissas)
786 seq
= Signal(reset_less
=True)
787 mge
= Signal(reset_less
=True)
788 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
789 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
790 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
791 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
792 am0
.eq(Cat(self
.i
.a
.m
, 0)),
793 bm0
.eq(Cat(self
.i
.b
.m
, 0))
795 # same-sign (both negative or both positive) add mantissas
798 self
.o
.tot
.eq(am0
+ bm0
),
799 self
.o
.z
.s
.eq(self
.i
.a
.s
)
801 # a mantissa greater than b, use a
804 self
.o
.tot
.eq(am0
- bm0
),
805 self
.o
.z
.s
.eq(self
.i
.a
.s
)
807 # b mantissa greater than a, use b
810 self
.o
.tot
.eq(bm0
- am0
),
811 self
.o
.z
.s
.eq(self
.i
.b
.s
)
816 class FPAddStage0(FPState
, FPID
):
817 """ First stage of add. covers same-sign (add) and subtract
818 special-casing when mantissas are greater or equal, to
819 give greatest accuracy.
def __init__(self, width, id_wid):
    """Set up the "add_0" state.

    Initialises FPState with the state name "add_0" and FPID with
    id_wid, creates the FPAddStage0Mod worker module, and takes that
    module's ospec() as this state's output record ``o``.
    """
    FPState.__init__(self, "add_0")
    FPID.__init__(self, id_wid)
    # FPAddStage0Mod.__init__ requires (width, id_wid) and has no default
    # for id_wid: passing width alone raised TypeError.
    self.mod = FPAddStage0Mod(width, id_wid)
    self.o = self.mod.ospec()
828 def setup(self
, m
, i
, in_mid
):
829 """ links module to inputs and outputs
832 if self
.in_mid
is not None:
833 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
837 # NOTE: these could be done as combinatorial (merge add0+add1)
838 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
842 class FPAddStage1Data
:
844 def __init__(self
, width
, id_wid
):
845 self
.z
= FPNumBase(width
, False)
847 self
.mid
= Signal(id_wid
, reset_less
=True)
850 return [self
.z
.eq(i
.z
), self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
854 class FPAddStage1Mod(FPState
):
855 """ Second stage of add: preparation for normalisation.
856 detects when tot sum is too big (tot[27] is kinda a carry bit)
859 def __init__(self
, width
, id_wid
):
862 self
.i
= self
.ispec()
863 self
.o
= self
.ospec()
866 return FPAddStage0Data(self
.width
, self
.id_wid
)
869 return FPAddStage1Data(self
.width
, self
.id_wid
)
871 def setup(self
, m
, i
):
872 """ links module to inputs and outputs
874 m
.submodules
.add1
= self
875 m
.submodules
.add1_out_overflow
= self
.o
.of
877 m
.d
.comb
+= self
.i
.eq(i
)
879 def elaborate(self
, platform
):
881 #m.submodules.norm1_in_overflow = self.in_of
882 #m.submodules.norm1_out_overflow = self.out_of
883 #m.submodules.norm1_in_z = self.in_z
884 #m.submodules.norm1_out_z = self.out_z
885 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
886 # tot[-1] (MSB) gets set when the sum overflows. shift result down
887 with m
.If(self
.i
.tot
[-1]):
889 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
890 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
891 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
892 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
893 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
894 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
896 # tot[-1] (MSB) zero case
899 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
900 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
901 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
902 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
903 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
908 class FPAddStage1(FPState
, FPID
):
def __init__(self, width, id_wid):
    """Set up the "add_1" state.

    Initialises FPState with the state name "add_1" and FPID with
    id_wid, creates the FPAddStage1Mod worker module, and creates this
    state's outputs: ``out_z`` (FPNumBase(width, False)), ``out_of``
    (Overflow) and the ``norm_stb`` Signal.
    """
    FPState.__init__(self, "add_1")
    FPID.__init__(self, id_wid)
    # FPAddStage1Mod.__init__ requires (width, id_wid) and has no default
    # for id_wid: passing width alone raised TypeError.
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # unlike the other flags in this file, norm_stb is created with
    # default Signal() arguments (no reset_less=True)
    self.norm_stb = Signal()
918 def setup(self
, m
, i
, in_mid
):
919 """ links module to inputs and outputs
923 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
925 if self
.in_mid
is not None:
926 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
930 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
931 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
932 m
.d
.sync
+= self
.norm_stb
.eq(1)
933 m
.next
= "normalise_1"
936 class FPNormaliseModSingle
:
938 def __init__(self
, width
):
940 self
.in_z
= self
.ispec()
941 self
.out_z
= self
.ospec()
944 return FPNumBase(self
.width
, False)
947 return FPNumBase(self
.width
, False)
949 def setup(self
, m
, in_z
, out_z
):
950 """ links module to inputs and outputs
952 m
.submodules
.normalise
= self
953 m
.d
.comb
+= self
.in_z
.eq(in_z
)
954 m
.d
.comb
+= out_z
.eq(self
.out_z
)
956 def elaborate(self
, platform
):
959 mwid
= self
.out_z
.m_width
+2
960 pe
= PriorityEncoder(mwid
)
961 m
.submodules
.norm_pe
= pe
963 m
.submodules
.norm1_out_z
= self
.out_z
964 m
.submodules
.norm1_in_z
= self
.in_z
966 in_z
= FPNumBase(self
.width
, False)
968 m
.submodules
.norm1_insel_z
= in_z
969 m
.submodules
.norm1_insel_overflow
= in_of
971 espec
= (len(in_z
.e
), True)
972 ediff_n126
= Signal(espec
, reset_less
=True)
973 msr
= MultiShiftRMerge(mwid
, espec
)
974 m
.submodules
.multishift_r
= msr
976 m
.d
.comb
+= in_z
.eq(self
.in_z
)
977 m
.d
.comb
+= in_of
.eq(self
.in_of
)
978 # initialise out from in (overridden below)
979 m
.d
.comb
+= self
.out_z
.eq(in_z
)
980 m
.d
.comb
+= self
.out_of
.eq(in_of
)
981 # normalisation decrease condition
982 decrease
= Signal(reset_less
=True)
983 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
986 # *sigh* not entirely obvious: count leading zeros (clz)
987 # with a PriorityEncoder: to find from the MSB
988 # we reverse the order of the bits.
989 temp_m
= Signal(mwid
, reset_less
=True)
990 temp_s
= Signal(mwid
+1, reset_less
=True)
991 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
993 # cat round and guard bits back into the mantissa
994 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
995 pe
.i
.eq(temp_m
[::-1]), # inverted
996 clz
.eq(pe
.o
), # count zeros from MSB down
997 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
998 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
999 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """Create a record holding roundz, z and mid.

    ``roundz`` is a single-bit Signal.  ``z`` is FPNumBase(width, False).
    ``mid`` is id_wid bits wide.  Both Signals are created without a
    reset.
    """
    # single-bit flag
    self.roundz = Signal(reset_less=True)
    # floating-point number for this width
    self.z = FPNumBase(width, False)
    # id travelling with the result
    self.mid = Signal(id_wid, reset_less=True)
1012 return [self
.z
.eq(i
.z
), self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1015 class FPNorm1ModSingle
:
1017 def __init__(self
, width
, id_wid
):
1019 self
.id_wid
= id_wid
1020 self
.i
= self
.ispec()
1021 self
.o
= self
.ospec()
1024 return FPAddStage1Data(self
.width
, self
.id_wid
)
1027 return FPNorm1Data(self
.width
, self
.id_wid
)
1029 def setup(self
, m
, i
):
1030 """ links module to inputs and outputs
1032 m
.submodules
.normalise_1
= self
1033 m
.d
.comb
+= self
.i
.eq(i
)
1035 def elaborate(self
, platform
):
1038 mwid
= self
.o
.z
.m_width
+2
1039 pe
= PriorityEncoder(mwid
)
1040 m
.submodules
.norm_pe
= pe
1043 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1045 m
.submodules
.norm1_out_z
= self
.o
.z
1046 m
.submodules
.norm1_out_overflow
= of
1047 m
.submodules
.norm1_in_z
= self
.i
.z
1048 m
.submodules
.norm1_in_overflow
= self
.i
.of
1051 m
.submodules
.norm1_insel_z
= i
.z
1052 m
.submodules
.norm1_insel_overflow
= i
.of
1054 espec
= (len(i
.z
.e
), True)
1055 ediff_n126
= Signal(espec
, reset_less
=True)
1056 msr
= MultiShiftRMerge(mwid
, espec
)
1057 m
.submodules
.multishift_r
= msr
1059 m
.d
.comb
+= i
.eq(self
.i
)
1060 # initialise out from in (overridden below)
1061 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1062 m
.d
.comb
+= of
.eq(i
.of
)
1063 # normalisation increase/decrease conditions
1064 decrease
= Signal(reset_less
=True)
1065 increase
= Signal(reset_less
=True)
1066 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1067 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1069 with m
.If(decrease
):
1070 # *sigh* not entirely obvious: count leading zeros (clz)
1071 # with a PriorityEncoder: to find from the MSB
1072 # we reverse the order of the bits.
1073 temp_m
= Signal(mwid
, reset_less
=True)
1074 temp_s
= Signal(mwid
+1, reset_less
=True)
1075 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1076 # make sure that the amount to decrease by does NOT
1077 # go below the minimum non-INF/NaN exponent
1078 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1081 # cat round and guard bits back into the mantissa
1082 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1083 pe
.i
.eq(temp_m
[::-1]), # inverted
1084 clz
.eq(limclz
), # count zeros from MSB down
1085 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1086 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1087 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1088 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1089 # overflow in bits 0..1: got shifted too (leave sticky)
1090 of
.guard
.eq(temp_s
[1]), # guard
1091 of
.round_bit
.eq(temp_s
[0]), # round
1094 with m
.Elif(increase
):
1095 temp_m
= Signal(mwid
+1, reset_less
=True)
1097 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1099 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1100 # connect multi-shifter to inp/out mantissa (and ediff)
1102 msr
.diff
.eq(ediff_n126
),
1103 self
.o
.z
.m
.eq(msr
.m
[3:]),
1104 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1105 # overflow in bits 0..1: got shifted too (leave sticky)
1106 of
.guard
.eq(temp_s
[2]), # guard
1107 of
.round_bit
.eq(temp_s
[1]), # round
1108 of
.sticky
.eq(temp_s
[0]), # sticky
1109 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1115 class FPNorm1ModMulti
:
1117 def __init__(self
, width
, single_cycle
=True):
1119 self
.in_select
= Signal(reset_less
=True)
1120 self
.in_z
= FPNumBase(width
, False)
1121 self
.in_of
= Overflow()
1122 self
.temp_z
= FPNumBase(width
, False)
1123 self
.temp_of
= Overflow()
1124 self
.out_z
= FPNumBase(width
, False)
1125 self
.out_of
= Overflow()
1127 def elaborate(self
, platform
):
1130 m
.submodules
.norm1_out_z
= self
.out_z
1131 m
.submodules
.norm1_out_overflow
= self
.out_of
1132 m
.submodules
.norm1_temp_z
= self
.temp_z
1133 m
.submodules
.norm1_temp_of
= self
.temp_of
1134 m
.submodules
.norm1_in_z
= self
.in_z
1135 m
.submodules
.norm1_in_overflow
= self
.in_of
1137 in_z
= FPNumBase(self
.width
, False)
1139 m
.submodules
.norm1_insel_z
= in_z
1140 m
.submodules
.norm1_insel_overflow
= in_of
1142 # select which of temp or in z/of to use
1143 with m
.If(self
.in_select
):
1144 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1145 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1147 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
1148 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
1149 # initialise out from in (overridden below)
1150 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1151 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1152 # normalisation increase/decrease conditions
1153 decrease
= Signal(reset_less
=True)
1154 increase
= Signal(reset_less
=True)
1155 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1156 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1157 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1159 with m
.If(decrease
):
1161 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1162 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1163 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1164 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1165 self
.out_of
.round_bit
.eq(0), # reset round bit
1166 self
.out_of
.m0
.eq(in_of
.guard
),
1169 with m
.Elif(increase
):
1171 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1172 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1173 self
.out_of
.guard
.eq(in_z
.m
[0]),
1174 self
.out_of
.m0
.eq(in_z
.m
[1]),
1175 self
.out_of
.round_bit
.eq(in_of
.guard
),
1176 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
class FPNorm1Single(FPState, FPID):
    """ FSM state "normalise_1", single-cycle variant

        Wraps an FPNorm1ModSingle and registers its round-up flag.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """ * width: passed to FPNorm1ModSingle and FPNumBase
            * id_wid: passed to FPID
            * single_cycle: accepted but not used by this class
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        # NOTE(review): FPNormToPack builds FPNorm1ModSingle with
        # (width, id_wid) and calls setup(m, i); this class uses a
        # different signature - confirm which one is current.
        self.mod = FPNorm1ModSingle(width)
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, self.out_z)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the round-up flag and moves on to rounding
        """
        # NOTE(review): the lines before and after the out_roundz
        # assignment are absent from the damaged extract; restored by
        # analogy with FPNormToPack.action and the "round" state - confirm.
        self.idsync(m)
        m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
        m.next = "round"
class FPNorm1Multi(FPState, FPID):
    """ FSM state "normalise_1", multi-cycle variant

        Wraps an FPNorm1ModMulti.  The module output is registered into
        temp_z / temp_of every cycle so that it can be fed back in, and
        the state is left once out_norm drops.
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        # sets to zero when not in normalise_1 state
        m.d.sync += self.ack.eq(0)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ per-cycle behaviour while the FSM is in "normalise_1"
        """
        # NOTE(review): one line is absent from the damaged extract
        # here; restored as idsync by analogy with FPNormToPack - confirm.
        self.idsync(m)
        # accept when strobe is HIGH and ack is LOW
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                # NOTE(review): the body of this branch is absent from
                # the damaged extract; acknowledging here is an
                # assumption - confirm against the original file.
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            # NOTE(review): transition line absent from the extract;
            # "round" is the state declared by FPRound - confirm.
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
class FPNormToPack(FPState, FPID):
    """ FSM state "normalise_1" that chains four combinatorial modules

        normalise -> round -> corrections -> pack, with only the packed
        value being registered (in action).
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        # NOTE(review): this assignment is absent from the damaged
        # extract; it is required because setup() reads self.width.
        self.width = width

    def setup(self, m, i, in_mid):
        """ links module to inputs and outputs

            * m: Module being built
            * i: input bundle handed to the normalisation module
            * in_mid: incoming multiplex id (ignored if self.in_mid is None)
        """
        # self.id_wid is presumably set by FPID.__init__ - confirm

        # Normalisation (chained to input in_z+in_of)
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        # NOTE(review): the line connecting nmod to "i" is absent from
        # the damaged extract; restored by analogy with the three
        # stages below - confirm.
        nmod.setup(m, i)
        n_out = nmod.ospec()
        m.d.comb += n_out.eq(nmod.o)

        # Rounding (chained to normalisation)
        rmod = FPRoundMod(self.width, self.id_wid)
        rmod.setup(m, n_out)
        r_out_z = rmod.ospec()
        m.d.comb += r_out_z.eq(rmod.out_z)

        # Corrections (chained to rounding)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        cmod.setup(m, r_out_z)
        c_out_z = cmod.ospec()
        m.d.comb += c_out_z.eq(cmod.out_z)

        # Pack (chained to corrections)
        self.pmod = FPPackMod(self.width, self.id_wid)
        self.pmod.setup(m, c_out_z)
        self.out_z = self.pmod.ospec()

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the packed result and moves to "pack_put_z"
        """
        self.idsync(m)  # copies incoming ID to outgoing
        # outputs packed result
        m.d.sync += self.out_z.z.v.eq(self.pmod.o.z.v)
        m.next = "pack_put_z"
# NOTE(review): the class header and the "def eq" line are absent from
# the damaged extract.  The name is inferred from FPRoundMod.ospec,
# which constructs FPRoundData(width, id_wid) - confirm.
class FPRoundData:
    """ data bundle: a number "z" plus a multiplex id "mid"
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments that copy i into this bundle
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]
# NOTE(review): the class header, "self.width = width" and the
# ispec/ospec "def" lines are absent from the damaged extract.  The
# name is inferred from FPNormToPack.setup, which builds
# FPRoundMod(width, id_wid) and uses .setup/.ospec/.out_z - confirm.
class FPRoundMod:
    """ combinatorial rounding stage
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ returns a fresh input bundle
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output bundle
        """
        return FPRoundData(self.width, self.id_wid)

    def setup(self, m, i):
        """ registers this module as m.submodules.roundz and drives its
            input from i
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ out_z defaults to a copy of the input; when i.roundz is set
            the mantissa is incremented
        """
        m = Module()
        src = self.i
        dst = self.out_z
        m.d.comb += dst.eq(src)
        with m.If(src.roundz):
            m.d.comb += dst.z.m.eq(src.z.m + 1)  # mantissa rounds up
            # NOTE(review): indentation is lost in the damaged extract;
            # nesting this test inside the roundz branch is assumed.
            with m.If(src.z.m == src.z.m1s):  # all 1s
                m.d.comb += dst.z.e.eq(src.z.e + 1)  # exponent up
        return m
class FPRound(FPState, FPID):
    """ FSM state "round": wraps FPRoundMod and registers its output
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        # FPRoundMod.__init__ requires (width, id_wid); it was being
        # called with width only, which raises TypeError.
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.mod.ospec()

    def setup(self, m, in_z, roundz, in_mid):
        """ links module to inputs and outputs

            * m: Module being built
            * in_z: number to be rounded
            * roundz: single-bit "round up" flag
            * in_mid: incoming multiplex id (ignored if self.in_mid is None)
        """
        # FPRoundMod.setup takes one input bundle, not separate z and
        # roundz arguments, so bundle them here.  The z / roundz / mid
        # fields are the ones FPRoundMod itself reads from its input.
        # NOTE(review): assumes the ispec bundle has no other field
        # that must be driven - confirm against FPNorm1Data.
        bundle = self.mod.ispec()
        m.d.comb += [bundle.z.eq(in_z), bundle.roundz.eq(roundz)]
        if self.in_mid is not None:
            m.d.comb += bundle.mid.eq(in_mid)
        self.mod.setup(m, bundle)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the rounded result and moves to "corrections"
        """
        # NOTE(review): one line is absent from the damaged extract
        # here; restored as idsync by analogy with FPNormToPack - confirm.
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.next = "corrections"
class FPCorrectionsMod:
    """ combinatorial exponent-correction stage
    """

    def __init__(self, width, id_wid):
        # NOTE(review): "self.width = width" is absent from the damaged
        # extract; it is required because ispec/ospec read self.width.
        self.width = width
        self.id_wid = id_wid
        self.in_z = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ returns a fresh input bundle
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output bundle (same type as the input)
        """
        return FPRoundData(self.width, self.id_wid)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.in_z.eq(in_z)

    def elaborate(self, platform):
        """ out_z defaults to a copy of in_z; when in_z.z.is_denormalised
            is set the exponent is replaced by in_z.z.N127
        """
        m = Module()
        src = self.in_z
        dst = self.out_z
        m.submodules.corr_in_z = src.z
        m.submodules.corr_out_z = dst.z
        m.d.comb += dst.eq(src)
        with m.If(src.z.is_denormalised):
            m.d.comb += dst.z.e.eq(src.z.N127)
        return m
class FPCorrections(FPState, FPID):
    """ FSM state "corrections": wraps FPCorrectionsMod and registers
        its output
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        # FPCorrectionsMod.__init__ requires (width, id_wid); it was
        # being called with width only, which raises TypeError.
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.mod.ospec()

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs

            * m: Module being built
            * in_z: bundle fed to the corrections module
            * in_mid: incoming multiplex id (ignored if self.in_mid is None)
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the corrected result
        """
        # NOTE(review): the lines before and after the out_z assignment
        # are absent from the damaged extract; restored by analogy with
        # FPNormToPack.action and the "pack" state of FPPack - confirm.
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.next = "pack"
# NOTE(review): the class header and the "def eq" line are absent from
# the damaged extract.  The name is inferred from the ospec a few lines
# further on, which constructs FPPackData(width, id_wid) - confirm.
class FPPackData:
    """ data bundle: a packed output number "z" plus a multiplex id "mid"
    """

    def __init__(self, width, id_wid):
        self.z = FPNumOut(width, False)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments that copy i into this bundle
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]
# NOTE(review): the class header, "self.width = width" and the
# ispec/ospec "def" lines are absent from the damaged extract.  The
# name is inferred from FPNormToPack.setup, which builds
# FPPackMod(width, id_wid) and uses .setup/.ospec/.o - confirm.
class FPPackMod:
    """ combinatorial pack stage
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a fresh input bundle
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output bundle
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        """ drives o.z from i.z: o.z.inf(sign) when i.z.is_overflowed is
            set, otherwise o.z.create(sign, exponent, mantissa)
        """
        m = Module()
        num = self.i.z
        m.submodules.pack_in_z = num
        with m.If(num.is_overflowed):
            m.d.comb += self.o.z.inf(num.s)
        with m.Else():
            m.d.comb += self.o.z.create(num.s, num.e, num.m)
        return m
1448 def __init__(self
, width
, id_wid
):
1449 self
.z
= FPNumOut(width
, False)
1450 self
.mid
= Signal(id_wid
, reset_less
=True)
1453 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
class FPPack(FPState, FPID):
    """ FSM state "pack": wraps FPPackMod and registers the packed value
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        # FPPackMod.__init__ requires (width, id_wid); it was being
        # called with width only, which raises TypeError.
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ returns a fresh input bundle (delegates to the module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ returns a fresh output bundle (delegates to the module)
        """
        return self.mod.ospec()

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs

            * m: Module being built
            * in_z: bundle fed to the pack module
            * in_mid: incoming multiplex id (ignored if self.in_mid is None)
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the packed value and moves to "pack_put_z"
        """
        # NOTE(review): one line is absent from the damaged extract
        # here; restored as idsync by analogy with FPNormToPack - confirm.
        self.idsync(m)
        # FPPackMod exposes its result as ".o" (there is no ".out_z"),
        # and the value lives in the ".z" member of the bundle - the
        # same access FPNormToPack.action uses.
        # NOTE(review): code that reads the packed value from this
        # state must now use out_z.z, as get_compact_fragment does.
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ FSM state that presents a result on out_z and waits for it to
        be acknowledged
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: source of the value (its .v is read)
            * out_z: destination (its .z.v / .z.stb / .z.ack are used)
            * in_mid, out_mid: multiplex id source and destination;
              in_mid may be None
            * to_state: state to go to afterwards, "get_ops" if None
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        # NOTE(review): the in_z / out_z assignments are absent from
        # the damaged extract; they are required because action() reads
        # self.in_z and self.out_z.
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ copies id and value, raises the strobe, and leaves the state
            once strobe and ack are both high
        """
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        # NOTE(review): the line naming the domain of this assignment is
        # absent from the damaged extract; sync is assumed - confirm.
        m.d.sync += self.out_z.z.v.eq(self.in_z.v)

        port = self.out_z.z
        with m.If(port.stb & port.ack):
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += port.stb.eq(1)
class FPPutZIdx(FPState):
    """ FSM state that presents a result on one of several outputs,
        chosen by in_mid, and waits for it to be acknowledged
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: source of the value (its .v is read)
            * out_zs: indexable collection of outputs with .v/.stb/.ack
            * in_mid: index into out_zs
            * to_state: state to go to afterwards, "get_ops" if None
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        # NOTE(review): the in_z assignment is absent from the damaged
        # extract; it is required because action() reads self.in_z.
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ copies the value to the selected output, raises its strobe,
            and leaves the state once strobe and ack are both high
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += outz_stb.eq(self.out_zs[self.in_mid].stb)
        m.d.comb += outz_ack.eq(self.out_zs[self.in_mid].ack)
        # NOTE(review): the line naming the domain of this assignment is
        # absent from the damaged extract; sync is assumed - confirm.
        m.d.sync += self.out_zs[self.in_mid].v.eq(self.in_z.v)

        with m.If(outz_stb & outz_ack):
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
class FPADDBaseData:
    """ input bundle for the adder: operands "a" and "b" plus a
        multiplex id "mid"
    """

    def __init__(self, width, id_wid):
        # NOTE(review): one line is absent from the damaged extract
        # before id_wid; restored as the width attribute - confirm.
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments that copy i into this bundle
        """
        copy_a = self.a.eq(i.a)
        copy_b = self.b.eq(i.b)
        copy_mid = self.mid.eq(i.mid)
        return [copy_a, copy_b, copy_mid]
# NOTE(review): the class header and the "def eq" line are absent from
# the damaged extract.  The name is inferred from FPADDBaseMod.ospec,
# which constructs FPOpData(width, id_wid) - confirm.
class FPOpData:
    """ output bundle for the adder: an FPOp "z" plus a multiplex id "mid"
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments that copy i into this bundle
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]
1556 class FPADDBaseMod(FPID
):
def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
    """ sets up the adder's input/output bundles and an empty state list

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
        * compact: True indicates a reduced number of stages
    """
    FPID.__init__(self, id_wid)
    # NOTE(review): "self.width" and "self.states" are absent from the
    # damaged extract; both are required (ispec/ospec read self.width,
    # add_state appends to self.states).
    self.width = width
    self.id_wid = id_wid
    self.single_cycle = single_cycle
    self.compact = compact
    self.states = []

    self.in_t = Trigger()
    self.i = self.ispec()
    self.o = self.ospec()
1579 return FPADDBaseData(self
.width
, self
.id_wid
)
1582 return FPOpData(self
.width
, self
.id_wid
)
def add_state(self, state):
    """ appends state to self.states and hands it back

        Returning the state lets callers write
        "x = self.add_state(...)" followed by "x.setup(...)", which is
        how every caller in this file uses it.
    """
    self.states.append(state)
    return state
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd

        Registers the output op and input trigger as submodules, lets
        the compact or the longer builder populate self.states, then
        emits one FSM state per entry.
    """
    m = Module()
    m.submodules.out_z = self.o.z
    m.submodules.in_t = self.in_t

    # NOTE(review): the branch lines are absent from the damaged
    # extract; selecting on self.compact is assumed - confirm.
    if self.compact:
        self.get_compact_fragment(m, platform)
    else:
        self.get_longer_fragment(m, platform)

    with m.FSM() as fsm:
        for state in self.states:
            with m.State(state.state_from):
                # NOTE(review): loop body absent from the damaged
                # extract; every state class defines action(m).
                state.action(m)

    return m
1607 def get_longer_fragment(self
, m
, platform
=None):
1609 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1611 get
.setup(m
, self
.in_a
, self
.in_b
, self
.in_t
.stb
, self
.in_t
.ack
)
1615 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1616 sc
.setup(m
, a
, b
, self
.in_mid
)
1618 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1619 dn
.setup(m
, a
, b
, sc
.in_mid
)
1621 if self
.single_cycle
:
1622 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1623 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1625 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1626 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1628 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1629 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1631 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1632 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1634 if self
.single_cycle
:
1635 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1636 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1638 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1639 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1641 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1642 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1644 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1645 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1647 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1648 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1650 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1651 pa
.in_mid
, self
.out_mid
))
1653 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1654 pa
.in_mid
, self
.out_mid
))
def get_compact_fragment(self, m, platform=None):
    """ registers the reduced set of FSM states, in this order:
        get_ops, special-cases/denorm, align/add, normalise-to-pack,
        pack_put_z, put_z

        * m: Module being built
        * platform: unused here
    """
    width, id_wid = self.width, self.id_wid

    get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                  width, id_wid))
    get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

    sc = self.add_state(FPAddSpecialCasesDeNorm(width, id_wid))
    sc.setup(m, get.o, self.in_mid)

    alm = self.add_state(FPAddAlignSingleAdd(width, id_wid))
    alm.setup(m, sc.o, sc.in_mid)

    n1 = self.add_state(FPNormToPack(width, id_wid))
    n1.setup(m, alm.a1o, alm.in_mid)

    # result of the normal path
    self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                          n1.in_mid, self.out_mid))

    # result of the special-cases path
    self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
                          sc.in_mid, self.out_mid))
1678 class FPADDBase(FPState
, FPID
):
def __init__(self, width, id_wid=None, single_cycle=False):
    """ FSM state "fpadd" that owns an FPADDBaseMod

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
    """
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "fpadd")
    # NOTE(review): one line is absent from the damaged extract here;
    # restored as the width attribute - confirm.
    self.width = width
    self.single_cycle = single_cycle
    self.mod = FPADDBaseMod(width, id_wid, single_cycle)
    self.o = self.ospec()

    self.in_t = Trigger()
    self.i = self.ispec()

    self.z_done = Signal(reset_less=True)  # connects to out_z Strobe
    self.in_accept = Signal(reset_less=True)
    self.add_stb = Signal(reset_less=True)
    self.add_ack = Signal(reset=0, reset_less=True)
1703 return self
.mod
.ispec()
1706 return self
.mod
.ospec()
def setup(self, m, i, add_stb, in_mid):
    """ wires this state to its inner FPADDBaseMod

        * m: Module being built
        * i: input bundle copied into self.i and on into the module
        * add_stb: strobe, registered into self.add_stb
        * in_mid: incoming multiplex id
    """
    inner = self.mod

    # inputs: outside -> this state -> inner module
    m.d.comb += self.i.eq(i)
    m.d.comb += inner.i.eq(self.i)
    m.d.comb += self.in_mid.eq(in_mid)
    m.d.comb += inner.in_mid.eq(self.in_mid)
    m.d.comb += self.z_done.eq(inner.o.z.trigger)
    #self.add_stb.eq(add_stb),
    # input trigger handshake
    m.d.comb += inner.in_t.stb.eq(self.in_t.stb)
    m.d.comb += self.in_t.ack.eq(inner.in_t.ack)
    # outputs: inner module -> this state (ack goes the other way)
    m.d.comb += self.o.mid.eq(inner.o.mid)
    m.d.comb += self.o.z.v.eq(inner.o.z.v)
    m.d.comb += self.o.z.stb.eq(inner.o.z.stb)
    m.d.comb += inner.o.z.ack.eq(self.o.z.ack)

    m.d.sync += self.add_stb.eq(add_stb)
    m.d.sync += self.add_ack.eq(0)  # sets to zero when not in active state
    m.d.sync += self.o.z.ack.eq(0)  # likewise
    #m.d.sync += self.in_t.stb.eq(0)

    m.submodules.fpadd = inner
1730 def action(self
, m
):
1732 # in_accept is set on incoming strobe HIGH and ack LOW.
1733 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1735 #with m.If(self.in_t.ack):
1736 # m.d.sync += self.in_t.stb.eq(0)
1737 with m
.If(~self
.z_done
):
1738 # not done: test for accepting an incoming operand pair
1739 with m
.If(self
.in_accept
):
1741 self
.add_ack
.eq(1), # acknowledge receipt...
1742 self
.in_t
.stb
.eq(1), # initiate add
1745 m
.d
.sync
+= [self
.add_ack
.eq(0),
1746 self
.in_t
.stb
.eq(0),
1750 # done: acknowledge, and write out id and value
1751 m
.d
.sync
+= [self
.add_ack
.eq(1),
1758 if self
.in_mid
is not None:
1759 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1762 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1764 # move to output state on detecting z ack
1765 with m
.If(self
.out_z
.trigger
):
1766 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1769 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1772 def __init__(self
, width
, id_wid
):
1774 self
.id_wid
= id_wid
1776 for i
in range(rs_sz
):
1778 out_z
.name
= "out_z_%d" % i
1780 self
.res
= Array(res
)
1781 self
.in_z
= FPOp(width
)
1782 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
1784 def setup(self
, m
, in_z
, in_mid
):
1785 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1786 self
.in_mid
.eq(in_mid
)]
1788 def get_fragment(self
, platform
=None):
1789 """ creates the HDL code-fragment for FPAdd
1792 m
.submodules
.res_in_z
= self
.in_z
1793 m
.submodules
+= self
.res
1805 """ FPADD: stages as follows:
1811 FPAddBase---> FPAddBaseMod
1813 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1815 FPAddBase is tricky: it is both a stage and *has* stages.
1816 Connection to FPAddBaseMod therefore requires an in stb/ack
1817 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1818 needs to be the thing that raises the incoming stb.
1821 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1824 * width: bit-width of IEEE754. supported: 16, 32, 64
1825 * id_wid: an identifier that is sync-connected to the input
1826 * single_cycle: True indicates each stage to complete in 1 clock
1829 self
.id_wid
= id_wid
1830 self
.single_cycle
= single_cycle
1832 #self.out_z = FPOp(width)
1833 self
.ids
= FPID(id_wid
)
1836 for i
in range(rs_sz
):
1839 in_a
.name
= "in_a_%d" % i
1840 in_b
.name
= "in_b_%d" % i
1841 rs
.append((in_a
, in_b
))
1845 for i
in range(rs_sz
):
1847 out_z
.name
= "out_z_%d" % i
1849 self
.res
= Array(res
)
def add_state(self, state):
    """ appends state to self.states and hands it back

        Returning the state lets callers write
        "x = self.add_state(...)" and keep using x, which is how the
        callers in this file use it.
    """
    self.states.append(state)
    return state
1857 def get_fragment(self
, platform
=None):
1858 """ creates the HDL code-fragment for FPAdd
1861 m
.submodules
+= self
.rs
1863 in_a
= self
.rs
[0][0]
1864 in_b
= self
.rs
[0][1]
1866 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1871 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1876 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1877 ab
= self
.add_state(ab
)
1878 abd
= ab
.ispec() # create an input spec object for FPADDBase
1879 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1880 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1883 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1886 with m
.FSM() as fsm
:
1888 for state
in self
.states
:
1889 with m
.State(state
.state_from
):
1895 if __name__
== "__main__":
1897 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1898 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1899 alu
.rs
[0][1].ports() + \
1900 alu
.res
[0].ports() + \
1901 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1903 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1904 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1905 alu
.in_t
.ports() + \
1906 alu
.out_z
.ports() + \
1907 [alu
.in_mid
, alu
.out_mid
])
# Calling verilog.convert() directly also works, but prefer the
# command line:  python fname.py convert -t v
#print (verilog.convert(alu, ports=alu.in_a.ports() + \
#                                  alu.in_b.ports() + \
#                                  alu.out_z.ports()))