1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 #from fpbase import FPNumShiftMultiRight
class FPState(FPBase):
    """ Base class for one named state of the adder state machine.

        state_from is the identifier of this state; subclasses in this
        file pass strings such as "align" or "normalise_1".
    """

    def __init__(self, state_from):
        self.state_from = state_from

    def set_inputs(self, inputs):
        """ Record the inputs dict and expose each entry as an attribute.
        """
        # kept for parity with set_outputs, which records self.outputs
        self.inputs = inputs
        for k, v in inputs.items():
            setattr(self, k, v)

    def set_outputs(self, outputs):
        """ Record the outputs dict and expose each entry as an attribute.
        """
        self.outputs = outputs
        for k, v in outputs.items():
            setattr(self, k, v)
30 class FPGetSyncOpsMod
:
31 def __init__(self
, width
, num_ops
=2):
33 self
.num_ops
= num_ops
36 for i
in range(num_ops
):
37 inops
.append(Signal(width
, reset_less
=True))
38 outops
.append(Signal(width
, reset_less
=True))
41 self
.stb
= Signal(num_ops
)
43 self
.ready
= Signal(reset_less
=True)
44 self
.out_decode
= Signal(reset_less
=True)
46 def elaborate(self
, platform
):
48 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
49 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
50 with m
.If(self
.out_decode
):
51 for i
in range(self
.num_ops
):
53 self
.out_op
[i
].eq(self
.in_op
[i
]),
58 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
62 def __init__(self
, width
, num_ops
):
63 Trigger
.__init
__(self
)
65 self
.num_ops
= num_ops
68 for i
in range(num_ops
):
69 res
.append(Signal(width
))
74 for i
in range(self
.num_ops
):
def __init__(self, width, num_ops=2, num_rows=4):
    """ Build num_rows operand-synchronising rows and one output port.

        width:    bit-width handed to every FPGetSyncOpsMod and to FPOps
        num_ops:  number of operands per row
        num_rows: number of input rows; must be at least 1

        Raises ValueError when num_rows is below 1 (the logarithm is
        undefined there).
    """
    if num_rows < 1:
        raise ValueError("num_rows must be at least 1")
    self.num_ops = num_ops
    self.num_rows = num_rows
    # floor(log2(num_rows)), computed on integers: no dependency on
    # math.log and no floating-point rounding of the quotient
    self.mmax = num_rows.bit_length() - 1
    self.mid = Signal(self.mmax, reset_less=True)  # multiplex id
    # build the row list first, then wrap it so it can be indexed by a Signal
    self.rs = Array([FPGetSyncOpsMod(width, num_ops)
                     for _ in range(num_rows)])

    self.out_op = FPOps(width, num_ops)
95 def elaborate(self
, platform
):
98 pe
= PriorityEncoder(self
.num_rows
)
99 m
.submodules
.selector
= pe
100 m
.submodules
.out_op
= self
.out_op
101 m
.submodules
+= self
.rs
103 # connect priority encoder
105 for i
in range(self
.num_rows
):
106 in_ready
.append(self
.rs
[i
].ready
)
107 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
109 active
= Signal(reset_less
=True)
110 out_en
= Signal(reset_less
=True)
111 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
112 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
114 # encoder active: ack relevant input, record MID, pass output
117 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
118 m
.d
.sync
+= rs
.ack
.eq(0)
119 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
120 for j
in range(self
.num_ops
):
121 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
123 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
124 # acks all default to zero
125 for i
in range(self
.num_rows
):
126 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
132 for i
in range(self
.num_rows
):
134 res
+= inop
.in_op
+ [inop
.stb
]
135 return self
.out_op
.ports() + res
+ [self
.mid
]
def __init__(self, width):
    """ Create the operand port, the captured-value signal and the
        single-bit "operand accepted" flag.

        width: bit-width of the operand and of out_op
    """
    # incoming operand with its stb/ack handshake
    self.in_op = FPOp(width)
    # receives in_op.v once the handshake completes
    self.out_op = Signal(width)
    # driven from in_op.ack & in_op.stb in elaborate
    self.out_decode = Signal(reset_less=True)
144 def elaborate(self
, platform
):
146 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
147 m
.submodules
.get_op_in
= self
.in_op
148 #m.submodules.get_op_out = self.out_op
149 with m
.If(self
.out_decode
):
151 self
.out_op
.eq(self
.in_op
.v
),
156 class FPGetOp(FPState
):
def __init__(self, in_state, out_state, in_op, width):
    """ State that fetches a single operand.

        in_state:  name of this state (passed on to FPState)
        out_state: name of the state to move to once the operand is taken
        in_op:     operand port whose ack this state drives
        width:     bit-width of the operand
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state
    self.mod = FPGetOpMod(width)
    # keep the port: the state's later code drives self.in_op.ack
    self.in_op = in_op
    self.out_op = Signal(width)
    self.out_decode = Signal(reset_less=True)
168 def setup(self
, m
, in_op
):
169 """ links module to inputs and outputs
171 setattr(m
.submodules
, self
.state_from
, self
.mod
)
172 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
173 #m.d.comb += self.out_op.eq(self.mod.out_op)
174 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
def __init__(self, width, id_wid):
    """ Two-operand fetch module.

        width:  operand bit-width
        id_wid: bit-width of the multiplex id carried with the data
    """
    Trigger.__init__(self)
    # must be stored before ispec()/ospec() run: both read these
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.o = self.ospec()
196 return FPADDBaseData(self
.width
, self
.id_wid
)
199 return FPNumBase2Ops(self
.width
, self
.id_wid
)
201 def elaborate(self
, platform
):
202 m
= Trigger
.elaborate(self
, platform
)
203 #m.submodules.get_op_in = self.in_op
204 m
.submodules
.get_op1_out
= self
.o
.a
205 m
.submodules
.get_op2_out
= self
.o
.b
206 out_op1
= FPNumIn(None, self
.width
)
207 out_op2
= FPNumIn(None, self
.width
)
208 with m
.If(self
.trigger
):
210 out_op1
.decode(self
.i
.a
),
211 out_op2
.decode(self
.i
.b
),
212 self
.o
.a
.eq(out_op1
),
213 self
.o
.b
.eq(out_op2
),
218 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """ State wrapper around FPGet2OpMod.

        in_state:  name of this state (passed on to FPState)
        out_state: name of the state entered once out_decode is set
        width:     operand bit-width
        id_wid:    bit-width of the multiplex id
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state

    # combinatorial module plus a registered copy of its output spec
    self.mod = FPGet2OpMod(width, id_wid)
    self.o = self.mod.ospec()

    # single-bit handshake / decode flags
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
231 def setup(self
, m
, i
, in_stb
, in_ack
):
232 """ links module to inputs and outputs
234 m
.submodules
.get_ops
= self
.mod
235 m
.d
.comb
+= self
.mod
.i
.eq(i
)
236 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
237 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
238 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
239 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
242 with m
.If(self
.out_decode
):
243 m
.next
= self
.out_state
246 self
.o
.eq(self
.mod
.o
),
249 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """ Bundle of two FPNumBase operands plus a multiplex id.

        width:   bit-width handed to both FPNumBase instances
        id_wid:  bit-width of the mid signal
        m_extra: forwarded unchanged as FPNumBase's second argument
    """
    # operands a and b share identical parameters
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)
    # multiplex id travelling alongside the operands
    self.mid = Signal(id_wid, reset_less=True)
260 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
263 class FPAddSpecialCasesMod
:
264 """ special cases: NaNs, infs, zeros, denormalised
265 NOTE: some of these are unique to add. see "Special Operations"
266 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ Special-cases module (NaN / inf / zero handling).

        width:  operand bit-width
        id_wid: bit-width of the multiplex id
    """
    # must be stored before ispec()/ospec() run: both read these
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.o = self.ospec()
    # set to 1 whenever one of the special cases produced the result
    self.out_do_z = Signal(reset_less=True)
277 return FPNumBase2Ops(self
.width
, self
.id_wid
)
280 return FPPackData(self
.width
, self
.id_wid
)
282 def setup(self
, m
, i
, out_do_z
):
283 """ links module to inputs and outputs
285 m
.submodules
.specialcases
= self
286 m
.d
.comb
+= self
.i
.eq(i
)
287 m
.d
.comb
+= out_do_z
.eq(self
.out_do_z
)
289 def elaborate(self
, platform
):
292 m
.submodules
.sc_in_a
= self
.i
.a
293 m
.submodules
.sc_in_b
= self
.i
.b
294 m
.submodules
.sc_out_z
= self
.o
.z
297 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
300 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
302 # if a is NaN or b is NaN return NaN
303 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
304 m
.d
.comb
+= self
.out_do_z
.eq(1)
305 m
.d
.comb
+= self
.o
.z
.nan(0)
307 # XXX WEIRDNESS for FP16 non-canonical NaN handling
310 ## if a is zero and b is NaN return -b
311 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
312 # m.d.comb += self.out_do_z.eq(1)
313 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
315 ## if b is zero and a is NaN return -a
316 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
317 # m.d.comb += self.out_do_z.eq(1)
318 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
320 ## if a is -zero and b is NaN return -b
321 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
322 # m.d.comb += self.out_do_z.eq(1)
323 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
325 ## if b is -zero and a is NaN return -a
326 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
327 # m.d.comb += self.out_do_z.eq(1)
328 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
330 # if a is inf return inf (or NaN)
331 with m
.Elif(self
.i
.a
.is_inf
):
332 m
.d
.comb
+= self
.out_do_z
.eq(1)
333 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
334 # if a is inf and signs don't match return NaN
335 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
336 m
.d
.comb
+= self
.o
.z
.nan(0)
338 # if b is inf return inf
339 with m
.Elif(self
.i
.b
.is_inf
):
340 m
.d
.comb
+= self
.out_do_z
.eq(1)
341 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
343 # if a is zero and b zero return signed-a/b
344 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
345 m
.d
.comb
+= self
.out_do_z
.eq(1)
346 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
350 # if a is zero return b
351 with m
.Elif(self
.i
.a
.is_zero
):
352 m
.d
.comb
+= self
.out_do_z
.eq(1)
353 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
356 # if b is zero return a
357 with m
.Elif(self
.i
.b
.is_zero
):
358 m
.d
.comb
+= self
.out_do_z
.eq(1)
359 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
362 # if a equal to -b return zero (+ve zero)
363 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
364 m
.d
.comb
+= self
.out_do_z
.eq(1)
365 m
.d
.comb
+= self
.o
.z
.zero(0)
367 # Denormalised Number checks
369 m
.d
.comb
+= self
.out_do_z
.eq(0)
def __init__(self, id_wid):
    """ Create the multiplex-id input/output pair.

        id_wid: bit-width of the id signals, or None for "no id", in
                which case in_mid and out_mid are both None.
    """
    # remembered because the id-sync code tests self.id_wid
    self.id_wid = id_wid
    if id_wid is None:
        # the setup() methods guard on "self.in_mid is not None"
        self.in_mid = None
        self.out_mid = None
    else:
        self.in_mid = Signal(id_wid, reset_less=True)
        self.out_mid = Signal(id_wid, reset_less=True)
385 if self
.id_wid
is not None:
386 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
389 class FPAddSpecialCases(FPState
, FPID
):
390 """ special cases: NaNs, infs, zeros, denormalised
391 NOTE: some of these are unique to add. see "Special Operations"
392 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ "special_cases" state wrapping FPAddSpecialCasesMod.

        width:  operand bit-width
        id_wid: bit-width of the multiplex id
    """
    FPState.__init__(self, "special_cases")
    FPID.__init__(self, id_wid)
    # FPAddSpecialCasesMod requires both width and id_wid
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
402 def setup(self
, m
, in_a
, in_b
, in_mid
):
403 """ links module to inputs and outputs
405 self
.mod
.setup(m
, in_a
, in_b
, self
.out_do_z
)
406 if self
.in_mid
is not None:
407 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
411 with m
.If(self
.out_do_z
):
412 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
415 m
.next
= "denormalise"
418 class FPAddSpecialCasesDeNorm(FPState
, FPID
):
419 """ special cases: NaNs, infs, zeros, denormalised
420 NOTE: some of these are unique to add. see "Special Operations"
421 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ Combined special-cases + denormalise state.

        width:  operand bit-width
        id_wid: bit-width of the multiplex id
    """
    FPState.__init__(self, "special_cases")
    FPID.__init__(self, id_wid)

    # special-cases half: module, its output spec, and the "done" flag
    self.smod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.smod.ospec()
    self.out_do_z = Signal(reset_less=True)

    # denormalise half: module and its output spec
    self.dmod = FPAddDeNormMod(width, id_wid)
    self.o = self.dmod.ospec()
434 def setup(self
, m
, i
, in_mid
):
435 """ links module to inputs and outputs
437 self
.smod
.setup(m
, i
, self
.out_do_z
)
438 self
.dmod
.setup(m
, i
)
439 if self
.in_mid
is not None:
440 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
444 with m
.If(self
.out_do_z
):
445 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
449 m
.d
.sync
+= self
.o
.a
.eq(self
.dmod
.o
.a
)
450 m
.d
.sync
+= self
.o
.b
.eq(self
.dmod
.o
.b
)
453 class FPAddDeNormMod(FPState
):
def __init__(self, width, id_wid):
    """ Denormalisation module.

        width:  operand bit-width
        id_wid: bit-width of the multiplex id
    """
    # must be stored before ispec()/ospec() run: both read these
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.o = self.ospec()
462 return FPNumBase2Ops(self
.width
, self
.id_wid
)
465 return FPNumBase2Ops(self
.width
, self
.id_wid
)
467 def setup(self
, m
, i
):
468 """ links module to inputs and outputs
470 m
.submodules
.denormalise
= self
471 m
.d
.comb
+= self
.i
.eq(i
)
473 def elaborate(self
, platform
):
475 m
.submodules
.denorm_in_a
= self
.i
.a
476 m
.submodules
.denorm_in_b
= self
.i
.b
477 m
.submodules
.denorm_out_a
= self
.o
.a
478 m
.submodules
.denorm_out_b
= self
.o
.b
479 # hmmm, don't like repeating identical code
480 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
481 with m
.If(self
.i
.a
.exp_n127
):
482 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
484 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
486 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
487 with m
.If(self
.i
.b
.exp_n127
):
488 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
490 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
495 class FPAddDeNorm(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ "denormalise" state wrapping FPAddDeNormMod.

        width:  operand bit-width
        id_wid: bit-width of the multiplex id
    """
    FPState.__init__(self, "denormalise")
    FPID.__init__(self, id_wid)
    # FPAddDeNormMod requires both width and id_wid
    self.mod = FPAddDeNormMod(width, id_wid)
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
504 def setup(self
, m
, in_a
, in_b
, in_mid
):
505 """ links module to inputs and outputs
507 self
.mod
.setup(m
, in_a
, in_b
)
508 if self
.in_mid
is not None:
509 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
513 # Denormalised Number checks
515 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
516 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
519 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ Multi-cycle alignment module.

        width: operand bit-width
    """
    # operands as received
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    # operands after this step's (possible) shift
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # set by elaborate once neither exponent exceeds the other
    self.exp_eq = Signal(reset_less=True)
528 def elaborate(self
, platform
):
529 # This one however (single-cycle) will do the shift
534 m
.submodules
.align_in_a
= self
.in_a
535 m
.submodules
.align_in_b
= self
.in_b
536 m
.submodules
.align_out_a
= self
.out_a
537 m
.submodules
.align_out_b
= self
.out_b
539 # NOTE: this does *not* do single-cycle multi-shifting,
540 # it *STAYS* in the align state until exponents match
542 # exponent of a greater than b: shift b down
543 m
.d
.comb
+= self
.exp_eq
.eq(0)
544 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
545 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
546 agtb
= Signal(reset_less
=True)
547 altb
= Signal(reset_less
=True)
548 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
549 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
551 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
552 # exponent of b greater than a: shift a down
554 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
555 # exponents equal: move to next stage.
557 m
.d
.comb
+= self
.exp_eq
.eq(1)
561 class FPAddAlignMulti(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ "align" state wrapping the multi-cycle FPAddAlignMultiMod.

        width:  operand bit-width
        id_wid: bit-width of the multiplex id
    """
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "align")

    self.mod = FPAddAlignMultiMod(width)

    # registered copies of the module's outputs
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # mirrors mod.exp_eq (wired up in setup)
    self.exp_eq = Signal(reset_less=True)
571 def setup(self
, m
, in_a
, in_b
, in_mid
):
572 """ links module to inputs and outputs
574 m
.submodules
.align
= self
.mod
575 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
576 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
577 #m.d.comb += self.out_a.eq(self.mod.out_a)
578 #m.d.comb += self.out_b.eq(self.mod.out_b)
579 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
580 if self
.in_mid
is not None:
581 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
585 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
586 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
587 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """ Bundle of two FPNumIn operands plus a multiplex id.

        width:  bit-width handed to both FPNumIn instances
        id_wid: bit-width of the mid signal
    """
    # both operands are created without an op (first argument None)
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    # multiplex id travelling alongside the operands
    self.mid = Signal(id_wid, reset_less=True)
599 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
602 class FPAddAlignSingleMod
:
def __init__(self, width, id_wid):
    """ Single-cycle alignment module.

        width:  operand bit-width
        id_wid: bit-width of the multiplex id
    """
    # must be stored before ispec()/ospec() run; elaborate also reads
    # self.width when it builds its temporaries
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.o = self.ospec()
611 return FPNumBase2Ops(self
.width
, self
.id_wid
)
614 return FPNumIn2Ops(self
.width
, self
.id_wid
)
616 def setup(self
, m
, i
):
617 """ links module to inputs and outputs
619 m
.submodules
.align
= self
620 m
.d
.comb
+= self
.i
.eq(i
)
622 def elaborate(self
, platform
):
623 """ Aligns A against B or B against A, depending on which has the
624 greater exponent. This is done in a *single* cycle using
625 variable-width bit-shift
627 the shifter used here is quite expensive in terms of gates.
628 Mux A or B in (and out) into temporaries, as only one of them
629 needs to be aligned against the other
633 m
.submodules
.align_in_a
= self
.i
.a
634 m
.submodules
.align_in_b
= self
.i
.b
635 m
.submodules
.align_out_a
= self
.o
.a
636 m
.submodules
.align_out_b
= self
.o
.b
638 # temporary (muxed) input and output to be shifted
639 t_inp
= FPNumBase(self
.width
)
640 t_out
= FPNumIn(None, self
.width
)
641 espec
= (len(self
.i
.a
.e
), True)
642 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
643 m
.submodules
.align_t_in
= t_inp
644 m
.submodules
.align_t_out
= t_out
645 m
.submodules
.multishift_r
= msr
647 ediff
= Signal(espec
, reset_less
=True)
648 ediffr
= Signal(espec
, reset_less
=True)
649 tdiff
= Signal(espec
, reset_less
=True)
650 elz
= Signal(reset_less
=True)
651 egz
= Signal(reset_less
=True)
653 # connect multi-shifter to t_inp/out mantissa (and tdiff)
654 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
655 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
656 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
657 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
658 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
660 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
661 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
662 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
663 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
665 # default: A-exp == B-exp, A and B untouched (fall through)
666 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
667 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
668 # only one shifter (muxed)
669 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
670 # exponent of a greater than b: shift b down
672 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
675 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
677 # exponent of b greater than a: shift a down
679 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
682 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
687 class FPAddAlignSingle(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ "align" state wrapping the single-cycle FPAddAlignSingleMod.

        width:  operand bit-width
        id_wid: bit-width of the multiplex id
    """
    FPState.__init__(self, "align")
    FPID.__init__(self, id_wid)

    self.mod = FPAddAlignSingleMod(width, id_wid)

    # registered copies of the aligned operands
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
696 def setup(self
, m
, in_a
, in_b
, in_mid
):
697 """ links module to inputs and outputs
699 self
.mod
.setup(m
, in_a
, in_b
)
700 if self
.in_mid
is not None:
701 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
705 # NOTE: could be done as comb
706 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
707 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
711 class FPAddAlignSingleAdd(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ "align" state that chains align, add-stage-0 and add-stage-1.

        width:  operand bit-width
        id_wid: bit-width of the multiplex id

        For each of the three modules an instance of its output spec is
        kept alongside it.
    """
    FPState.__init__(self, "align")
    FPID.__init__(self, id_wid)

    # alignment
    self.mod = FPAddAlignSingleMod(width, id_wid)
    self.o = self.mod.ospec()

    # add, stage 0
    self.a0mod = FPAddStage0Mod(width, id_wid)
    self.a0o = self.a0mod.ospec()

    # add, stage 1
    self.a1mod = FPAddStage1Mod(width, id_wid)
    self.a1o = self.a1mod.ospec()
725 def setup(self
, m
, i
, in_mid
):
726 """ links module to inputs and outputs
729 m
.d
.comb
+= self
.o
.eq(self
.mod
.o
)
731 self
.a0mod
.setup(m
, self
.o
)
732 m
.d
.comb
+= self
.a0o
.eq(self
.a0mod
.o
)
734 self
.a1mod
.setup(m
, self
.a0o
)
736 if self
.in_mid
is not None:
737 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
741 m
.d
.sync
+= self
.a1o
.eq(self
.a1mod
.o
)
742 m
.next
= "normalise_1"
745 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """ Output record of add stage 0.

        width:  bit-width handed to FPNumBase for z
        id_wid: bit-width of the mid signal
    """
    # result number
    self.z = FPNumBase(width, False)
    # mantissa total: four bits wider than z's mantissa
    self.tot = Signal(self.z.m_width + 4, reset_less=True)
    # multiplex id travelling alongside the data
    self.mid = Signal(id_wid, reset_less=True)
753 return [self
.z
.eq(i
.z
), self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
756 class FPAddStage0Mod
:
def __init__(self, width, id_wid):
    """ Add stage 0 module.

        width:  operand bit-width
        id_wid: bit-width of the multiplex id
    """
    # must be stored before ispec()/ospec() run: both read these
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.o = self.ospec()
765 return FPNumBase2Ops(self
.width
, self
.id_wid
)
768 return FPAddStage0Data(self
.width
, self
.id_wid
)
770 def setup(self
, m
, i
):
771 """ links module to inputs and outputs
773 m
.submodules
.add0
= self
774 m
.d
.comb
+= self
.i
.eq(i
)
776 def elaborate(self
, platform
):
778 m
.submodules
.add0_in_a
= self
.i
.a
779 m
.submodules
.add0_in_b
= self
.i
.b
780 m
.submodules
.add0_out_z
= self
.o
.z
782 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
784 # store intermediate tests (and zero-extended mantissas)
785 seq
= Signal(reset_less
=True)
786 mge
= Signal(reset_less
=True)
787 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
788 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
789 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
790 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
791 am0
.eq(Cat(self
.i
.a
.m
, 0)),
792 bm0
.eq(Cat(self
.i
.b
.m
, 0))
794 # same-sign (both negative or both positive) add mantissas
797 self
.o
.tot
.eq(am0
+ bm0
),
798 self
.o
.z
.s
.eq(self
.i
.a
.s
)
800 # a mantissa greater than b, use a
803 self
.o
.tot
.eq(am0
- bm0
),
804 self
.o
.z
.s
.eq(self
.i
.a
.s
)
806 # b mantissa greater than a, use b
809 self
.o
.tot
.eq(bm0
- am0
),
810 self
.o
.z
.s
.eq(self
.i
.b
.s
)
815 class FPAddStage0(FPState
, FPID
):
816 """ First stage of add. covers same-sign (add) and subtract
817 special-casing when mantissas are greater or equal, to
818 give greatest accuracy.
def __init__(self, width, id_wid):
    """ "add_0" state wrapping FPAddStage0Mod.

        width:  operand bit-width
        id_wid: bit-width of the multiplex id
    """
    FPState.__init__(self, "add_0")
    FPID.__init__(self, id_wid)
    # FPAddStage0Mod requires both width and id_wid
    self.mod = FPAddStage0Mod(width, id_wid)
    self.o = self.mod.ospec()
827 def setup(self
, m
, in_a
, in_b
, in_mid
):
828 """ links module to inputs and outputs
830 self
.mod
.setup(m
, in_a
, in_b
)
831 if self
.in_mid
is not None:
832 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
836 # NOTE: these could be done as combinatorial (merge add0+add1)
837 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
841 class FPAddStage1Data
:
def __init__(self, width, id_wid):
    """ Output record of add stage 1.

        width:  bit-width handed to FPNumBase for z
        id_wid: bit-width of the mid signal
    """
    self.z = FPNumBase(width, False)
    # overflow bits (m0 / guard / round_bit / sticky): copied by eq() and
    # registered as a submodule by FPAddStage1Mod.setup
    self.of = Overflow()
    self.mid = Signal(id_wid, reset_less=True)
849 return [self
.z
.eq(i
.z
), self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
853 class FPAddStage1Mod(FPState
):
854 """ Second stage of add: preparation for normalisation.
855 detects when tot sum is too big (tot[27] is kinda a carry bit)
def __init__(self, width, id_wid):
    """ Add stage 1 module.

        width:  operand bit-width
        id_wid: bit-width of the multiplex id
    """
    # must be stored before ispec()/ospec() run: both read these
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.o = self.ospec()
865 return FPAddStage0Data(self
.width
, self
.id_wid
)
868 return FPAddStage1Data(self
.width
, self
.id_wid
)
870 def setup(self
, m
, i
):
871 """ links module to inputs and outputs
873 m
.submodules
.add1
= self
874 m
.submodules
.add1_out_overflow
= self
.o
.of
876 m
.d
.comb
+= self
.i
.eq(i
)
878 def elaborate(self
, platform
):
880 #m.submodules.norm1_in_overflow = self.in_of
881 #m.submodules.norm1_out_overflow = self.out_of
882 #m.submodules.norm1_in_z = self.in_z
883 #m.submodules.norm1_out_z = self.out_z
884 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
885 # tot[-1] (MSB) gets set when the sum overflows. shift result down
886 with m
.If(self
.i
.tot
[-1]):
888 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
889 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
890 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
891 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
892 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
893 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
895 # tot[-1] (MSB) zero case
898 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
899 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
900 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
901 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
902 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
907 class FPAddStage1(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ "add_1" state wrapping FPAddStage1Mod.

        width:  operand bit-width
        id_wid: bit-width of the multiplex id
    """
    FPState.__init__(self, "add_1")
    FPID.__init__(self, id_wid)
    # FPAddStage1Mod requires both width and id_wid
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # setup() clears this every cycle; the state's own code sets it to 1
    self.norm_stb = Signal()
917 def setup(self
, m
, in_tot
, in_z
, in_mid
):
918 """ links module to inputs and outputs
920 self
.mod
.setup(m
, in_tot
, in_z
)
922 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
924 if self
.in_mid
is not None:
925 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
929 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
930 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
931 m
.d
.sync
+= self
.norm_stb
.eq(1)
932 m
.next
= "normalise_1"
935 class FPNormaliseModSingle
:
def __init__(self, width):
    """ Single-cycle normalisation module.

        width: operand bit-width
    """
    # must be stored before ispec()/ospec() run; elaborate reads it too
    self.width = width
    self.in_z = self.ispec()
    self.out_z = self.ospec()
943 return FPNumBase(self
.width
, False)
946 return FPNumBase(self
.width
, False)
948 def setup(self
, m
, in_z
, out_z
):
949 """ links module to inputs and outputs
951 m
.submodules
.normalise
= self
952 m
.d
.comb
+= self
.in_z
.eq(in_z
)
953 m
.d
.comb
+= out_z
.eq(self
.out_z
)
955 def elaborate(self
, platform
):
958 mwid
= self
.out_z
.m_width
+2
959 pe
= PriorityEncoder(mwid
)
960 m
.submodules
.norm_pe
= pe
962 m
.submodules
.norm1_out_z
= self
.out_z
963 m
.submodules
.norm1_in_z
= self
.in_z
965 in_z
= FPNumBase(self
.width
, False)
967 m
.submodules
.norm1_insel_z
= in_z
968 m
.submodules
.norm1_insel_overflow
= in_of
970 espec
= (len(in_z
.e
), True)
971 ediff_n126
= Signal(espec
, reset_less
=True)
972 msr
= MultiShiftRMerge(mwid
, espec
)
973 m
.submodules
.multishift_r
= msr
975 m
.d
.comb
+= in_z
.eq(self
.in_z
)
976 m
.d
.comb
+= in_of
.eq(self
.in_of
)
977 # initialise out from in (overridden below)
978 m
.d
.comb
+= self
.out_z
.eq(in_z
)
979 m
.d
.comb
+= self
.out_of
.eq(in_of
)
980 # normalisation decrease condition
981 decrease
= Signal(reset_less
=True)
982 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
985 # *sigh* not entirely obvious: count leading zeros (clz)
986 # with a PriorityEncoder: to find from the MSB
987 # we reverse the order of the bits.
988 temp_m
= Signal(mwid
, reset_less
=True)
989 temp_s
= Signal(mwid
+1, reset_less
=True)
990 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
992 # cat round and guard bits back into the mantissa
993 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
994 pe
.i
.eq(temp_m
[::-1]), # inverted
995 clz
.eq(pe
.o
), # count zeros from MSB down
996 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
997 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
998 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """ Record carrying a number, its roundz flag and a multiplex id.

        width:  bit-width handed to FPNumBase for z
        id_wid: bit-width of the mid signal
    """
    # single-bit flag
    self.roundz = Signal(reset_less=True)
    # the number itself
    self.z = FPNumBase(width, False)
    # multiplex id travelling alongside the data
    self.mid = Signal(id_wid, reset_less=True)
1011 return [self
.z
.eq(i
.z
), self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1014 class FPNorm1ModSingle
:
def __init__(self, width, id_wid):
    """ Single-cycle normalise-1 module.

        width:  operand bit-width
        id_wid: bit-width of the multiplex id
    """
    # must be stored before ispec()/ospec() run: both read these
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.o = self.ospec()
1023 return FPAddStage1Data(self
.width
, self
.id_wid
)
1026 return FPNorm1Data(self
.width
, self
.id_wid
)
1028 def setup(self
, m
, i
):
1029 """ links module to inputs and outputs
1031 m
.submodules
.normalise_1
= self
1032 m
.d
.comb
+= self
.i
.eq(i
)
1034 def elaborate(self
, platform
):
1037 mwid
= self
.o
.z
.m_width
+2
1038 pe
= PriorityEncoder(mwid
)
1039 m
.submodules
.norm_pe
= pe
1042 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1044 m
.submodules
.norm1_out_z
= self
.o
.z
1045 m
.submodules
.norm1_out_overflow
= of
1046 m
.submodules
.norm1_in_z
= self
.i
.z
1047 m
.submodules
.norm1_in_overflow
= self
.i
.of
1050 m
.submodules
.norm1_insel_z
= i
.z
1051 m
.submodules
.norm1_insel_overflow
= i
.of
1053 espec
= (len(i
.z
.e
), True)
1054 ediff_n126
= Signal(espec
, reset_less
=True)
1055 msr
= MultiShiftRMerge(mwid
, espec
)
1056 m
.submodules
.multishift_r
= msr
1058 m
.d
.comb
+= i
.eq(self
.i
)
1059 # initialise out from in (overridden below)
1060 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1061 m
.d
.comb
+= of
.eq(i
.of
)
1062 # normalisation increase/decrease conditions
1063 decrease
= Signal(reset_less
=True)
1064 increase
= Signal(reset_less
=True)
1065 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1066 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1068 with m
.If(decrease
):
1069 # *sigh* not entirely obvious: count leading zeros (clz)
1070 # with a PriorityEncoder: to find from the MSB
1071 # we reverse the order of the bits.
1072 temp_m
= Signal(mwid
, reset_less
=True)
1073 temp_s
= Signal(mwid
+1, reset_less
=True)
1074 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1075 # make sure that the amount to decrease by does NOT
1076 # go below the minimum non-INF/NaN exponent
1077 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1080 # cat round and guard bits back into the mantissa
1081 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1082 pe
.i
.eq(temp_m
[::-1]), # inverted
1083 clz
.eq(limclz
), # count zeros from MSB down
1084 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1085 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1086 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1087 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1088 # overflow in bits 0..1: got shifted too (leave sticky)
1089 of
.guard
.eq(temp_s
[1]), # guard
1090 of
.round_bit
.eq(temp_s
[0]), # round
1093 with m
.Elif(increase
):
1094 temp_m
= Signal(mwid
+1, reset_less
=True)
1096 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1098 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1099 # connect multi-shifter to inp/out mantissa (and ediff)
1101 msr
.diff
.eq(ediff_n126
),
1102 self
.o
.z
.m
.eq(msr
.m
[3:]),
1103 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1104 # overflow in bits 0..1: got shifted too (leave sticky)
1105 of
.guard
.eq(temp_s
[2]), # guard
1106 of
.round_bit
.eq(temp_s
[1]), # round
1107 of
.sticky
.eq(temp_s
[0]), # sticky
1108 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1114 class FPNorm1ModMulti
:
def __init__(self, width, single_cycle=True):
    """ Multi-cycle normalise-1 module.

        width:        operand bit-width
        single_cycle: accepted but not used by this constructor
    """
    # elaborate builds FPNumBase(self.width, False)
    self.width = width
    # 1: take in_z/in_of, 0: take temp_z/temp_of
    self.in_select = Signal(reset_less=True)
    self.in_z = FPNumBase(width, False)
    self.in_of = Overflow()
    self.temp_z = FPNumBase(width, False)
    self.temp_of = Overflow()
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # driven by elaborate with (decrease | increase)
    # NOTE(review): reset_less=True chosen to match the other flags here
    self.out_norm = Signal(reset_less=True)
def elaborate(self, platform):
    """ builds the combinatorial single-step normalisation logic

        Method of FPNorm1ModMulti.  One z/overflow pair is selected:
        (self.in_z, self.in_of) when self.in_select is set, otherwise
        (self.temp_z, self.temp_of).  The selection is copied to
        (self.out_z, self.out_of) and then adjusted by one position:

        * decrease (in_z.m_msbzero & in_z.exp_gt_n126): exponent - 1,
          mantissa shifted up, guard bit moved into m[0].
        * increase (in_z.exp_lt_n126): exponent + 1, mantissa shifted
          down, m[0] moved into the guard bit.

        self.out_norm is set whenever either adjustment was applied.

        NOTE(review): the Module() creation, the local Overflow(), the
        Else branch, the list brackets and the final return were absent
        from the extracted text and are restored from context.
    """
    m = Module()

    m.submodules.norm1_out_z = self.out_z
    m.submodules.norm1_out_overflow = self.out_of
    m.submodules.norm1_temp_z = self.temp_z
    m.submodules.norm1_temp_of = self.temp_of
    m.submodules.norm1_in_z = self.in_z
    m.submodules.norm1_in_overflow = self.in_of

    # local copies holding whichever input pair was selected
    in_z = FPNumBase(self.width, False)
    in_of = Overflow()
    m.submodules.norm1_insel_z = in_z
    m.submodules.norm1_insel_overflow = in_of

    # select which of temp or in z/of to use
    with m.If(self.in_select):
        m.d.comb += in_z.eq(self.in_z)
        m.d.comb += in_of.eq(self.in_of)
    with m.Else():
        m.d.comb += in_z.eq(self.temp_z)
        m.d.comb += in_of.eq(self.temp_of)

    # initialise out from in (overridden below)
    m.d.comb += self.out_z.eq(in_z)
    m.d.comb += self.out_of.eq(in_of)

    # normalisation increase/decrease conditions
    decrease = Signal(reset_less=True)
    increase = Signal(reset_less=True)
    m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
    m.d.comb += increase.eq(in_z.exp_lt_n126)
    m.d.comb += self.out_norm.eq(decrease | increase) # loop-end

    with m.If(decrease):
        m.d.comb += [
            self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
            self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
            self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
            self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
            self.out_of.round_bit.eq(0),        # reset round bit
            self.out_of.m0.eq(in_of.guard),
        ]
    with m.Elif(increase):
        m.d.comb += [
            self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
            self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
            self.out_of.guard.eq(in_z.m[0]),
            self.out_of.m0.eq(in_z.m[1]),
            self.out_of.round_bit.eq(in_of.guard),
            self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
        ]

    return m
class FPNorm1Single(FPState, FPID):
    """ FSM state "normalise_1" wrapping FPNorm1ModSingle

        NOTE(review): the self.idsync(m) call and the transition to
        "round" in action() were absent from the extracted text and are
        restored from context - confirm against the upstream file.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """ * width: bit-width passed to the module and to out_z
            * id_wid: forwarded to FPID.__init__
            * single_cycle: accepted but not used by this class
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        # NOTE(review): FPNormToPack constructs this module as
        # FPNorm1ModSingle(width, id_wid); this one-argument form may be
        # stale - confirm against FPNorm1ModSingle.__init__.
        self.mod = FPNorm1ModSingle(width)
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, self.out_z)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ latches the module's roundz flag and moves on to "round"
        """
        self.idsync(m)
        m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
        m.next = "round"
class FPNorm1Multi(FPState, FPID):
    """ FSM state "normalise_1" wrapping FPNorm1ModMulti

        Stays in this state while the module reports out_norm, feeding
        the registered temp_z/temp_of back into the module each clock.

        NOTE(review): in action(), the self.idsync(m) call, the body of
        the in_accept branch, both Else branches and the transition to
        "round" were absent from the extracted text and are restored
        from context - confirm against the upstream file.
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ one clock of the normalisation loop
        """
        self.idsync(m)
        # in_accept: incoming strobe HIGH and ack LOW
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # register this step's result as the next step's input
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += [
                    self.ack.eq(1),
                ]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
class FPNormToPack(FPState, FPID):
    """ FSM state "normalise_1" chaining norm -> round -> corrections -> pack

        All four modules are connected combinatorially in setup(); the
        packed value is registered in action().
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        # setup() reads self.width when constructing the chained modules
        self.width = width

    def setup(self, m, i, in_mid):
        """ links module to inputs and outputs
        """

        # Normalisation (chained to input in_z+in_of)
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        # NOTE(review): this call was absent from the extracted text; it
        # is restored to match the rmod/cmod/pmod pattern below (without
        # it the parameter "i" is unused) - confirm the setup signature.
        nmod.setup(m, i)
        n_out = nmod.ospec()
        m.d.comb += n_out.eq(nmod.o)

        # Rounding (chained to normalisation)
        rmod = FPRoundMod(self.width, self.id_wid)
        rmod.setup(m, n_out)
        r_out_z = rmod.ospec()
        m.d.comb += r_out_z.eq(rmod.out_z)

        # Corrections (chained to rounding)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        cmod.setup(m, r_out_z)
        c_out_z = cmod.ospec()
        m.d.comb += c_out_z.eq(cmod.out_z)

        # Pack (chained to corrections)
        self.pmod = FPPackMod(self.width, self.id_wid)
        self.pmod.setup(m, c_out_z)
        self.out_z = self.pmod.ospec()

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the packed result and moves on to "pack_put_z"
        """
        self.idsync(m) # copies incoming ID to outgoing
        m.d.sync += self.out_z.z.v.eq(self.pmod.o.z.v) # outputs packed result
        m.next = "pack_put_z"
class FPRoundData:
    """ data record: a number (z) plus its multiplex id (mid)

        NOTE(review): the class header and "def eq" line were absent
        from the extracted text; the name is taken from
        FPRoundMod.ospec(), which returns FPRoundData(width, id_wid).
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i into this record
        """
        return [self.z.eq(i.z), self.mid.eq(i.mid)]
class FPRoundMod:
    """ combinatorial rounding stage

        NOTE(review): the class header was absent from the extracted
        text; the name is taken from FPNormToPack.setup(), which builds
        FPRoundMod(width, id_wid) and reads its out_z.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input record type
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ output record type
        """
        return FPRoundData(self.width, self.id_wid)

    def setup(self, m, i):
        """ registers this module as m.submodules.roundz and wires i in
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        # default: pass the input straight through
        m.d.comb += self.out_z.eq(self.i)
        with m.If(self.i.roundz):
            m.d.comb += self.out_z.z.m.eq(self.i.z.m + 1) # mantissa rounds up
            with m.If(self.i.z.m == self.i.z.m1s): # all 1s
                m.d.comb += self.out_z.z.e.eq(self.i.z.e + 1) # exponent up
        return m
class FPRound(FPState, FPID):
    """ FSM state "round" wrapping FPRoundMod
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        # FPRoundMod.__init__ requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.mod.ospec()

    def setup(self, m, in_z, roundz, in_mid):
        """ links module to inputs and outputs
        """
        # NOTE(review): FPRoundMod.setup is declared as setup(m, i) with a
        # single input record; this three-argument call looks stale and
        # needs reconciling before this state can be used.
        self.mod.setup(m, in_z, roundz)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the rounded result and moves on to "corrections"
        """
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.next = "corrections"
class FPCorrectionsMod:
    """ combinatorial corrections stage

        Passes in_z through unchanged, except that when
        in_z.z.is_denormalised is set the exponent is replaced with
        in_z.z.N127.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.in_z = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input record type
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output record type
        """
        return FPRoundData(self.width, self.id_wid)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.in_z.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        m.submodules.corr_in_z = self.in_z.z
        m.submodules.corr_out_z = self.out_z.z
        m.d.comb += self.out_z.eq(self.in_z)
        with m.If(self.in_z.z.is_denormalised):
            m.d.comb += self.out_z.z.e.eq(self.in_z.z.N127)
        return m
class FPCorrections(FPState, FPID):
    """ FSM state "corrections" wrapping FPCorrectionsMod

        NOTE(review): the self.idsync(m) call and the transition to
        "pack" in action() were absent from the extracted text and are
        restored from context - confirm against the upstream file.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        # FPCorrectionsMod.__init__ requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.mod.ospec()

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the corrected result and moves on to "pack"
        """
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.next = "pack"
class FPPackData:
    """ data record: a packed number (z) plus its multiplex id (mid)

        NOTE(review): the class header and "def eq" line were absent
        from the extracted text; the name is taken from
        FPPackMod.ospec(), which returns FPPackData(width, id_wid).
    """

    def __init__(self, width, id_wid):
        self.z = FPNumOut(width, False)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i into this record
        """
        return [self.z.eq(i.z), self.mid.eq(i.mid)]
class FPPackMod:
    """ combinatorial pack stage

        Produces infinity (with the input's sign) when i.z.is_overflowed
        is set, otherwise builds the output from the input's sign,
        exponent and mantissa.

        NOTE(review): the class header was absent from the extracted
        text; the name is taken from FPNormToPack.setup(), which builds
        FPPackMod(width, id_wid) and reads its "o" record.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input record type
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output record type
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        m.submodules.pack_in_z = self.i.z
        with m.If(self.i.z.is_overflowed):
            m.d.comb += self.o.z.inf(self.i.z.s)
        with m.Else():
            m.d.comb += self.o.z.create(self.i.z.s, self.i.z.e, self.i.z.m)
        return m
# NOTE(review): the class header was absent from the extracted text.  The
# body is identical to the FPPackData record defined before FPPackMod, so
# this is presumed to be a repeated definition of the same class; confirm
# the name and consider removing the duplicate.
class FPPackData:
    """ data record: a packed number (z) plus its multiplex id (mid)
    """

    def __init__(self, width, id_wid):
        self.z = FPNumOut(width, False)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i into this record
        """
        return [self.z.eq(i.z), self.mid.eq(i.mid)]
class FPPack(FPState, FPID):
    """ FSM state "pack" wrapping FPPackMod
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        # FPPackMod.__init__ requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input record type (that of the wrapped module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record type (that of the wrapped module)
        """
        return self.mod.ospec()

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the packed value and moves on to "pack_put_z"
        """
        self.idsync(m)
        # FPPackMod exposes its result as "o" and the record keeps the
        # number under "z" (same access path as FPNormToPack.action)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ FSM state that presents a result on out_z and waits for its ack

        * state: name of this FSM state
        * in_z: value source (its .v is copied out)
        * out_z: destination record; out_z.z carries v/stb/ack
        * in_mid / out_mid: id copied alongside (skipped if in_mid is None)
        * to_state: state entered once acknowledged (default "get_ops")
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += [
            self.out_z.z.v.eq(self.in_z.v)
        ]
        with m.If(self.out_z.z.stb & self.out_z.z.ack):
            # strobe seen and acknowledged: drop strobe, leave the state
            m.d.sync += self.out_z.z.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_z.z.stb.eq(1)
class FPPutZIdx(FPState):
    """ FSM state that presents a result on out_zs[in_mid] and waits for ack

        * state: name of this FSM state
        * in_z: value source (its .v is copied out)
        * out_zs: indexable collection of outputs, each with v/stb/ack
        * in_mid: index selecting which output receives the result
        * to_state: state entered once acknowledged (default "get_ops")
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
                     outz_ack.eq(self.out_zs[self.in_mid].ack),
                    ]
        m.d.sync += [
            self.out_zs[self.in_mid].v.eq(self.in_z.v)
        ]
        with m.If(outz_stb & outz_ack):
            # strobe seen and acknowledged: drop strobe, leave the state
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
class FPADDBaseData:
    """ data record: two operands (a, b) plus their multiplex id (mid)
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i into this record
        """
        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
class FPOpData:
    """ data record: an FPOp (z) plus its multiplex id (mid)

        NOTE(review): the class header and "def eq" line were absent
        from the extracted text; the name is taken from
        FPADDBaseMod.ospec(), which returns FPOpData(width, id_wid).
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i into this record
        """
        return [self.z.eq(i.z), self.mid.eq(i.mid)]
class FPADDBaseMod(FPID):
    """ the adder proper: an FSM assembled from FPState objects
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        FPID.__init__(self, id_wid)
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        self.states = []

    def ispec(self):
        """ input record type
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output record type
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the FSM state list and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        with m.FSM() as fsm:

            # one FSM state per registered FPState, keyed by its name
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ builds the one-state-per-stage pipeline (compact=False)

            NOTE(review): this path looks stale.  It reads self.in_a,
            self.in_b and self.out_z, none of which __init__ defines
            (the class now exposes self.i / self.o).  The trailing
            FPGet2Op arguments and the "a"/"b" assignments were absent
            from the extracted text: the arguments follow the call in
            get_compact_fragment, and the out_op1/out_op2 attribute
            names are a guess - confirm before use.
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width, self.id_wid))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                   pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ builds the reduced pipeline (compact=True):
            get_ops -> special cases + denorm -> align + add
            -> norm-to-pack -> put z
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width, self.id_wid))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCasesDeNorm(self.width, self.id_wid))
        sc.setup(m, get.o, self.in_mid)

        alm = self.add_state(FPAddAlignSingleAdd(self.width, self.id_wid))
        alm.setup(m, sc.o, sc.in_mid)

        n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
        n1.setup(m, alm.a1o, alm.in_mid)

        # normal results leave via "pack_put_z" ...
        ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                                    n1.in_mid, self.out_mid))

        # ... and results taken from the special-cases stage via "put_z"
        pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
                                   sc.in_mid, self.out_mid))
class FPADDBase(FPState, FPID):
    """ FSM state "fpadd": a stage that itself contains an FPADDBaseMod
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input record type (that of the wrapped module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record type (that of the wrapped module)
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ wires this stage's records and triggers to the wrapped module
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.in_mid.eq(in_mid),
                     self.mod.in_mid.eq(self.in_mid),
                     self.z_done.eq(self.mod.o.z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ hands operands to the wrapped adder and waits for its result

            NOTE(review): the list brackets, Else branches, the
            o.z.ack/in_t.stb entries that close the two sync lists and
            the transition to "put_z" were absent from the extracted
            text and are restored from context - confirm against the
            upstream file.  Trailing code that read self.out_z and
            self.mod.out_z (attributes neither class defines) has been
            left out.
        """

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        #with m.If(self.in_t.ack):
        #    m.d.sync += self.in_t.stb.eq(0)
        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
# NOTE(review): the class header was absent from the extracted text and
# nothing else in view refers to this class; the name ResArray is a
# reconstruction - confirm against the upstream file.
class ResArray:
    """ an Array of FPOp result ports plus a single incoming z/mid pair
    """

    def __init__(self, width, id_wid, rs_sz=2):
        """ * width: bit-width of each FPOp
            * id_wid: bit-width of in_mid
            * rs_sz: number of result ports.  The loop read "rs_sz"
              without it being defined in this scope; it is now a
              keyword parameter (default 2, presumably matching the
              adder's reservation-station count - confirm).
        """
        self.width = width
        self.id_wid = id_wid
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)
        self.in_z = FPOp(width)
        self.in_mid = Signal(self.id_wid, reset_less=True)

    def setup(self, m, in_z, in_mid):
        """ wires the incoming z and mid into this object's copies
        """
        m.d.comb += [self.in_z.eq(in_z),
                     self.in_mid.eq(in_mid)]

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.res_in_z = self.in_z
        m.submodules += self.res

        return m
# NOTE(review): the class header was absent from the extracted text.  The
# name FPADD comes from the __main__ usage (FPADD(width=..., id_wid=...,
# single_cycle=...) with .rs/.res/.ids); the FPID base is a reconstruction.
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) input pairs and out_z outputs
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        # input operand pairs, named in_a_<n> / in_b_<n>
        rs = []
        for i in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % i
            in_b.name = "in_b_%d" % i
            rs.append((in_a, in_b))
        self.rs = Array(rs)

        # result ports, named out_z_<n>
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)

        self.states = []

    def add_state(self, state):
        """ appends state to the FSM state list and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd

            NOTE(review): the trailing arguments of both FPGetOp calls,
            the geta/getb setup calls, the "a"/"b"/"o" assignments and
            the last two FPPutZIdx arguments were absent from the
            extracted text and are restored from context - confirm
            against FPGetOp and the upstream file.
        """
        m = Module()
        m.submodules += self.rs

        # only the first input pair is connected
        in_a = self.rs[0][0]
        in_b = self.rs[0][1]

        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = FPADDBase(self.width, self.id_wid, self.single_cycle)
        ab = self.add_state(ab)
        abd = ab.ispec() # create an input spec object for FPADDBase
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        # route the result to res[o.mid], then go back to "get_a"
        pz = self.add_state(FPPutZIdx("put_z", o.z, self.res,
                                      o.mid, "get_a"))

        with m.FSM() as fsm:

            # one FSM state per registered FPState, keyed by its name
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
if __name__ == "__main__":
    # Command-line entry point: hands an FPADD instance and its ports to
    # the nmigen CLI main().
    alu = FPADD(width=32, id_wid=5, single_cycle=True)
    main(alu, ports=alu.rs[0][0].ports() + \
                     alu.rs[0][1].ports() + \
                     alu.res[0].ports() + \
                     [alu.ids.in_mid, alu.ids.out_mid])

    # NOTE(review): a second variant that ran main() on FPADDBase with
    # alu.in_a / alu.in_b / alu.out_z has been left out: FPADDBase.__init__
    # defines i / o / in_t and none of those attributes.

    # works... but don't use, just do "python fname.py convert -t v"
    #print (verilog.convert(alu, ports=[
    #                        ports=alu.in_a.ports() + \
    #                              alu.in_b.ports() + \
    #                              alu.out_z.ports())