1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 #from fpbase import FPNumShiftMultiRight
class FPState(FPBase):
    """ Base class for a named state of the floating-point pipeline.

        Holds the state's name plus optional dictionaries of named inputs
        and outputs; each dictionary entry is also exposed as an attribute
        of the same name on the instance.
    """

    def __init__(self, state_from):
        """ state_from: name identifying this state (subclasses in this
            file pass strings such as "align" or "add_0")
        """
        self.state_from = state_from

    def set_inputs(self, inputs):
        """ records the inputs dict and binds every entry as an attribute

            inputs: mapping of attribute name -> object
        """
        # keep the dict itself, mirroring what set_outputs does
        self.inputs = inputs
        for name, value in inputs.items():
            setattr(self, name, value)

    def set_outputs(self, outputs):
        """ records the outputs dict and binds every entry as an attribute

            outputs: mapping of attribute name -> object
        """
        self.outputs = outputs
        for name, value in outputs.items():
            setattr(self, name, value)
30 class FPGetSyncOpsMod
:
31 def __init__(self
, width
, num_ops
=2):
33 self
.num_ops
= num_ops
36 for i
in range(num_ops
):
37 inops
.append(Signal(width
, reset_less
=True))
38 outops
.append(Signal(width
, reset_less
=True))
41 self
.stb
= Signal(num_ops
)
43 self
.ready
= Signal(reset_less
=True)
44 self
.out_decode
= Signal(reset_less
=True)
46 def elaborate(self
, platform
):
48 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
49 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
50 with m
.If(self
.out_decode
):
51 for i
in range(self
.num_ops
):
53 self
.out_op
[i
].eq(self
.in_op
[i
]),
58 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
62 def __init__(self
, width
, num_ops
):
63 Trigger
.__init
__(self
)
65 self
.num_ops
= num_ops
68 for i
in range(num_ops
):
69 res
.append(Signal(width
))
74 for i
in range(self
.num_ops
):
82 def __init__(self
, width
, num_ops
=2, num_rows
=4):
84 self
.num_ops
= num_ops
85 self
.num_rows
= num_rows
86 self
.mmax
= int(log(self
.num_rows
) / log(2))
88 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
89 for i
in range(num_rows
):
90 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
91 self
.rs
= Array(self
.rs
)
93 self
.out_op
= FPOps(width
, num_ops
)
95 def elaborate(self
, platform
):
98 pe
= PriorityEncoder(self
.num_rows
)
99 m
.submodules
.selector
= pe
100 m
.submodules
.out_op
= self
.out_op
101 m
.submodules
.out_op_v
= self
.out_op
.v
102 m
.submodules
+= self
.rs
104 # connect priority encoder
106 for i
in range(self
.num_rows
):
107 in_ready
.append(self
.rs
[i
].ready
)
108 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
110 active
= Signal(reset_less
=True)
111 out_en
= Signal(reset_less
=True)
112 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
113 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
115 # encoder active: ack relevant input, record MID, pass output
118 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
119 m
.d
.sync
+= rs
.ack
.eq(0)
120 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
121 for j
in range(self
.num_ops
):
122 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
124 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
125 # acks all default to zero
126 for i
in range(self
.num_rows
):
127 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
133 for i
in range(self
.num_rows
):
135 res
+= inop
.in_op
+ [inop
.stb
]
136 return self
.out_op
.ports() + res
+ [self
.mid
]
140 def __init__(self
, width
):
141 self
.in_op
= FPOp(width
)
142 self
.out_op
= Signal(width
)
143 self
.out_decode
= Signal(reset_less
=True)
145 def elaborate(self
, platform
):
147 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
148 m
.submodules
.get_op_in
= self
.in_op
149 #m.submodules.get_op_out = self.out_op
150 with m
.If(self
.out_decode
):
152 self
.out_op
.eq(self
.in_op
.v
),
157 class FPGetOp(FPState
):
161 def __init__(self
, in_state
, out_state
, in_op
, width
):
162 FPState
.__init
__(self
, in_state
)
163 self
.out_state
= out_state
164 self
.mod
= FPGetOpMod(width
)
166 self
.out_op
= Signal(width
)
167 self
.out_decode
= Signal(reset_less
=True)
169 def setup(self
, m
, in_op
):
170 """ links module to inputs and outputs
172 setattr(m
.submodules
, self
.state_from
, self
.mod
)
173 m
.d
.comb
+= self
.mod
.in_op
.copy(in_op
)
174 #m.d.comb += self.out_op.eq(self.mod.out_op)
175 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
178 with m
.If(self
.out_decode
):
179 m
.next
= self
.out_state
181 self
.in_op
.ack
.eq(0),
182 self
.out_op
.eq(self
.mod
.out_op
)
185 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
class FPGet2OpMod(Trigger):
    """ Decodes two raw operands into FPNumIn form when triggered.

        in_op1/in_op2 are plain width-bit signals; out_op1/out_op2 are the
        decoded numbers.  The trigger condition itself comes from the
        Trigger base class.
    """

    def __init__(self, width):
        """ width: bit-width of each raw operand
        """
        Trigger.__init__(self)
        self.in_op1 = Signal(width, reset_less=True)
        self.in_op2 = Signal(width, reset_less=True)
        self.out_op1 = FPNumIn(None, width)
        self.out_op2 = FPNumIn(None, width)

    def elaborate(self, platform):
        """ builds on the Module produced by Trigger.elaborate and returns it
        """
        m = Trigger.elaborate(self, platform)
        #m.submodules.get_op_in = self.in_op
        m.submodules.get_op1_out = self.out_op1
        m.submodules.get_op2_out = self.out_op2
        with m.If(self.trigger):
            # NOTE(review): the decode statements must be attached to a
            # domain to have any effect.  comb is assumed because the
            # FPGet2Op state latches these outputs with m.d.sync in the
            # same cycle that trigger is asserted - confirm.
            m.d.comb += [
                self.out_op1.decode(self.in_op1),
                self.out_op2.decode(self.in_op2),
            ]
        return m
209 class FPGet2Op(FPState
):
213 def __init__(self
, in_state
, out_state
, in_op1
, in_op2
, width
):
214 FPState
.__init
__(self
, in_state
)
215 self
.out_state
= out_state
216 self
.mod
= FPGet2OpMod(width
)
219 self
.out_op1
= FPNumIn(None, width
)
220 self
.out_op2
= FPNumIn(None, width
)
221 self
.in_stb
= Signal(reset_less
=True)
222 self
.out_ack
= Signal(reset_less
=True)
223 self
.out_decode
= Signal(reset_less
=True)
225 def setup(self
, m
, in_op1
, in_op2
, in_stb
, in_ack
):
226 """ links module to inputs and outputs
228 m
.submodules
.get_ops
= self
.mod
229 m
.d
.comb
+= self
.mod
.in_op1
.eq(in_op1
)
230 m
.d
.comb
+= self
.mod
.in_op2
.eq(in_op2
)
231 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
232 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
233 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
234 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
237 with m
.If(self
.out_decode
):
238 m
.next
= self
.out_state
241 #self.out_op1.v.eq(self.mod.out_op1.v),
242 #self.out_op2.v.eq(self.mod.out_op2.v),
243 self
.out_op1
.copy(self
.mod
.out_op1
),
244 self
.out_op2
.copy(self
.mod
.out_op2
)
247 m
.d
.sync
+= self
.mod
.ack
.eq(1)
250 class FPAddSpecialCasesMod
:
251 """ special cases: NaNs, infs, zeros, denormalised
252 NOTE: some of these are unique to add. see "Special Operations"
253 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
256 def __init__(self
, width
):
257 self
.in_a
= FPNumBase(width
)
258 self
.in_b
= FPNumBase(width
)
259 self
.out_z
= FPNumOut(width
, False)
260 self
.out_do_z
= Signal(reset_less
=True)
262 def setup(self
, m
, in_a
, in_b
, out_do_z
):
263 """ links module to inputs and outputs
265 m
.submodules
.specialcases
= self
266 m
.d
.comb
+= self
.in_a
.copy(in_a
)
267 m
.d
.comb
+= self
.in_b
.copy(in_b
)
268 m
.d
.comb
+= out_do_z
.eq(self
.out_do_z
)
270 def elaborate(self
, platform
):
273 m
.submodules
.sc_in_a
= self
.in_a
274 m
.submodules
.sc_in_b
= self
.in_b
275 m
.submodules
.sc_out_z
= self
.out_z
278 m
.d
.comb
+= s_nomatch
.eq(self
.in_a
.s
!= self
.in_b
.s
)
281 m
.d
.comb
+= m_match
.eq(self
.in_a
.m
== self
.in_b
.m
)
283 # if a is NaN or b is NaN return NaN
284 with m
.If(self
.in_a
.is_nan | self
.in_b
.is_nan
):
285 m
.d
.comb
+= self
.out_do_z
.eq(1)
286 m
.d
.comb
+= self
.out_z
.nan(0)
288 # XXX WEIRDNESS for FP16 non-canonical NaN handling
291 ## if a is zero and b is NaN return -b
292 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
293 # m.d.comb += self.out_do_z.eq(1)
294 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
296 ## if b is zero and a is NaN return -a
297 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
298 # m.d.comb += self.out_do_z.eq(1)
299 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
301 ## if a is -zero and b is NaN return -b
302 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
303 # m.d.comb += self.out_do_z.eq(1)
304 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
306 ## if b is -zero and a is NaN return -a
307 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
308 # m.d.comb += self.out_do_z.eq(1)
309 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
311 # if a is inf return inf (or NaN)
312 with m
.Elif(self
.in_a
.is_inf
):
313 m
.d
.comb
+= self
.out_do_z
.eq(1)
314 m
.d
.comb
+= self
.out_z
.inf(self
.in_a
.s
)
315 # if a is inf and signs don't match return NaN
316 with m
.If(self
.in_b
.exp_128
& s_nomatch
):
317 m
.d
.comb
+= self
.out_z
.nan(0)
319 # if b is inf return inf
320 with m
.Elif(self
.in_b
.is_inf
):
321 m
.d
.comb
+= self
.out_do_z
.eq(1)
322 m
.d
.comb
+= self
.out_z
.inf(self
.in_b
.s
)
324 # if a is zero and b zero return signed-a/b
325 with m
.Elif(self
.in_a
.is_zero
& self
.in_b
.is_zero
):
326 m
.d
.comb
+= self
.out_do_z
.eq(1)
327 m
.d
.comb
+= self
.out_z
.create(self
.in_a
.s
& self
.in_b
.s
,
331 # if a is zero return b
332 with m
.Elif(self
.in_a
.is_zero
):
333 m
.d
.comb
+= self
.out_do_z
.eq(1)
334 m
.d
.comb
+= self
.out_z
.create(self
.in_b
.s
, self
.in_b
.e
,
337 # if b is zero return a
338 with m
.Elif(self
.in_b
.is_zero
):
339 m
.d
.comb
+= self
.out_do_z
.eq(1)
340 m
.d
.comb
+= self
.out_z
.create(self
.in_a
.s
, self
.in_a
.e
,
343 # if a equal to -b return zero (+ve zero)
344 with m
.Elif(s_nomatch
& m_match
& (self
.in_a
.e
== self
.in_b
.e
)):
345 m
.d
.comb
+= self
.out_do_z
.eq(1)
346 m
.d
.comb
+= self
.out_z
.zero(0)
348 # Denormalised Number checks
350 m
.d
.comb
+= self
.out_do_z
.eq(0)
356 def __init__(self
, id_wid
):
359 self
.in_mid
= Signal(id_wid
, reset_less
=True)
360 self
.out_mid
= Signal(id_wid
, reset_less
=True)
366 if self
.id_wid
is not None:
367 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
370 class FPAddSpecialCases(FPState
, FPID
):
371 """ special cases: NaNs, infs, zeros, denormalised
372 NOTE: some of these are unique to add. see "Special Operations"
373 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
376 def __init__(self
, width
, id_wid
):
377 FPState
.__init
__(self
, "special_cases")
378 FPID
.__init
__(self
, id_wid
)
379 self
.mod
= FPAddSpecialCasesMod(width
)
380 self
.out_z
= FPNumOut(width
, False)
381 self
.out_do_z
= Signal(reset_less
=True)
383 def setup(self
, m
, in_a
, in_b
, in_mid
):
384 """ links module to inputs and outputs
386 self
.mod
.setup(m
, in_a
, in_b
, self
.out_do_z
)
387 if self
.in_mid
is not None:
388 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
392 with m
.If(self
.out_do_z
):
393 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
396 m
.next
= "denormalise"
399 class FPAddSpecialCasesDeNorm(FPState
, FPID
):
400 """ special cases: NaNs, infs, zeros, denormalised
401 NOTE: some of these are unique to add. see "Special Operations"
402 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
405 def __init__(self
, width
, id_wid
):
406 FPState
.__init
__(self
, "special_cases")
407 FPID
.__init
__(self
, id_wid
)
408 self
.smod
= FPAddSpecialCasesMod(width
)
409 self
.out_z
= FPNumOut(width
, False)
410 self
.out_do_z
= Signal(reset_less
=True)
412 self
.dmod
= FPAddDeNormMod(width
)
413 self
.out_a
= FPNumBase(width
)
414 self
.out_b
= FPNumBase(width
)
416 def setup(self
, m
, in_a
, in_b
, in_mid
):
417 """ links module to inputs and outputs
419 self
.smod
.setup(m
, in_a
, in_b
, self
.out_do_z
)
420 self
.dmod
.setup(m
, in_a
, in_b
)
421 if self
.in_mid
is not None:
422 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
426 with m
.If(self
.out_do_z
):
427 m
.d
.sync
+= self
.out_z
.v
.eq(self
.smod
.out_z
.v
) # only take output
431 m
.d
.sync
+= self
.out_a
.copy(self
.dmod
.out_a
)
432 m
.d
.sync
+= self
.out_b
.copy(self
.dmod
.out_b
)
class FPAddDeNormMod(FPState):
    """ Combinatorial denormalisation of the two add operands.

        Each output is a copy of its input, except that either the exponent
        is limited (when the input's exp_n127 flag is set) or the top
        mantissa bit is forced to 1 (otherwise).
    """

    def __init__(self, width):
        """ width: bit-width passed through to the FPNumBase operands
        """
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.denormalise = self
        m.d.comb += self.in_a.copy(in_a)
        m.d.comb += self.in_b.copy(in_b)

    @staticmethod
    def _denormalise(m, inp, out):
        """ emits the denormalisation logic for one operand into m
        """
        # default: output is the untouched input
        m.d.comb += out.copy(inp)
        # presumably exp_n127 flags a denormal input (exponent == -127)
        # and N126 is the minimum normal exponent - TODO confirm in fpbase
        with m.If(inp.exp_n127):
            m.d.comb += out.e.eq(inp.N126) # limit exponent
        with m.Else():
            m.d.comb += out.m[-1].eq(1) # set top mantissa bit

    def elaborate(self, platform):
        """ creates the module: identical treatment applied to a and to b
        """
        m = Module()
        m.submodules.denorm_in_a = self.in_a
        m.submodules.denorm_in_b = self.in_b
        m.submodules.denorm_out_a = self.out_a
        m.submodules.denorm_out_b = self.out_b

        self._denormalise(m, self.in_a, self.out_a)
        self._denormalise(m, self.in_b, self.out_b)

        return m
472 class FPAddDeNorm(FPState
, FPID
):
474 def __init__(self
, width
, id_wid
):
475 FPState
.__init
__(self
, "denormalise")
476 FPID
.__init
__(self
, id_wid
)
477 self
.mod
= FPAddDeNormMod(width
)
478 self
.out_a
= FPNumBase(width
)
479 self
.out_b
= FPNumBase(width
)
481 def setup(self
, m
, in_a
, in_b
, in_mid
):
482 """ links module to inputs and outputs
484 self
.mod
.setup(m
, in_a
, in_b
)
485 if self
.in_mid
is not None:
486 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
490 # Denormalised Number checks
492 m
.d
.sync
+= self
.out_a
.copy(self
.mod
.out_a
)
493 m
.d
.sync
+= self
.out_b
.copy(self
.mod
.out_b
)
class FPAddAlignMultiMod(FPState):
    """ One alignment step of the two add operands (combinatorial).

        Compares the exponents of a and b.  Whichever operand has the
        smaller exponent is passed through shift_down; when the exponents
        are equal both operands pass through unchanged and exp_eq is raised.
    """

    def __init__(self, width):
        """ width: bit-width passed through to the operand objects
        """
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        """ creates and returns the alignment-step module
        """
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        #       it *STAYS* in the align state until exponents match

        # defaults: not yet equal, operands passed through untouched
        m.d.comb += self.exp_eq.eq(0)
        m.d.comb += self.out_a.copy(self.in_a)
        m.d.comb += self.out_b.copy(self.in_b)
        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
        m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
        # exponent of a greater than b: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: move to next stage.
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)

        return m
538 class FPAddAlignMulti(FPState
, FPID
):
540 def __init__(self
, width
, id_wid
):
541 FPID
.__init
__(self
, id_wid
)
542 FPState
.__init
__(self
, "align")
543 self
.mod
= FPAddAlignMultiMod(width
)
544 self
.out_a
= FPNumIn(None, width
)
545 self
.out_b
= FPNumIn(None, width
)
546 self
.exp_eq
= Signal(reset_less
=True)
548 def setup(self
, m
, in_a
, in_b
, in_mid
):
549 """ links module to inputs and outputs
551 m
.submodules
.align
= self
.mod
552 m
.d
.comb
+= self
.mod
.in_a
.copy(in_a
)
553 m
.d
.comb
+= self
.mod
.in_b
.copy(in_b
)
554 #m.d.comb += self.out_a.copy(self.mod.out_a)
555 #m.d.comb += self.out_b.copy(self.mod.out_b)
556 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
557 if self
.in_mid
is not None:
558 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
562 m
.d
.sync
+= self
.out_a
.copy(self
.mod
.out_a
)
563 m
.d
.sync
+= self
.out_b
.copy(self
.mod
.out_b
)
564 with m
.If(self
.exp_eq
):
class FPAddAlignSingleMod:
    """ Single-cycle alignment of the two add operands (combinatorial).
    """

    def __init__(self, width):
        """ width: bit-width passed through to the operand objects
        """
        # elaborate() needs the width again to build its temporaries
        self.width = width
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.align = self
        m.d.comb += self.in_a.copy(in_a)
        m.d.comb += self.in_b.copy(in_b)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(self.in_a.e), True)
        msr = MultiShiftRMerge(self.in_a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += msr.inp.eq(t_inp.m)
        m.d.comb += msr.diff.eq(tdiff)
        m.d.comb += t_out.m.eq(msr.m)
        m.d.comb += t_out.e.eq(t_inp.e + tdiff)
        m.d.comb += t_out.s.eq(t_inp.s)

        # exponent difference in both directions, plus which side is larger
        m.d.comb += ediff.eq(self.in_a.e - self.in_b.e)
        m.d.comb += ediffr.eq(self.in_b.e - self.in_a.e)
        m.d.comb += elz.eq(self.in_a.e < self.in_b.e)
        m.d.comb += egz.eq(self.in_a.e > self.in_b.e)

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += self.out_a.copy(self.in_a)
        m.d.comb += self.out_b.copy(self.in_b)
        # only one shifter (muxed)
        #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
        # exponent of a greater than b: shift b down
        with m.If(egz):
            m.d.comb += [t_inp.copy(self.in_b),
                         # shift amount is (a.e - b.e); tdiff has no other
                         # driver, so it must be selected per branch
                         tdiff.eq(ediff),
                         self.out_b.copy(t_out),
                         self.out_b.s.eq(self.in_b.s), # whoops forgot sign
                        ]
        # exponent of b greater than a: shift a down
        with m.Elif(elz):
            m.d.comb += [t_inp.copy(self.in_a),
                         tdiff.eq(ediffr), # shift amount is (b.e - a.e)
                         self.out_a.copy(t_out),
                         self.out_a.s.eq(self.in_a.s), # whoops forgot sign
                        ]

        return m
649 class FPAddAlignSingle(FPState
, FPID
):
651 def __init__(self
, width
, id_wid
):
652 FPState
.__init
__(self
, "align")
653 FPID
.__init
__(self
, id_wid
)
654 self
.mod
= FPAddAlignSingleMod(width
)
655 self
.out_a
= FPNumIn(None, width
)
656 self
.out_b
= FPNumIn(None, width
)
658 def setup(self
, m
, in_a
, in_b
, in_mid
):
659 """ links module to inputs and outputs
661 self
.mod
.setup(m
, in_a
, in_b
)
662 if self
.in_mid
is not None:
663 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
667 # NOTE: could be done as comb
668 m
.d
.sync
+= self
.out_a
.copy(self
.mod
.out_a
)
669 m
.d
.sync
+= self
.out_b
.copy(self
.mod
.out_b
)
673 class FPAddAlignSingleAdd(FPState
, FPID
):
675 def __init__(self
, width
, id_wid
):
676 FPState
.__init
__(self
, "align")
677 FPID
.__init
__(self
, id_wid
)
678 self
.mod
= FPAddAlignSingleMod(width
)
679 self
.out_a
= FPNumIn(None, width
)
680 self
.out_b
= FPNumIn(None, width
)
682 self
.a0mod
= FPAddStage0Mod(width
)
683 self
.a0_out_z
= FPNumBase(width
, False)
684 self
.out_tot
= Signal(self
.a0_out_z
.m_width
+ 4, reset_less
=True)
685 self
.a0_out_z
= FPNumBase(width
, False)
687 self
.a1mod
= FPAddStage1Mod(width
)
688 self
.out_z
= FPNumBase(width
, False)
689 self
.out_of
= Overflow()
691 def setup(self
, m
, in_a
, in_b
, in_mid
):
692 """ links module to inputs and outputs
694 self
.mod
.setup(m
, in_a
, in_b
)
695 m
.d
.comb
+= self
.out_a
.copy(self
.mod
.out_a
)
696 m
.d
.comb
+= self
.out_b
.copy(self
.mod
.out_b
)
698 self
.a0mod
.setup(m
, self
.out_a
, self
.out_b
)
699 m
.d
.comb
+= self
.a0_out_z
.copy(self
.a0mod
.out_z
)
700 m
.d
.comb
+= self
.out_tot
.eq(self
.a0mod
.out_tot
)
702 self
.a1mod
.setup(m
, self
.out_tot
, self
.a0_out_z
)
704 if self
.in_mid
is not None:
705 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
709 m
.d
.sync
+= self
.out_of
.copy(self
.a1mod
.out_of
)
710 m
.d
.sync
+= self
.out_z
.copy(self
.a1mod
.out_z
)
711 m
.next
= "normalise_1"
class FPAddStage0Mod:
    """ First stage of add (combinatorial): adds or subtracts the two
        mantissas depending on the operand signs, producing out_tot and
        the sign/exponent of out_z.
    """

    def __init__(self, width):
        """ width: bit-width passed through to the FPNumBase operands
        """
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)
        # total is 4 bits wider than the result mantissa
        self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self
        m.d.comb += self.in_a.copy(in_a)
        m.d.comb += self.in_b.copy(in_b)

    def elaborate(self, platform):
        """ creates and returns the add-stage-0 module
        """
        m = Module()
        m.submodules.add0_in_a = self.in_a
        m.submodules.add0_in_b = self.in_b
        m.submodules.add0_out_z = self.out_z

        # result exponent is taken from a
        m.d.comb += self.out_z.e.eq(self.in_a.e)

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(self.in_a.m)+1, reset_less=True)
        bm0 = Signal(len(self.in_b.m)+1, reset_less=True)
        m.d.comb += [seq.eq(self.in_a.s == self.in_b.s),
                     mge.eq(self.in_a.m >= self.in_b.m),
                     am0.eq(Cat(self.in_a.m, 0)),
                     bm0.eq(Cat(self.in_b.m, 0))
                    ]
        # same-sign (both negative or both positive) add mantissas
        with m.If(seq):
            m.d.comb += [
                self.out_tot.eq(am0 + bm0),
                self.out_z.s.eq(self.in_a.s)
            ]
        # a mantissa greater than b, use a
        with m.Elif(mge):
            m.d.comb += [
                self.out_tot.eq(am0 - bm0),
                self.out_z.s.eq(self.in_a.s)
            ]
        # b mantissa greater than a, use b
        with m.Else():
            m.d.comb += [
                self.out_tot.eq(bm0 - am0),
                self.out_z.s.eq(self.in_b.s)
            ]

        return m
769 class FPAddStage0(FPState
, FPID
):
770 """ First stage of add. covers same-sign (add) and subtract
771 special-casing when mantissas are greater or equal, to
772 give greatest accuracy.
775 def __init__(self
, width
, id_wid
):
776 FPState
.__init
__(self
, "add_0")
777 FPID
.__init
__(self
, id_wid
)
778 self
.mod
= FPAddStage0Mod(width
)
779 self
.out_z
= FPNumBase(width
, False)
780 self
.out_tot
= Signal(self
.out_z
.m_width
+ 4, reset_less
=True)
782 def setup(self
, m
, in_a
, in_b
, in_mid
):
783 """ links module to inputs and outputs
785 self
.mod
.setup(m
, in_a
, in_b
)
786 if self
.in_mid
is not None:
787 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
791 # NOTE: these could be done as combinatorial (merge add0+add1)
792 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
793 m
.d
.sync
+= self
.out_tot
.eq(self
.mod
.out_tot
)
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (tot[27] is kinda a carry bit)
    """

    def __init__(self, width):
        """ width: bit-width passed through to the FPNumBase operands
        """
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_tot = Signal(self.in_z.m_width + 4, reset_less=True)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def setup(self, m, in_tot, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.out_of

        m.d.comb += self.in_z.copy(in_z)
        m.d.comb += self.in_tot.eq(in_tot)

    def elaborate(self, platform):
        """ creates and returns the add-stage-1 module
        """
        m = Module()
        #m.submodules.norm1_in_overflow = self.in_of
        #m.submodules.norm1_out_overflow = self.out_of
        #m.submodules.norm1_in_z = self.in_z
        #m.submodules.norm1_out_z = self.out_z
        # default: result is the incoming z (mantissa/exponent set below)
        m.d.comb += self.out_z.copy(self.in_z)
        # tot[27] gets set when the sum overflows. shift result down
        with m.If(self.in_tot[-1]):
            m.d.comb += [
                self.out_z.m.eq(self.in_tot[4:]),
                self.out_of.m0.eq(self.in_tot[4]),
                self.out_of.guard.eq(self.in_tot[3]),
                self.out_of.round_bit.eq(self.in_tot[2]),
                # the two bits shifted out both fold into sticky
                self.out_of.sticky.eq(self.in_tot[1] | self.in_tot[0]),
                self.out_z.e.eq(self.in_z.e + 1)
            ]
        # top bit clear: no overflow, take the mantissa one bit lower
        with m.Else():
            m.d.comb += [
                self.out_z.m.eq(self.in_tot[3:]),
                self.out_of.m0.eq(self.in_tot[3]),
                self.out_of.guard.eq(self.in_tot[2]),
                self.out_of.round_bit.eq(self.in_tot[1]),
                self.out_of.sticky.eq(self.in_tot[0])
            ]

        return m
847 class FPAddStage1(FPState
, FPID
):
849 def __init__(self
, width
, id_wid
):
850 FPState
.__init
__(self
, "add_1")
851 FPID
.__init
__(self
, id_wid
)
852 self
.mod
= FPAddStage1Mod(width
)
853 self
.out_z
= FPNumBase(width
, False)
854 self
.out_of
= Overflow()
855 self
.norm_stb
= Signal()
857 def setup(self
, m
, in_tot
, in_z
, in_mid
):
858 """ links module to inputs and outputs
860 self
.mod
.setup(m
, in_tot
, in_z
)
862 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
864 if self
.in_mid
is not None:
865 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
869 m
.d
.sync
+= self
.out_of
.copy(self
.mod
.out_of
)
870 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
871 m
.d
.sync
+= self
.norm_stb
.eq(1)
872 m
.next
= "normalise_1"
875 class FPNorm1ModSingle
:
877 def __init__(self
, width
):
879 self
.out_norm
= Signal(reset_less
=True)
880 self
.in_z
= FPNumBase(width
, False)
881 self
.in_of
= Overflow()
882 self
.out_z
= FPNumBase(width
, False)
883 self
.out_of
= Overflow()
885 def setup(self
, m
, in_z
, in_of
, out_z
):
886 """ links module to inputs and outputs
888 m
.submodules
.normalise_1
= self
890 m
.d
.comb
+= self
.in_z
.copy(in_z
)
891 m
.d
.comb
+= self
.in_of
.copy(in_of
)
893 m
.d
.comb
+= out_z
.copy(self
.out_z
)
895 def elaborate(self
, platform
):
898 mwid
= self
.out_z
.m_width
+2
899 pe
= PriorityEncoder(mwid
)
900 m
.submodules
.norm_pe
= pe
902 m
.submodules
.norm1_out_z
= self
.out_z
903 m
.submodules
.norm1_out_overflow
= self
.out_of
904 m
.submodules
.norm1_in_z
= self
.in_z
905 m
.submodules
.norm1_in_overflow
= self
.in_of
907 in_z
= FPNumBase(self
.width
, False)
909 m
.submodules
.norm1_insel_z
= in_z
910 m
.submodules
.norm1_insel_overflow
= in_of
912 espec
= (len(in_z
.e
), True)
913 ediff_n126
= Signal(espec
, reset_less
=True)
914 msr
= MultiShiftRMerge(mwid
, espec
)
915 m
.submodules
.multishift_r
= msr
917 m
.d
.comb
+= in_z
.copy(self
.in_z
)
918 m
.d
.comb
+= in_of
.copy(self
.in_of
)
919 # initialise out from in (overridden below)
920 m
.d
.comb
+= self
.out_z
.copy(in_z
)
921 m
.d
.comb
+= self
.out_of
.copy(in_of
)
922 # normalisation increase/decrease conditions
923 decrease
= Signal(reset_less
=True)
924 increase
= Signal(reset_less
=True)
925 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
926 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
929 # *sigh* not entirely obvious: count leading zeros (clz)
930 # with a PriorityEncoder: to find from the MSB
931 # we reverse the order of the bits.
932 temp_m
= Signal(mwid
, reset_less
=True)
933 temp_s
= Signal(mwid
+1, reset_less
=True)
934 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
935 # make sure that the amount to decrease by does NOT
936 # go below the minimum non-INF/NaN exponent
937 limclz
= Mux(in_z
.exp_sub_n126
> pe
.o
, pe
.o
,
940 # cat round and guard bits back into the mantissa
941 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
942 pe
.i
.eq(temp_m
[::-1]), # inverted
943 clz
.eq(limclz
), # count zeros from MSB down
944 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
945 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
946 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
947 self
.out_of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
948 # overflow in bits 0..1: got shifted too (leave sticky)
949 self
.out_of
.guard
.eq(temp_s
[1]), # guard
950 self
.out_of
.round_bit
.eq(temp_s
[0]), # round
953 with m
.Elif(increase
):
954 temp_m
= Signal(mwid
+1, reset_less
=True)
956 temp_m
.eq(Cat(in_of
.sticky
, in_of
.round_bit
, in_of
.guard
,
958 ediff_n126
.eq(in_z
.N126
- in_z
.e
),
959 # connect multi-shifter to inp/out mantissa (and ediff)
961 msr
.diff
.eq(ediff_n126
),
962 self
.out_z
.m
.eq(msr
.m
[3:]),
963 self
.out_of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
964 # overflow in bits 0..1: got shifted too (leave sticky)
965 self
.out_of
.guard
.eq(temp_s
[2]), # guard
966 self
.out_of
.round_bit
.eq(temp_s
[1]), # round
967 self
.out_of
.sticky
.eq(temp_s
[0]), # sticky
968 self
.out_z
.e
.eq(in_z
.e
+ ediff_n126
),
974 class FPNorm1ModMulti
:
976 def __init__(self
, width
, single_cycle
=True):
978 self
.in_select
= Signal(reset_less
=True)
979 self
.out_norm
= Signal(reset_less
=True)
980 self
.in_z
= FPNumBase(width
, False)
981 self
.in_of
= Overflow()
982 self
.temp_z
= FPNumBase(width
, False)
983 self
.temp_of
= Overflow()
984 self
.out_z
= FPNumBase(width
, False)
985 self
.out_of
= Overflow()
987 def elaborate(self
, platform
):
990 m
.submodules
.norm1_out_z
= self
.out_z
991 m
.submodules
.norm1_out_overflow
= self
.out_of
992 m
.submodules
.norm1_temp_z
= self
.temp_z
993 m
.submodules
.norm1_temp_of
= self
.temp_of
994 m
.submodules
.norm1_in_z
= self
.in_z
995 m
.submodules
.norm1_in_overflow
= self
.in_of
997 in_z
= FPNumBase(self
.width
, False)
999 m
.submodules
.norm1_insel_z
= in_z
1000 m
.submodules
.norm1_insel_overflow
= in_of
1002 # select which of temp or in z/of to use
1003 with m
.If(self
.in_select
):
1004 m
.d
.comb
+= in_z
.copy(self
.in_z
)
1005 m
.d
.comb
+= in_of
.copy(self
.in_of
)
1007 m
.d
.comb
+= in_z
.copy(self
.temp_z
)
1008 m
.d
.comb
+= in_of
.copy(self
.temp_of
)
1009 # initialise out from in (overridden below)
1010 m
.d
.comb
+= self
.out_z
.copy(in_z
)
1011 m
.d
.comb
+= self
.out_of
.copy(in_of
)
1012 # normalisation increase/decrease conditions
1013 decrease
= Signal(reset_less
=True)
1014 increase
= Signal(reset_less
=True)
1015 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1016 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1017 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1019 with m
.If(decrease
):
1021 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1022 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1023 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1024 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1025 self
.out_of
.round_bit
.eq(0), # reset round bit
1026 self
.out_of
.m0
.eq(in_of
.guard
),
1029 with m
.Elif(increase
):
1031 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1032 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1033 self
.out_of
.guard
.eq(in_z
.m
[0]),
1034 self
.out_of
.m0
.eq(in_z
.m
[1]),
1035 self
.out_of
.round_bit
.eq(in_of
.guard
),
1036 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
1042 class FPNorm1Single(FPState
, FPID
):
1044 def __init__(self
, width
, id_wid
, single_cycle
=True):
1045 FPID
.__init
__(self
, id_wid
)
1046 FPState
.__init
__(self
, "normalise_1")
1047 self
.mod
= FPNorm1ModSingle(width
)
1048 self
.out_norm
= Signal(reset_less
=True)
1049 self
.out_z
= FPNumBase(width
)
1050 self
.out_roundz
= Signal(reset_less
=True)
1052 def setup(self
, m
, in_z
, in_of
, in_mid
):
1053 """ links module to inputs and outputs
1055 self
.mod
.setup(m
, in_z
, in_of
, self
.out_z
)
1057 if self
.in_mid
is not None:
1058 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
1060 def action(self
, m
):
1062 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
# FPNorm1Multi: FSM state registered under the name "normalise_1" that wraps
# an FPNorm1ModMulti and adds a strobe/acknowledge handshake (stb, ack,
# in_accept) plus temporary registers (temp_z, temp_of) around it.
#
# setup() hands the module its inputs, the norm_stb strobe, in_accept, the
# temporaries, out_z and out_norm; stb follows norm_stb combinatorially and
# ack is cleared (sync) by default.
#
# action() computes in_accept as "stb high and ack low", registers
# mod.out_of / out_z into the temporaries, and branches on out_norm.
# NOTE(review): the statements under the in_accept test and the else arms are
# not visible in this view - confirm the exact nesting against the full file.
1066 class FPNorm1Multi(FPState
, FPID
):
# Constructor: width is passed to the module and to the FPNumBase instances;
# id_wid is passed to FPID.
1068 def __init__(self
, width
, id_wid
):
1069 FPID
.__init
__(self
, id_wid
)
1070 FPState
.__init
__(self
, "normalise_1")
1071 self
.mod
= FPNorm1ModMulti(width
)
1072 self
.stb
= Signal(reset_less
=True)
1073 self
.ack
= Signal(reset
=0, reset_less
=True)
1074 self
.out_norm
= Signal(reset_less
=True)
1075 self
.in_accept
= Signal(reset_less
=True)
1076 self
.temp_z
= FPNumBase(width
)
1077 self
.temp_of
= Overflow()
1078 self
.out_z
= FPNumBase(width
)
1079 self
.out_roundz
= Signal(reset_less
=True)
1081 def setup(self
, m
, in_z
, in_of
, norm_stb
, in_mid
):
1082 """ links module to inputs and outputs
1084 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1085 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1086 self
.out_z
, self
.out_norm
)
1088 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1089 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1091 if self
.in_mid
is not None:
1092 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
1094 def action(self
, m
):
1096 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1097 m
.d
.sync
+= self
.temp_of
.copy(self
.mod
.out_of
)
1098 m
.d
.sync
+= self
.temp_z
.copy(self
.out_z
)
1099 with m
.If(self
.out_norm
):
1100 with m
.If(self
.in_accept
):
1105 m
.d
.sync
+= self
.ack
.eq(0)
1107 # normalisation not required (or done).
1109 m
.d
.sync
+= self
.ack
.eq(1)
1110 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
class FPNormToPack(FPState, FPID):
    """ FSM state "normalise_1" for the compact pipeline.

        setup() chains four modules (normalisation, rounding, corrections
        and pack), each feeding the next through intermediate numbers, and
        action() registers the packed result before moving to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        """
            * width: bit-width used for every chained module and number
            * id_wid: width of the multiplex id (handed to FPID)
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        # setup() builds every sub-module from self.width, so it must be
        # recorded here.
        self.width = width

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs
        """

        # Normalisation (chained to input in_z+in_of)
        nmod = FPNorm1ModSingle(self.width)
        n_out_z = FPNumBase(self.width)
        n_out_roundz = Signal(reset_less=True)
        nmod.setup(m, in_z, in_of, n_out_z)

        # Rounding (chained to normalisation)
        rmod = FPRoundMod(self.width)
        r_out_z = FPNumBase(self.width)
        rmod.setup(m, n_out_z, n_out_roundz)
        m.d.comb += n_out_roundz.eq(nmod.out_of.roundz)
        m.d.comb += r_out_z.copy(rmod.out_z)

        # Corrections (chained to rounding)
        cmod = FPCorrectionsMod(self.width)
        c_out_z = FPNumBase(self.width)
        cmod.setup(m, r_out_z)
        m.d.comb += c_out_z.copy(cmod.out_z)

        # Pack (chained to corrections)
        self.pmod = FPPackMod(self.width)
        self.out_z = FPNumBase(self.width)
        self.pmod.setup(m, c_out_z)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the packed value and advances to "pack_put_z"
        """
        self.idsync(m)  # copies incoming ID to outgoing
        m.d.sync += self.out_z.v.eq(self.pmod.out_z.v)  # outputs packed result
        m.next = "pack_put_z"
def __init__(self, width):
    """ creates the rounding module's ports

        * in_roundz: single-bit flag, set when the mantissa is to round up
        * in_z: number to be rounded
        * out_z: rounded result
    """
    self.in_roundz = Signal(reset_less=True)
    self.in_z = FPNumBase(width, False)
    self.out_z = FPNumBase(width, False)
def setup(self, m, in_z, roundz):
    """ registers this module as submodule "roundz" of m and drives its
        two inputs (number and round-up flag) combinatorially
    """
    m.submodules.roundz = self

    m.d.comb += [
        self.in_z.copy(in_z),
        self.in_roundz.eq(roundz),
    ]
def elaborate(self, platform):
    """ builds the rounding logic

        out_z defaults to a copy of in_z.  When in_roundz is set the
        mantissa is incremented, and if the mantissa was all 1s the
        exponent is incremented as well.
    """
    # the statements below need a Module to attach to, and elaborate()
    # has to hand that Module back to the caller
    m = Module()
    m.d.comb += self.out_z.copy(self.in_z)
    with m.If(self.in_roundz):
        m.d.comb += self.out_z.m.eq(self.in_z.m + 1)  # mantissa rounds up
        with m.If(self.in_z.m == self.in_z.m1s):  # all 1s
            m.d.comb += self.out_z.e.eq(self.in_z.e + 1)  # exponent up
    return m
class FPRound(FPState, FPID):
    """ FSM state "round": wraps an FPRoundMod and registers its result
    """

    def __init__(self, width, id_wid):
        """
            * width: bit-width passed to the rounding module and out_z
            * id_wid: width of the multiplex id (handed to FPID)
        """
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        self.mod = FPRoundMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, roundz, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, roundz)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the rounded number and advances to "corrections"
        """
        self.idsync(m)  # copies incoming ID to outgoing
        m.d.sync += self.out_z.copy(self.mod.out_z)
        m.next = "corrections"
class FPCorrectionsMod:
    """ post-rounding corrections module: passes in_z through to out_z,
        replacing the exponent with in_z.N127 when in_z reports itself as
        denormalised
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        """ builds the correction logic and returns its Module
        """
        m = Module()
        m.submodules.corr_in_z = self.in_z
        m.submodules.corr_out_z = self.out_z
        m.d.comb += self.out_z.copy(self.in_z)
        with m.If(self.in_z.is_denormalised):
            # overrides only the exponent of the default copy above
            m.d.comb += self.out_z.e.eq(self.in_z.N127)
        return m
class FPCorrections(FPState, FPID):
    """ FSM state "corrections": wraps an FPCorrectionsMod and registers
        its result
    """

    def __init__(self, width, id_wid):
        """
            * width: bit-width passed to the corrections module and out_z
            * id_wid: width of the multiplex id (handed to FPID)
        """
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        self.mod = FPCorrectionsMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the corrected number and advances to "pack"
        """
        self.idsync(m)  # copies incoming ID to outgoing
        m.d.sync += self.out_z.copy(self.mod.out_z)
        # "pack" is the state name FPPack registers itself under; without
        # a transition the FSM would never leave this state
        m.next = "pack"
def __init__(self, width):
    """ creates the pack module's ports: in_z (number to pack) and
        out_z (packed result), both FPNumOut of the given width
    """
    self.in_z = FPNumOut(width, False)
    self.out_z = FPNumOut(width, False)
def setup(self, m, in_z):
    """ links module to inputs and outputs
    """
    # registers this module as submodule "pack" of m, then drives in_z
    m.submodules.pack = self
    m.d.comb += self.in_z.copy(in_z)
def elaborate(self, platform):
    """ builds the pack logic and returns its Module

        When in_z reports overflow, out_z is set to infinity carrying
        in_z's sign; otherwise out_z is created from in_z's sign,
        exponent and mantissa.
    """
    m = Module()
    m.submodules.pack_in_z = self.in_z
    with m.If(self.in_z.is_overflowed):
        m.d.comb += self.out_z.inf(self.in_z.s)
    with m.Else():
        # must be the alternative arm: placed unconditionally after the
        # If, this later assignment would take precedence over inf()
        m.d.comb += self.out_z.create(self.in_z.s, self.in_z.e, self.in_z.m)
    return m
class FPPack(FPState, FPID):
    """ FSM state "pack": wraps an FPPackMod and registers the packed value
    """

    def __init__(self, width, id_wid):
        """
            * width: bit-width passed to the pack module and out_z
            * id_wid: width of the multiplex id (handed to FPID)
        """
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        self.mod = FPPackMod(width)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the packed value and advances to "pack_put_z"
        """
        self.idsync(m)  # copies incoming ID to outgoing
        m.d.sync += self.out_z.v.eq(self.mod.out_z.v)
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ FSM output state: copies in_z's value (and optionally the id) to
        the output and holds the output strobe until it is acknowledged
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """
            * state: name this state is registered under
            * in_z: source of the value (its .v is read)
            * out_z: destination; its .v, .stb and .ack are used
            * in_mid, out_mid: id source/destination; in_mid may be None
            * to_state: state entered once acknowledged (default "get_ops")
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # action() reads both of these, so they have to be stored
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ drives the output and performs the stb/ack handshake
        """
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        # the assignment only takes effect when added to a domain
        m.d.sync += [
            self.out_z.v.eq(self.in_z.v)
        ]
        with m.If(self.out_z.stb & self.out_z.ack):
            # value taken: drop the strobe and leave this state
            m.d.sync += self.out_z.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_z.stb.eq(1)
class FPPutZIdx(FPState):
    """ FSM output state: like FPPutZ, but the destination is selected
        from an array of outputs, indexed by in_mid
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """
            * state: name this state is registered under
            * in_z: source of the value (its .v is read)
            * out_zs: indexable collection of outputs (.v, .stb, .ack used)
            * in_mid: index selecting which output to drive
            * to_state: state entered once acknowledged (default "get_ops")
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # action() reads in_z, so it has to be stored
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ drives the selected output and performs the stb/ack handshake
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
                     outz_ack.eq(self.out_zs[self.in_mid].ack),
                    ]
        # the assignment only takes effect when added to a domain
        m.d.sync += [
            self.out_zs[self.in_mid].v.eq(self.in_z.v)
        ]
        with m.If(outz_stb & outz_ack):
            # value taken: drop the strobe and leave this state
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
1341 class FPADDBaseMod(FPID
):
def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
    """ IEEE754 FP Add

        * width: bit-width of IEEE754. supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
        * compact: True indicates a reduced number of stages
    """
    FPID.__init__(self, id_wid)
    # the fragment builders pass self.width to every stage they create
    self.width = width
    self.single_cycle = single_cycle
    self.compact = compact

    self.in_t = Trigger()
    self.in_a = Signal(width)
    self.in_b = Signal(width)
    self.out_z = FPOp(width)

    # filled by add_state(), consumed when the FSM is generated
    self.states = []
def add_state(self, state):
    """ records state for FSM generation and returns it, so that callers
        can write "x = self.add_state(X(...))" and then call x.setup(...)
    """
    self.states.append(state)
    return state
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd
    """
    m = Module()
    m.submodules.out_z = self.out_z
    m.submodules.in_t = self.in_t
    # exactly one chain of states is built: both builders register a
    # "get_ops" state, so running both would define it twice
    if self.compact:
        self.get_compact_fragment(m, platform)
    else:
        self.get_longer_fragment(m, platform)

    with m.FSM() as fsm:

        for state in self.states:
            with m.State(state.state_from):
                state.action(m)

    return m
# Builds the full-length (non-compact) chain of states. Each stage object is
# registered through add_state() and then wired, via its setup(), to outputs
# of an earlier stage. Order of construction: FPGet2Op ("get_ops"),
# FPAddSpecialCases, FPAddDeNorm, alignment (FPAddAlignSingle /
# FPAddAlignMulti), FPAddStage0, FPAddStage1, normalisation (FPNorm1Single /
# FPNorm1Multi), FPRound, FPCorrections, FPPack, and finally two FPPutZ
# output states named "pack_put_z" and "put_z".
# NOTE(review): `a` and `b` are used below but their definitions are not
# visible in this view, and neither are the else arms that presumably pair
# with the two `if self.single_cycle:` tests - confirm against the full file.
1386 def get_longer_fragment(self
, m
, platform
=None):
# operand fetch
1388 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1389 self
.in_a
, self
.in_b
, self
.width
))
1390 get
.setup(m
, self
.in_a
, self
.in_b
, self
.in_t
.stb
, self
.in_t
.ack
)
# special cases and denormalisation both take the same a/b pair
1394 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1395 sc
.setup(m
, a
, b
, self
.in_mid
)
1397 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1398 dn
.setup(m
, a
, b
, sc
.in_mid
)
# alignment: single-cycle or multi-cycle variant, fed from the denorm stage
1400 if self
.single_cycle
:
1401 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1402 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1404 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1405 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
# the two add stages
1407 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1408 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1410 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1411 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
# normalisation: the multi-cycle variant additionally takes add1.norm_stb
1413 if self
.single_cycle
:
1414 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1415 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1417 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1418 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
# round -> corrections -> pack
1420 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1421 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1423 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1424 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1426 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1427 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
# output states: "pack_put_z" emits the packed result, "put_z" emits the
# special-cases stage's out_z; both write to self.out_z / self.out_mid
1429 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1430 pa
.in_mid
, self
.out_mid
))
1432 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1433 pa
.in_mid
, self
.out_mid
))
# Builds the reduced chain of states. Order of construction: FPGet2Op
# ("get_ops"), FPAddSpecialCasesDeNorm, FPAddAlignSingleAdd, FPNormToPack,
# and then two FPPutZ output states named "pack_put_z" and "put_z", both
# writing to self.out_z / self.out_mid.
# NOTE(review): `a` and `b` are used below but their definitions are not
# visible in this view - confirm against the full file.
1435 def get_compact_fragment(self
, m
, platform
=None):
# operand fetch
1437 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1438 self
.in_a
, self
.in_b
, self
.width
))
1439 get
.setup(m
, self
.in_a
, self
.in_b
, self
.in_t
.stb
, self
.in_t
.ack
)
# combined special-cases / denormalise stage
1443 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1444 sc
.setup(m
, a
, b
, self
.in_mid
)
# combined align / add stage, fed from sc's out_a / out_b
1446 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1447 alm
.setup(m
, sc
.out_a
, sc
.out_b
, sc
.in_mid
)
# combined normalise-to-pack stage, fed from alm's out_z / out_of
1449 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1450 n1
.setup(m
, alm
.out_z
, alm
.out_of
, alm
.in_mid
)
# output states
1452 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
, self
.out_z
,
1453 n1
.in_mid
, self
.out_mid
))
1455 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1456 sc
.in_mid
, self
.out_mid
))
# FPADDBase: an FSM state, registered under the name "fpadd", that itself
# contains a complete FPADDBaseMod adder (self.mod). It has its own operand
# inputs (in_a, in_b), an input Trigger (in_t) and the handshake signals
# z_done, in_accept, add_stb and add_ack.
1459 class FPADDBase(FPState
, FPID
):
# Constructor. NOTE(review): the three "* ..." lines below read as docstring
# text whose delimiters are not visible in this view.
1461 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1464 * width: bit-width of IEEE754. supported: 16, 32, 64
1465 * id_wid: an identifier that is sync-connected to the input
1466 * single_cycle: True indicates each stage to complete in 1 clock
1468 FPID
.__init
__(self
, id_wid
)
1469 FPState
.__init
__(self
, "fpadd")
1471 self
.single_cycle
= single_cycle
1472 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1474 self
.in_t
= Trigger()
1475 self
.in_a
= Signal(width
)
1476 self
.in_b
= Signal(width
)
1477 #self.out_z = FPOp(width)
1479 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1480 self
.in_accept
= Signal(reset_less
=True)
1481 self
.add_stb
= Signal(reset_less
=True)
1482 self
.add_ack
= Signal(reset
=0, reset_less
=True)
# setup(): stores out_mid, then connects (comb) this state's operands, id and
# trigger through to self.mod, and the module's out_mid / out_z value and
# strobe back out; z_done follows self.mod.out_z.trigger. add_stb is
# registered (sync) from the caller's strobe, while add_ack and out_z.ack
# are cleared (sync) by default. Finally self.mod is registered as submodule
# "fpadd".
# NOTE(review): self.out_z is used here but its assignment is not visible in
# this view, nor is any wiring of the `b` argument - confirm.
1484 def setup(self
, m
, a
, b
, add_stb
, in_mid
, out_z
, out_mid
):
1486 self
.out_mid
= out_mid
1487 m
.d
.comb
+= [self
.in_a
.eq(a
),
1489 self
.mod
.in_a
.eq(self
.in_a
),
1490 self
.mod
.in_b
.eq(self
.in_b
),
1491 self
.in_mid
.eq(in_mid
),
1492 self
.mod
.in_mid
.eq(self
.in_mid
),
1493 self
.z_done
.eq(self
.mod
.out_z
.trigger
),
1494 #self.add_stb.eq(add_stb),
1495 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1496 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1497 self
.out_mid
.eq(self
.mod
.out_mid
),
1498 self
.out_z
.v
.eq(self
.mod
.out_z
.v
),
1499 self
.out_z
.stb
.eq(self
.mod
.out_z
.stb
),
1500 self
.mod
.out_z
.ack
.eq(self
.out_z
.ack
),
1503 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1504 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1505 m
.d
.sync
+= self
.out_z
.ack
.eq(0) # likewise
1506 #m.d.sync += self.in_t.stb.eq(0)
1508 m
.submodules
.fpadd
= self
.mod
# action(): in_accept is add_stb with add_ack low. The visible code tests
# ~z_done and, inside that, in_accept; the existing comments label the two
# outer cases "not done" and "done".
# NOTE(review): several lines of this method (list openers/closers and the
# else arms) are not visible in this view - confirm the exact nesting.
1510 def action(self
, m
):
1512 # in_accept is set on incoming strobe HIGH and ack LOW.
1513 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1515 #with m.If(self.in_t.ack):
1516 # m.d.sync += self.in_t.stb.eq(0)
1517 with m
.If(~self
.z_done
):
1518 # not done: test for accepting an incoming operand pair
1519 with m
.If(self
.in_accept
):
1521 self
.add_ack
.eq(1), # acknowledge receipt...
1522 self
.in_t
.stb
.eq(1), # initiate add
1525 m
.d
.sync
+= [self
.add_ack
.eq(0),
1526 self
.in_t
.stb
.eq(0),
1527 self
.out_z
.ack
.eq(1),
1530 # done: acknowledge, and write out id and value
1531 m
.d
.sync
+= [self
.add_ack
.eq(1),
1538 if self
.in_mid
is not None:
1539 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1542 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1544 # move to output state on detecting z ack
1545 with m
.If(self
.out_z
.trigger
):
1546 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1549 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
# Constructor of a class whose header is not visible in this view. Stores
# id_wid, wraps `res` in an Array as self.res, and creates in_z (an FPOp of
# the given width) and in_mid (a Signal id_wid bits wide).
# NOTE(review): rs_sz, res and out_z are referenced below but their
# definitions are not visible here - confirm against the full file.
1552 def __init__(self
, width
, id_wid
):
1554 self
.id_wid
= id_wid
1556 for i
in range(rs_sz
):
1558 out_z
.name
= "out_z_%d" % i
1560 self
.res
= Array(res
)
1561 self
.in_z
= FPOp(width
)
1562 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
def setup(self, m, in_z, in_mid):
    """ drives this object's in_z and in_mid combinatorially from the
        supplied value and id
    """
    m.d.comb += [
        self.in_z.copy(in_z),
        self.in_mid.eq(in_mid),
    ]
# Registers in_z as submodule "res_in_z" and adds self.res to the anonymous
# submodules of m.
# NOTE(review): the creation of `m` and the remainder of this method are not
# visible in this view - confirm against the full file.
1568 def get_fragment(self
, platform
=None):
1569 """ creates the HDL code-fragment for FPAdd
1572 m
.submodules
.res_in_z
= self
.in_z
1573 m
.submodules
+= self
.res
1585 """ FPADD: stages as follows:
1591 FPAddBase---> FPAddBaseMod
1593 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1595 FPAddBase is tricky: it is both a stage and *has* stages.
1596 Connection to FPAddBaseMod therefore requires an in stb/ack
1597 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1598 needs to be the thing that raises the incoming stb.
# Constructor of the top-level adder (class header not visible in this view).
# Stores id_wid and single_cycle, creates an FPID holder (self.ids), and in
# two loops over range(rs_sz) names per-index input pairs ("in_a_%d",
# "in_b_%d", collected as tuples in rs) and outputs ("out_z_%d"); `res` is
# wrapped in an Array as self.res.
# NOTE(review): the creation of in_a / in_b / out_z, the rs and res lists and
# self.rs / self.width / self.states is not visible here - confirm. The three
# "* ..." lines read as docstring text whose delimiters are not visible.
1601 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1604 * width: bit-width of IEEE754. supported: 16, 32, 64
1605 * id_wid: an identifier that is sync-connected to the input
1606 * single_cycle: True indicates each stage to complete in 1 clock
1609 self
.id_wid
= id_wid
1610 self
.single_cycle
= single_cycle
1612 #self.out_z = FPOp(width)
1613 self
.ids
= FPID(id_wid
)
1616 for i
in range(rs_sz
):
1619 in_a
.name
= "in_a_%d" % i
1620 in_b
.name
= "in_b_%d" % i
1621 rs
.append((in_a
, in_b
))
1625 for i
in range(rs_sz
):
1627 out_z
.name
= "out_z_%d" % i
1629 self
.res
= Array(res
)
def add_state(self, state):
    """ records state for FSM generation and returns it; get_fragment
        relies on the return value ("ab = self.add_state(ab)" followed
        by "ab.setup(...)")
    """
    self.states.append(state)
    return state
# Builds the top-level adder: adds self.rs to the submodules, takes the first
# input pair (self.rs[0][0], self.rs[0][1]), creates an internal out_z (FPOp)
# and out_mid, registers two FPGetOp states ("get_a" leading to "get_b",
# "get_b" leading to "fpadd"), an FPADDBase state and an FPPutZIdx state
# ("put_z") writing into self.res, then emits one FSM state per entry of
# self.states.
# NOTE(review): the creation of `m`, the remaining FPGetOp / setup / FPPutZIdx
# arguments, the definitions of `a` and `b`, and the body of the State block
# are not visible in this view - confirm against the full file.
1637 def get_fragment(self
, platform
=None):
1638 """ creates the HDL code-fragment for FPAdd
1641 m
.submodules
+= self
.rs
1643 in_a
= self
.rs
[0][0]
1644 in_b
= self
.rs
[0][1]
1646 out_z
= FPOp(self
.width
)
1647 out_mid
= Signal(self
.id_wid
, reset_less
=True)
1648 m
.submodules
.out_z
= out_z
1650 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1655 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1660 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1661 ab
= self
.add_state(ab
)
1662 ab
.setup(m
, a
, b
, getb
.out_decode
, self
.ids
.in_mid
,
1665 pz
= self
.add_state(FPPutZIdx("put_z", ab
.out_z
, self
.res
,
1668 with m
.FSM() as fsm
:
1670 for state
in self
.states
:
1671 with m
.State(state
.state_from
):
# Script entry point: builds an adder and hands it, together with a port
# list, to the `main` helper imported from nmigen.cli.
# NOTE(review): two alternative setups appear back to back (an FPADD with its
# first input pair, first result and id ports; then an FPADDBase with its
# operands, trigger, output and id ports). Presumably a selector between them
# exists but is not visible in this view - confirm against the full file.
1677 if __name__
== "__main__":
1679 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1680 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1681 alu
.rs
[0][1].ports() + \
1682 alu
.res
[0].ports() + \
1683 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1685 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1686 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1687 alu
.in_t
.ports() + \
1688 alu
.out_z
.ports() + \
1689 [alu
.in_mid
, alu
.out_mid
])
1692 # works... but don't use, just do "python fname.py convert -t v"
1693 #print (verilog.convert(alu, ports=[
1694 # ports=alu.in_a.ports() + \
1695 # alu.in_b.ports() + \
1696 # alu.out_z.ports())