74ce502d65a8fd3209a3ba211a94ab43631a581d
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 #from fpbase import FPNumShiftMultiRight
15 class FPState(FPBase
):
def __init__(self, state_from):
    """ remembers which state this object stands for

        state_from is stored unchanged as self.state_from; the state
        classes in this file pass string names such as "align",
        "add_0" or "normalise_1".
    """
    self.state_from = state_from
19 def set_inputs(self
, inputs
):
21 for k
,v
in inputs
.items():
24 def set_outputs(self
, outputs
):
25 self
.outputs
= outputs
26 for k
,v
in outputs
.items():
30 class FPGetSyncOpsMod
:
31 def __init__(self
, width
, num_ops
=2):
33 self
.num_ops
= num_ops
36 for i
in range(num_ops
):
37 inops
.append(Signal(width
, reset_less
=True))
38 outops
.append(Signal(width
, reset_less
=True))
41 self
.stb
= Signal(num_ops
)
43 self
.ready
= Signal(reset_less
=True)
44 self
.out_decode
= Signal(reset_less
=True)
46 def elaborate(self
, platform
):
48 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
49 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
50 with m
.If(self
.out_decode
):
51 for i
in range(self
.num_ops
):
53 self
.out_op
[i
].eq(self
.in_op
[i
]),
58 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
62 def __init__(self
, width
, num_ops
):
63 Trigger
.__init
__(self
)
65 self
.num_ops
= num_ops
68 for i
in range(num_ops
):
69 res
.append(Signal(width
))
74 for i
in range(self
.num_ops
):
82 def __init__(self
, width
, num_ops
=2, num_rows
=4):
84 self
.num_ops
= num_ops
85 self
.num_rows
= num_rows
86 self
.mmax
= int(log(self
.num_rows
) / log(2))
88 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
89 for i
in range(num_rows
):
90 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
91 self
.rs
= Array(self
.rs
)
93 self
.out_op
= FPOps(width
, num_ops
)
95 def elaborate(self
, platform
):
98 pe
= PriorityEncoder(self
.num_rows
)
99 m
.submodules
.selector
= pe
100 m
.submodules
.out_op
= self
.out_op
101 m
.submodules
+= self
.rs
103 # connect priority encoder
105 for i
in range(self
.num_rows
):
106 in_ready
.append(self
.rs
[i
].ready
)
107 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
108 m
.d
.comb
+= self
.out_op
.stb
.eq(pe
.n
) # strobe-out when encoder active
111 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
112 for j
in range(self
.num_ops
):
113 m
.d
.sync
+= self
.out_op
.v
[j
].eq(self
.rs
[pe
.o
].out_op
[j
])
118 for i
in range(self
.num_rows
):
120 res
+= inop
.in_op
+ [inop
.stb
]
121 return self
.out_op
.ports() + res
+ [self
.mid
]
def __init__(self, width):
    """ creates the operand input, the captured output and decode flag

        width is handed to both FPOp() and Signal(); presumably the
        operand bit-width (TODO confirm against FPOp in fpbase).
    """
    # incoming operand; elaborate() reads its .ack, .stb and .v members
    self.in_op = FPOp(width)
    # value taken from in_op.v while out_decode is asserted
    self.out_op = Signal(width)
    # driven by elaborate() as in_op.ack & in_op.stb
    self.out_decode = Signal(reset_less=True)
130 def elaborate(self
, platform
):
132 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
133 m
.submodules
.get_op_in
= self
.in_op
134 #m.submodules.get_op_out = self.out_op
135 with m
.If(self
.out_decode
):
137 self
.out_op
.eq(self
.in_op
.v
),
142 class FPGetOp(FPState
):
146 def __init__(self
, in_state
, out_state
, in_op
, width
):
147 FPState
.__init
__(self
, in_state
)
148 self
.out_state
= out_state
149 self
.mod
= FPGetOpMod(width
)
151 self
.out_op
= Signal(width
)
152 self
.out_decode
= Signal(reset_less
=True)
154 def setup(self
, m
, in_op
):
155 """ links module to inputs and outputs
157 setattr(m
.submodules
, self
.state_from
, self
.mod
)
158 m
.d
.comb
+= self
.mod
.in_op
.copy(in_op
)
159 #m.d.comb += self.out_op.eq(self.mod.out_op)
160 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
163 with m
.If(self
.out_decode
):
164 m
.next
= self
.out_state
166 self
.in_op
.ack
.eq(0),
167 self
.out_op
.eq(self
.mod
.out_op
)
170 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
173 class FPGet2OpMod(Trigger
):
def __init__(self, width):
    """ creates two raw operand inputs and two FPNumIn outputs

        width is used for both the raw Signals and the FPNumIn
        objects; presumably the operand bit-width (TODO confirm).
    """
    Trigger.__init__(self)

    # raw operands, as presented to the module
    self.in_op1 = Signal(width, reset_less=True)
    self.in_op2 = Signal(width, reset_less=True)

    # elaborate() calls out_opN.decode(in_opN) under self.trigger
    self.out_op1 = FPNumIn(None, width)
    self.out_op2 = FPNumIn(None, width)
181 def elaborate(self
, platform
):
182 m
= Trigger
.elaborate(self
, platform
)
183 #m.submodules.get_op_in = self.in_op
184 m
.submodules
.get_op1_out
= self
.out_op1
185 m
.submodules
.get_op2_out
= self
.out_op2
186 with m
.If(self
.trigger
):
188 self
.out_op1
.decode(self
.in_op1
),
189 self
.out_op2
.decode(self
.in_op2
),
194 class FPGet2Op(FPState
):
198 def __init__(self
, in_state
, out_state
, in_op1
, in_op2
, width
):
199 FPState
.__init
__(self
, in_state
)
200 self
.out_state
= out_state
201 self
.mod
= FPGet2OpMod(width
)
204 self
.out_op1
= FPNumIn(None, width
)
205 self
.out_op2
= FPNumIn(None, width
)
206 self
.in_stb
= Signal(reset_less
=True)
207 self
.out_ack
= Signal(reset_less
=True)
208 self
.out_decode
= Signal(reset_less
=True)
210 def setup(self
, m
, in_op1
, in_op2
, in_stb
, in_ack
):
211 """ links module to inputs and outputs
213 m
.submodules
.get_ops
= self
.mod
214 m
.d
.comb
+= self
.mod
.in_op1
.eq(in_op1
)
215 m
.d
.comb
+= self
.mod
.in_op2
.eq(in_op2
)
216 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
217 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
218 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
219 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
222 with m
.If(self
.out_decode
):
223 m
.next
= self
.out_state
226 #self.out_op1.v.eq(self.mod.out_op1.v),
227 #self.out_op2.v.eq(self.mod.out_op2.v),
228 self
.out_op1
.copy(self
.mod
.out_op1
),
229 self
.out_op2
.copy(self
.mod
.out_op2
)
232 m
.d
.sync
+= self
.mod
.ack
.eq(1)
235 class FPAddSpecialCasesMod
:
236 """ special cases: NaNs, infs, zeros, denormalised
237 NOTE: some of these are unique to add. see "Special Operations"
238 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width):
    """ creates the a/b inputs, the z result and the "result valid" flag

        width is passed straight through to FPNumBase / FPNumOut.
    """
    # operands examined by elaborate() (is_nan, is_inf, is_zero, s, e, m)
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)

    # special-case result; NOTE(review): meaning of the False argument
    # is defined by FPNumOut in fpbase -- not visible here
    self.out_z = FPNumOut(width, False)

    # set to 1 by elaborate() when one of the special cases matched
    self.out_do_z = Signal(reset_less=True)
def setup(self, m, in_a, in_b, out_do_z):
    """ links module to inputs and outputs

        Registers this object as m.submodules.specialcases, then
        combinatorially copies in_a / in_b into the module's own inputs
        and drives the caller's out_do_z from the module's out_do_z.
    """
    m.submodules.specialcases = self
    # one statement list; order is the same as three separate "+="
    m.d.comb += [
        self.in_a.copy(in_a),
        self.in_b.copy(in_b),
        out_do_z.eq(self.out_do_z),
    ]
255 def elaborate(self
, platform
):
258 m
.submodules
.sc_in_a
= self
.in_a
259 m
.submodules
.sc_in_b
= self
.in_b
260 m
.submodules
.sc_out_z
= self
.out_z
263 m
.d
.comb
+= s_nomatch
.eq(self
.in_a
.s
!= self
.in_b
.s
)
266 m
.d
.comb
+= m_match
.eq(self
.in_a
.m
== self
.in_b
.m
)
268 # if a is NaN or b is NaN return NaN
269 with m
.If(self
.in_a
.is_nan | self
.in_b
.is_nan
):
270 m
.d
.comb
+= self
.out_do_z
.eq(1)
271 m
.d
.comb
+= self
.out_z
.nan(0)
273 # XXX WEIRDNESS for FP16 non-canonical NaN handling
276 ## if a is zero and b is NaN return -b
277 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
278 # m.d.comb += self.out_do_z.eq(1)
279 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
281 ## if b is zero and a is NaN return -a
282 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
283 # m.d.comb += self.out_do_z.eq(1)
284 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
286 ## if a is -zero and b is NaN return -b
287 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
288 # m.d.comb += self.out_do_z.eq(1)
289 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
291 ## if b is -zero and a is NaN return -a
292 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
293 # m.d.comb += self.out_do_z.eq(1)
294 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
296 # if a is inf return inf (or NaN)
297 with m
.Elif(self
.in_a
.is_inf
):
298 m
.d
.comb
+= self
.out_do_z
.eq(1)
299 m
.d
.comb
+= self
.out_z
.inf(self
.in_a
.s
)
300 # if a is inf and signs don't match return NaN
301 with m
.If(self
.in_b
.exp_128
& s_nomatch
):
302 m
.d
.comb
+= self
.out_z
.nan(0)
304 # if b is inf return inf
305 with m
.Elif(self
.in_b
.is_inf
):
306 m
.d
.comb
+= self
.out_do_z
.eq(1)
307 m
.d
.comb
+= self
.out_z
.inf(self
.in_b
.s
)
309 # if a is zero and b zero return signed-a/b
310 with m
.Elif(self
.in_a
.is_zero
& self
.in_b
.is_zero
):
311 m
.d
.comb
+= self
.out_do_z
.eq(1)
312 m
.d
.comb
+= self
.out_z
.create(self
.in_a
.s
& self
.in_b
.s
,
316 # if a is zero return b
317 with m
.Elif(self
.in_a
.is_zero
):
318 m
.d
.comb
+= self
.out_do_z
.eq(1)
319 m
.d
.comb
+= self
.out_z
.create(self
.in_b
.s
, self
.in_b
.e
,
322 # if b is zero return a
323 with m
.Elif(self
.in_b
.is_zero
):
324 m
.d
.comb
+= self
.out_do_z
.eq(1)
325 m
.d
.comb
+= self
.out_z
.create(self
.in_a
.s
, self
.in_a
.e
,
328 # if a equal to -b return zero (+ve zero)
329 with m
.Elif(s_nomatch
& m_match
& (self
.in_a
.e
== self
.in_b
.e
)):
330 m
.d
.comb
+= self
.out_do_z
.eq(1)
331 m
.d
.comb
+= self
.out_z
.zero(0)
333 # Denormalised Number checks
335 m
.d
.comb
+= self
.out_do_z
.eq(0)
341 def __init__(self
, id_wid
):
344 self
.in_mid
= Signal(id_wid
, reset_less
=True)
345 self
.out_mid
= Signal(id_wid
, reset_less
=True)
351 if self
.id_wid
is not None:
352 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
355 class FPAddSpecialCases(FPState
, FPID
):
356 """ special cases: NaNs, infs, zeros, denormalised
357 NOTE: some of these are unique to add. see "Special Operations"
358 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ builds the "special_cases" state around FPAddSpecialCasesMod

        width:  passed to the module and to the FPNumOut result
        id_wid: passed to FPID.__init__
    """
    # both bases are initialised explicitly (no cooperative super())
    FPState.__init__(self, "special_cases")
    FPID.__init__(self, id_wid)

    # combinatorial special-case detector
    self.mod = FPAddSpecialCasesMod(width)

    # registered copy of the module's result, and the flag that
    # setup() hands to the module as its out_do_z target
    self.out_z = FPNumOut(width, False)
    self.out_do_z = Signal(reset_less=True)
368 def setup(self
, m
, in_a
, in_b
, in_mid
):
369 """ links module to inputs and outputs
371 self
.mod
.setup(m
, in_a
, in_b
, self
.out_do_z
)
372 if self
.in_mid
is not None:
373 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
377 with m
.If(self
.out_do_z
):
378 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
381 m
.next
= "denormalise"
384 class FPAddSpecialCasesDeNorm(FPState
, FPID
):
385 """ special cases: NaNs, infs, zeros, denormalised
386 NOTE: some of these are unique to add. see "Special Operations"
387 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
390 def __init__(self
, width
, id_wid
):
391 FPState
.__init
__(self
, "special_cases")
392 FPID
.__init
__(self
, id_wid
)
393 self
.smod
= FPAddSpecialCasesMod(width
)
394 self
.out_z
= FPNumOut(width
, False)
395 self
.out_do_z
= Signal(reset_less
=True)
397 self
.dmod
= FPAddDeNormMod(width
)
398 self
.out_a
= FPNumBase(width
)
399 self
.out_b
= FPNumBase(width
)
401 def setup(self
, m
, in_a
, in_b
, in_mid
):
402 """ links module to inputs and outputs
404 self
.smod
.setup(m
, in_a
, in_b
, self
.out_do_z
)
405 self
.dmod
.setup(m
, in_a
, in_b
)
406 if self
.in_mid
is not None:
407 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
411 with m
.If(self
.out_do_z
):
412 m
.d
.sync
+= self
.out_z
.v
.eq(self
.smod
.out_z
.v
) # only take output
416 m
.d
.sync
+= self
.out_a
.copy(self
.dmod
.out_a
)
417 m
.d
.sync
+= self
.out_b
.copy(self
.dmod
.out_b
)
420 class FPAddDeNormMod(FPState
):
def __init__(self, width):
    """ creates a/b inputs and their adjusted a/b outputs

        width is passed unchanged to every FPNumBase.
        NOTE(review): the class derives from FPState but this
        constructor does not call FPState.__init__.
    """
    # operands as received
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)

    # operands after elaborate() has adjusted exponent / top mantissa bit
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
def setup(self, m, in_a, in_b):
    """ links module to inputs and outputs

        Registers this object as m.submodules.denormalise and
        combinatorially copies in_a / in_b into self.in_a / self.in_b.
    """
    m.submodules.denormalise = self
    m.d.comb += [
        self.in_a.copy(in_a),
        self.in_b.copy(in_b),
    ]
435 def elaborate(self
, platform
):
437 m
.submodules
.denorm_in_a
= self
.in_a
438 m
.submodules
.denorm_in_b
= self
.in_b
439 m
.submodules
.denorm_out_a
= self
.out_a
440 m
.submodules
.denorm_out_b
= self
.out_b
441 # hmmm, don't like repeating identical code
442 m
.d
.comb
+= self
.out_a
.copy(self
.in_a
)
443 with m
.If(self
.in_a
.exp_n127
):
444 m
.d
.comb
+= self
.out_a
.e
.eq(self
.in_a
.N126
) # limit a exponent
446 m
.d
.comb
+= self
.out_a
.m
[-1].eq(1) # set top mantissa bit
448 m
.d
.comb
+= self
.out_b
.copy(self
.in_b
)
449 with m
.If(self
.in_b
.exp_n127
):
450 m
.d
.comb
+= self
.out_b
.e
.eq(self
.in_b
.N126
) # limit a exponent
452 m
.d
.comb
+= self
.out_b
.m
[-1].eq(1) # set top mantissa bit
457 class FPAddDeNorm(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ builds the "denormalise" state around FPAddDeNormMod

        width:  passed to the module and to the a/b output numbers
        id_wid: passed to FPID.__init__
    """
    FPState.__init__(self, "denormalise")
    FPID.__init__(self, id_wid)

    # combinatorial worker module
    self.mod = FPAddDeNormMod(width)

    # this state's a/b outputs (FPNumBase)
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
466 def setup(self
, m
, in_a
, in_b
, in_mid
):
467 """ links module to inputs and outputs
469 self
.mod
.setup(m
, in_a
, in_b
)
470 if self
.in_mid
is not None:
471 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
475 # Denormalised Number checks
477 m
.d
.sync
+= self
.out_a
.copy(self
.mod
.out_a
)
478 m
.d
.sync
+= self
.out_b
.copy(self
.mod
.out_b
)
481 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ creates a/b inputs, a/b outputs and the exponents-equal flag

        width is passed to FPNumBase (inputs) and FPNumIn (outputs).
    """
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)

    # outputs are FPNumIn because elaborate() uses their shift_down()
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)

    # elaborate() defaults this to 0 and has a statement setting it to 1
    self.exp_eq = Signal(reset_less=True)
490 def elaborate(self
, platform
):
491 # This one however (single-cycle) will do the shift
496 m
.submodules
.align_in_a
= self
.in_a
497 m
.submodules
.align_in_b
= self
.in_b
498 m
.submodules
.align_out_a
= self
.out_a
499 m
.submodules
.align_out_b
= self
.out_b
501 # NOTE: this does *not* do single-cycle multi-shifting,
502 # it *STAYS* in the align state until exponents match
504 # exponent of a greater than b: shift b down
505 m
.d
.comb
+= self
.exp_eq
.eq(0)
506 m
.d
.comb
+= self
.out_a
.copy(self
.in_a
)
507 m
.d
.comb
+= self
.out_b
.copy(self
.in_b
)
508 agtb
= Signal(reset_less
=True)
509 altb
= Signal(reset_less
=True)
510 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
511 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
513 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
514 # exponent of b greater than a: shift a down
516 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
517 # exponents equal: move to next stage.
519 m
.d
.comb
+= self
.exp_eq
.eq(1)
523 class FPAddAlignMulti(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ builds the multi-cycle "align" state around FPAddAlignMultiMod

        width:  passed to the module and to the FPNumIn outputs
        id_wid: passed to FPID.__init__
    """
    # FPID is initialised first here, then FPState
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "align")

    self.mod = FPAddAlignMultiMod(width)

    # this state's a/b outputs (FPNumIn)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)

    # setup() drives this from the module's exp_eq
    self.exp_eq = Signal(reset_less=True)
533 def setup(self
, m
, in_a
, in_b
, in_mid
):
534 """ links module to inputs and outputs
536 m
.submodules
.align
= self
.mod
537 m
.d
.comb
+= self
.mod
.in_a
.copy(in_a
)
538 m
.d
.comb
+= self
.mod
.in_b
.copy(in_b
)
539 #m.d.comb += self.out_a.copy(self.mod.out_a)
540 #m.d.comb += self.out_b.copy(self.mod.out_b)
541 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
542 if self
.in_mid
is not None:
543 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
547 m
.d
.sync
+= self
.out_a
.copy(self
.mod
.out_a
)
548 m
.d
.sync
+= self
.out_b
.copy(self
.mod
.out_b
)
549 with m
.If(self
.exp_eq
):
553 class FPAddAlignSingleMod
:
555 def __init__(self
, width
):
557 self
.in_a
= FPNumBase(width
)
558 self
.in_b
= FPNumBase(width
)
559 self
.out_a
= FPNumIn(None, width
)
560 self
.out_b
= FPNumIn(None, width
)
def setup(self, m, in_a, in_b):
    """ links module to inputs and outputs

        Registers this object as m.submodules.align and
        combinatorially copies in_a / in_b into self.in_a / self.in_b.
    """
    m.submodules.align = self
    m.d.comb += [
        self.in_a.copy(in_a),
        self.in_b.copy(in_b),
    ]
569 def elaborate(self
, platform
):
570 """ Aligns A against B or B against A, depending on which has the
571 greater exponent. This is done in a *single* cycle using
572 variable-width bit-shift
574 the shifter used here is quite expensive in terms of gates.
575 Mux A or B in (and out) into temporaries, as only one of them
576 needs to be aligned against the other
580 m
.submodules
.align_in_a
= self
.in_a
581 m
.submodules
.align_in_b
= self
.in_b
582 m
.submodules
.align_out_a
= self
.out_a
583 m
.submodules
.align_out_b
= self
.out_b
585 # temporary (muxed) input and output to be shifted
586 t_inp
= FPNumBase(self
.width
)
587 t_out
= FPNumIn(None, self
.width
)
588 espec
= (len(self
.in_a
.e
), True)
589 msr
= MultiShiftRMerge(self
.in_a
.m_width
, espec
)
590 m
.submodules
.align_t_in
= t_inp
591 m
.submodules
.align_t_out
= t_out
592 m
.submodules
.multishift_r
= msr
594 ediff
= Signal(espec
, reset_less
=True)
595 ediffr
= Signal(espec
, reset_less
=True)
596 tdiff
= Signal(espec
, reset_less
=True)
597 elz
= Signal(reset_less
=True)
598 egz
= Signal(reset_less
=True)
600 # connect multi-shifter to t_inp/out mantissa (and tdiff)
601 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
602 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
603 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
604 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
605 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
607 m
.d
.comb
+= ediff
.eq(self
.in_a
.e
- self
.in_b
.e
)
608 m
.d
.comb
+= ediffr
.eq(self
.in_b
.e
- self
.in_a
.e
)
609 m
.d
.comb
+= elz
.eq(self
.in_a
.e
< self
.in_b
.e
)
610 m
.d
.comb
+= egz
.eq(self
.in_a
.e
> self
.in_b
.e
)
612 # default: A-exp == B-exp, A and B untouched (fall through)
613 m
.d
.comb
+= self
.out_a
.copy(self
.in_a
)
614 m
.d
.comb
+= self
.out_b
.copy(self
.in_b
)
615 # only one shifter (muxed)
616 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
617 # exponent of a greater than b: shift b down
619 m
.d
.comb
+= [t_inp
.copy(self
.in_b
),
621 self
.out_b
.copy(t_out
),
622 self
.out_b
.s
.eq(self
.in_b
.s
), # whoops forgot sign
624 # exponent of b greater than a: shift a down
626 m
.d
.comb
+= [t_inp
.copy(self
.in_a
),
628 self
.out_a
.copy(t_out
),
629 self
.out_a
.s
.eq(self
.in_a
.s
), # whoops forgot sign
634 class FPAddAlignSingle(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ builds the single-cycle "align" state around FPAddAlignSingleMod

        width:  passed to the module and to the FPNumIn outputs
        id_wid: passed to FPID.__init__
    """
    FPState.__init__(self, "align")
    FPID.__init__(self, id_wid)

    # single-cycle aligner
    self.mod = FPAddAlignSingleMod(width)

    # this state's a/b outputs (FPNumIn)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
643 def setup(self
, m
, in_a
, in_b
, in_mid
):
644 """ links module to inputs and outputs
646 self
.mod
.setup(m
, in_a
, in_b
)
647 if self
.in_mid
is not None:
648 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
652 # NOTE: could be done as comb
653 m
.d
.sync
+= self
.out_a
.copy(self
.mod
.out_a
)
654 m
.d
.sync
+= self
.out_b
.copy(self
.mod
.out_b
)
658 class FPAddAlignSingleAdd(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ builds a combined align + add0 + add1 state (named "align")

        width:  passed to all three worker modules and to every
                intermediate / output number
        id_wid: passed to FPID.__init__

        setup() chains the three modules combinatorially:
        align -> add stage 0 -> add stage 1.
    """
    FPState.__init__(self, "align")
    FPID.__init__(self, id_wid)

    # alignment
    self.mod = FPAddAlignSingleMod(width)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)

    # add stage 0.  a0_out_z is created exactly once: previously it was
    # constructed a second time after out_tot had been sized from it,
    # discarding the first object for no effect.
    self.a0mod = FPAddStage0Mod(width)
    self.a0_out_z = FPNumBase(width, False)
    self.out_tot = Signal(self.a0_out_z.m_width + 4, reset_less=True)

    # add stage 1
    self.a1mod = FPAddStage1Mod(width)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
676 def setup(self
, m
, in_a
, in_b
, in_mid
):
677 """ links module to inputs and outputs
679 self
.mod
.setup(m
, in_a
, in_b
)
680 m
.d
.comb
+= self
.out_a
.copy(self
.mod
.out_a
)
681 m
.d
.comb
+= self
.out_b
.copy(self
.mod
.out_b
)
683 self
.a0mod
.setup(m
, self
.out_a
, self
.out_b
)
684 m
.d
.comb
+= self
.a0_out_z
.copy(self
.a0mod
.out_z
)
685 m
.d
.comb
+= self
.out_tot
.eq(self
.a0mod
.out_tot
)
687 self
.a1mod
.setup(m
, self
.out_tot
, self
.a0_out_z
)
689 if self
.in_mid
is not None:
690 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
694 m
.d
.sync
+= self
.out_of
.copy(self
.a1mod
.out_of
)
695 m
.d
.sync
+= self
.out_z
.copy(self
.a1mod
.out_z
)
696 m
.next
= "normalise_1"
699 class FPAddStage0Mod
:
def __init__(self, width):
    """ creates a/b inputs, the z numbers and the raw total

        width is passed unchanged to every FPNumBase.
    """
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)

    # NOTE(review): in_z is not referenced by the visible parts of
    # setup() / elaborate()
    self.in_z = FPNumBase(width, False)
    self.out_z = FPNumBase(width, False)

    # total is four bits wider than the out_z.m_width attribute
    self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)
def setup(self, m, in_a, in_b):
    """ links module to inputs and outputs

        Registers this object as m.submodules.add0 and
        combinatorially copies in_a / in_b into self.in_a / self.in_b.
    """
    m.submodules.add0 = self
    m.d.comb += [
        self.in_a.copy(in_a),
        self.in_b.copy(in_b),
    ]
715 def elaborate(self
, platform
):
717 m
.submodules
.add0_in_a
= self
.in_a
718 m
.submodules
.add0_in_b
= self
.in_b
719 m
.submodules
.add0_out_z
= self
.out_z
721 m
.d
.comb
+= self
.out_z
.e
.eq(self
.in_a
.e
)
723 # store intermediate tests (and zero-extended mantissas)
724 seq
= Signal(reset_less
=True)
725 mge
= Signal(reset_less
=True)
726 am0
= Signal(len(self
.in_a
.m
)+1, reset_less
=True)
727 bm0
= Signal(len(self
.in_b
.m
)+1, reset_less
=True)
728 m
.d
.comb
+= [seq
.eq(self
.in_a
.s
== self
.in_b
.s
),
729 mge
.eq(self
.in_a
.m
>= self
.in_b
.m
),
730 am0
.eq(Cat(self
.in_a
.m
, 0)),
731 bm0
.eq(Cat(self
.in_b
.m
, 0))
733 # same-sign (both negative or both positive) add mantissas
736 self
.out_tot
.eq(am0
+ bm0
),
737 self
.out_z
.s
.eq(self
.in_a
.s
)
739 # a mantissa greater than b, use a
742 self
.out_tot
.eq(am0
- bm0
),
743 self
.out_z
.s
.eq(self
.in_a
.s
)
745 # b mantissa greater than a, use b
748 self
.out_tot
.eq(bm0
- am0
),
749 self
.out_z
.s
.eq(self
.in_b
.s
)
754 class FPAddStage0(FPState
, FPID
):
755 """ First stage of add. covers same-sign (add) and subtract
756 special-casing when mantissas are greater or equal, to
757 give greatest accuracy.
def __init__(self, width, id_wid):
    """ builds the "add_0" state around FPAddStage0Mod

        width:  passed to the module and to the out_z number
        id_wid: passed to FPID.__init__
    """
    FPState.__init__(self, "add_0")
    FPID.__init__(self, id_wid)

    self.mod = FPAddStage0Mod(width)

    # this state's z output and raw total
    self.out_z = FPNumBase(width, False)
    # sized the same way as the module's own out_tot: m_width + 4
    self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)
767 def setup(self
, m
, in_a
, in_b
, in_mid
):
768 """ links module to inputs and outputs
770 self
.mod
.setup(m
, in_a
, in_b
)
771 if self
.in_mid
is not None:
772 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
776 # NOTE: these could be done as combinatorial (merge add0+add1)
777 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
778 m
.d
.sync
+= self
.out_tot
.eq(self
.mod
.out_tot
)
782 class FPAddStage1Mod(FPState
):
783 """ Second stage of add: preparation for normalisation.
784 detects when tot sum is too big (tot[27] is kinda a carry bit)
def __init__(self, width):
    """ creates the z/total inputs and the z/overflow outputs

        width is passed unchanged to every FPNumBase.
        NOTE(review): the class derives from FPState but this
        constructor does not call FPState.__init__.
    """
    self.out_norm = Signal(reset_less=True)

    # inputs: partial result and the raw total (in_z.m_width + 4 bits)
    self.in_z = FPNumBase(width, False)
    self.in_tot = Signal(self.in_z.m_width + 4, reset_less=True)

    # outputs: result plus the m0 / guard / round_bit / sticky bits that
    # elaborate() slices out of in_tot
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
794 def setup(self
, m
, in_tot
, in_z
):
795 """ links module to inputs and outputs
797 m
.submodules
.add1
= self
798 m
.submodules
.add1_out_overflow
= self
.out_of
800 m
.d
.comb
+= self
.in_z
.copy(in_z
)
801 m
.d
.comb
+= self
.in_tot
.eq(in_tot
)
803 def elaborate(self
, platform
):
805 #m.submodules.norm1_in_overflow = self.in_of
806 #m.submodules.norm1_out_overflow = self.out_of
807 #m.submodules.norm1_in_z = self.in_z
808 #m.submodules.norm1_out_z = self.out_z
809 m
.d
.comb
+= self
.out_z
.copy(self
.in_z
)
810 # tot[27] gets set when the sum overflows. shift result down
811 with m
.If(self
.in_tot
[-1]):
813 self
.out_z
.m
.eq(self
.in_tot
[4:]),
814 self
.out_of
.m0
.eq(self
.in_tot
[4]),
815 self
.out_of
.guard
.eq(self
.in_tot
[3]),
816 self
.out_of
.round_bit
.eq(self
.in_tot
[2]),
817 self
.out_of
.sticky
.eq(self
.in_tot
[1] | self
.in_tot
[0]),
818 self
.out_z
.e
.eq(self
.in_z
.e
+ 1)
823 self
.out_z
.m
.eq(self
.in_tot
[3:]),
824 self
.out_of
.m0
.eq(self
.in_tot
[3]),
825 self
.out_of
.guard
.eq(self
.in_tot
[2]),
826 self
.out_of
.round_bit
.eq(self
.in_tot
[1]),
827 self
.out_of
.sticky
.eq(self
.in_tot
[0])
832 class FPAddStage1(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ builds the "add_1" state around FPAddStage1Mod

        width:  passed to the module and to the out_z number
        id_wid: passed to FPID.__init__
    """
    FPState.__init__(self, "add_1")
    FPID.__init__(self, id_wid)

    self.mod = FPAddStage1Mod(width)

    # this state's z and overflow outputs
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()

    # strobe: setup() clears it on the sync domain, and a later
    # statement in this class sets it to 1 alongside
    # m.next = "normalise_1".  Unlike the other flags in this file it
    # is created without reset_less.
    self.norm_stb = Signal()
842 def setup(self
, m
, in_tot
, in_z
, in_mid
):
843 """ links module to inputs and outputs
845 self
.mod
.setup(m
, in_tot
, in_z
)
847 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
849 if self
.in_mid
is not None:
850 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
854 m
.d
.sync
+= self
.out_of
.copy(self
.mod
.out_of
)
855 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
856 m
.d
.sync
+= self
.norm_stb
.eq(1)
857 m
.next
= "normalise_1"
860 class FPNorm1ModSingle
:
862 def __init__(self
, width
):
864 self
.out_norm
= Signal(reset_less
=True)
865 self
.in_z
= FPNumBase(width
, False)
866 self
.in_of
= Overflow()
867 self
.out_z
= FPNumBase(width
, False)
868 self
.out_of
= Overflow()
870 def setup(self
, m
, in_z
, in_of
, out_z
):
871 """ links module to inputs and outputs
873 m
.submodules
.normalise_1
= self
875 m
.d
.comb
+= self
.in_z
.copy(in_z
)
876 m
.d
.comb
+= self
.in_of
.copy(in_of
)
878 m
.d
.comb
+= out_z
.copy(self
.out_z
)
880 def elaborate(self
, platform
):
883 mwid
= self
.out_z
.m_width
+2
884 pe
= PriorityEncoder(mwid
)
885 m
.submodules
.norm_pe
= pe
887 m
.submodules
.norm1_out_z
= self
.out_z
888 m
.submodules
.norm1_out_overflow
= self
.out_of
889 m
.submodules
.norm1_in_z
= self
.in_z
890 m
.submodules
.norm1_in_overflow
= self
.in_of
892 in_z
= FPNumBase(self
.width
, False)
894 m
.submodules
.norm1_insel_z
= in_z
895 m
.submodules
.norm1_insel_overflow
= in_of
897 espec
= (len(in_z
.e
), True)
898 ediff_n126
= Signal(espec
, reset_less
=True)
899 msr
= MultiShiftRMerge(mwid
, espec
)
900 m
.submodules
.multishift_r
= msr
902 m
.d
.comb
+= in_z
.copy(self
.in_z
)
903 m
.d
.comb
+= in_of
.copy(self
.in_of
)
904 # initialise out from in (overridden below)
905 m
.d
.comb
+= self
.out_z
.copy(in_z
)
906 m
.d
.comb
+= self
.out_of
.copy(in_of
)
907 # normalisation increase/decrease conditions
908 decrease
= Signal(reset_less
=True)
909 increase
= Signal(reset_less
=True)
910 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
911 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
914 # *sigh* not entirely obvious: count leading zeros (clz)
915 # with a PriorityEncoder: to find from the MSB
916 # we reverse the order of the bits.
917 temp_m
= Signal(mwid
, reset_less
=True)
918 temp_s
= Signal(mwid
+1, reset_less
=True)
919 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
920 # make sure that the amount to decrease by does NOT
921 # go below the minimum non-INF/NaN exponent
922 limclz
= Mux(in_z
.exp_sub_n126
> pe
.o
, pe
.o
,
925 # cat round and guard bits back into the mantissa
926 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
927 pe
.i
.eq(temp_m
[::-1]), # inverted
928 clz
.eq(limclz
), # count zeros from MSB down
929 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
930 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
931 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
932 self
.out_of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
933 # overflow in bits 0..1: got shifted too (leave sticky)
934 self
.out_of
.guard
.eq(temp_s
[1]), # guard
935 self
.out_of
.round_bit
.eq(temp_s
[0]), # round
938 with m
.Elif(increase
):
939 temp_m
= Signal(mwid
+1, reset_less
=True)
941 temp_m
.eq(Cat(in_of
.sticky
, in_of
.round_bit
, in_of
.guard
,
943 ediff_n126
.eq(in_z
.N126
- in_z
.e
),
944 # connect multi-shifter to inp/out mantissa (and ediff)
946 msr
.diff
.eq(ediff_n126
),
947 self
.out_z
.m
.eq(msr
.m
[3:]),
948 self
.out_of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
949 # overflow in bits 0..1: got shifted too (leave sticky)
950 self
.out_of
.guard
.eq(temp_s
[2]), # guard
951 self
.out_of
.round_bit
.eq(temp_s
[1]), # round
952 self
.out_of
.sticky
.eq(temp_s
[0]), # sticky
953 self
.out_z
.e
.eq(in_z
.e
+ ediff_n126
),
959 class FPNorm1ModMulti
:
961 def __init__(self
, width
, single_cycle
=True):
963 self
.in_select
= Signal(reset_less
=True)
964 self
.out_norm
= Signal(reset_less
=True)
965 self
.in_z
= FPNumBase(width
, False)
966 self
.in_of
= Overflow()
967 self
.temp_z
= FPNumBase(width
, False)
968 self
.temp_of
= Overflow()
969 self
.out_z
= FPNumBase(width
, False)
970 self
.out_of
= Overflow()
972 def elaborate(self
, platform
):
975 m
.submodules
.norm1_out_z
= self
.out_z
976 m
.submodules
.norm1_out_overflow
= self
.out_of
977 m
.submodules
.norm1_temp_z
= self
.temp_z
978 m
.submodules
.norm1_temp_of
= self
.temp_of
979 m
.submodules
.norm1_in_z
= self
.in_z
980 m
.submodules
.norm1_in_overflow
= self
.in_of
982 in_z
= FPNumBase(self
.width
, False)
984 m
.submodules
.norm1_insel_z
= in_z
985 m
.submodules
.norm1_insel_overflow
= in_of
987 # select which of temp or in z/of to use
988 with m
.If(self
.in_select
):
989 m
.d
.comb
+= in_z
.copy(self
.in_z
)
990 m
.d
.comb
+= in_of
.copy(self
.in_of
)
992 m
.d
.comb
+= in_z
.copy(self
.temp_z
)
993 m
.d
.comb
+= in_of
.copy(self
.temp_of
)
994 # initialise out from in (overridden below)
995 m
.d
.comb
+= self
.out_z
.copy(in_z
)
996 m
.d
.comb
+= self
.out_of
.copy(in_of
)
997 # normalisation increase/decrease conditions
998 decrease
= Signal(reset_less
=True)
999 increase
= Signal(reset_less
=True)
1000 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1001 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1002 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1004 with m
.If(decrease
):
1006 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1007 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1008 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1009 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1010 self
.out_of
.round_bit
.eq(0), # reset round bit
1011 self
.out_of
.m0
.eq(in_of
.guard
),
1014 with m
.Elif(increase
):
1016 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1017 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1018 self
.out_of
.guard
.eq(in_z
.m
[0]),
1019 self
.out_of
.m0
.eq(in_z
.m
[1]),
1020 self
.out_of
.round_bit
.eq(in_of
.guard
),
1021 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
1027 class FPNorm1Single(FPState
, FPID
):
def __init__(self, width, id_wid, single_cycle=True):
    """ builds the "normalise_1" state around FPNorm1ModSingle

        width:        passed to the module and to the out_z number
        id_wid:       passed to FPID.__init__
        single_cycle: accepted but not referenced in this constructor
    """
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "normalise_1")

    self.mod = FPNorm1ModSingle(width)

    self.out_norm = Signal(reset_less=True)
    # passed to mod.setup() as the number the module copies its out_z to
    self.out_z = FPNumBase(width)
    # action() registers the module's out_of.roundz here
    self.out_roundz = Signal(reset_less=True)
1037 def setup(self
, m
, in_z
, in_of
, in_mid
):
1038 """ links module to inputs and outputs
1040 self
.mod
.setup(m
, in_z
, in_of
, self
.out_z
)
1042 if self
.in_mid
is not None:
1043 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
1045 def action(self
, m
):
1047 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
1051 class FPNorm1Multi(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ builds the multi-cycle "normalise_1" state around FPNorm1ModMulti

        width:  passed to the module and to the temp_z / out_z numbers
        id_wid: passed to FPID.__init__
    """
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "normalise_1")

    self.mod = FPNorm1ModMulti(width)

    # handshake: setup() drives stb from norm_stb and clears ack on the
    # sync domain; action() sets in_accept to (~ack) & stb
    self.stb = Signal(reset_less=True)
    self.ack = Signal(reset=0, reset_less=True)
    self.out_norm = Signal(reset_less=True)
    self.in_accept = Signal(reset_less=True)

    # registered by action(): temp_of <- mod.out_of, temp_z <- out_z
    self.temp_z = FPNumBase(width)
    self.temp_of = Overflow()

    # this state's outputs
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True)
1066 def setup(self
, m
, in_z
, in_of
, norm_stb
, in_mid
):
1067 """ links module to inputs and outputs
1069 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1070 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1071 self
.out_z
, self
.out_norm
)
1073 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1074 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1076 if self
.in_mid
is not None:
1077 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
1079 def action(self
, m
):
1081 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1082 m
.d
.sync
+= self
.temp_of
.copy(self
.mod
.out_of
)
1083 m
.d
.sync
+= self
.temp_z
.copy(self
.out_z
)
1084 with m
.If(self
.out_norm
):
1085 with m
.If(self
.in_accept
):
1090 m
.d
.sync
+= self
.ack
.eq(0)
1092 # normalisation not required (or done).
1094 m
.d
.sync
+= self
.ack
.eq(1)
1095 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
class FPNormToPack(FPState, FPID):
    """FSM state chaining normalise, round, corrections and pack modules.

    The four modules are connected combinatorially in setup(); action()
    registers the packed value and moves the FSM to "pack_put_z".  The state
    is registered under the name "normalise_1".
    """

    def __init__(self, width, id_wid):
        """* width: see NOTE below
        * id_wid: forwarded to FPID.__init__
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        # NOTE(review): setup() reads self.width, but no assignment to it is
        # visible here (the listing's numbering skips a line at this point).

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs
        """
        # Normalisation (chained to input in_z+in_of)
        nmod = FPNorm1ModSingle(self.width)
        n_out_z = FPNumBase(self.width)
        n_out_roundz = Signal(reset_less=True)
        nmod.setup(m, in_z, in_of, n_out_z)

        # Rounding (chained to normalisation)
        rmod = FPRoundMod(self.width)
        r_out_z = FPNumBase(self.width)
        rmod.setup(m, n_out_z, n_out_roundz)
        m.d.comb += n_out_roundz.eq(nmod.out_of.roundz)
        m.d.comb += r_out_z.copy(rmod.out_z)

        # Corrections (chained to rounding)
        cmod = FPCorrectionsMod(self.width)
        c_out_z = FPNumBase(self.width)
        cmod.setup(m, r_out_z)
        m.d.comb += c_out_z.copy(cmod.out_z)

        # Pack (chained to corrections)
        # pmod/out_z are kept on self because action() reads them
        self.pmod = FPPackMod(self.width)
        self.out_z = FPNumBase(self.width)
        self.pmod.setup(m, c_out_z)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """Statements emitted while the FSM is in this state."""
        self.idsync(m) # copies incoming ID to outgoing
        m.d.sync += self.out_z.v.eq(self.pmod.out_z.v) # outputs packed result
        m.next = "pack_put_z"
def __init__(self, width):
    """Create the rounding module's ports.

    * width: passed to both FPNumBase constructors
    """
    self.in_roundz = Signal(reset_less=True)  # round-up request
    self.in_z = FPNumBase(width, False)
    self.out_z = FPNumBase(width, False)
def setup(self, m, in_z, roundz):
    """ registers this module as m.submodules.roundz and drives its inputs

    * in_z: number copied (combinatorially) onto self.in_z
    * roundz: flag copied (combinatorially) onto self.in_roundz
    """
    m.submodules.roundz = self

    m.d.comb += [
        self.in_z.copy(in_z),
        self.in_roundz.eq(roundz),
    ]
def elaborate(self, platform):
    """Combinatorial rounding: out_z defaults to in_z; when in_roundz is set
    the mantissa is incremented, and the exponent too if the mantissa was
    all ones.
    """
    # NOTE(review): the listing's numbering skips a line here and after the
    # last statement; the creation and return of `m` are not visible.
    m.d.comb += self.out_z.copy(self.in_z)
    with m.If(self.in_roundz):
        m.d.comb += self.out_z.m.eq(self.in_z.m + 1) # mantissa rounds up
        with m.If(self.in_z.m == self.in_z.m1s): # all 1s
            m.d.comb += self.out_z.e.eq(self.in_z.e + 1) # exponent up
class FPRound(FPState, FPID):
    """FSM state "round": wraps FPRoundMod and registers its result."""

    def __init__(self, width, id_wid):
        """* width: passed to FPRoundMod and to the FPNumBase output
        * id_wid: forwarded to FPID.__init__
        """
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        self.mod = FPRoundMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, roundz, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, roundz)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """Registers the rounded value and selects "corrections" as next state."""
        # NOTE(review): the listing's numbering skips one line before the
        # first statement below; it is not visible in this view.
        m.d.sync += self.out_z.copy(self.mod.out_z)
        m.next = "corrections"
class FPCorrectionsMod:
    """Combinatorial module: copies in_z to out_z, overriding the exponent
    with in_z.N127 when in_z.is_denormalised is set.
    """

    def __init__(self, width):
        """* width: passed to both FPNumOut constructors"""
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        """Builds the correction logic described on the class."""
        # NOTE(review): the listing's numbering skips a line here and after
        # the last statement; the creation and return of `m` are not visible.
        m.submodules.corr_in_z = self.in_z
        m.submodules.corr_out_z = self.out_z
        m.d.comb += self.out_z.copy(self.in_z)
        with m.If(self.in_z.is_denormalised):
            m.d.comb += self.out_z.e.eq(self.in_z.N127)
class FPCorrections(FPState, FPID):
    """FSM state "corrections": wraps FPCorrectionsMod and registers its result."""

    def __init__(self, width, id_wid):
        """* width: passed to FPCorrectionsMod and to the FPNumBase output
        * id_wid: forwarded to FPID.__init__
        """
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        self.mod = FPCorrectionsMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """Registers the corrected value into self.out_z."""
        # NOTE(review): the listing's numbering skips one line before and
        # one after the statement below; any further statements (such as the
        # next-state selection) are not visible in this view.
        m.d.sync += self.out_z.copy(self.mod.out_z)
def __init__(self, width):
    """Create the pack module's ports.

    * width: passed to both FPNumOut constructors
    """
    self.in_z = FPNumOut(width, False)
    self.out_z = FPNumOut(width, False)
def setup(self, m, in_z):
    """ links module to inputs and outputs

    Registers this module as m.submodules.pack and copies in_z
    (combinatorially) onto self.in_z.
    """
    m.submodules.pack = self
    m.d.comb += self.in_z.copy(in_z)
def elaborate(self, platform):
    """Drives out_z from in_z: out_z.inf(sign) when in_z.is_overflowed,
    and out_z.create(sign, exponent, mantissa) (see NOTE below).
    """
    # NOTE(review): the listing's numbering skips a line here and after the
    # last statement; the creation and return of `m` are not visible.
    m.submodules.pack_in_z = self.in_z
    with m.If(self.in_z.is_overflowed):
        m.d.comb += self.out_z.inf(self.in_z.s)
    # NOTE(review): numbering skips one line here; the statement below
    # presumably belongs to an Else branch -- confirm against the full file.
        m.d.comb += self.out_z.create(self.in_z.s, self.in_z.e, self.in_z.m)
class FPPack(FPState, FPID):
    """FSM state "pack": wraps FPPackMod and registers the packed value."""

    def __init__(self, width, id_wid):
        """* width: passed to FPPackMod and to the FPNumOut output
        * id_wid: forwarded to FPID.__init__
        """
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        self.mod = FPPackMod(width)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """Registers the packed bits and selects "pack_put_z" as next state."""
        # NOTE(review): the listing's numbering skips one line before the
        # first statement below; it is not visible in this view.
        m.d.sync += self.out_z.v.eq(self.mod.out_z.v)
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """FSM state that hands a result to out_z using a stb/ack handshake and
    then moves to `to_state` (default "get_ops").
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """* state: name this state is registered under
        * in_z, out_z: source and destination of the value (see NOTE)
        * in_mid, out_mid: ID signals; in_mid may be None
        * to_state: state entered once the handshake completes
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # NOTE(review): action() reads self.in_z and self.out_z, but their
        # assignments are not visible (numbering skips two lines here).
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """Statements emitted while the FSM is in this state."""
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        # NOTE(review): numbering skips one line before and after the
        # expression below, so the statement it belongs to (which domain it
        # is added to) is not visible in this view.
        self.out_z.v.eq(self.in_z.v)
        # handshake complete: drop the strobe and move on
        with m.If(self.out_z.stb & self.out_z.ack):
            m.d.sync += self.out_z.stb.eq(0)
            m.next = self.to_state
        # NOTE(review): numbering skips one line here; the statement below
        # presumably belongs to an Else branch -- confirm.
            m.d.sync += self.out_z.stb.eq(1)
class FPPutZIdx(FPState):
    """Like a put-result state, but the destination is selected from the
    `out_zs` collection by indexing it with in_mid.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """* state: name this state is registered under
        * in_z: source of the value (see NOTE)
        * out_zs: indexable collection of outputs with stb/ack/v members
        * in_mid: index into out_zs
        * to_state: state entered once the handshake completes
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # NOTE(review): action() reads self.in_z, but its assignment is not
        # visible (numbering skips one line here).
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """Statements emitted while the FSM is in this state."""
        # local copies of the selected output's handshake signals
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
                     outz_ack.eq(self.out_zs[self.in_mid].ack),
        # NOTE(review): numbering skips two lines here (the end of the list
        # above and the start of the next statement are not visible) and one
        # line after the expression below.
        self.out_zs[self.in_mid].v.eq(self.in_z.v)
        with m.If(outz_stb & outz_ack):
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        # NOTE(review): numbering skips one line here; the statement below
        # presumably belongs to an Else branch -- confirm.
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
class FPADDBaseMod(FPID):
    """Adder module assembled from FSM states.

    States are created by get_compact_fragment() or get_longer_fragment(),
    collected through add_state(), and emitted inside one m.FSM() by
    get_fragment().
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """
            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        FPID.__init__(self, id_wid)
        # NOTE(review): other methods read self.width and self.states; their
        # assignments are not visible (numbering skips lines in this method).
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)
        self.out_z = FPOp(width)

    def add_state(self, state):
        """Append `state` to self.states."""
        self.states.append(state)
        # NOTE(review): callers use the result of add_state(); numbering
        # skips one line here, presumably the return statement -- confirm.

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        # NOTE(review): numbering skips one line here (creation of `m` is
        # not visible).
        m.submodules.out_z = self.out_z
        m.submodules.in_t = self.in_t
        # NOTE(review): numbering skips one line before each of the next two
        # calls; presumably a selection on self.compact -- confirm.
        self.get_compact_fragment(m, platform)
        self.get_longer_fragment(m, platform)

        with m.FSM() as fsm:

            for state in self.states:
                with m.State(state.state_from):
                    # NOTE(review): the body of this State block and the
                    # method's return are not visible (numbering skips).

    def get_longer_fragment(self, m, platform=None):
        """Create one FSM state per pipeline step and chain their ports."""

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, self.width))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        # NOTE(review): `a` and `b` used below are bound on lines not
        # visible in this view (numbering skips three lines here).

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        # NOTE(review): numbering skips one line here; the next two lines
        # presumably form the else branch -- confirm.
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # NOTE(review): both n1.setup() calls pass add0.in_mid rather than
        # add1.in_mid, unlike the stage-to-stage pattern used elsewhere.
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        # NOTE(review): numbering skips one line here; the next two lines
        # presumably form the else branch -- confirm.
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        # NOTE(review): passes rn.in_mid, not cor.in_mid
        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

        # NOTE(review): uses pa.in_mid here, whereas get_compact_fragment()
        # uses sc.in_mid for the same "put_z" state.
        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """Create the reduced set of FSM states and chain their ports."""

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, self.width))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        # NOTE(review): `a` and `b` used below are bound on lines not
        # visible in this view (numbering skips three lines here).

        sc = self.add_state(FPAddSpecialCasesDeNorm(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        alm = self.add_state(FPAddAlignSingleAdd(self.width, self.id_wid))
        alm.setup(m, sc.out_a, sc.out_b, sc.in_mid)

        n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
        n1.setup(m, alm.out_z, alm.out_of, alm.in_mid)

        ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z, self.out_z,
                                    n1.in_mid, self.out_mid))

        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                    sc.in_mid, self.out_mid))
class FPADDBase(FPState, FPID):
    """FSM state "fpadd" that owns an FPADDBaseMod submodule and bridges its
    in/out triggers to this state's add_stb/add_ack and out_z signals.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """
            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "fpadd")
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)
        #self.out_z = FPOp(width)

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def setup(self, m, a, b, add_stb, in_mid, out_z, out_mid):
        """Connect operands, IDs and handshakes to the inner module and
        register it as m.submodules.fpadd.
        """
        # NOTE(review): self.out_z is used below but its assignment is not
        # visible (numbering skips one line here).
        self.out_mid = out_mid
        m.d.comb += [self.in_a.eq(a),
                     # NOTE(review): numbering skips one line here; no
                     # visible statement drives self.in_b from `b`.
                     self.mod.in_a.eq(self.in_a),
                     self.mod.in_b.eq(self.in_b),
                     self.in_mid.eq(in_mid),
                     self.mod.in_mid.eq(self.in_mid),
                     self.z_done.eq(self.mod.out_z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.out_mid.eq(self.mod.out_mid),
                     self.out_z.v.eq(self.mod.out_z.v),
                     self.out_z.stb.eq(self.mod.out_z.stb),
                     self.mod.out_z.ack.eq(self.out_z.ack),
        # NOTE(review): numbering skips two lines here; the end of the list
        # above is not visible.
        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.out_z.ack.eq(0) # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):
        """Statements emitted while the FSM is in this state."""

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        #with m.If(self.in_t.ack):
        #    m.d.sync += self.in_t.stb.eq(0)
        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                # NOTE(review): numbering skips the line before the two
                # entries below (the statement they belong to is not
                # visible) and the line after them.
                self.add_ack.eq(1), # acknowledge receipt...
                self.in_t.stb.eq(1), # initiate add
            # NOTE(review): numbering skips a line here; the list below
            # presumably sits in an Else branch of in_accept -- confirm.
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.out_z.ack.eq(1),
        # NOTE(review): numbering skips two lines here (end of the list
        # above and, presumably, the Else header for z_done).
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
            # NOTE(review): numbering skips six lines here; the remainder of
            # the list above and what follows it are not visible.
            if self.in_mid is not None:
                m.d.sync += self.out_mid.eq(self.mod.out_mid)
            # NOTE(review): numbering skips lines around the expression
            # below; the statement it belongs to is not visible.
            self.out_z.v.eq(self.mod.out_z.v)
            # move to output state on detecting z ack
            with m.If(self.out_z.trigger):
                m.d.sync += self.out_z.stb.eq(0)
                # NOTE(review): numbering skips two lines here; the
                # assignment below presumably sits in an Else branch.
                m.d.sync += self.out_z.stb.eq(1)
def __init__(self, width, id_wid):
    """Create a set of named outputs plus an input value and ID signal.

    * width: passed to FPOp for self.in_z
    * id_wid: width of the self.in_mid signal
    """
    self.id_wid = id_wid
    # NOTE(review): `rs_sz` is not a parameter of this method and is not
    # bound anywhere visible in it -- verify where it comes from.
    # NOTE(review): `res` and `out_z` are bound on lines not visible in this
    # view (the listing's numbering skips lines around and inside the loop).
    for i in range(rs_sz):
        out_z.name = "out_z_%d" % i
    self.res = Array(res)
    self.in_z = FPOp(width)
    self.in_mid = Signal(self.id_wid, reset_less=True)
def setup(self, m, in_z, in_mid):
    """ drives this object's in_z and in_mid (combinatorially) from the
    given value and ID
    """
    m.d.comb += self.in_z.copy(in_z)
    m.d.comb += self.in_mid.eq(in_mid)
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd
    """
    # NOTE(review): the listing's numbering skips lines before and after the
    # two statements below; the creation and return of `m`, and anything
    # else this method does, are not visible in this view.
    m.submodules.res_in_z = self.in_z
    m.submodules += self.res
1570 """ FPADD: stages as follows:
1576 FPAddBase---> FPAddBaseMod
1578 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1580 FPAddBase is tricky: it is both a stage and *has* stages.
1581 Connection to FPAddBaseMod therefore requires an in stb/ack
1582 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1583 needs to be the thing that raises the incoming stb.
def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
    """
        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
        * rs_sz: number of (in_a, in_b) pairs and of outputs created
    """
    # NOTE(review): other methods read self.width, self.rs and self.states;
    # their assignments are not visible (numbering skips lines here).
    self.id_wid = id_wid
    self.single_cycle = single_cycle
    #self.out_z = FPOp(width)
    self.ids = FPID(id_wid)
    # NOTE(review): `rs`, `in_a`, `in_b`, `res` and `out_z` are bound on
    # lines not visible in this view (numbering skips lines around and
    # inside both loops).
    for i in range(rs_sz):
        in_a.name = "in_a_%d" % i
        in_b.name = "in_b_%d" % i
        rs.append((in_a, in_b))
    for i in range(rs_sz):
        out_z.name = "out_z_%d" % i
    self.res = Array(res)
def add_state(self, state):
    """Append `state` to self.states."""
    self.states.append(state)
    # NOTE(review): callers use the result of add_state(); the listing's
    # numbering skips one line here, presumably the return -- confirm.
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd

    Builds the "get_a" -> "get_b" -> "fpadd" -> "put_z" states and emits
    them inside one m.FSM().
    """
    # NOTE(review): numbering skips one line here (creation of `m` is not
    # visible).
    m.submodules += self.rs

    # only the first operand pair is used here
    in_a = self.rs[0][0]
    in_b = self.rs[0][1]

    out_z = FPOp(self.width)
    out_mid = Signal(self.id_wid, reset_less=True)
    m.submodules.out_z = out_z

    geta = self.add_state(FPGetOp("get_a", "get_b",
    # NOTE(review): numbering skips four lines here; the rest of the call
    # above and the binding of `a` are not visible.
    getb = self.add_state(FPGetOp("get_b", "fpadd",
    # NOTE(review): numbering skips four lines here; the rest of the call
    # above and the binding of `b` are not visible.
    ab = FPADDBase(self.width, self.id_wid, self.single_cycle)
    ab = self.add_state(ab)
    ab.setup(m, a, b, getb.out_decode, self.ids.in_mid,
    # NOTE(review): numbering skips two lines here; the remaining arguments
    # of the call above are not visible.
    pz = self.add_state(FPPutZIdx("put_z", ab.out_z, self.res,
    # NOTE(review): numbering skips two lines here; the remaining arguments
    # of the call above are not visible.
    with m.FSM() as fsm:

        for state in self.states:
            with m.State(state.state_from):
                # NOTE(review): the body of this State block and the
                # method's return are not visible (numbering skips).
# Script entry point: builds an adder and hands it to nmigen's CLI main().
if __name__ == "__main__":
    # NOTE(review): the listing's numbering skips one line before each
    # `alu = ...` assignment; presumably a selection between the two
    # variants -- confirm against the full file.
    alu = FPADD(width=32, id_wid=5, single_cycle=True)
    # ports: first operand pair, first result, and the ID signals
    main(alu, ports=alu.rs[0][0].ports() + \
                    alu.rs[0][1].ports() + \
                    alu.res[0].ports() + \
                    [alu.ids.in_mid, alu.ids.out_mid])
    alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
    main(alu, ports=[alu.in_a, alu.in_b] + \
                    alu.in_t.ports() + \
                    alu.out_z.ports() + \
                    [alu.in_mid, alu.out_mid])
1677 # works... but don't use, just do "python fname.py convert -t v"
1678 #print (verilog.convert(alu, ports=[
1679 # ports=alu.in_a.ports() + \
1680 # alu.in_b.ports() + \
1681 # alu.out_z.ports())