c22f9fbf327f6f960e77803bd8180e421a8ae2d3
[ieee754fpu.git] / src / ieee754 / fcvt / pipeline.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Const, Elaboratable
6 from nmigen.cli import main, verilog
7
8 from nmutil.singlepipe import ControlBase
9 from nmutil.concurrentunit import ReservationStations, num_bits
10
11 from ieee754.fpcommon.getop import FPADDBaseData
12 from ieee754.fpcommon.pack import FPPackData
13 from ieee754.fpcommon.normtopack import FPNormToPack
14 from ieee754.fpcommon.postcalc import FPAddStage1Data
15 from ieee754.fpcommon.msbhigh import FPMSBHigh
16
17
18 from nmigen import Module, Signal, Elaboratable
19 from math import log
20
21 from ieee754.fpcommon.fpbase import FPNumIn, FPNumOut, FPNumBaseRecord
22 from ieee754.fpcommon.fpbase import FPState, FPNumBase
23 from ieee754.fpcommon.getop import FPPipeContext
24
25 from ieee754.fpcommon.fpbase import FPNumDecode, FPNumBaseRecord
26 from nmutil.singlepipe import SimpleHandshake, StageChain
27
28 from ieee754.fpcommon.fpbase import FPState
29 from ieee754.pipeline import PipelineSpec
30
class SignedOp:
    """ Small operand record carrying a single "signed" flag signal.
    """

    def __init__(self):
        # reset-less flag signal
        self.signed = Signal(reset_less=True)

    def eq(self, i):
        """ Returns a one-element list containing the assignment of
            ``i`` to the ``signed`` flag.
        """
        assign_signed = self.signed.eq(i)
        return [assign_signed]
37
38
class FPCVTIntToFloatMod(Elaboratable):
    """ Integer to floating-point conversion stage.

        Per the original notes this copes with 16/32/64 int to
        16/32/64 fp.  Signedness of the input is taken from bit 0 of
        the operator carried in the input context:

        self.i.ctx.op & 0x1 == 0x1 : SIGNED int
        self.i.ctx.op & 0x1 == 0x0 : UNSIGNED int
    """
    def __init__(self, in_pspec, out_pspec):
        self.in_pspec, self.out_pspec = in_pspec, out_pspec
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format, built from the input pipeline spec
        """
        return FPADDBaseData(self.in_pspec)

    def ospec(self):
        """ output data format, built from the output pipeline spec
        """
        return FPAddStage1Data(self.out_pspec, e_extra=True)

    def setup(self, m, i):
        """ registers this module as submodule "intconvert" of m and
            combinatorially drives its input from i
        """
        m.submodules.intconvert = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ stage API: the result is always this module's output record
        """
        return self.o

    def elaborate(self, platform):
        m = Module()
        src = self.i
        dst = self.o
        z = dst.z
        rnd = dst.of

        # elaboration-time debug output
        print("in_width out", self.in_pspec.width,
                              self.out_pspec.width)
        print("a1", self.in_pspec.width)
        print("z1", z.width, z.rmw, z.e_width, z.e_start, z.e_end)

        int_width = self.in_pspec.width   # bits in the integer input
        fp_mbits = z.rmw                  # z.rmw of the output number
        shift = fp_mbits - int_width      # negative: int wider than rmw
        print("ms-me", shift, int_width, fp_mbits)

        # MSB-high normaliser, 3 extra bits for guard/round/sticky
        msb = FPMSBHigh(int_width+3, z.e_width)
        m.submodules.norm_msb = msb

        # signed or unsigned: bit 0 of the operator context
        signed = Signal(reset_less=True)
        m.d.comb += signed.eq(src.ctx.op[0])

        # magnitude of the input integer
        mantissa = Signal(int_width, reset_less=True)

        # a -ve signed input is negated to +ve: the FP sign bit carries
        # the sign, the mantissa only the magnitude.
        with m.If(signed):
            m.d.comb += z.s.eq(src.a[-1])           # sign is top bit of a
            with m.If(z.s):
                m.d.comb += mantissa.eq(-src.a)     # negate if sign -ve
            with m.Else():
                m.d.comb += mantissa.eq(src.a)      # leave as-is
        with m.Else():
            m.d.comb += [
                mantissa.eq(src.a),                 # unsigned, use full a
                z.s.eq(0),
            ]

        # feed the normaliser with the full INT
        m.d.comb += [
            msb.m_in.eq(Cat(0, 0, 0, mantissa)),    # g/r/s + input
            msb.e_in.eq(int_width),                 # exp = int width
        ]

        if shift < 0:
            # larger int to smaller FP (uint32/64 -> fp16 most likely)
            m.d.comb += [
                z.e.eq(msb.e_out-1),
                z.m[shift-1:].eq(msb.m_out[-fp_mbits-1:]),
            ]
        else:
            # smaller int to larger FP
            m.d.comb += [
                z.e.eq(msb.e_out),
                z.m[shift:].eq(msb.m_out[3:]),
            ]
        m.d.comb += z.create(z.s, z.e, z.m)

        # note (from the original author): post-normalisation appears
        # to be capable of detecting overflow to infinity (FPPackMod),
        # so no range-checking is done here: guard/round/sticky are set
        # up, the INT is dropped into the mantissa, and later stages
        # deal with rounding.  XXX TODO: see if FPNormaliseMod is even
        # necessary.  it probably isn't

        # rounding bits: positions are relative to where the lowest
        # mantissa bit (m0) sits in the normaliser output
        if shift < 0:
            # larger int to smaller FP (uint32/64 -> fp16 most likely)
            m0_bit = -fp_mbits-1
        else:
            # smaller int to larger FP
            m0_bit = 3
        guard_bit = m0_bit - 1
        round_bit = m0_bit - 2
        m.d.comb += [
            rnd.guard.eq(msb.m_out[guard_bit]),
            rnd.round_bit.eq(msb.m_out[round_bit]),
            rnd.sticky.eq(msb.m_out[:round_bit].bool()),
            rnd.m0.eq(msb.m_out[m0_bit]),
        ]

        # special cases active by default
        m.d.comb += dst.out_do_z.eq(1)

        # zero input gives zero output; anything else clears out_do_z
        with m.If(~src.a.bool()):
            m.d.comb += z.zero(0)
        with m.Else():
            m.d.comb += dst.out_do_z.eq(0)          # activate normalisation

        # mirror the z value into oz and copy the context (muxid, operator)
        m.d.comb += dst.oz.eq(z.v)
        m.d.comb += dst.ctx.eq(src.ctx)

        return m
158
159
class FPCVTUpConvertMod(Elaboratable):
    """ FP up-conversion (lower to higher bitwidth)

        Decodes operand ``a`` at the input width and re-creates it at
        the output width.  NaN, Inf and zero are produced directly;
        only an input with exponent flag exp_n127 and a non-zero
        mantissa clears out_do_z ("activate normalisation").
    """
    def __init__(self, in_pspec, out_pspec):
        self.in_pspec, self.out_pspec = in_pspec, out_pspec
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format, built from the input pipeline spec
        """
        return FPADDBaseData(self.in_pspec)

    def ospec(self):
        """ output data format, built from the output pipeline spec
        """
        return FPAddStage1Data(self.out_pspec, e_extra=False)

    def setup(self, m, i):
        """ registers this module as submodule "upconvert" of m and
            combinatorially drives its input from i
        """
        m.submodules.upconvert = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ stage API: the result is always this module's output record
        """
        return self.o

    def elaborate(self, platform):
        m = Module()
        out = self.o
        z = out.z
        rnd = out.of

        # decode: XXX really should move to separate stage
        print("in_width out", self.in_pspec.width,
                              self.out_pspec.width)
        a1 = FPNumBaseRecord(self.in_pspec.width, False)
        print("a1", a1.width, a1.rmw, a1.e_width, a1.e_start, a1.e_end)
        m.submodules.sc_decode_a = a1 = FPNumDecode(None, a1)
        m.d.comb += a1.v.eq(self.i.a)
        print("z1", z.width, z.rmw, z.e_width, z.e_start, z.e_end)

        in_mbits = a1.rmw
        extra = z.rmw - a1.rmw      # how many more rmw bits z has than a
        print("ms-me", extra, in_mbits, z.rmw, a1.rmw)

        # conversion can mostly be done manually: same sign and
        # exponent, mantissa placed from bit "extra" of z.m upwards
        m.d.comb += [
            z.s.eq(a1.s),
            z.e.eq(a1.e),
            z.m[extra:].eq(a1.m),
        ]
        m.d.comb += z.create(a1.s, a1.e, z.m)

        # rounding initialised to all zeros (deactivated)
        m.d.comb += [
            rnd.guard.eq(0),
            rnd.round_bit.eq(0),
            rnd.sticky.eq(0),
            rnd.m0.eq(a1.m[0]),
        ]

        # most special cases active (except tiny-number normalisation, below)
        m.d.comb += out.out_do_z.eq(1)

        # detect NaN/Inf first
        with m.If(a1.exp_128):
            with m.If(~a1.m_zero):
                m.d.comb += z.nan(0)        # RISC-V wants normalised NaN
            with m.Else():
                m.d.comb += z.inf(a1.s)     # RISC-V wants signed INF
        with m.Else():
            with m.If(a1.exp_n127):
                with m.If(~a1.m_zero):
                    # non-zero mantissa at exp_n127: hand over to the
                    # normaliser, with a zero bit prepended below a1.m
                    m.d.comb += [
                        z.m[extra:].eq(Cat(0, a1.m)),
                        out.out_do_z.eq(0),   # activate normalisation
                    ]
                with m.Else():
                    # RISC-V zero needs actual zero
                    m.d.comb += z.zero(a1.s)

        # mirror the z value into oz and copy the context (muxid, operator)
        m.d.comb += out.oz.eq(z.v)
        m.d.comb += out.ctx.eq(self.i.ctx)

        return m
238
239
class FPCVTDownConvertMod(Elaboratable):
    """ FP down-conversion (higher to lower bitwidth)

        Decodes operand ``a`` at the input width, then selects between
        zero, below-minimum-exponent, Inf, NaN, overflow-to-Inf and the
        ordinary in-range case (which is rounded here).  Only the
        below-minimum-exponent case leaves out_do_z unset.
    """
    def __init__(self, in_pspec, out_pspec):
        self.in_pspec, self.out_pspec = in_pspec, out_pspec
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format, built from the input pipeline spec
        """
        return FPADDBaseData(self.in_pspec)

    def ospec(self):
        """ output data format, built from the output pipeline spec
        """
        return FPAddStage1Data(self.out_pspec, e_extra=True)

    def setup(self, m, i):
        """ registers this module as submodule "downconvert" of m and
            combinatorially drives its input from i
        """
        m.submodules.downconvert = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ stage API: the result is always this module's output record
        """
        return self.o

    def elaborate(self, platform):
        m = Module()
        out = self.o
        z = out.z

        # decode: XXX really should move to separate stage
        print("in_width out", self.in_pspec.width,
                              self.out_pspec.width)
        a1 = FPNumBaseRecord(self.in_pspec.width, False)
        print("a1", a1.width, a1.rmw, a1.e_width, a1.e_start, a1.e_end)
        m.submodules.sc_decode_a = a1 = FPNumDecode(None, a1)
        m.d.comb += a1.v.eq(self.i.a)
        print("z1", z.width, z.rmw, z.e_width, z.e_start, z.e_end)

        in_mbits = a1.rmw
        drop = a1.rmw - z.rmw       # how many more rmw bits a has than z
        print("ms-me", drop, in_mbits)

        # intermediaries
        exp_sub_n126 = Signal((a1.e_width, True), reset_less=True)
        exp_gt127 = Signal(reset_less=True)
        # exponent-range constants from z, at the bit-width of a1.
        N126 = Const(z.fp.N126.value, (a1.e_width, True))
        P127 = Const(z.fp.P127.value, (a1.e_width, True))
        m.d.comb += [
            exp_sub_n126.eq(a1.e - N126),
            exp_gt127.eq(a1.e > P127),
        ]

        def rounding_eqs():
            # guard/round/sticky come from the bits of a1.m just below
            # bit "drop"; m0 is bit "drop" itself
            rnd = out.of
            return [
                rnd.guard.eq(a1.m[drop-1]),
                rnd.round_bit.eq(a1.m[drop-2]),
                rnd.sticky.eq(a1.m[:drop-2].bool()),
                rnd.m0.eq(a1.m[drop]),      # bit of a1
            ]

        # start index of the top z.rmw+1 bits of a1.m
        top = -z.rmw-1

        # if a zero, return zero (signed)
        # NOTE(review): the test is on exp_n127 only, not the mantissa,
        # so presumably input denormals land here too -- confirm.
        with m.If(a1.exp_n127):
            m.d.comb += z.zero(a1.s)
            m.d.comb += out.out_do_z.eq(1)

        # exponent below z's minimum (N126): out_do_z is left unset
        # here so that later stages finish the job
        with m.Elif(exp_sub_n126 < 0):
            m.d.comb += rounding_eqs()
            m.d.comb += [
                z.s.eq(a1.s),
                z.e.eq(a1.e),
                z.m.eq(a1.m[top:]),
                z.m[-1].eq(1),      # must follow the z.m assignment
            ]

        # if a is inf return inf
        with m.Elif(a1.is_inf):
            m.d.comb += z.inf(a1.s)
            m.d.comb += out.out_do_z.eq(1)

        # if a is NaN return NaN
        with m.Elif(a1.is_nan):
            m.d.comb += z.nan(0)
            m.d.comb += out.out_do_z.eq(1)

        # if a's exponent is greater than z's maximum (P127), return inf
        with m.Elif(exp_gt127):
            print("inf", z.inf(a1.s))
            m.d.comb += z.inf(a1.s)
            m.d.comb += out.out_do_z.eq(1)

        # ok after all that, anything else should fit fine (whew)
        with m.Else():
            m.d.comb += rounding_eqs()

            # XXX TODO: this is basically duplicating FPRoundMod. hmmm...
            print("alen", a1.e_start, z.fp.N126, N126)
            print("m1", z.rmw, a1.m[top:])
            mo = Signal(z.m_width-1)
            m.d.comb += mo.eq(a1.m[drop:in_mbits])
            with m.If(out.of.roundz):
                with m.If((~mo == 0)):      # all 1s: exponent goes up by one
                    m.d.comb += z.create(a1.s, a1.e+1, mo+1)
                with m.Else():
                    m.d.comb += z.create(a1.s, a1.e, mo+1)
            with m.Else():
                m.d.comb += z.create(a1.s, a1.e, a1.m[top:])
            m.d.comb += out.out_do_z.eq(1)

        # mirror the z value into oz and copy the context (muxid, operator)
        m.d.comb += out.oz.eq(z.v)
        m.d.comb += out.ctx.eq(self.i.ctx)

        return m
351
352
class FPCVTIntToFloat(FPState):
    """ FSM state wrapping FPCVTIntToFloatMod (integer to FP conversion).

        in_width and out_width are forwarded unchanged to
        FPCVTIntToFloatMod as its in_pspec / out_pspec; that module
        reads ``.width`` from them.
        NOTE(review): despite the parameter names these presumably
        have to be pipeline-spec objects rather than plain integers --
        confirm against callers.  id_wid is accepted but not used here.
    """

    def __init__(self, in_width, out_width, id_wid):
        FPState.__init__(self, "inttofloat")
        self.mod = FPCVTIntToFloatMod(in_width, out_width)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # FPCVTIntToFloatMod.setup() only accepts (m, i): passing
        # out_do_z as a third argument raised TypeError.  The flag is
        # instead taken from the module's output record.
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        # the module has no "out_z" attribute: its outputs live in
        # self.mod.o, where oz mirrors z.v.
        # NOTE(review): assumes oz is the value the following states
        # consume -- confirm.
        m.d.sync += self.out_z.oz.eq(self.mod.o.oz)   # only take the output
        m.d.sync += self.out_z.ctx.eq(self.mod.o.ctx) # (and context)

    def action(self, m):
        """ selects the next FSM state: "put_z" when the module
            flagged out_do_z, otherwise "denormalise"
        """
        # idsync is not defined in this class (presumably from FPState)
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
376
377
class FPCVTUpConvert(FPState):
    """ FSM state wrapping FPCVTUpConvertMod (FP up-conversion).

        in_width and out_width are forwarded unchanged to
        FPCVTUpConvertMod as its in_pspec / out_pspec; that module
        reads ``.width`` from them.
        NOTE(review): despite the parameter names these presumably
        have to be pipeline-spec objects rather than plain integers --
        confirm against callers.  id_wid is accepted but not used here.
    """

    def __init__(self, in_width, out_width, id_wid):
        FPState.__init__(self, "upconvert")
        self.mod = FPCVTUpConvertMod(in_width, out_width)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # FPCVTUpConvertMod.setup() only accepts (m, i): passing
        # out_do_z as a third argument raised TypeError.  The flag is
        # instead taken from the module's output record.
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        # the module has no "out_z" attribute: its outputs live in
        # self.mod.o, where oz mirrors z.v.
        # NOTE(review): assumes oz is the value the following states
        # consume -- confirm.
        m.d.sync += self.out_z.oz.eq(self.mod.o.oz)   # only take the output
        m.d.sync += self.out_z.ctx.eq(self.mod.o.ctx) # (and context)

    def action(self, m):
        """ selects the next FSM state: "put_z" when the module
            flagged out_do_z, otherwise "denormalise"
        """
        # idsync is not defined in this class (presumably from FPState)
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
401
402
class FPCVTDownConvert(FPState):
    """ FSM state wrapping FPCVTDownConvertMod (FP down-conversion).

        The state is registered under the name "special_cases" (kept
        as-is: it is a runtime identifier).

        in_width and out_width are forwarded unchanged to
        FPCVTDownConvertMod as its in_pspec / out_pspec; that module
        reads ``.width`` from them.
        NOTE(review): despite the parameter names these presumably
        have to be pipeline-spec objects rather than plain integers --
        confirm against callers.  id_wid is accepted but not used here.
    """

    def __init__(self, in_width, out_width, id_wid):
        FPState.__init__(self, "special_cases")
        self.mod = FPCVTDownConvertMod(in_width, out_width)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # FPCVTDownConvertMod.setup() only accepts (m, i): passing
        # out_do_z as a third argument raised TypeError.  The flag is
        # instead taken from the module's output record.
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        # the module has no "out_z" attribute: its outputs live in
        # self.mod.o, where oz mirrors z.v.
        # NOTE(review): assumes oz is the value the following states
        # consume -- confirm.
        m.d.sync += self.out_z.oz.eq(self.mod.o.oz)   # only take the output
        m.d.sync += self.out_z.ctx.eq(self.mod.o.ctx) # (and context)

    def action(self, m):
        """ selects the next FSM state: "put_z" when the module
            flagged out_do_z, otherwise "denormalise"
        """
        # idsync is not defined in this class (presumably from FPState)
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
426
427
class FPCVTIntToFloatDeNorm(FPState, SimpleHandshake):
    """ Int-to-float conversion: FPCVTIntToFloatMod wrapped as a
        SimpleHandshake stage, registered as state "inttofloat".
    """

    def __init__(self, in_pspec, out_pspec):
        FPState.__init__(self, "inttofloat")
        # the combinatorial conversion module is the stage being wrapped
        stage = FPCVTIntToFloatMod(in_pspec, out_pspec)
        SimpleHandshake.__init__(self, stage)
        self.out = self.ospec(None)
437
438
class FPCVTUpConvertDeNorm(FPState, SimpleHandshake):
    """ FP up-conversion: FPCVTUpConvertMod wrapped as a
        SimpleHandshake stage, registered as state "upconvert".
    """

    def __init__(self, in_pspec, out_pspec):
        FPState.__init__(self, "upconvert")
        # the combinatorial conversion module is the stage being wrapped
        stage = FPCVTUpConvertMod(in_pspec, out_pspec)
        SimpleHandshake.__init__(self, stage)
        self.out = self.ospec(None)
448
449
class FPCVTDownConvertDeNorm(FPState, SimpleHandshake):
    """ FP down-conversion: FPCVTDownConvertMod wrapped as a
        SimpleHandshake stage, registered as state "downconvert".
    """

    def __init__(self, in_pspec, out_pspec):
        FPState.__init__(self, "downconvert")
        # the combinatorial conversion module is the stage being wrapped
        stage = FPCVTDownConvertMod(in_pspec, out_pspec)
        SimpleHandshake.__init__(self, stage)
        self.out = self.ospec(None)
459
460
class FPCVTIntBasePipe(ControlBase):
    """ Two-stage int-to-float pipeline: the conversion stage followed
        by FPNormToPack (with e_extra=True).
    """
    def __init__(self, in_pspec, out_pspec):
        ControlBase.__init__(self)
        self.pipe1 = FPCVTIntToFloatDeNorm(in_pspec, out_pspec)
        self.pipe2 = FPNormToPack(out_pspec, e_extra=True)

        # connection statements are kept for use in elaborate()
        chain = [self.pipe1, self.pipe2]
        self._eqs = self.connect(chain)

    def elaborate(self, platform):
        m = ControlBase.elaborate(self, platform)
        # register both stages under their submodule names
        named_stages = (("toint", self.pipe1), ("normpack", self.pipe2))
        for name, stage in named_stages:
            setattr(m.submodules, name, stage)
        m.d.comb += self._eqs
        return m
475
476
class FPCVTUpBasePipe(ControlBase):
    """ Two-stage FP up-conversion pipeline: the conversion stage
        followed by FPNormToPack (with e_extra=False).
    """
    def __init__(self, in_pspec, out_pspec):
        ControlBase.__init__(self)
        self.pipe1 = FPCVTUpConvertDeNorm(in_pspec, out_pspec)
        self.pipe2 = FPNormToPack(out_pspec, e_extra=False)

        # connection statements are kept for use in elaborate()
        chain = [self.pipe1, self.pipe2]
        self._eqs = self.connect(chain)

    def elaborate(self, platform):
        m = ControlBase.elaborate(self, platform)
        # register both stages under their submodule names
        named_stages = (("up", self.pipe1), ("normpack", self.pipe2))
        for name, stage in named_stages:
            setattr(m.submodules, name, stage)
        m.d.comb += self._eqs
        return m
491
492
class FPCVTDownBasePipe(ControlBase):
    """ Two-stage FP down-conversion pipeline: the conversion stage
        followed by FPNormToPack (with e_extra=True).
    """
    def __init__(self, in_pspec, out_pspec):
        ControlBase.__init__(self)
        self.pipe1 = FPCVTDownConvertDeNorm(in_pspec, out_pspec)
        self.pipe2 = FPNormToPack(out_pspec, e_extra=True)

        # connection statements are kept for use in elaborate()
        chain = [self.pipe1, self.pipe2]
        self._eqs = self.connect(chain)

    def elaborate(self, platform):
        m = ControlBase.elaborate(self, platform)
        # register both stages under their submodule names
        named_stages = (("down", self.pipe1), ("normpack", self.pipe2))
        for name, stage in named_stages:
            setattr(m.submodules, name, stage)
        m.d.comb += self._eqs
        return m
507
508
class FPCVTIntMuxInOut(ReservationStations):
    """ Reservation-Station version of FPCVT int-to-float pipeline.

        * fan-in on inputs (an array of FPADDBaseData)
        * 2-stage conversion pipeline (FPCVTIntBasePipe)
        * fan-out on outputs (an array of FPPackData)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, in_width, out_width, num_rows, op_wid=0):
        self.op_wid = op_wid
        # NOTE(review): both id widths are derived from the data widths
        # rather than from num_rows -- confirm this is intended.
        in_id_wid = num_bits(in_width)
        out_id_wid = num_bits(out_width)
        self.id_wid = in_id_wid
        self.out_id_wid = out_id_wid

        # separate pipeline specs for the input and output sides
        self.in_pspec = PipelineSpec(in_width, in_id_wid, op_wid)
        self.out_pspec = PipelineSpec(out_width, out_id_wid, op_wid)

        self.alu = FPCVTIntBasePipe(self.in_pspec, self.out_pspec)
        ReservationStations.__init__(self, num_rows)

    def i_specfn(self):
        """ input data format, built from the input pipeline spec
        """
        return FPADDBaseData(self.in_pspec)

    def o_specfn(self):
        """ output data format, built from the output pipeline spec
        """
        return FPPackData(self.out_pspec)
535
536
class FPCVTUpMuxInOut(ReservationStations):
    """ Reservation-Station version of FPCVT up pipeline.

        * fan-in on inputs (an array of FPADDBaseData)
        * 2-stage conversion pipeline (FPCVTUpBasePipe)
        * fan-out on outputs (an array of FPPackData)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, in_width, out_width, num_rows, op_wid=0):
        self.op_wid = op_wid
        # NOTE(review): both id widths are derived from the data widths
        # rather than from num_rows -- confirm this is intended.
        in_id_wid = num_bits(in_width)
        out_id_wid = num_bits(out_width)
        self.id_wid = in_id_wid
        self.out_id_wid = out_id_wid

        # separate pipeline specs for the input and output sides
        self.in_pspec = PipelineSpec(in_width, in_id_wid, op_wid)
        self.out_pspec = PipelineSpec(out_width, out_id_wid, op_wid)

        self.alu = FPCVTUpBasePipe(self.in_pspec, self.out_pspec)
        ReservationStations.__init__(self, num_rows)

    def i_specfn(self):
        """ input data format, built from the input pipeline spec
        """
        return FPADDBaseData(self.in_pspec)

    def o_specfn(self):
        """ output data format, built from the output pipeline spec
        """
        return FPPackData(self.out_pspec)
563
564
class FPCVTDownMuxInOut(ReservationStations):
    """ Reservation-Station version of FPCVT down pipeline.

        * fan-in on inputs (an array of FPADDBaseData)
        * 2-stage conversion pipeline (FPCVTDownBasePipe)
        * fan-out on outputs (an array of FPPackData)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, in_width, out_width, num_rows, op_wid=0):
        self.op_wid = op_wid
        # NOTE(review): both id widths are derived from the data widths
        # rather than from num_rows -- confirm this is intended.
        in_id_wid = num_bits(in_width)
        out_id_wid = num_bits(out_width)
        self.id_wid = in_id_wid
        self.out_id_wid = out_id_wid

        # separate pipeline specs for the input and output sides
        self.in_pspec = PipelineSpec(in_width, in_id_wid, op_wid)
        self.out_pspec = PipelineSpec(out_width, out_id_wid, op_wid)

        self.alu = FPCVTDownBasePipe(self.in_pspec, self.out_pspec)
        ReservationStations.__init__(self, num_rows)

    def i_specfn(self):
        """ input data format, built from the input pipeline spec
        """
        return FPADDBaseData(self.in_pspec)

    def o_specfn(self):
        """ output data format, built from the output pipeline spec
        """
        return FPPackData(self.out_pspec)