hilarious: fp upconvert of zero was wrong
[ieee754fpu.git] / src / ieee754 / fcvt / pipeline.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Const, Elaboratable
6 from nmigen.cli import main, verilog
7
8 from nmutil.singlepipe import ControlBase
9 from nmutil.concurrentunit import ReservationStations, num_bits
10
11 from ieee754.fpcommon.getop import FPADDBaseData
12 from ieee754.fpcommon.pack import FPPackData
13 from ieee754.fpcommon.normtopack import FPNormToPack
14 from ieee754.fpcommon.postcalc import FPAddStage1Data
15
16
17 from nmigen import Module, Signal, Elaboratable
18 from math import log
19
20 from ieee754.fpcommon.fpbase import FPNumIn, FPNumOut, FPNumBaseRecord
21 from ieee754.fpcommon.fpbase import FPState, FPNumBase
22 from ieee754.fpcommon.getop import FPPipeContext
23
24 from ieee754.fpcommon.fpbase import FPNumDecode, FPNumBaseRecord
25 from nmutil.singlepipe import SimpleHandshake, StageChain
26
27 from ieee754.fpcommon.fpbase import FPState
28 from ieee754.pipeline import PipelineSpec
29
30
class FPCVTUpConvertMod(Elaboratable):
    """ FP up-conversion (lower to higher bitwidth)

        Decodes the operand i.a at the input width and builds the result
        o.z at the output width.  o.out_do_z is driven to 1 in every case
        except an input with exp_n127 set and a non-zero mantissa, where
        it is driven to 0 ("normalise").
    """
    def __init__(self, in_pspec, out_pspec):
        """ in_pspec / out_pspec: pipeline specs of the input and output
            formats (their .width attribute is read during elaboration)
        """
        self.in_pspec = in_pspec
        self.out_pspec = out_pspec
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format, built from the input pipeline spec
        """
        return FPADDBaseData(self.in_pspec)

    def ospec(self):
        """ output data format, built from the output pipeline spec
        """
        return FPAddStage1Data(self.out_pspec, e_extra=False)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.upconvert = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.o

    def elaborate(self, platform):
        m = Module()

        # decode: XXX really should move to separate stage
        print("in_width out", self.in_pspec.width,
              self.out_pspec.width)
        a1 = FPNumBaseRecord(self.in_pspec.width, False)
        print("a1", a1.width, a1.rmw, a1.e_width, a1.e_start, a1.e_end)
        m.submodules.sc_decode_a = a1 = FPNumDecode(None, a1)
        m.d.comb += a1.v.eq(self.i.a)
        z1 = self.o.z
        print("z1", z1.width, z1.rmw, z1.e_width, z1.e_start, z1.e_end)

        me = a1.rmw
        # number of extra low mantissa bits in the output format
        ms = self.o.z.rmw - a1.rmw
        print("ms-me", ms, me, self.o.z.rmw, a1.rmw)

        # defaults: sign and exponent copied across, input mantissa
        # placed in the upper bits of the output mantissa
        m.d.comb += self.o.z.s.eq(a1.s)
        m.d.comb += self.o.z.e.eq(a1.e)
        m.d.comb += self.o.z.m[ms:].eq(a1.m)
        m.d.comb += self.o.z.create(a1.s, a1.e, self.o.z.m)

        # defaults for the overflow bits.  the branches below never
        # change them, so they are assigned once, here only.
        m.d.comb += self.o.of.guard.eq(0)
        m.d.comb += self.o.of.round_bit.eq(0)
        m.d.comb += self.o.of.sticky.eq(0)
        m.d.comb += self.o.of.m0.eq(a1.m[0])

        # default, only overridden by the "normalise" case below
        m.d.comb += self.o.out_do_z.eq(1)

        # if exp == top
        with m.If(a1.exp_128):
            with m.If(~a1.m_zero):
                m.d.comb += self.o.z.nan(0)
            with m.Else():
                m.d.comb += self.o.z.inf(a1.s)
        with m.Else():
            with m.If(a1.exp_n127):
                with m.If(~a1.m_zero):
                    # mantissa moved up one extra bit (Cat puts a zero
                    # underneath it)
                    m.d.comb += self.o.z.m[ms:].eq(Cat(0, a1.m))
                    m.d.comb += self.o.out_do_z.eq(0) # normalise
                with m.Else():
                    # mantissa is zero as well: signed zero
                    m.d.comb += self.o.z.zero(a1.s)

        # copy the context (muxid, operator)
        m.d.comb += self.o.oz.eq(self.o.z.v)
        m.d.comb += self.o.ctx.eq(self.i.ctx)

        return m
119
120
class FPCVTDownConvertMod(Elaboratable):
    """ FP down-conversion (higher to lower bitwidth)

        Decodes the operand i.a at the input width and builds the result
        o.z at the output width, selecting between zero, inf, NaN, a
        locally rounded value, or a value passed on with out_do_z left
        unassigned.
    """
    def __init__(self, in_pspec, out_pspec):
        """ in_pspec / out_pspec: pipeline specs of the input and output
            formats (their .width attribute is read during elaboration)
        """
        self.in_pspec, self.out_pspec = in_pspec, out_pspec
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format, built from the input pipeline spec
        """
        return FPADDBaseData(self.in_pspec)

    def ospec(self):
        """ output data format, built from the output pipeline spec
        """
        return FPAddStage1Data(self.out_pspec, e_extra=True)

    def setup(self, m, i):
        """ registers this module as a submodule and connects its input
        """
        m.submodules.downconvert = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.o

    def elaborate(self, platform):
        m = Module()

        # decode: XXX really should move to separate stage
        print("in_width out", self.in_pspec.width,
              self.out_pspec.width)
        a1 = FPNumBaseRecord(self.in_pspec.width, False)
        print("a1", a1.width, a1.rmw, a1.e_width, a1.e_start, a1.e_end)
        a1 = FPNumDecode(None, a1)
        m.submodules.sc_decode_a = a1
        m.d.comb += a1.v.eq(self.i.a)
        z1 = self.o.z
        of = self.o.of
        print("z1", z1.width, z1.rmw, z1.e_width, z1.e_start, z1.e_end)

        me = a1.rmw
        # number of low mantissa bits of a1 that do not fit in z1
        ms = a1.rmw - z1.rmw
        print("ms-me", ms, me)

        # index selecting the top (z1.rmw + 1) bits of a1's mantissa
        top = -z1.rmw - 1

        def rounding_bits():
            # guard / round / sticky come from the a1 mantissa bits just
            # below position ms; m0 is the a1 mantissa bit at position ms
            return [of.guard.eq(a1.m[ms-1]),
                    of.round_bit.eq(a1.m[ms-2]),
                    of.sticky.eq(a1.m[:ms-2].bool()),
                    of.m0.eq(a1.m[ms])]

        # intermediaries (names kept: they become the signal names)
        exp_sub_n126 = Signal((a1.e_width, True), reset_less=True)
        exp_gt127 = Signal(reset_less=True)
        # constants from z1, at the bit-width of a1.
        N126 = Const(z1.fp.N126.value, (a1.e_width, True))
        P127 = Const(z1.fp.P127.value, (a1.e_width, True))
        m.d.comb += [exp_sub_n126.eq(a1.e - N126),
                     exp_gt127.eq(a1.e > P127)]

        # a1.exp_n127 set: return zero (signed)
        with m.If(a1.exp_n127):
            m.d.comb += z1.zero(a1.s)
            m.d.comb += self.o.out_do_z.eq(1)

        # exponent below z's minimum (-126): pass sign, exponent and the
        # top mantissa bits on; out_do_z is not assigned in this branch
        with m.Elif(exp_sub_n126 < 0):
            m.d.comb += rounding_bits()
            m.d.comb += [z1.s.eq(a1.s),
                         z1.e.eq(a1.e),
                         z1.m.eq(a1.m[top:]),
                         z1.m[-1].eq(1)]

        # inf in, inf out
        with m.Elif(a1.is_inf):
            m.d.comb += z1.inf(a1.s)
            m.d.comb += self.o.out_do_z.eq(1)

        # NaN in, NaN out
        with m.Elif(a1.is_nan):
            m.d.comb += z1.nan(0)
            m.d.comb += self.o.out_do_z.eq(1)

        # exponent greater than z's P127: return inf
        with m.Elif(exp_gt127):
            print("inf", z1.inf(a1.s))
            m.d.comb += z1.inf(a1.s)
            m.d.comb += self.o.out_do_z.eq(1)

        # everything else fits: round here and emit the result directly
        with m.Else():
            m.d.comb += rounding_bits()

            # XXX TODO: this is basically duplicating FPRoundMod. hmmm...
            print("alen", a1.e_start, z1.fp.N126, N126)
            print("m1", z1.rmw, a1.m[top:])
            mo = Signal(z1.m_width-1)
            m.d.comb += mo.eq(a1.m[ms:me])
            with m.If(of.roundz):
                with m.If((~mo == 0)): # all 1s
                    # incrementing an all-ones mantissa: exponent goes up
                    m.d.comb += z1.create(a1.s, a1.e+1, mo+1)
                with m.Else():
                    m.d.comb += z1.create(a1.s, a1.e, mo+1)
            with m.Else():
                m.d.comb += z1.create(a1.s, a1.e, a1.m[top:])
            m.d.comb += self.o.out_do_z.eq(1)

        # copy the context (muxid, operator)
        m.d.comb += [self.o.oz.eq(z1.v),
                     self.o.ctx.eq(self.i.ctx)]

        return m
232
233
class FPCVTUpConvert(FPState):
    """ Up-conversion (FSM state wrapping FPCVTUpConvertMod)
    """

    def __init__(self, in_width, out_width, id_wid):
        """ in_width and out_width are forwarded unchanged to
            FPCVTUpConvertMod, which treats them as pipeline specs.
            id_wid is accepted but not used here.
        """
        FPState.__init__(self, "upconvert")
        self.mod = FPCVTUpConvertMod(in_width, out_width)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # FPCVTUpConvertMod.setup() takes (m, i) only and does not drive
        # out_do_z, so that signal is connected here instead.
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        # the module's result record is "o"; the value lives in o.z.v
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v) # only take the output
        m.d.sync += self.out_z.ctx.eq(self.mod.o.ctx) # (and context)

    def action(self, m):
        """ picks the next FSM state depending on out_do_z
        """
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
257
258
class FPCVTDownConvert(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        (FSM state wrapping FPCVTDownConvertMod)
    """

    def __init__(self, in_width, out_width, id_wid):
        """ in_width and out_width are forwarded unchanged to
            FPCVTDownConvertMod, which treats them as pipeline specs.
            id_wid is accepted but not used here.
        """
        FPState.__init__(self, "special_cases")
        self.mod = FPCVTDownConvertMod(in_width, out_width)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # FPCVTDownConvertMod.setup() takes (m, i) only and does not
        # drive out_do_z, so that signal is connected here instead.
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        # the module's result record is "o"; the value lives in o.z.v
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v) # only take the output
        m.d.sync += self.out_z.ctx.eq(self.mod.o.ctx) # (and context)

    def action(self, m):
        """ picks the next FSM state depending on out_do_z
        """
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
282
283
class FPCVTUpConvertDeNorm(FPState, SimpleHandshake):
    """ Upconvert: an FPCVTUpConvertMod stage behind a SimpleHandshake
    """

    def __init__(self, in_pspec, out_pspec):
        """ in_pspec / out_pspec are handed to FPCVTUpConvertMod
        """
        FPState.__init__(self, "upconvert")
        stage = FPCVTUpConvertMod(in_pspec, out_pspec)
        SimpleHandshake.__init__(self, stage)
        self.out = self.ospec(None)
293
294
class FPCVTDownConvertDeNorm(FPState, SimpleHandshake):
    """ downconvert: an FPCVTDownConvertMod stage behind a SimpleHandshake
    """

    def __init__(self, in_pspec, out_pspec):
        """ in_pspec / out_pspec are handed to FPCVTDownConvertMod
        """
        FPState.__init__(self, "downconvert")
        stage = FPCVTDownConvertMod(in_pspec, out_pspec)
        SimpleHandshake.__init__(self, stage)
        self.out = self.ospec(None)
304
305
class FPCVTUpBasePipe(ControlBase):
    """ Two connected stages: FPCVTUpConvertDeNorm, then FPNormToPack
    """
    def __init__(self, in_pspec, out_pspec):
        ControlBase.__init__(self)
        self.pipe1 = FPCVTUpConvertDeNorm(in_pspec, out_pspec)
        self.pipe2 = FPNormToPack(out_pspec, e_extra=False)

        # connection statements are kept and added during elaboration
        stages = [self.pipe1, self.pipe2]
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        m = ControlBase.elaborate(self, platform)
        # same submodule names and order as before: "up", "normpack"
        for name, stage in (("up", self.pipe1), ("normpack", self.pipe2)):
            setattr(m.submodules, name, stage)
        m.d.comb += self._eqs
        return m
320
321
class FPCVTDownBasePipe(ControlBase):
    """ Two connected stages: FPCVTDownConvertDeNorm, then FPNormToPack
    """
    def __init__(self, in_pspec, out_pspec):
        ControlBase.__init__(self)
        self.pipe1 = FPCVTDownConvertDeNorm(in_pspec, out_pspec)
        self.pipe2 = FPNormToPack(out_pspec, e_extra=True)

        # connection statements are kept and added during elaboration
        stages = [self.pipe1, self.pipe2]
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        m = ControlBase.elaborate(self, platform)
        # same submodule names and order as before: "down", "normpack"
        for name, stage in (("down", self.pipe1), ("normpack", self.pipe2)):
            setattr(m.submodules, name, stage)
        m.d.comb += self._eqs
        return m
336
337
class FPCVTUpMuxInOut(ReservationStations):
    """ Reservation-Station version of FPCVT up pipeline.

        * fan-in on inputs (an array of FPADDBaseData)
        * 2-stage conversion pipeline (FPCVTUpBasePipe)
        * fan-out on outputs (an array of FPPackData)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, in_width, out_width, num_rows, op_wid=0):
        """ in_width / out_width: bit-widths of the input and output
            formats; num_rows is passed to ReservationStations;
            op_wid is passed to both pipeline specs.
        """
        # NOTE(review): the id widths are computed from the data widths,
        # not from num_rows -- confirm that this is intended.
        id_wid = num_bits(in_width)
        out_id_wid = num_bits(out_width)

        self.op_wid = op_wid
        self.id_wid = id_wid
        self.out_id_wid = out_id_wid

        self.in_pspec = PipelineSpec(in_width, id_wid, op_wid)
        self.out_pspec = PipelineSpec(out_width, out_id_wid, op_wid)

        self.alu = FPCVTUpBasePipe(self.in_pspec, self.out_pspec)
        ReservationStations.__init__(self, num_rows)

    def i_specfn(self):
        """ input data format, built from the input pipeline spec
        """
        return FPADDBaseData(self.in_pspec)

    def o_specfn(self):
        """ output data format, built from the output pipeline spec
        """
        return FPPackData(self.out_pspec)
364
class FPCVTDownMuxInOut(ReservationStations):
    """ Reservation-Station version of FPCVT down pipeline.

        * fan-in on inputs (an array of FPADDBaseData)
        * 2-stage conversion pipeline (FPCVTDownBasePipe)
        * fan-out on outputs (an array of FPPackData)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, in_width, out_width, num_rows, op_wid=0):
        """ in_width / out_width: bit-widths of the input and output
            formats; num_rows is passed to ReservationStations;
            op_wid is passed to both pipeline specs.
        """
        # NOTE(review): the id widths are computed from the data widths,
        # not from num_rows -- confirm that this is intended.
        id_wid = num_bits(in_width)
        out_id_wid = num_bits(out_width)

        self.op_wid = op_wid
        self.id_wid = id_wid
        self.out_id_wid = out_id_wid

        self.in_pspec = PipelineSpec(in_width, id_wid, op_wid)
        self.out_pspec = PipelineSpec(out_width, out_id_wid, op_wid)

        self.alu = FPCVTDownBasePipe(self.in_pspec, self.out_pspec)
        ReservationStations.__init__(self, num_rows)

    def i_specfn(self):
        """ input data format, built from the input pipeline spec
        """
        return FPADDBaseData(self.in_pspec)

    def o_specfn(self):
        """ output data format, built from the output pipeline spec
        """
        return FPPackData(self.out_pspec)