add debug prints
[ieee754fpu.git] / src / ieee754 / div_rem_sqrt_rsqrt / core.py
1 # SPDX-License-Identifier: LGPL-2.1-or-later
2 # See Notices.txt for copyright information
3 """ Core of the div/rem/sqrt/rsqrt pipeline.
4
5 Special case handling, input/output conversion, and muxid handling are handled
6 outside of these classes.
7
8 Algorithms based on ``algorithm.FixedUDivRemSqrtRSqrt``.
9
10 Formulas solved are:
11 * div/rem:
12 ``dividend == quotient_root * divisor_radicand``
13 * sqrt/rem:
14 ``divisor_radicand == quotient_root * quotient_root``
15 * rsqrt/rem:
16 ``1 == quotient_root * quotient_root * divisor_radicand``
17
18 The remainder is the left-hand-side of the comparison minus the
19 right-hand-side of the comparison in the above formulas.
20 """
21 from nmigen import (Elaboratable, Module, Signal, Const, Mux, Cat, Array)
22 from nmigen.lib.coding import PriorityEncoder
23 import enum
24
25
class DivPipeCoreConfig:
    """ Parameters shared by every stage of the div/rem/sqrt/rsqrt core.

    :attribute bit_width: base bit-width.
    :attribute fract_width: base fract-width, i.e. the location of the
        base-2 radix point.
    :attribute log2_radix: how many bits of ``quotient_root`` each pipeline
        stage should compute.
    """

    def __init__(self, bit_width, fract_width, log2_radix):
        """ Store the three parameters and print the derived stage count. """
        self.bit_width, self.fract_width, self.log2_radix = (
            bit_width, fract_width, log2_radix)
        # debug output: shows the configuration and how many stages it needs
        print(f"{self}: n_stages={self.n_stages}")

    def __repr__(self):
        """ Return a constructor-style text form of this configuration. """
        fields = (self.bit_width, self.fract_width, self.log2_radix)
        return "DivPipeCoreConfig({}, {}, {})".format(*fields)

    @property
    def n_stages(self):
        """ Number of ``DivPipeCoreCalculateStage`` instances needed. """
        bits_per_stage = self.log2_radix
        # Adding (bits_per_stage - 1) before the floor division rounds the
        # quotient up when bits_per_stage is positive.
        padded_width = self.bit_width + bits_per_stage - 1
        return padded_width // bits_per_stage
52
53
class DivPipeCoreOperation(enum.Enum):
    """ The operations ``DivPipeCore`` can compute.

    :attribute UDivRem: unsigned divide/remainder.
    :attribute SqrtRem: square-root/remainder.
    :attribute RSqrtRem: reciprocal-square-root/remainder.
    """

    UDivRem = 0
    SqrtRem = 1
    RSqrtRem = 2

    def __int__(self):
        """ Return the member's integer value. """
        return self.value

    @classmethod
    def create_signal(cls, *, src_loc_at=0, **kwargs):
        """ Build a signal able to hold a ``DivPipeCoreOperation``.

        :param src_loc_at: forwarded to ``Signal`` after adding one for this
            method's own call frame.
        :param kwargs: any further keyword arguments, forwarded to ``Signal``
            unchanged.
        :returns: the newly created ``Signal``.
        """
        codes = [int(member) for member in cls]

        def decode(raw):
            # render a raw value as the text form of the matching member
            return str(cls(raw))

        return Signal(min=min(codes),
                      max=max(codes) + 2,
                      src_loc_at=src_loc_at + 1,
                      decoder=decode,
                      **kwargs)
78
79
# Short alias for ``DivPipeCoreOperation``; the classes in this file use it
# to create operation signals and to compare against operation codes.
DP = DivPipeCoreOperation
81
82
class DivPipeCoreInputData:
    """ Input record consumed by ``DivPipeCore``.

    :attribute core_config: the ``DivPipeCoreConfig`` instance describing
        the configuration to be used.
    :attribute dividend: dividend for div/rem. Signal with a bit-width of
        ``core_config.bit_width + core_config.fract_width`` and a fract-width
        of ``core_config.fract_width * 2`` bits.
    :attribute divisor_radicand: divisor for div/rem and radicand for
        sqrt/rsqrt. Signal with a bit-width of ``core_config.bit_width`` and
        a fract-width of ``core_config.fract_width`` bits.
    :attribute operation: the ``DivPipeCoreOperation`` to be computed.
    """

    def __init__(self, core_config, reset_less=True):
        """ Allocate the member signals for the given configuration. """
        self.core_config = core_config
        bit_width = core_config.bit_width
        fract_width = core_config.fract_width
        # attribute assignments are kept explicit, one per signal
        self.dividend = Signal(bit_width + fract_width, reset_less=reset_less)
        self.divisor_radicand = Signal(bit_width, reset_less=reset_less)
        self.operation = DP.create_signal(reset_less=reset_less)

    def __iter__(self):
        """ Yield the member signals in declaration order. """
        yield from (self.dividend, self.divisor_radicand, self.operation)

    def eq(self, rhs):
        """ Return the list of assignments copying ``rhs`` into this record. """
        pairs = (
            (self.dividend, rhs.dividend),
            (self.divisor_radicand, rhs.divisor_radicand),
            (self.operation, rhs.operation),
        )
        return [dest.eq(src) for dest, src in pairs]
118
119
class DivPipeCoreInterstageData:
    """ Record passed between consecutive stages of ``DivPipeCore``.

    :attribute core_config: the ``DivPipeCoreConfig`` instance describing
        the configuration to be used.
    :attribute divisor_radicand: divisor for div/rem and radicand for
        sqrt/rsqrt. Signal with a bit-width of ``core_config.bit_width`` and
        a fract-width of ``core_config.fract_width`` bits.
    :attribute operation: the ``DivPipeCoreOperation`` to be computed.
    :attribute quotient_root: the quotient or root part of the result of the
        operation. Signal with a bit-width of ``core_config.bit_width`` and a
        fract-width of ``core_config.fract_width`` bits.
    :attribute root_times_radicand: ``quotient_root * divisor_radicand``.
        Signal with a bit-width of ``core_config.bit_width * 2`` and a
        fract-width of ``core_config.fract_width * 2`` bits.
    :attribute compare_lhs: the left-hand-side of the comparison in the
        equation to be solved. Signal with a bit-width of
        ``core_config.bit_width * 3`` and a fract-width of
        ``core_config.fract_width * 3`` bits.
    :attribute compare_rhs: the right-hand-side of the comparison in the
        equation to be solved. Signal with a bit-width of
        ``core_config.bit_width * 3`` and a fract-width of
        ``core_config.fract_width * 3`` bits.
    """

    def __init__(self, core_config, reset_less=True):
        """ Allocate the member signals for the given configuration. """
        self.core_config = core_config
        width = core_config.bit_width
        # attribute assignments are kept explicit, one per signal
        self.divisor_radicand = Signal(width, reset_less=reset_less)
        self.operation = DP.create_signal(reset_less=reset_less)
        self.quotient_root = Signal(width, reset_less=reset_less)
        self.root_times_radicand = Signal(width * 2, reset_less=reset_less)
        self.compare_lhs = Signal(width * 3, reset_less=reset_less)
        self.compare_rhs = Signal(width * 3, reset_less=reset_less)

    def __iter__(self):
        """ Yield the member signals in declaration order. """
        yield from (
            self.divisor_radicand,
            self.operation,
            self.quotient_root,
            self.root_times_radicand,
            self.compare_lhs,
            self.compare_rhs,
        )

    def eq(self, rhs):
        """ Return the list of assignments copying ``rhs`` into this record. """
        pairs = (
            (self.divisor_radicand, rhs.divisor_radicand),
            (self.operation, rhs.operation),
            (self.quotient_root, rhs.quotient_root),
            (self.root_times_radicand, rhs.root_times_radicand),
            (self.compare_lhs, rhs.compare_lhs),
            (self.compare_rhs, rhs.compare_rhs),
        )
        return [dest.eq(src) for dest, src in pairs]
177
178
class DivPipeCoreOutputData:
    """ Output record produced by ``DivPipeCore``.

    :attribute core_config: the ``DivPipeCoreConfig`` instance describing
        the configuration to be used.
    :attribute quotient_root: the quotient or root part of the result of the
        operation. Signal with a bit-width of ``core_config.bit_width`` and a
        fract-width of ``core_config.fract_width`` bits.
    :attribute remainder: the remainder part of the result of the operation.
        Signal with a bit-width of ``core_config.bit_width * 3`` and a
        fract-width of ``core_config.fract_width * 3`` bits.
    """

    def __init__(self, core_config, reset_less=True):
        """ Allocate the member signals for the given configuration. """
        self.core_config = core_config
        width = core_config.bit_width
        # attribute assignments are kept explicit, one per signal
        self.quotient_root = Signal(width, reset_less=reset_less)
        self.remainder = Signal(width * 3, reset_less=reset_less)

    def __iter__(self):
        """ Yield the member signals in declaration order. """
        yield from (self.quotient_root, self.remainder)

    def eq(self, rhs):
        """ Return the list of assignments copying ``rhs`` into this record. """
        pairs = (
            (self.quotient_root, rhs.quotient_root),
            (self.remainder, rhs.remainder),
        )
        return [dest.eq(src) for dest, src in pairs]
210
211
class DivPipeCoreSetupStage(Elaboratable):
    """ First stage of the div/rem/sqrt/rsqrt core: initialises the
    interstage record from the input record.
    """

    def __init__(self, core_config):
        """ Create a ``DivPipeCoreSetupStage`` instance. """
        self.core_config = core_config
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ Build a fresh input record for this pipeline stage. """
        return DivPipeCoreInputData(self.core_config)

    def ospec(self):
        """ Build a fresh output record for this pipeline stage. """
        return DivPipeCoreInterstageData(self.core_config)

    def setup(self, m, i):
        """ Register this stage as a submodule of ``m`` and drive its
        input record from ``i``.
        """
        m.submodules.div_pipe_core_setup = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ Return this stage's output record; ``i`` is not used. """
        return self.o

    def elaborate(self, platform):
        """ Elaborate into ``Module``. """
        m = Module()
        inp, out = self.i, self.o
        fract_width = self.core_config.fract_width
        op = inp.operation

        # pass the divisor/radicand through; the accumulators start at zero
        m.d.comb += [
            out.divisor_radicand.eq(inp.divisor_radicand),
            out.quotient_root.eq(0),
            out.root_times_radicand.eq(0),
        ]

        # compare_lhs depends on the operation being performed
        with m.If(op == int(DP.UDivRem)):
            m.d.comb += out.compare_lhs.eq(inp.dividend << fract_width)
        with m.Elif(op == int(DP.SqrtRem)):
            m.d.comb += out.compare_lhs.eq(
                inp.divisor_radicand << (fract_width * 2))
        with m.Else():  # DivPipeCoreOperation.RSqrtRem
            m.d.comb += out.compare_lhs.eq(1 << (fract_width * 3))

        m.d.comb += [
            out.compare_rhs.eq(0),
            out.operation.eq(op),
        ]

        return m
260
261
class Trial(Elaboratable):
    """ Computes the candidate ``compare_rhs`` for one value of trial bits.

    The formula used depends on ``operation``; the result is driven onto
    ``trial_compare_rhs``.

    :attribute core_config: ``DivPipeCoreConfig`` instance in use.
    :attribute trial_bits: Python integer; the candidate digit this trial
        evaluates.
    :attribute current_shift: Python integer; the shift applied to the
        trial terms (see ``elaborate`` for how it enters each formula).
    :attribute log2_radix: Python integer; width used for the trial-bits
        constant.
    :attribute divisor_radicand: input Signal, ``bit_width`` bits.
    :attribute quotient_root: input Signal, ``bit_width`` bits.
    :attribute root_times_radicand: input Signal, ``bit_width * 2`` bits.
    :attribute compare_rhs: input Signal, ``bit_width * 3`` bits.
    :attribute trial_compare_rhs: output Signal, ``bit_width * 3`` bits.
    :attribute operation: input Signal selecting the formula.
    """

    def __init__(self, core_config, trial_bits, current_shift, log2_radix):
        """ Create a ``Trial`` instance and allocate its signals. """
        self.core_config = core_config
        self.trial_bits = trial_bits
        self.current_shift = current_shift
        self.log2_radix = log2_radix
        bw = core_config.bit_width
        self.divisor_radicand = Signal(bw, reset_less=True)
        self.quotient_root = Signal(bw, reset_less=True)
        self.root_times_radicand = Signal(bw * 2, reset_less=True)
        self.compare_rhs = Signal(bw * 3, reset_less=True)
        self.trial_compare_rhs = Signal(bw * 3, reset_less=True)
        self.operation = DP.create_signal(reset_less=True)

    def elaborate(self, platform):
        """ Elaborate into ``Module``. """

        m = Module()

        # short local aliases for the input signals
        dr = self.divisor_radicand
        qr = self.quotient_root
        rr = self.root_times_radicand

        # the trial digit and its square, as constants
        trial_bits_sig = Const(self.trial_bits, self.log2_radix)
        trial_bits_sqrd_sig = Const(self.trial_bits * self.trial_bits,
                                    self.log2_radix * 2)

        tblen = self.core_config.bit_width+self.log2_radix
        tblen2 = self.core_config.bit_width+self.log2_radix*2
        # driven unconditionally, but only read by the RSqrtRem branch below
        dr_times_trial_bits_sqrd = Signal(tblen2, reset_less=True)
        m.d.comb += dr_times_trial_bits_sqrd.eq(dr * trial_bits_sqrd_sig)

        # NOTE(review): in each branch below ``x_rhs += ...`` is applied to a
        # local name that starts out bound to self.compare_rhs; presumably
        # this rebinds the local name to a larger expression rather than
        # modifying the signal - confirm against nmigen's Value semantics.

        # UDivRem:
        #   trial_compare_rhs = compare_rhs
        #       + ((dr * trial_bits) << (fract_width + current_shift))
        with m.If(self.operation == int(DP.UDivRem)):
            dr_times_trial_bits = Signal(tblen, reset_less=True)
            m.d.comb += dr_times_trial_bits.eq(dr * trial_bits_sig)
            div_rhs = self.compare_rhs

            div_term1 = dr_times_trial_bits
            div_term1_shift = self.core_config.fract_width
            div_term1_shift += self.current_shift
            div_rhs += div_term1 << div_term1_shift

            m.d.comb += self.trial_compare_rhs.eq(div_rhs)

        # SqrtRem:
        #   trial_compare_rhs = compare_rhs
        #       + ((qr * trial_bits) << (fract_width + current_shift + 1))
        #       + (trial_bits**2 << (fract_width + current_shift * 2))
        with m.Elif(self.operation == int(DP.SqrtRem)):
            qr_times_trial_bits = Signal((tblen+1)*2, reset_less=True)
            m.d.comb += qr_times_trial_bits.eq(qr * trial_bits_sig)
            sqrt_rhs = self.compare_rhs

            sqrt_term1 = qr_times_trial_bits
            sqrt_term1_shift = self.core_config.fract_width
            sqrt_term1_shift += self.current_shift + 1
            sqrt_rhs += sqrt_term1 << sqrt_term1_shift
            sqrt_term2 = trial_bits_sqrd_sig
            sqrt_term2_shift = self.core_config.fract_width
            sqrt_term2_shift += self.current_shift * 2
            sqrt_rhs += sqrt_term2 << sqrt_term2_shift

            m.d.comb += self.trial_compare_rhs.eq(sqrt_rhs)

        # RSqrtRem (any other operation value also lands here):
        #   trial_compare_rhs = compare_rhs
        #       + ((rr * trial_bits) << (current_shift + 1))
        #       + ((dr * trial_bits**2) << (current_shift * 2))
        with m.Else():
            rr_times_trial_bits = Signal((tblen+1)*3, reset_less=True)
            m.d.comb += rr_times_trial_bits.eq(rr * trial_bits_sig)
            rsqrt_rhs = self.compare_rhs

            rsqrt_term1 = rr_times_trial_bits
            rsqrt_term1_shift = self.current_shift + 1
            rsqrt_rhs += rsqrt_term1 << rsqrt_term1_shift
            rsqrt_term2 = dr_times_trial_bits_sqrd
            rsqrt_term2_shift = self.current_shift * 2
            rsqrt_rhs += rsqrt_term2 << rsqrt_term2_shift

            m.d.comb += self.trial_compare_rhs.eq(rsqrt_rhs)

        return m
339
340
class DivPipeCoreCalculateStage(Elaboratable):
    """ Calculate Stage of the core of the div/rem/sqrt/rsqrt pipeline.

    Each instance works out up to ``core_config.log2_radix`` bits of
    ``quotient_root`` by running one ``Trial`` per candidate digit and
    keeping the highest candidate whose ``trial_compare_rhs`` does not
    exceed ``compare_lhs``.
    """

    def __init__(self, core_config, stage_index):
        """ Create a ``DivPipeCoreCalculateStage`` instance.

        :param core_config: ``DivPipeCoreConfig`` instance in use.
        :param stage_index: position of this stage; must be in
            ``range(core_config.n_stages)``.
        """
        self.core_config = core_config
        assert stage_index in range(core_config.n_stages)
        self.stage_index = stage_index
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ Get the input spec for this pipeline stage. """
        return DivPipeCoreInterstageData(self.core_config)

    def ospec(self):
        """ Get the output spec for this pipeline stage. """
        return DivPipeCoreInterstageData(self.core_config)

    def setup(self, m, i):
        """ Pipeline stage setup.

        Registers this stage on ``m`` as the submodule
        ``div_pipe_core_calculate_<stage_index>`` and drives its input
        record from ``i``.
        """
        setattr(m.submodules,
                f"div_pipe_core_calculate_{self.stage_index}",
                self)
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ Pipeline stage process: returns the output record (ignores i). """
        return self.o

    def elaborate(self, platform):
        """ Elaborate into ``Module``. """
        m = Module()

        # copy invariant inputs to outputs (for next stage)
        m.d.comb += self.o.divisor_radicand.eq(self.i.divisor_radicand)
        m.d.comb += self.o.operation.eq(self.i.operation)
        m.d.comb += self.o.compare_lhs.eq(self.i.compare_lhs)

        # constants
        # current_shift starts as the number of result bits not yet handled
        # by earlier stages; log2_radix is clipped to that so the last stage
        # can handle fewer bits. After the final subtraction current_shift is
        # the index of the lowest bit handled by this stage.
        log2_radix = self.core_config.log2_radix
        current_shift = self.core_config.bit_width
        current_shift -= self.stage_index * log2_radix
        log2_radix = min(log2_radix, current_shift)
        assert log2_radix > 0
        current_shift -= log2_radix
        # debug output: which bit range this stage is responsible for
        print(f"DivPipeCoreCalc: stage {self.stage_index}"
              + f" of {self.core_config.n_stages} handling "
              + f"bits [{current_shift}, {current_shift+log2_radix})"
              + f" of {self.core_config.bit_width}")
        radix = 1 << log2_radix

        # trials within this radix range. carried out by Trial module,
        # results stored in pass_flags. pass_flags are unary priority.
        trial_compare_rhs_values = []
        pfl = []
        for trial_bits in range(radix):
            t = Trial(self.core_config, trial_bits, current_shift, log2_radix)
            setattr(m.submodules, "trial%d" % trial_bits, t)

            # every trial sees the same inputs; only trial_bits differs
            m.d.comb += t.divisor_radicand.eq(self.i.divisor_radicand)
            m.d.comb += t.quotient_root.eq(self.i.quotient_root)
            m.d.comb += t.root_times_radicand.eq(self.i.root_times_radicand)
            m.d.comb += t.compare_rhs.eq(self.i.compare_rhs)
            m.d.comb += t.operation.eq(self.i.operation)

            # get the trial output
            trial_compare_rhs_values.append(t.trial_compare_rhs)

            # make the trial comparison against the [invariant] lhs.
            # NOTE(review): the original comment here said trial_compare_rhs
            # is "always decreasing" as trial_bits increases. Trial adds
            # terms scaled by trial_bits to compare_rhs, so the value grows
            # (barring truncation), which is what the unary pass_flags
            # assumption below requires.
            pass_flag = Signal(name=f"pass_flag_{trial_bits}", reset_less=True)
            m.d.comb += pass_flag.eq(self.i.compare_lhs >= t.trial_compare_rhs)
            pfl.append(pass_flag)

        # Cat all the pass flags list together (easier to handle, below)
        pass_flags = Signal(radix, reset_less=True)
        m.d.comb += pass_flags.eq(Cat(*pfl))

        # convert pass_flags (unary priority) to next_bits (binary index)
        #
        # Assumes that for each set bit in pass_flag, all previous bits are
        # also set.
        #
        # Assumes that pass_flag[0] is always set (since
        # compare_lhs >= compare_rhs is a pipeline invariant).

        # The encoder is fed the inverted flags, so its output is presumably
        # the index of the first failing trial; one less than that is the
        # highest passing trial. pe.n presumably signals "no input bit set"
        # (every trial passed), in which case the top digit is chosen -
        # confirm against the PriorityEncoder documentation.
        m.submodules.pe = pe = PriorityEncoder(radix)
        next_bits = Signal(log2_radix, reset_less=True)
        m.d.comb += pe.i.eq(~pass_flags)
        with m.If(~pe.n):
            m.d.comb += next_bits.eq(pe.o-1)
        with m.Else():
            m.d.comb += next_bits.eq(radix-1)

        # get the highest passing rhs trial (indexed by next_bits)
        ta = Array(trial_compare_rhs_values)
        m.d.comb += self.o.compare_rhs.eq(ta[next_bits])

        # create outputs for next phase
        # root_times_radicand accumulates divisor_radicand * next_bits at
        # this stage's bit position
        m.d.comb += self.o.root_times_radicand.eq(self.i.root_times_radicand
                                                  + ((self.i.divisor_radicand
                                                      * next_bits)
                                                     << current_shift))
        # the chosen digit is OR-ed into quotient_root at this stage's bit
        # position
        m.d.comb += self.o.quotient_root.eq(self.i.quotient_root
                                            | (next_bits << current_shift))
        return m
448
449
class DivPipeCoreFinalStage(Elaboratable):
    """ Last stage of the div/rem/sqrt/rsqrt core: turns the interstage
    record into the output record.
    """

    def __init__(self, core_config):
        """ Create a ``DivPipeCoreFinalStage`` instance. """
        self.core_config = core_config
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ Build a fresh input record for this pipeline stage. """
        return DivPipeCoreInterstageData(self.core_config)

    def ospec(self):
        """ Build a fresh output record for this pipeline stage. """
        return DivPipeCoreOutputData(self.core_config)

    def setup(self, m, i):
        """ Register this stage as a submodule of ``m`` and drive its
        input record from ``i``.
        """
        m.submodules.div_pipe_core_final = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ Return this stage's output record; ``i`` is not used. """
        return self.o

    def elaborate(self, platform):
        """ Elaborate into ``Module``. """
        m = Module()
        inp, out = self.i, self.o

        # the quotient/root passes straight through
        m.d.comb += out.quotient_root.eq(inp.quotient_root)
        # remainder is the left-hand side minus the right-hand side of the
        # comparison
        difference = inp.compare_lhs - inp.compare_rhs
        m.d.comb += out.remainder.eq(difference)

        return m