clean up comments
[ieee754fpu.git] / src / ieee754 / div_rem_sqrt_rsqrt / core.py
index 245cd1c6cd426604d98ac5890b55bacca0c1695a..7c6fb181111544c2e85d763dc5764d21a7f232ca 100644 (file)
@@ -18,7 +18,8 @@ Formulas solved are:
 The remainder is the left-hand-side of the comparison minus the
 right-hand-side of the comparison in the above formulas.
 """
-from nmigen import (Elaboratable, Module, Signal, Const, Mux, Cat)
+from nmigen import (Elaboratable, Module, Signal, Const, Mux, Cat, Array)
+from nmigen.lib.coding import PriorityEncoder
 import enum
 
 
@@ -44,12 +45,12 @@ class DivPipeCoreConfig:
             + f"{self.fract_width}, {self.log2_radix})"
 
     @property
-    def num_calculate_stages(self):
+    def n_stages(self):
         """ Get the number of ``DivPipeCoreCalculateStage`` needed. """
         return (self.bit_width + self.log2_radix - 1) // self.log2_radix
 
 
-class DivPipeCoreOperation(enum.IntEnum):
+class DivPipeCoreOperation(enum.Enum):
     """ Operation for ``DivPipeCore``.
 
     :attribute UDivRem: unsigned divide/remainder.
@@ -61,16 +62,22 @@ class DivPipeCoreOperation(enum.IntEnum):
     SqrtRem = 1
     RSqrtRem = 2
 
+    def __int__(self):
+        """ Convert to int. """
+        return self.value
+
     @classmethod
     def create_signal(cls, *, src_loc_at=0, **kwargs):
         """ Create a signal that can contain a ``DivPipeCoreOperation``. """
-        return Signal(min=int(min(cls)),
-                      max=int(max(cls)),
+        return Signal(min=min(map(int, cls)),
+                      max=max(map(int, cls)) + 2,
                       src_loc_at=(src_loc_at + 1),
-                      decoder=cls,
+                      decoder=lambda v: str(cls(v)),
                       **kwargs)
 
 
+DP = DivPipeCoreOperation
+
 class DivPipeCoreInputData:
     """ input data type for ``DivPipeCore``.
 
@@ -92,22 +99,19 @@ class DivPipeCoreInputData:
                                reset_less=reset_less)
         self.divisor_radicand = Signal(core_config.bit_width,
                                        reset_less=reset_less)
-
-        # FIXME: this goes into (is replaced by) self.ctx.op
-        self.operation = \
-            DivPipeCoreOperation.create_signal(reset_less=reset_less)
+        self.operation = DP.create_signal(reset_less=reset_less)
 
     def __iter__(self):
         """ Get member signals. """
         yield self.dividend
         yield self.divisor_radicand
-        yield self.operation  # FIXME: delete.  already covered by self.ctx
+        yield self.operation
 
     def eq(self, rhs):
         """ Assign member signals. """
         return [self.dividend.eq(rhs.dividend),
                 self.divisor_radicand.eq(rhs.divisor_radicand),
-                self.operation.eq(rhs.operation),  # FIXME: delete.
+                self.operation.eq(rhs.operation),
                 ]
 
 
@@ -141,9 +145,7 @@ class DivPipeCoreInterstageData:
         self.core_config = core_config
         self.divisor_radicand = Signal(core_config.bit_width,
                                        reset_less=reset_less)
-        # FIXME: delete self.operation.  already covered by self.ctx.op
-        self.operation = \
-            DivPipeCoreOperation.create_signal(reset_less=reset_less)
+        self.operation = DP.create_signal(reset_less=reset_less)
         self.quotient_root = Signal(core_config.bit_width,
                                     reset_less=reset_less)
         self.root_times_radicand = Signal(core_config.bit_width * 2,
@@ -156,7 +158,7 @@ class DivPipeCoreInterstageData:
     def __iter__(self):
         """ Get member signals. """
         yield self.divisor_radicand
-        yield self.operation  # FIXME: delete.  already in self.ctx.op
+        yield self.operation
         yield self.quotient_root
         yield self.root_times_radicand
         yield self.compare_lhs
@@ -165,7 +167,7 @@ class DivPipeCoreInterstageData:
     def eq(self, rhs):
         """ Assign member signals. """
         return [self.divisor_radicand.eq(rhs.divisor_radicand),
-                self.operation.eq(rhs.operation),  # FIXME: delete.
+                self.operation.eq(rhs.operation),
                 self.quotient_root.eq(rhs.quotient_root),
                 self.root_times_radicand.eq(rhs.root_times_radicand),
                 self.compare_lhs.eq(rhs.compare_lhs),
@@ -239,10 +241,10 @@ class DivPipeCoreSetupStage(Elaboratable):
         m.d.comb += self.o.quotient_root.eq(0)
         m.d.comb += self.o.root_times_radicand.eq(0)
 
-        with m.If(self.i.operation == DivPipeCoreOperation.UDivRem):
+        with m.If(self.i.operation == int(DP.UDivRem)):
             m.d.comb += self.o.compare_lhs.eq(self.i.dividend
                                               << self.core_config.fract_width)
-        with m.Elif(self.i.operation == DivPipeCoreOperation.SqrtRem):
+        with m.Elif(self.i.operation == int(DP.SqrtRem)):
             m.d.comb += self.o.compare_lhs.eq(
                 self.i.divisor_radicand << (self.core_config.fract_width * 2))
         with m.Else():  # DivPipeCoreOperation.RSqrtRem
@@ -255,13 +257,92 @@ class DivPipeCoreSetupStage(Elaboratable):
         return m
 
 
+class Trial(Elaboratable):
+    def __init__(self, core_config, trial_bits, current_shift, log2_radix):
+        self.core_config = core_config
+        self.trial_bits = trial_bits
+        self.current_shift = current_shift
+        self.log2_radix = log2_radix
+        bw = core_config.bit_width
+        self.divisor_radicand = Signal(bw, reset_less=True)
+        self.quotient_root = Signal(bw, reset_less=True)
+        self.root_times_radicand = Signal(bw * 2, reset_less=True)
+        self.compare_rhs = Signal(bw * 3, reset_less=True)
+        self.trial_compare_rhs = Signal(bw * 3, reset_less=True)
+        self.operation = DP.create_signal(reset_less=True)
+
+    def elaborate(self, platform):
+
+        m = Module()
+
+        dr = self.divisor_radicand
+        qr = self.quotient_root
+        rr = self.root_times_radicand
+
+        trial_bits_sig = Const(self.trial_bits, self.log2_radix)
+        trial_bits_sqrd_sig = Const(self.trial_bits * self.trial_bits,
+                                    self.log2_radix * 2)
+
+        tblen = self.core_config.bit_width+self.log2_radix
+        tblen2 = self.core_config.bit_width+self.log2_radix*2
+        dr_times_trial_bits_sqrd = Signal(tblen2, reset_less=True)
+        m.d.comb += dr_times_trial_bits_sqrd.eq(dr * trial_bits_sqrd_sig)
+
+        # UDivRem
+        with m.If(self.operation == int(DP.UDivRem)):
+            dr_times_trial_bits = Signal(tblen, reset_less=True)
+            m.d.comb += dr_times_trial_bits.eq(dr * trial_bits_sig)
+            div_rhs = self.compare_rhs
+
+            div_term1 = dr_times_trial_bits
+            div_term1_shift = self.core_config.fract_width
+            div_term1_shift += self.current_shift
+            div_rhs += div_term1 << div_term1_shift
+
+            m.d.comb += self.trial_compare_rhs.eq(div_rhs)
+
+        # SqrtRem
+        with m.Elif(self.operation == int(DP.SqrtRem)):
+            qr_times_trial_bits = Signal((tblen+1)*2, reset_less=True)
+            m.d.comb += qr_times_trial_bits.eq(qr * trial_bits_sig)
+            sqrt_rhs = self.compare_rhs
+
+            sqrt_term1 = qr_times_trial_bits
+            sqrt_term1_shift = self.core_config.fract_width
+            sqrt_term1_shift += self.current_shift + 1
+            sqrt_rhs += sqrt_term1 << sqrt_term1_shift
+            sqrt_term2 = trial_bits_sqrd_sig
+            sqrt_term2_shift = self.core_config.fract_width
+            sqrt_term2_shift += self.current_shift * 2
+            sqrt_rhs += sqrt_term2 << sqrt_term2_shift
+
+            m.d.comb += self.trial_compare_rhs.eq(sqrt_rhs)
+
+        # RSqrtRem
+        with m.Else():
+            rr_times_trial_bits = Signal((tblen+1)*3, reset_less=True)
+            m.d.comb += rr_times_trial_bits.eq(rr * trial_bits_sig)
+            rsqrt_rhs = self.compare_rhs
+
+            rsqrt_term1 = rr_times_trial_bits
+            rsqrt_term1_shift = self.current_shift + 1
+            rsqrt_rhs += rsqrt_term1 << rsqrt_term1_shift
+            rsqrt_term2 = dr_times_trial_bits_sqrd
+            rsqrt_term2_shift = self.current_shift * 2
+            rsqrt_rhs += rsqrt_term2 << rsqrt_term2_shift
+
+            m.d.comb += self.trial_compare_rhs.eq(rsqrt_rhs)
+
+        return m
+
+
 class DivPipeCoreCalculateStage(Elaboratable):
     """ Calculate Stage of the core of the div/rem/sqrt/rsqrt pipeline. """
 
     def __init__(self, core_config, stage_index):
         """ Create a ``DivPipeCoreSetupStage`` instance. """
         self.core_config = core_config
-        assert stage_index in range(core_config.num_calculate_stages)
+        assert stage_index in range(core_config.n_stages)
         self.stage_index = stage_index
         self.i = self.ispec()
         self.o = self.ospec()
@@ -288,9 +369,13 @@ class DivPipeCoreCalculateStage(Elaboratable):
     def elaborate(self, platform):
         """ Elaborate into ``Module``. """
         m = Module()
+
+        # copy invariant inputs to outputs (for next stage)
         m.d.comb += self.o.divisor_radicand.eq(self.i.divisor_radicand)
         m.d.comb += self.o.operation.eq(self.i.operation)
         m.d.comb += self.o.compare_lhs.eq(self.i.compare_lhs)
+
+        # constants
         log2_radix = self.core_config.log2_radix
         current_shift = self.core_config.bit_width
         current_shift -= self.stage_index * log2_radix
@@ -298,48 +383,31 @@ class DivPipeCoreCalculateStage(Elaboratable):
         assert log2_radix > 0
         current_shift -= log2_radix
         radix = 1 << log2_radix
+
+        # trials within this radix range.  carried out by Trial module,
+        # results stored in pass_flags.  pass_flags are unary priority.
         trial_compare_rhs_values = []
-        pass_flags = []
+        pfl = []
         for trial_bits in range(radix):
-            tb = trial_bits << current_shift
-            log2_tb = log2_radix + current_shift
-            shifted_trial_bits = Const(tb, log2_tb)
-            shifted_trial_bits2 = Const(tb*2, log2_tb+1)
-            shifted_trial_bits_sqrd = Const(tb * tb, log2_tb * 2)
-
-            # UDivRem
-            div_rhs = self.i.compare_rhs
-            div_factor1 = self.i.divisor_radicand * shifted_trial_bits2
-            div_rhs += div_factor1 << self.core_config.fract_width
-
-            # SqrtRem
-            sqrt_rhs = self.i.compare_rhs
-            sqrt_factor1 = self.i.quotient_root * shifted_trial_bits2
-            sqrt_rhs += sqrt_factor1 << self.core_config.fract_width
-            sqrt_factor2 = shifted_trial_bits_sqrd
-            sqrt_rhs += sqrt_factor2 << self.core_config.fract_width
-
-            # RSqrtRem
-            rsqrt_rhs = self.i.compare_rhs
-            rsqrt_rhs += self.i.root_times_radicand * shifted_trial_bits2
-            rsqrt_rhs += self.i.divisor_radicand * shifted_trial_bits_sqrd
-
-            trial_compare_rhs = Signal.like(
-                self.o.compare_rhs, name=f"trial_compare_rhs_{trial_bits}")
-
-            with m.If(self.i.operation == DivPipeCoreOperation.UDivRem):
-                m.d.comb += trial_compare_rhs.eq(div_rhs)
-            with m.Elif(self.i.operation == DivPipeCoreOperation.SqrtRem):
-                m.d.comb += trial_compare_rhs.eq(sqrt_rhs)
-            with m.Else():  # DivPipeCoreOperation.RSqrtRem
-                m.d.comb += trial_compare_rhs.eq(rsqrt_rhs)
-            trial_compare_rhs_values.append(trial_compare_rhs)
-
-            pass_flag = Signal(name=f"pass_flag_{trial_bits}")
-            m.d.comb += pass_flag.eq(self.i.compare_lhs >= trial_compare_rhs)
-            pass_flags.append(pass_flag)
-
-        # convert pass_flags to next_bits.
+            t = Trial(self.core_config, trial_bits, current_shift, log2_radix)
+            setattr(m.submodules, "trial%d" % trial_bits, t)
+
+            m.d.comb += t.divisor_radicand.eq(self.i.divisor_radicand)
+            m.d.comb += t.quotient_root.eq(self.i.quotient_root)
+            m.d.comb += t.root_times_radicand.eq(self.i.root_times_radicand)
+            m.d.comb += t.compare_rhs.eq(self.i.compare_rhs)
+            m.d.comb += t.operation.eq(self.i.operation)
+
+            trial_compare_rhs_values.append(t.trial_compare_rhs)
+
+            pass_flag = Signal(name=f"pass_flag_{trial_bits}", reset_less=True)
+            m.d.comb += pass_flag.eq(self.i.compare_lhs >= t.trial_compare_rhs)
+            pfl.append(pass_flag)
+
+        pass_flags = Signal(radix, reset_less=True)
+        m.d.comb += pass_flags.eq(Cat(*pfl))
+
+        # convert pass_flags (unary priority) to next_bits (binary index)
         #
         # Assumes that for each set bit in pass_flag, all previous bits are
         # also set.
@@ -347,26 +415,19 @@ class DivPipeCoreCalculateStage(Elaboratable):
         # Assumes that pass_flag[0] is always set (since
         # compare_lhs >= compare_rhs is a pipeline invariant).
 
-        next_bits = Signal(log2_radix)
-        for i in range(log2_radix):
-            bit_value = 1
-            for j in range(0, radix, 1 << i):
-                bit_value ^= pass_flags[j]
-            m.d.comb += next_bits.part(i, 1).eq(bit_value)
-
-        next_compare_rhs = Signal(radix, reset_less=True)
-        l = []
-        for i in range(radix):
-            next_flag = pass_flags[i + 1] if i + 1 < radix else 0
-            flag = Signal(reset_less=True, name=f"flag{i}")
-            test = Signal(reset_less=True, name=f"test{i}")
-            # XXX TODO: check the width on this
-            m.d.comb += test.eq((pass_flags[i] & ~next_flag))
-            m.d.comb += flag.eq(Mux(test, trial_compare_rhs_values[i], 0))
-            l.append(flag)
-
-        m.d.comb += next_compare_rhs.eq(Cat(*l))
-        m.d.comb += self.o.compare_rhs.eq(next_compare_rhs.bool())
+        m.submodules.pe = pe = PriorityEncoder(radix)
+        next_bits = Signal(log2_radix+1, reset_less=True)
+        m.d.comb += pe.i.eq(~pass_flags)
+        with m.If(~pe.n):
+            m.d.comb += next_bits.eq(pe.o-1)
+        with m.Else():
+            m.d.comb += next_bits.eq(radix-1)
+
+        # get the highest passing rhs trial (indexed by next_bits)
+        ta = Array(trial_compare_rhs_values)
+        m.d.comb += self.o.compare_rhs.eq(ta[next_bits])
+
+        # create outputs for next phase
         m.d.comb += self.o.root_times_radicand.eq(self.i.root_times_radicand
                                                   + ((self.i.divisor_radicand
                                                       * next_bits)