Revise documentation for how carry works in adder.py

[ieee754fpu.git] / src / ieee754 / part_mul_add / adder.py
diff --git a/src/ieee754/part_mul_add/adder.py b/src/ieee754/part_mul_add/adder.py

index 18d8df7572d608cbf78d4b6d7c5dc5a7793fa41b..2eb6de27e868b8f1f3be78604400361a70561760 100644 (file)
--- a/src/ieee754/part_mul_add/adder.py
+++ b/src/ieee754/part_mul_add/adder.py
@@ -6,16 +6,10 @@ See:
  * https://libre-riscv.org/3d_gpu/architecture/dynamic_simd/add/
  """
  
-from nmigen import Signal, Module, Value, Elaboratable, Cat, C, Mux, Repl
-from nmigen.hdl.ast import Assign
-from abc import ABCMeta, abstractmethod
-from nmigen.cli import main
-from functools import reduce
-from operator import or_
-from ieee754.pipeline import PipelineSpec
-from nmutil.pipemodbase import PipeModBase
+from nmigen import Signal, Module, Elaboratable, Cat
  
  from ieee754.part_mul_add.partpoints import PartitionPoints
+from ieee754.part_cmp.ripple import MoveMSBDown
  
  
  class FullAdder(Elaboratable):
@@ -119,6 +113,7 @@ class PartitionedAdder(Elaboratable):
      to the next bit.  Then the final output *removes* the extra bits from
      the result.
  
+    In the case of no carry:
      partition: .... P... P... P... P... (32 bits)
      a        : .... .... .... .... .... (32 bits)
      b        : .... .... .... .... .... (32 bits)
@@ -127,33 +122,69 @@ class PartitionedAdder(Elaboratable):
      exp-o    : ....xN...xN...xN...xN... (32+4 bits - x to be discarded)
      o        : .... N... N... N... N... (32 bits - x ignored, N is carry-over)
  
+    However, with carry the behavior is a little different:
+    partition:      p    p    p    p      (4 bits)
+    carry-in :      c    c    c    c    c (5 bits)
+    C = c & P:      C    C    C    C    c (5 bits)
+    I = P=>c :      I    I    I    I    c (5 bits)
+    a        :  AAAA AAAA AAAA AAAA AAAA  (32 bits)
+    b        :  BBBB BBBB BBBB BBBB BBBB  (32 bits)
+    exp-a    : 0AAAACAAAACAAAACAAAACAAAAc (32+4+2 bits, P=1 if no partition)
+    exp-b    : 0BBBBIBBBBIBBBBIBBBBIBBBBc (32+2 bits plus 4 zeros)
+    exp-o    : o....oN...oN...oN...oN...x (32+4+2 bits - x to be discarded)
+    o        :  .... N... N... N... N... (32 bits - x ignored, N is carry-over)
+    carry-out: o    o    o    o    o      (5 bits)
+    
+    A couple of differences should be noted:
+     - The expanded a/b/o have 2 extra bits added to them. These bits
+       allow the carry-in for the least significant partition to be
+       injected, and the carry out for the most significant partition
+       to be extracted.
+     - The partition bits P and 0 in the first example have been
+       replaced with bits C and I. Bits C and I are set to 1 when
+       there is a partition and a carry-in at that position. This has
+       the effect of creating a carry at that position in the expanded
+       adder, while preventing carries from the previous partition
+       from propagating through to the next. These bits are also used
+       to extract the carry-out information for each partition, as
+       when there is a carry out in a partition, the next most
+       significant partition bit will be set to 1.
+    
+    Additionally, the carry-out bits must be rearranged before being
+    output to move the most significant carry bit for each partition
+    into the least significant bit for that partition, as well as to
+    ignore the other carry bits in that partition. This is
+    accomplished by the MoveMSBDown module.
+
      :attribute width: the bit width of the input and output. Read-only.
      :attribute a: the first input to the adder
      :attribute b: the second input to the adder
      :attribute output: the sum output
-    :attribute partition_points: the input partition points. Modification not
+    :attribute part_pts: the input partition points. Modification not
          supported, except for by ``Signal.eq``.
      """
  
-    def __init__(self, width, partition_points, partition_step=1):
+    def __init__(self, width, part_pts, partition_step=1):
          """Create a ``PartitionedAdder``.
  
          :param width: the bit width of the input and output
-        :param partition_points: the input partition points
+        :param part_pts: the input partition points
          :param partition_step: a multiplier (typically double) step
                                 which in-place "expands" the partition points
          """
          self.width = width
          self.pmul = partition_step
+        self.part_pts = PartitionPoints(part_pts)
          self.a = Signal(width, reset_less=True)
          self.b = Signal(width, reset_less=True)
+        self.carry_in = Signal(self.part_pts.get_max_partition_count(width))
+        self.carry_out = Signal(self.part_pts.get_max_partition_count(width))
          self.output = Signal(width, reset_less=True)
-        self.partition_points = PartitionPoints(partition_points)
-        if not self.partition_points.fits_in_width(width):
+        if not self.part_pts.fits_in_width(width):
              raise ValueError("partition_points doesn't fit in width")
-        expanded_width = 0
+        expanded_width = 2
          for i in range(self.width):
-            if i in self.partition_points:
+            if i in self.part_pts:
                  expanded_width += 1
              expanded_width += 1
          self._expanded_width = expanded_width
@@ -162,13 +193,17 @@ class PartitionedAdder(Elaboratable):
          """Elaborate this module."""
          m = Module()
          comb = m.d.comb
+
+        carry_tmp = Signal(self.carry_out.width)
+        m.submodules.ripple = ripple = MoveMSBDown(self.carry_out.width)
+
          expanded_a = Signal(self._expanded_width, reset_less=True)
          expanded_b = Signal(self._expanded_width, reset_less=True)
          expanded_o = Signal(self._expanded_width, reset_less=True)
  
          expanded_index = 0
          # store bits in a list, use Cat later.  graphviz is much cleaner
-        al, bl, ol, ea, eb, eo = [],[],[],[],[],[]
+        al, bl, ol, cl, ea, eb, eo, co = [], [], [], [], [], [], [], []
  
          # partition points are "breaks" (extra zeros or 1s) in what would
          # otherwise be a massive long add.  when the "break" points are 0,
@@ -178,16 +213,32 @@ class PartitionedAdder(Elaboratable):
          # however by that time we've got the effect that we wanted: the
          # carry has been carried *over* the break point.
  
+        carry_bit = 0
+        al.append(self.carry_in[carry_bit])
+        bl.append(self.carry_in[carry_bit])
+        ea.append(expanded_a[expanded_index])
+        eb.append(expanded_b[expanded_index])
+        carry_bit += 1
+        expanded_index += 1
+
          for i in range(self.width):
-            pi = i/self.pmul # double the range of the partition point test
-            if pi.is_integer() and pi in self.partition_points:
-                # add extra bit set to 0 + 0 for enabled partition points
+            pi = i/self.pmul  # double the range of the partition point test
+            if pi.is_integer() and pi in self.part_pts:
+                # add extra bit set to carry + carry for enabled
+                # partition points
+                a_bit = Signal(name="a_bit_%d" % i, reset_less=True)
+                carry_in = self.carry_in[carry_bit]  # convenience
+                m.d.comb += a_bit.eq(self.part_pts[pi].implies(carry_in))
+
                  # and 1 + 0 for disabled partition points
                  ea.append(expanded_a[expanded_index])
-                al.append(~self.partition_points[pi]) # add extra bit in a
+                al.append(a_bit)  # add extra bit in a
                  eb.append(expanded_b[expanded_index])
-                bl.append(C(0)) # yes, add a zero
-                expanded_index += 1 # skip the extra point.  NOT in the output
+                bl.append(carry_in & self.part_pts[pi])  # carry bit
+                co.append(expanded_o[expanded_index])
+                cl.append(carry_tmp[carry_bit-1])
+                expanded_index += 1  # skip the extra point.  NOT in the output
+                carry_bit += 1
              ea.append(expanded_a[expanded_index])
              eb.append(expanded_b[expanded_index])
              eo.append(expanded_o[expanded_index])
@@ -195,16 +246,23 @@ class PartitionedAdder(Elaboratable):
              bl.append(self.b[i])
              ol.append(self.output[i])
              expanded_index += 1
+        al.append(0)
+        bl.append(0)
+        co.append(expanded_o[-1])
+        cl.append(carry_tmp[carry_bit-1])
  
          # combine above using Cat
          comb += Cat(*ea).eq(Cat(*al))
          comb += Cat(*eb).eq(Cat(*bl))
          comb += Cat(*ol).eq(Cat(*eo))
+        comb += Cat(*cl).eq(Cat(*co))
  
          # use only one addition to take advantage of look-ahead carry and
          # special hardware on FPGAs
          comb += expanded_o.eq(expanded_a + expanded_b)
  
-        return m
-
+        comb += ripple.results_in.eq(carry_tmp)
+        comb += ripple.gates.eq(self.part_pts.as_sig())
+        comb += self.carry_out.eq(ripple.output)
  
+        return m