a_enabled and b_enabled into signals

[ieee754fpu.git] / src / ieee754 / part_mul_add / multiply.py
diff --git a/src/ieee754/part_mul_add/multiply.py b/src/ieee754/part_mul_add/multiply.py

index 5902967c8f95c8f2b0a5f3ec6c64be298a18fb4f..bdbb28cc8d9ec04ab9b226dccb3a4d61934e222e 100644 (file)
--- a/src/ieee754/part_mul_add/multiply.py
+++ b/src/ieee754/part_mul_add/multiply.py
@@ -5,14 +5,10 @@
  from nmigen import Signal, Module, Value, Elaboratable, Cat, C, Mux, Repl
  from nmigen.hdl.ast import Assign
  from abc import ABCMeta, abstractmethod
-from typing import Any, NewType, Union, List, Dict, Iterable, Mapping, Optional
-from typing_extensions import final
  from nmigen.cli import main
  
-PartitionPointsIn = Mapping[int, Union[Value, bool, int]]
  
-
-class PartitionPoints(Dict[int, Value]):
+class PartitionPoints(dict):
      """Partition points and corresponding ``Value``s.
  
      The points at where an ALU is partitioned along with ``Value``s that
@@ -38,7 +34,7 @@ class PartitionPoints(Dict[int, Value]):
          * bits 10 <= ``i`` < 16
      """
  
-    def __init__(self, partition_points: Optional[PartitionPointsIn] = None):
+    def __init__(self, partition_points=None):
          """Create a new ``PartitionPoints``.
  
          :param partition_points: the input partition points to values mapping.
@@ -52,9 +48,7 @@ class PartitionPoints(Dict[int, Value]):
                      raise ValueError("point must be a non-negative integer")
                  self[point] = Value.wrap(enabled)
  
-    def like(self,
-             name: Optional[str] = None,
-             src_loc_at: int = 0) -> 'PartitionPoints':
+    def like(self, name=None, src_loc_at=0):
          """Create a new ``PartitionPoints`` with ``Signal``s for all values.
  
          :param name: the base name for the new ``Signal``s.
@@ -66,14 +60,14 @@ class PartitionPoints(Dict[int, Value]):
              retval[point] = Signal(enabled.shape(), name=f"{name}_{point}")
          return retval
  
-    def eq(self, rhs: 'PartitionPoints') -> Iterable[Assign]:
+    def eq(self, rhs):
          """Assign ``PartitionPoints`` using ``Signal.eq``."""
          if set(self.keys()) != set(rhs.keys()):
              raise ValueError("incompatible point set")
          for point, enabled in self.items():
              yield enabled.eq(rhs[point])
  
-    def as_mask(self, width: int) -> Value:
+    def as_mask(self, width):
          """Create a bit-mask from `self`.
  
          Each bit in the returned mask is clear only if the partition point at
@@ -81,7 +75,6 @@ class PartitionPoints(Dict[int, Value]):
  
          :param width: the bit width of the resulting mask
          """
-        bits: List[Union[Value, bool]]
          bits = []
          for i in range(width):
              if i in self:
@@ -90,7 +83,7 @@ class PartitionPoints(Dict[int, Value]):
                  bits.append(True)
          return Cat(*bits)
  
-    def get_max_partition_count(self, width: int) -> int:
+    def get_max_partition_count(self, width):
          """Get the maximum number of partitions.
  
          Gets the number of partitions when all partition points are enabled.
@@ -101,7 +94,7 @@ class PartitionPoints(Dict[int, Value]):
                  retval += 1
          return retval
  
-    def fits_in_width(self, width: int) -> bool:
+    def fits_in_width(self, width):
          """Check if all partition points are smaller than `width`."""
          for point in self.keys():
              if point >= width:
@@ -109,7 +102,6 @@ class PartitionPoints(Dict[int, Value]):
          return True
  
  
-@final
  class FullAdder(Elaboratable):
      """Full Adder.
  
@@ -120,7 +112,7 @@ class FullAdder(Elaboratable):
      :attribute carry: the carry output
      """
  
-    def __init__(self, width: int):
+    def __init__(self, width):
          """Create a ``FullAdder``.
  
          :param width: the bit width of the input and output
@@ -131,7 +123,7 @@ class FullAdder(Elaboratable):
          self.sum = Signal(width)
          self.carry = Signal(width)
  
-    def elaborate(self, platform: Any) -> Module:
+    def elaborate(self, platform):
          """Elaborate this module."""
          m = Module()
          m.d.comb += self.sum.eq(self.in0 ^ self.in1 ^ self.in2)
@@ -141,7 +133,6 @@ class FullAdder(Elaboratable):
          return m
  
  
-@final
  class PartitionedAdder(Elaboratable):
      """Partitioned Adder.
  
@@ -153,7 +144,7 @@ class PartitionedAdder(Elaboratable):
          supported, except for by ``Signal.eq``.
      """
  
-    def __init__(self, width: int, partition_points: PartitionPointsIn):
+    def __init__(self, width, partition_points):
          """Create a ``PartitionedAdder``.
  
          :param width: the bit width of the input and output
@@ -176,23 +167,39 @@ class PartitionedAdder(Elaboratable):
          self._expanded_b = Signal(expanded_width)
          self._expanded_output = Signal(expanded_width)
  
-    def elaborate(self, platform: Any) -> Module:
+    def elaborate(self, platform):
          """Elaborate this module."""
          m = Module()
          expanded_index = 0
+        # gather bits in lists, Cat them later: graphviz output is cleaner
+        al = []
+        bl = []
+        ol = []
+        ea = []
+        eb = []
+        eo = []
+        # partition points are "breaks" (extra zeros) in what would otherwise
+        # be a massive long add.
          for i in range(self.width):
              if i in self.partition_points:
                  # add extra bit set to 0 + 0 for enabled partition points
                  # and 1 + 0 for disabled partition points
-                m.d.comb += self._expanded_a[expanded_index].eq(
-                    ~self.partition_points[i])
-                m.d.comb += self._expanded_b[expanded_index].eq(0)
+                ea.append(self._expanded_a[expanded_index])
+                al.append(~self.partition_points[i])
+                eb.append(self._expanded_b[expanded_index])
+                bl.append(C(0))
                  expanded_index += 1
-            m.d.comb += self._expanded_a[expanded_index].eq(self.a[i])
-            m.d.comb += self._expanded_b[expanded_index].eq(self.b[i])
-            m.d.comb += self.output[i].eq(
-                self._expanded_output[expanded_index])
+            ea.append(self._expanded_a[expanded_index])
+            al.append(self.a[i])
+            eb.append(self._expanded_b[expanded_index])
+            bl.append(self.b[i])
+            eo.append(self._expanded_output[expanded_index])
+            ol.append(self.output[i])
              expanded_index += 1
+        # combine above using Cat (a/b drive the expanded operands)
+        m.d.comb += Cat(*ea).eq(Cat(*al))
+        m.d.comb += Cat(*eb).eq(Cat(*bl))
+        m.d.comb += Cat(*ol).eq(Cat(*eo))  # output <- expanded sum bits
          # use only one addition to take advantage of look-ahead carry and
          # special hardware on FPGAs
          m.d.comb += self._expanded_output.eq(
@@ -203,7 +210,6 @@ class PartitionedAdder(Elaboratable):
  FULL_ADDER_INPUT_COUNT = 3
  
  
-@final
  class AddReduce(Elaboratable):
      """Add list of numbers together.
  
@@ -216,11 +222,7 @@ class AddReduce(Elaboratable):
          supported, except for by ``Signal.eq``.
      """
  
-    def __init__(self,
-                 inputs: Iterable[Signal],
-                 output_width: int,
-                 register_levels: Iterable[int],
-                 partition_points: PartitionPointsIn):
+    def __init__(self, inputs, output_width, register_levels, partition_points):
          """Create an ``AddReduce``.
  
          :param inputs: input ``Signal``s to be summed.
@@ -246,7 +248,7 @@ class AddReduce(Elaboratable):
                      "not enough adder levels for specified register levels")
  
      @staticmethod
-    def get_max_level(input_count: int) -> int:
+    def get_max_level(input_count):
          """Get the maximum level.
  
          All ``register_levels`` must be less than or equal to the maximum
@@ -261,20 +263,20 @@ class AddReduce(Elaboratable):
              input_count += 2 * len(groups)
              retval += 1
  
-    def next_register_levels(self) -> Iterable[int]:
+    def next_register_levels(self):
          """``Iterable`` of ``register_levels`` for next recursive level."""
          for level in self.register_levels:
              if level > 0:
                  yield level - 1
  
      @staticmethod
-    def full_adder_groups(input_count: int) -> range:
+    def full_adder_groups(input_count):
          """Get ``inputs`` indices for which a full adder should be built."""
          return range(0,
                       input_count - FULL_ADDER_INPUT_COUNT + 1,
                       FULL_ADDER_INPUT_COUNT)
  
-    def elaborate(self, platform: Any) -> Module:
+    def elaborate(self, platform):
          """Elaborate this module."""
          m = Module()
  
@@ -311,17 +313,19 @@ class AddReduce(Elaboratable):
                  m.d.comb += self.output.eq(adder.output)
              return m
          # go on to handle recursive case
-        intermediate_terms: List[Signal]
          intermediate_terms = []
  
-        def add_intermediate_term(value: Value) -> None:
+        def add_intermediate_term(value):
              intermediate_term = Signal(
                  len(self.output),
                  name=f"intermediate_terms[{len(intermediate_terms)}]")
              intermediate_terms.append(intermediate_term)
              m.d.comb += intermediate_term.eq(value)
  
-        part_mask = self._reg_partition_points.as_mask(len(self.output))
+        # store mask in intermediary (simplifies graph)
+        part_mask = Signal(len(self.output), reset_less=True)
+        mask = self._reg_partition_points.as_mask(len(self.output))
+        m.d.comb += part_mask.eq(mask)
  
          # create full adders for this recursive level.
          # this shrinks N terms to 2 * (N // 3) plus the remainder
@@ -390,7 +394,7 @@ class Mul8_16_32_64(Elaboratable):
              instruction.
      """
  
-    def __init__(self, register_levels: Iterable[int] = ()):
+    def __init__(self, register_levels=()):
          self.part_pts = PartitionPoints()
          for i in range(8, 64, 8):
              self.part_pts[i] = Signal(name=f"part_pts_{i}")
@@ -446,13 +450,13 @@ class Mul8_16_32_64(Elaboratable):
          self._not_b_term_64 = Signal(128)
          self._neg_lsb_b_term_64 = Signal(128)
  
-    def _part_byte(self, index: int) -> Value:
+    def _part_byte(self, index):
          if index == -1 or index == 7:
              return C(True, 1)
          assert index >= 0 and index < 8
          return self.part_pts[index * 8 + 8]
  
-    def elaborate(self, platform: Any) -> Module:
+    def elaborate(self, platform):
          m = Module()
  
          for i in range(len(self.part_ops)):
@@ -461,16 +465,24 @@ class Mul8_16_32_64(Elaboratable):
                           .eq(self._delayed_part_ops[j][i])
                           for j in range(len(self.register_levels))]
  
+        def add_intermediate_value(value):
+            intermediate_value = Signal(len(value), reset_less=True)
+            m.d.comb += intermediate_value.eq(value)
+            return intermediate_value
+
          for parts, delayed_parts in [(self._part_64, self._delayed_part_64),
                                       (self._part_32, self._delayed_part_32),
                                       (self._part_16, self._delayed_part_16),
                                       (self._part_8, self._delayed_part_8)]:
              byte_count = 8 // len(parts)
              for i in range(len(parts)):
-                value = self._part_byte(i * byte_count - 1)
+                pb = self._part_byte(i * byte_count - 1)
+                value = add_intermediate_value(pb)
                  for j in range(i * byte_count, (i + 1) * byte_count - 1):
-                    value &= ~self._part_byte(j)
-                value &= self._part_byte((i + 1) * byte_count - 1)
+                    pb = add_intermediate_value(~self._part_byte(j))
+                    value = add_intermediate_value(value & pb)
+                pb = self._part_byte((i + 1) * byte_count - 1)
+                value = add_intermediate_value(value & pb)
                  m.d.comb += parts[i].eq(value)
                  m.d.comb += delayed_parts[0][i].eq(parts[i])
                  m.d.sync += [delayed_parts[j + 1][i].eq(delayed_parts[j][i])
@@ -489,9 +501,7 @@ class Mul8_16_32_64(Elaboratable):
  
          terms = []
  
-        def add_term(value: Value,
-                     shift: int = 0,
-                     enabled: Optional[Value] = None) -> None:
+        def add_term(value, shift=0, enabled=None):
              term = Signal(128)
              terms.append(term)
              if enabled is not None:
@@ -560,10 +570,14 @@ class Mul8_16_32_64(Elaboratable):
              byte_width = 8 // len(parts)
              bit_width = 8 * byte_width
              for i in range(len(parts)):
-                b_enabled = parts[i] & self.a[(i + 1) * bit_width - 1] \
+                be = parts[i] & self.a[(i + 1) * bit_width - 1] \
                      & self._a_signed[i * byte_width]
-                a_enabled = parts[i] & self.b[(i + 1) * bit_width - 1] \
+                ae = parts[i] & self.b[(i + 1) * bit_width - 1] \
                      & self._b_signed[i * byte_width]
+                a_enabled = Signal(name="a_enabled_%d" % i, reset_less=True)
+                b_enabled = Signal(name="b_enabled_%d" % i, reset_less=True)
+                m.d.comb += a_enabled.eq(ae)  # from b's sign bit, as before
+                m.d.comb += b_enabled.eq(be)  # from a's sign bit, as before
  
                  # for 8-bit values: form a * 0xFF00 by using -a * 0x100, the
                  # negation operation is split into a bitwise not and a +1.