from nmigen import Signal, Module, Value, Elaboratable, Cat, C, Mux, Repl
from nmigen.hdl.ast import Assign
from abc import ABCMeta, abstractmethod
-from typing import Any, NewType, Union, List, Dict, Iterable, Mapping, Optional
-from typing_extensions import final
from nmigen.cli import main
-PartitionPointsIn = Mapping[int, Union[Value, bool, int]]
-
-class PartitionPoints(Dict[int, Value]):
+class PartitionPoints(dict):
"""Partition points and corresponding ``Value``s.
The points at which an ALU is partitioned, along with the ``Value``s that
* bits 10 <= ``i`` < 16
"""
- def __init__(self, partition_points: Optional[PartitionPointsIn] = None):
+ def __init__(self, partition_points=None):
"""Create a new ``PartitionPoints``.
:param partition_points: the input partition points to values mapping.
raise ValueError("point must be a non-negative integer")
self[point] = Value.wrap(enabled)
- def like(self,
- name: Optional[str] = None,
- src_loc_at: int = 0) -> 'PartitionPoints':
+ def like(self, name=None, src_loc_at=0):
"""Create a new ``PartitionPoints`` with ``Signal``s for all values.
:param name: the base name for the new ``Signal``s.
retval[point] = Signal(enabled.shape(), name=f"{name}_{point}")
return retval
def eq(self, rhs):
    """Assign ``PartitionPoints`` using ``Signal.eq``.

    This is a generator: nothing is checked or produced until it is
    iterated.

    :param rhs: the mapping to assign from; it must have exactly the same
        set of partition points (keys) as ``self``.
    :returns: an iterable yielding ``enabled.eq(rhs[point])`` for every
        ``point, enabled`` pair in ``self``, in ``self``'s iteration order.
    :raises ValueError: on first iteration, if the key sets of ``self`` and
        ``rhs`` differ.
    """
    # Key views compare with set semantics, so ordering differences between
    # the two mappings are irrelevant and no temporary sets are needed.
    if self.keys() != rhs.keys():
        raise ValueError("incompatible point set")
    for point, enabled in self.items():
        yield enabled.eq(rhs[point])
- def as_mask(self, width: int) -> Value:
+ def as_mask(self, width):
"""Create a bit-mask from `self`.
Each bit in the returned mask is clear only if the partition point at
:param width: the bit width of the resulting mask
"""
- bits: List[Union[Value, bool]]
bits = []
for i in range(width):
if i in self:
bits.append(True)
return Cat(*bits)
- def get_max_partition_count(self, width: int) -> int:
+ def get_max_partition_count(self, width):
"""Get the maximum number of partitions.
Gets the number of partitions when all partition points are enabled.
retval += 1
return retval
- def fits_in_width(self, width: int) -> bool:
+ def fits_in_width(self, width):
"""Check if all partition points are smaller than `width`."""
for point in self.keys():
if point >= width:
return True
-@final
class FullAdder(Elaboratable):
"""Full Adder.
:attribute carry: the carry output
"""
- def __init__(self, width: int):
+ def __init__(self, width):
"""Create a ``FullAdder``.
:param width: the bit width of the input and output
self.sum = Signal(width)
self.carry = Signal(width)
- def elaborate(self, platform: Any) -> Module:
+ def elaborate(self, platform):
"""Elaborate this module."""
m = Module()
m.d.comb += self.sum.eq(self.in0 ^ self.in1 ^ self.in2)
return m
-@final
class PartitionedAdder(Elaboratable):
"""Partitioned Adder.
supported, except for by ``Signal.eq``.
"""
- def __init__(self, width: int, partition_points: PartitionPointsIn):
+ def __init__(self, width, partition_points):
"""Create a ``PartitionedAdder``.
:param width: the bit width of the input and output
self._expanded_b = Signal(expanded_width)
self._expanded_output = Signal(expanded_width)
- def elaborate(self, platform: Any) -> Module:
+ def elaborate(self, platform):
"""Elaborate this module."""
m = Module()
expanded_index = 0
+ # store bits in a list, use Cat later. graphviz is much cleaner
+ al = []
+ bl = []
+ ol = []
+ ea = []
+ eb = []
+ eo = []
+ # partition points are "breaks" (extra zeros) in what would otherwise
+ # be a massive long add.
for i in range(self.width):
if i in self.partition_points:
# add extra bit set to 0 + 0 for enabled partition points
# and 1 + 0 for disabled partition points
- m.d.comb += self._expanded_a[expanded_index].eq(
- ~self.partition_points[i])
- m.d.comb += self._expanded_b[expanded_index].eq(0)
+ ea.append(self._expanded_a[expanded_index])
+ al.append(~self.partition_points[i])
+ eb.append(self._expanded_b[expanded_index])
+ bl.append(C(0))
expanded_index += 1
- m.d.comb += self._expanded_a[expanded_index].eq(self.a[i])
- m.d.comb += self._expanded_b[expanded_index].eq(self.b[i])
- m.d.comb += self.output[i].eq(
- self._expanded_output[expanded_index])
+ ea.append(self._expanded_a[expanded_index])
+ al.append(self.a[i])
+ eb.append(self._expanded_b[expanded_index])
+ bl.append(self.b[i])
+ eo.append(self._expanded_output[expanded_index])
+ ol.append(self.output[i])
expanded_index += 1
+ # combine above using Cat
+ m.d.comb += Cat(*ea).eq(Cat(*al))
+ m.d.comb += Cat(*eb).eq(Cat(*bl))
+ m.d.comb += Cat(*eo).eq(Cat(*ol))
# use only one addition to take advantage of look-ahead carry and
# special hardware on FPGAs
m.d.comb += self._expanded_output.eq(
FULL_ADDER_INPUT_COUNT = 3
-@final
class AddReduce(Elaboratable):
"""Add list of numbers together.
supported, except for by ``Signal.eq``.
"""
- def __init__(self,
- inputs: Iterable[Signal],
- output_width: int,
- register_levels: Iterable[int],
- partition_points: PartitionPointsIn):
+ def __init__(self, inputs, output_width, register_levels, partition_points):
"""Create an ``AddReduce``.
:param inputs: input ``Signal``s to be summed.
"not enough adder levels for specified register levels")
@staticmethod
- def get_max_level(input_count: int) -> int:
+ def get_max_level(input_count):
"""Get the maximum level.
All ``register_levels`` must be less than or equal to the maximum
input_count += 2 * len(groups)
retval += 1
def next_register_levels(self):
    """``Iterable`` of ``register_levels`` for next recursive level.

    Walks ``self.register_levels`` and yields ``level - 1`` for every entry
    greater than zero; entries that are zero (or negative) are dropped.

    :returns: a generator over the decremented levels, in the same order
        as ``self.register_levels``.
    """
    for level in self.register_levels:
        if level > 0:
            yield level - 1
@staticmethod
def full_adder_groups(input_count):
    """Get ``inputs`` indices for which a full adder should be built.

    :param input_count: the number of inputs available at this level.
    :returns: a ``range`` of start indices, stepping by
        ``FULL_ADDER_INPUT_COUNT``; the stop bound is chosen so that every
        returned index has ``FULL_ADDER_INPUT_COUNT`` inputs at or after
        it. The range is empty when ``input_count`` is smaller than
        ``FULL_ADDER_INPUT_COUNT``.
    """
    return range(0,
                 input_count - FULL_ADDER_INPUT_COUNT + 1,
                 FULL_ADDER_INPUT_COUNT)
- def elaborate(self, platform: Any) -> Module:
+ def elaborate(self, platform):
"""Elaborate this module."""
m = Module()
m.d.comb += self.output.eq(adder.output)
return m
# go on to handle recursive case
- intermediate_terms: List[Signal]
intermediate_terms = []
- def add_intermediate_term(value: Value) -> None:
+ def add_intermediate_term(value):
intermediate_term = Signal(
len(self.output),
name=f"intermediate_terms[{len(intermediate_terms)}]")
intermediate_terms.append(intermediate_term)
m.d.comb += intermediate_term.eq(value)
- part_mask = self._reg_partition_points.as_mask(len(self.output))
+ # store mask in intermediary (simplifies graph)
+ part_mask = Signal(len(self.output), reset_less=True)
+ mask = self._reg_partition_points.as_mask(len(self.output))
+ m.d.comb += part_mask.eq(mask)
# create full adders for this recursive level.
# this shrinks N terms to 2 * (N // 3) plus the remainder
instruction.
"""
- def __init__(self, register_levels: Iterable[int] = ()):
+ def __init__(self, register_levels= ()):
self.part_pts = PartitionPoints()
for i in range(8, 64, 8):
self.part_pts[i] = Signal(name=f"part_pts_{i}")
self._not_b_term_64 = Signal(128)
self._neg_lsb_b_term_64 = Signal(128)
- def _part_byte(self, index: int) -> Value:
+ def _part_byte(self, index):
if index == -1 or index == 7:
return C(True, 1)
assert index >= 0 and index < 8
return self.part_pts[index * 8 + 8]
- def elaborate(self, platform: Any) -> Module:
+ def elaborate(self, platform):
m = Module()
for i in range(len(self.part_ops)):
.eq(self._delayed_part_ops[j][i])
for j in range(len(self.register_levels))]
+ def add_intermediate_value(value):
+ intermediate_value = Signal(len(value), reset_less=True)
+ m.d.comb += intermediate_value.eq(value)
+ return intermediate_value
+
for parts, delayed_parts in [(self._part_64, self._delayed_part_64),
(self._part_32, self._delayed_part_32),
(self._part_16, self._delayed_part_16),
(self._part_8, self._delayed_part_8)]:
byte_count = 8 // len(parts)
for i in range(len(parts)):
- value = self._part_byte(i * byte_count - 1)
+ pb = self._part_byte(i * byte_count - 1)
+ value = add_intermediate_value(pb)
for j in range(i * byte_count, (i + 1) * byte_count - 1):
- value &= ~self._part_byte(j)
- value &= self._part_byte((i + 1) * byte_count - 1)
+ pb = add_intermediate_value(~self._part_byte(j))
+ value = add_intermediate_value(value & pb)
+ pb = self._part_byte((i + 1) * byte_count - 1)
+ value = add_intermediate_value(value & pb)
m.d.comb += parts[i].eq(value)
m.d.comb += delayed_parts[0][i].eq(parts[i])
m.d.sync += [delayed_parts[j + 1][i].eq(delayed_parts[j][i])
terms = []
- def add_term(value: Value,
- shift: int = 0,
- enabled: Optional[Value] = None) -> None:
+ def add_term(value, shift=0, enabled=None):
term = Signal(128)
terms.append(term)
if enabled is not None:
byte_width = 8 // len(parts)
bit_width = 8 * byte_width
for i in range(len(parts)):
- b_enabled = parts[i] & self.a[(i + 1) * bit_width - 1] \
+ ae = parts[i] & self.a[(i + 1) * bit_width - 1] \
& self._a_signed[i * byte_width]
- a_enabled = parts[i] & self.b[(i + 1) * bit_width - 1] \
+ be = parts[i] & self.b[(i + 1) * bit_width - 1] \
& self._b_signed[i * byte_width]
+ a_enabled = Signal(name="a_enabled_%d" % i, reset_less=True)
+ b_enabled = Signal(name="b_enabled_%d" % i, reset_less=True)
+ m.d.comb += a_enabled.eq(ae)
+ m.d.comb += b_enabled.eq(be)
# for 8-bit values: form a * 0xFF00 by using -a * 0x100, the
# negation operation is split into a bitwise not and a +1.