class InputData:
    """Bundle of input signals.

    Holds two 64-bit signals ``a`` and ``b``, a ``PartitionPoints``
    instance with one signal at each of the indices 8, 16, ..., 56, and
    a list of eight 2-bit ``part_ops`` signals.
    """

    def __init__(self):
        # NOTE(review): the attribute names are deliberately untouched;
        # nmigen is understood to name an unnamed Signal after its
        # assignment target -- confirm before renaming anything here.
        self.a = Signal(64)
        self.b = Signal(64)
        self.part_pts = PartitionPoints()
        for bit in range(8, 64, 8):
            self.part_pts[bit] = Signal(name=f"part_pts_{bit}")
        self.part_ops = []
        for idx in range(8):
            self.part_ops.append(Signal(2, name=f"part_ops_{idx}"))

    def eq_from(self, part_pts, a, b, part_ops):
        """Return the list of assignments loading this bundle.

        The list holds, in order: the partition-point assignment, the
        ``a`` and ``b`` assignments, then one assignment per entry of
        ``self.part_ops`` taken from the same index of ``part_ops``.
        """
        assigns = [self.part_pts.eq(part_pts), self.a.eq(a), self.b.eq(b)]
        for idx, op in enumerate(self.part_ops):
            assigns.append(op.eq(part_ops[idx]))
        return assigns

    def eq(self, rhs):
        """Return the assignments copying every field of ``rhs`` here."""
        part_pts, a, b, part_ops = rhs.part_pts, rhs.a, rhs.b, rhs.part_ops
        return self.eq_from(part_pts, a, b, part_ops)
+
+
class OutputData:
    """Bundle of output signals: a 128-bit intermediate and a 64-bit result."""

    # Fields copied by eq(), in the order their assignments are emitted.
    _FIELDS = ("intermediate_output", "output")

    def __init__(self):
        # NOTE(review): attribute names are left unchanged; nmigen is
        # understood to name an unnamed Signal after its assignment
        # target -- confirm before renaming.
        self.intermediate_output = Signal(128)  # needed for unit tests
        self.output = Signal(64)

    def eq(self, rhs):
        """Return the assignments copying both fields of ``rhs`` here."""
        assigns = []
        for field in self._FIELDS:
            assigns.append(getattr(self, field).eq(getattr(rhs, field)))
        return assigns
+
+
class AllTerms(PipeModBase):
    """Set of terms to be added together.

    ``elaborate`` instantiates eight ``Signs``, four ``Part``, eight
    ``ProductTerms`` and four ``OrMod`` submodules, then copies the
    collected terms, the partition points and the part ops to the output.
    """

    def __init__(self, pspec, n_inputs):
        """Create an ``AllTerms``.

        :param pspec: specification object; ``n_parts`` and ``width`` are
            read from it and it is handed on to ``PipeModBase``.
        :param n_inputs: stored and forwarded to ``AddReduceData`` by
            ``ospec``.
        """
        # Attributes are set before the base constructor runs, as in the
        # original (presumably because the base class calls ispec/ospec
        # -- TODO confirm).
        self.output_width = pspec.width * 2
        self.n_parts = pspec.n_parts
        self.n_inputs = n_inputs
        super().__init__(pspec, "allterms")

    def ispec(self):
        """Return a fresh ``InputData`` describing this stage's input."""
        return InputData()

    def ospec(self):
        """Return the ``AddReduceData`` describing this stage's output."""
        return AddReduceData(self.i.part_pts, self.n_inputs,
                             self.output_width, self.n_parts)

    def elaborate(self, platform):
        """Build and return the ``Module`` implementing this stage."""
        m = Module()

        eps = self.i.part_pts

        # collect part-bytes
        # NOTE(review): the local name "pbs" is kept on purpose; nmigen is
        # understood to name an unnamed Signal after the variable it is
        # assigned to -- confirm before renaming.
        pbs = Signal(8, reset_less=True)
        part_bytes = []
        for idx in range(8):
            byte_en = Signal(name="pb%d" % idx, reset_less=True)
            m.d.comb += byte_en.eq(eps.part_byte(idx))
            part_bytes.append(byte_en)
        m.d.comb += pbs.eq(Cat(*part_bytes))

        # one Signs submodule per part_ops entry
        signs = []
        for idx in range(8):
            sign_mod = Signs()
            signs.append(sign_mod)
            setattr(m.submodules, "signs%d" % idx, sign_mod)
            m.d.comb += sign_mod.part_ops.eq(self.i.part_ops[idx])

        # the four Part submodules differ only in their third argument
        part_8 = Part(eps, 128, 8, 8)
        m.submodules.part_8 = part_8
        part_16 = Part(eps, 128, 4, 8)
        m.submodules.part_16 = part_16
        part_32 = Part(eps, 128, 2, 8)
        m.submodules.part_32 = part_32
        part_64 = Part(eps, 128, 1, 8)
        m.submodules.part_64 = part_64
        parts = [part_8, part_16, part_32, part_64]

        for part in parts:
            m.d.comb += [part.a.eq(self.i.a), part.b.eq(self.i.b)]
            for idx, sign_mod in enumerate(signs):
                m.d.comb += [
                    part.a_signed[idx].eq(sign_mod.a_signed),
                    part.b_signed[idx].eq(sign_mod.b_signed),
                ]
            m.d.comb += part.pbs.eq(pbs)

        # per-kind correction terms, one entry per Part submodule
        not_a_terms = [part.not_a_term for part in parts]
        not_b_terms = [part.not_b_term for part in parts]
        neg_lsb_a_terms = [part.neg_lsb_a_term for part in parts]
        neg_lsb_b_terms = [part.neg_lsb_b_term for part in parts]

        terms = []

        for a_index in range(8):
            product = ProductTerms(8, 128, 8, a_index, 8)
            setattr(m.submodules, "terms_%d" % a_index, product)

            m.d.comb += [
                product.a.eq(self.i.a),
                product.b.eq(self.i.b),
                product.pb_en.eq(pbs),
            ]

            terms.extend(product.terms)

        # it's fine to bitwise-or data together since they are never enabled
        # at the same time
        nat_or = OrMod(128)
        m.submodules.nat_or = nat_or
        nbt_or = OrMod(128)
        m.submodules.nbt_or = nbt_or
        nla_or = OrMod(128)
        m.submodules.nla_or = nla_or
        nlb_or = OrMod(128)
        m.submodules.nlb_or = nlb_or
        or_stages = (
            (not_a_terms, nat_or),
            (not_b_terms, nbt_or),
            (neg_lsb_a_terms, nla_or),
            (neg_lsb_b_terms, nlb_or),
        )
        for inputs, or_mod in or_stages:
            for idx, value in enumerate(inputs):
                m.d.comb += or_mod.orin[idx].eq(value)
            terms.append(or_mod.orout)

        # copy the intermediate terms to the output
        m.d.comb += [self.o.terms[idx].eq(value)
                     for idx, value in enumerate(terms)]

        # copy reg part points and part ops to output
        m.d.comb += self.o.part_pts.eq(eps)
        m.d.comb += [self.o.part_ops[idx].eq(op)
                     for idx, op in enumerate(self.i.part_ops)]

        return m
+
+
+class Intermediates(PipeModBase):