X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fieee754%2Fpart_mul_add%2Fmultiply.py;h=b132de56f0ecc90316b13fd1ee1f8da5b7ea4075;hb=HEAD;hp=215d18c6a1aacce049dd74c6331437c2e74e5853;hpb=674602ad56ad774971c0ce95a878028b65dc176b;p=ieee754fpu.git diff --git a/src/ieee754/part_mul_add/multiply.py b/src/ieee754/part_mul_add/multiply.py index 215d18c6..b132de56 100644 --- a/src/ieee754/part_mul_add/multiply.py +++ b/src/ieee754/part_mul_add/multiply.py @@ -11,316 +11,29 @@ from operator import or_ from ieee754.pipeline import PipelineSpec from nmutil.pipemodbase import PipeModBase - -class PartitionPoints(dict): - """Partition points and corresponding ``Value``s. - - The points at where an ALU is partitioned along with ``Value``s that - specify if the corresponding partition points are enabled. - - For example: ``{1: True, 5: True, 10: True}`` with - ``width == 16`` specifies that the ALU is split into 4 sections: - * bits 0 <= ``i`` < 1 - * bits 1 <= ``i`` < 5 - * bits 5 <= ``i`` < 10 - * bits 10 <= ``i`` < 16 - - If the partition_points were instead ``{1: True, 5: a, 10: True}`` - where ``a`` is a 1-bit ``Signal``: - * If ``a`` is asserted: - * bits 0 <= ``i`` < 1 - * bits 1 <= ``i`` < 5 - * bits 5 <= ``i`` < 10 - * bits 10 <= ``i`` < 16 - * Otherwise - * bits 0 <= ``i`` < 1 - * bits 1 <= ``i`` < 10 - * bits 10 <= ``i`` < 16 - """ - - def __init__(self, partition_points=None): - """Create a new ``PartitionPoints``. - - :param partition_points: the input partition points to values mapping. - """ - super().__init__() - if partition_points is not None: - for point, enabled in partition_points.items(): - if not isinstance(point, int): - raise TypeError("point must be a non-negative integer") - if point < 0: - raise ValueError("point must be a non-negative integer") - self[point] = Value.wrap(enabled) - - def like(self, name=None, src_loc_at=0, mul=1): - """Create a new ``PartitionPoints`` with ``Signal``s for all values. - - :param name: the base name for the new ``Signal``s. - :param mul: a multiplication factor on the indices - """ - if name is None: - name = Signal(src_loc_at=1+src_loc_at).name # get variable name - retval = PartitionPoints() - for point, enabled in self.items(): - point *= mul - retval[point] = Signal(enabled.shape(), name=f"{name}_{point}") - return retval - - def eq(self, rhs): - """Assign ``PartitionPoints`` using ``Signal.eq``.""" - if set(self.keys()) != set(rhs.keys()): - raise ValueError("incompatible point set") - for point, enabled in self.items(): - yield enabled.eq(rhs[point]) - - def as_mask(self, width, mul=1): - """Create a bit-mask from `self`. - - Each bit in the returned mask is clear only if the partition point at - the same bit-index is enabled. - - :param width: the bit width of the resulting mask - :param mul: a "multiplier" which in-place expands the partition points - typically set to "2" when used for multipliers - """ - bits = [] - for i in range(width): - i /= mul - if i.is_integer() and int(i) in self: - bits.append(~self[i]) - else: - bits.append(True) - return Cat(*bits) - - def get_max_partition_count(self, width): - """Get the maximum number of partitions. - - Gets the number of partitions when all partition points are enabled. - """ - retval = 1 - for point in self.keys(): - if point < width: - retval += 1 - return retval - - def fits_in_width(self, width): - """Check if all partition points are smaller than `width`.""" - for point in self.keys(): - if point >= width: - return False - return True - - def part_byte(self, index, mfactor=1): # mfactor used for "expanding" - if index == -1 or index == 7: - return C(True, 1) - assert index >= 0 and index < 8 - return self[(index * 8 + 8)*mfactor] - - -class FullAdder(Elaboratable): - """Full Adder. - - :attribute in0: the first input - :attribute in1: the second input - :attribute in2: the third input - :attribute sum: the sum output - :attribute carry: the carry output - - Rather than do individual full adders (and have an array of them, - which would be very slow to simulate), this module can specify the - bit width of the inputs and outputs: in effect it performs multiple - Full 3-2 Add operations "in parallel". - """ - - def __init__(self, width): - """Create a ``FullAdder``. - - :param width: the bit width of the input and output - """ - self.in0 = Signal(width, reset_less=True) - self.in1 = Signal(width, reset_less=True) - self.in2 = Signal(width, reset_less=True) - self.sum = Signal(width, reset_less=True) - self.carry = Signal(width, reset_less=True) - - def elaborate(self, platform): - """Elaborate this module.""" - m = Module() - m.d.comb += self.sum.eq(self.in0 ^ self.in1 ^ self.in2) - m.d.comb += self.carry.eq((self.in0 & self.in1) - | (self.in1 & self.in2) - | (self.in2 & self.in0)) - return m - - -class MaskedFullAdder(Elaboratable): - """Masked Full Adder. - - :attribute mask: the carry partition mask - :attribute in0: the first input - :attribute in1: the second input - :attribute in2: the third input - :attribute sum: the sum output - :attribute mcarry: the masked carry output - - FullAdders are always used with a "mask" on the output. To keep - the graphviz "clean", this class performs the masking here rather - than inside a large for-loop. - - See the following discussion as to why this is no longer derived - from FullAdder. Each carry is shifted here *before* being ANDed - with the mask, so that an AOI cell may be used (which is more - gate-efficient) - https://en.wikipedia.org/wiki/AND-OR-Invert - https://groups.google.com/d/msg/comp.arch/fcq-GLQqvas/vTxmcA0QAgAJ - """ - - def __init__(self, width): - """Create a ``MaskedFullAdder``. - - :param width: the bit width of the input and output - """ - self.width = width - self.mask = Signal(width, reset_less=True) - self.mcarry = Signal(width, reset_less=True) - self.in0 = Signal(width, reset_less=True) - self.in1 = Signal(width, reset_less=True) - self.in2 = Signal(width, reset_less=True) - self.sum = Signal(width, reset_less=True) - - def elaborate(self, platform): - """Elaborate this module.""" - m = Module() - s1 = Signal(self.width, reset_less=True) - s2 = Signal(self.width, reset_less=True) - s3 = Signal(self.width, reset_less=True) - c1 = Signal(self.width, reset_less=True) - c2 = Signal(self.width, reset_less=True) - c3 = Signal(self.width, reset_less=True) - m.d.comb += self.sum.eq(self.in0 ^ self.in1 ^ self.in2) - m.d.comb += s1.eq(Cat(0, self.in0)) - m.d.comb += s2.eq(Cat(0, self.in1)) - m.d.comb += s3.eq(Cat(0, self.in2)) - m.d.comb += c1.eq(s1 & s2 & self.mask) - m.d.comb += c2.eq(s2 & s3 & self.mask) - m.d.comb += c3.eq(s3 & s1 & self.mask) - m.d.comb += self.mcarry.eq(c1 | c2 | c3) - return m - - -class PartitionedAdder(Elaboratable): - """Partitioned Adder. - - Performs the final add. The partition points are included in the - actual add (in one of the operands only), which causes a carry over - to the next bit. Then the final output *removes* the extra bits from - the result. - - partition: .... P... P... P... P... (32 bits) - a : .... .... .... .... .... (32 bits) - b : .... .... .... .... .... (32 bits) - exp-a : ....P....P....P....P.... (32+4 bits, P=1 if no partition) - exp-b : ....0....0....0....0.... (32 bits plus 4 zeros) - exp-o : ....xN...xN...xN...xN... (32+4 bits - x to be discarded) - o : .... N... N... N... N... (32 bits - x ignored, N is carry-over) - - :attribute width: the bit width of the input and output. Read-only. - :attribute a: the first input to the adder - :attribute b: the second input to the adder - :attribute output: the sum output - :attribute partition_points: the input partition points. Modification not - supported, except for by ``Signal.eq``. - """ - - def __init__(self, width, partition_points, partition_step=1): - """Create a ``PartitionedAdder``. - - :param width: the bit width of the input and output - :param partition_points: the input partition points - :param partition_step: a multiplier (typically double) step - which in-place "expands" the partition points - """ - self.width = width - self.pmul = partition_step - self.a = Signal(width, reset_less=True) - self.b = Signal(width, reset_less=True) - self.output = Signal(width, reset_less=True) - self.partition_points = PartitionPoints(partition_points) - if not self.partition_points.fits_in_width(width): - raise ValueError("partition_points doesn't fit in width") - expanded_width = 0 - for i in range(self.width): - if i in self.partition_points: - expanded_width += 1 - expanded_width += 1 - self._expanded_width = expanded_width - - def elaborate(self, platform): - """Elaborate this module.""" - m = Module() - expanded_a = Signal(self._expanded_width, reset_less=True) - expanded_b = Signal(self._expanded_width, reset_less=True) - expanded_o = Signal(self._expanded_width, reset_less=True) - - expanded_index = 0 - # store bits in a list, use Cat later. graphviz is much cleaner - al, bl, ol, ea, eb, eo = [],[],[],[],[],[] - - # partition points are "breaks" (extra zeros or 1s) in what would - # otherwise be a massive long add. when the "break" points are 0, - # whatever is in it (in the output) is discarded. however when - # there is a "1", it causes a roll-over carry to the *next* bit. - # we still ignore the "break" bit in the [intermediate] output, - # however by that time we've got the effect that we wanted: the - # carry has been carried *over* the break point. - - for i in range(self.width): - pi = i/self.pmul # double the range of the partition point test - if pi.is_integer() and pi in self.partition_points: - # add extra bit set to 0 + 0 for enabled partition points - # and 1 + 0 for disabled partition points - ea.append(expanded_a[expanded_index]) - al.append(~self.partition_points[pi]) # add extra bit in a - eb.append(expanded_b[expanded_index]) - bl.append(C(0)) # yes, add a zero - expanded_index += 1 # skip the extra point. NOT in the output - ea.append(expanded_a[expanded_index]) - eb.append(expanded_b[expanded_index]) - eo.append(expanded_o[expanded_index]) - al.append(self.a[i]) - bl.append(self.b[i]) - ol.append(self.output[i]) - expanded_index += 1 - - # combine above using Cat - m.d.comb += Cat(*ea).eq(Cat(*al)) - m.d.comb += Cat(*eb).eq(Cat(*bl)) - m.d.comb += Cat(*ol).eq(Cat(*eo)) - - # use only one addition to take advantage of look-ahead carry and - # special hardware on FPGAs - m.d.comb += expanded_o.eq(expanded_a + expanded_b) - return m +from ieee754.part_mul_add.partpoints import PartitionPoints +from ieee754.part_mul_add.adder import PartitionedAdder, MaskedFullAdder FULL_ADDER_INPUT_COUNT = 3 + class AddReduceData: def __init__(self, part_pts, n_inputs, output_width, n_parts): self.part_ops = [Signal(2, name=f"part_ops_{i}", reset_less=True) - for i in range(n_parts)] + for i in range(n_parts)] self.terms = [Signal(output_width, name=f"terms_{i}", - reset_less=True) - for i in range(n_inputs)] + reset_less=True) + for i in range(n_inputs)] self.part_pts = part_pts.like() def eq_from(self, part_pts, inputs, part_ops): return [self.part_pts.eq(part_pts)] + \ [self.terms[i].eq(inputs[i]) - for i in range(len(self.terms))] + \ + for i in range(len(self.terms))] + \ [self.part_ops[i].eq(part_ops[i]) - for i in range(len(self.part_ops))] + for i in range(len(self.part_ops))] def eq(self, rhs): return self.eq_from(rhs.part_pts, rhs.terms, rhs.part_ops) @@ -330,7 +43,7 @@ class FinalReduceData: def __init__(self, part_pts, output_width, n_parts): self.part_ops = [Signal(2, name=f"part_ops_{i}", reset_less=True) - for i in range(n_parts)] + for i in range(n_parts)] self.output = Signal(output_width, reset_less=True) self.part_pts = part_pts.like() @@ -338,7 +51,7 @@ class FinalReduceData: return [self.part_pts.eq(part_pts)] + \ [self.output.eq(output)] + \ [self.part_ops[i].eq(part_ops[i]) - for i in range(len(self.part_ops))] + for i in range(len(self.part_ops))] def eq(self, rhs): return self.eq_from(rhs.part_pts, rhs.output, rhs.part_ops) @@ -349,7 +62,7 @@ class FinalAdd(PipeModBase): """ def __init__(self, pspec, lidx, n_inputs, partition_points, - partition_step=1): + partition_step=1): self.lidx = lidx self.partition_step = partition_step self.output_width = pspec.width * 2 @@ -367,7 +80,7 @@ class FinalAdd(PipeModBase): def ospec(self): return FinalReduceData(self.partition_points, - self.output_width, self.n_parts) + self.output_width, self.n_parts) def elaborate(self, platform): """Elaborate this module.""" @@ -411,7 +124,7 @@ class AddReduceSingle(PipeModBase): """ def __init__(self, pspec, lidx, n_inputs, partition_points, - partition_step=1): + partition_step=1): """Create an ``AddReduce``. :param inputs: input ``Signal``s to be summed. @@ -516,7 +229,7 @@ class AddReduceSingle(PipeModBase): # copy reg part points and part ops to output m.d.comb += self.o.part_pts.eq(self.i.part_pts) m.d.comb += [self.o.part_ops[i].eq(self.i.part_ops[i]) - for i in range(len(self.i.part_ops))] + for i in range(len(self.i.part_ops))] # set up the partition mask (for the adders) part_mask = Signal(self.output_width, reset_less=True) @@ -604,7 +317,7 @@ class AddReduce(AddReduceInternal, Elaboratable): """ def __init__(self, inputs, output_width, register_levels, part_pts, - part_ops, partition_step=1): + part_ops, partition_step=1): """Create an ``AddReduce``. :param inputs: input ``Signal``s to be summed. @@ -618,7 +331,7 @@ class AddReduce(AddReduceInternal, Elaboratable): self._part_ops = part_ops n_parts = len(part_ops) self.i = AddReduceData(part_pts, len(inputs), - output_width, n_parts) + output_width, n_parts) AddReduceInternal.__init__(self, pspec, n_inputs, part_pts, partition_step) self.o = FinalReduceData(part_pts, output_width, n_parts) @@ -639,7 +352,8 @@ class AddReduce(AddReduceInternal, Elaboratable): """Elaborate this module.""" m = Module() - m.d.comb += self.i.eq_from(self._part_pts, self._inputs, self._part_ops) + m.d.comb += self.i.eq_from(self._part_pts, + self._inputs, self._part_ops) for i, next_level in enumerate(self.levels): setattr(m.submodules, "next_level%d" % i, next_level) @@ -651,7 +365,7 @@ class AddReduce(AddReduceInternal, Elaboratable): m.d.sync += mcur.i.eq(i) else: m.d.comb += mcur.i.eq(i) - i = mcur.o # for next loop + i = mcur.o # for next loop # output comes from last module m.d.comb += self.o.eq(i) @@ -708,7 +422,7 @@ class ProductTerm(Elaboratable): else: term_enabled = None self.enabled = term_enabled - self.term.name = "term_%d_%d" % (a_index, b_index) # rename + self.term.name = "term_%d_%d" % (a_index, b_index) # rename def elaborate(self, platform): @@ -751,6 +465,7 @@ class ProductTerms(Elaboratable): this class is to be wrapped with a for-loop on the "a" operand. it creates a second-level for-loop on the "b" operand. """ + def __init__(self, width, twidth, pbwid, a_index, blen): self.a_index = a_index self.blen = blen @@ -760,8 +475,8 @@ class ProductTerms(Elaboratable): self.a = Signal(twidth//2, reset_less=True) self.b = Signal(twidth//2, reset_less=True) self.pb_en = Signal(pbwid, reset_less=True) - self.terms = [Signal(twidth, name="term%d"%i, reset_less=True) \ - for i in range(blen)] + self.terms = [Signal(twidth, name="term%d" % i, reset_less=True) + for i in range(blen)] def elaborate(self, platform): @@ -796,7 +511,7 @@ class LSBNegTerm(Elaboratable): m = Module() comb = m.d.comb bit_wid = self.bit_width - ext = Repl(0, bit_wid) # extend output to HI part + ext = Repl(0, bit_wid) # extend output to HI part # determine sign of each incoming number *in this partition* enabled = Signal(reset_less=True) @@ -870,6 +585,7 @@ class Part(Elaboratable): the extra terms - as separate terms - are then thrown at the AddReduce alongside the multiplication part-results. """ + def __init__(self, part_pts, width, n_parts, pbwid): self.pbwid = pbwid @@ -879,14 +595,14 @@ class Part(Elaboratable): self.a = Signal(64, reset_less=True) self.b = Signal(64, reset_less=True) self.a_signed = [Signal(name=f"a_signed_{i}", reset_less=True) - for i in range(8)] + for i in range(8)] self.b_signed = [Signal(name=f"_b_signed_{i}", reset_less=True) - for i in range(8)] + for i in range(8)] self.pbs = Signal(pbwid, reset_less=True) # outputs self.parts = [Signal(name=f"part_{i}", reset_less=True) - for i in range(n_parts)] + for i in range(n_parts)] self.not_a_term = Signal(width, reset_less=True) self.neg_lsb_a_term = Signal(width, reset_less=True) @@ -905,10 +621,10 @@ class Part(Elaboratable): byte_count = 8 // len(parts) not_a_term, neg_lsb_a_term, not_b_term, neg_lsb_b_term = ( - self.not_a_term, self.neg_lsb_a_term, - self.not_b_term, self.neg_lsb_b_term) + self.not_a_term, self.neg_lsb_a_term, + self.not_b_term, self.neg_lsb_b_term) - byte_width = 8 // len(parts) # byte width + byte_width = 8 // len(parts) # byte width bit_wid = 8 * byte_width # bit width nat, nbt, nla, nlb = [], [], [], [] for i in range(len(parts)): @@ -917,8 +633,8 @@ class Part(Elaboratable): setattr(m.submodules, "lnt_%d_a_%d" % (bit_wid, i), pa) m.d.comb += pa.part.eq(parts[i]) m.d.comb += pa.op.eq(self.a.bit_select(bit_wid * i, bit_wid)) - m.d.comb += pa.signed.eq(self.b_signed[i * byte_width]) # yes b - m.d.comb += pa.msb.eq(self.b[(i + 1) * bit_wid - 1]) # really, b + m.d.comb += pa.signed.eq(self.b_signed[i * byte_width]) # yes b + m.d.comb += pa.msb.eq(self.b[(i + 1) * bit_wid - 1]) # really, b nat.append(pa.nt) nla.append(pa.nl) @@ -927,8 +643,8 @@ class Part(Elaboratable): setattr(m.submodules, "lnt_%d_b_%d" % (bit_wid, i), pb) m.d.comb += pb.part.eq(parts[i]) m.d.comb += pb.op.eq(self.b.bit_select(bit_wid * i, bit_wid)) - m.d.comb += pb.signed.eq(self.a_signed[i * byte_width]) # yes a - m.d.comb += pb.msb.eq(self.a[(i + 1) * bit_wid - 1]) # really, a + m.d.comb += pb.signed.eq(self.a_signed[i * byte_width]) # yes a + m.d.comb += pb.msb.eq(self.a[(i + 1) * bit_wid - 1]) # really, a nbt.append(pb.nt) nlb.append(pb.nl) @@ -937,7 +653,7 @@ class Part(Elaboratable): not_b_term.eq(Cat(*nbt)), neg_lsb_a_term.eq(Cat(*nla)), neg_lsb_b_term.eq(Cat(*nlb)), - ] + ] return m @@ -946,11 +662,12 @@ class IntermediateOut(Elaboratable): """ selects the HI/LO part of the multiplication, for a given bit-width the output is also reconstructed in its SIMD (partition) lanes. """ + def __init__(self, width, out_wid, n_parts): self.width = width self.n_parts = n_parts self.part_ops = [Signal(2, name="dpop%d" % i, reset_less=True) - for i in range(8)] + for i in range(8)] self.intermed = Signal(out_wid, reset_less=True) self.output = Signal(out_wid//2, reset_less=True) @@ -979,6 +696,7 @@ class FinalOut(PipeModBase): that some partitions requested 8-bit computation whilst others requested 16 or 32 bit. """ + def __init__(self, pspec, part_pts): self.part_pts = part_pts @@ -1042,9 +760,9 @@ class FinalOut(PipeModBase): m.d.comb += op.eq( Mux(d8[i] | d16[i // 2], Mux(d8[i], i8.bit_select(i * 8, 8), - i16.bit_select(i * 8, 8)), + i16.bit_select(i * 8, 8)), Mux(d32[i // 4], i32.bit_select(i * 8, 8), - i64.bit_select(i * 8, 8)))) + i64.bit_select(i * 8, 8)))) ol.append(op) # create outputs @@ -1057,6 +775,7 @@ class FinalOut(PipeModBase): class OrMod(Elaboratable): """ ORs four values together in a hierarchical tree """ + def __init__(self, wid): self.wid = wid self.orin = [Signal(wid, name="orin%d" % i, reset_less=True) @@ -1090,7 +809,7 @@ class Signs(Elaboratable): asig = self.part_ops != OP_MUL_UNSIGNED_HIGH bsig = (self.part_ops == OP_MUL_LOW) \ - | (self.part_ops == OP_MUL_SIGNED_HIGH) + | (self.part_ops == OP_MUL_SIGNED_HIGH) m.d.comb += self.a_signed.eq(asig) m.d.comb += self.b_signed.eq(bsig) @@ -1101,21 +820,21 @@ class IntermediateData: def __init__(self, part_pts, output_width, n_parts): self.part_ops = [Signal(2, name=f"part_ops_{i}", reset_less=True) - for i in range(n_parts)] + for i in range(n_parts)] self.part_pts = part_pts.like() self.outputs = [Signal(output_width, name="io%d" % i, reset_less=True) - for i in range(4)] + for i in range(4)] # intermediates (needed for unit tests) self.intermediate_output = Signal(output_width) def eq_from(self, part_pts, outputs, intermediate_output, - part_ops): + part_ops): return [self.part_pts.eq(part_pts)] + \ [self.intermediate_output.eq(intermediate_output)] + \ [self.outputs[i].eq(outputs[i]) - for i in range(4)] + \ + for i in range(4)] + \ [self.part_ops[i].eq(part_ops[i]) - for i in range(len(self.part_ops))] + for i in range(len(self.part_ops))] def eq(self, rhs): return self.eq_from(rhs.part_pts, rhs.outputs, @@ -1136,7 +855,7 @@ class InputData: return [self.part_pts.eq(part_pts)] + \ [self.a.eq(a), self.b.eq(b)] + \ [self.part_ops[i].eq(part_ops[i]) - for i in range(len(self.part_ops))] + for i in range(len(self.part_ops))] def eq(self, rhs): return self.eq_from(rhs.part_pts, rhs.a, rhs.b, rhs.part_ops) @@ -1145,7 +864,7 @@ class InputData: class OutputData: def __init__(self): - self.intermediate_output = Signal(128) # needed for unit tests + self.intermediate_output = Signal(128) # needed for unit tests self.output = Signal(64) def eq(self, rhs): @@ -1231,9 +950,9 @@ class AllTerms(PipeModBase): m.submodules.nla_or = nla_or = OrMod(128) m.submodules.nlb_or = nlb_or = OrMod(128) for l, mod in [(nat_l, nat_or), - (nbt_l, nbt_or), - (nla_l, nla_or), - (nlb_l, nlb_or)]: + (nbt_l, nbt_or), + (nla_l, nla_or), + (nlb_l, nlb_or)]: for i in range(len(l)): m.d.comb += mod.orin[i].eq(l[i]) terms.append(mod.orout) @@ -1245,7 +964,7 @@ class AllTerms(PipeModBase): # copy reg part points and part ops to output m.d.comb += self.o.part_pts.eq(eps) m.d.comb += [self.o.part_ops[i].eq(self.i.part_ops[i]) - for i in range(len(self.i.part_ops))] + for i in range(len(self.i.part_ops))] return m @@ -1344,7 +1063,7 @@ class Mul8_16_32_64(Elaboratable): flip-flops are to be inserted. """ - self.id_wid = 0 # num_bits(num_rows) + self.id_wid = 0 # num_bits(num_rows) self.op_wid = 0 self.pspec = PipelineSpec(64, self.id_wid, self.op_wid, n_ops=3) self.pspec.n_parts = 8 @@ -1382,7 +1101,8 @@ class Mul8_16_32_64(Elaboratable): terms = t.o.terms - at = AddReduceInternal(self.pspec, n_inputs, part_pts, partition_step=2) + at = AddReduceInternal(self.pspec, n_inputs, + part_pts, partition_step=2) i = t.o for idx in range(len(at.levels)): @@ -1393,7 +1113,7 @@ class Mul8_16_32_64(Elaboratable): m.d.sync += o.eq(mcur.process(i)) else: m.d.comb += o.eq(mcur.process(i)) - i = o # for next loop + i = o # for next loop interm = Intermediates(self.pspec, part_pts) interm.setup(m, i)