1 # SPDX-License-Identifier: LGPL-2.1-or-later
2 # See Notices.txt for copyright information
3 """Integer Multiplication."""
5 from nmigen
import Signal
, Module
, Value
, Elaboratable
, Cat
, C
, Mux
, Repl
6 from nmigen
.hdl
.ast
import Assign
7 from abc
import ABCMeta
, abstractmethod
8 from nmigen
.cli
import main
class PartitionPoints(dict):
    """Partition points and corresponding ``Value``s.

    The points at where an ALU is partitioned along with ``Value``s that
    specify if the corresponding partition points are enabled.

    For example: ``{1: True, 5: True, 10: True}`` with
    ``width == 16`` specifies that the ALU is split into 4 sections:

    * bits 0 <= ``i`` < 1
    * bits 1 <= ``i`` < 5
    * bits 5 <= ``i`` < 10
    * bits 10 <= ``i`` < 16

    If the partition_points were instead ``{1: True, 5: a, 10: True}``
    where ``a`` is a 1-bit ``Signal``:

    * If ``a`` is asserted:
        * bits 0 <= ``i`` < 1
        * bits 1 <= ``i`` < 5
        * bits 5 <= ``i`` < 10
        * bits 10 <= ``i`` < 16
    * Otherwise:
        * bits 0 <= ``i`` < 1
        * bits 1 <= ``i`` < 10
        * bits 10 <= ``i`` < 16
    """

    def __init__(self, partition_points=None):
        """Create a new ``PartitionPoints``.

        :param partition_points: the input partition points to values mapping.
        :raises TypeError: if a point is not an ``int``.
        :raises ValueError: if a point is negative.
        """
        super().__init__()
        if partition_points is not None:
            for point, enabled in partition_points.items():
                if not isinstance(point, int):
                    raise TypeError("point must be a non-negative integer")
                if point < 0:
                    raise ValueError("point must be a non-negative integer")
                # Store every enable as an HDL ``Value`` so that constants
                # and signals can be handled uniformly later on.
                self[point] = Value.wrap(enabled)

    def like(self, name=None, src_loc_at=0):
        """Create a new ``PartitionPoints`` with ``Signal``s for all values.

        :param name: the base name for the new ``Signal``s.
        :param src_loc_at: extra stack depth used when the base name has to
            be derived from the caller's assignment target.
        :returns: a ``PartitionPoints`` with the same keys, where every value
            is a fresh ``Signal`` named ``{name}_{point}`` with the shape of
            the corresponding value in ``self``.
        """
        if name is None:
            # get variable name
            name = Signal(src_loc_at=1 + src_loc_at).name
        retval = PartitionPoints()
        for point, enabled in self.items():
            retval[point] = Signal(enabled.shape(), name=f"{name}_{point}")
        return retval

    def eq(self, rhs):
        """Assign ``PartitionPoints`` using ``Signal.eq``.

        This is a generator: it yields one assignment per partition point.
        The key check therefore runs when iteration starts.

        :param rhs: the ``PartitionPoints`` to assign from.
        :raises ValueError: if ``rhs`` does not have the same set of points.
        """
        if set(self.keys()) != set(rhs.keys()):
            raise ValueError("incompatible point set")
        for point, enabled in self.items():
            yield enabled.eq(rhs[point])

    def as_mask(self, width):
        """Create a bit-mask from `self`.

        Each bit in the returned mask is clear only if the partition point at
        the same bit-index is enabled.

        :param width: the bit width of the resulting mask
        """
        bits = []
        for i in range(width):
            if i in self:
                bits.append(~self[i])
            else:
                # no partition point here: the bit is always set
                bits.append(C(1, 1))
        return Cat(*bits)

    def get_max_partition_count(self, width):
        """Get the maximum number of partitions.

        Gets the number of partitions when all partition points are enabled.
        Only points below ``width`` are counted.

        :param width: the bit width being partitioned
        """
        return 1 + sum(1 for point in self if point < width)

    def fits_in_width(self, width):
        """Check if all partition points are smaller than `width`."""
        return all(point < width for point in self)
class FullAdder(Elaboratable):
    """Bitwise full adder (3-to-2 compressor).

    Every bit position is handled independently: ``sum`` is the XOR of the
    three inputs and ``carry`` is their majority. No carry propagates between
    bit positions inside this module.

    :attribute in0: the first input
    :attribute in1: the second input
    :attribute in2: the third input
    :attribute sum: the sum output
    :attribute carry: the carry output
    """

    def __init__(self, width):
        """Create a ``FullAdder``.

        :param width: the bit width of the input and output
        """
        self.in0 = Signal(width)
        self.in1 = Signal(width)
        self.in2 = Signal(width)
        self.sum = Signal(width)
        self.carry = Signal(width)

    def elaborate(self, platform):
        """Elaborate this module."""
        m = Module()
        m.d.comb += self.sum.eq(self.in0 ^ self.in1 ^ self.in2)
        # majority function: a carry is produced when at least two inputs
        # are set
        m.d.comb += self.carry.eq((self.in0 & self.in1)
                                  | (self.in1 & self.in2)
                                  | (self.in2 & self.in0))
        return m
class PartitionedAdder(Elaboratable):
    """Partitioned Adder.

    Both operands are copied into wider "expanded" signals that carry one
    extra bit in front of every partition point. That extra bit either blocks
    or passes the carry, so a single wide addition serves every partition.

    :attribute width: the bit width of the input and output. Read-only.
    :attribute a: the first input to the adder
    :attribute b: the second input to the adder
    :attribute output: the sum output
    :attribute partition_points: the input partition points. Modification not
        supported, except for by ``Signal.eq``.
    """

    def __init__(self, width, partition_points):
        """Create a ``PartitionedAdder``.

        :param width: the bit width of the input and output
        :param partition_points: the input partition points
        :raises ValueError: if a partition point is not below ``width``.
        """
        self.width = width
        self.a = Signal(width)
        self.b = Signal(width)
        self.output = Signal(width)
        self.partition_points = PartitionPoints(partition_points)
        if not self.partition_points.fits_in_width(width):
            raise ValueError("partition_points doesn't fit in width")
        # one bit per real bit, plus one extra bit per partition point
        expanded_width = self.width + sum(
            1 for i in range(self.width) if i in self.partition_points)
        self._expanded_width = expanded_width
        self._expanded_a = Signal(expanded_width)
        self._expanded_b = Signal(expanded_width)
        self._expanded_output = Signal(expanded_width)

    def elaborate(self, platform):
        """Elaborate this module."""
        m = Module()
        expanded_index = 0
        # store bits in a list, use Cat later. graphviz is much cleaner
        al, bl, ol = [], [], []  # source bits of a and b, destination of output
        ea, eb, eo = [], [], []  # matching bits of the expanded signals

        # partition points are "breaks" (extra zeros) in what would otherwise
        # be a massive long add.
        for i in range(self.width):
            if i in self.partition_points:
                # add extra bit set to 0 + 0 for enabled partition points
                # and 1 + 0 for disabled partition points
                ea.append(self._expanded_a[expanded_index])
                al.append(~self.partition_points[i])
                eb.append(self._expanded_b[expanded_index])
                bl.append(C(0, 1))
                # the extra bit has no counterpart in ``output``
                expanded_index += 1
            ea.append(self._expanded_a[expanded_index])
            al.append(self.a[i])
            eb.append(self._expanded_b[expanded_index])
            bl.append(self.b[i])
            eo.append(self._expanded_output[expanded_index])
            ol.append(self.output[i])
            expanded_index += 1

        # combine above using Cat
        m.d.comb += Cat(*ea).eq(Cat(*al))
        m.d.comb += Cat(*eb).eq(Cat(*bl))
        m.d.comb += Cat(*ol).eq(Cat(*eo))
        # use only one addition to take advantage of look-ahead carry and
        # special hardware on FPGAs
        m.d.comb += self._expanded_output.eq(
            self._expanded_a + self._expanded_b)
        return m
# Number of inputs one ``FullAdder`` consumes (``in0``, ``in1``, ``in2``);
# ``AddReduce`` groups its inputs in chunks of this size.
FULL_ADDER_INPUT_COUNT = 3
class AddReduce(Elaboratable):
    """Add list of numbers together.

    The inputs are reduced with layers of ``FullAdder``s, each layer turning
    every group of three terms into two. Once at most two terms are left, a
    final ``PartitionedAdder`` produces the sum. Each layer is a nested
    ``AddReduce`` instance.

    :attribute inputs: input ``Signal``s to be summed. Modification not
        supported, except for by ``Signal.eq``.
    :attribute register_levels: List of nesting levels that should have
        pipeline registers.
    :attribute output: output sum.
    :attribute partition_points: the input partition points. Modification not
        supported, except for by ``Signal.eq``.
    """

    def __init__(self, inputs, output_width, register_levels, partition_points):
        """Create an ``AddReduce``.

        :param inputs: input ``Signal``s to be summed.
        :param output_width: bit-width of ``output``.
        :param register_levels: List of nesting levels that should have
            pipeline registers.
        :param partition_points: the input partition points.
        :raises ValueError: if a partition point is not below
            ``output_width``, or a register level exceeds the number of
            adder levels needed for ``inputs``.
        """
        self.inputs = list(inputs)
        self._resized_inputs = [
            Signal(output_width, name=f"resized_inputs[{i}]")
            for i in range(len(self.inputs))]
        self.register_levels = list(register_levels)
        self.output = Signal(output_width)
        self.partition_points = PartitionPoints(partition_points)
        if not self.partition_points.fits_in_width(output_width):
            raise ValueError("partition_points doesn't fit in output_width")
        self._reg_partition_points = self.partition_points.like()
        max_level = AddReduce.get_max_level(len(self.inputs))
        for level in self.register_levels:
            if level > max_level:
                raise ValueError(
                    "not enough adder levels for specified register levels")

    @staticmethod
    def get_max_level(input_count):
        """Get the maximum level.

        All ``register_levels`` must be less than or equal to the maximum
        level.

        :param input_count: number of terms fed to the outermost level.
        :returns: the number of full-adder layers needed before no group of
            ``FULL_ADDER_INPUT_COUNT`` terms remains.
        """
        retval = 0
        while True:
            groups = AddReduce.full_adder_groups(input_count)
            if len(groups) == 0:
                return retval
            # every group of three terms becomes two; leftovers pass through
            input_count %= FULL_ADDER_INPUT_COUNT
            input_count += 2 * len(groups)
            retval += 1

    def next_register_levels(self):
        """``Iterable`` of ``register_levels`` for next recursive level."""
        for level in self.register_levels:
            # level 0 belongs to this instance; the others move one closer
            if level > 0:
                yield level - 1

    @staticmethod
    def full_adder_groups(input_count):
        """Get ``inputs`` indices for which a full adder should be built."""
        return range(0,
                     input_count - FULL_ADDER_INPUT_COUNT + 1,
                     FULL_ADDER_INPUT_COUNT)

    def elaborate(self, platform):
        """Elaborate this module."""
        m = Module()

        # resize inputs to correct bit-width and optionally add in
        # pipeline registers
        resized_input_assignments = [
            resized.eq(original)
            for resized, original in zip(self._resized_inputs, self.inputs)]
        if 0 in self.register_levels:
            m.d.sync += resized_input_assignments
            m.d.sync += self._reg_partition_points.eq(self.partition_points)
        else:
            m.d.comb += resized_input_assignments
            m.d.comb += self._reg_partition_points.eq(self.partition_points)

        groups = AddReduce.full_adder_groups(len(self.inputs))
        # if there are no full adders to create, then we handle the base cases
        # and return, otherwise we go on to the recursive case
        if len(groups) == 0:
            if len(self.inputs) == 0:
                # use 0 as the default output value
                m.d.comb += self.output.eq(0)
            elif len(self.inputs) == 1:
                # handle single input
                m.d.comb += self.output.eq(self._resized_inputs[0])
            else:
                # base case for adding 2 or more inputs, which get recursively
                # reduced to 2 inputs
                assert len(self.inputs) == 2
                adder = PartitionedAdder(len(self.output),
                                         self._reg_partition_points)
                m.submodules.final_adder = adder
                m.d.comb += adder.a.eq(self._resized_inputs[0])
                m.d.comb += adder.b.eq(self._resized_inputs[1])
                m.d.comb += self.output.eq(adder.output)
            return m

        # go on to handle recursive case
        intermediate_terms = []

        def add_intermediate_term(value):
            intermediate_term = Signal(
                len(self.output),
                name=f"intermediate_terms[{len(intermediate_terms)}]")
            intermediate_terms.append(intermediate_term)
            m.d.comb += intermediate_term.eq(value)

        # store mask in intermediary (simplifies graph)
        part_mask = Signal(len(self.output), reset_less=True)
        mask = self._reg_partition_points.as_mask(len(self.output))
        m.d.comb += part_mask.eq(mask)

        # create full adders for this recursive level.
        # this shrinks N terms to 2 * (N // 3) plus the remainder
        for i in groups:
            adder_i = FullAdder(len(self.output))
            setattr(m.submodules, f"adder_{i}", adder_i)
            m.d.comb += adder_i.in0.eq(self._resized_inputs[i])
            m.d.comb += adder_i.in1.eq(self._resized_inputs[i + 1])
            m.d.comb += adder_i.in2.eq(self._resized_inputs[i + 2])
            add_intermediate_term(adder_i.sum)
            shifted_carry = adder_i.carry << 1
            # mask out carry bits to prevent carries between partitions
            add_intermediate_term(shifted_carry & part_mask)
        # handle the remaining inputs.
        if len(self.inputs) % FULL_ADDER_INPUT_COUNT == 1:
            add_intermediate_term(self._resized_inputs[-1])
        elif len(self.inputs) % FULL_ADDER_INPUT_COUNT == 2:
            # Just pass the terms to the next layer, since we wouldn't gain
            # anything by using a half adder since there would still be 2 terms
            # and just passing the terms to the next layer saves gates.
            add_intermediate_term(self._resized_inputs[-2])
            add_intermediate_term(self._resized_inputs[-1])
        else:
            assert len(self.inputs) % FULL_ADDER_INPUT_COUNT == 0
        # recursive invocation of ``AddReduce``
        next_level = AddReduce(intermediate_terms,
                               len(self.output),
                               self.next_register_levels(),
                               self._reg_partition_points)
        m.submodules.next_level = next_level
        m.d.comb += self.output.eq(next_level.output)
        return m
# Operation codes assigned to ``Mul8_16_32_64.part_ops``. Each of the codes
# below selects the MSB half of the product; they differ in operand signedness.
OP_MUL_SIGNED_HIGH = 1  # a and b are both signed (RISC-V `mulh`)
OP_MUL_SIGNED_UNSIGNED_HIGH = 2  # a is signed, b is unsigned
OP_MUL_UNSIGNED_HIGH = 3  # a and b are both unsigned (RISC-V `mulhu`)
def get_term(value, shift=0, enabled=None):
    """Build one addend: optionally gate ``value``, then shift it left.

    :param value: the ``Value`` to turn into a term.
    :param shift: number of zero bits to place below ``value``.
    :param enabled: optional 1-bit ``Value``; when given, the term is
        replaced by 0 whenever it is deasserted.
    :returns: the gated and shifted ``Value``.
    :raises ValueError: if ``shift`` is negative.
    """
    if shift < 0:
        raise ValueError("shift must be non-negative")
    if enabled is not None:
        value = Mux(enabled, value, 0)
    if shift > 0:
        # a left shift written as concatenation with low-order zeros
        value = Cat(Repl(C(0, 1), shift), value)
    return value
class Term(Elaboratable):
    """Wrap ``get_term`` in a module with named input and output signals.

    :attribute ti: the term input.
    :attribute term: the output, ``ti`` gated by ``enabled`` and shifted
        left by ``shift`` bits.
    :attribute enabled: optional enable ``Value``; ``None`` means the term
        is always enabled.
    """

    def __init__(self, width, twidth, shift=0, enabled=None):
        """Create a ``Term``.

        :param width: bit width of the input ``ti``.
        :param twidth: bit width of the output ``term``.
        :param shift: number of zero bits placed below ``ti``.
        :param enabled: optional enable ``Value``.
        """
        self.width = width
        self.shift = shift
        self.enabled = enabled
        self.ti = Signal(width, reset_less=True)
        self.term = Signal(twidth, reset_less=True)

    def elaborate(self, platform):
        """Elaborate this module."""
        m = Module()
        m.d.comb += self.term.eq(get_term(self.ti, self.shift, self.enabled))
        return m
class ProductTerm(Elaboratable):
    """One partial product ``a * b``, shifted into place and gated.

    The product is shifted left by ``8 * (a_index + b_index)`` bits. It is
    enabled only when none of the ``pb_en`` bits in the index range
    ``min(a_index, b_index) <= i < max(a_index, b_index)`` is set. When the
    two indices are equal that range is empty and the term is always enabled.

    ``Term.__init__`` and ``Term.elaborate`` are called explicitly as plain
    functions, since this class does not list ``Term`` as a base.

    :attribute a: the first factor.
    :attribute b: the second factor.
    :attribute pb_en: the partition enable bits.
    """

    def __init__(self, width, twidth, pbwid, a_index, b_index):
        """Create a ``ProductTerm``.

        :param width: bit width of each factor.
        :param twidth: bit width of the resulting term.
        :param pbwid: bit width of ``pb_en``.
        :param a_index: index of the ``a`` slice this term multiplies.
        :param b_index: index of the ``b`` slice this term multiplies.
        """
        self.a_index = a_index
        self.b_index = b_index
        shift = 8 * (self.a_index + self.b_index)
        self.a = Signal(width, reset_less=True)
        self.b = Signal(width, reset_less=True)
        self.pb_en = Signal(pbwid, reset_less=True)

        low = min(self.a_index, self.b_index)
        high = max(self.a_index, self.b_index)
        # the enable bits lying between the two indices
        self.tl = [self.pb_en[i] for i in range(low, high)]
        if self.tl:
            name = "te_%d_%d" % (self.a_index, self.b_index)
            term_enabled = Signal(name=name, reset_less=True)
        else:
            # nothing can disable this term
            term_enabled = None

        Term.__init__(self, width * 2, twidth, shift, term_enabled)

    def elaborate(self, platform):
        """Elaborate this module."""
        m = Term.elaborate(self, platform)
        if self.enabled is not None:
            # enabled only while none of the collected bits is set
            m.d.comb += self.enabled.eq(~(Cat(*self.tl).bool()))
        m.d.comb += self.ti.eq(self.a * self.b)
        return m
428 class Mul8_16_32_64(Elaboratable
):
429 """Signed/Unsigned 8/16/32/64-bit partitioned integer multiplier.
431 Supports partitioning into any combination of 8, 16, 32, and 64-bit
432 partitions on naturally-aligned boundaries. Supports the operation being
433 set for each partition independently.
435 :attribute part_pts: the input partition points. Has a partition point at
436 multiples of 8 in 0 < i < 64. Each partition point's associated
437 ``Value`` is a ``Signal``. Modification not supported, except for by
439 :attribute part_ops: the operation for each byte. The operation for a
440 particular partition is selected by assigning the selected operation
441 code to each byte in the partition. The allowed operation codes are:
443 :attribute OP_MUL_LOW: the LSB half of the product. Equivalent to
444 RISC-V's `mul` instruction.
445 :attribute OP_MUL_SIGNED_HIGH: the MSB half of the product where both
446 ``a`` and ``b`` are signed. Equivalent to RISC-V's `mulh`
448 :attribute OP_MUL_SIGNED_UNSIGNED_HIGH: the MSB half of the product
449 where ``a`` is signed and ``b`` is unsigned. Equivalent to RISC-V's
450 `mulhsu` instruction.
451 :attribute OP_MUL_UNSIGNED_HIGH: the MSB half of the product where both
452 ``a`` and ``b`` are unsigned. Equivalent to RISC-V's `mulhu`
456 def __init__(self
, register_levels
= ()):
457 self
.part_pts
= PartitionPoints()
458 for i
in range(8, 64, 8):
459 self
.part_pts
[i
] = Signal(name
=f
"part_pts_{i}")
460 self
.part_ops
= [Signal(2, name
=f
"part_ops_{i}") for i
in range(8)]
463 self
.output
= Signal(64)
464 self
.register_levels
= list(register_levels
)
465 self
._intermediate
_output
= Signal(128)
466 self
._delayed
_part
_ops
= [
467 [Signal(2, name
=f
"_delayed_part_ops_{delay}_{i}")
469 for delay
in range(1 + len(self
.register_levels
))]
470 self
._part
_8 = [Signal(name
=f
"_part_8_{i}") for i
in range(8)]
471 self
._part
_16 = [Signal(name
=f
"_part_16_{i}") for i
in range(4)]
472 self
._part
_32 = [Signal(name
=f
"_part_32_{i}") for i
in range(2)]
473 self
._part
_64 = [Signal(name
=f
"_part_64")]
474 self
._delayed
_part
_8 = [
475 [Signal(name
=f
"_delayed_part_8_{delay}_{i}")
477 for delay
in range(1 + len(self
.register_levels
))]
478 self
._delayed
_part
_16 = [
479 [Signal(name
=f
"_delayed_part_16_{delay}_{i}")
481 for delay
in range(1 + len(self
.register_levels
))]
482 self
._delayed
_part
_32 = [
483 [Signal(name
=f
"_delayed_part_32_{delay}_{i}")
485 for delay
in range(1 + len(self
.register_levels
))]
486 self
._delayed
_part
_64 = [
487 [Signal(name
=f
"_delayed_part_64_{delay}")]
488 for delay
in range(1 + len(self
.register_levels
))]
489 self
._output
_64 = Signal(64)
490 self
._output
_32 = Signal(64)
491 self
._output
_16 = Signal(64)
492 self
._output
_8 = Signal(64)
493 self
._a
_signed
= [Signal(name
=f
"_a_signed_{i}") for i
in range(8)]
494 self
._b
_signed
= [Signal(name
=f
"_b_signed_{i}") for i
in range(8)]
495 self
._not
_a
_term
_8 = Signal(128)
496 self
._neg
_lsb
_a
_term
_8 = Signal(128)
497 self
._not
_b
_term
_8 = Signal(128)
498 self
._neg
_lsb
_b
_term
_8 = Signal(128)
499 self
._not
_a
_term
_16 = Signal(128)
500 self
._neg
_lsb
_a
_term
_16 = Signal(128)
501 self
._not
_b
_term
_16 = Signal(128)
502 self
._neg
_lsb
_b
_term
_16 = Signal(128)
503 self
._not
_a
_term
_32 = Signal(128)
504 self
._neg
_lsb
_a
_term
_32 = Signal(128)
505 self
._not
_b
_term
_32 = Signal(128)
506 self
._neg
_lsb
_b
_term
_32 = Signal(128)
507 self
._not
_a
_term
_64 = Signal(128)
508 self
._neg
_lsb
_a
_term
_64 = Signal(128)
509 self
._not
_b
_term
_64 = Signal(128)
510 self
._neg
_lsb
_b
_term
_64 = Signal(128)
512 def _part_byte(self
, index
):
513 if index
== -1 or index
== 7:
515 assert index
>= 0 and index
< 8
516 return self
.part_pts
[index
* 8 + 8]
518 def elaborate(self
, platform
):
522 pbs
= Signal(8, reset_less
=True)
525 pb
= Signal(name
="pb%d" % i
, reset_less
=True)
526 m
.d
.comb
+= pb
.eq(self
._part
_byte
(i
))
528 m
.d
.comb
+= pbs
.eq(Cat(*tl
))
530 for i
in range(len(self
.part_ops
)):
531 m
.d
.comb
+= self
._delayed
_part
_ops
[0][i
].eq(self
.part_ops
[i
])
532 m
.d
.sync
+= [self
._delayed
_part
_ops
[j
+ 1][i
]
533 .eq(self
._delayed
_part
_ops
[j
][i
])
534 for j
in range(len(self
.register_levels
))]
536 for parts
, delayed_parts
in [(self
._part
_64, self
._delayed
_part
_64),
537 (self
._part
_32, self
._delayed
_part
_32),
538 (self
._part
_16, self
._delayed
_part
_16),
539 (self
._part
_8, self
._delayed
_part
_8)]:
540 byte_count
= 8 // len(parts
)
541 for i
in range(len(parts
)):
543 pbl
.append(~pbs
[i
* byte_count
- 1])
544 for j
in range(i
* byte_count
, (i
+ 1) * byte_count
- 1):
546 pbl
.append(~pbs
[(i
+ 1) * byte_count
- 1])
547 value
= Signal(len(pbl
), reset_less
=True)
548 m
.d
.comb
+= value
.eq(Cat(*pbl
))
549 m
.d
.comb
+= parts
[i
].eq(~
(value
).bool())
550 m
.d
.comb
+= delayed_parts
[0][i
].eq(parts
[i
])
551 m
.d
.sync
+= [delayed_parts
[j
+ 1][i
].eq(delayed_parts
[j
][i
])
552 for j
in range(len(self
.register_levels
))]
556 for a_index
in range(8):
557 for b_index
in range(8):
558 t
= ProductTerm(8, 128, 8, a_index
, b_index
)
559 setattr(m
.submodules
, "term_%d_%d" % (a_index
, b_index
), t
)
561 m
.d
.comb
+= t
.a
.eq(self
.a
.bit_select(a_index
* 8, 8))
562 m
.d
.comb
+= t
.b
.eq(self
.b
.bit_select(b_index
* 8, 8))
563 m
.d
.comb
+= t
.pb_en
.eq(pbs
)
568 a_signed
= self
.part_ops
[i
] != OP_MUL_UNSIGNED_HIGH
569 b_signed
= (self
.part_ops
[i
] == OP_MUL_LOW
) \
570 |
(self
.part_ops
[i
] == OP_MUL_SIGNED_HIGH
)
571 m
.d
.comb
+= self
._a
_signed
[i
].eq(a_signed
)
572 m
.d
.comb
+= self
._b
_signed
[i
].eq(b_signed
)
574 # it's fine to bitwise-or these together since they are never enabled
576 m
.submodules
.nat
= nat
= Term(128, 128)
577 m
.submodules
.nla
= nla
= Term(128, 128)
578 m
.submodules
.nbt
= nbt
= Term(128, 128)
579 m
.submodules
.nlb
= nlb
= Term(128, 128)
580 m
.d
.comb
+= nat
.ti
.eq(self
._not
_a
_term
_8 | self
._not
_a
_term
_16
581 | self
._not
_a
_term
_32 | self
._not
_a
_term
_64)
582 m
.d
.comb
+= nbt
.ti
.eq(self
._not
_b
_term
_8 | self
._not
_b
_term
_16
583 | self
._not
_b
_term
_32 | self
._not
_b
_term
_64)
584 m
.d
.comb
+= nla
.ti
.eq(self
._neg
_lsb
_a
_term
_8 | self
._neg
_lsb
_a
_term
_16
585 | self
._neg
_lsb
_a
_term
_32 | self
._neg
_lsb
_a
_term
_64)
586 m
.d
.comb
+= nlb
.ti
.eq(self
._neg
_lsb
_b
_term
_8 | self
._neg
_lsb
_b
_term
_16
587 | self
._neg
_lsb
_b
_term
_32 | self
._neg
_lsb
_b
_term
_64)
588 terms
.append(nat
.term
)
589 terms
.append(nla
.term
)
590 terms
.append(nbt
.term
)
591 terms
.append(nlb
.term
)
599 self
._neg
_lsb
_a
_term
_8,
601 self
._neg
_lsb
_b
_term
_8,
603 (self
._not
_a
_term
_16,
604 self
._neg
_lsb
_a
_term
_16,
606 self
._neg
_lsb
_b
_term
_16,
608 (self
._not
_a
_term
_32,
609 self
._neg
_lsb
_a
_term
_32,
611 self
._neg
_lsb
_b
_term
_32,
613 (self
._not
_a
_term
_64,
614 self
._neg
_lsb
_a
_term
_64,
616 self
._neg
_lsb
_b
_term
_64,
619 byte_width
= 8 // len(parts
)
620 bit_width
= 8 * byte_width
621 nat
, nbt
, nla
, nlb
= [], [], [], []
622 for i
in range(len(parts
)):
623 be
= parts
[i
] & self
.a
[(i
+ 1) * bit_width
- 1] \
624 & self
._a
_signed
[i
* byte_width
]
625 ae
= parts
[i
] & self
.b
[(i
+ 1) * bit_width
- 1] \
626 & self
._b
_signed
[i
* byte_width
]
627 a_enabled
= Signal(name
="a_en_%d" % i
, reset_less
=True)
628 b_enabled
= Signal(name
="b_en_%d" % i
, reset_less
=True)
629 m
.d
.comb
+= a_enabled
.eq(ae
)
630 m
.d
.comb
+= b_enabled
.eq(be
)
632 # for 8-bit values: form a * 0xFF00 by using -a * 0x100, the
633 # negation operation is split into a bitwise not and a +1.
634 # likewise for 16, 32, and 64-bit values.
635 nat
.append(Mux(a_enabled
,
636 Cat(Repl(0, bit_width
),
637 ~self
.a
.bit_select(bit_width
* i
, bit_width
)),
640 nla
.append(Cat(Repl(0, bit_width
), a_enabled
,
641 Repl(0, bit_width
-1)))
643 nbt
.append(Mux(b_enabled
,
644 Cat(Repl(0, bit_width
),
645 ~self
.b
.bit_select(bit_width
* i
, bit_width
)),
648 nlb
.append(Cat(Repl(0, bit_width
), b_enabled
,
649 Repl(0, bit_width
-1)))
651 m
.d
.comb
+= [not_a_term
.eq(Cat(*nat
)),
652 not_b_term
.eq(Cat(*nbt
)),
653 neg_lsb_a_term
.eq(Cat(*nla
)),
654 neg_lsb_b_term
.eq(Cat(*nlb
)),
657 expanded_part_pts
= PartitionPoints()
658 for i
, v
in self
.part_pts
.items():
659 signal
= Signal(name
=f
"expanded_part_pts_{i*2}", reset_less
=True)
660 expanded_part_pts
[i
* 2] = signal
661 m
.d
.comb
+= signal
.eq(v
)
663 add_reduce
= AddReduce(terms
,
665 self
.register_levels
,
667 m
.submodules
.add_reduce
= add_reduce
668 m
.d
.comb
+= self
._intermediate
_output
.eq(add_reduce
.output
)
669 m
.d
.comb
+= self
._output
_64.eq(
670 Mux(self
._delayed
_part
_ops
[-1][0] == OP_MUL_LOW
,
671 self
._intermediate
_output
.bit_select(0, 64),
672 self
._intermediate
_output
.bit_select(64, 64)))
677 op
= Signal(32, reset_less
=True, name
="op32_%d" % i
)
679 Mux(self
._delayed
_part
_ops
[-1][4 * i
] == OP_MUL_LOW
,
680 self
._intermediate
_output
.bit_select(i
* 64, 32),
681 self
._intermediate
_output
.bit_select(i
* 64 + 32, 32)))
683 m
.d
.comb
+= self
._output
_32.eq(Cat(*ol
))
688 op
= Signal(16, reset_less
=True, name
="op16_%d" % i
)
690 Mux(self
._delayed
_part
_ops
[-1][2 * i
] == OP_MUL_LOW
,
691 self
._intermediate
_output
.bit_select(i
* 32, 16),
692 self
._intermediate
_output
.bit_select(i
* 32 + 16, 16)))
694 m
.d
.comb
+= self
._output
_16.eq(Cat(*ol
))
699 op
= Signal(8, reset_less
=True, name
="op8_%d" % i
)
701 Mux(self
._delayed
_part
_ops
[-1][i
] == OP_MUL_LOW
,
702 self
._intermediate
_output
.bit_select(i
* 16, 8),
703 self
._intermediate
_output
.bit_select(i
* 16 + 8, 8)))
705 m
.d
.comb
+= self
._output
_8.eq(Cat(*ol
))
710 op
= Signal(8, reset_less
=True, name
="op%d" % i
)
712 Mux(self
._delayed
_part
_8[-1][i
]
713 | self
._delayed
_part
_16[-1][i
// 2],
714 Mux(self
._delayed
_part
_8[-1][i
],
715 self
._output
_8.bit_select(i
* 8, 8),
716 self
._output
_16.bit_select(i
* 8, 8)),
717 Mux(self
._delayed
_part
_32[-1][i
// 4],
718 self
._output
_32.bit_select(i
* 8, 8),
719 self
._output
_64.bit_select(i
* 8, 8))))
721 m
.d
.comb
+= self
.output
.eq(Cat(*ol
))
725 if __name__
== "__main__":
729 m
._intermediate
_output
,
732 *m
.part_pts
.values()])