2 # SPDX-License-Identifier: LGPL-2.1-or-later
3 # See Notices.txt for copyright information
5 from ieee754
.part_mul_add
.multiply
import \
6 (PartitionPoints
, PartitionedAdder
, AddReduce
,
7 Mul8_16_32_64
, OP_MUL_LOW
, OP_MUL_SIGNED_HIGH
,
8 OP_MUL_SIGNED_UNSIGNED_HIGH
, OP_MUL_UNSIGNED_HIGH
)
9 from nmigen
import Signal
, Module
10 from nmigen
.back
.pysim
import Simulator
, Delay
, Tick
, Passive
11 from nmigen
.hdl
.ast
import Assign
, Value
12 from typing
import Any
, Generator
, List
, Union
, Optional
, Tuple
, Iterable
14 from hashlib
import sha256
19 def create_simulator(module
: Any
,
21 test_name
: str) -> Simulator
:
22 return Simulator(module
,
23 vcd_file
=open(test_name
+ ".vcd", "w"),
24 gtkw_file
=open(test_name
+ ".gtkw", "w"),
# Yield type of AsyncProcessGenerator: any one of the listed command types.
AsyncProcessCommand = Union[Delay, Tick, Passive, Assign, Value]

# Yield type of ProcessGenerator: an AsyncProcessCommand, or None.
ProcessCommand = Optional[AsyncProcessCommand]

# Generator yielding AsyncProcessCommand values; it accepts an int or None
# through send() and returns None.
AsyncProcessGenerator = Generator[AsyncProcessCommand, Optional[int], None]

# Generator yielding ProcessCommand values; it accepts an int or None
# through send() and returns None.
ProcessGenerator = Generator[ProcessCommand, Optional[int], None]
34 class TestPartitionPoints(unittest
.TestCase
):
35 def test(self
) -> None:
39 partition_point_10
= Signal()
40 partition_points
= PartitionPoints({1: True,
42 10: partition_point_10
})
43 module
.d
.comb
+= mask
.eq(partition_points
.as_mask(width
))
44 with
create_simulator(module
,
45 [mask
, partition_point_10
],
46 "partition_points") as sim
:
47 def async_process() -> AsyncProcessGenerator
:
48 self
.assertEqual((yield partition_points
[1]), True)
49 self
.assertEqual((yield partition_points
[5]), False)
50 yield partition_point_10
.eq(0)
52 self
.assertEqual((yield mask
), 0xFFFD)
53 yield partition_point_10
.eq(1)
55 self
.assertEqual((yield mask
), 0xFBFD)
57 sim
.add_process(async_process
)
61 class TestPartitionedAdder(unittest
.TestCase
):
62 def test(self
) -> None:
64 partition_nibbles
= Signal()
65 partition_bytes
= Signal()
66 module
= PartitionedAdder(width
,
67 {0x4: partition_nibbles
,
68 0x8: partition_bytes | partition_nibbles
,
69 0xC: partition_nibbles
})
70 with
create_simulator(module
,
76 "partitioned_adder") as sim
:
77 def async_process() -> AsyncProcessGenerator
:
78 def test_add(msg_prefix
: str,
79 *mask_list
: Tuple
[int, ...]) -> Any
:
80 for a
, b
in [(0x0000, 0x0000),
91 for mask
in mask_list
:
92 y |
= mask
& ((a
& mask
) + (b
& mask
))
93 output
= (yield module
.output
)
94 msg
= f
"{msg_prefix}: 0x{a:X} + 0x{b:X}" + \
95 f
" => 0x{y:X} != 0x{output:X}"
96 self
.assertEqual(y
, output
, msg
)
97 yield partition_nibbles
.eq(0)
98 yield partition_bytes
.eq(0)
99 yield from test_add("16-bit", 0xFFFF)
100 yield partition_nibbles
.eq(0)
101 yield partition_bytes
.eq(1)
102 yield from test_add("8-bit", 0xFF00, 0x00FF)
103 yield partition_nibbles
.eq(1)
104 yield partition_bytes
.eq(0)
105 yield from test_add("4-bit", 0xF000, 0x0F00, 0x00F0, 0x000F)
107 sim
.add_process(async_process
)
111 class GenOrCheck(enum
.Enum
):
112 Generate
= enum
.auto()
116 class TestAddReduce(unittest
.TestCase
):
117 def calculate_input_values(self
,
120 extra_keys
: List
[int] = []
121 ) -> (List
[int], List
[str]):
123 input_values_str
= []
124 for i
in range(input_count
):
132 hash_input
= f
"{input_count} {i} {key} {extra_keys}"
133 hash = sha256(hash_input
.encode()).digest()
134 value
= int.from_bytes(hash, byteorder
="little")
136 input_values
.append(value
)
137 input_values_str
.append(f
"0x{value:04X}")
138 return input_values
, input_values_str
140 def subtest_value(self
,
141 inputs
: List
[Signal
],
143 mask_list
: List
[int],
144 gen_or_check
: GenOrCheck
,
145 values
: List
[int]) -> AsyncProcessGenerator
:
146 if gen_or_check
== GenOrCheck
.Generate
:
147 for i
, v
in zip(inputs
, values
):
151 for mask
in mask_list
:
156 output
= (yield module
.output
)
157 if gen_or_check
== GenOrCheck
.Check
:
158 self
.assertEqual(y
, output
, f
"0x{y:X} != 0x{output:X}")
161 def subtest_key(self
,
163 inputs
: List
[Signal
],
166 mask_list
: List
[int],
167 gen_or_check
: GenOrCheck
) -> AsyncProcessGenerator
:
168 values
, values_str
= self
.calculate_input_values(input_count
, key
)
169 if gen_or_check
== GenOrCheck
.Check
:
170 with self
.subTest(inputs
=values_str
):
171 yield from self
.subtest_value(inputs
,
177 yield from self
.subtest_value(inputs
,
183 def subtest_run_sim(self
,
188 inputs
: List
[Signal
],
190 delay_cycles
: int) -> None:
191 def generic_process(gen_or_check
: GenOrCheck
) -> AsyncProcessGenerator
:
192 for partition_4_value
, partition_8_value
, mask_list
in [
194 (0, 1, [0xFF00, 0x00FF]),
195 (1, 0, [0xFFF0, 0x000F]),
196 (1, 1, [0xFF00, 0x00F0, 0x000F])]:
198 if gen_or_check
== GenOrCheck
.Check
:
199 with self
.subTest(partition_4
=partition_4_value
,
200 partition_8
=partition_8_value
):
201 for key
in range(key_count
):
202 with self
.subTest(key
=key
):
203 yield from self
.subtest_key(input_count
,
210 if gen_or_check
== GenOrCheck
.Generate
:
211 yield partition_4
.eq(partition_4_value
)
212 yield partition_8
.eq(partition_8_value
)
213 for key
in range(key_count
):
214 yield from self
.subtest_key(input_count
,
221 def generate_process() -> AsyncProcessGenerator
:
222 yield from generic_process(GenOrCheck
.Generate
)
224 def check_process() -> AsyncProcessGenerator
:
225 if delay_cycles
!= 0:
226 for _
in range(delay_cycles
):
228 yield from generic_process(GenOrCheck
.Check
)
231 sim
.add_process(generate_process
)
232 sim
.add_process(check_process
)
235 def subtest_file(self
,
237 register_levels
: List
[int]) -> None:
238 max_level
= AddReduce
.get_max_level(input_count
)
239 for level
in register_levels
:
240 if level
> max_level
:
242 partition_4
= Signal()
243 partition_8
= Signal()
244 partition_points
= PartitionPoints()
245 partition_points
[4] = partition_4
246 partition_points
[8] = partition_8
248 inputs
= [Signal(width
, name
=f
"input_{i}")
249 for i
in range(input_count
)]
250 module
= AddReduce(inputs
,
255 file_name
= "add_reduce"
256 if len(register_levels
) != 0:
257 file_name
+= f
"-{'_'.join(map(repr, register_levels))}"
258 file_name
+= f
"-{input_count:02d}"
259 with
create_simulator(module
,
265 self
.subtest_run_sim(input_count
,
271 len(register_levels
))
def subtest_register_levels(self, register_levels: List[int]) -> None:
    """Call ``subtest_file`` once for each input count from 0 through 15.

    Every call runs inside its own ``subTest`` context, labelled with the
    input count and with ``repr(register_levels)``.

    :param register_levels: forwarded unchanged to ``subtest_file``.
    """
    for count in range(16):
        labels = {
            "input_count": count,
            "register_levels": repr(register_levels),
        }
        with self.subTest(**labels):
            self.subtest_file(count, register_levels)
279 def test_empty(self
) -> None:
280 self
.subtest_register_levels([])
282 def test_0(self
) -> None:
283 self
.subtest_register_levels([0])
285 def test_1(self
) -> None:
286 self
.subtest_register_levels([1])
288 def test_2(self
) -> None:
289 self
.subtest_register_levels([2])
291 def test_3(self
) -> None:
292 self
.subtest_register_levels([3])
294 def test_4(self
) -> None:
295 self
.subtest_register_levels([4])
297 def test_5(self
) -> None:
298 self
.subtest_register_levels([5])
300 def test_0(self
) -> None:
301 self
.subtest_register_levels([0])
303 def test_0_1(self
) -> None:
304 self
.subtest_register_levels([0, 1])
306 def test_0_1_2(self
) -> None:
307 self
.subtest_register_levels([0, 1, 2])
309 def test_0_1_2_3(self
) -> None:
310 self
.subtest_register_levels([0, 1, 2, 3])
312 def test_0_1_2_3_4(self
) -> None:
313 self
.subtest_register_levels([0, 1, 2, 3, 4])
315 def test_0_1_2_3_4_5(self
) -> None:
316 self
.subtest_register_levels([0, 1, 2, 3, 4, 5])
318 def test_0_2(self
) -> None:
319 self
.subtest_register_levels([0, 2])
321 def test_0_3(self
) -> None:
322 self
.subtest_register_levels([0, 3])
324 def test_0_4(self
) -> None:
325 self
.subtest_register_levels([0, 4])
327 def test_0_5(self
) -> None:
328 self
.subtest_register_levels([0, 5])
337 self
.a_signed
= a_signed
338 self
.b_signed
= b_signed
339 self
.bit_width
= bit_width
340 self
.high_half
= high_half
343 return f
"SIMDMulLane({self.a_signed}, {self.b_signed}, " +\
344 f
"{self.bit_width}, {self.high_half})"
347 class TestMul8_16_32_64(unittest
.TestCase
):
349 def simd_mul(a
: int, b
: int, lanes
: List
[SIMDMulLane
]) -> Tuple
[int, int]:
351 intermediate_output
= 0
354 a_signed
= lane
.a_signed
or not lane
.high_half
355 b_signed
= lane
.b_signed
or not lane
.high_half
356 mask
= (1 << lane
.bit_width
) - 1
357 sign_bit
= 1 << (lane
.bit_width
- 1)
358 a_part
= (a
>> shift
) & mask
359 if a_signed
and (a_part
& sign_bit
) != 0:
360 a_part
-= 1 << lane
.bit_width
361 b_part
= (b
>> shift
) & mask
362 if b_signed
and (b_part
& sign_bit
) != 0:
363 b_part
-= 1 << lane
.bit_width
364 value
= a_part
* b_part
365 value
&= (1 << (lane
.bit_width
* 2)) - 1
366 intermediate_output |
= value
<< (shift
* 2)
368 value
>>= lane
.bit_width
370 output |
= value
<< shift
371 shift
+= lane
.bit_width
372 return output
, intermediate_output
375 def get_test_cases(lanes
: List
[SIMDMulLane
],
376 keys
: Iterable
[int]) -> Iterable
[Tuple
[int, int]]:
379 hash_input
= f
"{i} {lanes} {list(keys)}"
380 hash = sha256(hash_input
.encode()).digest()
381 value
= int.from_bytes(hash, byteorder
="little")
382 yield (value
& mask
, value
>> 64)
387 a |
= 1 << (shift
+ lane
.bit_width
- 1)
388 b |
= 1 << (shift
+ lane
.bit_width
- 1)
389 shift
+= lane
.bit_width
def test_simd_mul_lane(self):
    # Formatting a SIMDMulLane with an empty format spec (what an f-string
    # placeholder does) must give back its constructor-style text.
    lane = SIMDMulLane(True, True, 8, False)
    expected_text = "SIMDMulLane(True, True, 8, False)"
    self.assertEqual(format(lane), expected_text)
396 def test_simd_mul(self
):
397 lanes
= [SIMDMulLane(True,
413 a
= 0x0123456789ABCDEF
414 b
= 0xFEDCBA9876543210
415 output
= 0x0121FA00FE1C28FE
416 intermediate_output
= 0x0121FA0023E20B28C94DFE1C280AFEF0
417 self
.assertEqual(self
.simd_mul(a
, b
, lanes
),
418 (output
, intermediate_output
))
419 a
= 0x8123456789ABCDEF
420 b
= 0xFEDCBA9876543210
421 output
= 0x81B39CB4FE1C28FE
422 intermediate_output
= 0x81B39CB423E20B28C94DFE1C280AFEF0
423 self
.assertEqual(self
.simd_mul(a
, b
, lanes
),
424 (output
, intermediate_output
))
426 def test_signed_mul_from_unsigned(self
):
427 for i
in range(0, 0x10):
428 for j
in range(0, 0x10):
429 si
= i
if i
& 8 else i
- 0x10 # signed i
430 sj
= j
if j
& 8 else j
- 0x10 # signed j
434 with self
.subTest(i
=i
, j
=j
, si
=si
, sj
=sj
,
435 mulu
=mulu
, mulsu
=mulsu
, mul
=mul
):
440 self
.assertEqual(mulsu
& 0xFF, mulsu2
& 0xFF)
445 self
.assertEqual(mul
& 0xFF, mul2
& 0xFF)
447 def subtest_value(self
,
450 module
: Mul8_16_32_64
,
451 lanes
: List
[SIMDMulLane
],
452 gen_or_check
: GenOrCheck
) -> AsyncProcessGenerator
:
453 if gen_or_check
== GenOrCheck
.Generate
:
456 output2
, intermediate_output2
= self
.simd_mul(a
, b
, lanes
)
458 if gen_or_check
== GenOrCheck
.Check
:
459 intermediate_output
= (yield module
._intermediate
_output
)
460 self
.assertEqual(intermediate_output
,
461 intermediate_output2
,
462 f
"0x{intermediate_output:X} "
463 + f
"!= 0x{intermediate_output2:X}")
464 output
= (yield module
.output
)
465 self
.assertEqual(output
, output2
, f
"0x{output:X} != 0x{output2:X}")
468 def subtest_lanes_2(self
,
469 lanes
: List
[SIMDMulLane
],
470 module
: Mul8_16_32_64
,
471 gen_or_check
: GenOrCheck
) -> AsyncProcessGenerator
:
478 op
= OP_MUL_SIGNED_HIGH
480 op
= OP_MUL_SIGNED_UNSIGNED_HIGH
482 self
.assertFalse(lane
.b_signed
,
483 "unsigned * signed not supported")
484 op
= OP_MUL_UNSIGNED_HIGH
487 self
.assertEqual(lane
.bit_width
% 8, 0)
488 for i
in range(lane
.bit_width
// 8):
489 if gen_or_check
== GenOrCheck
.Generate
:
490 yield module
.part_ops
[part_index
].eq(op
)
492 for i
in range(lane
.bit_width
// 8 - 1):
493 if gen_or_check
== GenOrCheck
.Generate
:
494 yield module
.part_pts
[bit_index
].eq(0)
496 if bit_index
< 64 and gen_or_check
== GenOrCheck
.Generate
:
497 yield module
.part_pts
[bit_index
].eq(1)
499 self
.assertEqual(part_index
, 8)
500 for a
, b
in self
.get_test_cases(lanes
, ()):
501 if gen_or_check
== GenOrCheck
.Check
:
502 with self
.subTest(a
=f
"{a:X}", b
=f
"{b:X}"):
503 yield from self
.subtest_value(a
, b
, module
, lanes
, gen_or_check
)
505 yield from self
.subtest_value(a
, b
, module
, lanes
, gen_or_check
)
507 def subtest_lanes(self
,
508 lanes
: List
[SIMDMulLane
],
509 module
: Mul8_16_32_64
,
510 gen_or_check
: GenOrCheck
) -> AsyncProcessGenerator
:
511 if gen_or_check
== GenOrCheck
.Check
:
512 with self
.subTest(lanes
=repr(lanes
)):
513 yield from self
.subtest_lanes_2(lanes
, module
, gen_or_check
)
515 yield from self
.subtest_lanes_2(lanes
, module
, gen_or_check
)
517 def subtest_file(self
,
518 register_levels
: List
[int]) -> None:
519 module
= Mul8_16_32_64(register_levels
)
520 file_name
= "mul8_16_32_64"
521 if len(register_levels
) != 0:
522 file_name
+= f
"-{'_'.join(map(repr, register_levels))}"
525 module
._intermediate
_output
,
527 ports
.extend(module
.part_ops
)
528 ports
.extend(module
.part_pts
.values())
529 with
create_simulator(module
, ports
, file_name
) as sim
:
530 def process(gen_or_check
: GenOrCheck
) -> AsyncProcessGenerator
:
531 for a_signed
in False, True:
532 for b_signed
in False, True:
533 if not a_signed
and b_signed
:
535 for high_half
in False, True:
536 if not high_half
and not (a_signed
and b_signed
):
538 yield from self
.subtest_lanes(
539 [SIMDMulLane(a_signed
,
545 yield from self
.subtest_lanes(
546 [SIMDMulLane(a_signed
,
552 yield from self
.subtest_lanes(
553 [SIMDMulLane(a_signed
,
559 yield from self
.subtest_lanes(
560 [SIMDMulLane(a_signed
,
566 yield from self
.subtest_lanes([SIMDMulLane(False,
584 yield from self
.subtest_lanes([SIMDMulLane(True,
602 yield from self
.subtest_lanes([SIMDMulLane(True,
621 def generate_process() -> AsyncProcessGenerator
:
622 yield from process(GenOrCheck
.Generate
)
624 def check_process() -> AsyncProcessGenerator
:
625 if len(register_levels
) != 0:
626 for _
in register_levels
:
628 yield from process(GenOrCheck
.Check
)
631 sim
.add_process(generate_process
)
632 sim
.add_process(check_process
)
def subtest_register_levels(self, register_levels: List[int]) -> None:
    """Run ``subtest_file`` inside a ``subTest`` context.

    The context is labelled with ``repr(register_levels)``.

    :param register_levels: forwarded unchanged to ``subtest_file``.
    """
    label = repr(register_levels)
    with self.subTest(register_levels=label):
        self.subtest_file(register_levels)
639 def test_empty(self
) -> None:
640 self
.subtest_register_levels([])
642 def test_0(self
) -> None:
643 self
.subtest_register_levels([0])
645 def test_1(self
) -> None:
646 self
.subtest_register_levels([1])
648 def test_2(self
) -> None:
649 self
.subtest_register_levels([2])
651 def test_3(self
) -> None:
652 self
.subtest_register_levels([3])
654 def test_4(self
) -> None:
655 self
.subtest_register_levels([4])
657 def test_5(self
) -> None:
658 self
.subtest_register_levels([5])
660 def test_6(self
) -> None:
661 self
.subtest_register_levels([6])
663 def test_7(self
) -> None:
664 self
.subtest_register_levels([7])
666 def test_8(self
) -> None:
667 self
.subtest_register_levels([8])
669 def test_9(self
) -> None:
670 self
.subtest_register_levels([9])
672 def test_10(self
) -> None:
673 self
.subtest_register_levels([10])
675 def test_0(self
) -> None:
676 self
.subtest_register_levels([0])
678 def test_0_1(self
) -> None:
679 self
.subtest_register_levels([0, 1])
681 def test_0_1_2(self
) -> None:
682 self
.subtest_register_levels([0, 1, 2])
684 def test_0_1_2_3(self
) -> None:
685 self
.subtest_register_levels([0, 1, 2, 3])
687 def test_0_1_2_3_4(self
) -> None:
688 self
.subtest_register_levels([0, 1, 2, 3, 4])
690 def test_0_1_2_3_4_5(self
) -> None:
691 self
.subtest_register_levels([0, 1, 2, 3, 4, 5])
693 def test_0_1_2_3_4_5_6(self
) -> None:
694 self
.subtest_register_levels([0, 1, 2, 3, 4, 5, 6])
696 def test_0_1_2_3_4_5_6_7(self
) -> None:
697 self
.subtest_register_levels([0, 1, 2, 3, 4, 5, 6, 7])
699 def test_0_1_2_3_4_5_6_7_8(self
) -> None:
700 self
.subtest_register_levels([0, 1, 2, 3, 4, 5, 6, 7, 8])
702 def test_0_1_2_3_4_5_6_7_8_9(self
) -> None:
703 self
.subtest_register_levels([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
705 def test_0_1_2_3_4_5_6_7_8_9_10(self
) -> None:
706 self
.subtest_register_levels([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
708 def test_0_2(self
) -> None:
709 self
.subtest_register_levels([0, 2])
711 def test_0_3(self
) -> None:
712 self
.subtest_register_levels([0, 3])
714 def test_0_4(self
) -> None:
715 self
.subtest_register_levels([0, 4])
717 def test_0_5(self
) -> None:
718 self
.subtest_register_levels([0, 5])
720 def test_0_6(self
) -> None:
721 self
.subtest_register_levels([0, 6])
723 def test_0_7(self
) -> None:
724 self
.subtest_register_levels([0, 7])
726 def test_0_8(self
) -> None:
727 self
.subtest_register_levels([0, 8])
729 def test_0_9(self
) -> None:
730 self
.subtest_register_levels([0, 9])
732 def test_0_10(self
) -> None:
733 self
.subtest_register_levels([0, 10])
735 if __name__
== '__main__':