+++ /dev/null
-# SPDX-License-Identifier: LGPL-3-or-later
-# Copyright 2022 Jacob Lifshay programmerjake@gmail.com
-
-# Funded by NLnet Assure Programme 2021-02-052, https://nlnet.nl/assure part
-# of Horizon 2020 EU Programme 957073.
-
-""" Carry-less Multiplication.
-
-https://bugs.libre-soc.org/show_bug.cgi?id=784
-"""
-
-from functools import reduce
-from operator import xor
-from nmigen.hdl.ir import Elaboratable
-from nmigen.hdl.ast import Signal, Cat, Repl, Value
-from nmigen.hdl.dsl import Module
-
-
-class BitwiseXorReduce(Elaboratable):
- """Bitwise Xor lots of stuff together by using tree-reduction on each bit.
-
- Properties:
- input_values: tuple[Value, ...]
- input nmigen Values
- output: Signal
- output, set to `input_values[0] ^ input_values[1] ^ input_values[2]...`
- """
-
- def __init__(self, input_values):
- self.input_values = tuple(map(Value.cast, input_values))
- assert len(self.input_values) > 0, "can't xor-reduce nothing"
- self.output = Signal(reduce(xor, self.input_values).shape())
-
- def elaborate(self, platform):
- m = Module()
- # collect inputs into full-width Signals
- inputs = []
- for i, inp_v in enumerate(self.input_values):
- inp = self.output.like(self.output, name=f"input_{i}")
- # sign/zero-extend inp_v to full-width
- m.d.comb += inp.eq(inp_v)
- inputs.append(inp)
- for bit in range(self.output.width):
- # construct a tree-reduction for bit index `bit` of all inputs
- m.d.comb += self.output[bit].eq(Cat(i[bit] for i in inputs).xor())
- return m
-
-
-class CLMulAdd(Elaboratable):
- """Carry-less multiply-add.
-
- Computes:
- ```
- self.output = (clmul(self.factor1, self.factor2) ^ self.terms[0]
- ^ self.terms[1] ^ self.terms[2] ...)
- ```
-
- Properties:
- factor_width: int
- the bit-width of `factor1` and `factor2`
- term_widths: tuple[int, ...]
- the bit-width of each Signal in `terms`
- factor1: Signal of width self.factor_width
- the first input to the carry-less multiplication section
- factor2: Signal of width self.factor_width
- the second input to the carry-less multiplication section
- terms: tuple[Signal, ...]
- inputs to be carry-less added (really XOR)
- output: Signal
- the final output
- """
-
- def __init__(self, factor_width, term_widths=()):
- assert isinstance(factor_width, int) and factor_width >= 1
- self.factor_width = factor_width
- self.term_widths = tuple(map(int, term_widths))
-
- # build Signals
- self.factor1 = Signal(self.factor_width)
- self.factor2 = Signal(self.factor_width)
-
- def terms():
- for i, inp in enumerate(self.term_widths):
- yield Signal(inp, name=f"term_{i}")
- self.terms = tuple(terms())
- self.output = Signal(max((self.factor_width * 2 - 1,
- *self.term_widths)))
-
- def __reduce_inputs(self):
- for shift in range(self.factor_width):
- mask = Repl(self.factor2[shift], self.factor_width)
- yield (self.factor1 & mask) << shift
- yield from self.terms
-
- def elaborate(self, platform):
- m = Module()
- xor_reduce = BitwiseXorReduce(self.__reduce_inputs())
- m.submodules.xor_reduce = xor_reduce
- m.d.comb += self.output.eq(xor_reduce.output)
- return m
+++ /dev/null
-# SPDX-License-Identifier: LGPL-3-or-later
-# Copyright 2022 Jacob Lifshay
-
-# Funded by NLnet Assure Programme 2021-02-052, https://nlnet.nl/assure part
-# of Horizon 2020 EU Programme 957073.
-
-from functools import reduce
-from operator import xor
-import unittest
-from nmigen.hdl.ast import (AnyConst, Assert, Signal, Const, unsigned, signed,
- Mux)
-from nmigen.hdl.dsl import Module
-from nmutil.formaltest import FHDLTestCase
-from nmutil.openpower_sv_bitmanip_in_wiki.clmul import clmul
-from nmutil.clmul import BitwiseXorReduce, CLMulAdd
-from nmigen.sim import Delay
-from nmutil.sim_util import do_sim, hash_256
-
-
-class TestBitwiseXorReduce(FHDLTestCase):
- def tst(self, input_shapes):
- dut = BitwiseXorReduce(Signal(w, name=f"input_{i}")
- for i, w in enumerate(input_shapes))
- self.assertEqual(reduce(xor, dut.input_values).shape(),
- dut.output.shape())
-
- def case(inputs):
- expected = reduce(xor, inputs)
- with self.subTest(inputs=list(map(hex, inputs)),
- expected=hex(expected)):
- for i, inp in enumerate(inputs):
- yield dut.input_values[i].eq(inp)
- yield Delay(1e-6)
- output = yield dut.output
- with self.subTest(output=hex(output)):
- self.assertEqual(expected, output)
-
- def process():
- for i in range(100):
- inputs = []
- for inp in dut.input_values:
- v = hash_256(f"bxorr input {i} {inp.name}")
- inputs.append(Const.normalize(v, inp.shape()))
- yield from case(inputs)
-
- with do_sim(self, dut, [*dut.input_values, dut.output]) as sim:
- sim.add_process(process)
- sim.run()
-
- def tst_formal(self, input_shapes):
- dut = BitwiseXorReduce(Signal(w, name=f"input_{i}")
- for i, w in enumerate(input_shapes))
- m = Module()
- m.submodules.dut = dut
- for i in dut.input_values:
- m.d.comb += i.eq(AnyConst(i.shape()))
- m.d.comb += Assert(dut.output == reduce(xor, dut.input_values))
- self.assertFormal(m)
-
- def test_65_of_u64(self):
- self.tst([64] * 65)
-
- def test_formal_65_of_u64(self):
- self.tst_formal([64] * 65)
-
- def test_5_of_u6(self):
- self.tst([6] * 5)
-
- def test_formal_5_of_u6(self):
- self.tst_formal([6] * 5)
-
- def test_u5_i6_u3_i10(self):
- self.tst([unsigned(5), signed(6), unsigned(3), signed(10)])
-
- def test_formal_u5_i6_u3_i10(self):
- self.tst_formal([unsigned(5), signed(6), unsigned(3), signed(10)])
-
-
-class TestCLMulAdd(FHDLTestCase):
- def tst(self, factor_width, terms_width):
- dut = CLMulAdd(factor_width, terms_width)
- self.assertEqual(dut.output.width,
- max((factor_width * 2 - 1, *terms_width)))
-
- def case(factor1, factor2, terms):
- expected = reduce(xor, terms, clmul(factor1, factor2))
- with self.subTest(factor1=hex(factor1),
- factor2=bin(factor2),
- terms=list(map(hex, terms)),
- expected=hex(expected)):
- yield dut.factor1.eq(factor1)
- yield dut.factor2.eq(factor2)
- for i, term in enumerate(terms):
- yield dut.terms[i].eq(term)
- yield Delay(1e-6)
- output = yield dut.output
- with self.subTest(output=hex(output)):
- self.assertEqual(expected, output)
-
- def process():
- for i in range(100):
- v = hash_256(f"clmuladd term {i} factor1")
- factor1 = Const.normalize(v, unsigned(factor_width))
- v = hash_256(f"clmuladd term {i} factor2")
- factor2 = Const.normalize(v, unsigned(factor_width))
- terms = []
- for j, term_width in enumerate(terms_width):
- v = hash_256(f"clmuladd term {i} {j}")
- terms.append(Const.normalize(v, unsigned(term_width)))
- yield from case(factor1, factor2, terms)
- with do_sim(self, dut, [dut.factor1, dut.factor2, *dut.terms,
- dut.output]) as sim:
- sim.add_process(process)
- sim.run()
-
- def test_4x4(self):
- self.tst(4, ())
-
- def test_4x4_8(self):
- self.tst(4, (8,))
-
- def test_64x64(self):
- self.tst(64, ())
-
- def test_64x64_64(self):
- self.tst(64, (64,))
-
- def test_8x8_16_16_16(self):
- self.tst(8, (16, 16, 16))
-
- def tst_formal(self, factor_width, terms_width):
- dut = CLMulAdd(factor_width, terms_width)
- m = Module()
- m.submodules.dut = dut
- m.d.comb += dut.factor1.eq(AnyConst(factor_width))
- m.d.comb += dut.factor2.eq(AnyConst(factor_width))
- reduce_inputs = []
- for shift in range(factor_width):
- reduce_inputs.append(
- Mux(dut.factor1[shift], dut.factor2 << shift, 0))
- for i in dut.terms:
- m.d.comb += i.eq(AnyConst(i.shape()))
- reduce_inputs.append(i)
- for i in range(len(reduce_inputs)):
- sig = Signal(reduce_inputs[i].shape(), name=f"reduce_input_{i}")
- m.d.comb += sig.eq(reduce_inputs[i])
- reduce_inputs[i] = sig
- expected = Signal(reduce(xor, reduce_inputs).shape())
- m.d.comb += expected.eq(reduce(xor, reduce_inputs))
- m.d.comb += Assert(dut.output == expected)
- self.assertFormal(m)
-
- def test_formal_4x4(self):
- self.tst_formal(4, ())
-
- def test_formal_4x4_8(self):
- self.tst_formal(4, (8,))
-
- def test_formal_64x64(self):
- self.tst_formal(64, ())
-
- def test_formal_64x64_64(self):
- self.tst_formal(64, (64,))
-
- def test_formal_8x8_16_16_16(self):
- self.tst_formal(8, (16, 16, 16))
-
-
-if __name__ == "__main__":
- unittest.main()