From: Luke Kenneth Casson Leighton Date: Mon, 18 May 2020 10:11:32 +0000 (+0100) Subject: move countzero to fu/logical X-Git-Tag: div_pipeline~1088 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=b57cba89a1d0a21fed88d0ecf5e259b063007240;p=soc.git move countzero to fu/logical --- diff --git a/src/soc/fu/countzero/countzero.py b/src/soc/fu/countzero/countzero.py deleted file mode 100644 index bd61f571..00000000 --- a/src/soc/fu/countzero/countzero.py +++ /dev/null @@ -1,136 +0,0 @@ -# https://github.com/antonblanchard/microwatt/blob/master/countzero.vhdl -from nmigen import Memory, Module, Signal, Cat, Elaboratable -from nmigen.hdl.rec import Record, Layout -from nmigen.cli import main - - -def or4(a, b, c, d): - return Cat(a.any(), b.any(), c.any(), d.any()) - - -class IntermediateResult(Record): - def __init__(self, name=None): - layout = (('v16', 15), - ('sel_hi', 2), - ('is_32bit', 1), - ('count_right', 1)) - Record.__init__(self, Layout(layout), name=name) - - -class ZeroCounter(Elaboratable): - def __init__(self): - self.rs_i = Signal(64, reset_less=True) - self.count_right_i = Signal(1, reset_less=True) - self.is_32bit_i = Signal(1, reset_less=True) - self.result_o = Signal(64, reset_less=True) - - def ports(self): - return [self.rs_i, self.count_right_i, self.is_32bit_i, self.result_o] - - def elaborate(self, platform): - m = Module() - - # TODO: replace this with m.submodule.pe1 = PriorityEncoder(4) - # m.submodule.pe2 = PriorityEncoder(4) - # m.submodule.pe3 = PriorityEncoder(4) - # etc. - # and where right will assign input to v and !right will assign v[::-1] - # so as to reverse the order of the input bits. - - def encoder(v, right): - """ - Return the index of the leftmost or rightmost 1 in a set of 4 bits. - Assumes v is not "0000"; if it is, return (right ? "11" : "00"). - """ - ret = Signal(2, reset_less=True) - with m.If(right): - with m.If(v[0]): - m.d.comb += ret.eq(0) - with m.Elif(v[1]): - m.d.comb += ret.eq(1) - with m.Elif(v[2]): - m.d.comb += ret.eq(2) - with m.Else(): - m.d.comb += ret.eq(3) - with m.Else(): - with m.If(v[3]): - m.d.comb += ret.eq(3) - with m.Elif(v[2]): - m.d.comb += ret.eq(2) - with m.Elif(v[1]): - m.d.comb += ret.eq(1) - with m.Else(): - m.d.comb += ret.eq(0) - return ret - - r = IntermediateResult() - r_in = IntermediateResult() - - m.d.comb += r.eq(r_in) # make the module entirely combinatorial for now - - v = IntermediateResult() - y = Signal(4, reset_less=True) - z = Signal(4, reset_less=True) - sel = Signal(6, reset_less=True) - v4 = Signal(4, reset_less=True) - - # Test 4 groups of 16 bits each. - # The top 2 groups are considered to be zero in 32-bit mode. - m.d.comb += z.eq(or4(self.rs_i[0:16], self.rs_i[16:32], - self.rs_i[32:48], self.rs_i[48:64])) - with m.If(self.is_32bit_i): - m.d.comb += v.sel_hi[1].eq(0) - with m.If(self.count_right_i): - m.d.comb += v.sel_hi[0].eq(~z[0]) - with m.Else(): - m.d.comb += v.sel_hi[0].eq(z[1]) - with m.Else(): - m.d.comb += v.sel_hi.eq(encoder(z, self.count_right_i)) - - # Select the leftmost/rightmost non-zero group of 16 bits - with m.Switch(v.sel_hi): - with m.Case(0): - m.d.comb += v.v16.eq(self.rs_i[0:16]) - with m.Case(1): - m.d.comb += v.v16.eq(self.rs_i[16:32]) - with m.Case(2): - m.d.comb += v.v16.eq(self.rs_i[32:48]) - with m.Case(3): - m.d.comb += v.v16.eq(self.rs_i[48:64]) - - # Latch this and do the rest in the next cycle, for the sake of timing - m.d.comb += v.is_32bit.eq(self.is_32bit_i) - m.d.comb += v.count_right.eq(self.count_right_i) - m.d.comb += r_in.eq(v) - m.d.comb += sel[4:6].eq(r.sel_hi) - - # Test 4 groups of 4 bits - m.d.comb += y.eq(or4(r.v16[0:4], r.v16[4:8], - r.v16[8:12], r.v16[12:16])) - m.d.comb += sel[2:4].eq(encoder(y, r.count_right)) - - # Select the leftmost/rightmost non-zero group of 4 bits - with m.Switch(sel[2:4]): - with m.Case(0): - m.d.comb += v4.eq(r.v16[0:4]) - with m.Case(1): - m.d.comb += v4.eq(r.v16[4:8]) - with m.Case(2): - m.d.comb += v4.eq(r.v16[8:12]) - with m.Case(3): - m.d.comb += v4.eq(r.v16[12:16]) - - m.d.comb += sel[0:2].eq(encoder(v4, r.count_right)) - - # sel is now the index of the leftmost/rightmost 1 bit in rs - o = self.result_o - with m.If(v4 == 0): - # operand is zero, return 32 for 32-bit, else 64 - m.d.comb += o[5:7].eq(Cat(r.is_32bit, ~r.is_32bit)) - with m.Elif(r.count_right): - # return (63 - sel), trimmed to 5 bits in 32-bit mode - m.d.comb += o.eq(Cat(~sel[0:5], ~(sel[5] | r.is_32bit))) - with m.Else(): - m.d.comb += o.eq(sel) - - return m diff --git a/src/soc/fu/countzero/test/test_countzero.py b/src/soc/fu/countzero/test/test_countzero.py deleted file mode 100644 index b795920c..00000000 --- a/src/soc/fu/countzero/test/test_countzero.py +++ /dev/null @@ -1,105 +0,0 @@ -# https://github.com/antonblanchard/microwatt/blob/master/countzero_tb.vhdl -from nmigen import Module, Signal -from nmigen.cli import rtlil -from nmigen.back.pysim import Simulator, Delay -from nmigen.test.utils import FHDLTestCase -import unittest -from soc.fu.countzero.countzero import ZeroCounter - - -class ZeroCounterTestCase(FHDLTestCase): - def test_zerocounter(self): - m = Module() - comb = m.d.comb - m.submodules.dut = dut = ZeroCounter() - - sim = Simulator(m) - # sim.add_clock(1e-6) - - def process(): - print("test zero input") - yield dut.rs_i.eq(0) - yield dut.is_32bit_i.eq(0) - yield dut.count_right_i.eq(0) - yield Delay(1e-6) - result = yield dut.result_o - assert result == 0x40 - # report "bad cntlzd 0 = " & to_hstring(result); - assert(result == 0x40) - yield dut.count_right_i.eq(1) - yield Delay(1e-6) - result = yield dut.result_o - # report "bad cntlzd 0 = " & to_hstring(result); - assert(result == 0x40) - yield dut.is_32bit_i.eq(1) - yield dut.count_right_i.eq(0) - yield Delay(1e-6) - result = yield dut.result_o - # report "bad cntlzw 0 = " & to_hstring(result); - assert(result == 0x20) - yield dut.count_right_i.eq(1) - yield Delay(1e-6) - result = yield dut.result_o - # report "bad cntlzw 0 = " & to_hstring(result); - assert(result == 0x20) - # TODO next tests - - yield dut.rs_i.eq(0b00010000) - yield dut.is_32bit_i.eq(0) - yield dut.count_right_i.eq(0) - yield Delay(1e-6) - result = yield dut.result_o - assert result == 4, "result %d" % result - - yield dut.count_right_i.eq(1) - yield Delay(1e-6) - result = yield dut.result_o - assert result == 59, "result %d" % result - - yield dut.is_32bit_i.eq(1) - yield Delay(1e-6) - result = yield dut.result_o - assert result == 27, "result %d" % result - - yield dut.rs_i.eq(0b1100000100000000) - yield dut.is_32bit_i.eq(0) - yield dut.count_right_i.eq(0) - yield Delay(1e-6) - result = yield dut.result_o - assert result == 14, "result %d" % result - - yield dut.count_right_i.eq(1) - yield Delay(1e-6) - result = yield dut.result_o - assert result == 55, "result %d" % result - - yield dut.is_32bit_i.eq(1) - yield Delay(1e-6) - result = yield dut.result_o - assert result == 23, "result %d" % result - - yield dut.count_right_i.eq(0) - yield Delay(1e-6) - result = yield dut.result_o - assert result == 14, "result %d" % result - - - sim.add_process(process) # or sim.add_sync_process(process), see below - - # run test and write vcd - fn = "genullnau" - with sim.write_vcd(fn+".vcd", fn+".gtkw", traces=dut.ports()): - sim.run() - - # cntlzd_w - # cnttzd_w - - -if __name__ == "__main__": - - dut = ZeroCounter() - vl = rtlil.convert(dut, ports=dut.ports()) - with open("countzero.il", "w") as f: - f.write(vl) - - unittest.main() diff --git a/src/soc/fu/logical/countzero.py b/src/soc/fu/logical/countzero.py new file mode 100644 index 00000000..bd61f571 --- /dev/null +++ b/src/soc/fu/logical/countzero.py @@ -0,0 +1,136 @@ +# https://github.com/antonblanchard/microwatt/blob/master/countzero.vhdl +from nmigen import Memory, Module, Signal, Cat, Elaboratable +from nmigen.hdl.rec import Record, Layout +from nmigen.cli import main + + +def or4(a, b, c, d): + return Cat(a.any(), b.any(), c.any(), d.any()) + + +class IntermediateResult(Record): + def __init__(self, name=None): + layout = (('v16', 15), + ('sel_hi', 2), + ('is_32bit', 1), + ('count_right', 1)) + Record.__init__(self, Layout(layout), name=name) + + +class ZeroCounter(Elaboratable): + def __init__(self): + self.rs_i = Signal(64, reset_less=True) + self.count_right_i = Signal(1, reset_less=True) + self.is_32bit_i = Signal(1, reset_less=True) + self.result_o = Signal(64, reset_less=True) + + def ports(self): + return [self.rs_i, self.count_right_i, self.is_32bit_i, self.result_o] + + def elaborate(self, platform): + m = Module() + + # TODO: replace this with m.submodule.pe1 = PriorityEncoder(4) + # m.submodule.pe2 = PriorityEncoder(4) + # m.submodule.pe3 = PriorityEncoder(4) + # etc. + # and where right will assign input to v and !right will assign v[::-1] + # so as to reverse the order of the input bits. + + def encoder(v, right): + """ + Return the index of the leftmost or rightmost 1 in a set of 4 bits. + Assumes v is not "0000"; if it is, return (right ? "11" : "00"). + """ + ret = Signal(2, reset_less=True) + with m.If(right): + with m.If(v[0]): + m.d.comb += ret.eq(0) + with m.Elif(v[1]): + m.d.comb += ret.eq(1) + with m.Elif(v[2]): + m.d.comb += ret.eq(2) + with m.Else(): + m.d.comb += ret.eq(3) + with m.Else(): + with m.If(v[3]): + m.d.comb += ret.eq(3) + with m.Elif(v[2]): + m.d.comb += ret.eq(2) + with m.Elif(v[1]): + m.d.comb += ret.eq(1) + with m.Else(): + m.d.comb += ret.eq(0) + return ret + + r = IntermediateResult() + r_in = IntermediateResult() + + m.d.comb += r.eq(r_in) # make the module entirely combinatorial for now + + v = IntermediateResult() + y = Signal(4, reset_less=True) + z = Signal(4, reset_less=True) + sel = Signal(6, reset_less=True) + v4 = Signal(4, reset_less=True) + + # Test 4 groups of 16 bits each. + # The top 2 groups are considered to be zero in 32-bit mode. + m.d.comb += z.eq(or4(self.rs_i[0:16], self.rs_i[16:32], + self.rs_i[32:48], self.rs_i[48:64])) + with m.If(self.is_32bit_i): + m.d.comb += v.sel_hi[1].eq(0) + with m.If(self.count_right_i): + m.d.comb += v.sel_hi[0].eq(~z[0]) + with m.Else(): + m.d.comb += v.sel_hi[0].eq(z[1]) + with m.Else(): + m.d.comb += v.sel_hi.eq(encoder(z, self.count_right_i)) + + # Select the leftmost/rightmost non-zero group of 16 bits + with m.Switch(v.sel_hi): + with m.Case(0): + m.d.comb += v.v16.eq(self.rs_i[0:16]) + with m.Case(1): + m.d.comb += v.v16.eq(self.rs_i[16:32]) + with m.Case(2): + m.d.comb += v.v16.eq(self.rs_i[32:48]) + with m.Case(3): + m.d.comb += v.v16.eq(self.rs_i[48:64]) + + # Latch this and do the rest in the next cycle, for the sake of timing + m.d.comb += v.is_32bit.eq(self.is_32bit_i) + m.d.comb += v.count_right.eq(self.count_right_i) + m.d.comb += r_in.eq(v) + m.d.comb += sel[4:6].eq(r.sel_hi) + + # Test 4 groups of 4 bits + m.d.comb += y.eq(or4(r.v16[0:4], r.v16[4:8], + r.v16[8:12], r.v16[12:16])) + m.d.comb += sel[2:4].eq(encoder(y, r.count_right)) + + # Select the leftmost/rightmost non-zero group of 4 bits + with m.Switch(sel[2:4]): + with m.Case(0): + m.d.comb += v4.eq(r.v16[0:4]) + with m.Case(1): + m.d.comb += v4.eq(r.v16[4:8]) + with m.Case(2): + m.d.comb += v4.eq(r.v16[8:12]) + with m.Case(3): + m.d.comb += v4.eq(r.v16[12:16]) + + m.d.comb += sel[0:2].eq(encoder(v4, r.count_right)) + + # sel is now the index of the leftmost/rightmost 1 bit in rs + o = self.result_o + with m.If(v4 == 0): + # operand is zero, return 32 for 32-bit, else 64 + m.d.comb += o[5:7].eq(Cat(r.is_32bit, ~r.is_32bit)) + with m.Elif(r.count_right): + # return (63 - sel), trimmed to 5 bits in 32-bit mode + m.d.comb += o.eq(Cat(~sel[0:5], ~(sel[5] | r.is_32bit))) + with m.Else(): + m.d.comb += o.eq(sel) + + return m diff --git a/src/soc/fu/logical/main_stage.py b/src/soc/fu/logical/main_stage.py index 06e1afbc..bb6efaf2 100644 --- a/src/soc/fu/logical/main_stage.py +++ b/src/soc/fu/logical/main_stage.py @@ -11,7 +11,7 @@ from soc.fu.logical.pipe_data import ALUInputData from soc.fu.alu.pipe_data import ALUOutputData from ieee754.part.partsig import PartitionedSignal from soc.decoder.power_enums import InternalOp -from soc.fu.countzero.countzero import ZeroCounter +from soc.fu.logical.countzero import ZeroCounter from soc.decoder.power_fields import DecodeFields from soc.decoder.power_fieldsn import SignalBitRange diff --git a/src/soc/fu/logical/test/test_countzero.py b/src/soc/fu/logical/test/test_countzero.py new file mode 100644 index 00000000..43aae01d --- /dev/null +++ b/src/soc/fu/logical/test/test_countzero.py @@ -0,0 +1,105 @@ +# https://github.com/antonblanchard/microwatt/blob/master/countzero_tb.vhdl +from nmigen import Module, Signal +from nmigen.cli import rtlil +from nmigen.back.pysim import Simulator, Delay +from nmigen.test.utils import FHDLTestCase +import unittest +from soc.fu.logical.countzero import ZeroCounter + + +class ZeroCounterTestCase(FHDLTestCase): + def test_zerocounter(self): + m = Module() + comb = m.d.comb + m.submodules.dut = dut = ZeroCounter() + + sim = Simulator(m) + # sim.add_clock(1e-6) + + def process(): + print("test zero input") + yield dut.rs_i.eq(0) + yield dut.is_32bit_i.eq(0) + yield dut.count_right_i.eq(0) + yield Delay(1e-6) + result = yield dut.result_o + assert result == 0x40 + # report "bad cntlzd 0 = " & to_hstring(result); + assert(result == 0x40) + yield dut.count_right_i.eq(1) + yield Delay(1e-6) + result = yield dut.result_o + # report "bad cntlzd 0 = " & to_hstring(result); + assert(result == 0x40) + yield dut.is_32bit_i.eq(1) + yield dut.count_right_i.eq(0) + yield Delay(1e-6) + result = yield dut.result_o + # report "bad cntlzw 0 = " & to_hstring(result); + assert(result == 0x20) + yield dut.count_right_i.eq(1) + yield Delay(1e-6) + result = yield dut.result_o + # report "bad cntlzw 0 = " & to_hstring(result); + assert(result == 0x20) + # TODO next tests + + yield dut.rs_i.eq(0b00010000) + yield dut.is_32bit_i.eq(0) + yield dut.count_right_i.eq(0) + yield Delay(1e-6) + result = yield dut.result_o + assert result == 4, "result %d" % result + + yield dut.count_right_i.eq(1) + yield Delay(1e-6) + result = yield dut.result_o + assert result == 59, "result %d" % result + + yield dut.is_32bit_i.eq(1) + yield Delay(1e-6) + result = yield dut.result_o + assert result == 27, "result %d" % result + + yield dut.rs_i.eq(0b1100000100000000) + yield dut.is_32bit_i.eq(0) + yield dut.count_right_i.eq(0) + yield Delay(1e-6) + result = yield dut.result_o + assert result == 14, "result %d" % result + + yield dut.count_right_i.eq(1) + yield Delay(1e-6) + result = yield dut.result_o + assert result == 55, "result %d" % result + + yield dut.is_32bit_i.eq(1) + yield Delay(1e-6) + result = yield dut.result_o + assert result == 23, "result %d" % result + + yield dut.count_right_i.eq(0) + yield Delay(1e-6) + result = yield dut.result_o + assert result == 14, "result %d" % result + + + sim.add_process(process) # or sim.add_sync_process(process), see below + + # run test and write vcd + fn = "genullnau" + with sim.write_vcd(fn+".vcd", fn+".gtkw", traces=dut.ports()): + sim.run() + + # cntlzd_w + # cnttzd_w + + +if __name__ == "__main__": + + dut = ZeroCounter() + vl = rtlil.convert(dut, ports=dut.ports()) + with open("countzero.il", "w") as f: + f.write(vl) + + unittest.main()