1 # Popcount: a successive (cascading) sum-reduction algorithm starting from
2 # single-bit adds and reducing down to one final answer: the total number of
3 # bits set to "1" in the input.
5 from nmigen
import (Elaboratable
, Module
, Signal
, Cat
, Const
)
8 def array_of(count
, bitwidth
):
10 for i
in range(count
):
11 res
.append(Signal(bitwidth
, reset_less
=True,
12 name
=f
"pop_{bitwidth}_{i}"))
16 class Popcount(Elaboratable
):
18 self
.a
= Signal(64, reset_less
=True)
19 self
.b
= Signal(64, reset_less
=True)
20 self
.data_len
= Signal(64, reset_less
=True)
21 self
.o
= Signal(64, reset_less
=True)
23 def elaborate(self
, platform
):
26 a
, b
, data_len
, o
= self
.a
, self
.b
, self
.data_len
, self
.o
28 # starting from a, perform successive addition-reductions
29 # creating arrays big enough to store the sum, each time
31 # QTY32 2-bit (to take 2x 1-bit sums) etc.
32 work
= [(32, 2), (16, 3), (8, 4), (4, 5), (2, 6), (1, 7)]
33 for l
, bw
in work
: # l=number of add-reductions, bw=bitwidth
34 pc
.append(array_of(l
, bw
))
35 pc8
= pc
[3] # array of 8 8-bit counts (popcntb)
36 pc32
= pc
[5] # array of 2 32-bit counts (popcntw)
37 popcnt
= pc
[-1] # array of 1 64-bit count (popcntd)
38 # cascade-tree of adds
39 for idx
, (l
, bw
) in enumerate(work
):
42 src
, dst
= pc
[idx
], pc
[idx
+1]
43 comb
+= dst
[i
].eq(Cat(src
[stt
], Const(0, 1)) +
44 Cat(src
[end
], Const(0, 1)))
45 # decode operation length (1-hot)
46 with m
.If(data_len
== 1):
47 # popcntb - pack 8x 4-bit answers into 8x 8-bit output fields
49 comb
+= o
[i
*8:(i
+1)*8].eq(pc8
[i
])
50 with m
.Elif(data_len
== 4):
51 # popcntw - pack 2x 5-bit answers into 2x 32-bit output fields
53 comb
+= o
[i
*32:(i
+1)*32].eq(pc32
[i
])
55 # popcntd - put 1x 6-bit answer into 64-bit output
56 comb
+= o
.eq(popcnt
[0])