From a803c8b6d3f864293e9dac770ec913b16c10232e Mon Sep 17 00:00:00 2001 From: lkcl Date: Thu, 30 Nov 2023 15:25:26 +0000 Subject: [PATCH] --- openpower/sv/cookbook/pospopcnt.mdwn | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/openpower/sv/cookbook/pospopcnt.mdwn b/openpower/sv/cookbook/pospopcnt.mdwn index 53767c25e..bb6181aae 100644 --- a/openpower/sv/cookbook/pospopcnt.mdwn +++ b/openpower/sv/cookbook/pospopcnt.mdwn @@ -19,6 +19,27 @@ func count8safe(counts *[8]int, buf []uint8) { } } ``` + +A simple but still hardware-parallelisable SVP64 assembler for +8-bit input values (`count8safe`) is as follows: + +``` +mtspr 9, 3 # move r3 to CTR +# VL = MIN(CTR,MAXVL=8), Rc=1 (CR0 set if CTR ends) +setvl 3,0,8,0,1,1 # set MVL=8, VL=CTR and CR0 (Rc=1) +# load VL bytes (update r4 addr) but compressed (dw=8) +addi 6, 0, 0 # initialise all 64-bits of r6 to zero +sv.lbzu/pi/dw=8 *6, 1(4) # should be /lf here as well +# gather performs the transpose (which gets us to positional..) +gbbd 8,6 +# now those bits have been turned around, popcount and sum them +setvl 0,0,8,0,1,1 # set MVL=VL=8 +sv.popcntd/sw=8 *24,*8 # do the (now transposed) popcount +sv.add *16,*16,*24 # and accumulate in results +# branch back if CTR still non-zero. works even though VL=8 +sv.bc/all 16, *0, -0x28 # reduce CTR by VL and stop if -ve +``` + Array popcount is just standard popcount function ([[!wikipedia Hamming weight]]) on an array of values whereas positional popcount adds up the totals of each bit set to 1 in each bit-position, of an array of input values. pospopcnt -- 2.30.2