From c53f65ced7db2e96d18023bd9edcd4c9f85c7017 Mon Sep 17 00:00:00 2001
From: Luke Kenneth Casson Leighton
Date: Mon, 8 Jan 2024 14:09:04 +0000
Subject: [PATCH] remove enough chars to get slide onto 1 page, pospopcount.s

---
Reviewer notes (commentary only; not part of the commit message):

Purpose: trims the pospopcount.s listing so that it fits on a single
slide page. Comparing the removed and added lines, every instruction and
operand is unchanged; only comments and whitespace differ.

What changes:
  * the leading blank line of the file is dropped (14 lines -> 13);
  * comments are shortened;
  * the remark "should be /lf here as well" on the sv.lbzu line and the
    words "and stop if -ve" on the sv.bc line are removed;
  * the file now ends with a newline (the old version did not).

What the listing does, according to its own comments:
  * mtspr 9, 3 copies r3 into CTR;
  * the first setvl sets MVL=8 and VL=r3=MIN(MVL,CTR);
  * addi 6, 0, 0 zeroes all 64 bits of r6 before the load;
  * sv.lbzu/pi/dw=8 loads VL bytes and updates the address in r4;
  * gbbd 8,6 performs the bit transpose;
  * the second setvl sets MVL=VL=8;
  * sv.popcntd/sw=8 popcounts the transposed data into *24;
  * sv.add accumulates *24 into the results at *16;
  * sv.bc/all branches back by 0x28 while CTR is non-zero, reducing CTR
    by VL.

NOTE(review): the removed "/lf" remark flagged a possible missing mode on
the load. Dropping the remark does not resolve that question; whether
/lf is actually required cannot be determined from this patch - confirm
separately.

NOTE(review): this copy of the patch was reconstructed from text whose
line breaks and runs of spaces had been collapsed. The original
indentation and comment-column alignment inside the hunk are not
recoverable here, so the hunk below may not apply as-is - regenerate it
from the repository before using git am.

 .../fosdem2024_ddffirst/pospopcount.s | 27 +++++++++----------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/conferences/fosdem2024/fosdem2024_ddffirst/pospopcount.s b/conferences/fosdem2024/fosdem2024_ddffirst/pospopcount.s
index c8347927f..2b905f135 100644
--- a/conferences/fosdem2024/fosdem2024_ddffirst/pospopcount.s
+++ b/conferences/fosdem2024/fosdem2024_ddffirst/pospopcount.s
@@ -1,14 +1,13 @@
-
- mtspr 9, 3 # move r3 to CTR
- setvl 3,0,8,0,1,1 # set MVL=8, VL=r3=MIN(MVL,CTR)
- # load VL bytes (update r4 addr) but compressed (dw=8)
- addi 6, 0, 0 # initialise all 64-bits of r6 to zero
- sv.lbzu/pi/dw=8 *6, 1(4) # should be /lf here as well
- # gather performs the transpose (which gets us to positional..)
- gbbd 8,6
- # now those bits have been turned around, popcount and sum them
- setvl 0,0,8,0,1,1 # set MVL=VL=8
- sv.popcntd/sw=8 *24,*8 # do the (now transposed) popcount
- sv.add *16,*16,*24 # and accumulate in results
- # branch back if CTR still non-zero. works even though VL=8
- sv.bc/all 16, *0, -0x28 # reduce CTR by VL and stop if -ve
\ No newline at end of file
+mtspr 9, 3 # move r3 to CTR
+setvl 3,0,8,0,1,1 # MVL=8, VL=r3=MIN(MVL,CTR)
+# load VL bytes (update r4 addr) but width=8 (dw=8)
+addi 6, 0, 0 # set 64-bits of r6=0
+sv.lbzu/pi/dw=8 *6, 1(4)
+# gather performs transpose (gets us to positional)
+gbbd 8,6
+# now bits are turned around, popcount and sum them
+setvl 0,0,8,0,1,1 # set MVL=VL=8
+sv.popcntd/sw=8 *24,*8 # do (transposed) popcnt
+sv.add *16,*16,*24 # accumulate in results
+# branch back if CTR non-zero. works even when VL=8
+sv.bc/all 16, *0, -0x28 # reduce CTR by VL
-- 
2.30.2