From b1331f37e6b41ac133f4286486a4af27b3f7fc1a Mon Sep 17 00:00:00 2001
From: Luke Kenneth Casson Leighton
Date: Tue, 9 Jan 2024 14:10:32 +0000
Subject: [PATCH] bug 1244: update pospopcnt.s assembler comments

---
 .../fosdem2024/fosdem2024_ddffirst/pospopcount.s | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/conferences/fosdem2024/fosdem2024_ddffirst/pospopcount.s b/conferences/fosdem2024/fosdem2024_ddffirst/pospopcount.s
index 2b905f135..3bbdddf10 100644
--- a/conferences/fosdem2024/fosdem2024_ddffirst/pospopcount.s
+++ b/conferences/fosdem2024/fosdem2024_ddffirst/pospopcount.s
@@ -1,13 +1,12 @@
 mtspr 9, 3 # move r3 to CTR
 setvl 3,0,8,0,1,1 # MVL=8, VL=r3=MIN(MVL,CTR)
-# load VL bytes (update r4 addr) but width=8 (dw=8)
-addi 6, 0, 0 # set 64-bits of r6=0
-sv.lbzu/pi/dw=8 *6, 1(4)
-# gather performs transpose (gets us to positional)
-gbbd 8,6
-# now bits are turned around, popcount and sum them
+# load VL bytes (update r4 addr) at width=8 (dw=8)
+addi 6, 0, 0 # set all 64-bits of r6=0
+sv.lbzu/pi/dw=8 *6, 1(4)
+gbbd 8,6 # gbbd performs the transpose
+# now bits are turned around, popcnt and sum them
 setvl 0,0,8,0,1,1 # set MVL=VL=8
 sv.popcntd/sw=8 *24,*8 # do (transposed) popcnt
 sv.add *16,*16,*24 # accumulate in results
-# branch back if CTR non-zero. works even when VL=8
-sv.bc/all 16, *0, -0x28 # reduce CTR by VL
+# branch back if CTR non-zero works even when VL=8
+sv.bc/all 16, *0, -0x28 # reduces CTR by VL
-- 
2.30.2