// Copyright (c) 2020 Robert Clausecker

// count8safe is the count8 reference implementation for tests. Do not alter.
//
// For every byte in buf and every bit position j in 0..7, it adds the value
// of bit j of that byte (0 or 1) to counts[j]. The totals are accumulated on
// top of whatever counts already holds; the function never resets them.
// An empty or nil buf leaves counts untouched.
func count8safe(counts *[8]int, buf []uint8) {
	for i := range buf {
		for j := 0; j < 8; j++ {
			// >> and & share one precedence level in Go and associate
			// left to right, so this is (buf[i] >> j) & 1.
			counts[j] += int(buf[i] >> j & 1)
		}
	}
}

// The patch that introduced this function also added a companion listing,
// pospopcount.s, reproduced below unchanged (comments are the listing's own).
// NOTE(review): presumably the SVP64 counterpart of the routine above; confirm.
//
//	mtspr 9, 3                  # move r3 to CTR
//	setvl 3,0,8,0,1,1           # set MVL=8, VL=r3=MIN(MVL,CTR)
//	# load VL bytes (update r4 addr) but compressed (dw=8)
//	addi 6, 0, 0                # initialise all 64-bits of r6 to zero
//	sv.lbzu/pi/dw=8 *6, 1(4)    # should be /lf here as well
//	# gather performs the transpose (which gets us to positional..)
//	gbbd 8,6
//	# now those bits have been turned around, popcount and sum them
//	setvl 0,0,8,0,1,1           # set MVL=VL=8
//	sv.popcntd/sw=8 *24,*8      # do the (now transposed) popcount
//	sv.add *16,*16,*24          # and accumulate in results
//	# branch back if CTR still non-zero. works even though VL=8
//	sv.bc/all 16, *0, -0x28     # reduce CTR by VL and stop if -ve