From 1b57b8363cf2a468dd4c33814e336097861fbbbe Mon Sep 17 00:00:00 2001 From: Shriya Sharma Date: Mon, 8 Jan 2024 13:49:05 +0000 Subject: [PATCH] added code for pospopcount --- .../fosdem2024_ddffirst.tex | 39 ++++++------------- 1 file changed, 11 insertions(+), 28 deletions(-) diff --git a/conferences/fosdem2024/fosdem2024_ddffirst/fosdem2024_ddffirst.tex b/conferences/fosdem2024/fosdem2024_ddffirst/fosdem2024_ddffirst.tex index dcbd7886a..ef907759b 100644 --- a/conferences/fosdem2024/fosdem2024_ddffirst/fosdem2024_ddffirst.tex +++ b/conferences/fosdem2024/fosdem2024_ddffirst/fosdem2024_ddffirst.tex @@ -181,34 +181,17 @@ function op\_cmpi(BA, RA, SI) # cmpi not vector-cmpi! \item "TODO \end{itemize} } -\frame{\frametitle{Pospopcount} - \begin{semiverbatim} - // Copyright (c) 2020 Robert Clausecker - // count8 reference implementation for tests. Do not alter. - func count8safe(counts *[8]int, buf []uint8) { - for i := range buf { - for j := 0; j < 8; j++ { - counts[j] += int(buf[i] >> j & 1) - } - } - } +\frame{\frametitle{Pospopcount.s} - A simple but still hardware-paralleliseable SVP64 assembler for 8-bit input values (count8safe) is as follows: - - mtspr 9, 3 # move r3 to CTR - setvl 3,0,8,0,1,1 # set MVL=8, VL=r3=MIN(MVL,CTR) - # load VL bytes (update r4 addr) but compressed (dw=8) - addi 6, 0, 0 # initialise all 64-bits of r6 to zero - sv.lbzu/pi/dw=8 *6, 1(4) # should be /lf here as well - # gather performs the transpose (which gets us to positional..) - gbbd 8,6 - # now those bits have been turned around, popcount and sum them - setvl 0,0,8,0,1,1 # set MVL=VL=8 - sv.popcntd/sw=8 *24,*8 # do the (now transposed) popcount - sv.add *16,*16,*24 # and accumulate in results - # branch back if CTR still non-zero. 
works even though VL=8 - sv.bc/all 16, *0, -0x28 # reduce CTR by VL and stop if -ve - \end{semiverbatim} +\lstinputlisting[language={}]{pospopcount.s} + +} + +} + +\frame{\frametitle{Pospopcount.c} + +\lstinputlisting[language={}]{pospopcount.c} } @@ -221,7 +204,7 @@ function op\_cmpi(BA, RA, SI) # cmpi not vector-cmpi! \frame{\frametitle{strncpy assembler} -\lstinputlisting[language={}]{\strncpy.s} +\lstinputlisting[language={}]{strncpy.s} } -- 2.30.2