\end{itemize}
}
\frame{\frametitle{Pospopcount}
- \begin{itemize}
- \item "TODO
- \end{itemize}
+ \begin{semiverbatim}
+ // Copyright (c) 2020 Robert Clausecker <fuz@fuz.su>
+ // count8 reference implementation for tests. Do not alter.
+ func count8safe(counts *[8]int, buf []uint8) \{
+ for i := range buf \{
+ for j := 0; j < 8; j++ \{
+ counts[j] += int(buf[i] >> j & 1)
+ \}
+ \}
+ \}
+
+ A simple but still hardware-parallelisable SVP64 assembler for 8-bit input values (count8safe) is as follows:
+
+ mtspr 9, 3 # move r3 to CTR
+ setvl 3,0,8,0,1,1 # set MVL=8, VL=r3=MIN(MVL,CTR)
+ # load VL bytes (update r4 addr) but compressed (dw=8)
+ addi 6, 0, 0 # initialise all 64-bits of r6 to zero
+ sv.lbzu/pi/dw=8 *6, 1(4) # should be /lf here as well
+ # gather performs the transpose (which gets us to positional..)
+ gbbd 8,6
+ # now those bits have been turned around, popcount and sum them
+ setvl 0,0,8,0,1,1 # set MVL=VL=8
+ sv.popcntd/sw=8 *24,*8 # do the (now transposed) popcount
+ sv.add *16,*16,*24 # and accumulate in results
+ # branch back if CTR still non-zero. works even though VL=8
+ sv.bc/all 16, *0, -0x28 # reduce CTR by VL and stop if -ve
+ \end{semiverbatim}
+
}
\frame{\frametitle{strncpy}
\begin{itemize}