From: Luke Kenneth Casson Leighton Date: Mon, 12 Feb 2024 16:50:47 +0000 (+0000) Subject: update fosdem 2024 ddffirst strncpy section on slides X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=c40d9e6aa609839dd5435dfe0e94babb8bdc3e81;p=libreriscv.git update fosdem 2024 ddffirst strncpy section on slides --- diff --git a/conferences/fosdem2024/fosdem2024_ddffirst/fosdem2024_ddffirst.tex b/conferences/fosdem2024/fosdem2024_ddffirst/fosdem2024_ddffirst.tex index c02d755c2..de1d19eb0 100644 --- a/conferences/fosdem2024/fosdem2024_ddffirst/fosdem2024_ddffirst.tex +++ b/conferences/fosdem2024/fosdem2024_ddffirst/fosdem2024_ddffirst.tex @@ -102,12 +102,6 @@ for (i = 0; i < VL; i++) identical to general-purpose Simple-V DD-FFirst... \end{itemize} -}Po - -\frame{\frametitle{maxloc} - \begin{itemize} - \item "TODO - \end{itemize} } \frame{\frametitle{Pospopcount} @@ -153,26 +147,34 @@ for (i = 0; i < VL; i++) } -\frame{\frametitle{Pospopcount.s} +\frame{\frametitle{pospopcount assembler} \lstinputlisting[language={}]{pospopcount.s} } - \frame{\frametitle{strncpy} \lstinputlisting[language={}]{strncpy.c} \begin{itemize} - \item two simple-looking for-loops, unfortunately sequentially + \item two simple-looking for-loops, data-dependent in the first. - \item Power ISA added a hard-coded variant of this inner - data-dependent capacity into VSX - only for strcpy! + \item sv.cmpi stops at the first zero, /vli includes the zero + in VL. + \item note the post-increment Load/Store: saves + pre-decrementing + \item a Vector of CRs is produced which then gets tested + by the sv.bc/all instruction, counting down CTR + per item tested. + \item Power ISA added hard-coded data-dependent capacity + into vstribr, whereas in SVP64 it is generic (applies + to any instruction) \item even the null-ing part is not straightforward as it could be mis-aligned compared to the VSX width. - \item end-result is that assembler-optimised strncpy on Power - ISA v3.0 is a whopping 240 instructions. 
SVP64 is 10 + \item end-result: assembler-optimised strncpy on Power + ISA v3.0 is a whopping 240 instructions. SVP64 is 10 + and parallel in HW \end{itemize} } @@ -214,6 +216,12 @@ for (i = 0; i < VL; i++) \end{center} } +\frame{\frametitle{maxloc} + \begin{itemize} + \item TODO + \end{itemize} +} + \frame{\frametitle{Summary} \begin{itemize} diff --git a/conferences/fosdem2024/fosdem2024_ddffirst/strncpy.c b/conferences/fosdem2024/fosdem2024_ddffirst/strncpy.c index 0c76efa2a..af65f16de 100644 --- a/conferences/fosdem2024/fosdem2024_ddffirst/strncpy.c +++ b/conferences/fosdem2024/fosdem2024_ddffirst/strncpy.c @@ -1,4 +1,2 @@ -for (i = 0; i < n && src[i] != chr(0); i++) - dest[i] = src[i]; -for ( ; i < n; i++) - dest[i] = chr(0); +for (i=0; i