X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=conferences%2Ffosdem2024%2Ffosdem2024_ddffirst%2Ffosdem2024_ddffirst.tex;h=06301e8998b12505898bd0677ce316c5830e362e;hb=946f05168d76c8040ad982ec649ba8bcd9499c2f;hp=f27646e63f95c4d64b8ec88a5afabca58523a1ee;hpb=eb93d93bb6fa4f31f71816f06473a6589c0bb7c5;p=libreriscv.git diff --git a/conferences/fosdem2024/fosdem2024_ddffirst/fosdem2024_ddffirst.tex b/conferences/fosdem2024/fosdem2024_ddffirst/fosdem2024_ddffirst.tex index f27646e63..06301e899 100644 --- a/conferences/fosdem2024/fosdem2024_ddffirst/fosdem2024_ddffirst.tex +++ b/conferences/fosdem2024/fosdem2024_ddffirst/fosdem2024_ddffirst.tex @@ -84,7 +84,7 @@ heat sink normally not required (simplifies overall design) \vspace{3pt} \item Fully-integrated peripherals (not Northbridge/Southbridge)\\ - USB, HDMI, RGB/TTL, SD/MMC, I2C, UART, SPI, GPIO etc. etc. + USB, HDMI, RGB/TTL, SD/MMC, I2C, UART, SPI, GPIO etc. etc. \vspace{3pt} \item Built-in GPU (shared memory bus, 3rd party licensed) \vspace{3pt} \item Built-in VPU (likewise, proprietary)\vspace{3pt} @@ -183,19 +183,26 @@ for (i = 0; i < VL; i++) \begin{itemize} \item Positional popcount adds up the totals of each bit set to 1 in each bit-position, of an array of input values. \item Notoriously difficult to do in SIMD assembler: typically 550 lines + \item https://github.com/clausecker/pospop \end{itemize} \lstinputlisting[language={}]{pospopcount.c} + } \frame{\frametitle{Pospopcount} \begin{center} - \includegraphics[width=0.6\textwidth]{pospopcount.png} + \includegraphics[width=0.5\textwidth]{pospopcount.png} \end{center} - + \begin{itemize} + \item The challenge is to perform an appropriate transpose of the data (the CPU can only work on registers, horizontally), + in blocks that suit the processor and the ISA capacity. + + + \end{itemize} } \frame{\frametitle{Pospopcount} @@ -205,11 +212,10 @@ for (i = 0; i < VL; i++) \end{center} \begin{itemize} - \item The challenge is to perform an appropriate transpose of the data, - in blocks that suit the processor and the ISA capacity. - \item The draft gbbd instruction implemets the transpose, - preparing the data for using the standard popcount instruction. + \item The draft gbbd instruction implements the transpose (shown above), + preparing the data to use standard popcount. + (gbbd is based on Power ISA vgbbd, v3.1 p445) \end{itemize} @@ -259,7 +265,7 @@ for (i = 0; i < VL; i++) development costs for customers \item It also happens to be fascinating, deeply rewarding technically challenging, and funded by NLnet - + \end{itemize} } @@ -271,7 +277,7 @@ for (i = 0; i < VL; i++) Questions?\vspace{12pt} } \end{center} - + \begin{itemize} \item Discussion: http://lists.libre-soc.org \item Freenode IRC \#libre-soc