whitespace
[libreriscv.git] / conferences / fosdem2024 / fosdem2024_ddffirst / fosdem2024_ddffirst.tex
index 56b43a1b593a257b02e1dbe2f4225717609be484..06301e8998b12505898bd0677ce316c5830e362e 100644 (file)
@@ -84,7 +84,7 @@
                heat sink normally not required (simplifies overall design)
                \vspace{3pt}
    \item Fully-integrated peripherals (not Northbridge/Southbridge)\\
                heat sink normally not required (simplifies overall design)
                \vspace{3pt}
    \item Fully-integrated peripherals (not Northbridge/Southbridge)\\
-         USB, HDMI, RGB/TTL, SD/MMC, I2C, UART, SPI, GPIO etc. etc. 
+         USB, HDMI, RGB/TTL, SD/MMC, I2C, UART, SPI, GPIO etc. etc.
          \vspace{3pt}
    \item Built-in GPU (shared memory bus, 3rd party licensed) \vspace{3pt}
    \item Built-in VPU (likewise, proprietary)\vspace{3pt}
          \vspace{3pt}
    \item Built-in GPU (shared memory bus, 3rd party licensed) \vspace{3pt}
    \item Built-in VPU (likewise, proprietary)\vspace{3pt}
 
 
 
 
 
 
-\frame{\frametitle{Simple SBC-style SoC}
-
-\begin{center}
-\includegraphics[width=0.6\textwidth]{pospopcount.png}
-\end{center}
-
-}
-
-
-
-
 \begin{frame}[fragile]
 \frametitle{Simple-V CMPI in a nutshell}
 
 \begin{frame}[fragile]
 \frametitle{Simple-V CMPI in a nutshell}
 
@@ -141,8 +130,8 @@ function op\_cmpi(BA, RA, SI) # cmpi not vector-cmpi!
         \item ARM SVE: https://arxiv.org/pdf/1803.06185.pdf
         \item more: wikipedia Vector processor page: Fault/Fail First
         \vspace{10pt}
         \item ARM SVE: https://arxiv.org/pdf/1803.06185.pdf
         \item more: wikipedia Vector processor page: Fault/Fail First
         \vspace{10pt}
-               \item Load/Store is Memory to/from Register, \\
-              what about Register to Register?
+               \item Load/Store is Memory to/from Register, what about
+              Register to Register?
         \item Register-to-register: "Data-Dependent Fail-First."
         \item Z80 LDIR: Mem-Register, CPIR: Register-Register
        \end{itemize}
         \item Register-to-register: "Data-Dependent Fail-First."
         \item Z80 LDIR: Mem-Register, CPIR: Register-Register
        \end{itemize}
@@ -172,7 +161,16 @@ for (i = 0; i < VL; i++)
        \end{itemize}
 \end{frame}
 
        \end{itemize}
 \end{frame}
 
-
+\frame{\frametitle{Power ISA v3.1 vstribr}
+       
+       \lstinputlisting[language={}]{vstribr.txt}
+       
+       \begin{itemize}
+               \item ironically this hard-coded instruction is
+               identical to general-purpose Simple-V DD-FFirst...
+       \end{itemize}
+       
+}
 
 \frame{\frametitle{maxloc}
   \begin{itemize}
 
 \frame{\frametitle{maxloc}
   \begin{itemize}
@@ -185,11 +183,44 @@ for (i = 0; i < VL; i++)
   \begin{itemize}
        \item   Positional popcount adds up the totals of each bit set to 1 in each bit-position, of an array of input values.
        \item   Notoriously difficult to do in SIMD assembler: typically 550 lines
   \begin{itemize}
        \item   Positional popcount adds up the totals of each bit set to 1 in each bit-position, of an array of input values.
        \item   Notoriously difficult to do in SIMD assembler: typically 550 lines
+    \item https://github.com/clausecker/pospop
+
    \end{itemize}
        
        \lstinputlisting[language={}]{pospopcount.c}
    \end{itemize}
        
        \lstinputlisting[language={}]{pospopcount.c}
+
        
 }
        
 }
+
+\frame{\frametitle{Pospopcount}
+       
+       \begin{center}
+               \includegraphics[width=0.5\textwidth]{pospopcount.png}
+       \end{center}
+         \begin{itemize}
+               \item   The challenge is to perform an appropriate transpose of the data (the CPU can only work on registers, horizontally),
+               in blocks that suit the processor and the ISA capacity.
+
+               
+       \end{itemize}
+}
+
+\frame{\frametitle{Pospopcount}
+       
+       \begin{center}
+               \includegraphics[width=0.6\textwidth]{array_popcnt.png}
+       \end{center}
+
+  \begin{itemize}
+
+               \item   The draft gbbd instruction implements the transpose (shown above),
+                               preparing the data to use standard popcount.
+                          (gbbd is based on Power ISA vgbbd, v3.1 p445)
+       
+       \end{itemize}
+       
+}
+
 \frame{\frametitle{Pospopcount.s}
 
 
 \frame{\frametitle{Pospopcount.s}
 
 
@@ -200,11 +231,14 @@ for (i = 0; i < VL; i++)
 
 \frame{\frametitle{strncpy}
 
 
 \frame{\frametitle{strncpy}
 
+       \lstinputlisting[language={}]{strncpy.c}
   \begin{itemize}
        \item TODO
  \end{itemize} 
 }
 
   \begin{itemize}
        \item TODO
  \end{itemize} 
 }
 
+
+
 \frame{\frametitle{strncpy assembler}
 
 \lstinputlisting[language={}]{strncpy.s}
 \frame{\frametitle{strncpy assembler}
 
 \lstinputlisting[language={}]{strncpy.s}
@@ -231,7 +265,7 @@ for (i = 0; i < VL; i++)
          development costs for customers
    \item It also happens to be fascinating, deeply rewarding, technically
          challenging, and funded by NLnet
          development costs for customers
    \item It also happens to be fascinating, deeply rewarding, technically
          challenging, and funded by NLnet
-         
+
   \end{itemize}
 }
 
   \end{itemize}
 }
 
@@ -243,7 +277,7 @@ for (i = 0; i < VL; i++)
                   Questions?\vspace{12pt}
        }
   \end{center}
                   Questions?\vspace{12pt}
        }
   \end{center}
-  
+
   \begin{itemize}
        \item Discussion: http://lists.libre-soc.org
        \item Freenode IRC \#libre-soc
   \begin{itemize}
        \item Discussion: http://lists.libre-soc.org
        \item Freenode IRC \#libre-soc