-\frame{\frametitle{Simple SBC-style SoC}
-
-\begin{center}
-\includegraphics[width=0.6\textwidth]{pospopcount.png}
-\end{center}
-
-}
-
-
-
-
\begin{frame}[fragile]
\frametitle{Simple-V CMPI in a nutshell}
CR[BA+id] <= compare(ireg[RA+ira], SI);
if (reg\_is\_vectorised[BA] ) \{ id += 1; \}
if (reg\_is\_vectorised[RA]) \{ ira += 1; \}
- if test (CR[BA+id]) == FAIL: \{ VL = id + 1; break \}
+ if test (CR[BA+id]) == FAIL: \{ VL = i + 1; break \}
\end{semiverbatim}
\begin{itemize}
\end{itemize}
\end{frame}
-
-
-\frame{\frametitle{Additional Simple-V features}
-
- \begin{itemize}
- \item "fail-on-first" (POWER9 VSX strncpy segfaults on boundary!)
- \item "Twin Predication" (covers VSPLAT, VGATHER, VSCATTER, VINDEX etc.)
- \item SVP64: extensive "tag" (Vector context) augmentation
- \item "Context propagation": a VLIW-like context. Allows contexts
- to be repeatedly applied.
- Effectively a "hardware compression algorithm" for ISAs.
- \item Ultimate goal: cut down I-Cache usage, cuts down on power
- \item Typical GPU has its own I-Cache and small shaders.\\
- \textit{We are a Hybrid CPU/GPU: I-Cache is not separate!}
- \item Needs to go through OpenPOWER Foundation `approval'
- \end{itemize}
-}
+\frame{\frametitle{Power ISA v3.1 vstribr}
+
+ \lstinputlisting[language={}]{vstribr.txt}
+
+ \begin{itemize}
+ \item ironically this hard-coded instruction is
+ identical to general-purpose Simple-V DD-FFirst...
+ \end{itemize}
+
+}
\frame{\frametitle{maxloc}
\begin{itemize}
\begin{itemize}
\item Positional popcount counts, for each bit-position, how many values in an array of input values have that bit set to 1.
\item Notoriously difficult to do in SIMD assembler: typically 550 lines
+
\end{itemize}
\lstinputlisting[language={}]{pospopcount.c}
}
+
+\frame{\frametitle{Pospopcount}
+
+ \begin{center}
+ \includegraphics[width=0.6\textwidth]{pospopcount.png}
+ \end{center}
+
+}
+
+\frame{\frametitle{Pospopcount}
+
+ \begin{center}
+ \includegraphics[width=0.6\textwidth]{array_popcnt.png}
+ \end{center}
+
+ \begin{itemize}
+ \item The challenge is to perform an appropriate transpose of the data,
+ in blocks that suit the processor and the ISA capacity.
+ \item The draft gbbd instruction implements the transpose,
+ preparing the data for using the standard popcount instruction.
+
+
+ \end{itemize}
+
+}
+
\frame{\frametitle{Pospopcount.s}
\frame{\frametitle{strncpy}
+ \lstinputlisting[language={}]{strncpy.c}
\begin{itemize}
\item TODO
\end{itemize}
}
+
+
\frame{\frametitle{strncpy assembler}
\lstinputlisting[language={}]{strncpy.s}