From ba3cabae3f787742c114ae675d18c177200b610e Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Sat, 19 May 2018 18:54:39 +0100 Subject: [PATCH] more slides --- simple_v_extension/simple_v_chennai_2018.tex | 44 +++++++++++++++++++- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/simple_v_extension/simple_v_chennai_2018.tex b/simple_v_extension/simple_v_chennai_2018.tex index 56e99d013..142094ed2 100644 --- a/simple_v_extension/simple_v_chennai_2018.tex +++ b/simple_v_extension/simple_v_chennai_2018.tex @@ -27,7 +27,7 @@ \begin{itemize} \item RVV very heavy-duty (excellent for supercomputing)\vspace{10pt} \item Simple-V abstracts parallelism (based on best of RVV)\vspace{10pt} - \item Graded levels: hardware or software-emulation\vspace{10pt} + \item Graded levels: hardware, hybrid or traps\vspace{10pt} \item Even Compressed instructions become vectorised\vspace{10pt} \end{itemize} What Simple-V is not:\vspace{10pt} @@ -96,13 +96,53 @@ function op_add(rd, rs1, rs2, predr) # add not VADD! if (reg_is_vectorised[rs1]) \{ irs1 += 1; \} if (reg_is_vectorised[rs2]) \{ irs2 += 1; \} \end{semiverbatim} + \begin{itemize} - \item SIMD slightly more complex (case above is elwidth = default) + \item SIMD slightly more complex (case above is elwidth = default) \item Scalar-scalar and scalar-vector and vector-vector now all in one \item OoO may choose to push ADDs into instr. queue (v. busy!) \end{itemize} \end{frame} +\begin{frame}[fragile] +\frametitle{Predication-Branch (or trap, or actual hardware loop)} + +\begin{semiverbatim} +s1 = vectorlen[src1] > 1; +s2 = vectorlen[src2] > 1; +for (int i = 0; i < VL; ++i) + preg[rs3] |= 1 << cmp(s1 ? reg[src1+i] : reg[src1], + s2 ? 
reg[src2+i] : reg[src2]); +\end{semiverbatim} + + \begin{itemize} + \item SIMD slightly more complex (case above is elwidth = default) + \item If s1 and s2 are both scalars, a standard branch occurs + \item Predication stored in integer regfile as a bitfield + \item x + \end{itemize} +\end{frame} + +\begin{frame}[fragile] +\frametitle{LD/LD.S/LD.X (or trap, or actual hardware loop)} + +\begin{semiverbatim} +if (unit-strided) stride = elsize; +else stride = areg[as2]; // constant-strided +for (int i = 0; i < VL; ++i) + if (preg_enabled[rd] && ([!]preg[rd] & 1<