\frametitle{ADD pseudocode (or trap, or actual hardware loop)}
\begin{semiverbatim}
-function op_add(rd, rs1, rs2, predr) \{ # add not PADD!
+function op_add(rd, rs1, rs2, predr) # add not VADD!
int i, id=0, irs1=0, irs2=0;
for (i=0; i < MIN(VL, vectorlen[rd]); i++)
if (ireg[predr] & 1<<i) # predication uses intregs
- ireg[rd+id] <= ireg[rs1+irs1] + ireg[rs2+irs2];
- # now increment idxs: src/dest all vec/scalar
+ ireg[rd+id] <= ireg[rs1+irs1] + ireg[rs2+irs2];
if (reg_is_vectorised[rd]) \{ id += 1; \}
- if (reg_is_vectorised[rs1]) \{ irs1 += 1; \}
- if (reg_is_vectorised[rs2]) \{ irs2 += 1; \}
-\}
+ if (reg_is_vectorised[rs1]) \{ irs1 += 1; \}
+ if (reg_is_vectorised[rs2]) \{ irs2 += 1; \}
\end{semiverbatim}
- \begin{itemize}
+ \begin{itemize}
+ \item SIMD slightly more complex (case above is elwidth = default)
\item Scalar-scalar and scalar-vector and vector-vector now all in one
\item OoO may choose to push ADDs into instr. queue (v. busy!)
\end{itemize}