From: Luke Kenneth Casson Leighton Date: Wed, 3 May 2023 13:22:47 +0000 (+0100) Subject: add opentitan shift vs dsld X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=47f7f98777ddebf06befe0adad240165a2fe50ff;p=libreriscv.git add opentitan shift vs dsld --- diff --git a/conferences/siliconsalon2023/siliconsalon2023.tex b/conferences/siliconsalon2023/siliconsalon2023.tex index 39ba81932..63cdf6f5f 100644 --- a/conferences/siliconsalon2023/siliconsalon2023.tex +++ b/conferences/siliconsalon2023/siliconsalon2023.tex @@ -214,10 +214,49 @@ bit-width. 256-bit unlikely to be reasonable time. \item 256-bit is great for EC25519 but for RSA (etc.) you run into exactly the same problem as a Scalar ISA, made worse. - \item Opportunities to optimise algorithms not possible. + \item Opportunities to optimise algorithms not possible (efficient + power-optimised Karatsuba, etc.) \end{itemize} } +\begin{frame}[fragile]\frametitle{OpenTITAN shift} + + \begin{itemize} + \item Immediate-only. What about shift-by-reg? + \item Merges 2 operands, still not chainable. + \item Needs a copy of the vector input (doubles the number of regs) + \item Needs a massive 256-bit shifter! 8 layers of muxes! + \end{itemize} + + \begin{verbatim} + a = WDRs[wrs1] + b = WDRs[wrs2] + + result = (((a << 256) | b) >> imm) & ((1 << 256) - 1) + WDRs[wrd] = result + \end{verbatim} + +\end{frame} + +\begin{frame}[fragile]\frametitle{Draft Double-Shift} + + \begin{itemize} + \item Remarkably similar to x86 SHLD + \item Does not need 128-bit ROT: simple mod to existing hardware + \item Hardware may macro-op fuse Vector-shift for better efficiency + \item Chainable and in-place (no copy of vector needed). + \end{itemize} + + \begin{verbatim} + n <- (RB)[58:63] # Power ISA MSB0 numbering. sigh + v <- ROTL64((RA), n) + mask <- MASK(0, 63-n) + RT <- (v[0:63] & mask) | ((RC) & ~mask) + RS <- v[0:63] & ~mask + \end{verbatim} + +\end{frame} + \frame{\frametitle{Conclusion} \begin{itemize}