From: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Date: Wed, 3 May 2023 13:22:47 +0000 (+0100)
Subject: add opentitan shift vs dsld
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=47f7f98777ddebf06befe0adad240165a2fe50ff;p=libreriscv.git

add opentitan shift vs dsld
---

diff --git a/conferences/siliconsalon2023/siliconsalon2023.tex b/conferences/siliconsalon2023/siliconsalon2023.tex
index 39ba81932..63cdf6f5f 100644
--- a/conferences/siliconsalon2023/siliconsalon2023.tex
+++ b/conferences/siliconsalon2023/siliconsalon2023.tex
@@ -214,10 +214,49 @@
    		bit-width.  256-bit unlikely to be reasonable time.
    \item 256-bit is great for EC25519 but for RSA (etc.) you run
    		into exactly the same problem as a Scalar ISA, made worse.
-   \item Opportunities to optimise algorithms not possible.
+   \item Opportunities to optimise algorithms not possible (efficient
+         power-optimised Karatsuba, etc.)
   \end{itemize}
 }
 
+\begin{frame}[fragile]\frametitle{OpenTITAN shift}
+
+ \begin{itemize}
+   \item Immediate-only: what about shift-by-register?
+   \item Merges 2 operands, but still not chainable.
+   \item Needs a copy of the vector input (doubles the number of regs).
+   \item Needs a massive 256-bit shifter: 8 layers of muxes!
+  \end{itemize}
+  
+  \begin{verbatim}
+  a = WDRs[wrs1]
+  b = WDRs[wrs2]
+  
+  result = (((a << 256) | b) >> imm) & ((1 << 256) - 1)
+  WDRs[wrd] = result
+  \end{verbatim}
+  
+\end{frame}
+
+\begin{frame}[fragile]\frametitle{Draft Double-Shift}
+
+ \begin{itemize}
+   \item Remarkably similar to x86 shld
+   \item Does not need 128-bit ROT: simple mod to existing hardware
+   \item Hardware may macro-op fuse Vector-shift for better efficiency
+   \item Chainable and in-place (no copy of vector needed).
+  \end{itemize}
+
+  \begin{verbatim}
+    n <- (RB)[58:63]    # Power ISA MSB0 numbering. sigh
+    v <- ROTL64((RA), n)
+    mask <- MASK(0, 63-n)
+    RT <- (v[0:63] & mask) | ((RC) & ~mask)
+    RS <- v[0:63] & ~mask
+  \end{verbatim}
+  
+\end{frame}
+
 \frame{\frametitle{Conclusion}
 
  \begin{itemize}