79d1fd380d761a72788e3369490c8a7435c84813
[libreriscv.git] / conferences / fosdem2024 / fosdem2024_bigint / fosdem2024_bigint.tex
1 % Copyright 2024 Jacob Lifshay
2
3 \documentclass{beamer}
4 \usepackage{beamerthemesplit}
5 \usetheme{default}
6 \usepackage[english]{babel}
7 \usepackage{tikz}
8 \usepackage{minted}
9 \usemintedstyle{monokai}
10 \definecolor{codebg}{rgb}{0.1,0.09,0.08}
11 \newminted[codeenv]{python3}{escapeinside=@@,fontsize=\small,bgcolor=codebg}
12 \newmintinline[codeinline]{python3}{escapeinside=@@,fontsize=\small,bgcolor=codebg}
13
14 \title[Fast Big-Integer Arithmetic on SVP64 \dots]{
15 Fast Big-Integer Arithmetic on SVP64 at up to 256-bits/cycle and beyond
16 }
17
18 \author{Jacob R. Lifshay}
19
20 \date{FOSDEM 2024}
21
22 \logo{\includegraphics[height=0.5cm]{../../../images/lsoclogo.png}}
23
24 \begin{document}
25
26 \begin{frame}
27 \titlepage
28 \end{frame}
29
30 \begin{frame}[fragile]
31 \frametitle{What is SVP64?}
32 \begin{itemize}
33 \item Vectorization Extension for PowerISA developed by \href{https://libre-soc.org}{Libre-SOC}
34 \pause
35 \item Basically, a way to modify nearly any PowerISA instruction to run it in a HW loop.
36 \pause \\
37 \medskip
38 Simple Example:
39 \begin{codeenv}
40 setvl 0, 0, 3, 0, 1, 1 # makes stuff run 3 times
41 sv.add *r3, *r15, r12 # adds 3 times
42 @\pause@
43 # expands to:
44 add r3, r15, r12 # no * means r12 doesn't increment
45 add r4, r16, r12 # * means r3 and r15 increment
46 add r5, r17, r12
47 \end{codeenv}
48 \end{itemize}
49 \end{frame}
50
51 \begin{frame}[fragile]
52 \frametitle{Big-Integer Addition on SVP64}
53 How can we use SVP64 to add 256-bit integers?
54 \pause
55 \begin{codeenv}
56 setvl 0, 0, 4, 0, 1, 1 # makes stuff run 4 times
57 addic r0, r0, 0 # clear CA (carry flag)
58 sv.adde *r4, *r4, *r8 # carry-propagating add
59 @\pause@
60 # expands to:
61 addic r0, r0, 0 # clear CA (carry flag)
62 adde r4, r4, r8
63 adde r5, r5, r9
64 adde r6, r6, r10
65 adde r7, r7, r11
66 \end{codeenv}
67 \end{frame}
68
69 \begin{frame}
70 \frametitle{Big-Integer Addition on an example CPU}
71 Disclaimer:
72 SVP64 is designed for everything from tiny CPUs to big, fast CPUs; this example only shows a hypothetical big, fast CPU design.
73 \end{frame}
74
75 \begin{frame}
76 \frametitle{Big-Integer Addition on an example CPU}
77 \input{bigint-add-pipe.dia-tex}
78 \end{frame}
79
80 \begin{frame}[fragile]
81 \frametitle{Big-Integer Multiply on SVP64}
82 How can we use SVP64 to multiply a 64-bit integer by a 256-bit integer?
83 \pause
84 \begin{itemize}
85 \item new instruction: \codeinline{maddedu RT, RA, RB, RC}
86 \pause
87 \item $64 \times 64 + 64 \rightarrow 128$-bit Multiply-Add
88 \pause
89 \item Semantics as used in this presentation (somewhat simplified):
90 \begin{codeenv}
91 result = (RA * RB) + RC
92 RT = LSB_HALF(result)
93 RC = MSB_HALF(result)
94 \end{codeenv}
95 \end{itemize}
96 \end{frame}
97
98 \begin{frame}[fragile]
99 \frametitle{Big-Integer Multiply on SVP64}
100 How can we use SVP64 to multiply a 64-bit integer by a 256-bit integer?
101 \pause
102 \begin{codeenv}
103 # 64-bit input in r3
104 # 256-bit input in r20-23
105 # 320-bit output in r4-8
106 setvl 0, 0, 4, 0, 1, 1 # makes stuff run 4 times
107 li r8, 0 # clear carry register
108 sv.maddedu *r4, r3, *r20, r8 # carrying multiply
109 @\pause@
110 # expands to:
111 li r8, 0
112 maddedu r4, r3, r20, r8
113 maddedu r5, r3, r21, r8
114 maddedu r6, r3, r22, r8
115 maddedu r7, r3, r23, r8
116 \end{codeenv}
117 \end{frame}
118
119 \begin{frame}
120 \input{test.dia-tex}
121 \end{frame}
122
123 \end{document}