1 % Copyright 2024 Jacob Lifshay
4 \usepackage{beamerthemesplit}
6 \usepackage[english]{babel}
9 \usemintedstyle{monokai}
10 \definecolor{codebg}{rgb}{0.1, 0.09, 0.08}
11 \newminted[codeenv]{python3}{escapeinside=@@,fontsize=\small,bgcolor=codebg}
12 \newmintinline[codeinline]{python3}{escapeinside=@@,fontsize=\small,bgcolor=codebg}
14 \title[Fast Big-Integer Arithmetic on SVP64 ...]{
15 Fast Big-Integer Arithmetic on SVP64 at up to 256-bits/cycle and beyond
18 \author{Jacob R. Lifshay}
22 \logo{\includegraphics[height=0.5cm]{../../../images/lsoclogo.png}}
30 \begin{frame}[fragile]
31 \frametitle{What is SVP64?}
33 \item Vectorization Extension for PowerISA developed by \href{https://libre-soc.org}{Libre-SOC}
35 \item Basically, a way to modify nearly any PowerISA instruction to run it in a HW loop.
40 setvl 0, 0, 3, 0, 1, 1 # makes stuff run 3 times
41 sv.add *r3, *r15, r12 # adds 3 times
44 add r3, r15, r12 # no * means r12 doesn't increment
45 add r4, r16, r12 # * means r3 and r15 increment
51 \begin{frame}[fragile]
52 \frametitle{Big-Integer Addition on SVP64}
53 How can we use SVP64 to add 256-bit integers?
56 setvl 0, 0, 4, 0, 1, 1 # makes stuff run 4 times
57 addic r0, r0, 0 # clear CA (carry flag)
58 sv.adde *r4, *r4, *r8 # carry-propagating add
61 addic r0, r0, 0 # clear CA (carry flag)
69 \begin{frame}[fragile]
70 \frametitle{Big-Integer Addition on SVP64}
71 How can we use SVP64 to add 256-bit integers?
73 \input{sv.adde.dia-tex}
77 \frametitle{Big-Integer Addition on an example CPU}
79 SVP64 is designed for everything from tiny to big and fast CPUs; this example only shows a hypothetical big and fast CPU design.
83 \frametitle{Big-Integer Addition on an example CPU}
84 \input{bigint-add-pipe.dia-tex}
87 \begin{frame}[fragile]
88 \frametitle{Big-Integer Multiply on SVP64}
89 How can we use SVP64 to multiply a 64-bit by a 256-bit integer?
92 \item new instruction: \codeinline{maddedu RT, RA, RB, RC}
94 \item $64 \times 64 + 64 \rightarrow 128$-bit Multiply-Add
96 \item Semantics as used in this presentation (somewhat simplified):
98 result = (RA * RB) + RC
100 RC = MSB_HALF(result)
105 \begin{frame}[fragile]
106 \frametitle{Big-Integer Multiply on SVP64}
107 How can we use SVP64 to multiply a 64-bit by a 256-bit integer?
111 # 256-bit input in r20-23
112 # 320-bit output in r4-8
113 setvl 0, 0, 4, 0, 1, 1 # makes stuff run 4 times
114 li r8, 0 # clear carry register
115 sv.maddedu *r4, r3, *r20, r8 # carrying multiply
119 maddedu r4, r3, r20, r8
120 maddedu r5, r3, r21, r8
121 maddedu r6, r3, r22, r8
122 maddedu r7, r3, r23, r8