1 % Copyright 2024 Jacob Lifshay
4 \usepackage{beamerthemesplit}
6 \usepackage[english]{babel}
9 \usemintedstyle{monokai}
10 \definecolor{codebg}{rgb}{0.1, 0.09, 0.08}
11 \newminted[codeenv]{python3}{escapeinside=@@,fontsize=\small,bgcolor=codebg}
12 \newmintinline[codeinline]{python3}{escapeinside=@@,fontsize=\small,bgcolor=codebg}
15 \title[Fast Big-Integer Arithmetic on SVP64 \dots]{
16 Fast Big-Integer Arithmetic on SVP64 at up to 256-bits/cycle and beyond
19 \author{Jacob R. Lifshay
}
23 \logo{\includegraphics[height=0.5cm]{../../../images/lsoclogo.png}}
31 \begin{frame}[fragile]
32 \frametitle{What is SVP64?}
34 \item Vectorization Extension for PowerISA developed by \href{https://libre-soc.org}{Libre-SOC}
36 \item Basically, a way to modify nearly any PowerISA instruction to run it in a HW loop.
41 setvl 0, 0, 3, 0, 1, 1 # makes stuff run 3 times
42 sv.add *r3, *r15, r12 # adds 3 times
45 add r3, r15, r12 # no * means r12 doesn't increment
46 add r4, r16, r12 # * means r3 and r15 increment
52 \begin{frame}[fragile]
53 \frametitle{Big-Integer Addition on SVP64}
54 How can we use SVP64 to add 256-bit integers?
57 setvl 0, 0, 4, 0, 1, 1 # makes stuff run 4 times
58 addic r0, r0, 0 # clear CA (carry flag)
59 sv.adde *r4, *r4, *r8 # carry-propagating add
62 addic r0, r0, 0 # clear CA (carry flag)
70 \begin{frame}[fragile]
71 \frametitle{Big-Integer Addition on SVP64}
72 How can we use SVP64 to add 256-bit integers?
74 \input{sv.adde.dia-tex}
78 \frametitle{Big-Integer Addition on an example CPU}
80 SVP64 is designed for everything from tiny to big and fast CPUs; this example only shows a hypothetical big and fast CPU design
84 \frametitle{Big-Integer Addition on an example CPU}
85 \input{bigint-add-pipe.dia-tex}
88 \begin{frame}[fragile]
89 \frametitle{Big-Integer Multiply on SVP64}
90 How can we use SVP64 to multiply a 64-bit by a 256-bit integer?
93 \item new instruction: \codeinline{maddedu RT, RA, RB, RC}
95 \item $64 \times 64 + 64 \rightarrow 128$-bit Multiply-Add
97 \item Semantics as used in this presentation (somewhat simplified):
99 result = (RA * RB) + RC
100 RT = LSB_HALF(result)
101 RC = MSB_HALF(result)
106 \begin{frame}[fragile]
107 \frametitle{Big-Integer Multiply on SVP64}
108 How can we use SVP64 to multiply a 64-bit by a 256-bit integer?
112 # 256-bit input in r20-23
113 # 320-bit output in r4-8
114 setvl 0, 0, 4, 0, 1, 1 # makes stuff run 4 times
115 li r8, 0 # clear carry register
116 sv.maddedu *r4, r3, *r20, r8 # carrying multiply
120 maddedu r4, r3, r20, r8
121 maddedu r5, r3, r21, r8
122 maddedu r6, r3, r22, r8
123 maddedu r7, r3, r23, r8
128 \frametitle{Big-Integer Multiply on SVP64}
129 \input{sv.maddedu.dia-tex}
132 \begin{frame}[fragile]
133 \frametitle{Big-Integer Multiply on an example CPU}
136 \codeinline{sv.maddld *r4, *r8, *r16, *r20 # mul-add}
138 \codeinline{sv.maddedu *r4, r3, *r20, r8 # carrying multiply}
141 \input{bigint-mul-pipe.dia-tex}
144 \begin{frame}[fragile]
146 \item Discussion: \url{https://lists.libre-soc.org}
147 \item IRC \#libre-soc on OFTC or Libera
148 % workaround busted escaping in \href command
149 \item Matrix \hyperref{https://matrix.to/\#/\#_oftc_}{libre-soc:matrix}{org}{\#\_oftc\_\#libre-soc:matrix.org}
150 \item \url{https://libre-soc.org/}
151 \item Thanks to NLnet for funding this: \url{https://nlnet.nl/assure}
152 \item \url{https://libre-soc.org/nlnet/\#faq}