bug 1244: separate frame for linked list image
[libreriscv.git] / conferences / ics2021 / ics2021_svp64.tex
1 \documentclass[slidestop]{beamer}
2 \usepackage{beamerthemesplit}
3 \usepackage{graphics}
4 \usepackage{pstricks}
5
6 \graphicspath{{./}}
7
8 \title{The Libre-SOC Hybrid 3D CPU}
9 \author{Luke Kenneth Casson Leighton}
10
11
12 \begin{document}
13
14 \frame{
15 \begin{center}
16 \huge{Libre-SOC SVP64 Vector Processing}\\
17 \vspace{32pt}
18 \Large{Augmenting the OpenPOWER ISA}\\
19 \Large{to provide 3D and Video instructions}\\
20 \Large{and add Cray-style Vector Extensions}\\
21 \vspace{24pt}
22 \Large{ICS2021}\\
23 \vspace{16pt}
24 \large{Sponsored by NLnet's PET Programme}\\
25 \vspace{6pt}
26 \large{June 14, 2021}
27 \end{center}
28 }
29
30
31 \frame{\frametitle{OpenPOWER today}
32
33 \begin{center}
34 \begin{itemize}
35 \item Open ISA: EULA v3.0B announced August 2019\vspace{6pt}
36 \item Compliancy subsets: mandatory and optional features
37 \vspace{6pt}
38 \item Compliance provides royalty-free IBM Patent grant\vspace{6pt}
39 \item Custom extensions permitted (see v3.0C): recommends ``common-usage''
40 ones be submitted as RFCs to OpenPOWER ISA WG
41 \vspace{6pt}
42 \item On this basis we have the freedom and are encouraged to create
43 Cray-style Vectorisation Extensions
44 \vspace{6pt}
45 \item VSX will not be part of that: it is fixed-width SIMD.\\
46 https://tinyurl.com/simd-considered-harmful\\
47 https://en.wikipedia.org/wiki/Vector\_processor
48 \vspace{6pt}
49 \end{itemize}
50 \end{center}
51
52 }
53
54
55 \frame{\frametitle{Why OpenPOWER?}
56
57 \vspace{10pt}
58
59 \begin{itemize}
60 \item Good ecosystem essential\\
61 linux kernel, u-boot, compilers, OSes,\\
62 Reference Implementation(s)\vspace{10pt}
63 \item Supportive Foundation and Members\\
64 need to be able to submit ISA augmentations\\
65 (for proper peer review)\vspace{10pt}
66 \item No NDAs, full transparency must be acceptable\\
67 due to being funded under NLnet's PET Programme\vspace{10pt}
68 \item OpenPOWER: established for decades, excellent Foundation,\\
69 Microwatt as Reference, approachable and friendly.
70 \end{itemize}
71 }
72
73 \frame{\frametitle{Severe Limitations of RISC-V for Supercomputing}
74
75 \begin{center}
76 \begin{itemize}
77 \item Independent Research and public commentary:
78 https://news.ycombinator.com/item?id=24459314 \\
79 https://www.iscaconf.org/isca2020/papers/466100a052.pdf
80 \vspace{2pt}
81 \item No LOAD/STORE with Update (present in OpenPOWER)
82 \item No LOAD/STORE with Shift-immediate (as in ARM, x86)
83 \item No Condition Codes or Carry (present in OpenPOWER)\\
84 Extremely costly to add to compiler infrastructure
85 (already done in OpenPOWER).
86 \item Over-simplified ISA: assumes macro-op fusion and Compressed,
87 which massively complicate Multi-issue decode and Issue phases
88 (Multi-issue being fundamental to HPC)
89 \item RISC-V is great for Embedded scenarios, but it is just not up
90 to scratch for Supercomputing. OpenPOWER already is.
91
92 \end{itemize}
93 \end{center}
94
95 }
96
97
98 \frame{\frametitle{The summary on SVP64}
99
100 \begin{itemize}
101 \item Specification: https://libre-soc.org/openpower/sv/svp64/
102 \item SVP64 is similar to the Intel x86 ``REP'' instruction\\
103 ``please repeat the following instruction N times''\\
104 (but add some extra ``stuff'' in the process)
105 \item Uses the Cray-style ``setvl'' instruction\\
106 (Cray-1, NEC SX-Aurora, RISC-V RVV)\\
107 \item Unlike ``REP'' there is additional ``Vector context'':\\
108 Predication, Twin-predication, Element-width Overrides,
109 Map-reduce, Iteration, Saturation and more.
110 \item Just like ``REP'', none of this requires extra instructions!\\
111 (except setvl and the ``REP''-like prefix itself)\\
112 \item ``SIMD Considered Harmful'' principle applies equally
113 to RISC-V Vectors (190+ instructions on top of RV64GC's 80)\\
114 \emph{RVV more than doubles the number of RISC-V instructions}.
115 \end{itemize}
116 }
117
118 \begin{frame}[fragile]
119 \frametitle{Simple-V ADD in a nutshell}
120
121 \begin{semiverbatim}
122 function op\_add(rd, rs1, rs2, predr) # add not VADD!
123  int i, id=0, irs1=0, irs2=0;
124  for (i = 0; i < VL; i++)
125   if (ireg[predr] & 1<<i) # predication uses intregs
126    ireg[rd+id] <= ireg[rs1+irs1] + ireg[rs2+irs2];
127   if (reg\_is\_vectorised[rd] )  \{ id += 1; \}
128   if (reg\_is\_vectorised[rs1])  \{ irs1 += 1; \}
129   if (reg\_is\_vectorised[rs2])  \{ irs2 += 1; \}
130 \end{semiverbatim}
131
132 \begin{itemize}
133 \item Above is oversimplified: Reg. indirection left out (for clarity).
134 \item SIMD slightly more complex (case above is elwidth = default)
135 \item Scalar-scalar and scalar-vector and vector-vector now all in one
136 \item OoO may choose to push ADDs into instr. queue (v. busy!)
137 \end{itemize}
138 \end{frame}
139
140
141 \frame{\frametitle{Additional Simple-V features}
142
143 \begin{itemize}
144 \item ``fail-on-first'' (POWER9 VSX strncpy segfaults on boundary!)
145 \item ``Twin Predication'' (covers VSPLAT, VGATHER, VSCATTER, VINDEX etc.)
146 \item SVP64: extensive ``tag'' (Vector context) augmentation
147 \item ``Context propagation'': a VLIW-like context. Allows contexts
148 to be repeatedly applied (x86 ``REP'').
149 Effectively a ``hardware compression algorithm'' for ISAs.
150 \item Map-reduce and Iteration (like Cray-1 and SX-Aurora).\\
151 Also new: prefix-sum (Pascal's Triangle)
152 \item REMAP (suitable for in-place variable-sized Matrix
153 Multiply)
154 \item Ultimate goal: cut down I-Cache usage, which cuts down on power
155 \item Specifications: https://libre-soc.org/openpower/sv/
156 \item Needs to go through OpenPOWER Foundation `approval'
157 \end{itemize}
158 }
159
160 \frame{\frametitle{How can you help?}
161
162 \vspace{15pt}
163
164 \begin{itemize}
165 \item We need help. This is big. Plenty of Research Opportunities\\
166 Also a lot more ``Supercomputer-centric''
167 \vspace{3pt}
168 \item Start here! https://libre-soc.org \\
169 Mailing lists https://lists.libre-soc.org \\
170 IRC OFTC libre-soc \\
171 etc. etc. (it's a Libre project, go figure) \\
172 \vspace{3pt}
173 \item Can I get paid? Yes! NLnet funded\\
174 See https://libre-soc.org/nlnet/\#faq \\
175 \vspace{3pt}
176 \item Also profit-sharing in any commercial ventures \\
177 \vspace{3pt}
178 \end{itemize}
179 }
180
181
182 \frame{\frametitle{Summary}
183
184 \begin{itemize}
185 \item OpenPOWER is already a Supercomputer ISA: SVP64 greatly simplifies
186 it, goes back to the original Cray-style roots, and adds half a dozen
187 completely new innovations.
188 \item Collaboration with OpenPOWER Foundation and Members absolutely
189 essential. No short-cuts. Standards to be developed and ratified
190 so that everyone benefits.
191 \item Riding the wave of huge stability of OpenPOWER ecosystem
192 \item Greatly simplified software development: reduces costs and risks.
193 \item It also happens to be fascinating, deeply rewarding, technically
194 challenging, and funded by NLnet
195 \item Simulator and HDL at https://git.libre-soc.org/
196
197 \end{itemize}
198 }
199
200
201 \frame{
202 \begin{center}
203 {\Huge The end\vspace{12pt}\\
204 Thank you\vspace{12pt}\\
205 Questions?\vspace{12pt}
206 }
207 \end{center}
208
209 \begin{itemize}
210 \item Discussion: http://lists.libre-soc.org
211 \item OFTC IRC \#libre-soc
212 \item http://libre-soc.org/
213 \item http://nlnet.nl/PET
214 \item https://libre-soc.org/nlnet/\#faq
215 \end{itemize}
216 }
217
218 \end{document}