Replaced shadow.jpg with svg one.
[libreriscv.git] / conferences / openpower_2020.tex
1 \documentclass[slidestop]{beamer}
2 \usepackage{beamerthemesplit}
3 \usepackage{graphics}
4 \usepackage{pstricks}
5
6 \graphicspath{{./}}
7
8 \title{The Libre-SOC Hybrid 3D CPU}
9 \author{Luke Kenneth Casson Leighton}
10
11
12 \begin{document}
13
14 \frame{
15 \begin{center}
16 \huge{The Libre-SOC Hybrid 3D CPU}\\
17 \vspace{32pt}
18 \Large{Augmenting the OpenPOWER ISA}\\
19 \Large{to provide 3D and Video instructions}\\
20 \Large{(properly and officially)}\\
21 \vspace{24pt}
22 \Large{[proposed for] OpenPOWER Summit 2020}\\
23 \vspace{16pt}
24 \large{Sponsored by NLnet's PET Programme}\\
25 \vspace{6pt}
26 \large{\today}
27 \end{center}
28 }
29
30
31 \frame{\frametitle{Why another SoC?}
32
33 \vspace{15pt}
34
35 \begin{itemize}
36 \item Intel Management Engine, QA issues, Spectre\vspace{15pt}
37 \item Endless proprietary drivers \\
38 (affects product development cost)\vspace{15pt}
39 \item Opportunity to drastically simplify driver development\\
40 and engage in ``long-tail'' markets\vspace{15pt}
41 \item Because for 30 years I Always Wanted To Design A CPU\vspace{10pt}
42 \end{itemize}
43 }
44
45
46 \frame{\frametitle{Why OpenPOWER? (but first: Evaluation Criteria)}
47
48 \vspace{15pt}
49
50 \begin{itemize}
51 \item Good ecosystem essential\\
52 linux kernel, u-boot, compilers, OSes,\\
53 Reference Implementation(s)\vspace{12pt}
54 \item Supportive Foundation and Members\\
55 need to be able to submit ISA augmentations\\
56 (for proper peer review)\vspace{12pt}
57 \item No NDAs, full transparency must be acceptable\\
58 due to being funded under NLnet's PET Programme\vspace{12pt}
59 \end{itemize}
60 }
61
62 \frame{\frametitle{Why OpenPOWER?}
63
64
65 \begin{itemize}
66 \item RISC-V: closed secretive mailing lists, closed secretive\\
67 ISA Working Groups, no acceptance of transparency\\
68 requirements, not well-established enough
69 \item MIPS Open Initiative website was offline
70 \item ARM and x86 are proprietary (x86 too complex)
71 \item OpenRISC 1200 not enough adoption
72 \item Nyuzi GPU too specialist (not a general-purpose ISA)
73 \item MIAOW GPU is not a GPU (it's an AMD Vector Engine)
74 \item ``rolling your own'' out of the question (20+ man-years)
75 \item OpenPOWER: established for decades, excellent Foundation,\\
76 Microwatt as Reference, approachable and friendly.
77 \end{itemize}
78 }
79
80 \frame{\frametitle{What goes into a typical SoC?}
81 \vspace{9pt}
82 \begin{itemize}
83 \item 15 to 20mm BGA package: 2.5 to 5 watt power consumption\\
84 heat sink normally not required (simplifies overall design)
85 \vspace{10pt}
86 \item Fully-integrated peripherals (not Northbridge/Southbridge)\\
87 USB, HDMI, RGB/TTL, SD/MMC, I2C, UART, SPI, GPIO etc. etc.
88 \vspace{10pt}
89 \item Built-in GPU (shared memory bus, 3rd party licensed) \vspace{10pt}
90 \item Built-in VPU (likewise)\vspace{10pt}
91 \item Target price between \$2.50 and \$30 depending on market\\
92 Radically different from IBM POWER9 Core (200 Watt)
93 \vspace{10pt}
94 \end{itemize}
95 }
96
97
98
99 \frame{\frametitle{Simple SBC-style SoC}
100
101 \begin{center}
102 \includegraphics[width=0.9\textwidth]{shakti_libre_soc.jpg}
103 \end{center}
104
105 }
106
107
108 \frame{\frametitle{Where to start? (roadmap)}
109
110 \begin{itemize}
111 \item First thing: get a basic core working on an FPGA\\
112 (use Microwatt as a reference)
113 \item Next: create a low-cost test ASIC (180nm).\\
114 (first OpenPOWER ASIC since IBM's POWER9, 10 years ago)
115 \item (in parallel): Develop Vector ISA with 3D and Video\\
116 extensions, under watchful eye of OpenPOWER Foundation
117 \item Implement Vector ISA in simulator, then HDL, then FPGA\\
118 and finally (only when ratified by OPF) into silicon
119 \item Sell chips, make \$\$\$.
120 \end{itemize}
121 }
122
123 \frame{\frametitle{What's different about Libre-SOC?}
124
125 \begin{itemize}
126 \item Hybrid - integrated. The CPU \textit{is} the GPU.\\
127 The GPU \textit{is} the CPU. The VPU \textit{is} the CPU.\\
128 \textit{There is No Separate VPU/GPU Pipeline}\\
129 \vspace{9pt}
130 \item written in nmigen (a python-based HDL). Not VHDL,\\
131 not Verilog (definitely not Chisel3/Scala).\\
132 This is an extremely important strategic decision.
133 \vspace{9pt}
134 \item Simple-V Vector Extension. See ``SIMD Considered Harmful''.\\
135 SV effectively a ``hardware for-loop'' on standard scalar ISA\\
136 (conceptually similar to Zero-Overhead Loops in DSPs)
137 \vspace{9pt}
138 \end{itemize}
139 }
140
141 \frame{\frametitle{Hybrid Architecture: Augmented 6600}
142
143 \begin{itemize}
144 \item CDC 6600 is a design from 1965. The \textit{augmentations} are not.\\
145 Help from Mitch Alsup includes \textit{precise exceptions}, \\
146 multi-issue and more. Academic literature on 6600 utterly misleading.
147 6600 Scoreboards completely underestimated (Seymour Cray and
148 James Thornton
149 solved problems they didn't realise existed elsewhere!)
150 \item Front-end Vector ISA, back-end ``Predicated (masked) SIMD''\\
151 nmigen (python OO) strategically critical to achieving this.
152 \item Out-of-order combined with Simple-V allows scalar operations\\
153 at the developer end to be turned into SIMD at the back-end\\
154 \textit{without the developer needing to do SIMD}
155 \item IEEE754 sin / cos / atan2, Texturisation opcodes, YUV2RGB\\
156 all automatically vectorised.
157 \end{itemize}
158 }
159
160 \frame{\frametitle{Why nmigen? (but first: evaluate other HDLs)}
161
162 \begin{itemize}
163 \item Verilog: designed in the 1980s purely for doing unit tests (!)
164 \item VHDL: again, a 1980s-era ``Procedural'' language (BASIC, Fortran).
165 Does now have ``records'' which is nice.
166 \item Chisel3 / Scala: OO, but very obscure (20th on index)
167 \item pyrtl: not large enough community
168 \item MyHDL: subset of python only
169 \vspace{9pt}
170 \item Slowly forming a set of criteria: must be OO (python), must have
171 wide adoption (python), must have good well-established
172 programming practices already in place (python), must be
173 easy to learn (python)
174 \item HDL itself, although a much smaller community, must meet the same
175 criteria. Only nmigen meets those criteria.
176
177 \end{itemize}
178 }
179
180 \frame{\frametitle{Why nmigen?}
181
182 \begin{itemize}
183 \item Uses python to build an AST (Abstract Syntax Tree).
184 Actually hands that over to yosys (to create ILANG file)
185 after which verilog can (if necessary) be created
186 \item Deterministic synthesisable behaviour (Signals are declared
187 with their reset pattern: no more forgetting ``if rst'' block).
188 \item python OO programming techniques can be deployed: classes
189 and functions can be created which take parameters that change
190 what HDL is generated (IEEE754 FP16 / 32 / 64 for example)
191 \item python-based for-loops can e.g. read CSV files then generate
192 a hierarchical nested suite of HDL Switch / Case statements
193 (this is how the Libre-SOC PowerISA decoder is implemented)
194 \item extreme OO abstraction can even be used to create ``dynamic
195 partitioned Signals'' that have the same operator-overloaded
196 ``add'', ``subtract'', ``greater-than'' operators
197
198 \end{itemize}
199 }
200
201 \frame{\frametitle{nmigen (dynamic) vs VHDL (static)}
202
203 \begin{center}
204 \includegraphics[width=1.0\textwidth]{2020-09-10_11-53.png}
205 \end{center}
206
207 }
208
209 \frame{\frametitle{nmigen PowerISA Decoder}
210
211 \begin{center}
212 \includegraphics[width=1.0\textwidth]{2020-09-10_11-46.png}
213 \end{center}
214
215 }
216
217 \frame{\frametitle{nmigen PowerISA Decoder}
218
219 \begin{center}
220 \includegraphics[width=0.55\textwidth]{2020-09-09_21-04.png}
221 \end{center}
222
223 }
224
225 \frame{\frametitle{Why another Vector ISA? (or: not-exactly another)}
226
227 \begin{itemize}
228 \item Simple-V is a 'register tag' system. \textit{There are no opcodes}\\
229 SV 'tags' scalar operations (scalar regfiles) as 'vectorised'
230 \item (PowerISA SIMD is around 700 opcodes, making it unlikely to be
231 able to fit a PowerISA decoder in only one clock cycle)
232 \item Effectively a 'hardware sub-counter for-loop': pauses the PC\\
233 then rolls incrementally through the operand register numbers\\
234 issuing \textit{multiple} scalar instructions into the pipelines\\
235 (hence the reason for a multi-issue OoO microarchitecture)
236 \item Current \textit{and future} PowerISA scalar opcodes inherently
237 \textit{and automatically} become 'vectorised' by SV without
238 needing an explicit new Vector opcode.
239 \item Predication and element width polymorphism are also 'tags'.
240 elwidth polymorphism allows for FP16 / 80 / 128 to be added to
241 the ISA \textit{without modifying the ISA}
242
243 \end{itemize}
244 }
245
246
247 \begin{frame}[fragile]
248 \frametitle{Simple-V ADD in a nutshell}
249
250 \begin{semiverbatim}
251 function op\_add(rd, rs1, rs2, predr) # add not VADD!
252  int i, id=0, irs1=0, irs2=0;
253  for (i = 0; i < VL; i++)
254   if (ireg[predr] & 1<<i) # predication uses intregs
255    ireg[rd+id] <= ireg[rs1+irs1] + ireg[rs2+irs2];
256   if (reg\_is\_vectorised[rd] )  \{ id += 1; \}
257   if (reg\_is\_vectorised[rs1])  \{ irs1 += 1; \}
258   if (reg\_is\_vectorised[rs2])  \{ irs2 += 1; \}
259 \end{semiverbatim}
260
261 \begin{itemize}
262 \item Above is oversimplified: Reg. indirection left out (for clarity).
263 \item SIMD slightly more complex (case above is elwidth = default)
264 \item Scalar-scalar and scalar-vector and vector-vector now all in one
265 \item OoO may choose to push ADDs into instr. queue (v. busy!)
266 \end{itemize}
267 \end{frame}
268
269
270 \frame{\frametitle{Summary}
271
272 \begin{itemize}
273 \item Goal is to create a mass-volume low-power embedded SoC suitable
274 for use in netbooks, chromebooks, tablets, smartphones, IoT SBCs.
275 \item No DRM. 'Trustable' (by the users, not by Media Moguls) design
276 ethos as a \textit{business} objective: requires full transparency
277 as well as Formal Correctness Proofs
278 \item Collaboration with OpenPOWER Foundation and Members absolutely
279 essential. No short-cuts. Standards to be developed and ratified
280 so that everyone benefits.
281 \item Working on the back of huge stability of POWER ecosystem
282 \item The combined result is that the Board Support Package is 100\%
283 upstream: app and product development by the customer is hugely
284 simplified and much more attractive
285
286 \end{itemize}
287 }
288
289
290 \frame{
291 \begin{center}
292 {\Huge The end\vspace{15pt}\\
293 Thank you\vspace{15pt}\\
294 Questions?\vspace{15pt}
295 }
296 \end{center}
297
298 \begin{itemize}
299 \item Discussion: Libre-SOC-dev mailing list
300 \item Freenode IRC \#libre-soc
301 \item \url{http://libre-soc.org/}
302 \item \url{http://nlnet.nl/PET}
303 \end{itemize}
304 }
305
306
307 \end{document}