more warm words
[libreriscv.git] / openpower / simple_v_spec.tex
1 \documentclass[oneside]{book}
2 \usepackage{lmodern}
3 \usepackage{amssymb,amsmath}
4 \usepackage{lscape}
5 \usepackage{sectsty}
6 \usepackage{appendix}
7 \usepackage{graphicx}
8 \usepackage[firstpage]{draftwatermark}
9 \usepackage[printonlyused,withpage]{acronym}
10 \usepackage{float}
11 \usepackage{url}
12 \usepackage[useregional]{datetime2}
13 \usepackage{anyfontsize}
14 \usepackage{ifxetex,ifluatex}
15 \usepackage{fixltx2e} % provides \textsubscript
16 \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
17 \usepackage[T1]{fontenc}
18 \usepackage[utf8]{inputenc}
19 \else % if luatex or xelatex
20 \ifxetex
21 \usepackage{mathspec}
22 \else
23 \usepackage{fontspec}
24 \fi
25 \defaultfontfeatures{Ligatures=TeX,Scale=MatchLowercase}
26 \fi
27 % use upquote if available, for straight quotes in verbatim environments
28 \IfFileExists{upquote.sty}{\usepackage{upquote}}{}
29 % use microtype if available
30 \IfFileExists{microtype.sty}{%
31 \usepackage[]{microtype}
32 \UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
33 }{}
34 \PassOptionsToPackage{hyphens}{url} % url is loaded by hyperref
35 \usepackage[unicode=true]{hyperref}
36 \hypersetup{
37 pdfborder={0 0 0},
38 breaklinks=true}
39 \usepackage[margin=0.9in]{geometry}
40 \usepackage{color}
41 \usepackage{fancyvrb}
% Verbatim/highlighting support built on fancyvrb.
% \VerbBar expands to a literal vertical bar; \VERB is fancyvrb's \Verb with
% backslash and braces kept as command characters; Highlighting is a Verbatim
% environment with the same command characters and a 5mm left margin.
% NOTE(review): these names look like the ones used by highlighted code in
% the generated tex_out/ files -- confirm against the generator.
42 \newcommand{\VerbBar}{|}
43 \newcommand{\VERB}{\Verb[commandchars=\\\{\}]}
44 \DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\},xleftmargin=5mm}
45 % Add ',fontsize=\small' for more characters per line
% Shaded is defined as an empty environment (no begin or end code).
46 \newenvironment{Shaded}{}{}
% One macro per token class. Each takes the token text as its only argument
% and applies an RGB colour and/or bold/italic; the classes defined as plain
% {#1} (Import, BuiltIn, Extension, RegionMarker, Normal) print it unchanged.
47 \newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{#1}}}
48 \newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.56,0.13,0.00}{#1}}
49 \newcommand{\DecValTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{#1}}
50 \newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{#1}}
51 \newcommand{\FloatTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{#1}}
52 \newcommand{\ConstantTok}[1]{\textcolor[rgb]{0.53,0.00,0.00}{#1}}
53 \newcommand{\CharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{#1}}
54 \newcommand{\SpecialCharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{#1}}
55 \newcommand{\StringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{#1}}
56 \newcommand{\VerbatimStringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{#1}}
57 \newcommand{\SpecialStringTok}[1]{\textcolor[rgb]{0.73,0.40,0.53}{#1}}
58 \newcommand{\ImportTok}[1]{#1}
59 \newcommand{\CommentTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textit{#1}}}
60 \newcommand{\DocumentationTok}[1]{\textcolor[rgb]{0.73,0.13,0.13}{\textit{#1}}}
61 \newcommand{\AnnotationTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{#1}}}}
62 \newcommand{\CommentVarTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{#1}}}}
63 \newcommand{\OtherTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{#1}}
64 \newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.02,0.16,0.49}{#1}}
65 \newcommand{\VariableTok}[1]{\textcolor[rgb]{0.10,0.09,0.49}{#1}}
66 \newcommand{\ControlFlowTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{#1}}}
67 \newcommand{\OperatorTok}[1]{\textcolor[rgb]{0.40,0.40,0.40}{#1}}
68 \newcommand{\BuiltInTok}[1]{#1}
69 \newcommand{\ExtensionTok}[1]{#1}
70 \newcommand{\PreprocessorTok}[1]{\textcolor[rgb]{0.74,0.48,0.00}{#1}}
71 \newcommand{\AttributeTok}[1]{\textcolor[rgb]{0.49,0.56,0.16}{#1}}
72 \newcommand{\RegionMarkerTok}[1]{#1}
73 \newcommand{\InformationTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{#1}}}}
74 \newcommand{\WarningTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{#1}}}}
75 \newcommand{\AlertTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{#1}}}
76 \newcommand{\ErrorTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{#1}}}
77 \newcommand{\NormalTok}[1]{#1}
78
79 % Indent every verbatim block by 1cm: the kernel's \@xverbatim is kept
% under the name \saveverbatime, and the replacement sets \leftskip before
% handing over to it.
80 \makeatletter
81 \let\saveverbatime\@xverbatim
82 \def\@xverbatim{\leftskip=1cm\relax\saveverbatime}
83 \makeatother
84
85 \usepackage{longtable,booktabs}
86 % Fix footnotes in tables (requires footnote package).
% The environment name has to be written unbroken: a line break inside the
% braces is read as a space, which would name a non-existent "long table"
% environment and leave longtable itself unpatched.
87 \IfFileExists{footnote.sty}{\usepackage{footnote}\makesavenoteenv{longtable}}{}
% Paragraph layout: load the parskip package when it is installed; otherwise
% fall back to the manual settings in the second branch (no first-line
% indent, 6pt stretchable gap between paragraphs).
89 \IfFileExists{parskip.sty}{%
90 \usepackage{parskip}
91 }{% else
92 \setlength{\parindent}{0pt}
93 \setlength{\parskip}{6pt plus 2pt minus 1pt}
94 }
95 \setlength{\emergencystretch}{3em} % prevent overfull lines
% \tightlist is defined here only if nothing has defined it already
% (\providecommand); it sets both the item and the paragraph spacing to zero.
96 \providecommand{\tightlist}{%
97 \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
% Number sectional headings down to depth 5 (\subparagraph in the standard
% classes).
98 \setcounter{secnumdepth}{5}
99 % Redefines (sub)paragraphs to behave more like sections
% Each redefinition is skipped when the command is not defined. The original
% is saved as \oldparagraph / \oldsubparagraph; the replacement calls it and
% then appends an empty \mbox{}.
% NOTE(review): the \mbox{} presumably gives the run-in heading some content
% so that whatever follows starts after it -- confirm.
100 \ifx\paragraph\undefined\else
101 \let\oldparagraph\paragraph
102 \renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}}
103 \fi
104 \ifx\subparagraph\undefined\else
105 \let\oldsubparagraph\subparagraph
106 \renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}}
107 \fi
108
109 % Reset the chapter counter at every \part and set the default figure
% placement to htbp. Both lines use internal names containing @, so one
% \makeatletter ... \makeatother pair has to enclose them: if @ stops being
% a letter before the \def, TeX defines \fps (delimited by "@figure")
% instead of \fps@figure and the placement default is never changed.
110 \makeatletter\@addtoreset{chapter}{part}%
111 \def\fps@figure{htbp}
112 \makeatother
113
114 % graphics path for primer
115 \graphicspath{ {svp64-primer/img/} }
116
117 \date{}
118
119 \begin{document}
120
121 \chapter*{Preamble}
122 \addcontentsline{toc}{chapter}{Preamble} \markboth{INTRODUCTION}{}
123
124 \textbf{Last modified date: \today}
125
126 This document is an auto-generated version of the Draft SVP64
127 Specification available at
128
129 \begin{verbatim}
130 https://libre-soc.org/openpower/sv
131 \end{verbatim}
132
133 for which the source code is available at
134
135 \begin{verbatim}
136 https://git.libre-soc.org/?p=libreriscv.git;a=tree;f=openpower;hb=HEAD
137 \end{verbatim}
138
139 This PDF may be created with ``make pdf'' from the following file:
140
141 \begin{verbatim}
142 https://git.libre-soc.org/?p=libreriscv.git;a=blob;f=openpower/Makefile;hb=HEAD
143 \end{verbatim}
144
145 by executing the following commands:
146
147 \begin{verbatim}
148 git clone https://git.libre-soc.org/git/libreriscv.git libresoc
149 cd libresoc/openpower
150 make pdf
151 \end{verbatim}
152
153 Simple-V Cray-style Vectors have been developed by the Libre-SOC Team,
154 sponsored by the NLnet Foundation and NGI POINTER under
155 EU Grants 871528 and 957073.
156
157 Simple-V is in DRAFT Status and will be submitted publicly
158 (non-confidentially) through the OPF ISA WG ``External Submissions''
159 Process. Funding from NLnet, through their Privacy and Enhanced Trust
160 Programme, requires full transparency.
161
162 As this document is under continuous rapid revision please check frequently
163 at:
164
165 \begin{verbatim}
166 https://ftp.libre-soc.org/simple_v_spec.pdf
167 \end{verbatim}
168
169 \subsection*{Contacts}
170 For questions, comments, and clarification, please contact the following:
171 \begin{itemize}
172 \itemsep -0.3em
173 \item Libre-SOC ISA Dev Mailing List - libre-soc-isa@lists.libre-soc.org
174 \item Luke Kenneth Casson Leighton - Libre-SOC team lead and Red
175 Semiconductor Ltd Director - lkcl@lkcl.net
176 \item David Calderwood - Red Semiconductor Ltd Director -
177 djac@calderwoodhan.com
178 \item Toshaan Bharvani - OpenPOWER Foundation Technical Chair, VanTosh
179 Director - toshaan@vantosh.com
180 \item Konstantinos Margaritis - Engineer and Founder of VectorCamp, writing optimised assembler for a number of SIMD/Vector ISAs - konstantinos@vectorcamp.gr
181 \item Dmitry Selyutin - Libre-SOC engineer, working on binutils SVP64 assembler - ghostmansd@gmail.com
182 \item Jacob Lifshay - Libre-SOC engineer, CPU arch and verification - programmerjake@gmail.com
183 \item Cesar Strauss - Libre-SOC engineer, CPU arch and verification - cestrauss@gmail.com
184 \item Andrey Miroshnikov - Libre-SOC engineer, assisting with documentation - andrey@technepisteme.xyz
185 \end{itemize}
186
187 \newpage
188 \subsection*{Executive Summary}
189 \hypertarget{svux2fexecutive_summary}{}
190 \input{tex_out/executive_summary.tex}
191
192 \newpage
193 \begin{landscape}
194 \addcontentsline{toc}{chapter}{Comparison Table} \markboth{INTRODUCTION}{}
195 \hypertarget{svux2fcomparison_table}{}
196 {
197 \fontsize{6}{8}\selectfont
198 \input{tex_out/comparison_table.tex}
199 }
200 \end{landscape}
201
202 \part{Scalable Vectors Primer}
203 \input{svp64-primer/acronyms}
204 %\chapter*{Executive Summary}
205 \include{svp64-primer/summary}
206 \bibliography{svp64-primer/references}
207 \bibliographystyle{ieeetr}
208
209 \tableofcontents
210
211 % Part II
212 \part{Scalable Vectors for the Power ISA}
213
214
215 \chapter{Fields and Forms}
216 \hypertarget{svux2ffields}{}
217 \input{tex_out/fields.tex}
218 \chapter{Scalable Vectors for the Power ISA}
219 \hypertarget{svux2fscalvecpowisa}{}
220 \hypertarget{SVux7csv}{}
221 \input{tex_out/sv.tex}
222 \chapter{Other Vector ISAs}\hypertarget{svux2fvector_isa_comparison}{}
223 \input{tex_out/vector_isas.tex}
224 \chapter{Overview}\hypertarget{svux2foverview}{}
225 \input{tex_out/overview.tex}
226 \chapter{Compliancy Levels}\hypertarget{svux2fcompliancy_levels}{}
227 \input{tex_out/compliancy_levels.tex}
228 \chapter{SVP64}\hypertarget{svux2fsvp64}{}
229 \input{tex_out/svp64.tex}
230 \chapter{SPRs}\hypertarget{svux2fsprs}{}
231 \input{tex_out/sprs.tex}
232 \chapter{Arithmetic Mode}\hypertarget{svux2fnormal}{}
233 \input{tex_out/normal.tex}
234 \chapter{Load/Store Mode}\hypertarget{svux2fldst}{}
235 \input{tex_out/ldst.tex}
236 \chapter{Condition Register Fields Mode}\hypertarget{svux2fcr_ops}{}
237 \input{tex_out/cr_ops.tex}
238 \chapter{Branch Mode}\hypertarget{svux2fbranches}{}
239 \input{tex_out/branches.tex}
240 \chapter{setvl instruction}\hypertarget{svux2fsetvl}{}
241 \input{tex_out/setvl.tex}
242 \chapter{svstep instruction}\hypertarget{svux2fsvstep}{}
243 \input{tex_out/svstep.tex}
244 \chapter{REMAP subsystem}\hypertarget{svux2fremap}{}
245 \input{tex_out/remap.tex}
246 \chapter{Swizzle Move}\hypertarget{svux2fmv.swizzle}{}
247 \input{tex_out/mv_swizzle.tex}
248 \chapter{Pack / Unpack}\hypertarget{svux2fmv.vec}{}
249 \input{tex_out/mv_vec.tex}
250
251 \begin{appendices}
252 \chapter{SVP64 Appendix}\hypertarget{svp64ux2fappendix}{}
253 \hypertarget{svux2fsvp64ux2fappendix}{}
254 \input{tex_out/svp64_appendix.tex}
255 \chapter{SVP64 Quirks}\hypertarget{svux2fsvp64_quirks}{}
256 \input{tex_out/svp64_quirks.tex}
257 \chapter{REMAP algorithms}\hypertarget{svux2fremapux2fappendix}{}
258 \input{tex_out/remap_appendix.tex}
259 \chapter{Simple-V pseudocode}\hypertarget{svux2fpseudocode_simplev}{}
260 \input{tex_out/pseudocode_simplev.tex}
261
262 \chapter{SVP64 Augmentation Table}\hypertarget{opcode_regs_deduped}{}
263 \begin{landscape}
264 {
265 \fontsize{7}{9}\selectfont
266 \input{tex_out/opcode_regs_deduped.tex}
267 }
268 \end{landscape}
269
270 \end{appendices}
271
272 % Part III
273 \part{Scalar Instructions}
274
275 \chapter*{Preamble}{}
276
277 As explained in the Simple-V introduction
278 these are all intentionally and specifically Scalar instructions.
279 They have with almost no exceptions been specifically crafted to
280 have a justification for their inclusion in the Power ISA as Scalar
281 instructions purely on their own merit.
282
283 \begin{itemize}
284 \item The biginteger multiply-and-add instruction is similar
285 to Intel's mulx in that it produces a pair of results.
286 \item JavaScript\texttrademark{} rounding is present in ARM as fjcvtzs
287 and would save an astounding 35 instructions with 5 branches.
288 \item Whilst there exist CR bit manipulation and copying
289 instructions there are no CR Field manipulation instructions,
290 putting pressure on GPRs if several CR Fields need to be analysed.
291 \item One single instruction, bmask, is proposed that covers
292 the whole of x86 BMI1 and AMD TBM, combined, and provides more.
293 \end{itemize}
294
295 All of these have nothing to do with Simple-V at all: they make
296 the Power ISA better at modern general-purpose compute, bringing
297 it up-to-date.
298
299 That said: by a wonderful coincidence, should they be included, then
300 Simple-V's capabilities increase significantly. For example the CRweird
301 instructions combined with the bitmanip instructions, alongside
302 Vectorised Rc=1 turn CR Fields into
303 extremely powerful Predicate masks. bmask not only
304 covers the BMI and TBM instructions of Intel and AMD: it also
305 includes the RVV set-before-first and set-after-first instructions.
306
307 The clean and clear separation between Vectorisation Prefix and Scalar
308 Suffix is what makes it possible for both Scalar-only and Scalable-Vectors
309 to benefit. It also makes the proposal process much easier, as there is no
310 inter-dependence.
311
312 It is however important to note that the rationale for these instructions
313 comes from a more general-purpose modern computing paradigm that is
314 outside of IBM's much more focussed and specialist traditional customer
315 base. We deeply respect IBM's curator role of the Power ISA of the past 25
316 years as much as we appreciate their courage in transferring that role
317 to the OpenPOWER Foundation ISA Working Group.
318
319 \chapter{SV Vector ops}\hypertarget{svux2fvector_ops}{}
320 \input{tex_out/vector_ops.tex}
321 \chapter{CR Weird ops}\hypertarget{svux2fcr_int_predication}{}
322 \hypertarget{cr_int_predication}{}
323 \input{tex_out/cr_int_predication.tex}
324 \chapter{Bitmanip ops}\hypertarget{svux2fbitmanip}{}
325 \input{tex_out/bitmanip.tex}
326 \chapter{FP/Int Conversion ops}\hypertarget{svux2fint_fp_mv}{}
327 \input{tex_out/int_fp_mv.tex}
328 \chapter{FP Class ops}\hypertarget{svux2ffclass}{}
329 \input{tex_out/fclass.tex}
330 \chapter{Audio and Video Opcodes}\hypertarget{svux2fav_opcodes}{}
331 \hypertarget{av_opcodes}{}
332 \input{tex_out/av_opcodes.tex}
333 \chapter{Big Integer}\hypertarget{svux2fbiginteger}{}
334 \input{tex_out/big_integer.tex}
335 \chapter{Transcendentals}\hypertarget{transcendentals}{}
336 \input{tex_out/transcendentals.tex}
337 \chapter{Acquire/Release Atomic Memory}\hypertarget{atomics}{}
338 \input{tex_out/atomics.tex}
339
340 \begin{appendices}
341 \chapter{Big Integer Analysis}\hypertarget{svux2fbigintegerux2fanalysis}{}
342 \input{tex_out/big_integer_analysis.tex}
343 \chapter{Bitmanip pseudocode}\hypertarget{svux2fpseudocode_bitmanip}{}
344 \input{tex_out/pseudocode_bitmanip.tex}
345 \chapter{Floating Point pseudocode}\hypertarget{isaux2fsvfparith}{}
346 \input{tex_out/pseudocode_svfparith.tex}
347 \chapter{Fixed Point pseudocode}
348 \hypertarget{isaux2fsvfixedarith}{}
349 \input{tex_out/pseudocode_svfixedarith.tex}
350 \end{appendices}
351
352 % Part IV
353 \part{Scalar Power ISA pseudocode}
354 \backmatter % temporary fix for too many appendices
355 %\setcounter{chapter}{0}
356 %\renewcommand{\thechapter}{\Alph{chapter}}
357
358 \chapter*{Preamble}
359 \addcontentsline{toc}{chapter}{Preamble} \markboth{INTRODUCTION}{}
360
361 This section contains pseudocode from the Power ISA Specification
362 v3.0B, updated to be executable. Several bugs in Power ISA v3.0B have been
363 found and reported as a direct result of actually running the
364 pseudocode as executable code in a Simulator.
365 A Formal Correctness Proof Research Paper has also been written by Boris
366 Shingarov.
367
368 Additionally, with SVP64 performing element-width over-rides it is the
369 \textit{Scalar} pseudocode that needs adapting to variable-length
370 (\textbf{XLEN}). Maintaining duplicate copies, identical in every
371 respect \textit{except} for XLEN, as part of the Simple-V Specification
372 is completely pointless and a waste of time: the updates to include
373 XLEN need to be part
374 of the Scalar Power ISA Specification. This has the added benefit
375 that it makes life much easier for 32-bit implementors, and has an
376 additional benefit of making it possible for the Scalar Power ISA
377 to extend to 128-bit in future (like RV128).
378
379 \begin{appendices}
380 \chapter{Binary Coded Decimal pseudocode}
381 \hypertarget{svux2fpseudocode_bcd}{}
382 \input{tex_out/pseudocode_bcd.tex}
383 \chapter{Branch pseudocode}
384 \hypertarget{openpowerux2fisaux2fbranch}{}
385 \hypertarget{svux2fpseudocode_branch}{}
386 \input{tex_out/pseudocode_branch.tex}
387 \chapter{Fixed Point Compare pseudocode}
388 \hypertarget{svux2fpseudocode_comparefixed}{}
389 \input{tex_out/pseudocode_comparefixed.tex}
390 \chapter{Condition Register pseudocode}
391 \hypertarget{svux2fpseudocode_condition}{}
392 \input{tex_out/pseudocode_condition.tex}
393
394 \chapter{Fixed Point Arithmetic pseudocode}
395 \hypertarget{svux2fpseudocode_fixedarith}{}
396 \input{tex_out/pseudocode_fixedarith.tex}
397 \chapter{Fixed Point Load pseudocode}
398 \hypertarget{svux2fpseudocode_fixedload}{}
399 \input{tex_out/pseudocode_fixedload.tex}
400 \chapter{Fixed Point Logical pseudocode}
401 \hypertarget{svux2fpseudocode_fixedlogical}{}
402 \input{tex_out/pseudocode_fixedlogical.tex}
403 \chapter{Fixed Point Rotate pseudocode}
404 \hypertarget{svux2fpseudocode_fixedshift}{}
405 \input{tex_out/pseudocode_fixedshift.tex}
406
407 \chapter{Fixed Point Store pseudocode}
408 \hypertarget{svux2fpseudocode_fixedstore}{}
409 \input{tex_out/pseudocode_fixedstore.tex}
410 \chapter{Fixed Point Trap pseudocode}
411 \hypertarget{svux2fpseudocode_fixedtrap}{}
412 \input{tex_out/pseudocode_fixedtrap.tex}
413 \chapter{Special Purpose Register pseudocode}
414 \hypertarget{svux2fpseudocode_sprset}{}
415 \input{tex_out/pseudocode_sprset.tex}
416 \chapter{String Load/Store pseudocode}
417 \hypertarget{svux2fpseudocode_stringldst}{}
418 \input{tex_out/pseudocode_stringldst.tex}
419 \chapter{System Call pseudocode}
420 \hypertarget{svux2fpseudocode_system}{}
421 \input{tex_out/pseudocode_system.tex}
422
423 \chapter{Floating Point Load pseudocode}
424 \hypertarget{svux2fpseudocode_fpload}{}
425 \input{tex_out/pseudocode_fpload.tex}
426 \chapter{Floating Point Store pseudocode}
427 \hypertarget{svux2fpseudocode_fpstore}{}
428 \input{tex_out/pseudocode_fpstore.tex}
429 \chapter{Floating Point Move pseudocode}
430 \hypertarget{svux2fpseudocode_fpmove}{}
431 \input{tex_out/pseudocode_fpmove.tex}
432 \chapter{Floating Point Arithmetic pseudocode}
433 \hypertarget{svux2fpseudocode_fparith}{}
434 \input{tex_out/pseudocode_fparith.tex}
435 \chapter{Floating Point Integer Conversion pseudocode}
436 \hypertarget{svux2fpseudocode_fpcvt}{}
437 \input{tex_out/pseudocode_fpcvt.tex}
438
439 \end{appendices}
440
441
442
443
444 \end{document}