remove atomics from spec, not enough research done yet
[libreriscv.git] / openpower / simple_v_spec.tex
1 \documentclass[oneside]{book}
2 \usepackage{lmodern}
3 \usepackage{amssymb,amsmath}
4 \usepackage{lscape}
5 \usepackage{sectsty}
6 \usepackage{appendix}
7 \usepackage{graphicx}
8 \usepackage[firstpage]{draftwatermark}
9 \usepackage[printonlyused,withpage]{acronym}
10 \usepackage{float}
11 \usepackage{url}
12 \usepackage[useregional]{datetime2}
13 \usepackage{anyfontsize}
14 \usepackage{ifxetex,ifluatex}
15 \usepackage{fixltx2e} % provides \textsubscript
16 \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
17 \usepackage[T1]{fontenc}
18 \usepackage[utf8]{inputenc}
19 \else % if luatex or xelatex
20 \ifxetex
21 \usepackage{mathspec}
22 \else
23 \usepackage{fontspec}
24 \fi
25 \defaultfontfeatures{Ligatures=TeX,Scale=MatchLowercase}
26 \fi
27 % use upquote if available, for straight quotes in verbatim environments
28 \IfFileExists{upquote.sty}{\usepackage{upquote}}{}
29 % use microtype if available
30 \IfFileExists{microtype.sty}{%
31 \usepackage[]{microtype}
32 \UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
33 }{}
34 \PassOptionsToPackage{hyphens}{url} % url is loaded by hyperref
35 \usepackage[unicode=true]{hyperref}
36 \hypersetup{colorlinks=true,
37 linkcolor=blue,
38 filecolor=cyan,
39 urlcolor=magenta,
40 breaklinks=true}
41 \usepackage[margin=0.9in]{geometry}
42 \usepackage{color}
43 \usepackage{fancyvrb}
44 \newcommand{\VerbBar}{|}
45 \newcommand{\VERB}{\Verb[commandchars=\\\{\}]}
46 \DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\},xleftmargin=5mm}
47 % Add ',fontsize=\small' for more characters per line
48 \newenvironment{Shaded}{}{}
49 \newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{#1}}}
50 \newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.56,0.13,0.00}{#1}}
51 \newcommand{\DecValTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{#1}}
52 \newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{#1}}
53 \newcommand{\FloatTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{#1}}
54 \newcommand{\ConstantTok}[1]{\textcolor[rgb]{0.53,0.00,0.00}{#1}}
55 \newcommand{\CharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{#1}}
56 \newcommand{\SpecialCharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{#1}}
57 \newcommand{\StringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{#1}}
58 \newcommand{\VerbatimStringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{#1}}
59 \newcommand{\SpecialStringTok}[1]{\textcolor[rgb]{0.73,0.40,0.53}{#1}}
60 \newcommand{\ImportTok}[1]{#1}
61 \newcommand{\CommentTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textit{#1}}}
62 \newcommand{\DocumentationTok}[1]{\textcolor[rgb]{0.73,0.13,0.13}{\textit{#1}}}
63 \newcommand{\AnnotationTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{#1}}}}
64 \newcommand{\CommentVarTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{#1}}}}
65 \newcommand{\OtherTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{#1}}
66 \newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.02,0.16,0.49}{#1}}
67 \newcommand{\VariableTok}[1]{\textcolor[rgb]{0.10,0.09,0.49}{#1}}
68 \newcommand{\ControlFlowTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{#1}}}
69 \newcommand{\OperatorTok}[1]{\textcolor[rgb]{0.40,0.40,0.40}{#1}}
70 \newcommand{\BuiltInTok}[1]{#1}
71 \newcommand{\ExtensionTok}[1]{#1}
72 \newcommand{\PreprocessorTok}[1]{\textcolor[rgb]{0.74,0.48,0.00}{#1}}
73 \newcommand{\AttributeTok}[1]{\textcolor[rgb]{0.49,0.56,0.16}{#1}}
74 \newcommand{\RegionMarkerTok}[1]{#1}
75 \newcommand{\InformationTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{#1}}}}
76 \newcommand{\WarningTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{#1}}}}
77 \newcommand{\AlertTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{#1}}}
78 \newcommand{\ErrorTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{#1}}}
79 \newcommand{\NormalTok}[1]{#1}
80
81 % Indent all verbatim blocks by 1cm: save the existing \@xverbatim and re-enter it after setting \leftskip.
82 \makeatletter
83 \let\saveverbatime\@xverbatim
84 \def\@xverbatim{\leftskip=1cm\relax\saveverbatime}
85 \makeatother
86
87 \usepackage{longtable,booktabs}
88 % Fix footnotes in tables (requires footnote package); the environment name must be exactly `longtable', with no space or line break inside the braces.
89 \IfFileExists{footnote.sty}{%
90 \usepackage{footnote}\makesavenoteenv{longtable}}{}
91 \IfFileExists{parskip.sty}{%
92 \usepackage{parskip}
93 }{% else
94 \setlength{\parindent}{0pt}
95 \setlength{\parskip}{6pt plus 2pt minus 1pt}
96 }
97 \setlength{\emergencystretch}{3em} % prevent overfull lines
98 \providecommand{\tightlist}{%
99 \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
100 \setcounter{secnumdepth}{5}
101 % Redefines (sub)paragraphs to behave more like sections: each command, only if already defined, is saved as \old... and wrapped so that the heading is followed by an empty \mbox{}. NOTE(review): presumably the empty box lets following content start after the run-in heading -- confirm.
102 \ifx\paragraph\undefined\else
103 \let\oldparagraph\paragraph
104 \renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}}
105 \fi
106 \ifx\subparagraph\undefined\else
107 \let\oldsubparagraph\subparagraph
108 \renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}}
109 \fi
110
111 % restart chapter numbering at each \part, and set default figure placement to htbp
112 \makeatletter\@addtoreset{chapter}{part}%
113 \def\fps@figure{htbp}% must be defined while @ is still a letter, otherwise TeX defines \fps delimited by `@figure' instead
114 \makeatother
115
116 % graphics path for primer
117 \graphicspath{ {svp64-primer/img/} }
118
119 \date{}
120
121 \begin{document}
122
123 \chapter*{Preamble}
124 \addcontentsline{toc}{chapter}{Preamble} \markboth{INTRODUCTION}{}
125
126 \textbf{Last modified date: \today}
127
128 This document is an auto-generated version of the Draft SVP64
129 Specification available at
130
131 \begin{verbatim}
132 https://libre-soc.org/openpower/sv
133 \end{verbatim}
134
135 for which the source code is available at
136
137 \begin{verbatim}
138 https://git.libre-soc.org/?p=libreriscv.git;a=tree;f=openpower;hb=HEAD
139 \end{verbatim}
140
141 This PDF may be created with ``make pdf'' from the following file:
142
143 \begin{verbatim}
144 https://git.libre-soc.org/?p=libreriscv.git;a=blob;f=openpower/Makefile;hb=HEAD
145 \end{verbatim}
146
147 by executing the following commands:
148
149 \begin{verbatim}
150 git clone https://git.libre-soc.org/git/libreriscv.git libresoc
151 cd libresoc/openpower
152 make pdf
153 \end{verbatim}
154
155 Simple-V Cray-style Vectors have been developed by the Libre-SOC Team,
156 sponsored by the NLnet Foundation and NGI POINTER under
157 EU Grants 871528 and 957073.
158
159 Simple-V is in DRAFT Status and will be submitted publicly
160 (non-confidentially) through the OPF ISA WG ``External Submissions''
161 Process. Funding from NLnet, through their Privacy and Enhanced Trust
162 Programme, requires full transparency.
163
164 As this document is under continuous rapid revision please check frequently
165 at:
166
167 \begin{verbatim}
168 https://ftp.libre-soc.org/simple_v_spec.pdf
169 \end{verbatim}
170
171 \subsection*{Contacts}
172 For questions, comments, and clarification, please contact the following:
173 \begin{itemize}
174 \itemsep -0.3em
175 \item Libre-SOC ISA Dev Mailing List - libre-soc-isa@lists.libre-soc.org
176 \item Luke Kenneth Casson Leighton - Libre-SOC team lead and Red
177 Semiconductor Ltd Director - lkcl@lkcl.net
178 \item David Calderwood - Red Semiconductor Ltd Director -
179 djac@calderwoodhan.com
180 \item Toshaan Bharvani - OpenPOWER Foundation Technical Chair, VanTosh
181 Director - toshaan@vantosh.com
182 \item Konstantinos Margaritis - Engineer and Founder of VectorCamp, writing optimised assembler for a number of SIMD/Vector ISAs - konstantinos@vectorcamp.gr
183 \item Dmitry Selyutin - Libre-SOC engineer, working on binutils SVP64 assembler - ghostmansd@gmail.com
184 \item Jacob Lifshay - Libre-SOC engineer, CPU arch and verification - programmerjake@gmail.com
185 \item Cesar Strauss - Libre-SOC engineer, CPU arch and verification - cestrauss@gmail.com
186 \item Andrey Miroshnikov - Libre-SOC engineer, assisting with documentation - andrey@technepisteme.xyz
187 \end{itemize}
188
189 \newpage
190 \subsection*{Executive Summary}
191 \hypertarget{svux2fexecutive_summary}{}
192 \input{tex_out/executive_summary.tex}
193
194 \newpage
195 \begin{landscape}
196 \addcontentsline{toc}{chapter}{Comparison Table} \markboth{INTRODUCTION}{}
197 \hypertarget{svux2fcomparison_table}{}
198 {
199 \fontsize{6}{8}\selectfont
200 \input{tex_out/comparison_table.tex}
201 }
202 \end{landscape}
203
204 \part{Scalable Vectors Primer}
205 \input{svp64-primer/acronyms}
206 %\chapter*{Executive Summary}
207 \include{svp64-primer/summary}
208 \bibliography{svp64-primer/references}
209 \bibliographystyle{ieeetr}
210
211 \tableofcontents
212
213 % Part II
214 \part{Scalable Vectors for the Power ISA}
215
216
217 \chapter{Fields and Forms}
218 \hypertarget{svux2ffields}{}
219 \input{tex_out/fields.tex}
220 \chapter{Scalable Vectors for the Power ISA}
221 \hypertarget{svux2fscalvecpowisa}{}
222 \hypertarget{SVux7csv}{}
223 \input{tex_out/sv.tex}
224 \chapter{Other Vector ISAs}\hypertarget{svux2fvector_isa_comparison}{}
225 \input{tex_out/vector_isas.tex}
226 \chapter{Overview}\hypertarget{svux2foverview}{}
227 \input{tex_out/overview.tex}
228 \chapter{Compliancy Levels}\hypertarget{svux2fcompliancy_levels}{}
229 \input{tex_out/compliancy_levels.tex}
230 \chapter{SVP64}\hypertarget{svux2fsvp64}{}
231 \input{tex_out/svp64.tex}
232 \chapter{SPRs}\hypertarget{svux2fsprs}{}
233 \input{tex_out/sprs.tex}
234 \chapter{Arithmetic Mode}\hypertarget{svux2fnormal}{}
235 \input{tex_out/normal.tex}
236 \chapter{Load/Store Mode}\hypertarget{svux2fldst}{}
237 \input{tex_out/ldst.tex}
238 \chapter{Condition Register Fields Mode}\hypertarget{svux2fcr_ops}{}
239 \input{tex_out/cr_ops.tex}
240 \chapter{Branch Mode}\hypertarget{svux2fbranches}{}
241 \input{tex_out/branches.tex}
242 \chapter{setvl instruction}\hypertarget{svux2fsetvl}{}
243 \input{tex_out/setvl.tex}
244 \chapter{svstep instruction}\hypertarget{svux2fsvstep}{}
245 \input{tex_out/svstep.tex}
246 \chapter{REMAP subsystem}\hypertarget{svux2fremap}{}
247 \input{tex_out/remap.tex}
248 \chapter{Swizzle Move}\hypertarget{svux2fmv.swizzle}{}
249 \input{tex_out/mv_swizzle.tex}
250 \chapter{Pack / Unpack}\hypertarget{svux2fmv.vec}{}
251 \input{tex_out/mv_vec.tex}
252
253 \begin{appendices}
254 \chapter{SVP64 Appendix}\hypertarget{svp64ux2fappendix}{}
255 \hypertarget{svux2fsvp64ux2fappendix}{}
256 \input{tex_out/svp64_appendix.tex}
257 \chapter{SVP64 Quirks}\hypertarget{svux2fsvp64_quirks}{}
258 \input{tex_out/svp64_quirks.tex}
259 \chapter{REMAP algorithms}\hypertarget{svux2fremapux2fappendix}{}
260 \input{tex_out/remap_appendix.tex}
261 \chapter{Simple-V pseudocode}\hypertarget{svux2fpseudocode_simplev}{}
262 \input{tex_out/pseudocode_simplev.tex}
263 \chapter{Simple-V Analysis}\hypertarget{svux2fsv_analysis}{}
264 \input{tex_out/sv_analysis.tex}
265
266 \chapter{SVP64 Augmentation Table}\hypertarget{opcode_regs_deduped}{}
267 \begin{landscape}
268 {
269 \fontsize{7}{9}\selectfont
270 \input{tex_out/opcode_regs_deduped.tex}
271 }
272 \end{landscape}
273
274 \end{appendices}
275
276 % Part III
277 \part{Scalar Instructions}
278
279 \chapter*{Preamble}{}
280
281 As explained in the Simple-V introduction,
282 these are all intentionally and specifically Scalar instructions.
283 Each section is free-standing, has no connection, dependence or
284 relationship to any other section, including no direct critical dependence
285 either way on Simple-V.
286 They have, with almost no exceptions, been specifically crafted to
287 have a justification for their inclusion in the Power ISA as Scalar
288 instructions purely on their own merit.
289
290 \begin{itemize}
291 \item The biginteger multiply-and-add instruction is similar
292 to Intel's mulx in that it produces a pair of results.
293 \item JavaScript\texttrademark{} rounding is present in ARM as fjcvtzs
294 and would save an astounding 35 instructions with 5 branches.
295 \item Whilst there exist CR bit manipulation and copying
296 instructions there are no CR Field manipulation instructions,
297 putting pressure on GPRs if several CR Fields need to be analysed.
298 \item One single instruction, bmask, is proposed that covers
299 the whole of x86 BMI1 and AMD TBM, combined, and provides more.
300 \end{itemize}
301
302 All of these have nothing to do with Simple-V at all: they make
303 the Power ISA better at modern general-purpose compute, bringing
304 it up-to-date.
305
306 That said: by a wonderful coincidence, should they be included, then
307 Simple-V's capabilities increase significantly. For example the CRweird
308 instructions combined with the bitmanip instructions, alongside
309 Vectorised Rc=1, turn CR Fields into
310 extremely powerful Predicate masks. bmask not only
311 covers the BMI and TBM instructions of Intel and AMD, it also
312 includes the RVV set-before-first and set-after-first instructions.
313
314 The clean and clear separation between Vectorisation Prefix and Scalar
315 Suffix is what makes it possible for both Scalar-only and Scalable-Vectors
316 to benefit. It also makes the proposal process much easier, as there is no
317 inter-dependence.
318
319 It is however important to note that the rationale for these instructions
320 comes from a more general-purpose modern computing paradigm that is
321 outside of IBM's much more focussed and specialist traditional customer
322 base. We deeply respect IBM's curator role of the Power ISA over the past 25
323 years as much as we appreciate their courage in transferring that role
324 to the OpenPOWER Foundation ISA Working Group.
325
326 \chapter{SV Vector-assist Scalar ops}\hypertarget{svux2fvector_ops}{}
327 \input{tex_out/vector_ops.tex}
328 \chapter{CR Weird ops}\hypertarget{svux2fcr_int_predication}{}
329 \hypertarget{cr_int_predication}{}
330 \input{tex_out/cr_int_predication.tex}
331 \chapter{Bitmanip ops}\hypertarget{svux2fbitmanip}{}
332 \input{tex_out/bitmanip.tex}
333 \chapter{FP/Int Conversion ops}\hypertarget{svux2fint_fp_mv}{}
334 \input{tex_out/int_fp_mv.tex}
335 \chapter{FP Class ops}\hypertarget{svux2ffclass}{}
336 \input{tex_out/fclass.tex}
337 \chapter{Audio and Video Opcodes}\hypertarget{svux2fav_opcodes}{}
338 \hypertarget{av_opcodes}{}
339 \input{tex_out/av_opcodes.tex}
340 \chapter{Big Integer}\hypertarget{svux2fbiginteger}{}
341 \input{tex_out/big_integer.tex}
342 \chapter{Transcendentals}\hypertarget{transcendentals}{}
343 \input{tex_out/transcendentals.tex}
344 %\chapter{Acquire/Release Atomic Memory}\hypertarget{atomics}{}
345 %\input{tex_out/atomics.tex}
346
347 \begin{appendices}
348 \chapter{Big Integer Analysis}\hypertarget{svux2fbigintegerux2fanalysis}{}
349 \input{tex_out/big_integer_analysis.tex}
350 \chapter{Bitmanip pseudocode}\hypertarget{svux2fpseudocode_bitmanip}{}
351 \input{tex_out/pseudocode_bitmanip.tex}
352 \chapter{Floating Point pseudocode}\hypertarget{isaux2fsvfparith}{}
353 \input{tex_out/pseudocode_svfparith.tex}
354 \chapter{Fixed Point pseudocode}
355 \hypertarget{isaux2fsvfixedarith}{}
356 \input{tex_out/pseudocode_svfixedarith.tex}
357 \end{appendices}
358
359 % Part IV
360 \part{Scalar Power ISA pseudocode}
361 \backmatter % temporary fix for too many appendices
362 %\setcounter{chapter}{0}
363 %\renewcommand{\thechapter}{\Alph{chapter}}
364
365 \chapter*{Preamble}
366 \addcontentsline{toc}{chapter}{Preamble} \markboth{INTRODUCTION}{}
367
368 This section contains pseudocode from the Power ISA Specification
369 v3.0B, updated to be executable. Several bugs in Power ISA v3.0B have been
370 found and reported as a direct result of actually running the
371 pseudocode as executable code in a Simulator.
372 A Formal Correctness Proof Research Paper has also been written by Boris
373 Shingarov.
374
375 Additionally, with SVP64 performing element-width over-rides it is the
376 \textit{Scalar} pseudocode that needs adapting to variable-length
377 (\textbf{XLEN}). Maintaining duplicate identical copies in every
378 respect \textit{except} for an XLEN as part of the Simple-V Specification
379 is completely pointless and a waste of time: the updates to include
380 XLEN need to be part
381 of the Scalar Power ISA Specification. This has the added benefit
382 that it makes life much easier for 32-bit implementors, and has an
383 additional benefit of making it possible for the Scalar Power ISA
384 to extend to 128-bit in future (like RV128).
385
386 \begin{appendices}
387 \chapter{Binary Coded Decimal pseudocode}
388 \hypertarget{svux2fpseudocode_bcd}{}
389 \input{tex_out/pseudocode_bcd.tex}
390 \chapter{Branch pseudocode}
391 \hypertarget{openpowerux2fisaux2fbranch}{}
392 \hypertarget{svux2fpseudocode_branch}{}
393 \input{tex_out/pseudocode_branch.tex}
394 \chapter{Fixed Point Compare pseudocode}
395 \hypertarget{svux2fpseudocode_comparefixed}{}
396 \input{tex_out/pseudocode_comparefixed.tex}
397 \chapter{Condition Register pseudocode}
398 \hypertarget{svux2fpseudocode_condition}{}
399 \input{tex_out/pseudocode_condition.tex}
400
401 \chapter{Fixed Point Arithmetic pseudocode}
402 \hypertarget{svux2fpseudocode_fixedarith}{}
403 \input{tex_out/pseudocode_fixedarith.tex}
404 \chapter{Fixed Point Load pseudocode}
405 \hypertarget{svux2fpseudocode_fixedload}{}
406 \input{tex_out/pseudocode_fixedload.tex}
407 \chapter{Fixed Point Logical pseudocode}
408 \hypertarget{svux2fpseudocode_fixedlogical}{}
409 \input{tex_out/pseudocode_fixedlogical.tex}
410 \chapter{Fixed Point Rotate pseudocode}
411 \hypertarget{svux2fpseudocode_fixedshift}{}
412 \input{tex_out/pseudocode_fixedshift.tex}
413
414 \chapter{Fixed Point Store pseudocode}
415 \hypertarget{svux2fpseudocode_fixedstore}{}
416 \input{tex_out/pseudocode_fixedstore.tex}
417 \chapter{Fixed Point Trap pseudocode}
418 \hypertarget{svux2fpseudocode_fixedtrap}{}
419 \input{tex_out/pseudocode_fixedtrap.tex}
420 \chapter{Special Purpose Register pseudocode}
421 \hypertarget{svux2fpseudocode_sprset}{}
422 \input{tex_out/pseudocode_sprset.tex}
423 \chapter{String Load/Store pseudocode}
424 \hypertarget{svux2fpseudocode_stringldst}{}
425 \input{tex_out/pseudocode_stringldst.tex}
426 \chapter{System Call pseudocode}
427 \hypertarget{svux2fpseudocode_system}{}
428 \input{tex_out/pseudocode_system.tex}
429
430 \chapter{Floating Point Load pseudocode}
431 \hypertarget{svux2fpseudocode_fpload}{}
432 \input{tex_out/pseudocode_fpload.tex}
433 \chapter{Floating Point Store pseudocode}
434 \hypertarget{svux2fpseudocode_fpstore}{}
435 \input{tex_out/pseudocode_fpstore.tex}
436 \chapter{Floating Point Move pseudocode}
437 \hypertarget{svux2fpseudocode_fpmove}{}
438 \input{tex_out/pseudocode_fpmove.tex}
439 \chapter{Floating Point Arithmetic pseudocode}
440 \hypertarget{svux2fpseudocode_fparith}{}
441 \input{tex_out/pseudocode_fparith.tex}
442 \chapter{Floating Point Integer Conversion pseudocode}
443 \hypertarget{svux2fpseudocode_fpcvt}{}
444 \input{tex_out/pseudocode_fpcvt.tex}
445
446 \end{appendices}
447
448
449
450
451 \end{document}