(no commit message)

[libreriscv.git] / ztrans_proposal.mdwn
diff --git a/ztrans_proposal.mdwn b/ztrans_proposal.mdwn

index 7610755be5fe9d92f648716350f6cc726cf0a1d1..ac805a542f6cbb75a345a09be9bebd422294c1ed 100644 (file)
--- a/ztrans_proposal.mdwn
+++ b/ztrans_proposal.mdwn
@@ -1,5 +1,11 @@
+[[!tag standards]]
+
  # Zftrans - transcendental operations
  
+Summary:
+
+*This proposal extends RISC-V scalar floating point operations to add IEEE754 transcendental functions (pow, log etc) and trigonometric functions (sin, cos etc). These functions are also 98% shared with the Khronos Group OpenCL Extended Instruction Set.*
+
  With thanks to:
  
  * Jacob Lifshay
@@ -54,7 +60,7 @@ Minimum recommended requirements for Mobile-Embedded 3D: Ztrignpi, Zftrans, with
  
  This proposal is designed to meet a wide range of extremely diverse needs,
  allowing implementors from all of them to benefit from the tools and hardware
-cost reductions associated with common standards adoption.
+cost reductions associated with common standards adoption in RISC-V (primarily IEEE754 and Vulkan).
  
  **There are *four* different, disparate platforms' needs (two new)**:
  
@@ -210,11 +216,14 @@ Any deviation from Trademarked Standards means that an implementation
  may not be sold and also make a claim of being, for example, "Vulkan
  compatible".
  
-This in turn reinforces and makes a hard requirement a need for public
+For 3D, this in turn reinforces and makes a hard requirement a need for public
  compliance with such standards, over-and-above what would otherwise be
  set by a RISC-V Standards Development Process, including both the
  software compliance and the knock-on implications that has for hardware.
  
+For libraries such as libm and numpy, accuracy is paramount for software interoperability across multiple platforms: some algorithms, for example, critically rely on correct IEEE754 results.
+The conflicting accuracy requirements can be met through the zfpacc extension.
+
  **Collaboration**:
  
  The case for collaboration on any Extension is already well-known.
@@ -312,8 +321,11 @@ numbers of potential opcodes.  BitManip is the perfect counter-example.
  
  # Proposed Opcodes vs Khronos OpenCL vs IEEE754-2019<a name="khronos_equiv"></a>
  
-This list shows the (direct) equivalence between proposed opcodes and
-their Khronos OpenCL equivalents.
+This list shows the (direct) equivalence between proposed opcodes,
+their Khronos OpenCL equivalents, and their IEEE754-2019 equivalents.
+98% of the opcodes in this proposal that are in the IEEE754-2019 standard
+are present in the Khronos Extended Instruction Set.
+
  For RISCV opcode encodings see 
  [[rv_major_opcode_1010011]]
  
@@ -363,7 +375,7 @@ FATANH   | atanh       | NONE        | NONE          | NONE        | atanh    |
  FATAN2   | atan2       | NONE        | NONE          | NONE        | atan2    |
  FATAN2PI | atan2pi     | NONE        | NONE          | NONE        | atan2pi  |
  FRSQRT   | rsqrt       | half\_rsqrt | native\_rsqrt | NONE        | rSqrt    |
-FCBRT    | cbrt        | NONE        | NONE          | NONE        | NONE (4) |
+FCBRT    | cbrt        | NONE        | NONE          | NONE        | NONE (2) |
  FEXP2    | exp2        | half\_exp2  | native\_exp2  | NONE        | exp2     |
  FLOG2    | log2        | half\_log2  | native\_log2  | NONE        | log2     |
  FEXPM1   | expm1       | NONE        | NONE          | NONE        | expm1    |
@@ -373,27 +385,23 @@ FLOG     | log         | half\_log   | native\_log   | NONE        | log      |
  FEXP10   | exp10       | half\_exp10 | native\_exp10 | NONE        | exp10    |
  FLOG10   | log10       | half\_log10 | native\_log10 | NONE        | log10    |
  FPOW     | pow         | NONE        | NONE          | NONE        | pow      |
+FPOWN    | pown        | NONE        | NONE          | NONE        | pown     |
+FPOWR    | powr        | half\_powr  | native\_powr  | NONE        | powr     |
  FROOTN   | rootn       | NONE        | NONE          | NONE        | rootn    |
  FHYPOT   | hypot       | NONE        | NONE          | NONE        | hypot    |
-FRECIP   | NONE        | half\_recip | native\_recip | NONE        | NONE (5) |
+FRECIP   | NONE        | half\_recip | native\_recip | NONE        | NONE (3) |
  NONE     | NONE        | NONE        | NONE          | NONE        | compound |
  NONE     | NONE        | NONE        | NONE          | NONE        | exp2m1   |
  NONE     | NONE        | NONE        | NONE          | NONE        | exp10m1  |
  NONE     | NONE        | NONE        | NONE          | NONE        | log2p1   |
  NONE     | NONE        | NONE        | NONE          | NONE        | log10p1  |
-NONE     | NONE        | NONE        | NONE          | NONE        | pown (2) |
-NONE     | NONE        | NONE        | NONE          | NONE        | powr (3) |
  """]]
  
  Note (1) FSINCOS is macro-op fused (see below).
  
-Note (2) IEEE754-2019 pown(x, n) - n is an integer
-
-Note (3) IEEE754-2019 powr(x, y) is defined as "exp(y log (x))"
+Note (2) synthesised in IEEE754-2019 as "rootn(x, 3)"
  
-Note (4) synthesised in IEEE754-2019 as "pown(x, 3)"
-
-Note (5) synthesised in IEEE754-2019 using "1.0 / x"
+Note (3) synthesised in IEEE754-2019 using "1.0 / x"
  
  ## List of 2-arg opcodes
  
@@ -402,6 +410,8 @@ opcode    | Description            | pseudocode                 | Extension   |
  FATAN2    | atan2 arc tangent      | rd = atan2(rs2, rs1)       | Zarctrignpi |
  FATAN2PI  | atan2 arc tangent / pi | rd = atan2(rs2, rs1) / pi  | Zarctrigpi  |
  FPOW      | x power of y           | rd = pow(rs1, rs2)         | ZftransAdv  |
+FPOWN     | x power of n (n int)   | rd = pow(rs1, rs2)         | ZftransAdv  |
+FPOWR     | x power of y (x +ve)   | rd = exp(rs2 log(rs1))     | ZftransAdv  |
  FROOTN    | x power 1/n (n integer)| rd = pow(rs1, 1/rs2)       | ZftransAdv  |
  FHYPOT    | hypotenuse             | rd = sqrt(rs1^2 + rs2^2)   | ZftransAdv  |
  """]]
@@ -435,7 +445,6 @@ FACOS       | arccos (radians)         | rd = acos(rs1)          | Zarctrignpi |
  FATAN       | arctan (radians)         | rd = atan(rs1)          | Zarctrignpi |
  FSINPI      | sin times pi             | rd = sin(pi * rs1)      | Ztrigpi |
  FCOSPI      | cos times pi             | rd = cos(pi * rs1)      | Ztrigpi |
-
  FTANPI      | tan times pi             | rd = tan(pi * rs1)      | Ztrigpi |
  FASINPI     | arcsin / pi              | rd = asin(rs1) / pi     | Zarctrigpi |
  FACOSPI     | arccos / pi              | rd = acos(rs1) / pi     | Zarctrigpi |
@@ -556,7 +565,7 @@ HPC and high-end GPUs are likely markets for these.
  
  ### ZftransAdv
  
-CBRT, POW, ROOTN
+CBRT, POW, POWN, POWR, ROOTN
  
  These are simply much more complex to implement in hardware, and typically
  will only be put into HPC applications.