From: Luke Kenneth Casson Leighton Date: Fri, 24 Feb 2023 21:21:19 +0000 (+0000) Subject: add comments and column for MyISA 66000 transcendentals, X-Git-Tag: opf_rfc_ls001_v3~209 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=59663eaaedd93c1257cc734eae3a129dd52a95b2;p=libreriscv.git add comments and column for MyISA 66000 transcendentals, with thanks to Mitch Alsup --- diff --git a/openpower/transcendentals.mdwn b/openpower/transcendentals.mdwn index 5dcfb1f9e..be09348bd 100644 --- a/openpower/transcendentals.mdwn +++ b/openpower/transcendentals.mdwn @@ -134,68 +134,79 @@ IEEE754-2019 Table 9.1 lists "additional mathematical operations". Interestingly the only functions missing when compared to OpenCL are compound, exp2m1, exp10m1, log2p1, log10p1, pown (integer power) and powr. -|opcode |OpenCL FP32|OpenCL FP16|OpenCL native|IEEE754 |Power ISA | -|------------|-----------|-----------|-------------|-------------- |------------------------| -|fsin |sin |half\_sin |native\_sin |sin |NONE | -|fcos |cos |half\_cos |native\_cos |cos |NONE | -|ftan |tan |half\_tan |native\_tan |tan |NONE | -|NONE (1) |sincos |NONE |NONE |NONE |NONE | -|fasin |asin |NONE |NONE |asin |NONE | -|facos |acos |NONE |NONE |acos |NONE | -|fatan |atan |NONE |NONE |atan |NONE | -|fsinpi |sinpi |NONE |NONE |sinPi |NONE | -|fcospi |cospi |NONE |NONE |cosPi |NONE | -|ftanpi |tanpi |NONE |NONE |tanPi |NONE | -|fasinpi |asinpi |NONE |NONE |asinPi |NONE | -|facospi |acospi |NONE |NONE |acosPi |NONE | -|fatanpi |atanpi |NONE |NONE |atanPi |NONE | -|fsinh |sinh |NONE |NONE |sinh |NONE | -|fcosh |cosh |NONE |NONE |cosh |NONE | -|ftanh |tanh |NONE |NONE |tanh |NONE | -|fasinh |asinh |NONE |NONE |asinh |NONE | -|facosh |acosh |NONE |NONE |acosh |NONE | -|fatanh |atanh |NONE |NONE |atanh |NONE | -|fatan2 |atan2 |NONE |NONE |atan2 |NONE | -|fatan2pi |atan2pi |NONE |NONE |atan2pi |NONE | -|frsqrt |rsqrt |half\_rsqrt|native\_rsqrt|rSqrt |fsqrte, fsqrtes (4) | -|fcbrt |cbrt |NONE |NONE |NONE (2) |NONE 
| -|fexp2 |exp2 |half\_exp2 |native\_exp2 |exp2 |NONE | -|flog2 |log2 |half\_log2 |native\_log2 |log2 |NONE | -|fexpm1 |expm1 |NONE |NONE |expm1 |NONE | -|flog1p |log1p |NONE |NONE |logp1 |NONE | -|fexp |exp |half\_exp |native\_exp |exp |NONE | -|flog |log |half\_log |native\_log |log |NONE | -|fexp10 |exp10 |half\_exp10|native\_exp10|exp10 |NONE | -|flog10 |log10 |half\_log10|native\_log10|log10 |NONE | -|fpow |pow |NONE |NONE |pow |NONE | -|fpown |pown |NONE |NONE |pown |NONE | -|fpowr |powr |half\_powr |native\_powr |powr |NONE | -|frootn |rootn |NONE |NONE |rootn |NONE | -|fhypot |hypot |NONE |NONE |hypot |NONE | -|frecip |NONE |half\_recip|native\_recip|NONE (3) |fre, fres (4) | -|NONE |NONE |NONE |NONE |compound |NONE | -|fexp2m1 |NONE |NONE |NONE |exp2m1 |NONE | -|fexp10m1 |NONE |NONE |NONE |exp10m1 |NONE | -|flog2p1 |NONE |NONE |NONE |log2p1 |NONE | -|flog10p1 |NONE |NONE |NONE |log10p1 |NONE | -|fminnum08 |fmin |fmin |NONE |minNum |xsmindp (5) | -|fmaxnum08 |fmax |fmax |NONE |maxNum |xsmaxdp (5) | -|fmin19 |fmin |fmin |NONE |minimum |NONE | -|fmax19 |fmax |fmax |NONE |maximum |NONE | -|fminnum19 |fmin |fmin |NONE |minimumNumber |vminfp (6), xsminjdp (5)| -|fmaxnum19 |fmax |fmax |NONE |maximumNumber |vmaxfp (6), xsmaxjdp (5)| -|fminc |fmin |fmin |NONE |NONE |xsmincdp (5) | -|fmaxc |fmax |fmax |NONE |NONE |xsmaxcdp (5) | -|fminmagnum08|minmag |minmag |NONE |minNumMag |NONE | -|fmaxmagnum08|maxmag |maxmag |NONE |maxNumMag |NONE | -|fminmag19 |minmag |minmag |NONE |minimumMagnitude |NONE | -|fmaxmag19 |maxmag |maxmag |NONE |maximumMagnitude |NONE | -|fminmagnum19|minmag |minmag |NONE |minimumMagnitudeNumber|NONE | -|fmaxmagnum19|maxmag |maxmag |NONE |maximumMagnitudeNumber|NONE | -|fminmagc |minmag |minmag |NONE |NONE |NONE | -|fmaxmagc |maxmag |maxmag |NONE |NONE |NONE | -|fmod |fmod |fmod | |NONE |NONE | -|fremainder |remainder |remainder | |remainder |NONE | +|opcode |OpenCL FP32|OpenCL FP16|OpenCL native|IEEE754 |Power ISA |My 66000 ISA | 
+|------------|-----------|-----------|-------------|-------------- |------------------------|-------------| +|fsin |sin |half\_sin |native\_sin |sin |NONE |sin | +|fcos |cos |half\_cos |native\_cos |cos |NONE |cos | +|ftan |tan |half\_tan |native\_tan |tan |NONE |tan | +|NONE (1) |sincos |NONE |NONE |NONE |NONE | | +|fasin |asin |NONE |NONE |asin |NONE |asin | +|facos |acos |NONE |NONE |acos |NONE |acos | +|fatan |atan |NONE |NONE |atan |NONE |atan | +|fsinpi |sinpi |NONE |NONE |sinPi |NONE |sinpi | +|fcospi |cospi |NONE |NONE |cosPi |NONE |cospi | +|ftanpi |tanpi |NONE |NONE |tanPi |NONE |tanpi | +|fasinpi |asinpi |NONE |NONE |asinPi |NONE |asinpi | +|facospi |acospi |NONE |NONE |acosPi |NONE |acospi | +|fatanpi |atanpi |NONE |NONE |atanPi |NONE |atanpi | +|fsinh |sinh |NONE |NONE |sinh |NONE | | +|fcosh |cosh |NONE |NONE |cosh |NONE | | +|ftanh |tanh |NONE |NONE |tanh |NONE | | +|fasinh |asinh |NONE |NONE |asinh |NONE | | +|facosh |acosh |NONE |NONE |acosh |NONE | | +|fatanh |atanh |NONE |NONE |atanh |NONE | | +|fatan2 |atan2 |NONE |NONE |atan2 |NONE |atan2 | +|fatan2pi |atan2pi |NONE |NONE |atan2pi |NONE |atan2pi | +|frsqrt |rsqrt |half\_rsqrt|native\_rsqrt|rSqrt |fsqrte, fsqrtes (4) |rsqrt | +|fcbrt |cbrt |NONE |NONE |NONE (2) |NONE | | +|fexp2 |exp2 |half\_exp2 |native\_exp2 |exp2 |NONE |exp2 | +|flog2 |log2 |half\_log2 |native\_log2 |log2 |NONE |ln2 | +|fexpm1 |expm1 |NONE |NONE |expm1 |NONE |expm1 | +|flog1p |log1p |NONE |NONE |logp1 |NONE |logp1 | +|fexp |exp |half\_exp |native\_exp |exp |NONE |exp | +|flog |log |half\_log |native\_log |log |NONE |ln | +|fexp10 |exp10 |half\_exp10|native\_exp10|exp10 |NONE |exp10 | +|flog10 |log10 |half\_log10|native\_log10|log10 |NONE |log | +|fpow |pow |NONE |NONE |pow |NONE |pow | +|fpown |pown |NONE |NONE |pown |NONE | | +|fpowr |powr |half\_powr |native\_powr |powr |NONE | | +|frootn |rootn |NONE |NONE |rootn |NONE | | +|fhypot |hypot |NONE |NONE |hypot |NONE | | +|frecip |NONE |half\_recip|native\_recip|NONE (3) 
|fre, fres (4) |rcp | +|NONE |NONE |NONE |NONE |compound |NONE | | +|fexp2m1 |NONE |NONE |NONE |exp2m1 |NONE |exp2m1 | +|fexp10m1 |NONE |NONE |NONE |exp10m1 |NONE |exp10m1 | +|flog2p1 |NONE |NONE |NONE |log2p1 |NONE |ln2p1 | +|flog10p1 |NONE |NONE |NONE |log10p1 |NONE |logp1 | +|fminnum08 |fmin |fmin |NONE |minNum |xsmindp (5) | | +|fmaxnum08 |fmax |fmax |NONE |maxNum |xsmaxdp (5) | | +|fmin19 |fmin |fmin |NONE |minimum |NONE |fmin | +|fmax19 |fmax |fmax |NONE |maximum |NONE |fmax | +|fminnum19 |fmin |fmin |NONE |minimumNumber |vminfp (6), xsminjdp (5)| | +|fmaxnum19 |fmax |fmax |NONE |maximumNumber |vmaxfp (6), xsmaxjdp (5)| | +|fminc |fmin |fmin |NONE |NONE |xsmincdp (5) |fmin* | +|fmaxc |fmax |fmax |NONE |NONE |xsmaxcdp (5) |fmax* | +|fminmagnum08|minmag |minmag |NONE |minNumMag |NONE | | +|fmaxmagnum08|maxmag |maxmag |NONE |maxNumMag |NONE | | +|fminmag19 |minmag |minmag |NONE |minimumMagnitude |NONE | | +|fmaxmag19 |maxmag |maxmag |NONE |maximumMagnitude |NONE | | +|fminmagnum19|minmag |minmag |NONE |minimumMagnitudeNumber|NONE | | +|fmaxmagnum19|maxmag |maxmag |NONE |maximumMagnitudeNumber|NONE | | +|fminmagc |minmag |minmag |NONE |NONE |NONE | | +|fmaxmagc |maxmag |maxmag |NONE |NONE |NONE | | +|fmod |fmod |fmod | |NONE |NONE | | +|fremainder |remainder |remainder | |remainder |NONE | | + + from Mitch Alsup: + +* Brian's LLVM compiler converts fminc and fmaxc into fmin and fmax instructions +These are all IEEE 754-2019 compliant +These are native instructions, not extensions +All listed functions are available in both F32 and F64 formats. +There is some confusion (in my head) about fmin and fmax. I intend both instructions to perform 754-2019 semantics-- +but I don't know if this is minimum/maximum or minimumNumber/maximumNumber. +fmod and remainder are a 2-instruction sequence--don't know how to "edit it in" + Note (1) fsincos is macro-op fused (see below). @@ -434,6 +445,9 @@ provided by GPUs for 3D, warranting their own subset. 
(programmerjake: actually, all other GPU ISAs mentioned in this document have sinpi/cospi or equivalent, and often not sin/cos, because sinpi/cospi are actually *waay* easier to implement because range reduction is simply a bitwise mask, whereas for sin/cos range reduction is a full division by pi) +(Mitch: My patent USPTO 10,761,806 shows that the above statement is no longer true.) + + In the case of the Ztrigpi subset, these are commonly used in for loops with a power of two number of subdivisions, and the cost of multiplying by PI inside each loop (or cumulative addition, resulting in cumulative