From: Jacob Lifshay <programmerjake@gmail.com>
Date: Fri, 7 May 2021 07:15:54 +0000 (-0700)
Subject: working on adding sin_cos_pi
X-Git-Url: https://git.libre-soc.org/?p=vector-math.git;a=commitdiff_plain;h=ad217eb300ff1d610689c9ebe5b06e94b659a508;hp=211dd475c41a76b11e4c5b3dcf7fc5b2c0babaf9

working on adding sin_cos_pi
---

diff --git a/Cargo.toml b/Cargo.toml
index 74c1f2b..d83abb8 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,6 +18,7 @@ fma = ["std"]
 std = []
 ir = ["std", "typed-arena"]
 stdsimd = ["core_simd"]
+full_tests = []
 
 [workspace]
 members = [".", "vector-math-proc-macro"]
diff --git a/src/algorithms.rs b/src/algorithms.rs
index 61191a6..76907f4 100644
--- a/src/algorithms.rs
+++ b/src/algorithms.rs
@@ -1 +1,2 @@
 pub mod ilogb;
+pub mod trig_pi;
diff --git a/src/algorithms/trig_pi.rs b/src/algorithms/trig_pi.rs
new file mode 100644
index 0000000..28e67ef
--- /dev/null
+++ b/src/algorithms/trig_pi.rs
@@ -0,0 +1,180 @@
+use crate::traits::{Context, ConvertTo, Float};
+
+mod consts {
+    #![allow(clippy::excessive_precision)]
+
+    /// coefficients of taylor series for `sin(pi * x)` centered at `0`
+    /// generated using:
+    /// ```maxima,text
+    /// fpprec:50$
+    /// sinpi: bfloat(taylor(sin(%pi*x),x,0,19))$
+    /// for i: 1 step 2 thru 19 do
+    ///     printf(true, "pub(crate) const SINPI_KERNEL_TAYLOR_~d: f64 = ~a;~%", i, ssubst("e", "b", string(coeff(sinpi, x, i))))$
+    /// ```
+    pub(crate) const SINPI_KERNEL_TAYLOR_1: f64 =
+        3.1415926535897932384626433832795028841971693993751e0;
+    pub(crate) const SINPI_KERNEL_TAYLOR_3: f64 =
+        -5.1677127800499700292460525111835658670375480943142e0;
+    pub(crate) const SINPI_KERNEL_TAYLOR_5: f64 =
+        2.550164039877345443856177583695296720669172555234e0;
+    pub(crate) const SINPI_KERNEL_TAYLOR_7: f64 =
+        -5.9926452932079207688773938354604004601536358636814e-1;
+    pub(crate) const SINPI_KERNEL_TAYLOR_9: f64 =
+        8.2145886611128228798802365523698344807837460797753e-2;
+    pub(crate) const SINPI_KERNEL_TAYLOR_11: f64 =
+        -7.370430945714350777259089957290781501211638236021e-3;
+    pub(crate) const SINPI_KERNEL_TAYLOR_13: f64 =
+        4.6630280576761256442062891447027174382819981361599e-4;
+    pub(crate) const SINPI_KERNEL_TAYLOR_15: f64 =
+        -2.1915353447830215827384652057094188859248708765956e-5;
+    pub(crate) const SINPI_KERNEL_TAYLOR_17: f64 =
+        7.9520540014755127847832068624575890327682459384282e-7;
+    pub(crate) const SINPI_KERNEL_TAYLOR_19: f64 =
+        -2.2948428997269873110203872385571587856074785581088e-8;
+
+    /// coefficients of taylor series for `cos(pi * x)` centered at `0`
+    /// generated using:
+    /// ```maxima,text
+    /// fpprec:50$
+    /// cospi: bfloat(taylor(cos(%pi*x),x,0,18))$
+    /// for i: 0 step 2 thru 18 do
+    ///     printf(true, "pub(crate) const COSPI_KERNEL_TAYLOR_~d: f64 = ~a;~%", i, ssubst("e", "b", string(coeff(cospi, x, i))))$
+    /// ```
+    pub(crate) const COSPI_KERNEL_TAYLOR_0: f64 = 1.0e0;
+    pub(crate) const COSPI_KERNEL_TAYLOR_2: f64 =
+        -4.9348022005446793094172454999380755676568497036204e0;
+    pub(crate) const COSPI_KERNEL_TAYLOR_4: f64 =
+        4.0587121264167682181850138620293796354053160696952e0;
+    pub(crate) const COSPI_KERNEL_TAYLOR_6: f64 =
+        -1.3352627688545894958753047828505831928711354556681e0;
+    pub(crate) const COSPI_KERNEL_TAYLOR_8: f64 =
+        2.3533063035889320454187935277546542154506893530856e-1;
+    pub(crate) const COSPI_KERNEL_TAYLOR_10: f64 =
+        -2.5806891390014060012598294252898849657186441048147e-2;
+    pub(crate) const COSPI_KERNEL_TAYLOR_12: f64 =
+        1.9295743094039230479033455636859576401684718150003e-3;
+    pub(crate) const COSPI_KERNEL_TAYLOR_14: f64 =
+        -1.0463810492484570711801672835223932761029733149091e-4;
+    pub(crate) const COSPI_KERNEL_TAYLOR_16: f64 =
+        4.3030695870329470072978237149669233008960901556009e-6;
+    pub(crate) const COSPI_KERNEL_TAYLOR_18: f64 =
+        -1.387895246221377211446808750399309343777037849978e-7;
+}
+
+/// computes `sin(pi * x)` for `-0.25 <= x <= 0.25`
+/// not guaranteed to give correct sign for zero result
+/// has an error of up to 2ULP
+pub fn sin_pi_kernel_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> Ctx::VecF16 {
+    let x_sq = x * x;
+    let mut v: Ctx::VecF16 = ctx.make(consts::SINPI_KERNEL_TAYLOR_5.to());
+    v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_3.to()));
+    v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_1.to()));
+    v * x
+}
+
+/// computes `cos(pi * x)` for `-0.25 <= x <= 0.25`
+/// has an error of up to 2ULP
+pub fn cos_pi_kernel_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> Ctx::VecF16 {
+    let x_sq = x * x;
+    let mut v: Ctx::VecF16 = ctx.make(consts::COSPI_KERNEL_TAYLOR_4.to());
+    v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_2.to()));
+    v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_0.to()))
+}
+
+/// computes `(sin(pi * x), cos(pi * x))`
+/// not guaranteed to give correct sign for zero results
+/// has an error of up to 2ULP
+pub fn sin_cos_pi_f16<Ctx: Context>(_ctx: Ctx, _x: Ctx::VecF16) -> (Ctx::VecF16, Ctx::VecF16) {
+    todo!()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::{f16::F16, scalar::Scalar};
+    use std::f64;
+
+    struct CheckUlpCallbackArg<F, I> {
+        distance_in_ulp: I,
+        expected: F,
+        result: F,
+    }
+
+    #[track_caller]
+    fn check_ulp_f16(
+        x: F16,
+        is_ok: impl Fn(CheckUlpCallbackArg<F16, i16>) -> bool,
+        fn_f16: impl Fn(F16) -> F16,
+        fn_f64: impl Fn(f64) -> f64,
+    ) {
+        let x_f64: f64 = x.to();
+        let expected_f64 = fn_f64(x_f64);
+        let expected: F16 = expected_f64.to();
+        let result = fn_f16(x);
+        if result == expected {
+            return;
+        }
+        let distance_in_ulp = (expected.to_bits() as i16).wrapping_sub(result.to_bits() as i16);
+        if is_ok(CheckUlpCallbackArg {
+            distance_in_ulp,
+            expected,
+            result,
+        }) {
+            return;
+        }
+        panic!(
+            "error is too big: \
+                x = {x:?} {x_bits:#X}, \
+                result = {result:?} {result_bits:#X}, \
+                expected = {expected:?} {expected_bits:#X}",
+            x = x,
+            x_bits = x.to_bits(),
+            result = result,
+            result_bits = result.to_bits(),
+            expected = expected,
+            expected_bits = expected.to_bits(),
+        );
+    }
+
+    #[test]
+    #[cfg_attr(
+        not(feature = "f16"),
+        should_panic(expected = "f16 feature is not enabled")
+    )]
+    fn test_sin_pi_kernel_f16() {
+        let check = |x| {
+            check_ulp_f16(
+                x,
+                |arg| arg.distance_in_ulp <= if arg.expected == 0.to() { 0 } else { 2 },
+                |x| sin_pi_kernel_f16(Scalar, x),
+                |x| (f64::consts::PI * x).sin(),
+            )
+        };
+        let quarter = F16::to_bits(0.25f32.to());
+        for bits in (0..=quarter).rev() {
+            check(F16::from_bits(bits));
+            check(-F16::from_bits(bits));
+        }
+    }
+
+    #[test]
+    #[cfg_attr(
+        not(feature = "f16"),
+        should_panic(expected = "f16 feature is not enabled")
+    )]
+    fn test_cos_pi_kernel_f16() {
+        let check = |x| {
+            check_ulp_f16(
+                x,
+                |arg| arg.distance_in_ulp <= 2 && arg.result <= 1.to(),
+                |x| cos_pi_kernel_f16(Scalar, x),
+                |x| (f64::consts::PI * x).cos(),
+            )
+        };
+        let quarter = F16::to_bits(0.25f32.to());
+        for bits in (0..=quarter).rev() {
+            check(F16::from_bits(bits));
+            check(-F16::from_bits(bits));
+        }
+    }
+}
diff --git a/src/traits.rs b/src/traits.rs
index ffc766b..6555016 100644
--- a/src/traits.rs
+++ b/src/traits.rs
@@ -187,9 +187,18 @@ pub trait Float: Number + Neg<Output = Self> {
     fn trunc(self) -> Self;
     fn ceil(self) -> Self;
     fn floor(self) -> Self;
+    /// round to nearest integer, unspecified which way half-way cases are rounded
     fn round(self) -> Self;
+    /// returns `self * a + b` but only rounding once
     #[cfg(feature = "fma")]
     fn fma(self, a: Self, b: Self) -> Self;
+    /// returns `self * a + b` either using `fma` or `self * a + b`
+    fn mul_add_fast(self, a: Self, b: Self) -> Self {
+        #[cfg(feature = "fma")]
+        return self.fma(a, b);
+        #[cfg(not(feature = "fma"))]
+        return self * a + b;
+    }
     fn is_nan(self) -> Self::Bool {
         self.ne(self)
     }