From: Jacob Lifshay Date: Fri, 7 May 2021 07:15:54 +0000 (-0700) Subject: working on adding sin_cos_pi X-Git-Url: https://git.libre-soc.org/?p=vector-math.git;a=commitdiff_plain;h=ad217eb300ff1d610689c9ebe5b06e94b659a508;hp=211dd475c41a76b11e4c5b3dcf7fc5b2c0babaf9 working on adding sin_cos_pi --- diff --git a/Cargo.toml b/Cargo.toml index 74c1f2b..d83abb8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ fma = ["std"] std = [] ir = ["std", "typed-arena"] stdsimd = ["core_simd"] +full_tests = [] [workspace] members = [".", "vector-math-proc-macro"] diff --git a/src/algorithms.rs b/src/algorithms.rs index 61191a6..76907f4 100644 --- a/src/algorithms.rs +++ b/src/algorithms.rs @@ -1 +1,2 @@ pub mod ilogb; +pub mod trig_pi; diff --git a/src/algorithms/trig_pi.rs b/src/algorithms/trig_pi.rs new file mode 100644 index 0000000..28e67ef --- /dev/null +++ b/src/algorithms/trig_pi.rs @@ -0,0 +1,180 @@ +use crate::traits::{Context, ConvertTo, Float}; + +mod consts { + #![allow(clippy::excessive_precision)] + + /// coefficients of taylor series for `sin(pi * x)` centered at `0` + /// generated using: + /// ```maxima,text + /// fpprec:50$ + /// sinpi: bfloat(taylor(sin(%pi*x),x,0,19))$ + /// for i: 1 step 2 thru 19 do + /// printf(true, "pub(crate) const SINPI_KERNEL_TAYLOR_~d: f64 = ~a;~%", i, ssubst("e", "b", string(coeff(sinpi, x, i))))$ + /// ``` + pub(crate) const SINPI_KERNEL_TAYLOR_1: f64 = + 3.1415926535897932384626433832795028841971693993751e0; + pub(crate) const SINPI_KERNEL_TAYLOR_3: f64 = + -5.1677127800499700292460525111835658670375480943142e0; + pub(crate) const SINPI_KERNEL_TAYLOR_5: f64 = + 2.550164039877345443856177583695296720669172555234e0; + pub(crate) const SINPI_KERNEL_TAYLOR_7: f64 = + -5.9926452932079207688773938354604004601536358636814e-1; + pub(crate) const SINPI_KERNEL_TAYLOR_9: f64 = + 8.2145886611128228798802365523698344807837460797753e-2; + pub(crate) const SINPI_KERNEL_TAYLOR_11: f64 = + -7.370430945714350777259089957290781501211638236021e-3; + pub(crate) const SINPI_KERNEL_TAYLOR_13: f64 = + 4.6630280576761256442062891447027174382819981361599e-4; + pub(crate) const SINPI_KERNEL_TAYLOR_15: f64 = + -2.1915353447830215827384652057094188859248708765956e-5; + pub(crate) const SINPI_KERNEL_TAYLOR_17: f64 = + 7.9520540014755127847832068624575890327682459384282e-7; + pub(crate) const SINPI_KERNEL_TAYLOR_19: f64 = + -2.2948428997269873110203872385571587856074785581088e-8; + + /// coefficients of taylor series for `cos(pi * x)` centered at `0` + /// generated using: + /// ```maxima,text + /// fpprec:50$ + /// cospi: bfloat(taylor(cos(%pi*x),x,0,18))$ + /// for i: 0 step 2 thru 18 do + /// printf(true, "pub(crate) const COSPI_KERNEL_TAYLOR_~d: f64 = ~a;~%", i, ssubst("e", "b", string(coeff(cospi, x, i))))$ + /// ``` + pub(crate) const COSPI_KERNEL_TAYLOR_0: f64 = 1.0e0; + pub(crate) const COSPI_KERNEL_TAYLOR_2: f64 = + -4.9348022005446793094172454999380755676568497036204e0; + pub(crate) const COSPI_KERNEL_TAYLOR_4: f64 = + 4.0587121264167682181850138620293796354053160696952e0; + pub(crate) const COSPI_KERNEL_TAYLOR_6: f64 = + -1.3352627688545894958753047828505831928711354556681e0; + pub(crate) const COSPI_KERNEL_TAYLOR_8: f64 = + 2.3533063035889320454187935277546542154506893530856e-1; + pub(crate) const COSPI_KERNEL_TAYLOR_10: f64 = + -2.5806891390014060012598294252898849657186441048147e-2; + pub(crate) const COSPI_KERNEL_TAYLOR_12: f64 = + 1.9295743094039230479033455636859576401684718150003e-3; + pub(crate) const COSPI_KERNEL_TAYLOR_14: f64 = + -1.0463810492484570711801672835223932761029733149091e-4; + pub(crate) const COSPI_KERNEL_TAYLOR_16: f64 = + 4.3030695870329470072978237149669233008960901556009e-6; + pub(crate) const COSPI_KERNEL_TAYLOR_18: f64 = + -1.387895246221377211446808750399309343777037849978e-7; +} + +/// computes `sin(pi * x)` for `-0.25 <= x <= 0.25` +/// not guaranteed to give correct sign for zero result +/// has an error of up to 2ULP +pub fn sin_pi_kernel_f16(ctx: Ctx, x: Ctx::VecF16) -> Ctx::VecF16 { + let x_sq = x * x; + let mut v: Ctx::VecF16 = ctx.make(consts::SINPI_KERNEL_TAYLOR_5.to()); + v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_3.to())); + v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_1.to())); + v * x +} + +/// computes `cos(pi * x)` for `-0.25 <= x <= 0.25` +/// has an error of up to 2ULP +pub fn cos_pi_kernel_f16(ctx: Ctx, x: Ctx::VecF16) -> Ctx::VecF16 { + let x_sq = x * x; + let mut v: Ctx::VecF16 = ctx.make(consts::COSPI_KERNEL_TAYLOR_4.to()); + v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_2.to())); + v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_0.to())) +} + +/// computes `(sin(pi * x), cos(pi * x))` +/// not guaranteed to give correct sign for zero results +/// has an error of up to 2ULP +pub fn sin_cos_pi_f16(_ctx: Ctx, _x: Ctx::VecF16) -> (Ctx::VecF16, Ctx::VecF16) { + todo!() +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{f16::F16, scalar::Scalar}; + use std::f64; + + struct CheckUlpCallbackArg { + distance_in_ulp: I, + expected: F, + result: F, + } + + #[track_caller] + fn check_ulp_f16( + x: F16, + is_ok: impl Fn(CheckUlpCallbackArg) -> bool, + fn_f16: impl Fn(F16) -> F16, + fn_f64: impl Fn(f64) -> f64, + ) { + let x_f64: f64 = x.to(); + let expected_f64 = fn_f64(x_f64); + let expected: F16 = expected_f64.to(); + let result = fn_f16(x); + if result == expected { + return; + } + let distance_in_ulp = (expected.to_bits() as i16).wrapping_sub(result.to_bits() as i16); + if is_ok(CheckUlpCallbackArg { + distance_in_ulp, + expected, + result, + }) { + return; + } + panic!( + "error is too big: \ + x = {x:?} {x_bits:#X}, \ + result = {result:?} {result_bits:#X}, \ + expected = {expected:?} {expected_bits:#X}", + x = x, + x_bits = x.to_bits(), + result = result, + result_bits = result.to_bits(), + expected = expected, + expected_bits = expected.to_bits(), + ); + } + + #[test] + #[cfg_attr( + not(feature = "f16"), + should_panic(expected = "f16 feature is not enabled") + )] + fn test_sin_pi_kernel_f16() { + let check = |x| { + check_ulp_f16( + x, + |arg| arg.distance_in_ulp <= if arg.expected == 0.to() { 0 } else { 2 }, + |x| sin_pi_kernel_f16(Scalar, x), + |x| (f64::consts::PI * x).sin(), + ) + }; + let quarter = F16::to_bits(0.25f32.to()); + for bits in (0..=quarter).rev() { + check(F16::from_bits(bits)); + check(-F16::from_bits(bits)); + } + } + + #[test] + #[cfg_attr( + not(feature = "f16"), + should_panic(expected = "f16 feature is not enabled") + )] + fn test_cos_pi_kernel_f16() { + let check = |x| { + check_ulp_f16( + x, + |arg| arg.distance_in_ulp <= 2 && arg.result <= 1.to(), + |x| cos_pi_kernel_f16(Scalar, x), + |x| (f64::consts::PI * x).cos(), + ) + }; + let quarter = F16::to_bits(0.25f32.to()); + for bits in (0..=quarter).rev() { + check(F16::from_bits(bits)); + check(-F16::from_bits(bits)); + } + } +} diff --git a/src/traits.rs b/src/traits.rs index ffc766b..6555016 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -187,9 +187,18 @@ pub trait Float: Number + Neg { fn trunc(self) -> Self; fn ceil(self) -> Self; fn floor(self) -> Self; + /// round to nearest integer, unspecified which way half-way cases are rounded fn round(self) -> Self; + /// returns `self * a + b` but only rounding once #[cfg(feature = "fma")] fn fma(self, a: Self, b: Self) -> Self; + /// returns `self * a + b` either using `fma` or `self * a + b` + fn mul_add_fast(self, a: Self, b: Self) -> Self { + #[cfg(feature = "fma")] + return self.fma(a, b); + #[cfg(not(feature = "fma"))] + return self * a + b; + } fn is_nan(self) -> Self::Bool { self.ne(self) }