mod consts {
#![allow(clippy::excessive_precision)]
+ #![allow(dead_code)]
/// coefficients of taylor series for `sin(pi * x)` centered at `0`
/// generated using:
v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_0.to()))
}
+/// computes `sin(pi * x)` for `-0.25 <= x <= 0.25`
+/// not guaranteed to give correct sign for zero result
+/// has an error of up to 2ULP
+pub fn sin_pi_kernel_f64<Ctx: Context>(ctx: Ctx, x: Ctx::VecF64) -> Ctx::VecF64 {
+ let x_sq = x * x;
+ let mut v: Ctx::VecF64 = ctx.make(consts::SINPI_KERNEL_TAYLOR_15.to());
+ v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_13.to()));
+ v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_11.to()));
+ v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_9.to()));
+ v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_7.to()));
+ v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_5.to()));
+ v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_3.to()));
+ v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_1.to()));
+ v * x
+}
+
+/// computes `cos(pi * x)` for `-0.25 <= x <= 0.25`
+/// has an error of up to 2ULP
+pub fn cos_pi_kernel_f64<Ctx: Context>(ctx: Ctx, x: Ctx::VecF64) -> Ctx::VecF64 {
+ let x_sq = x * x;
+ let mut v: Ctx::VecF64 = ctx.make(consts::COSPI_KERNEL_TAYLOR_16.to());
+ v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_14.to()));
+ v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_12.to()));
+ v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_10.to()));
+ v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_8.to()));
+ v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_6.to()));
+ v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_4.to()));
+ v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_2.to()));
+ v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_0.to()))
+}
+
/// computes `(sin(pi * x), cos(pi * x))`
/// not guaranteed to give correct sign for zero results
/// inherits error from `sin_pi_kernel` and `cos_pi_kernel`
let check = |x| {
check_ulp(
x,
- |arg| arg.distance_in_ulp <= if arg.expected == 0.to() { 0 } else { 2 },
+ |arg| arg.distance_in_ulp <= if arg.expected == 0. { 0 } else { 2 },
|x| sin_pi_kernel_f32(Scalar, Value(x)).0,
|x| (f64::consts::PI * x).sin(),
)
let check = |x| {
check_ulp(
x,
- |arg| arg.distance_in_ulp <= 2 && arg.result <= 1.to(),
+ |arg| arg.distance_in_ulp <= 2 && arg.result <= 1.,
|x| cos_pi_kernel_f32(Scalar, Value(x)).0,
|x| (f64::consts::PI * x).cos(),
)
}
}
+ #[test]
+ #[cfg(feature = "full_tests")]
+ fn test_sin_pi_kernel_f64() {
+ let check = |x| {
+ check_ulp(
+ x,
+ sin_cos_pi_check_ulp_callback,
+ |x| sin_pi_kernel_f64(Scalar, Value(x)).0,
+ |x| reference_sin_cos_pi_f64(x).0,
+ )
+ };
+ let quarter = 0.25f32.to_bits();
+ for bits in (0..=quarter).rev().step_by(1 << 5) {
+ check(f32::from_bits(bits) as f64);
+ check(-f32::from_bits(bits) as f64);
+ }
+ }
+
+ #[test]
+ #[cfg(feature = "full_tests")]
+ fn test_cos_pi_kernel_f64() {
+ let check = |x| {
+ check_ulp(
+ x,
+ sin_cos_pi_check_ulp_callback,
+ |x| cos_pi_kernel_f64(Scalar, Value(x)).0,
+ |x| reference_sin_cos_pi_f64(x).1,
+ )
+ };
+ let quarter = 0.25f32.to_bits();
+ for bits in (0..=quarter).rev().step_by(1 << 5) {
+ check(f32::from_bits(bits) as f64);
+ check(-f32::from_bits(bits) as f64);
+ }
+ }
+
fn sin_cos_pi_check_ulp_callback<F: PrimFloat>(arg: CheckUlpCallbackArg<F, u64>) -> bool {
if arg.x % 0.5.to() == 0.0.to() {
arg.distance_in_ulp == 0
}
}
+ fn reference_sin_cos_pi_f64(mut v: f64) -> (f64, f64) {
+ use az::Cast;
+ use rug::{float::Constant, Float};
+ if !v.is_finite() {
+ return (f64::NAN, f64::NAN);
+ }
+ v %= 2.0;
+ if v >= 1.0 {
+ v -= 2.0;
+ } else if v <= -1.0 {
+ v += 2.0;
+ }
+ v *= 2.0;
+ let part = v.round() as i32;
+ v -= part as f64;
+ let precision = 100;
+ let mut v = Float::with_val(precision, v);
+ let pi = Float::with_val(precision, Constant::Pi);
+ let pi_2 = pi / 2;
+ v *= &pi_2;
+ let cos = pi_2; // just a temp var, value is ignored
+ let (sin, cos) = v.sin_cos(cos);
+ let sin: f64 = sin.cast();
+ let cos: f64 = cos.cast();
+ match part {
+ 0 => (sin, cos),
+ 1 => (cos, -sin),
+ 2 => (-sin, -cos),
+ -2 => (-sin, -cos),
+ -1 => (-cos, sin),
+ _ => panic!("not implemented: part={}", part),
+ }
+ }
+
+ macro_rules! test_reference_sin_cos_pi_test_cases {
+ ($case:expr, $ty:ident) => {
+ $case($ty::NAN, $ty::NAN, $ty::NAN);
+ $case($ty::INFINITY, $ty::NAN, $ty::NAN);
+ $case(-$ty::INFINITY, $ty::NAN, $ty::NAN);
+ $case(-4., 0., 1.);
+ $case(
+ -3.875,
+ 0.38268343236508977172845998403039886676134456248563,
+ 0.92387953251128675612818318939678828682241662586364,
+ );
+ $case(
+ -3.75,
+ 0.70710678118654752440084436210484903928483593768847,
+ 0.70710678118654752440084436210484903928483593768847,
+ );
+ $case(
+ -3.625,
+ 0.92387953251128675612818318939678828682241662586364,
+ 0.38268343236508977172845998403039886676134456248563,
+ );
+ $case(-3.5, 1., -0.);
+ $case(
+ -3.375,
+ 0.92387953251128675612818318939678828682241662586364,
+ -0.38268343236508977172845998403039886676134456248563,
+ );
+ $case(
+ -3.25,
+ 0.70710678118654752440084436210484903928483593768847,
+ -0.70710678118654752440084436210484903928483593768847,
+ );
+ $case(
+ -3.125,
+ 0.38268343236508977172845998403039886676134456248563,
+ -0.92387953251128675612818318939678828682241662586364,
+ );
+ $case(-3., -0., -1.);
+ $case(
+ -2.875,
+ -0.38268343236508977172845998403039886676134456248563,
+ -0.92387953251128675612818318939678828682241662586364,
+ );
+ $case(
+ -2.75,
+ -0.70710678118654752440084436210484903928483593768847,
+ -0.70710678118654752440084436210484903928483593768847,
+ );
+ $case(
+ -2.625,
+ -0.92387953251128675612818318939678828682241662586364,
+ -0.38268343236508977172845998403039886676134456248563,
+ );
+ $case(-2.5, -1., 0.);
+ $case(
+ -2.375,
+ -0.92387953251128675612818318939678828682241662586364,
+ 0.38268343236508977172845998403039886676134456248563,
+ );
+ $case(
+ -2.25,
+ -0.70710678118654752440084436210484903928483593768847,
+ 0.70710678118654752440084436210484903928483593768847,
+ );
+ $case(
+ -2.125,
+ -0.38268343236508977172845998403039886676134456248563,
+ 0.92387953251128675612818318939678828682241662586364,
+ );
+ $case(-2., 0., 1.);
+ $case(
+ -1.875,
+ 0.38268343236508977172845998403039886676134456248563,
+ 0.92387953251128675612818318939678828682241662586364,
+ );
+ $case(
+ -1.75,
+ 0.70710678118654752440084436210484903928483593768847,
+ 0.70710678118654752440084436210484903928483593768847,
+ );
+ $case(
+ -1.625,
+ 0.92387953251128675612818318939678828682241662586364,
+ 0.38268343236508977172845998403039886676134456248563,
+ );
+ $case(-1.5, 1., -0.);
+ $case(
+ -1.375,
+ 0.92387953251128675612818318939678828682241662586364,
+ -0.38268343236508977172845998403039886676134456248563,
+ );
+ $case(
+ -1.25,
+ 0.70710678118654752440084436210484903928483593768847,
+ -0.70710678118654752440084436210484903928483593768847,
+ );
+ $case(
+ -1.125,
+ 0.38268343236508977172845998403039886676134456248563,
+ -0.92387953251128675612818318939678828682241662586364,
+ );
+ $case(-1., -0., -1.);
+ $case(
+ -0.875,
+ -0.38268343236508977172845998403039886676134456248563,
+ -0.92387953251128675612818318939678828682241662586364,
+ );
+ $case(
+ -0.75,
+ -0.70710678118654752440084436210484903928483593768847,
+ -0.70710678118654752440084436210484903928483593768847,
+ );
+ $case(
+ -0.625,
+ -0.92387953251128675612818318939678828682241662586364,
+ -0.38268343236508977172845998403039886676134456248563,
+ );
+ $case(-0.5, -1., 0.);
+ $case(
+ -0.375,
+ -0.92387953251128675612818318939678828682241662586364,
+ 0.38268343236508977172845998403039886676134456248563,
+ );
+ $case(
+ -0.25,
+ -0.70710678118654752440084436210484903928483593768847,
+ 0.70710678118654752440084436210484903928483593768847,
+ );
+ $case(
+ -0.125,
+ -0.38268343236508977172845998403039886676134456248563,
+ 0.92387953251128675612818318939678828682241662586364,
+ );
+ $case(0., 0., 1.);
+ $case(
+ 0.125,
+ 0.38268343236508977172845998403039886676134456248563,
+ 0.92387953251128675612818318939678828682241662586364,
+ );
+ $case(
+ 0.25,
+ 0.70710678118654752440084436210484903928483593768847,
+ 0.70710678118654752440084436210484903928483593768847,
+ );
+ $case(
+ 0.375,
+ 0.92387953251128675612818318939678828682241662586364,
+ 0.38268343236508977172845998403039886676134456248563,
+ );
+ $case(0.5, 1., 0.);
+ $case(
+ 0.625,
+ 0.92387953251128675612818318939678828682241662586364,
+ -0.38268343236508977172845998403039886676134456248563,
+ );
+ $case(
+ 0.75,
+ 0.70710678118654752440084436210484903928483593768847,
+ -0.70710678118654752440084436210484903928483593768847,
+ );
+ $case(
+ 0.875,
+ 0.38268343236508977172845998403039886676134456248563,
+ -0.92387953251128675612818318939678828682241662586364,
+ );
+ $case(1., 0., -1.);
+ $case(
+ 1.125,
+ -0.38268343236508977172845998403039886676134456248563,
+ -0.92387953251128675612818318939678828682241662586364,
+ );
+ $case(
+ 1.25,
+ -0.70710678118654752440084436210484903928483593768847,
+ -0.70710678118654752440084436210484903928483593768847,
+ );
+ $case(
+ 1.375,
+ -0.92387953251128675612818318939678828682241662586364,
+ -0.38268343236508977172845998403039886676134456248563,
+ );
+ $case(1.5, -1., -0.);
+ $case(
+ 1.625,
+ -0.92387953251128675612818318939678828682241662586364,
+ 0.38268343236508977172845998403039886676134456248563,
+ );
+ $case(
+ 1.75,
+ -0.70710678118654752440084436210484903928483593768847,
+ 0.70710678118654752440084436210484903928483593768847,
+ );
+ $case(
+ 1.875,
+ -0.38268343236508977172845998403039886676134456248563,
+ 0.92387953251128675612818318939678828682241662586364,
+ );
+ $case(2., -0., 1.);
+ $case(
+ 2.125,
+ 0.38268343236508977172845998403039886676134456248563,
+ 0.92387953251128675612818318939678828682241662586364,
+ );
+ $case(
+ 2.25,
+ 0.70710678118654752440084436210484903928483593768847,
+ 0.70710678118654752440084436210484903928483593768847,
+ );
+ $case(
+ 2.375,
+ 0.92387953251128675612818318939678828682241662586364,
+ 0.38268343236508977172845998403039886676134456248563,
+ );
+ $case(2.5, 1., 0.);
+ $case(
+ 2.625,
+ 0.92387953251128675612818318939678828682241662586364,
+ -0.38268343236508977172845998403039886676134456248563,
+ );
+ $case(
+ 2.75,
+ 0.70710678118654752440084436210484903928483593768847,
+ -0.70710678118654752440084436210484903928483593768847,
+ );
+ $case(
+ 2.875,
+ 0.38268343236508977172845998403039886676134456248563,
+ -0.92387953251128675612818318939678828682241662586364,
+ );
+ $case(3., 0., -1.);
+ $case(
+ 3.125,
+ -0.38268343236508977172845998403039886676134456248563,
+ -0.92387953251128675612818318939678828682241662586364,
+ );
+ $case(
+ 3.25,
+ -0.70710678118654752440084436210484903928483593768847,
+ -0.70710678118654752440084436210484903928483593768847,
+ );
+ $case(
+ 3.375,
+ -0.92387953251128675612818318939678828682241662586364,
+ -0.38268343236508977172845998403039886676134456248563,
+ );
+ $case(3.5, -1., -0.);
+ $case(
+ 3.625,
+ -0.92387953251128675612818318939678828682241662586364,
+ 0.38268343236508977172845998403039886676134456248563,
+ );
+ $case(
+ 3.75,
+ -0.70710678118654752440084436210484903928483593768847,
+ 0.70710678118654752440084436210484903928483593768847,
+ );
+ $case(
+ 3.875,
+ -0.38268343236508977172845998403039886676134456248563,
+ 0.92387953251128675612818318939678828682241662586364,
+ );
+ $case(4., -0., 1.);
+ };
+ }
+
#[test]
fn test_reference_sin_cos_pi_f32() {
fn approx_same(a: f32, b: f32) -> bool {
ref_cos=ref_cos,
);
}
- case(f32::NAN, f32::NAN, f32::NAN);
- case(f32::INFINITY, f32::NAN, f32::NAN);
- case(-f32::INFINITY, f32::NAN, f32::NAN);
- case(-4.0, 0.0, 1.0);
- case(-3.875, 0.3826834323650906, 0.9238795325112864);
- case(-3.75, 0.7071067811865475, 0.7071067811865475);
- case(-3.625, 0.9238795325112867, 0.3826834323650898);
- case(-3.5, 1.0, 0.0);
- case(-3.375, 0.9238795325112864, -0.3826834323650905);
- case(-3.25, 0.7071067811865475, -0.7071067811865475);
- case(-3.125, 0.3826834323650898, -0.9238795325112867);
- case(-3.0, 0.0, -1.0);
- case(-2.875, -0.3826834323650905, -0.9238795325112864);
- case(-2.75, -0.7071067811865475, -0.7071067811865475);
- case(-2.625, -0.9238795325112867, -0.3826834323650899);
- case(-2.5, -1.0, 0.0);
- case(-2.375, -0.9238795325112865, 0.3826834323650904);
- case(-2.25, -0.7071067811865475, 0.7071067811865475);
- case(-2.125, -0.3826834323650899, 0.9238795325112867);
- case(-2.0, 0.0, 1.0);
- case(-1.875, 0.3826834323650904, 0.9238795325112865);
- case(-1.75, 0.7071067811865475, 0.7071067811865475);
- case(-1.625, 0.9238795325112866, 0.38268343236509);
- case(-1.5, 1.0, 0.0);
- case(-1.375, 0.9238795325112865, -0.3826834323650903);
- case(-1.25, 0.7071067811865475, -0.7071067811865475);
- case(-1.125, 0.3826834323650896, -0.9238795325112869);
- case(-1.0, 0.0, -1.0);
- case(-0.875, -0.3826834323650899, -0.9238795325112867);
- case(-0.75, -0.7071067811865475, -0.7071067811865475);
- case(-0.625, -0.9238795325112867, -0.3826834323650897);
- case(-0.5, -1.0, 0.0);
- case(-0.375, -0.9238795325112867, 0.3826834323650898);
- case(-0.25, -0.7071067811865475, 0.7071067811865475);
- case(-0.125, -0.3826834323650898, 0.9238795325112867);
- case(0.0, 0.0, 1.0);
- case(0.125, 0.3826834323650898, 0.9238795325112867);
- case(0.25, 0.7071067811865475, 0.7071067811865475);
- case(0.375, 0.9238795325112867, 0.3826834323650898);
- case(0.5, 1.0, 0.0);
- case(0.625, 0.9238795325112867, -0.3826834323650897);
- case(0.75, 0.7071067811865475, -0.7071067811865475);
- case(0.875, 0.3826834323650899, -0.9238795325112867);
- case(1.0, 0.0, -1.0);
- case(1.125, -0.3826834323650896, -0.9238795325112869);
- case(1.25, -0.7071067811865475, -0.7071067811865475);
- case(1.375, -0.9238795325112865, -0.3826834323650903);
- case(1.5, -1.0, 0.0);
- case(1.625, -0.9238795325112866, 0.38268343236509);
- case(1.75, -0.7071067811865475, 0.7071067811865475);
- case(1.875, -0.3826834323650904, 0.9238795325112865);
- case(2.0, 0.0, 1.0);
- case(2.125, 0.3826834323650899, 0.9238795325112867);
- case(2.25, 0.7071067811865475, 0.7071067811865475);
- case(2.375, 0.9238795325112865, 0.3826834323650904);
- case(2.5, 1.0, 0.0);
- case(2.625, 0.9238795325112867, -0.3826834323650899);
- case(2.75, 0.7071067811865475, -0.7071067811865475);
- case(2.875, 0.3826834323650905, -0.9238795325112864);
- case(3.0, 0.0, -1.0);
- case(3.125, -0.3826834323650898, -0.9238795325112867);
- case(3.25, -0.7071067811865475, -0.7071067811865475);
- case(3.375, -0.9238795325112864, -0.3826834323650905);
- case(3.5, -1.0, 0.0);
- case(3.625, -0.9238795325112867, 0.3826834323650898);
- case(3.75, -0.7071067811865475, 0.7071067811865475);
- case(3.875, -0.3826834323650906, 0.9238795325112864);
- case(4.0, 0.0, 1.0);
+ test_reference_sin_cos_pi_test_cases!(case, f32);
+ }
+
+ #[test]
+ fn test_reference_sin_cos_pi_f64() {
+ fn same(a: f64, b: f64) -> bool {
+ if a.is_finite() && b.is_finite() {
+ a == b
+ } else {
+ a == b || (a.is_nan() && b.is_nan())
+ }
+ }
+ #[track_caller]
+ fn case(x: f64, expected_sin: f64, expected_cos: f64) {
+ let (ref_sin, ref_cos) = reference_sin_cos_pi_f64(x);
+ assert!(
+ same(ref_sin, expected_sin) && same(ref_cos, expected_cos),
+ "case failed: x={x}, expected_sin={expected_sin}, expected_cos={expected_cos}, ref_sin={ref_sin}, ref_cos={ref_cos}",
+ x=x,
+ expected_sin=expected_sin,
+ expected_cos=expected_cos,
+ ref_sin=ref_sin,
+ ref_cos=ref_cos,
+ );
+ }
+ test_reference_sin_cos_pi_test_cases!(case, f64);
}
#[test]