remove note to convert to sin_cos_tau instead sin_cos_pi

[vector-math.git] / src / algorithms / trig_pi.rs
diff --git a/src/algorithms/trig_pi.rs b/src/algorithms/trig_pi.rs

index 28e67ef0a4f1327eec038b54cfc94d4672c2276e..5be0ad65b017a389823488c1050a1b22eeb51293 100644 (file)
--- a/src/algorithms/trig_pi.rs
+++ b/src/algorithms/trig_pi.rs
@@ -1,7 +1,11 @@
-use crate::traits::{Context, ConvertTo, Float};
+use crate::{
+    prim::{PrimFloat, PrimUInt},
+    traits::{Compare, Context, ConvertFrom, ConvertTo, Float, Make, Select},
+};
  
  mod consts {
      #![allow(clippy::excessive_precision)]
+    #![allow(dead_code)]
  
      /// coefficients of taylor series for `sin(pi * x)` centered at `0`
      /// generated using:
@@ -81,58 +85,251 @@ pub fn cos_pi_kernel_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> Ctx::VecF16
      v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_0.to()))
  }
  
+/// computes `sin(pi * x)` for `-0.25 <= x <= 0.25`
+/// not guaranteed to give correct sign for zero result
+/// has an error of up to 2ULP
+pub fn sin_pi_kernel_f32<Ctx: Context>(ctx: Ctx, x: Ctx::VecF32) -> Ctx::VecF32 {
+    let x_sq = x * x;
+    let mut v: Ctx::VecF32 = ctx.make(consts::SINPI_KERNEL_TAYLOR_9.to());
+    v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_7.to()));
+    v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_5.to()));
+    v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_3.to()));
+    v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_1.to()));
+    v * x
+}
+
+/// computes `cos(pi * x)` for `-0.25 <= x <= 0.25`
+/// has an error of up to 2ULP
+pub fn cos_pi_kernel_f32<Ctx: Context>(ctx: Ctx, x: Ctx::VecF32) -> Ctx::VecF32 {
+    let x_sq = x * x;
+    let mut v: Ctx::VecF32 = ctx.make(consts::COSPI_KERNEL_TAYLOR_8.to());
+    v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_6.to()));
+    v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_4.to()));
+    v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_2.to()));
+    v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_0.to()))
+}
+
+/// computes `sin(pi * x)` for `-0.25 <= x <= 0.25`
+/// not guaranteed to give correct sign for zero result
+/// has an error of up to 2ULP
+pub fn sin_pi_kernel_f64<Ctx: Context>(ctx: Ctx, x: Ctx::VecF64) -> Ctx::VecF64 {
+    let x_sq = x * x;
+    let mut v: Ctx::VecF64 = ctx.make(consts::SINPI_KERNEL_TAYLOR_15.to());
+    v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_13.to()));
+    v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_11.to()));
+    v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_9.to()));
+    v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_7.to()));
+    v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_5.to()));
+    v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_3.to()));
+    v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_1.to()));
+    v * x
+}
+
+/// computes `cos(pi * x)` for `-0.25 <= x <= 0.25`
+/// has an error of up to 2ULP
+pub fn cos_pi_kernel_f64<Ctx: Context>(ctx: Ctx, x: Ctx::VecF64) -> Ctx::VecF64 {
+    let x_sq = x * x;
+    let mut v: Ctx::VecF64 = ctx.make(consts::COSPI_KERNEL_TAYLOR_16.to());
+    v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_14.to()));
+    v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_12.to()));
+    v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_10.to()));
+    v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_8.to()));
+    v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_6.to()));
+    v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_4.to()));
+    v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_2.to()));
+    v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_0.to()))
+}
+
+/// computes `(sin(pi * x), cos(pi * x))`
+/// not guaranteed to give correct sign for zero results
+/// inherits error from `sin_pi_kernel` and `cos_pi_kernel`
+pub fn sin_cos_pi_impl<
+    Ctx: Context,
+    VecF: Float<PrimFloat = PrimF> + Make<Context = Ctx>,
+    PrimF: PrimFloat<BitsType = PrimU>,
+    PrimU: PrimUInt,
+    SinPiKernel: FnOnce(Ctx, VecF) -> VecF,
+    CosPiKernel: FnOnce(Ctx, VecF) -> VecF,
+>(
+    ctx: Ctx,
+    x: VecF,
+    sin_pi_kernel: SinPiKernel,
+    cos_pi_kernel: CosPiKernel,
+) -> (VecF, VecF) {
+    let two_f: VecF = ctx.make(2.0.to());
+    let one_half: VecF = ctx.make(0.5.to());
+    let max_contiguous_integer: VecF = ctx.make(PrimF::max_contiguous_integer());
+    // if `x` is finite and bigger than `max_contiguous_integer`, then x is an even integer
+    let in_range = x.abs().lt(max_contiguous_integer); // use `lt` so nans are counted as out-of-range
+    let is_finite = x.is_finite();
+    let nan: VecF = ctx.make(f32::NAN.to());
+    let zero_f: VecF = ctx.make(0.to());
+    let one_f: VecF = ctx.make(1.to());
+    let zero_i: VecF::SignedBitsType = ctx.make(0.to());
+    let one_i: VecF::SignedBitsType = ctx.make(1.to());
+    let two_i: VecF::SignedBitsType = ctx.make(2.to());
+    let out_of_range_sin = is_finite.select(zero_f, nan);
+    let out_of_range_cos = is_finite.select(one_f, nan);
+    let xi = (x * two_f).round();
+    let xk = x - xi * one_half;
+    let sk = sin_pi_kernel(ctx, xk);
+    let ck = cos_pi_kernel(ctx, xk);
+    let xi = VecF::SignedBitsType::cvt_from(xi);
+    let bit_0_clear = (xi & one_i).eq(zero_i);
+    let st = bit_0_clear.select(sk, ck);
+    let ct = bit_0_clear.select(ck, sk);
+    let s = (xi & two_i).eq(zero_i).select(st, -st);
+    let c = ((xi + one_i) & two_i).eq(zero_i).select(ct, -ct);
+    (
+        in_range.select(s, out_of_range_sin),
+        in_range.select(c, out_of_range_cos),
+    )
+}
+
+/// computes `(sin(pi * x), cos(pi * x))`
+/// not guaranteed to give correct sign for zero results
+/// has an error of up to 2ULP
+pub fn sin_cos_pi_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> (Ctx::VecF16, Ctx::VecF16) {
+    sin_cos_pi_impl(ctx, x, sin_pi_kernel_f16, cos_pi_kernel_f16)
+}
+
+/// computes `sin(pi * x)`
+/// not guaranteed to give correct sign for zero results
+/// has an error of up to 2ULP
+pub fn sin_pi_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> Ctx::VecF16 {
+    sin_cos_pi_f16(ctx, x).0
+}
+
+/// computes `cos(pi * x)`
+/// not guaranteed to give correct sign for zero results
+/// has an error of up to 2ULP
+pub fn cos_pi_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> Ctx::VecF16 {
+    sin_cos_pi_f16(ctx, x).1
+}
+
+/// computes `(sin(pi * x), cos(pi * x))`
+/// not guaranteed to give correct sign for zero results
+/// has an error of up to 2ULP
+pub fn sin_cos_pi_f32<Ctx: Context>(ctx: Ctx, x: Ctx::VecF32) -> (Ctx::VecF32, Ctx::VecF32) {
+    sin_cos_pi_impl(ctx, x, sin_pi_kernel_f32, cos_pi_kernel_f32)
+}
+
+/// computes `sin(pi * x)`
+/// not guaranteed to give correct sign for zero results
+/// has an error of up to 2ULP
+pub fn sin_pi_f32<Ctx: Context>(ctx: Ctx, x: Ctx::VecF32) -> Ctx::VecF32 {
+    sin_cos_pi_f32(ctx, x).0
+}
+
+/// computes `cos(pi * x)`
+/// not guaranteed to give correct sign for zero results
+/// has an error of up to 2ULP
+pub fn cos_pi_f32<Ctx: Context>(ctx: Ctx, x: Ctx::VecF32) -> Ctx::VecF32 {
+    sin_cos_pi_f32(ctx, x).1
+}
+
  /// computes `(sin(pi * x), cos(pi * x))`
  /// not guaranteed to give correct sign for zero results
  /// has an error of up to 2ULP
-pub fn sin_cos_pi_f16<Ctx: Context>(_ctx: Ctx, _x: Ctx::VecF16) -> (Ctx::VecF16, Ctx::VecF16) {
-    todo!()
+pub fn sin_cos_pi_f64<Ctx: Context>(ctx: Ctx, x: Ctx::VecF64) -> (Ctx::VecF64, Ctx::VecF64) {
+    sin_cos_pi_impl(ctx, x, sin_pi_kernel_f64, cos_pi_kernel_f64)
+}
+
+/// computes `sin(pi * x)`
+/// not guaranteed to give correct sign for zero results
+/// has an error of up to 2ULP
+pub fn sin_pi_f64<Ctx: Context>(ctx: Ctx, x: Ctx::VecF64) -> Ctx::VecF64 {
+    sin_cos_pi_f64(ctx, x).0
+}
+
+/// computes `cos(pi * x)`
+/// not guaranteed to give correct sign for zero results
+/// has an error of up to 2ULP
+pub fn cos_pi_f64<Ctx: Context>(ctx: Ctx, x: Ctx::VecF64) -> Ctx::VecF64 {
+    sin_cos_pi_f64(ctx, x).1
+}
+
+/// computes `tan(pi * x)`
+/// error inherited from `sin_pi / cos_pi`
+pub fn tan_pi_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> Ctx::VecF16 {
+    let (sin, cos) = sin_cos_pi_f16(ctx, x);
+    sin / cos
+}
+
+/// computes `tan(pi * x)`
+/// error inherited from `sin_pi / cos_pi`
+pub fn tan_pi_f32<Ctx: Context>(ctx: Ctx, x: Ctx::VecF32) -> Ctx::VecF32 {
+    let (sin, cos) = sin_cos_pi_f32(ctx, x);
+    sin / cos
+}
+
+/// computes `tan(pi * x)`
+/// error inherited from `sin_pi / cos_pi`
+pub fn tan_pi_f64<Ctx: Context>(ctx: Ctx, x: Ctx::VecF64) -> Ctx::VecF64 {
+    let (sin, cos) = sin_cos_pi_f64(ctx, x);
+    sin / cos
  }
  
  #[cfg(test)]
  mod tests {
      use super::*;
-    use crate::{f16::F16, scalar::Scalar};
+    use crate::{
+        f16::F16,
+        scalar::{Scalar, Value},
+    };
      use std::f64;
  
      struct CheckUlpCallbackArg<F, I> {
          distance_in_ulp: I,
+        x: F,
          expected: F,
          result: F,
      }
  
      #[track_caller]
-    fn check_ulp_f16(
-        x: F16,
-        is_ok: impl Fn(CheckUlpCallbackArg<F16, i16>) -> bool,
-        fn_f16: impl Fn(F16) -> F16,
-        fn_f64: impl Fn(f64) -> f64,
+    fn check_ulp<T: PrimFloat>(
+        x: T,
+        is_ok: impl Fn(CheckUlpCallbackArg<T, u64>) -> bool,
+        fn_f16: impl Fn(T) -> T,
+        fn_reference: impl Fn(f64) -> f64,
      ) {
          let x_f64: f64 = x.to();
-        let expected_f64 = fn_f64(x_f64);
-        let expected: F16 = expected_f64.to();
+        let expected_f64 = fn_reference(x_f64);
+        let expected: T = expected_f64.to();
          let result = fn_f16(x);
          if result == expected {
              return;
          }
-        let distance_in_ulp = (expected.to_bits() as i16).wrapping_sub(result.to_bits() as i16);
-        if is_ok(CheckUlpCallbackArg {
-            distance_in_ulp,
-            expected,
-            result,
-        }) {
+        if result.is_nan() && expected.is_nan() {
+            return;
+        }
+        let expected_bits: i64 = expected.to_bits().to();
+        let result_bits: i64 = result.to_bits().to();
+        let distance_in_ulp = (expected_bits - result_bits).unsigned_abs();
+        if !result.is_nan()
+            && !expected.is_nan()
+            && is_ok(CheckUlpCallbackArg {
+                distance_in_ulp,
+                x,
+                expected,
+                result,
+            })
+        {
              return;
          }
          panic!(
              "error is too big: \
                  x = {x:?} {x_bits:#X}, \
                  result = {result:?} {result_bits:#X}, \
-                expected = {expected:?} {expected_bits:#X}",
+                expected = {expected:?} {expected_bits:#X}, \
+                distance_in_ulp = {distance_in_ulp}",
              x = x,
              x_bits = x.to_bits(),
              result = result,
              result_bits = result.to_bits(),
              expected = expected,
              expected_bits = expected.to_bits(),
+            distance_in_ulp = distance_in_ulp,
          );
      }
  
@@ -143,10 +340,10 @@ mod tests {
      )]
      fn test_sin_pi_kernel_f16() {
          let check = |x| {
-            check_ulp_f16(
+            check_ulp(
                  x,
                  |arg| arg.distance_in_ulp <= if arg.expected == 0.to() { 0 } else { 2 },
-                |x| sin_pi_kernel_f16(Scalar, x),
+                |x| sin_pi_kernel_f16(Scalar, Value(x)).0,
                  |x| (f64::consts::PI * x).sin(),
              )
          };
@@ -164,10 +361,10 @@ mod tests {
      )]
      fn test_cos_pi_kernel_f16() {
          let check = |x| {
-            check_ulp_f16(
+            check_ulp(
                  x,
                  |arg| arg.distance_in_ulp <= 2 && arg.result <= 1.to(),
-                |x| cos_pi_kernel_f16(Scalar, x),
+                |x| cos_pi_kernel_f16(Scalar, Value(x)).0,
                  |x| (f64::consts::PI * x).cos(),
              )
          };
@@ -177,4 +374,543 @@ mod tests {
              check(-F16::from_bits(bits));
          }
      }
+
+    #[test]
+    #[cfg(feature = "full_tests")]
+    fn test_sin_pi_kernel_f32() {
+        let check = |x| {
+            check_ulp(
+                x,
+                |arg| arg.distance_in_ulp <= if arg.expected == 0. { 0 } else { 2 },
+                |x| sin_pi_kernel_f32(Scalar, Value(x)).0,
+                |x| (f64::consts::PI * x).sin(),
+            )
+        };
+        let quarter = 0.25f32.to_bits();
+        for bits in (0..=quarter).rev() {
+            check(f32::from_bits(bits));
+            check(-f32::from_bits(bits));
+        }
+    }
+
+    #[test]
+    #[cfg(feature = "full_tests")]
+    fn test_cos_pi_kernel_f32() {
+        let check = |x| {
+            check_ulp(
+                x,
+                |arg| arg.distance_in_ulp <= 2 && arg.result <= 1.,
+                |x| cos_pi_kernel_f32(Scalar, Value(x)).0,
+                |x| (f64::consts::PI * x).cos(),
+            )
+        };
+        let quarter = 0.25f32.to_bits();
+        for bits in (0..=quarter).rev() {
+            check(f32::from_bits(bits));
+            check(-f32::from_bits(bits));
+        }
+    }
+
+    #[test]
+    #[cfg(feature = "full_tests")]
+    fn test_sin_pi_kernel_f64() {
+        let check = |x| {
+            check_ulp(
+                x,
+                sin_cos_pi_check_ulp_callback,
+                |x| sin_pi_kernel_f64(Scalar, Value(x)).0,
+                |x| reference_sin_cos_pi_f64(x).0,
+            )
+        };
+        let quarter = 0.25f32.to_bits();
+        for bits in (0..=quarter).rev().step_by(1 << 5) {
+            check(f32::from_bits(bits) as f64);
+            check(-f32::from_bits(bits) as f64);
+        }
+    }
+
+    #[test]
+    #[cfg(feature = "full_tests")]
+    fn test_cos_pi_kernel_f64() {
+        let check = |x| {
+            check_ulp(
+                x,
+                sin_cos_pi_check_ulp_callback,
+                |x| cos_pi_kernel_f64(Scalar, Value(x)).0,
+                |x| reference_sin_cos_pi_f64(x).1,
+            )
+        };
+        let quarter = 0.25f32.to_bits();
+        for bits in (0..=quarter).rev().step_by(1 << 5) {
+            check(f32::from_bits(bits) as f64);
+            check(-f32::from_bits(bits) as f64);
+        }
+    }
+
+    fn sin_cos_pi_check_ulp_callback<F: PrimFloat>(arg: CheckUlpCallbackArg<F, u64>) -> bool {
+        if arg.x % 0.5.to() == 0.0.to() {
+            arg.distance_in_ulp == 0
+        } else {
+            arg.distance_in_ulp <= 2 && arg.result.abs() <= 1.to()
+        }
+    }
+
+    #[test]
+    #[cfg_attr(
+        not(feature = "f16"),
+        should_panic(expected = "f16 feature is not enabled")
+    )]
+    fn test_sin_pi_f16() {
+        for bits in 0..=u16::MAX {
+            check_ulp(
+                F16::from_bits(bits),
+                sin_cos_pi_check_ulp_callback,
+                |x| sin_pi_f16(Scalar, Value(x)).0,
+                |x| (f64::consts::PI * x).sin(),
+            );
+        }
+    }
+
+    #[test]
+    #[cfg_attr(
+        not(feature = "f16"),
+        should_panic(expected = "f16 feature is not enabled")
+    )]
+    fn test_cos_pi_f16() {
+        for bits in 0..=u16::MAX {
+            check_ulp(
+                F16::from_bits(bits),
+                sin_cos_pi_check_ulp_callback,
+                |x| cos_pi_f16(Scalar, Value(x)).0,
+                |x| (f64::consts::PI * x).cos(),
+            );
+        }
+    }
+
+    fn reference_sin_cos_pi_f32(mut v: f64) -> (f64, f64) {
+        if !v.is_finite() {
+            return (f64::NAN, f64::NAN);
+        }
+        v %= 2.0;
+        if v >= 1.0 {
+            v -= 2.0;
+        } else if v <= -1.0 {
+            v += 2.0;
+        }
+        v *= 2.0;
+        let part = v.round() as i32;
+        v -= part as f64;
+        v *= f64::consts::PI / 2.0;
+        let (sin, cos) = v.sin_cos();
+        match part {
+            0 => (sin, cos),
+            1 => (cos, -sin),
+            2 => (-sin, -cos),
+            -2 => (-sin, -cos),
+            -1 => (-cos, sin),
+            _ => panic!("not implemented: part={}", part),
+        }
+    }
+
+    fn reference_sin_cos_pi_f64(mut v: f64) -> (f64, f64) {
+        use az::Cast;
+        use rug::{float::Constant, Float};
+        if !v.is_finite() {
+            return (f64::NAN, f64::NAN);
+        }
+        v %= 2.0;
+        if v >= 1.0 {
+            v -= 2.0;
+        } else if v <= -1.0 {
+            v += 2.0;
+        }
+        v *= 2.0;
+        let part = v.round() as i32;
+        v -= part as f64;
+        let precision = 100;
+        let mut v = Float::with_val(precision, v);
+        let pi = Float::with_val(precision, Constant::Pi);
+        let pi_2 = pi / 2;
+        v *= &pi_2;
+        let cos = pi_2; // just a temp var, value is ignored
+        let (sin, cos) = v.sin_cos(cos);
+        let sin: f64 = sin.cast();
+        let cos: f64 = cos.cast();
+        match part {
+            0 => (sin, cos),
+            1 => (cos, -sin),
+            2 => (-sin, -cos),
+            -2 => (-sin, -cos),
+            -1 => (-cos, sin),
+            _ => panic!("not implemented: part={}", part),
+        }
+    }
+
+    macro_rules! test_reference_sin_cos_pi_test_cases {
+        ($case:expr, $ty:ident) => {
+            $case($ty::NAN, $ty::NAN, $ty::NAN);
+            $case($ty::INFINITY, $ty::NAN, $ty::NAN);
+            $case(-$ty::INFINITY, $ty::NAN, $ty::NAN);
+            $case(-4., 0., 1.);
+            $case(
+                -3.875,
+                0.38268343236508977172845998403039886676134456248563,
+                0.92387953251128675612818318939678828682241662586364,
+            );
+            $case(
+                -3.75,
+                0.70710678118654752440084436210484903928483593768847,
+                0.70710678118654752440084436210484903928483593768847,
+            );
+            $case(
+                -3.625,
+                0.92387953251128675612818318939678828682241662586364,
+                0.38268343236508977172845998403039886676134456248563,
+            );
+            $case(-3.5, 1., -0.);
+            $case(
+                -3.375,
+                0.92387953251128675612818318939678828682241662586364,
+                -0.38268343236508977172845998403039886676134456248563,
+            );
+            $case(
+                -3.25,
+                0.70710678118654752440084436210484903928483593768847,
+                -0.70710678118654752440084436210484903928483593768847,
+            );
+            $case(
+                -3.125,
+                0.38268343236508977172845998403039886676134456248563,
+                -0.92387953251128675612818318939678828682241662586364,
+            );
+            $case(-3., -0., -1.);
+            $case(
+                -2.875,
+                -0.38268343236508977172845998403039886676134456248563,
+                -0.92387953251128675612818318939678828682241662586364,
+            );
+            $case(
+                -2.75,
+                -0.70710678118654752440084436210484903928483593768847,
+                -0.70710678118654752440084436210484903928483593768847,
+            );
+            $case(
+                -2.625,
+                -0.92387953251128675612818318939678828682241662586364,
+                -0.38268343236508977172845998403039886676134456248563,
+            );
+            $case(-2.5, -1., 0.);
+            $case(
+                -2.375,
+                -0.92387953251128675612818318939678828682241662586364,
+                0.38268343236508977172845998403039886676134456248563,
+            );
+            $case(
+                -2.25,
+                -0.70710678118654752440084436210484903928483593768847,
+                0.70710678118654752440084436210484903928483593768847,
+            );
+            $case(
+                -2.125,
+                -0.38268343236508977172845998403039886676134456248563,
+                0.92387953251128675612818318939678828682241662586364,
+            );
+            $case(-2., 0., 1.);
+            $case(
+                -1.875,
+                0.38268343236508977172845998403039886676134456248563,
+                0.92387953251128675612818318939678828682241662586364,
+            );
+            $case(
+                -1.75,
+                0.70710678118654752440084436210484903928483593768847,
+                0.70710678118654752440084436210484903928483593768847,
+            );
+            $case(
+                -1.625,
+                0.92387953251128675612818318939678828682241662586364,
+                0.38268343236508977172845998403039886676134456248563,
+            );
+            $case(-1.5, 1., -0.);
+            $case(
+                -1.375,
+                0.92387953251128675612818318939678828682241662586364,
+                -0.38268343236508977172845998403039886676134456248563,
+            );
+            $case(
+                -1.25,
+                0.70710678118654752440084436210484903928483593768847,
+                -0.70710678118654752440084436210484903928483593768847,
+            );
+            $case(
+                -1.125,
+                0.38268343236508977172845998403039886676134456248563,
+                -0.92387953251128675612818318939678828682241662586364,
+            );
+            $case(-1., -0., -1.);
+            $case(
+                -0.875,
+                -0.38268343236508977172845998403039886676134456248563,
+                -0.92387953251128675612818318939678828682241662586364,
+            );
+            $case(
+                -0.75,
+                -0.70710678118654752440084436210484903928483593768847,
+                -0.70710678118654752440084436210484903928483593768847,
+            );
+            $case(
+                -0.625,
+                -0.92387953251128675612818318939678828682241662586364,
+                -0.38268343236508977172845998403039886676134456248563,
+            );
+            $case(-0.5, -1., 0.);
+            $case(
+                -0.375,
+                -0.92387953251128675612818318939678828682241662586364,
+                0.38268343236508977172845998403039886676134456248563,
+            );
+            $case(
+                -0.25,
+                -0.70710678118654752440084436210484903928483593768847,
+                0.70710678118654752440084436210484903928483593768847,
+            );
+            $case(
+                -0.125,
+                -0.38268343236508977172845998403039886676134456248563,
+                0.92387953251128675612818318939678828682241662586364,
+            );
+            $case(0., 0., 1.);
+            $case(
+                0.125,
+                0.38268343236508977172845998403039886676134456248563,
+                0.92387953251128675612818318939678828682241662586364,
+            );
+            $case(
+                0.25,
+                0.70710678118654752440084436210484903928483593768847,
+                0.70710678118654752440084436210484903928483593768847,
+            );
+            $case(
+                0.375,
+                0.92387953251128675612818318939678828682241662586364,
+                0.38268343236508977172845998403039886676134456248563,
+            );
+            $case(0.5, 1., 0.);
+            $case(
+                0.625,
+                0.92387953251128675612818318939678828682241662586364,
+                -0.38268343236508977172845998403039886676134456248563,
+            );
+            $case(
+                0.75,
+                0.70710678118654752440084436210484903928483593768847,
+                -0.70710678118654752440084436210484903928483593768847,
+            );
+            $case(
+                0.875,
+                0.38268343236508977172845998403039886676134456248563,
+                -0.92387953251128675612818318939678828682241662586364,
+            );
+            $case(1., 0., -1.);
+            $case(
+                1.125,
+                -0.38268343236508977172845998403039886676134456248563,
+                -0.92387953251128675612818318939678828682241662586364,
+            );
+            $case(
+                1.25,
+                -0.70710678118654752440084436210484903928483593768847,
+                -0.70710678118654752440084436210484903928483593768847,
+            );
+            $case(
+                1.375,
+                -0.92387953251128675612818318939678828682241662586364,
+                -0.38268343236508977172845998403039886676134456248563,
+            );
+            $case(1.5, -1., -0.);
+            $case(
+                1.625,
+                -0.92387953251128675612818318939678828682241662586364,
+                0.38268343236508977172845998403039886676134456248563,
+            );
+            $case(
+                1.75,
+                -0.70710678118654752440084436210484903928483593768847,
+                0.70710678118654752440084436210484903928483593768847,
+            );
+            $case(
+                1.875,
+                -0.38268343236508977172845998403039886676134456248563,
+                0.92387953251128675612818318939678828682241662586364,
+            );
+            $case(2., -0., 1.);
+            $case(
+                2.125,
+                0.38268343236508977172845998403039886676134456248563,
+                0.92387953251128675612818318939678828682241662586364,
+            );
+            $case(
+                2.25,
+                0.70710678118654752440084436210484903928483593768847,
+                0.70710678118654752440084436210484903928483593768847,
+            );
+            $case(
+                2.375,
+                0.92387953251128675612818318939678828682241662586364,
+                0.38268343236508977172845998403039886676134456248563,
+            );
+            $case(2.5, 1., 0.);
+            $case(
+                2.625,
+                0.92387953251128675612818318939678828682241662586364,
+                -0.38268343236508977172845998403039886676134456248563,
+            );
+            $case(
+                2.75,
+                0.70710678118654752440084436210484903928483593768847,
+                -0.70710678118654752440084436210484903928483593768847,
+            );
+            $case(
+                2.875,
+                0.38268343236508977172845998403039886676134456248563,
+                -0.92387953251128675612818318939678828682241662586364,
+            );
+            $case(3., 0., -1.);
+            $case(
+                3.125,
+                -0.38268343236508977172845998403039886676134456248563,
+                -0.92387953251128675612818318939678828682241662586364,
+            );
+            $case(
+                3.25,
+                -0.70710678118654752440084436210484903928483593768847,
+                -0.70710678118654752440084436210484903928483593768847,
+            );
+            $case(
+                3.375,
+                -0.92387953251128675612818318939678828682241662586364,
+                -0.38268343236508977172845998403039886676134456248563,
+            );
+            $case(3.5, -1., -0.);
+            $case(
+                3.625,
+                -0.92387953251128675612818318939678828682241662586364,
+                0.38268343236508977172845998403039886676134456248563,
+            );
+            $case(
+                3.75,
+                -0.70710678118654752440084436210484903928483593768847,
+                0.70710678118654752440084436210484903928483593768847,
+            );
+            $case(
+                3.875,
+                -0.38268343236508977172845998403039886676134456248563,
+                0.92387953251128675612818318939678828682241662586364,
+            );
+            $case(4., -0., 1.);
+        };
+    }
+
+    #[test]
+    fn test_reference_sin_cos_pi_f32() {
+        fn approx_same(a: f32, b: f32) -> bool {
+            if a.is_finite() && b.is_finite() {
+                (a - b).abs() < 1e-6
+            } else {
+                a == b || (a.is_nan() && b.is_nan())
+            }
+        }
+        #[track_caller]
+        fn case(x: f32, expected_sin: f32, expected_cos: f32) {
+            let (ref_sin, ref_cos) = reference_sin_cos_pi_f32(x as f64);
+            assert!(
+                approx_same(ref_sin as f32, expected_sin)
+                    && approx_same(ref_cos as f32, expected_cos),
+                "case failed: x={x}, expected_sin={expected_sin}, expected_cos={expected_cos}, ref_sin={ref_sin}, ref_cos={ref_cos}",
+                x=x,
+                expected_sin=expected_sin,
+                expected_cos=expected_cos,
+                ref_sin=ref_sin,
+                ref_cos=ref_cos,
+            );
+        }
+        test_reference_sin_cos_pi_test_cases!(case, f32);
+    }
+
+    #[test]
+    fn test_reference_sin_cos_pi_f64() {
+        fn same(a: f64, b: f64) -> bool {
+            if a.is_finite() && b.is_finite() {
+                a == b
+            } else {
+                a == b || (a.is_nan() && b.is_nan())
+            }
+        }
+        #[track_caller]
+        fn case(x: f64, expected_sin: f64, expected_cos: f64) {
+            let (ref_sin, ref_cos) = reference_sin_cos_pi_f64(x);
+            assert!(
+                same(ref_sin, expected_sin) && same(ref_cos, expected_cos),
+                "case failed: x={x}, expected_sin={expected_sin}, expected_cos={expected_cos}, ref_sin={ref_sin}, ref_cos={ref_cos}",
+                x=x,
+                expected_sin=expected_sin,
+                expected_cos=expected_cos,
+                ref_sin=ref_sin,
+                ref_cos=ref_cos,
+            );
+        }
+        test_reference_sin_cos_pi_test_cases!(case, f64);
+    }
+
+    #[test]
+    #[cfg(feature = "full_tests")]
+    fn test_sin_pi_f32() {
+        for bits in 0..=u32::MAX {
+            check_ulp(
+                f32::from_bits(bits),
+                sin_cos_pi_check_ulp_callback,
+                |x| sin_pi_f32(Scalar, Value(x)).0,
+                |x| reference_sin_cos_pi_f32(x).0,
+            );
+        }
+    }
+
+    #[test]
+    #[cfg(feature = "full_tests")]
+    fn test_cos_pi_f32() {
+        for bits in 0..=u32::MAX {
+            check_ulp(
+                f32::from_bits(bits),
+                sin_cos_pi_check_ulp_callback,
+                |x| cos_pi_f32(Scalar, Value(x)).0,
+                |x| reference_sin_cos_pi_f32(x).1,
+            );
+        }
+    }
+
+    #[test]
+    #[cfg(feature = "full_tests")]
+    fn test_sin_pi_f64() {
+        for bits in (0..=u32::MAX).step_by(1 << 7) {
+            check_ulp(
+                f32::from_bits(bits) as f64,
+                sin_cos_pi_check_ulp_callback,
+                |x| sin_pi_f64(Scalar, Value(x)).0,
+                |x| reference_sin_cos_pi_f64(x).0,
+            );
+        }
+    }
+
+    #[test]
+    #[cfg(feature = "full_tests")]
+    fn test_cos_pi_f64() {
+        for bits in (0..=u32::MAX).step_by(1 << 7) {
+            check_ulp(
+                f32::from_bits(bits) as f64,
+                sin_cos_pi_check_ulp_callback,
+                |x| cos_pi_f64(Scalar, Value(x)).0,
+                |x| reference_sin_cos_pi_f64(x).1,
+            )
+        }
+    }
  }