src/algorithms/trig_pi.rs

   1 use crate::{
   2     prim::{PrimFloat, PrimUInt},
   3     traits::{Compare, Context, ConvertFrom, ConvertTo, Float, Make, Select},
   4 };
   5
   6 mod consts {
   7     #![allow(clippy::excessive_precision)]
   8     #![allow(dead_code)]
   9
  10     /// coefficients of taylor series for `sin(pi * x)` centered at `0`
  11     /// generated using:
  12     /// ```maxima,text
  13     /// fpprec:50$
  14     /// sinpi: bfloat(taylor(sin(%pi*x),x,0,19))$
  15     /// for i: 1 step 2 thru 19 do
  16     ///     printf(true, "pub(crate) const SINPI_KERNEL_TAYLOR_~d: f64 = ~a;~%", i, ssubst("e", "b", string(coeff(sinpi, x, i))))$
  17     /// ```
  18     pub(crate) const SINPI_KERNEL_TAYLOR_1: f64 =
  19         3.1415926535897932384626433832795028841971693993751e0;
  20     pub(crate) const SINPI_KERNEL_TAYLOR_3: f64 =
  21         -5.1677127800499700292460525111835658670375480943142e0;
  22     pub(crate) const SINPI_KERNEL_TAYLOR_5: f64 =
  23         2.550164039877345443856177583695296720669172555234e0;
  24     pub(crate) const SINPI_KERNEL_TAYLOR_7: f64 =
  25         -5.9926452932079207688773938354604004601536358636814e-1;
  26     pub(crate) const SINPI_KERNEL_TAYLOR_9: f64 =
  27         8.2145886611128228798802365523698344807837460797753e-2;
  28     pub(crate) const SINPI_KERNEL_TAYLOR_11: f64 =
  29         -7.370430945714350777259089957290781501211638236021e-3;
  30     pub(crate) const SINPI_KERNEL_TAYLOR_13: f64 =
  31         4.6630280576761256442062891447027174382819981361599e-4;
  32     pub(crate) const SINPI_KERNEL_TAYLOR_15: f64 =
  33         -2.1915353447830215827384652057094188859248708765956e-5;
  34     pub(crate) const SINPI_KERNEL_TAYLOR_17: f64 =
  35         7.9520540014755127847832068624575890327682459384282e-7;
  36     pub(crate) const SINPI_KERNEL_TAYLOR_19: f64 =
  37         -2.2948428997269873110203872385571587856074785581088e-8;
  38
  39     /// coefficients of taylor series for `cos(pi * x)` centered at `0`
  40     /// generated using:
  41     /// ```maxima,text
  42     /// fpprec:50$
  43     /// cospi: bfloat(taylor(cos(%pi*x),x,0,18))$
  44     /// for i: 0 step 2 thru 18 do
  45     ///     printf(true, "pub(crate) const COSPI_KERNEL_TAYLOR_~d: f64 = ~a;~%", i, ssubst("e", "b", string(coeff(cospi, x, i))))$
  46     /// ```
  47     pub(crate) const COSPI_KERNEL_TAYLOR_0: f64 = 1.0e0;
  48     pub(crate) const COSPI_KERNEL_TAYLOR_2: f64 =
  49         -4.9348022005446793094172454999380755676568497036204e0;
  50     pub(crate) const COSPI_KERNEL_TAYLOR_4: f64 =
  51         4.0587121264167682181850138620293796354053160696952e0;
  52     pub(crate) const COSPI_KERNEL_TAYLOR_6: f64 =
  53         -1.3352627688545894958753047828505831928711354556681e0;
  54     pub(crate) const COSPI_KERNEL_TAYLOR_8: f64 =
  55         2.3533063035889320454187935277546542154506893530856e-1;
  56     pub(crate) const COSPI_KERNEL_TAYLOR_10: f64 =
  57         -2.5806891390014060012598294252898849657186441048147e-2;
  58     pub(crate) const COSPI_KERNEL_TAYLOR_12: f64 =
  59         1.9295743094039230479033455636859576401684718150003e-3;
  60     pub(crate) const COSPI_KERNEL_TAYLOR_14: f64 =
  61         -1.0463810492484570711801672835223932761029733149091e-4;
  62     pub(crate) const COSPI_KERNEL_TAYLOR_16: f64 =
  63         4.3030695870329470072978237149669233008960901556009e-6;
  64     pub(crate) const COSPI_KERNEL_TAYLOR_18: f64 =
  65         -1.387895246221377211446808750399309343777037849978e-7;
  66 }
  67
  68 /// computes `sin(pi * x)` for `-0.25 <= x <= 0.25`
  69 /// not guaranteed to give correct sign for zero result
  70 /// has an error of up to 2ULP
  71 pub fn sin_pi_kernel_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> Ctx::VecF16 {
  72     let x_sq = x * x;
  73     let mut v: Ctx::VecF16 = ctx.make(consts::SINPI_KERNEL_TAYLOR_5.to());
  74     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_3.to()));
  75     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_1.to()));
  76     v * x
  77 }
  78
  79 /// computes `cos(pi * x)` for `-0.25 <= x <= 0.25`
  80 /// has an error of up to 2ULP
  81 pub fn cos_pi_kernel_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> Ctx::VecF16 {
  82     let x_sq = x * x;
  83     let mut v: Ctx::VecF16 = ctx.make(consts::COSPI_KERNEL_TAYLOR_4.to());
  84     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_2.to()));
  85     v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_0.to()))
  86 }
  87
  88 /// computes `sin(pi * x)` for `-0.25 <= x <= 0.25`
  89 /// not guaranteed to give correct sign for zero result
  90 /// has an error of up to 2ULP
  91 pub fn sin_pi_kernel_f32<Ctx: Context>(ctx: Ctx, x: Ctx::VecF32) -> Ctx::VecF32 {
  92     let x_sq = x * x;
  93     let mut v: Ctx::VecF32 = ctx.make(consts::SINPI_KERNEL_TAYLOR_9.to());
  94     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_7.to()));
  95     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_5.to()));
  96     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_3.to()));
  97     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_1.to()));
  98     v * x
  99 }
 100
 101 /// computes `cos(pi * x)` for `-0.25 <= x <= 0.25`
 102 /// has an error of up to 2ULP
 103 pub fn cos_pi_kernel_f32<Ctx: Context>(ctx: Ctx, x: Ctx::VecF32) -> Ctx::VecF32 {
 104     let x_sq = x * x;
 105     let mut v: Ctx::VecF32 = ctx.make(consts::COSPI_KERNEL_TAYLOR_8.to());
 106     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_6.to()));
 107     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_4.to()));
 108     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_2.to()));
 109     v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_0.to()))
 110 }
 111
 112 /// computes `sin(pi * x)` for `-0.25 <= x <= 0.25`
 113 /// not guaranteed to give correct sign for zero result
 114 /// has an error of up to 2ULP
 115 pub fn sin_pi_kernel_f64<Ctx: Context>(ctx: Ctx, x: Ctx::VecF64) -> Ctx::VecF64 {
 116     let x_sq = x * x;
 117     let mut v: Ctx::VecF64 = ctx.make(consts::SINPI_KERNEL_TAYLOR_15.to());
 118     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_13.to()));
 119     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_11.to()));
 120     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_9.to()));
 121     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_7.to()));
 122     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_5.to()));
 123     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_3.to()));
 124     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_1.to()));
 125     v * x
 126 }
 127
 128 /// computes `cos(pi * x)` for `-0.25 <= x <= 0.25`
 129 /// has an error of up to 2ULP
 130 pub fn cos_pi_kernel_f64<Ctx: Context>(ctx: Ctx, x: Ctx::VecF64) -> Ctx::VecF64 {
 131     let x_sq = x * x;
 132     let mut v: Ctx::VecF64 = ctx.make(consts::COSPI_KERNEL_TAYLOR_16.to());
 133     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_14.to()));
 134     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_12.to()));
 135     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_10.to()));
 136     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_8.to()));
 137     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_6.to()));
 138     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_4.to()));
 139     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_2.to()));
 140     v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_0.to()))
 141 }
 142
 143 /// computes `(sin(pi * x), cos(pi * x))`
 144 /// not guaranteed to give correct sign for zero results
 145 /// inherits error from `sin_pi_kernel` and `cos_pi_kernel`
 146 pub fn sin_cos_pi_impl<
 147     Ctx: Context,
 148     VecF: Float<PrimFloat = PrimF> + Make<Context = Ctx>,
 149     PrimF: PrimFloat<BitsType = PrimU>,
 150     PrimU: PrimUInt,
 151     SinPiKernel: FnOnce(Ctx, VecF) -> VecF,
 152     CosPiKernel: FnOnce(Ctx, VecF) -> VecF,
 153 >(
 154     ctx: Ctx,
 155     x: VecF,
 156     sin_pi_kernel: SinPiKernel,
 157     cos_pi_kernel: CosPiKernel,
 158 ) -> (VecF, VecF) {
 159     let two_f: VecF = ctx.make(2.0.to());
 160     let one_half: VecF = ctx.make(0.5.to());
 161     let max_contiguous_integer: VecF = ctx.make(PrimF::max_contiguous_integer());
 162     // if `x` is finite and bigger than `max_contiguous_integer`, then x is an even integer
 163     let in_range = x.abs().lt(max_contiguous_integer); // use `lt` so nans are counted as out-of-range
 164     let is_finite = x.is_finite();
 165     let nan: VecF = ctx.make(f32::NAN.to());
 166     let zero_f: VecF = ctx.make(0.to());
 167     let one_f: VecF = ctx.make(1.to());
 168     let zero_i: VecF::SignedBitsType = ctx.make(0.to());
 169     let one_i: VecF::SignedBitsType = ctx.make(1.to());
 170     let two_i: VecF::SignedBitsType = ctx.make(2.to());
 171     let out_of_range_sin = is_finite.select(zero_f, nan);
 172     let out_of_range_cos = is_finite.select(one_f, nan);
 173     let xi = (x * two_f).round();
 174     let xk = x - xi * one_half;
 175     let sk = sin_pi_kernel(ctx, xk);
 176     let ck = cos_pi_kernel(ctx, xk);
 177     let xi = VecF::SignedBitsType::cvt_from(xi);
 178     let bit_0_clear = (xi & one_i).eq(zero_i);
 179     let st = bit_0_clear.select(sk, ck);
 180     let ct = bit_0_clear.select(ck, sk);
 181     let s = (xi & two_i).eq(zero_i).select(st, -st);
 182     let c = ((xi + one_i) & two_i).eq(zero_i).select(ct, -ct);
 183     (
 184         in_range.select(s, out_of_range_sin),
 185         in_range.select(c, out_of_range_cos),
 186     )
 187 }
 188
 189 /// computes `(sin(pi * x), cos(pi * x))`
 190 /// not guaranteed to give correct sign for zero results
 191 /// has an error of up to 2ULP
 192 pub fn sin_cos_pi_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> (Ctx::VecF16, Ctx::VecF16) {
 193     sin_cos_pi_impl(ctx, x, sin_pi_kernel_f16, cos_pi_kernel_f16)
 194 }
 195
 196 /// computes `sin(pi * x)`
 197 /// not guaranteed to give correct sign for zero results
 198 /// has an error of up to 2ULP
 199 pub fn sin_pi_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> Ctx::VecF16 {
 200     sin_cos_pi_f16(ctx, x).0
 201 }
 202
 203 /// computes `cos(pi * x)`
 204 /// not guaranteed to give correct sign for zero results
 205 /// has an error of up to 2ULP
 206 pub fn cos_pi_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> Ctx::VecF16 {
 207     sin_cos_pi_f16(ctx, x).1
 208 }
 209
 210 /// computes `(sin(pi * x), cos(pi * x))`
 211 /// not guaranteed to give correct sign for zero results
 212 /// has an error of up to 2ULP
 213 pub fn sin_cos_pi_f32<Ctx: Context>(ctx: Ctx, x: Ctx::VecF32) -> (Ctx::VecF32, Ctx::VecF32) {
 214     sin_cos_pi_impl(ctx, x, sin_pi_kernel_f32, cos_pi_kernel_f32)
 215 }
 216
 217 /// computes `sin(pi * x)`
 218 /// not guaranteed to give correct sign for zero results
 219 /// has an error of up to 2ULP
 220 pub fn sin_pi_f32<Ctx: Context>(ctx: Ctx, x: Ctx::VecF32) -> Ctx::VecF32 {
 221     sin_cos_pi_f32(ctx, x).0
 222 }
 223
 224 /// computes `cos(pi * x)`
 225 /// not guaranteed to give correct sign for zero results
 226 /// has an error of up to 2ULP
 227 pub fn cos_pi_f32<Ctx: Context>(ctx: Ctx, x: Ctx::VecF32) -> Ctx::VecF32 {
 228     sin_cos_pi_f32(ctx, x).1
 229 }
 230
 231 /// computes `(sin(pi * x), cos(pi * x))`
 232 /// not guaranteed to give correct sign for zero results
 233 /// has an error of up to 2ULP
 234 pub fn sin_cos_pi_f64<Ctx: Context>(ctx: Ctx, x: Ctx::VecF64) -> (Ctx::VecF64, Ctx::VecF64) {
 235     sin_cos_pi_impl(ctx, x, sin_pi_kernel_f64, cos_pi_kernel_f64)
 236 }
 237
 238 /// computes `sin(pi * x)`
 239 /// not guaranteed to give correct sign for zero results
 240 /// has an error of up to 2ULP
 241 pub fn sin_pi_f64<Ctx: Context>(ctx: Ctx, x: Ctx::VecF64) -> Ctx::VecF64 {
 242     sin_cos_pi_f64(ctx, x).0
 243 }
 244
 245 /// computes `cos(pi * x)`
 246 /// not guaranteed to give correct sign for zero results
 247 /// has an error of up to 2ULP
 248 pub fn cos_pi_f64<Ctx: Context>(ctx: Ctx, x: Ctx::VecF64) -> Ctx::VecF64 {
 249     sin_cos_pi_f64(ctx, x).1
 250 }
 251
 252 #[cfg(test)]
 253 mod tests {
 254     use super::*;
 255     use crate::{
 256         f16::F16,
 257         scalar::{Scalar, Value},
 258     };
 259     use std::f64;
 260
 261     struct CheckUlpCallbackArg<F, I> {
 262         distance_in_ulp: I,
 263         x: F,
 264         expected: F,
 265         result: F,
 266     }
 267
 268     #[track_caller]
 269     fn check_ulp<T: PrimFloat>(
 270         x: T,
 271         is_ok: impl Fn(CheckUlpCallbackArg<T, u64>) -> bool,
 272         fn_f16: impl Fn(T) -> T,
 273         fn_reference: impl Fn(f64) -> f64,
 274     ) {
 275         let x_f64: f64 = x.to();
 276         let expected_f64 = fn_reference(x_f64);
 277         let expected: T = expected_f64.to();
 278         let result = fn_f16(x);
 279         if result == expected {
 280             return;
 281         }
 282         if result.is_nan() && expected.is_nan() {
 283             return;
 284         }
 285         let expected_bits: i64 = expected.to_bits().to();
 286         let result_bits: i64 = result.to_bits().to();
 287         let distance_in_ulp = (expected_bits - result_bits).unsigned_abs();
 288         if !result.is_nan()
 289             && !expected.is_nan()
 290             && is_ok(CheckUlpCallbackArg {
 291                 distance_in_ulp,
 292                 x,
 293                 expected,
 294                 result,
 295             })
 296         {
 297             return;
 298         }
 299         panic!(
 300             "error is too big: \
 301                 x = {x:?} {x_bits:#X}, \
 302                 result = {result:?} {result_bits:#X}, \
 303                 expected = {expected:?} {expected_bits:#X}, \
 304                 distance_in_ulp = {distance_in_ulp}",
 305             x = x,
 306             x_bits = x.to_bits(),
 307             result = result,
 308             result_bits = result.to_bits(),
 309             expected = expected,
 310             expected_bits = expected.to_bits(),
 311             distance_in_ulp = distance_in_ulp,
 312         );
 313     }
 314
 315     #[test]
 316     #[cfg_attr(
 317         not(feature = "f16"),
 318         should_panic(expected = "f16 feature is not enabled")
 319     )]
 320     fn test_sin_pi_kernel_f16() {
 321         let check = |x| {
 322             check_ulp(
 323                 x,
 324                 |arg| arg.distance_in_ulp <= if arg.expected == 0.to() { 0 } else { 2 },
 325                 |x| sin_pi_kernel_f16(Scalar, Value(x)).0,
 326                 |x| (f64::consts::PI * x).sin(),
 327             )
 328         };
 329         let quarter = F16::to_bits(0.25f32.to());
 330         for bits in (0..=quarter).rev() {
 331             check(F16::from_bits(bits));
 332             check(-F16::from_bits(bits));
 333         }
 334     }
 335
 336     #[test]
 337     #[cfg_attr(
 338         not(feature = "f16"),
 339         should_panic(expected = "f16 feature is not enabled")
 340     )]
 341     fn test_cos_pi_kernel_f16() {
 342         let check = |x| {
 343             check_ulp(
 344                 x,
 345                 |arg| arg.distance_in_ulp <= 2 && arg.result <= 1.to(),
 346                 |x| cos_pi_kernel_f16(Scalar, Value(x)).0,
 347                 |x| (f64::consts::PI * x).cos(),
 348             )
 349         };
 350         let quarter = F16::to_bits(0.25f32.to());
 351         for bits in (0..=quarter).rev() {
 352             check(F16::from_bits(bits));
 353             check(-F16::from_bits(bits));
 354         }
 355     }
 356
 357     #[test]
 358     #[cfg(feature = "full_tests")]
 359     fn test_sin_pi_kernel_f32() {
 360         let check = |x| {
 361             check_ulp(
 362                 x,
 363                 |arg| arg.distance_in_ulp <= if arg.expected == 0. { 0 } else { 2 },
 364                 |x| sin_pi_kernel_f32(Scalar, Value(x)).0,
 365                 |x| (f64::consts::PI * x).sin(),
 366             )
 367         };
 368         let quarter = 0.25f32.to_bits();
 369         for bits in (0..=quarter).rev() {
 370             check(f32::from_bits(bits));
 371             check(-f32::from_bits(bits));
 372         }
 373     }
 374
 375     #[test]
 376     #[cfg(feature = "full_tests")]
 377     fn test_cos_pi_kernel_f32() {
 378         let check = |x| {
 379             check_ulp(
 380                 x,
 381                 |arg| arg.distance_in_ulp <= 2 && arg.result <= 1.,
 382                 |x| cos_pi_kernel_f32(Scalar, Value(x)).0,
 383                 |x| (f64::consts::PI * x).cos(),
 384             )
 385         };
 386         let quarter = 0.25f32.to_bits();
 387         for bits in (0..=quarter).rev() {
 388             check(f32::from_bits(bits));
 389             check(-f32::from_bits(bits));
 390         }
 391     }
 392
 393     #[test]
 394     #[cfg(feature = "full_tests")]
 395     fn test_sin_pi_kernel_f64() {
 396         let check = |x| {
 397             check_ulp(
 398                 x,
 399                 sin_cos_pi_check_ulp_callback,
 400                 |x| sin_pi_kernel_f64(Scalar, Value(x)).0,
 401                 |x| reference_sin_cos_pi_f64(x).0,
 402             )
 403         };
 404         let quarter = 0.25f32.to_bits();
 405         for bits in (0..=quarter).rev().step_by(1 << 5) {
 406             check(f32::from_bits(bits) as f64);
 407             check(-f32::from_bits(bits) as f64);
 408         }
 409     }
 410
 411     #[test]
 412     #[cfg(feature = "full_tests")]
 413     fn test_cos_pi_kernel_f64() {
 414         let check = |x| {
 415             check_ulp(
 416                 x,
 417                 sin_cos_pi_check_ulp_callback,
 418                 |x| cos_pi_kernel_f64(Scalar, Value(x)).0,
 419                 |x| reference_sin_cos_pi_f64(x).1,
 420             )
 421         };
 422         let quarter = 0.25f32.to_bits();
 423         for bits in (0..=quarter).rev().step_by(1 << 5) {
 424             check(f32::from_bits(bits) as f64);
 425             check(-f32::from_bits(bits) as f64);
 426         }
 427     }
 428
 429     fn sin_cos_pi_check_ulp_callback<F: PrimFloat>(arg: CheckUlpCallbackArg<F, u64>) -> bool {
 430         if arg.x % 0.5.to() == 0.0.to() {
 431             arg.distance_in_ulp == 0
 432         } else {
 433             arg.distance_in_ulp <= 2 && arg.result.abs() <= 1.to()
 434         }
 435     }
 436
 437     #[test]
 438     #[cfg_attr(
 439         not(feature = "f16"),
 440         should_panic(expected = "f16 feature is not enabled")
 441     )]
 442     fn test_sin_pi_f16() {
 443         for bits in 0..=u16::MAX {
 444             check_ulp(
 445                 F16::from_bits(bits),
 446                 sin_cos_pi_check_ulp_callback,
 447                 |x| sin_pi_f16(Scalar, Value(x)).0,
 448                 |x| (f64::consts::PI * x).sin(),
 449             );
 450         }
 451     }
 452
 453     #[test]
 454     #[cfg_attr(
 455         not(feature = "f16"),
 456         should_panic(expected = "f16 feature is not enabled")
 457     )]
 458     fn test_cos_pi_f16() {
 459         for bits in 0..=u16::MAX {
 460             check_ulp(
 461                 F16::from_bits(bits),
 462                 sin_cos_pi_check_ulp_callback,
 463                 |x| cos_pi_f16(Scalar, Value(x)).0,
 464                 |x| (f64::consts::PI * x).cos(),
 465             );
 466         }
 467     }
 468
 469     fn reference_sin_cos_pi_f32(mut v: f64) -> (f64, f64) {
 470         if !v.is_finite() {
 471             return (f64::NAN, f64::NAN);
 472         }
 473         v %= 2.0;
 474         if v >= 1.0 {
 475             v -= 2.0;
 476         } else if v <= -1.0 {
 477             v += 2.0;
 478         }
 479         v *= 2.0;
 480         let part = v.round() as i32;
 481         v -= part as f64;
 482         v *= f64::consts::PI / 2.0;
 483         let (sin, cos) = v.sin_cos();
 484         match part {
 485             0 => (sin, cos),
 486             1 => (cos, -sin),
 487             2 => (-sin, -cos),
 488             -2 => (-sin, -cos),
 489             -1 => (-cos, sin),
 490             _ => panic!("not implemented: part={}", part),
 491         }
 492     }
 493
 494     fn reference_sin_cos_pi_f64(mut v: f64) -> (f64, f64) {
 495         use az::Cast;
 496         use rug::{float::Constant, Float};
 497         if !v.is_finite() {
 498             return (f64::NAN, f64::NAN);
 499         }
 500         v %= 2.0;
 501         if v >= 1.0 {
 502             v -= 2.0;
 503         } else if v <= -1.0 {
 504             v += 2.0;
 505         }
 506         v *= 2.0;
 507         let part = v.round() as i32;
 508         v -= part as f64;
 509         let precision = 100;
 510         let mut v = Float::with_val(precision, v);
 511         let pi = Float::with_val(precision, Constant::Pi);
 512         let pi_2 = pi / 2;
 513         v *= &pi_2;
 514         let cos = pi_2; // just a temp var, value is ignored
 515         let (sin, cos) = v.sin_cos(cos);
 516         let sin: f64 = sin.cast();
 517         let cos: f64 = cos.cast();
 518         match part {
 519             0 => (sin, cos),
 520             1 => (cos, -sin),
 521             2 => (-sin, -cos),
 522             -2 => (-sin, -cos),
 523             -1 => (-cos, sin),
 524             _ => panic!("not implemented: part={}", part),
 525         }
 526     }
 527
 528     macro_rules! test_reference_sin_cos_pi_test_cases {
 529         ($case:expr, $ty:ident) => {
 530             $case($ty::NAN, $ty::NAN, $ty::NAN);
 531             $case($ty::INFINITY, $ty::NAN, $ty::NAN);
 532             $case(-$ty::INFINITY, $ty::NAN, $ty::NAN);
 533             $case(-4., 0., 1.);
 534             $case(
 535                 -3.875,
 536                 0.38268343236508977172845998403039886676134456248563,
 537                 0.92387953251128675612818318939678828682241662586364,
 538             );
 539             $case(
 540                 -3.75,
 541                 0.70710678118654752440084436210484903928483593768847,
 542                 0.70710678118654752440084436210484903928483593768847,
 543             );
 544             $case(
 545                 -3.625,
 546                 0.92387953251128675612818318939678828682241662586364,
 547                 0.38268343236508977172845998403039886676134456248563,
 548             );
 549             $case(-3.5, 1., -0.);
 550             $case(
 551                 -3.375,
 552                 0.92387953251128675612818318939678828682241662586364,
 553                 -0.38268343236508977172845998403039886676134456248563,
 554             );
 555             $case(
 556                 -3.25,
 557                 0.70710678118654752440084436210484903928483593768847,
 558                 -0.70710678118654752440084436210484903928483593768847,
 559             );
 560             $case(
 561                 -3.125,
 562                 0.38268343236508977172845998403039886676134456248563,
 563                 -0.92387953251128675612818318939678828682241662586364,
 564             );
 565             $case(-3., -0., -1.);
 566             $case(
 567                 -2.875,
 568                 -0.38268343236508977172845998403039886676134456248563,
 569                 -0.92387953251128675612818318939678828682241662586364,
 570             );
 571             $case(
 572                 -2.75,
 573                 -0.70710678118654752440084436210484903928483593768847,
 574                 -0.70710678118654752440084436210484903928483593768847,
 575             );
 576             $case(
 577                 -2.625,
 578                 -0.92387953251128675612818318939678828682241662586364,
 579                 -0.38268343236508977172845998403039886676134456248563,
 580             );
 581             $case(-2.5, -1., 0.);
 582             $case(
 583                 -2.375,
 584                 -0.92387953251128675612818318939678828682241662586364,
 585                 0.38268343236508977172845998403039886676134456248563,
 586             );
 587             $case(
 588                 -2.25,
 589                 -0.70710678118654752440084436210484903928483593768847,
 590                 0.70710678118654752440084436210484903928483593768847,
 591             );
 592             $case(
 593                 -2.125,
 594                 -0.38268343236508977172845998403039886676134456248563,
 595                 0.92387953251128675612818318939678828682241662586364,
 596             );
 597             $case(-2., 0., 1.);
 598             $case(
 599                 -1.875,
 600                 0.38268343236508977172845998403039886676134456248563,
 601                 0.92387953251128675612818318939678828682241662586364,
 602             );
 603             $case(
 604                 -1.75,
 605                 0.70710678118654752440084436210484903928483593768847,
 606                 0.70710678118654752440084436210484903928483593768847,
 607             );
 608             $case(
 609                 -1.625,
 610                 0.92387953251128675612818318939678828682241662586364,
 611                 0.38268343236508977172845998403039886676134456248563,
 612             );
 613             $case(-1.5, 1., -0.);
 614             $case(
 615                 -1.375,
 616                 0.92387953251128675612818318939678828682241662586364,
 617                 -0.38268343236508977172845998403039886676134456248563,
 618             );
 619             $case(
 620                 -1.25,
 621                 0.70710678118654752440084436210484903928483593768847,
 622                 -0.70710678118654752440084436210484903928483593768847,
 623             );
 624             $case(
 625                 -1.125,
 626                 0.38268343236508977172845998403039886676134456248563,
 627                 -0.92387953251128675612818318939678828682241662586364,
 628             );
 629             $case(-1., -0., -1.);
 630             $case(
 631                 -0.875,
 632                 -0.38268343236508977172845998403039886676134456248563,
 633                 -0.92387953251128675612818318939678828682241662586364,
 634             );
 635             $case(
 636                 -0.75,
 637                 -0.70710678118654752440084436210484903928483593768847,
 638                 -0.70710678118654752440084436210484903928483593768847,
 639             );
 640             $case(
 641                 -0.625,
 642                 -0.92387953251128675612818318939678828682241662586364,
 643                 -0.38268343236508977172845998403039886676134456248563,
 644             );
 645             $case(-0.5, -1., 0.);
 646             $case(
 647                 -0.375,
 648                 -0.92387953251128675612818318939678828682241662586364,
 649                 0.38268343236508977172845998403039886676134456248563,
 650             );
 651             $case(
 652                 -0.25,
 653                 -0.70710678118654752440084436210484903928483593768847,
 654                 0.70710678118654752440084436210484903928483593768847,
 655             );
 656             $case(
 657                 -0.125,
 658                 -0.38268343236508977172845998403039886676134456248563,
 659                 0.92387953251128675612818318939678828682241662586364,
 660             );
 661             $case(0., 0., 1.);
 662             $case(
 663                 0.125,
 664                 0.38268343236508977172845998403039886676134456248563,
 665                 0.92387953251128675612818318939678828682241662586364,
 666             );
 667             $case(
 668                 0.25,
 669                 0.70710678118654752440084436210484903928483593768847,
 670                 0.70710678118654752440084436210484903928483593768847,
 671             );
 672             $case(
 673                 0.375,
 674                 0.92387953251128675612818318939678828682241662586364,
 675                 0.38268343236508977172845998403039886676134456248563,
 676             );
 677             $case(0.5, 1., 0.);
 678             $case(
 679                 0.625,
 680                 0.92387953251128675612818318939678828682241662586364,
 681                 -0.38268343236508977172845998403039886676134456248563,
 682             );
 683             $case(
 684                 0.75,
 685                 0.70710678118654752440084436210484903928483593768847,
 686                 -0.70710678118654752440084436210484903928483593768847,
 687             );
 688             $case(
 689                 0.875,
 690                 0.38268343236508977172845998403039886676134456248563,
 691                 -0.92387953251128675612818318939678828682241662586364,
 692             );
 693             $case(1., 0., -1.);
 694             $case(
 695                 1.125,
 696                 -0.38268343236508977172845998403039886676134456248563,
 697                 -0.92387953251128675612818318939678828682241662586364,
 698             );
 699             $case(
 700                 1.25,
 701                 -0.70710678118654752440084436210484903928483593768847,
 702                 -0.70710678118654752440084436210484903928483593768847,
 703             );
 704             $case(
 705                 1.375,
 706                 -0.92387953251128675612818318939678828682241662586364,
 707                 -0.38268343236508977172845998403039886676134456248563,
 708             );
 709             $case(1.5, -1., -0.);
 710             $case(
 711                 1.625,
 712                 -0.92387953251128675612818318939678828682241662586364,
 713                 0.38268343236508977172845998403039886676134456248563,
 714             );
 715             $case(
 716                 1.75,
 717                 -0.70710678118654752440084436210484903928483593768847,
 718                 0.70710678118654752440084436210484903928483593768847,
 719             );
 720             $case(
 721                 1.875,
 722                 -0.38268343236508977172845998403039886676134456248563,
 723                 0.92387953251128675612818318939678828682241662586364,
 724             );
 725             $case(2., -0., 1.);
 726             $case(
 727                 2.125,
 728                 0.38268343236508977172845998403039886676134456248563,
 729                 0.92387953251128675612818318939678828682241662586364,
 730             );
 731             $case(
 732                 2.25,
 733                 0.70710678118654752440084436210484903928483593768847,
 734                 0.70710678118654752440084436210484903928483593768847,
 735             );
 736             $case(
 737                 2.375,
 738                 0.92387953251128675612818318939678828682241662586364,
 739                 0.38268343236508977172845998403039886676134456248563,
 740             );
 741             $case(2.5, 1., 0.);
 742             $case(
 743                 2.625,
 744                 0.92387953251128675612818318939678828682241662586364,
 745                 -0.38268343236508977172845998403039886676134456248563,
 746             );
 747             $case(
 748                 2.75,
 749                 0.70710678118654752440084436210484903928483593768847,
 750                 -0.70710678118654752440084436210484903928483593768847,
 751             );
 752             $case(
 753                 2.875,
 754                 0.38268343236508977172845998403039886676134456248563,
 755                 -0.92387953251128675612818318939678828682241662586364,
 756             );
 757             $case(3., 0., -1.);
 758             $case(
 759                 3.125,
 760                 -0.38268343236508977172845998403039886676134456248563,
 761                 -0.92387953251128675612818318939678828682241662586364,
 762             );
 763             $case(
 764                 3.25,
 765                 -0.70710678118654752440084436210484903928483593768847,
 766                 -0.70710678118654752440084436210484903928483593768847,
 767             );
 768             $case(
 769                 3.375,
 770                 -0.92387953251128675612818318939678828682241662586364,
 771                 -0.38268343236508977172845998403039886676134456248563,
 772             );
 773             $case(3.5, -1., -0.);
 774             $case(
 775                 3.625,
 776                 -0.92387953251128675612818318939678828682241662586364,
 777                 0.38268343236508977172845998403039886676134456248563,
 778             );
 779             $case(
 780                 3.75,
 781                 -0.70710678118654752440084436210484903928483593768847,
 782                 0.70710678118654752440084436210484903928483593768847,
 783             );
 784             $case(
 785                 3.875,
 786                 -0.38268343236508977172845998403039886676134456248563,
 787                 0.92387953251128675612818318939678828682241662586364,
 788             );
 789             $case(4., -0., 1.);
 790         };
 791     }
 792
 793     #[test]
 794     fn test_reference_sin_cos_pi_f32() {
 795         fn approx_same(a: f32, b: f32) -> bool {
 796             if a.is_finite() && b.is_finite() {
 797                 (a - b).abs() < 1e-6
 798             } else {
 799                 a == b || (a.is_nan() && b.is_nan())
 800             }
 801         }
 802         #[track_caller]
 803         fn case(x: f32, expected_sin: f32, expected_cos: f32) {
 804             let (ref_sin, ref_cos) = reference_sin_cos_pi_f32(x as f64);
 805             assert!(
 806                 approx_same(ref_sin as f32, expected_sin)
 807                     && approx_same(ref_cos as f32, expected_cos),
 808                 "case failed: x={x}, expected_sin={expected_sin}, expected_cos={expected_cos}, ref_sin={ref_sin}, ref_cos={ref_cos}",
 809                 x=x,
 810                 expected_sin=expected_sin,
 811                 expected_cos=expected_cos,
 812                 ref_sin=ref_sin,
 813                 ref_cos=ref_cos,
 814             );
 815         }
 816         test_reference_sin_cos_pi_test_cases!(case, f32);
 817     }
 818
 819     #[test]
 820     fn test_reference_sin_cos_pi_f64() {
 821         fn same(a: f64, b: f64) -> bool {
 822             if a.is_finite() && b.is_finite() {
 823                 a == b
 824             } else {
 825                 a == b || (a.is_nan() && b.is_nan())
 826             }
 827         }
 828         #[track_caller]
 829         fn case(x: f64, expected_sin: f64, expected_cos: f64) {
 830             let (ref_sin, ref_cos) = reference_sin_cos_pi_f64(x);
 831             assert!(
 832                 same(ref_sin, expected_sin) && same(ref_cos, expected_cos),
 833                 "case failed: x={x}, expected_sin={expected_sin}, expected_cos={expected_cos}, ref_sin={ref_sin}, ref_cos={ref_cos}",
 834                 x=x,
 835                 expected_sin=expected_sin,
 836                 expected_cos=expected_cos,
 837                 ref_sin=ref_sin,
 838                 ref_cos=ref_cos,
 839             );
 840         }
 841         test_reference_sin_cos_pi_test_cases!(case, f64);
 842     }
 843
 844     #[test]
 845     #[cfg(feature = "full_tests")]
 846     fn test_sin_pi_f32() {
 847         for bits in 0..=u32::MAX {
 848             check_ulp(
 849                 f32::from_bits(bits),
 850                 sin_cos_pi_check_ulp_callback,
 851                 |x| sin_pi_f32(Scalar, Value(x)).0,
 852                 |x| reference_sin_cos_pi_f32(x).0,
 853             );
 854         }
 855     }
 856
 857     #[test]
 858     #[cfg(feature = "full_tests")]
 859     fn test_cos_pi_f32() {
 860         for bits in 0..=u32::MAX {
 861             check_ulp(
 862                 f32::from_bits(bits),
 863                 sin_cos_pi_check_ulp_callback,
 864                 |x| cos_pi_f32(Scalar, Value(x)).0,
 865                 |x| reference_sin_cos_pi_f32(x).1,
 866             );
 867         }
 868     }
 869
 870     #[test]
 871     #[cfg(feature = "full_tests")]
 872     fn test_sin_pi_f64() {
 873         for bits in (0..=u32::MAX).step_by(1 << 7) {
 874             check_ulp(
 875                 f32::from_bits(bits) as f64,
 876                 sin_cos_pi_check_ulp_callback,
 877                 |x| sin_pi_f64(Scalar, Value(x)).0,
 878                 |x| reference_sin_cos_pi_f64(x).0,
 879             );
 880         }
 881     }
 882
 883     #[test]
 884     #[cfg(feature = "full_tests")]
 885     fn test_cos_pi_f64() {
 886         for bits in (0..=u32::MAX).step_by(1 << 7) {
 887             check_ulp(
 888                 f32::from_bits(bits) as f64,
 889                 sin_cos_pi_check_ulp_callback,
 890                 |x| cos_pi_f64(Scalar, Value(x)).0,
 891                 |x| reference_sin_cos_pi_f64(x).1,
 892             )
 893         }
 894     }
 895 }