src/algorithms/trig_pi.rs

   1 use crate::{
   2     prim::{PrimFloat, PrimUInt},
   3     traits::{Compare, Context, ConvertFrom, ConvertTo, Float, Make, Select},
   4 };
   5
   6 mod consts {
   7     #![allow(clippy::excessive_precision)]
   8     #![allow(dead_code)]
   9
  10     /// coefficients of taylor series for `sin(pi * x)` centered at `0`
  11     /// generated using:
  12     /// ```maxima,text
  13     /// fpprec:50$
  14     /// sinpi: bfloat(taylor(sin(%pi*x),x,0,19))$
  15     /// for i: 1 step 2 thru 19 do
  16     ///     printf(true, "pub(crate) const SINPI_KERNEL_TAYLOR_~d: f64 = ~a;~%", i, ssubst("e", "b", string(coeff(sinpi, x, i))))$
  17     /// ```
  18     pub(crate) const SINPI_KERNEL_TAYLOR_1: f64 =
  19         3.1415926535897932384626433832795028841971693993751e0;
  20     pub(crate) const SINPI_KERNEL_TAYLOR_3: f64 =
  21         -5.1677127800499700292460525111835658670375480943142e0;
  22     pub(crate) const SINPI_KERNEL_TAYLOR_5: f64 =
  23         2.550164039877345443856177583695296720669172555234e0;
  24     pub(crate) const SINPI_KERNEL_TAYLOR_7: f64 =
  25         -5.9926452932079207688773938354604004601536358636814e-1;
  26     pub(crate) const SINPI_KERNEL_TAYLOR_9: f64 =
  27         8.2145886611128228798802365523698344807837460797753e-2;
  28     pub(crate) const SINPI_KERNEL_TAYLOR_11: f64 =
  29         -7.370430945714350777259089957290781501211638236021e-3;
  30     pub(crate) const SINPI_KERNEL_TAYLOR_13: f64 =
  31         4.6630280576761256442062891447027174382819981361599e-4;
  32     pub(crate) const SINPI_KERNEL_TAYLOR_15: f64 =
  33         -2.1915353447830215827384652057094188859248708765956e-5;
  34     pub(crate) const SINPI_KERNEL_TAYLOR_17: f64 =
  35         7.9520540014755127847832068624575890327682459384282e-7;
  36     pub(crate) const SINPI_KERNEL_TAYLOR_19: f64 =
  37         -2.2948428997269873110203872385571587856074785581088e-8;
  38
  39     /// coefficients of taylor series for `cos(pi * x)` centered at `0`
  40     /// generated using:
  41     /// ```maxima,text
  42     /// fpprec:50$
  43     /// cospi: bfloat(taylor(cos(%pi*x),x,0,18))$
  44     /// for i: 0 step 2 thru 18 do
  45     ///     printf(true, "pub(crate) const COSPI_KERNEL_TAYLOR_~d: f64 = ~a;~%", i, ssubst("e", "b", string(coeff(cospi, x, i))))$
  46     /// ```
  47     pub(crate) const COSPI_KERNEL_TAYLOR_0: f64 = 1.0e0;
  48     pub(crate) const COSPI_KERNEL_TAYLOR_2: f64 =
  49         -4.9348022005446793094172454999380755676568497036204e0;
  50     pub(crate) const COSPI_KERNEL_TAYLOR_4: f64 =
  51         4.0587121264167682181850138620293796354053160696952e0;
  52     pub(crate) const COSPI_KERNEL_TAYLOR_6: f64 =
  53         -1.3352627688545894958753047828505831928711354556681e0;
  54     pub(crate) const COSPI_KERNEL_TAYLOR_8: f64 =
  55         2.3533063035889320454187935277546542154506893530856e-1;
  56     pub(crate) const COSPI_KERNEL_TAYLOR_10: f64 =
  57         -2.5806891390014060012598294252898849657186441048147e-2;
  58     pub(crate) const COSPI_KERNEL_TAYLOR_12: f64 =
  59         1.9295743094039230479033455636859576401684718150003e-3;
  60     pub(crate) const COSPI_KERNEL_TAYLOR_14: f64 =
  61         -1.0463810492484570711801672835223932761029733149091e-4;
  62     pub(crate) const COSPI_KERNEL_TAYLOR_16: f64 =
  63         4.3030695870329470072978237149669233008960901556009e-6;
  64     pub(crate) const COSPI_KERNEL_TAYLOR_18: f64 =
  65         -1.387895246221377211446808750399309343777037849978e-7;
  66 }
  67
  68 /// computes `sin(pi * x)` for `-0.25 <= x <= 0.25`
  69 /// not guaranteed to give correct sign for zero result
  70 /// has an error of up to 2ULP
  71 pub fn sin_pi_kernel_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> Ctx::VecF16 {
  72     let x_sq = x * x;
  73     let mut v: Ctx::VecF16 = ctx.make(consts::SINPI_KERNEL_TAYLOR_5.to());
  74     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_3.to()));
  75     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_1.to()));
  76     v * x
  77 }
  78
  79 /// computes `cos(pi * x)` for `-0.25 <= x <= 0.25`
  80 /// has an error of up to 2ULP
  81 pub fn cos_pi_kernel_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> Ctx::VecF16 {
  82     let x_sq = x * x;
  83     let mut v: Ctx::VecF16 = ctx.make(consts::COSPI_KERNEL_TAYLOR_4.to());
  84     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_2.to()));
  85     v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_0.to()))
  86 }
  87
  88 /// computes `sin(pi * x)` for `-0.25 <= x <= 0.25`
  89 /// not guaranteed to give correct sign for zero result
  90 /// has an error of up to 2ULP
  91 pub fn sin_pi_kernel_f32<Ctx: Context>(ctx: Ctx, x: Ctx::VecF32) -> Ctx::VecF32 {
  92     let x_sq = x * x;
  93     let mut v: Ctx::VecF32 = ctx.make(consts::SINPI_KERNEL_TAYLOR_9.to());
  94     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_7.to()));
  95     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_5.to()));
  96     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_3.to()));
  97     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_1.to()));
  98     v * x
  99 }
 100
 101 /// computes `cos(pi * x)` for `-0.25 <= x <= 0.25`
 102 /// has an error of up to 2ULP
 103 pub fn cos_pi_kernel_f32<Ctx: Context>(ctx: Ctx, x: Ctx::VecF32) -> Ctx::VecF32 {
 104     let x_sq = x * x;
 105     let mut v: Ctx::VecF32 = ctx.make(consts::COSPI_KERNEL_TAYLOR_8.to());
 106     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_6.to()));
 107     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_4.to()));
 108     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_2.to()));
 109     v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_0.to()))
 110 }
 111
 112 /// computes `sin(pi * x)` for `-0.25 <= x <= 0.25`
 113 /// not guaranteed to give correct sign for zero result
 114 /// has an error of up to 2ULP
 115 pub fn sin_pi_kernel_f64<Ctx: Context>(ctx: Ctx, x: Ctx::VecF64) -> Ctx::VecF64 {
 116     let x_sq = x * x;
 117     let mut v: Ctx::VecF64 = ctx.make(consts::SINPI_KERNEL_TAYLOR_15.to());
 118     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_13.to()));
 119     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_11.to()));
 120     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_9.to()));
 121     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_7.to()));
 122     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_5.to()));
 123     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_3.to()));
 124     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_1.to()));
 125     v * x
 126 }
 127
 128 /// computes `cos(pi * x)` for `-0.25 <= x <= 0.25`
 129 /// has an error of up to 2ULP
 130 pub fn cos_pi_kernel_f64<Ctx: Context>(ctx: Ctx, x: Ctx::VecF64) -> Ctx::VecF64 {
 131     let x_sq = x * x;
 132     let mut v: Ctx::VecF64 = ctx.make(consts::COSPI_KERNEL_TAYLOR_16.to());
 133     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_14.to()));
 134     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_12.to()));
 135     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_10.to()));
 136     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_8.to()));
 137     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_6.to()));
 138     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_4.to()));
 139     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_2.to()));
 140     v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_0.to()))
 141 }
 142
 143 /// computes `(sin(pi * x), cos(pi * x))`
 144 /// not guaranteed to give correct sign for zero results
 145 /// inherits error from `sin_pi_kernel` and `cos_pi_kernel`
 146 pub fn sin_cos_pi_impl<
 147     Ctx: Context,
 148     VecF: Float<PrimFloat = PrimF> + Make<Context = Ctx>,
 149     PrimF: PrimFloat<BitsType = PrimU>,
 150     PrimU: PrimUInt,
 151     SinPiKernel: FnOnce(Ctx, VecF) -> VecF,
 152     CosPiKernel: FnOnce(Ctx, VecF) -> VecF,
 153 >(
 154     ctx: Ctx,
 155     x: VecF,
 156     sin_pi_kernel: SinPiKernel,
 157     cos_pi_kernel: CosPiKernel,
 158 ) -> (VecF, VecF) {
 159     let two_f: VecF = ctx.make(2.0.to());
 160     let one_half: VecF = ctx.make(0.5.to());
 161     let max_contiguous_integer: VecF =
 162         ctx.make((PrimU::cvt_from(1) << (PrimF::MANTISSA_FIELD_WIDTH + 1.to())).to());
 163     // if `x` is finite and bigger than `max_contiguous_integer`, then x is an even integer
 164     let in_range = x.abs().lt(max_contiguous_integer); // use `lt` so nans are counted as out-of-range
 165     let is_finite = x.is_finite();
 166     let nan: VecF = ctx.make(f32::NAN.to());
 167     let zero_f: VecF = ctx.make(0.to());
 168     let one_f: VecF = ctx.make(1.to());
 169     let zero_i: VecF::SignedBitsType = ctx.make(0.to());
 170     let one_i: VecF::SignedBitsType = ctx.make(1.to());
 171     let two_i: VecF::SignedBitsType = ctx.make(2.to());
 172     let out_of_range_sin = is_finite.select(zero_f, nan);
 173     let out_of_range_cos = is_finite.select(one_f, nan);
 174     let xi = (x * two_f).round();
 175     let xk = x - xi * one_half;
 176     let sk = sin_pi_kernel(ctx, xk);
 177     let ck = cos_pi_kernel(ctx, xk);
 178     let xi = VecF::SignedBitsType::cvt_from(xi);
 179     let bit_0_clear = (xi & one_i).eq(zero_i);
 180     let st = bit_0_clear.select(sk, ck);
 181     let ct = bit_0_clear.select(ck, sk);
 182     let s = (xi & two_i).eq(zero_i).select(st, -st);
 183     let c = ((xi + one_i) & two_i).eq(zero_i).select(ct, -ct);
 184     (
 185         in_range.select(s, out_of_range_sin),
 186         in_range.select(c, out_of_range_cos),
 187     )
 188 }
 189
 190 /// computes `(sin(pi * x), cos(pi * x))`
 191 /// not guaranteed to give correct sign for zero results
 192 /// has an error of up to 2ULP
 193 pub fn sin_cos_pi_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> (Ctx::VecF16, Ctx::VecF16) {
 194     sin_cos_pi_impl(ctx, x, sin_pi_kernel_f16, cos_pi_kernel_f16)
 195 }
 196
 197 /// computes `sin(pi * x)`
 198 /// not guaranteed to give correct sign for zero results
 199 /// has an error of up to 2ULP
 200 pub fn sin_pi_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> Ctx::VecF16 {
 201     sin_cos_pi_f16(ctx, x).0
 202 }
 203
 204 /// computes `cos(pi * x)`
 205 /// not guaranteed to give correct sign for zero results
 206 /// has an error of up to 2ULP
 207 pub fn cos_pi_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> Ctx::VecF16 {
 208     sin_cos_pi_f16(ctx, x).1
 209 }
 210
 211 /// computes `(sin(pi * x), cos(pi * x))`
 212 /// not guaranteed to give correct sign for zero results
 213 /// has an error of up to 2ULP
 214 pub fn sin_cos_pi_f32<Ctx: Context>(ctx: Ctx, x: Ctx::VecF32) -> (Ctx::VecF32, Ctx::VecF32) {
 215     sin_cos_pi_impl(ctx, x, sin_pi_kernel_f32, cos_pi_kernel_f32)
 216 }
 217
 218 /// computes `sin(pi * x)`
 219 /// not guaranteed to give correct sign for zero results
 220 /// has an error of up to 2ULP
 221 pub fn sin_pi_f32<Ctx: Context>(ctx: Ctx, x: Ctx::VecF32) -> Ctx::VecF32 {
 222     sin_cos_pi_f32(ctx, x).0
 223 }
 224
 225 /// computes `cos(pi * x)`
 226 /// not guaranteed to give correct sign for zero results
 227 /// has an error of up to 2ULP
 228 pub fn cos_pi_f32<Ctx: Context>(ctx: Ctx, x: Ctx::VecF32) -> Ctx::VecF32 {
 229     sin_cos_pi_f32(ctx, x).1
 230 }
 231
 232 /// computes `(sin(pi * x), cos(pi * x))`
 233 /// not guaranteed to give correct sign for zero results
 234 /// has an error of up to 2ULP
 235 pub fn sin_cos_pi_f64<Ctx: Context>(ctx: Ctx, x: Ctx::VecF64) -> (Ctx::VecF64, Ctx::VecF64) {
 236     sin_cos_pi_impl(ctx, x, sin_pi_kernel_f64, cos_pi_kernel_f64)
 237 }
 238
 239 /// computes `sin(pi * x)`
 240 /// not guaranteed to give correct sign for zero results
 241 /// has an error of up to 2ULP
 242 pub fn sin_pi_f64<Ctx: Context>(ctx: Ctx, x: Ctx::VecF64) -> Ctx::VecF64 {
 243     sin_cos_pi_f64(ctx, x).0
 244 }
 245
 246 /// computes `cos(pi * x)`
 247 /// not guaranteed to give correct sign for zero results
 248 /// has an error of up to 2ULP
 249 pub fn cos_pi_f64<Ctx: Context>(ctx: Ctx, x: Ctx::VecF64) -> Ctx::VecF64 {
 250     sin_cos_pi_f64(ctx, x).1
 251 }
 252
 253 #[cfg(test)]
 254 mod tests {
 255     use super::*;
 256     use crate::{
 257         f16::F16,
 258         scalar::{Scalar, Value},
 259     };
 260     use std::f64;
 261
 262     struct CheckUlpCallbackArg<F, I> {
 263         distance_in_ulp: I,
 264         x: F,
 265         expected: F,
 266         result: F,
 267     }
 268
 269     #[track_caller]
 270     fn check_ulp<T: PrimFloat>(
 271         x: T,
 272         is_ok: impl Fn(CheckUlpCallbackArg<T, u64>) -> bool,
 273         fn_f16: impl Fn(T) -> T,
 274         fn_reference: impl Fn(f64) -> f64,
 275     ) {
 276         let x_f64: f64 = x.to();
 277         let expected_f64 = fn_reference(x_f64);
 278         let expected: T = expected_f64.to();
 279         let result = fn_f16(x);
 280         if result == expected {
 281             return;
 282         }
 283         if result.is_nan() && expected.is_nan() {
 284             return;
 285         }
 286         let expected_bits: i64 = expected.to_bits().to();
 287         let result_bits: i64 = result.to_bits().to();
 288         let distance_in_ulp = (expected_bits - result_bits).unsigned_abs();
 289         if !result.is_nan()
 290             && !expected.is_nan()
 291             && is_ok(CheckUlpCallbackArg {
 292                 distance_in_ulp,
 293                 x,
 294                 expected,
 295                 result,
 296             })
 297         {
 298             return;
 299         }
 300         panic!(
 301             "error is too big: \
 302                 x = {x:?} {x_bits:#X}, \
 303                 result = {result:?} {result_bits:#X}, \
 304                 expected = {expected:?} {expected_bits:#X}, \
 305                 distance_in_ulp = {distance_in_ulp}",
 306             x = x,
 307             x_bits = x.to_bits(),
 308             result = result,
 309             result_bits = result.to_bits(),
 310             expected = expected,
 311             expected_bits = expected.to_bits(),
 312             distance_in_ulp = distance_in_ulp,
 313         );
 314     }
 315
 316     #[test]
 317     #[cfg_attr(
 318         not(feature = "f16"),
 319         should_panic(expected = "f16 feature is not enabled")
 320     )]
 321     fn test_sin_pi_kernel_f16() {
 322         let check = |x| {
 323             check_ulp(
 324                 x,
 325                 |arg| arg.distance_in_ulp <= if arg.expected == 0.to() { 0 } else { 2 },
 326                 |x| sin_pi_kernel_f16(Scalar, Value(x)).0,
 327                 |x| (f64::consts::PI * x).sin(),
 328             )
 329         };
 330         let quarter = F16::to_bits(0.25f32.to());
 331         for bits in (0..=quarter).rev() {
 332             check(F16::from_bits(bits));
 333             check(-F16::from_bits(bits));
 334         }
 335     }
 336
 337     #[test]
 338     #[cfg_attr(
 339         not(feature = "f16"),
 340         should_panic(expected = "f16 feature is not enabled")
 341     )]
 342     fn test_cos_pi_kernel_f16() {
 343         let check = |x| {
 344             check_ulp(
 345                 x,
 346                 |arg| arg.distance_in_ulp <= 2 && arg.result <= 1.to(),
 347                 |x| cos_pi_kernel_f16(Scalar, Value(x)).0,
 348                 |x| (f64::consts::PI * x).cos(),
 349             )
 350         };
 351         let quarter = F16::to_bits(0.25f32.to());
 352         for bits in (0..=quarter).rev() {
 353             check(F16::from_bits(bits));
 354             check(-F16::from_bits(bits));
 355         }
 356     }
 357
 358     #[test]
 359     #[cfg(feature = "full_tests")]
 360     fn test_sin_pi_kernel_f32() {
 361         let check = |x| {
 362             check_ulp(
 363                 x,
 364                 |arg| arg.distance_in_ulp <= if arg.expected == 0. { 0 } else { 2 },
 365                 |x| sin_pi_kernel_f32(Scalar, Value(x)).0,
 366                 |x| (f64::consts::PI * x).sin(),
 367             )
 368         };
 369         let quarter = 0.25f32.to_bits();
 370         for bits in (0..=quarter).rev() {
 371             check(f32::from_bits(bits));
 372             check(-f32::from_bits(bits));
 373         }
 374     }
 375
 376     #[test]
 377     #[cfg(feature = "full_tests")]
 378     fn test_cos_pi_kernel_f32() {
 379         let check = |x| {
 380             check_ulp(
 381                 x,
 382                 |arg| arg.distance_in_ulp <= 2 && arg.result <= 1.,
 383                 |x| cos_pi_kernel_f32(Scalar, Value(x)).0,
 384                 |x| (f64::consts::PI * x).cos(),
 385             )
 386         };
 387         let quarter = 0.25f32.to_bits();
 388         for bits in (0..=quarter).rev() {
 389             check(f32::from_bits(bits));
 390             check(-f32::from_bits(bits));
 391         }
 392     }
 393
 394     #[test]
 395     #[cfg(feature = "full_tests")]
 396     fn test_sin_pi_kernel_f64() {
 397         let check = |x| {
 398             check_ulp(
 399                 x,
 400                 sin_cos_pi_check_ulp_callback,
 401                 |x| sin_pi_kernel_f64(Scalar, Value(x)).0,
 402                 |x| reference_sin_cos_pi_f64(x).0,
 403             )
 404         };
 405         let quarter = 0.25f32.to_bits();
 406         for bits in (0..=quarter).rev().step_by(1 << 5) {
 407             check(f32::from_bits(bits) as f64);
 408             check(-f32::from_bits(bits) as f64);
 409         }
 410     }
 411
 412     #[test]
 413     #[cfg(feature = "full_tests")]
 414     fn test_cos_pi_kernel_f64() {
 415         let check = |x| {
 416             check_ulp(
 417                 x,
 418                 sin_cos_pi_check_ulp_callback,
 419                 |x| cos_pi_kernel_f64(Scalar, Value(x)).0,
 420                 |x| reference_sin_cos_pi_f64(x).1,
 421             )
 422         };
 423         let quarter = 0.25f32.to_bits();
 424         for bits in (0..=quarter).rev().step_by(1 << 5) {
 425             check(f32::from_bits(bits) as f64);
 426             check(-f32::from_bits(bits) as f64);
 427         }
 428     }
 429
 430     fn sin_cos_pi_check_ulp_callback<F: PrimFloat>(arg: CheckUlpCallbackArg<F, u64>) -> bool {
 431         if arg.x % 0.5.to() == 0.0.to() {
 432             arg.distance_in_ulp == 0
 433         } else {
 434             arg.distance_in_ulp <= 2 && arg.result.abs() <= 1.to()
 435         }
 436     }
 437
 438     #[test]
 439     #[cfg_attr(
 440         not(feature = "f16"),
 441         should_panic(expected = "f16 feature is not enabled")
 442     )]
 443     fn test_sin_pi_f16() {
 444         for bits in 0..=u16::MAX {
 445             check_ulp(
 446                 F16::from_bits(bits),
 447                 sin_cos_pi_check_ulp_callback,
 448                 |x| sin_pi_f16(Scalar, Value(x)).0,
 449                 |x| (f64::consts::PI * x).sin(),
 450             );
 451         }
 452     }
 453
 454     #[test]
 455     #[cfg_attr(
 456         not(feature = "f16"),
 457         should_panic(expected = "f16 feature is not enabled")
 458     )]
 459     fn test_cos_pi_f16() {
 460         for bits in 0..=u16::MAX {
 461             check_ulp(
 462                 F16::from_bits(bits),
 463                 sin_cos_pi_check_ulp_callback,
 464                 |x| cos_pi_f16(Scalar, Value(x)).0,
 465                 |x| (f64::consts::PI * x).cos(),
 466             );
 467         }
 468     }
 469
 470     fn reference_sin_cos_pi_f32(mut v: f64) -> (f64, f64) {
 471         if !v.is_finite() {
 472             return (f64::NAN, f64::NAN);
 473         }
 474         v %= 2.0;
 475         if v >= 1.0 {
 476             v -= 2.0;
 477         } else if v <= -1.0 {
 478             v += 2.0;
 479         }
 480         v *= 2.0;
 481         let part = v.round() as i32;
 482         v -= part as f64;
 483         v *= f64::consts::PI / 2.0;
 484         let (sin, cos) = v.sin_cos();
 485         match part {
 486             0 => (sin, cos),
 487             1 => (cos, -sin),
 488             2 => (-sin, -cos),
 489             -2 => (-sin, -cos),
 490             -1 => (-cos, sin),
 491             _ => panic!("not implemented: part={}", part),
 492         }
 493     }
 494
 495     fn reference_sin_cos_pi_f64(mut v: f64) -> (f64, f64) {
 496         use az::Cast;
 497         use rug::{float::Constant, Float};
 498         if !v.is_finite() {
 499             return (f64::NAN, f64::NAN);
 500         }
 501         v %= 2.0;
 502         if v >= 1.0 {
 503             v -= 2.0;
 504         } else if v <= -1.0 {
 505             v += 2.0;
 506         }
 507         v *= 2.0;
 508         let part = v.round() as i32;
 509         v -= part as f64;
 510         let precision = 100;
 511         let mut v = Float::with_val(precision, v);
 512         let pi = Float::with_val(precision, Constant::Pi);
 513         let pi_2 = pi / 2;
 514         v *= &pi_2;
 515         let cos = pi_2; // just a temp var, value is ignored
 516         let (sin, cos) = v.sin_cos(cos);
 517         let sin: f64 = sin.cast();
 518         let cos: f64 = cos.cast();
 519         match part {
 520             0 => (sin, cos),
 521             1 => (cos, -sin),
 522             2 => (-sin, -cos),
 523             -2 => (-sin, -cos),
 524             -1 => (-cos, sin),
 525             _ => panic!("not implemented: part={}", part),
 526         }
 527     }
 528
 529     macro_rules! test_reference_sin_cos_pi_test_cases {
 530         ($case:expr, $ty:ident) => {
 531             $case($ty::NAN, $ty::NAN, $ty::NAN);
 532             $case($ty::INFINITY, $ty::NAN, $ty::NAN);
 533             $case(-$ty::INFINITY, $ty::NAN, $ty::NAN);
 534             $case(-4., 0., 1.);
 535             $case(
 536                 -3.875,
 537                 0.38268343236508977172845998403039886676134456248563,
 538                 0.92387953251128675612818318939678828682241662586364,
 539             );
 540             $case(
 541                 -3.75,
 542                 0.70710678118654752440084436210484903928483593768847,
 543                 0.70710678118654752440084436210484903928483593768847,
 544             );
 545             $case(
 546                 -3.625,
 547                 0.92387953251128675612818318939678828682241662586364,
 548                 0.38268343236508977172845998403039886676134456248563,
 549             );
 550             $case(-3.5, 1., -0.);
 551             $case(
 552                 -3.375,
 553                 0.92387953251128675612818318939678828682241662586364,
 554                 -0.38268343236508977172845998403039886676134456248563,
 555             );
 556             $case(
 557                 -3.25,
 558                 0.70710678118654752440084436210484903928483593768847,
 559                 -0.70710678118654752440084436210484903928483593768847,
 560             );
 561             $case(
 562                 -3.125,
 563                 0.38268343236508977172845998403039886676134456248563,
 564                 -0.92387953251128675612818318939678828682241662586364,
 565             );
 566             $case(-3., -0., -1.);
 567             $case(
 568                 -2.875,
 569                 -0.38268343236508977172845998403039886676134456248563,
 570                 -0.92387953251128675612818318939678828682241662586364,
 571             );
 572             $case(
 573                 -2.75,
 574                 -0.70710678118654752440084436210484903928483593768847,
 575                 -0.70710678118654752440084436210484903928483593768847,
 576             );
 577             $case(
 578                 -2.625,
 579                 -0.92387953251128675612818318939678828682241662586364,
 580                 -0.38268343236508977172845998403039886676134456248563,
 581             );
 582             $case(-2.5, -1., 0.);
 583             $case(
 584                 -2.375,
 585                 -0.92387953251128675612818318939678828682241662586364,
 586                 0.38268343236508977172845998403039886676134456248563,
 587             );
 588             $case(
 589                 -2.25,
 590                 -0.70710678118654752440084436210484903928483593768847,
 591                 0.70710678118654752440084436210484903928483593768847,
 592             );
 593             $case(
 594                 -2.125,
 595                 -0.38268343236508977172845998403039886676134456248563,
 596                 0.92387953251128675612818318939678828682241662586364,
 597             );
 598             $case(-2., 0., 1.);
 599             $case(
 600                 -1.875,
 601                 0.38268343236508977172845998403039886676134456248563,
 602                 0.92387953251128675612818318939678828682241662586364,
 603             );
 604             $case(
 605                 -1.75,
 606                 0.70710678118654752440084436210484903928483593768847,
 607                 0.70710678118654752440084436210484903928483593768847,
 608             );
 609             $case(
 610                 -1.625,
 611                 0.92387953251128675612818318939678828682241662586364,
 612                 0.38268343236508977172845998403039886676134456248563,
 613             );
 614             $case(-1.5, 1., -0.);
 615             $case(
 616                 -1.375,
 617                 0.92387953251128675612818318939678828682241662586364,
 618                 -0.38268343236508977172845998403039886676134456248563,
 619             );
 620             $case(
 621                 -1.25,
 622                 0.70710678118654752440084436210484903928483593768847,
 623                 -0.70710678118654752440084436210484903928483593768847,
 624             );
 625             $case(
 626                 -1.125,
 627                 0.38268343236508977172845998403039886676134456248563,
 628                 -0.92387953251128675612818318939678828682241662586364,
 629             );
 630             $case(-1., -0., -1.);
 631             $case(
 632                 -0.875,
 633                 -0.38268343236508977172845998403039886676134456248563,
 634                 -0.92387953251128675612818318939678828682241662586364,
 635             );
 636             $case(
 637                 -0.75,
 638                 -0.70710678118654752440084436210484903928483593768847,
 639                 -0.70710678118654752440084436210484903928483593768847,
 640             );
 641             $case(
 642                 -0.625,
 643                 -0.92387953251128675612818318939678828682241662586364,
 644                 -0.38268343236508977172845998403039886676134456248563,
 645             );
 646             $case(-0.5, -1., 0.);
 647             $case(
 648                 -0.375,
 649                 -0.92387953251128675612818318939678828682241662586364,
 650                 0.38268343236508977172845998403039886676134456248563,
 651             );
 652             $case(
 653                 -0.25,
 654                 -0.70710678118654752440084436210484903928483593768847,
 655                 0.70710678118654752440084436210484903928483593768847,
 656             );
 657             $case(
 658                 -0.125,
 659                 -0.38268343236508977172845998403039886676134456248563,
 660                 0.92387953251128675612818318939678828682241662586364,
 661             );
 662             $case(0., 0., 1.);
 663             $case(
 664                 0.125,
 665                 0.38268343236508977172845998403039886676134456248563,
 666                 0.92387953251128675612818318939678828682241662586364,
 667             );
 668             $case(
 669                 0.25,
 670                 0.70710678118654752440084436210484903928483593768847,
 671                 0.70710678118654752440084436210484903928483593768847,
 672             );
 673             $case(
 674                 0.375,
 675                 0.92387953251128675612818318939678828682241662586364,
 676                 0.38268343236508977172845998403039886676134456248563,
 677             );
 678             $case(0.5, 1., 0.);
 679             $case(
 680                 0.625,
 681                 0.92387953251128675612818318939678828682241662586364,
 682                 -0.38268343236508977172845998403039886676134456248563,
 683             );
 684             $case(
 685                 0.75,
 686                 0.70710678118654752440084436210484903928483593768847,
 687                 -0.70710678118654752440084436210484903928483593768847,
 688             );
 689             $case(
 690                 0.875,
 691                 0.38268343236508977172845998403039886676134456248563,
 692                 -0.92387953251128675612818318939678828682241662586364,
 693             );
 694             $case(1., 0., -1.);
 695             $case(
 696                 1.125,
 697                 -0.38268343236508977172845998403039886676134456248563,
 698                 -0.92387953251128675612818318939678828682241662586364,
 699             );
 700             $case(
 701                 1.25,
 702                 -0.70710678118654752440084436210484903928483593768847,
 703                 -0.70710678118654752440084436210484903928483593768847,
 704             );
 705             $case(
 706                 1.375,
 707                 -0.92387953251128675612818318939678828682241662586364,
 708                 -0.38268343236508977172845998403039886676134456248563,
 709             );
 710             $case(1.5, -1., -0.);
 711             $case(
 712                 1.625,
 713                 -0.92387953251128675612818318939678828682241662586364,
 714                 0.38268343236508977172845998403039886676134456248563,
 715             );
 716             $case(
 717                 1.75,
 718                 -0.70710678118654752440084436210484903928483593768847,
 719                 0.70710678118654752440084436210484903928483593768847,
 720             );
 721             $case(
 722                 1.875,
 723                 -0.38268343236508977172845998403039886676134456248563,
 724                 0.92387953251128675612818318939678828682241662586364,
 725             );
 726             $case(2., -0., 1.);
 727             $case(
 728                 2.125,
 729                 0.38268343236508977172845998403039886676134456248563,
 730                 0.92387953251128675612818318939678828682241662586364,
 731             );
 732             $case(
 733                 2.25,
 734                 0.70710678118654752440084436210484903928483593768847,
 735                 0.70710678118654752440084436210484903928483593768847,
 736             );
 737             $case(
 738                 2.375,
 739                 0.92387953251128675612818318939678828682241662586364,
 740                 0.38268343236508977172845998403039886676134456248563,
 741             );
 742             $case(2.5, 1., 0.);
 743             $case(
 744                 2.625,
 745                 0.92387953251128675612818318939678828682241662586364,
 746                 -0.38268343236508977172845998403039886676134456248563,
 747             );
 748             $case(
 749                 2.75,
 750                 0.70710678118654752440084436210484903928483593768847,
 751                 -0.70710678118654752440084436210484903928483593768847,
 752             );
 753             $case(
 754                 2.875,
 755                 0.38268343236508977172845998403039886676134456248563,
 756                 -0.92387953251128675612818318939678828682241662586364,
 757             );
 758             $case(3., 0., -1.);
 759             $case(
 760                 3.125,
 761                 -0.38268343236508977172845998403039886676134456248563,
 762                 -0.92387953251128675612818318939678828682241662586364,
 763             );
 764             $case(
 765                 3.25,
 766                 -0.70710678118654752440084436210484903928483593768847,
 767                 -0.70710678118654752440084436210484903928483593768847,
 768             );
 769             $case(
 770                 3.375,
 771                 -0.92387953251128675612818318939678828682241662586364,
 772                 -0.38268343236508977172845998403039886676134456248563,
 773             );
 774             $case(3.5, -1., -0.);
 775             $case(
 776                 3.625,
 777                 -0.92387953251128675612818318939678828682241662586364,
 778                 0.38268343236508977172845998403039886676134456248563,
 779             );
 780             $case(
 781                 3.75,
 782                 -0.70710678118654752440084436210484903928483593768847,
 783                 0.70710678118654752440084436210484903928483593768847,
 784             );
 785             $case(
 786                 3.875,
 787                 -0.38268343236508977172845998403039886676134456248563,
 788                 0.92387953251128675612818318939678828682241662586364,
 789             );
 790             $case(4., -0., 1.);
 791         };
 792     }
 793
 794     #[test]
 795     fn test_reference_sin_cos_pi_f32() {
 796         fn approx_same(a: f32, b: f32) -> bool {
 797             if a.is_finite() && b.is_finite() {
 798                 (a - b).abs() < 1e-6
 799             } else {
 800                 a == b || (a.is_nan() && b.is_nan())
 801             }
 802         }
 803         #[track_caller]
 804         fn case(x: f32, expected_sin: f32, expected_cos: f32) {
 805             let (ref_sin, ref_cos) = reference_sin_cos_pi_f32(x as f64);
 806             assert!(
 807                 approx_same(ref_sin as f32, expected_sin)
 808                     && approx_same(ref_cos as f32, expected_cos),
 809                 "case failed: x={x}, expected_sin={expected_sin}, expected_cos={expected_cos}, ref_sin={ref_sin}, ref_cos={ref_cos}",
 810                 x=x,
 811                 expected_sin=expected_sin,
 812                 expected_cos=expected_cos,
 813                 ref_sin=ref_sin,
 814                 ref_cos=ref_cos,
 815             );
 816         }
 817         test_reference_sin_cos_pi_test_cases!(case, f32);
 818     }
 819
 820     #[test]
 821     fn test_reference_sin_cos_pi_f64() {
 822         fn same(a: f64, b: f64) -> bool {
 823             if a.is_finite() && b.is_finite() {
 824                 a == b
 825             } else {
 826                 a == b || (a.is_nan() && b.is_nan())
 827             }
 828         }
 829         #[track_caller]
 830         fn case(x: f64, expected_sin: f64, expected_cos: f64) {
 831             let (ref_sin, ref_cos) = reference_sin_cos_pi_f64(x);
 832             assert!(
 833                 same(ref_sin, expected_sin) && same(ref_cos, expected_cos),
 834                 "case failed: x={x}, expected_sin={expected_sin}, expected_cos={expected_cos}, ref_sin={ref_sin}, ref_cos={ref_cos}",
 835                 x=x,
 836                 expected_sin=expected_sin,
 837                 expected_cos=expected_cos,
 838                 ref_sin=ref_sin,
 839                 ref_cos=ref_cos,
 840             );
 841         }
 842         test_reference_sin_cos_pi_test_cases!(case, f64);
 843     }
 844
 845     #[test]
 846     #[cfg(feature = "full_tests")]
 847     fn test_sin_pi_f32() {
 848         for bits in 0..=u32::MAX {
 849             check_ulp(
 850                 f32::from_bits(bits),
 851                 sin_cos_pi_check_ulp_callback,
 852                 |x| sin_pi_f32(Scalar, Value(x)).0,
 853                 |x| reference_sin_cos_pi_f32(x).0,
 854             );
 855         }
 856     }
 857
 858     #[test]
 859     #[cfg(feature = "full_tests")]
 860     fn test_cos_pi_f32() {
 861         for bits in 0..=u32::MAX {
 862             check_ulp(
 863                 f32::from_bits(bits),
 864                 sin_cos_pi_check_ulp_callback,
 865                 |x| cos_pi_f32(Scalar, Value(x)).0,
 866                 |x| reference_sin_cos_pi_f32(x).1,
 867             );
 868         }
 869     }
 870
 871     #[test]
 872     #[cfg(feature = "full_tests")]
 873     fn test_sin_pi_f64() {
 874         for bits in (0..=u32::MAX).step_by(1 << 7) {
 875             check_ulp(
 876                 f32::from_bits(bits) as f64,
 877                 sin_cos_pi_check_ulp_callback,
 878                 |x| sin_pi_f64(Scalar, Value(x)).0,
 879                 |x| reference_sin_cos_pi_f64(x).0,
 880             );
 881         }
 882     }
 883
 884     #[test]
 885     #[cfg(feature = "full_tests")]
 886     fn test_cos_pi_f64() {
 887         for bits in (0..=u32::MAX).step_by(1 << 7) {
 888             check_ulp(
 889                 f32::from_bits(bits) as f64,
 890                 sin_cos_pi_check_ulp_callback,
 891                 |x| cos_pi_f64(Scalar, Value(x)).0,
 892                 |x| reference_sin_cos_pi_f64(x).1,
 893             )
 894         }
 895     }
 896 }