3 prim::{PrimFloat, PrimSInt, PrimUInt},
4 traits::{Compare, Context, ConvertFrom, ConvertTo, Float, Make, Select},
8 #![allow(clippy::excessive_precision)]
10 /// coefficients of the Taylor series for `sin(pi * x)` centered at `0`
14 /// sinpi: bfloat(taylor(sin(%pi*x),x,0,19))$
15 /// for i: 1 step 2 thru 19 do
16 /// printf(true, "pub(crate) const SINPI_KERNEL_TAYLOR_~d: f64 = ~a;~%", i, ssubst("e", "b", string(coeff(sinpi, x, i))))$
// `SINPI_KERNEL_TAYLOR_n` is the coefficient of `x^n` in the series for `sin(pi * x)`,
// i.e. `(-1)^((n - 1) / 2) * pi^n / n!`; only odd powers occur.
// The literals carry more digits than an `f64` holds (the file allows
// `clippy::excessive_precision` for this reason).

// x^1 term: pi
18 pub(crate) const SINPI_KERNEL_TAYLOR_1: f64 =
19 3.1415926535897932384626433832795028841971693993751e0;
// x^3 term: -pi^3 / 3!
20 pub(crate) const SINPI_KERNEL_TAYLOR_3: f64 =
21 -5.1677127800499700292460525111835658670375480943142e0;
// x^5 term: pi^5 / 5!
22 pub(crate) const SINPI_KERNEL_TAYLOR_5: f64 =
23 2.550164039877345443856177583695296720669172555234e0;
// x^7 term: -pi^7 / 7!
24 pub(crate) const SINPI_KERNEL_TAYLOR_7: f64 =
25 -5.9926452932079207688773938354604004601536358636814e-1;
// x^9 term: pi^9 / 9!
26 pub(crate) const SINPI_KERNEL_TAYLOR_9: f64 =
27 8.2145886611128228798802365523698344807837460797753e-2;
// x^11 term: -pi^11 / 11!
28 pub(crate) const SINPI_KERNEL_TAYLOR_11: f64 =
29 -7.370430945714350777259089957290781501211638236021e-3;
// x^13 term: pi^13 / 13!
30 pub(crate) const SINPI_KERNEL_TAYLOR_13: f64 =
31 4.6630280576761256442062891447027174382819981361599e-4;
// x^15 term: -pi^15 / 15!
32 pub(crate) const SINPI_KERNEL_TAYLOR_15: f64 =
33 -2.1915353447830215827384652057094188859248708765956e-5;
// x^17 term: pi^17 / 17!
34 pub(crate) const SINPI_KERNEL_TAYLOR_17: f64 =
35 7.9520540014755127847832068624575890327682459384282e-7;
// x^19 term: -pi^19 / 19!
36 pub(crate) const SINPI_KERNEL_TAYLOR_19: f64 =
37 -2.2948428997269873110203872385571587856074785581088e-8;
39 /// coefficients of the Taylor series for `cos(pi * x)` centered at `0`
43 /// cospi: bfloat(taylor(cos(%pi*x),x,0,18))$
44 /// for i: 0 step 2 thru 18 do
45 /// printf(true, "pub(crate) const COSPI_KERNEL_TAYLOR_~d: f64 = ~a;~%", i, ssubst("e", "b", string(coeff(cospi, x, i))))$
// `COSPI_KERNEL_TAYLOR_n` is the coefficient of `x^n` in the series for `cos(pi * x)`,
// i.e. `(-1)^(n / 2) * pi^n / n!`; only even powers occur.
// The literals carry more digits than an `f64` holds (the file allows
// `clippy::excessive_precision` for this reason).

// x^0 term: 1
47 pub(crate) const COSPI_KERNEL_TAYLOR_0: f64 = 1.0e0;
// x^2 term: -pi^2 / 2!
48 pub(crate) const COSPI_KERNEL_TAYLOR_2: f64 =
49 -4.9348022005446793094172454999380755676568497036204e0;
// x^4 term: pi^4 / 4!
50 pub(crate) const COSPI_KERNEL_TAYLOR_4: f64 =
51 4.0587121264167682181850138620293796354053160696952e0;
// x^6 term: -pi^6 / 6!
52 pub(crate) const COSPI_KERNEL_TAYLOR_6: f64 =
53 -1.3352627688545894958753047828505831928711354556681e0;
// x^8 term: pi^8 / 8!
54 pub(crate) const COSPI_KERNEL_TAYLOR_8: f64 =
55 2.3533063035889320454187935277546542154506893530856e-1;
// x^10 term: -pi^10 / 10!
56 pub(crate) const COSPI_KERNEL_TAYLOR_10: f64 =
57 -2.5806891390014060012598294252898849657186441048147e-2;
// x^12 term: pi^12 / 12!
58 pub(crate) const COSPI_KERNEL_TAYLOR_12: f64 =
59 1.9295743094039230479033455636859576401684718150003e-3;
// x^14 term: -pi^14 / 14!
60 pub(crate) const COSPI_KERNEL_TAYLOR_14: f64 =
61 -1.0463810492484570711801672835223932761029733149091e-4;
// x^16 term: pi^16 / 16!
62 pub(crate) const COSPI_KERNEL_TAYLOR_16: f64 =
63 4.3030695870329470072978237149669233008960901556009e-6;
// x^18 term: -pi^18 / 18!
64 pub(crate) const COSPI_KERNEL_TAYLOR_18: f64 =
65 -1.387895246221377211446808750399309343777037849978e-7;
68 /// computes `sin(pi * x)` for `-0.25 <= x <= 0.25`
69 /// not guaranteed to give correct sign for zero result
70 /// has an error of up to 2ULP
71 pub fn sin_pi_kernel_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> Ctx::VecF16 {
// The accumulator starts at the `x^5` Taylor coefficient and is stepped down through the
// `x^3` and `x^1` coefficients; each `mul_add_fast` presumably computes `v * x_sq + c`.
// NOTE(review): `x_sq` is bound on a line not visible here (presumably `x * x`), and the
// final step that turns this polynomial in `x_sq` into an odd function of `x` is also
// outside the visible lines -- confirm against the full file.
73 let mut v: Ctx::VecF16 = ctx.make(consts::SINPI_KERNEL_TAYLOR_5.to());
74 v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_3.to()));
75 v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_1.to()));
79 /// computes `cos(pi * x)` for `-0.25 <= x <= 0.25`
80 /// has an error of up to 2ULP
81 pub fn cos_pi_kernel_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> Ctx::VecF16 {
// The accumulator starts at the `x^4` Taylor coefficient and is stepped down through the
// `x^2` and `x^0` coefficients; each `mul_add_fast` presumably computes `v * x_sq + c`.
// NOTE(review): `x_sq` is bound on a line not visible here (presumably `x * x`) -- confirm.
83 let mut v: Ctx::VecF16 = ctx.make(consts::COSPI_KERNEL_TAYLOR_4.to());
84 v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_2.to()));
// the last step adds the constant term and is the function's result
85 v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_0.to()))
88 /// computes `sin(pi * x)` for `-0.25 <= x <= 0.25`
89 /// not guaranteed to give correct sign for zero result
90 /// has an error of up to 2ULP
91 pub fn sin_pi_kernel_f32<Ctx: Context>(ctx: Ctx, x: Ctx::VecF32) -> Ctx::VecF32 {
// Same scheme as the f16 kernel but with two more terms: the accumulator starts at the
// `x^9` Taylor coefficient and is stepped down through `x^7`, `x^5`, `x^3` and `x^1`;
// each `mul_add_fast` presumably computes `v * x_sq + c`.
// NOTE(review): `x_sq` is bound on a line not visible here (presumably `x * x`), and the
// final step that turns this polynomial in `x_sq` into an odd function of `x` is also
// outside the visible lines -- confirm against the full file.
93 let mut v: Ctx::VecF32 = ctx.make(consts::SINPI_KERNEL_TAYLOR_9.to());
94 v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_7.to()));
95 v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_5.to()));
96 v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_3.to()));
97 v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_1.to()));
101 /// computes `cos(pi * x)` for `-0.25 <= x <= 0.25`
102 /// has an error of up to 2ULP
103 pub fn cos_pi_kernel_f32<Ctx: Context>(ctx: Ctx, x: Ctx::VecF32) -> Ctx::VecF32 {
// Same scheme as the f16 kernel but with two more terms: the accumulator starts at the
// `x^8` Taylor coefficient and is stepped down through `x^6`, `x^4`, `x^2` and `x^0`;
// each `mul_add_fast` presumably computes `v * x_sq + c`.
// NOTE(review): `x_sq` is bound on a line not visible here (presumably `x * x`) -- confirm.
105 let mut v: Ctx::VecF32 = ctx.make(consts::COSPI_KERNEL_TAYLOR_8.to());
106 v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_6.to()));
107 v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_4.to()));
108 v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_2.to()));
// the last step adds the constant term and is the function's result
109 v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_0.to()))
112 /// computes `(sin(pi * x), cos(pi * x))`
113 /// not guaranteed to give correct sign for zero results
114 /// inherits error from `sin_pi_kernel` and `cos_pi_kernel`
///
/// `x` is split as `x = xi / 2 + xk` with `xi = round(2 * x)`, so that `xk` lies in the
/// kernels' `-0.25 <= xk <= 0.25` domain; the two low bits of `xi` then pick which kernel
/// result, and which sign, forms each output.
///
/// lanes with `x.abs() >= 2^(MANTISSA_FIELD_WIDTH + 1)` bypass the kernels' results: they
/// yield `(0, 1)` when `x` is finite and `(NaN, NaN)` otherwise (NaN inputs land here too)
115 pub fn sin_cos_pi_impl<
117 VecF: Float<PrimFloat = PrimF> + Make<Context = Ctx>,
118 PrimF: PrimFloat<BitsType = PrimU>,
120 SinPiKernel: FnOnce(Ctx, VecF) -> VecF,
121 CosPiKernel: FnOnce(Ctx, VecF) -> VecF,
125 sin_pi_kernel: SinPiKernel,
126 cos_pi_kernel: CosPiKernel,
128 let two_f: VecF = ctx.make(2.0.to());
129 let one_half: VecF = ctx.make(0.5.to());
// `2^(MANTISSA_FIELD_WIDTH + 1)`, built as an integer shift and then converted to float
130 let max_contiguous_integer: VecF =
131 ctx.make((PrimU::cvt_from(1) << (PrimF::MANTISSA_FIELD_WIDTH + 1.to())).to());
132 // if `x` is finite and `x.abs() >= max_contiguous_integer`, then `x` is an even integer, so `sin(pi * x)` is `0` and `cos(pi * x)` is `1`
133 let in_range = x.abs().lt(max_contiguous_integer); // use `lt` so nans are counted as out-of-range
134 let is_finite = x.is_finite();
135 let nan: VecF = ctx.make(f32::NAN.to());
136 let zero_f: VecF = ctx.make(0.to());
137 let one_f: VecF = ctx.make(1.to());
138 let zero_i: VecF::SignedBitsType = ctx.make(0.to());
139 let one_i: VecF::SignedBitsType = ctx.make(1.to());
140 let two_i: VecF::SignedBitsType = ctx.make(2.to());
// results for lanes that fail `in_range`: exact values for huge finite inputs, NaN for
// infinities and NaNs
141 let out_of_range_sin = is_finite.select(zero_f, nan);
142 let out_of_range_cos = is_finite.select(one_f, nan);
// range reduction: `xi` counts half-units (quarter turns of `pi * x`), `xk` is the
// remainder with `xk.abs() <= 0.25`
143 let xi = (x * two_f).round();
144 let xk = x - xi * one_half;
145 let sk = sin_pi_kernel(ctx, xk);
146 let ck = cos_pi_kernel(ctx, xk);
// from here on `xi` is the integer quadrant index; only its two low bits are inspected
147 let xi = VecF::SignedBitsType::cvt_from(xi);
// even `xi`: sine comes from `sk` and cosine from `ck`; odd `xi`: the roles swap
148 let bit_0_clear = (xi & one_i).eq(zero_i);
149 let st = bit_0_clear.select(sk, ck);
150 let ct = bit_0_clear.select(ck, sk);
// sine is negated when `xi mod 4` is 2 or 3
151 let s = (xi & two_i).eq(zero_i).select(st, -st);
// cosine is negated when `xi mod 4` is 1 or 2, i.e. when bit 1 of `xi + 1` is set
152 let c = ((xi + one_i) & two_i).eq(zero_i).select(ct, -ct);
// keep the kernel-based results only for in-range lanes
154 in_range.select(s, out_of_range_sin),
155 in_range.select(c, out_of_range_cos),
159 /// computes `(sin(pi * x), cos(pi * x))`
160 /// not guaranteed to give correct sign for zero results
161 /// has an error of up to 2ULP
162 pub fn sin_cos_pi_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> (Ctx::VecF16, Ctx::VecF16) {
// instantiate the generic implementation with the f16 polynomial kernels
163 sin_cos_pi_impl(ctx, x, sin_pi_kernel_f16, cos_pi_kernel_f16)
166 /// computes `sin(pi * x)`
167 /// not guaranteed to give correct sign for zero results
168 /// has an error of up to 2ULP
169 pub fn sin_pi_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> Ctx::VecF16 {
// goes through the combined routine and keeps the sine (tuple field `0`)
170 sin_cos_pi_f16(ctx, x).0
173 /// computes `cos(pi * x)`
174 /// not guaranteed to give correct sign for zero results
175 /// has an error of up to 2ULP
176 pub fn cos_pi_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> Ctx::VecF16 {
// goes through the combined routine and keeps the cosine (tuple field `1`)
177 sin_cos_pi_f16(ctx, x).1
180 /// computes `(sin(pi * x), cos(pi * x))`
181 /// not guaranteed to give correct sign for zero results
182 /// has an error of up to 2ULP
183 pub fn sin_cos_pi_f32<Ctx: Context>(ctx: Ctx, x: Ctx::VecF32) -> (Ctx::VecF32, Ctx::VecF32) {
// instantiate the generic implementation with the f32 polynomial kernels
184 sin_cos_pi_impl(ctx, x, sin_pi_kernel_f32, cos_pi_kernel_f32)
187 /// computes `sin(pi * x)`
188 /// not guaranteed to give correct sign for zero results
189 /// has an error of up to 2ULP
190 pub fn sin_pi_f32<Ctx: Context>(ctx: Ctx, x: Ctx::VecF32) -> Ctx::VecF32 {
// goes through the combined routine and keeps the sine (tuple field `0`)
191 sin_cos_pi_f32(ctx, x).0
194 /// computes `cos(pi * x)`
195 /// not guaranteed to give correct sign for zero results
196 /// has an error of up to 2ULP
197 pub fn cos_pi_f32<Ctx: Context>(ctx: Ctx, x: Ctx::VecF32) -> Ctx::VecF32 {
// goes through the combined routine and keeps the cosine (tuple field `1`)
198 sin_cos_pi_f32(ctx, x).1
206 scalar::{Scalar, Value},
210 struct CheckUlpCallbackArg<F, I> {
218 fn check_ulp<T: PrimFloat>(
220 is_ok: impl Fn(CheckUlpCallbackArg<T, u64>) -> bool,
221 fn_f16: impl Fn(T) -> T,
222 fn_reference: impl Fn(f64) -> f64,
224 let x_f64: f64 = x.to();
225 let expected_f64 = fn_reference(x_f64);
226 let expected: T = expected_f64.to();
227 let result = fn_f16(x);
228 if result == expected {
231 if result.is_nan() && expected.is_nan() {
234 let expected_bits: i64 = expected.to_bits().to();
235 let result_bits: i64 = result.to_bits().to();
236 let distance_in_ulp = (expected_bits - result_bits).unsigned_abs();
238 && !expected.is_nan()
239 && is_ok(CheckUlpCallbackArg {
250 x = {x:?} {x_bits:#X}, \
251 result = {result:?} {result_bits:#X}, \
252 expected = {expected:?} {expected_bits:#X}, \
253 distance_in_ulp = {distance_in_ulp}",
255 x_bits = x.to_bits(),
257 result_bits = result.to_bits(),
259 expected_bits = expected.to_bits(),
260 distance_in_ulp = distance_in_ulp,
266 not(feature = "f16"),
267 should_panic(expected = "f16 feature is not enabled")
269 fn test_sin_pi_kernel_f16() {
273 |arg| arg.distance_in_ulp <= if arg.expected == 0.to() { 0 } else { 2 },
274 |x| sin_pi_kernel_f16(Scalar, Value(x)).0,
275 |x| (f64::consts::PI * x).sin(),
278 let quarter = F16::to_bits(0.25f32.to());
279 for bits in (0..=quarter).rev() {
280 check(F16::from_bits(bits));
281 check(-F16::from_bits(bits));
287 not(feature = "f16"),
288 should_panic(expected = "f16 feature is not enabled")
290 fn test_cos_pi_kernel_f16() {
294 |arg| arg.distance_in_ulp <= 2 && arg.result <= 1.to(),
295 |x| cos_pi_kernel_f16(Scalar, Value(x)).0,
296 |x| (f64::consts::PI * x).cos(),
299 let quarter = F16::to_bits(0.25f32.to());
300 for bits in (0..=quarter).rev() {
301 check(F16::from_bits(bits));
302 check(-F16::from_bits(bits));
307 #[cfg(feature = "full_tests")]
308 fn test_sin_pi_kernel_f32() {
312 |arg| arg.distance_in_ulp <= if arg.expected == 0.to() { 0 } else { 2 },
313 |x| sin_pi_kernel_f32(Scalar, Value(x)).0,
314 |x| (f64::consts::PI * x).sin(),
317 let quarter = 0.25f32.to_bits();
318 for bits in (0..=quarter).rev() {
319 check(f32::from_bits(bits));
320 check(-f32::from_bits(bits));
325 #[cfg(feature = "full_tests")]
326 fn test_cos_pi_kernel_f32() {
330 |arg| arg.distance_in_ulp <= 2 && arg.result <= 1.to(),
331 |x| cos_pi_kernel_f32(Scalar, Value(x)).0,
332 |x| (f64::consts::PI * x).cos(),
335 let quarter = 0.25f32.to_bits();
336 for bits in (0..=quarter).rev() {
337 check(f32::from_bits(bits));
338 check(-f32::from_bits(bits));
342 fn sin_cos_pi_check_ulp_callback<F: PrimFloat>(arg: CheckUlpCallbackArg<F, u64>) -> bool {
343 if arg.x % 0.5.to() == 0.0.to() {
344 arg.distance_in_ulp == 0
346 arg.distance_in_ulp <= 2 && arg.result.abs() <= 1.to()
352 not(feature = "f16"),
353 should_panic(expected = "f16 feature is not enabled")
355 fn test_sin_pi_f16() {
356 for bits in 0..=u16::MAX {
358 F16::from_bits(bits),
359 sin_cos_pi_check_ulp_callback,
360 |x| sin_pi_f16(Scalar, Value(x)).0,
361 |x| (f64::consts::PI * x).sin(),
368 not(feature = "f16"),
369 should_panic(expected = "f16 feature is not enabled")
371 fn test_cos_pi_f16() {
372 for bits in 0..=u16::MAX {
374 F16::from_bits(bits),
375 sin_cos_pi_check_ulp_callback,
376 |x| cos_pi_f16(Scalar, Value(x)).0,
377 |x| (f64::consts::PI * x).cos(),
382 fn reference_sin_cos_pi_f32(mut v: f64) -> (f64, f64) {
384 return (f64::NAN, f64::NAN);
389 } else if v <= -1.0 {
393 let part = v.round() as i32;
395 v *= f64::consts::PI / 2.0;
396 let (sin, cos) = v.sin_cos();
403 _ => panic!("not implemented: part={}", part),
408 fn test_reference_sin_cos_pi_f32() {
409 fn approx_same(a: f32, b: f32) -> bool {
410 if a.is_finite() && b.is_finite() {
413 a == b || (a.is_nan() && b.is_nan())
417 fn case(x: f32, expected_sin: f32, expected_cos: f32) {
418 let (ref_sin, ref_cos) = reference_sin_cos_pi_f32(x as f64);
420 approx_same(ref_sin as f32, expected_sin)
421 && approx_same(ref_cos as f32, expected_cos),
422 "case failed: x={x}, expected_sin={expected_sin}, expected_cos={expected_cos}, ref_sin={ref_sin}, ref_cos={ref_cos}",
424 expected_sin=expected_sin,
425 expected_cos=expected_cos,
430 case(f32::NAN, f32::NAN, f32::NAN);
431 case(f32::INFINITY, f32::NAN, f32::NAN);
432 case(-f32::INFINITY, f32::NAN, f32::NAN);
433 case(-4.0, 0.0, 1.0);
434 case(-3.875, 0.3826834323650906, 0.9238795325112864);
435 case(-3.75, 0.7071067811865475, 0.7071067811865475);
436 case(-3.625, 0.9238795325112867, 0.3826834323650898);
437 case(-3.5, 1.0, 0.0);
438 case(-3.375, 0.9238795325112864, -0.3826834323650905);
439 case(-3.25, 0.7071067811865475, -0.7071067811865475);
440 case(-3.125, 0.3826834323650898, -0.9238795325112867);
441 case(-3.0, 0.0, -1.0);
442 case(-2.875, -0.3826834323650905, -0.9238795325112864);
443 case(-2.75, -0.7071067811865475, -0.7071067811865475);
444 case(-2.625, -0.9238795325112867, -0.3826834323650899);
445 case(-2.5, -1.0, 0.0);
446 case(-2.375, -0.9238795325112865, 0.3826834323650904);
447 case(-2.25, -0.7071067811865475, 0.7071067811865475);
448 case(-2.125, -0.3826834323650899, 0.9238795325112867);
449 case(-2.0, 0.0, 1.0);
450 case(-1.875, 0.3826834323650904, 0.9238795325112865);
451 case(-1.75, 0.7071067811865475, 0.7071067811865475);
452 case(-1.625, 0.9238795325112866, 0.38268343236509);
453 case(-1.5, 1.0, 0.0);
454 case(-1.375, 0.9238795325112865, -0.3826834323650903);
455 case(-1.25, 0.7071067811865475, -0.7071067811865475);
456 case(-1.125, 0.3826834323650896, -0.9238795325112869);
457 case(-1.0, 0.0, -1.0);
458 case(-0.875, -0.3826834323650899, -0.9238795325112867);
459 case(-0.75, -0.7071067811865475, -0.7071067811865475);
460 case(-0.625, -0.9238795325112867, -0.3826834323650897);
461 case(-0.5, -1.0, 0.0);
462 case(-0.375, -0.9238795325112867, 0.3826834323650898);
463 case(-0.25, -0.7071067811865475, 0.7071067811865475);
464 case(-0.125, -0.3826834323650898, 0.9238795325112867);
466 case(0.125, 0.3826834323650898, 0.9238795325112867);
467 case(0.25, 0.7071067811865475, 0.7071067811865475);
468 case(0.375, 0.9238795325112867, 0.3826834323650898);
470 case(0.625, 0.9238795325112867, -0.3826834323650897);
471 case(0.75, 0.7071067811865475, -0.7071067811865475);
472 case(0.875, 0.3826834323650899, -0.9238795325112867);
473 case(1.0, 0.0, -1.0);
474 case(1.125, -0.3826834323650896, -0.9238795325112869);
475 case(1.25, -0.7071067811865475, -0.7071067811865475);
476 case(1.375, -0.9238795325112865, -0.3826834323650903);
477 case(1.5, -1.0, 0.0);
478 case(1.625, -0.9238795325112866, 0.38268343236509);
479 case(1.75, -0.7071067811865475, 0.7071067811865475);
480 case(1.875, -0.3826834323650904, 0.9238795325112865);
482 case(2.125, 0.3826834323650899, 0.9238795325112867);
483 case(2.25, 0.7071067811865475, 0.7071067811865475);
484 case(2.375, 0.9238795325112865, 0.3826834323650904);
486 case(2.625, 0.9238795325112867, -0.3826834323650899);
487 case(2.75, 0.7071067811865475, -0.7071067811865475);
488 case(2.875, 0.3826834323650905, -0.9238795325112864);
489 case(3.0, 0.0, -1.0);
490 case(3.125, -0.3826834323650898, -0.9238795325112867);
491 case(3.25, -0.7071067811865475, -0.7071067811865475);
492 case(3.375, -0.9238795325112864, -0.3826834323650905);
493 case(3.5, -1.0, 0.0);
494 case(3.625, -0.9238795325112867, 0.3826834323650898);
495 case(3.75, -0.7071067811865475, 0.7071067811865475);
496 case(3.875, -0.3826834323650906, 0.9238795325112864);
501 #[cfg(feature = "full_tests")]
502 fn test_sin_pi_f32() {
503 for bits in 0..=u32::MAX {
505 f32::from_bits(bits),
506 sin_cos_pi_check_ulp_callback,
507 |x| sin_pi_f32(Scalar, Value(x)).0,
508 |x| reference_sin_cos_pi_f32(x).0,
514 #[cfg(feature = "full_tests")]
515 fn test_cos_pi_f32() {
516 for bits in 0..=u32::MAX {
518 f32::from_bits(bits),
519 sin_cos_pi_check_ulp_callback,
520 |x| cos_pi_f32(Scalar, Value(x)).0,
521 |x| reference_sin_cos_pi_f32(x).1,