From d77893b257c86d7ddc81f3772b5e143ce768d291 Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Wed, 5 May 2021 02:56:23 -0700 Subject: [PATCH] stdsimd bindings work! --- Cargo.toml | 2 +- src/f16.rs | 14 +- src/stdsimd.rs | 1396 ++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 1372 insertions(+), 40 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 858dbd9..74c1f2b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ core_simd = { version = "0.1.0", git = "https://github.com/rust-lang/stdsimd", o vector-math-proc-macro = { version = "=0.1.0", path = "vector-math-proc-macro" } [features] -default = ["f16", "fma"] +default = ["fma"] f16 = ["half"] fma = ["std"] std = [] diff --git a/src/f16.rs b/src/f16.rs index ee13d99..5bc1119 100644 --- a/src/f16.rs +++ b/src/f16.rs @@ -14,6 +14,12 @@ type F16Impl = u16; #[repr(transparent)] pub struct F16(F16Impl); +#[cfg(not(feature = "f16"))] +#[track_caller] +pub(crate) fn panic_f16_feature_disabled() -> ! { + panic!("f16 feature is not enabled") +} + #[cfg(feature = "f16")] macro_rules! f16_impl { ($v:expr, [$($vars:ident),*]) => { @@ -26,11 +32,17 @@ macro_rules! f16_impl { ($v:expr, [$($vars:ident),*]) => { { $(let _ = $vars;)* - panic!("f16 feature is not enabled") + panic_f16_feature_disabled() } }; } +impl Default for F16 { + fn default() -> Self { + f16_impl!(F16(F16Impl::default()), []) + } +} + impl From for F16 { fn from(v: F16Impl) -> Self { F16(v) diff --git a/src/stdsimd.rs b/src/stdsimd.rs index 4e76d8f..046a337 100644 --- a/src/stdsimd.rs +++ b/src/stdsimd.rs @@ -1,45 +1,1365 @@ -use crate::{f16::F16, traits::Context}; -use core::marker::PhantomData; -use core_simd::SimdF32; +#[cfg(not(feature = "f16"))] +use crate::f16::panic_f16_feature_disabled; +use crate::{ + f16::F16, + traits::{Bool, Compare, Context, ConvertTo, Float, Int, Make, SInt, Select, UInt}, +}; +use core::{ + marker::PhantomData, + ops::{ + Add, AddAssign, BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Div, + DivAssign, Mul, MulAssign, Neg, Not, Rem, RemAssign, Shl, ShlAssign, Shr, ShrAssign, Sub, + SubAssign, + }, +}; +use core_simd::{ + LanesAtMost32, Mask, Mask16, Mask32, Mask64, Mask8, SimdF32, SimdF64, SimdI16, SimdI32, + SimdI64, SimdI8, SimdU16, SimdU32, SimdU64, SimdU8, +}; + +#[cfg(not(feature = "f16"))] +mod f16 { + use super::*; + + #[derive(Copy, Clone, Debug)] + #[repr(transparent)] + pub struct SimdF16(pub(crate) SimdU16) + where + SimdU16: LanesAtMost32; + + impl SimdF16 + where + SimdU16: LanesAtMost32, + SimdI16: LanesAtMost32, + Mask16: Mask, + { + pub(crate) fn splat(_v: F16) -> Self { + panic_f16_feature_disabled() + } + pub(crate) fn abs(self) -> Self { + panic_f16_feature_disabled() + } + pub(crate) fn trunc(self) -> Self { + panic_f16_feature_disabled() + } + pub(crate) fn floor(self) -> Self { + panic_f16_feature_disabled() + } + pub(crate) fn ceil(self) -> Self { + panic_f16_feature_disabled() + } + pub(crate) fn round(self) -> Self { + panic_f16_feature_disabled() + } + pub(crate) fn from_bits(_v: SimdU16) -> Self { + panic_f16_feature_disabled() + } + pub(crate) fn to_bits(self) -> SimdU16 { + panic_f16_feature_disabled() + } + pub(crate) fn is_finite(self) -> Mask16 { + panic_f16_feature_disabled() + } + pub(crate) fn lanes_eq(self, _rhs: Self) -> Mask16 { + panic_f16_feature_disabled() + } + pub(crate) fn lanes_ne(self, _rhs: Self) -> Mask16 { + panic_f16_feature_disabled() + } + pub(crate) fn lanes_gt(self, _rhs: Self) -> Mask16 { + panic_f16_feature_disabled() + } + pub(crate) fn lanes_ge(self, _rhs: Self) -> Mask16 { + panic_f16_feature_disabled() + } + pub(crate) fn lanes_lt(self, _rhs: Self) -> Mask16 { + panic_f16_feature_disabled() + } + pub(crate) fn lanes_le(self, _rhs: Self) -> Mask16 { + panic_f16_feature_disabled() + } + } + + impl From<[F16; LANES]> for SimdF16 + where + SimdU16: LanesAtMost32, + { + fn from(_v: [F16; LANES]) -> Self { + panic_f16_feature_disabled() + } + } + + impl From> for [F16; LANES] + where + SimdU16: LanesAtMost32, + { + fn from(_v: SimdF16) -> Self { + panic_f16_feature_disabled() + } + } + + macro_rules! impl_f16_bin_op { + ($trait:ident, $fn:ident) => { + impl $trait for SimdF16 + where + SimdU16: LanesAtMost32, + { + type Output = Self; + + fn $fn(self, _rhs: Self) -> Self::Output { + panic_f16_feature_disabled() + } + } + }; + } + + impl_f16_bin_op!(Add, add); + impl_f16_bin_op!(Sub, sub); + impl_f16_bin_op!(Mul, mul); + impl_f16_bin_op!(Div, div); + impl_f16_bin_op!(Rem, rem); + + macro_rules! impl_f16_bin_assign_op { + ($trait:ident, $fn:ident) => { + impl $trait for SimdF16 + where + SimdU16: LanesAtMost32, + { + fn $fn(&mut self, _rhs: Self) { + panic_f16_feature_disabled() + } + } + }; + } + + impl_f16_bin_assign_op!(AddAssign, add_assign); + impl_f16_bin_assign_op!(SubAssign, sub_assign); + impl_f16_bin_assign_op!(MulAssign, mul_assign); + impl_f16_bin_assign_op!(DivAssign, div_assign); + impl_f16_bin_assign_op!(RemAssign, rem_assign); + + impl Neg for SimdF16 + where + SimdU16: LanesAtMost32, + { + type Output = Self; + + fn neg(self) -> Self::Output { + panic_f16_feature_disabled() + } + } +} + +#[cfg(not(feature = "f16"))] +use f16::SimdF16; + +#[cfg(feature = "f16")] +compile_error!("core_simd doesn't yet support f16"); #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, Default)] pub struct StdSimd(PhantomData<[(); LANES]>); #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Default)] -pub struct Scalar(pub T, PhantomData<[(); LANES]>); +#[repr(transparent)] +pub struct Wrapper(pub T, PhantomData<[(); LANES]>); -impl From for Scalar { +impl From for Wrapper { fn from(v: T) -> Self { - Scalar(v, PhantomData) - } -} - -/* -TODO(programmerjake): finish after splitting Context::VecBool -impl Context for StdSimd { - type Bool = Scalar; - type U8 = Scalar; - type I8 = Scalar; - type U16 = Scalar; - type I16 = Scalar; - type F16 = Scalar; - type U32 = Scalar; - type I32 = Scalar; - type F32 = Scalar; - type U64 = Scalar; - type I64 = Scalar; - type F64 = Scalar; - type VecBool; - type VecU8; - type VecI8; - type VecU16; - type VecI16; - type VecF16; - type VecU32; - type VecI32; - type VecF32; - type VecU64; - type VecI64; - type VecF64; -} -*/ + Wrapper(v, PhantomData) + } +} + +macro_rules! impl_bin_op_for_wrapper { + ($trait:ident, $fn:ident) => { + impl, const LANES: usize> $trait for Wrapper { + type Output = Self; + + fn $fn(self, rhs: Self) -> Self::Output { + self.0.$fn(rhs.0).into() + } + } + }; +} + +impl_bin_op_for_wrapper!(Add, add); +impl_bin_op_for_wrapper!(Sub, sub); +impl_bin_op_for_wrapper!(Mul, mul); +impl_bin_op_for_wrapper!(Div, div); +impl_bin_op_for_wrapper!(Rem, rem); +impl_bin_op_for_wrapper!(Shl, shl); +impl_bin_op_for_wrapper!(Shr, shr); +impl_bin_op_for_wrapper!(BitAnd, bitand); +impl_bin_op_for_wrapper!(BitOr, bitor); +impl_bin_op_for_wrapper!(BitXor, bitxor); + +macro_rules! impl_bin_op_assign_for_wrapper { + ($trait:ident, $fn:ident) => { + impl $trait for Wrapper { + fn $fn(&mut self, rhs: Self) { + self.0.$fn(rhs.0); + } + } + }; +} + +impl_bin_op_assign_for_wrapper!(AddAssign, add_assign); +impl_bin_op_assign_for_wrapper!(SubAssign, sub_assign); +impl_bin_op_assign_for_wrapper!(MulAssign, mul_assign); +impl_bin_op_assign_for_wrapper!(DivAssign, div_assign); +impl_bin_op_assign_for_wrapper!(RemAssign, rem_assign); +impl_bin_op_assign_for_wrapper!(ShlAssign, shl_assign); +impl_bin_op_assign_for_wrapper!(ShrAssign, shr_assign); +impl_bin_op_assign_for_wrapper!(BitAndAssign, bitand_assign); +impl_bin_op_assign_for_wrapper!(BitOrAssign, bitor_assign); +impl_bin_op_assign_for_wrapper!(BitXorAssign, bitxor_assign); + +macro_rules! impl_un_op_for_wrapper { + ($trait:ident, $fn:ident) => { + impl, const LANES: usize> $trait for Wrapper { + type Output = Self; + + fn $fn(self) -> Self::Output { + self.0.$fn().into() + } + } + }; +} + +impl_un_op_for_wrapper!(Not, not); +impl_un_op_for_wrapper!(Neg, neg); + +macro_rules! impl_make_for_scalar { + ($ty:ident) => { + impl Make for Wrapper<$ty, LANES> + where + SimdI8: LanesAtMost32, + SimdU8: LanesAtMost32, + Mask8: Mask, + SimdI16: LanesAtMost32, + SimdU16: LanesAtMost32, + Mask16: Mask, + SimdI32: LanesAtMost32, + SimdU32: LanesAtMost32, + SimdF32: LanesAtMost32, + Mask32: Mask, + SimdI64: LanesAtMost32, + SimdU64: LanesAtMost32, + SimdF64: LanesAtMost32, + Mask64: Mask, + { + type Prim = $ty; + + type Context = StdSimd; + + fn ctx(self) -> Self::Context { + StdSimd::default() + } + + fn make(_ctx: Self::Context, v: Self::Prim) -> Self { + v.into() + } + } + }; +} + +impl_make_for_scalar!(bool); +impl_make_for_scalar!(i8); +impl_make_for_scalar!(u8); +impl_make_for_scalar!(i16); +impl_make_for_scalar!(u16); +impl_make_for_scalar!(F16); +impl_make_for_scalar!(i32); +impl_make_for_scalar!(u32); +impl_make_for_scalar!(f32); +impl_make_for_scalar!(i64); +impl_make_for_scalar!(u64); +impl_make_for_scalar!(f64); + +impl Select for Wrapper +where + SimdI8: LanesAtMost32, + SimdU8: LanesAtMost32, + Mask8: Mask, + SimdI16: LanesAtMost32, + SimdU16: LanesAtMost32, + Mask16: Mask, + SimdI32: LanesAtMost32, + SimdU32: LanesAtMost32, + SimdF32: LanesAtMost32, + Mask32: Mask, + SimdI64: LanesAtMost32, + SimdU64: LanesAtMost32, + SimdF64: LanesAtMost32, + Mask64: Mask, +{ + fn select(self, true_v: V, false_v: V) -> V { + self.0.select(true_v, false_v) + } +} + +macro_rules! impl_scalar_compare { + ($ty:ident) => { + impl Compare for Wrapper<$ty, LANES> + where + SimdI8: LanesAtMost32, + SimdU8: LanesAtMost32, + Mask8: Mask, + SimdI16: LanesAtMost32, + SimdU16: LanesAtMost32, + Mask16: Mask, + SimdI32: LanesAtMost32, + SimdU32: LanesAtMost32, + SimdF32: LanesAtMost32, + Mask32: Mask, + SimdI64: LanesAtMost32, + SimdU64: LanesAtMost32, + SimdF64: LanesAtMost32, + Mask64: Mask, + { + type Bool = Wrapper; + + fn eq(self, rhs: Self) -> Self::Bool { + self.0.eq(rhs.0).into() + } + + fn ne(self, rhs: Self) -> Self::Bool { + self.0.ne(rhs.0).into() + } + + fn lt(self, rhs: Self) -> Self::Bool { + self.0.lt(rhs.0).into() + } + + fn gt(self, rhs: Self) -> Self::Bool { + self.0.gt(rhs.0).into() + } + + fn le(self, rhs: Self) -> Self::Bool { + self.0.le(rhs.0).into() + } + + fn ge(self, rhs: Self) -> Self::Bool { + self.0.ge(rhs.0).into() + } + } + }; +} + +impl_scalar_compare!(bool); +impl_scalar_compare!(i8); +impl_scalar_compare!(u8); +impl_scalar_compare!(i16); +impl_scalar_compare!(u16); +impl_scalar_compare!(F16); +impl_scalar_compare!(i32); +impl_scalar_compare!(u32); +impl_scalar_compare!(f32); +impl_scalar_compare!(i64); +impl_scalar_compare!(u64); +impl_scalar_compare!(f64); + +macro_rules! impl_vector_compare { + ($ty:ident, $mask:ident) => { + impl Compare for Wrapper<$ty, LANES> + where + SimdI8: LanesAtMost32, + SimdU8: LanesAtMost32, + Mask8: Mask, + SimdI16: LanesAtMost32, + SimdU16: LanesAtMost32, + Mask16: Mask, + SimdI32: LanesAtMost32, + SimdU32: LanesAtMost32, + SimdF32: LanesAtMost32, + Mask32: Mask, + SimdI64: LanesAtMost32, + SimdU64: LanesAtMost32, + SimdF64: LanesAtMost32, + Mask64: Mask, + { + type Bool = Wrapper<$mask, LANES>; + + fn eq(self, rhs: Self) -> Self::Bool { + self.0.lanes_eq(rhs.0).into() + } + + fn ne(self, rhs: Self) -> Self::Bool { + self.0.lanes_ne(rhs.0).into() + } + + fn lt(self, rhs: Self) -> Self::Bool { + self.0.lanes_lt(rhs.0).into() + } + + fn gt(self, rhs: Self) -> Self::Bool { + self.0.lanes_gt(rhs.0).into() + } + + fn le(self, rhs: Self) -> Self::Bool { + self.0.lanes_le(rhs.0).into() + } + + fn ge(self, rhs: Self) -> Self::Bool { + self.0.lanes_ge(rhs.0).into() + } + } + }; +} + +impl_vector_compare!(SimdI8, Mask8); +impl_vector_compare!(SimdU8, Mask8); +impl_vector_compare!(SimdI16, Mask16); +impl_vector_compare!(SimdU16, Mask16); +impl_vector_compare!(SimdF16, Mask16); +impl_vector_compare!(SimdI32, Mask32); +impl_vector_compare!(SimdU32, Mask32); +impl_vector_compare!(SimdF32, Mask32); +impl_vector_compare!(SimdI64, Mask64); +impl_vector_compare!(SimdU64, Mask64); +impl_vector_compare!(SimdF64, Mask64); + +macro_rules! impl_vector_mask_compare { + ($ty:ident) => { + impl Compare for Wrapper<$ty, LANES> + where + SimdI8: LanesAtMost32, + SimdU8: LanesAtMost32, + Mask8: Mask, + SimdI16: LanesAtMost32, + SimdU16: LanesAtMost32, + Mask16: Mask, + SimdI32: LanesAtMost32, + SimdU32: LanesAtMost32, + SimdF32: LanesAtMost32, + Mask32: Mask, + SimdI64: LanesAtMost32, + SimdU64: LanesAtMost32, + SimdF64: LanesAtMost32, + Mask64: Mask, + { + type Bool = Self; + + fn eq(self, rhs: Self) -> Self::Bool { + !(self ^ rhs) + } + fn ne(self, rhs: Self) -> Self::Bool { + self ^ rhs + } + fn lt(self, rhs: Self) -> Self::Bool { + !self & rhs + } + fn gt(self, rhs: Self) -> Self::Bool { + self & !rhs + } + fn le(self, rhs: Self) -> Self::Bool { + !self | rhs + } + fn ge(self, rhs: Self) -> Self::Bool { + self | !rhs + } + } + }; +} + +impl_vector_mask_compare!(Mask8); +impl_vector_mask_compare!(Mask16); +impl_vector_mask_compare!(Mask32); +impl_vector_mask_compare!(Mask64); + +macro_rules! impl_int_scalar { + ($ty:ident) => { + impl Int for Wrapper<$ty, LANES> + where + SimdI8: LanesAtMost32, + SimdU8: LanesAtMost32, + Mask8: Mask, + SimdI16: LanesAtMost32, + SimdU16: LanesAtMost32, + Mask16: Mask, + SimdI32: LanesAtMost32, + SimdU32: LanesAtMost32, + SimdF32: LanesAtMost32, + Mask32: Mask, + SimdI64: LanesAtMost32, + SimdU64: LanesAtMost32, + SimdF64: LanesAtMost32, + Mask64: Mask, + { + fn leading_zeros(self) -> Self { + (self.0.leading_zeros() as $ty).into() + } + + fn trailing_zeros(self) -> Self { + (self.0.trailing_zeros() as $ty).into() + } + + fn count_ones(self) -> Self { + (self.0.count_ones() as $ty).into() + } + + fn leading_ones(self) -> Self { + (self.0.leading_ones() as $ty).into() + } + + fn trailing_ones(self) -> Self { + (self.0.trailing_ones() as $ty).into() + } + + fn count_zeros(self) -> Self { + (self.0.count_zeros() as $ty).into() + } + } + }; +} + +macro_rules! impl_int_vector { + ($ty:ident) => { + impl Int for Wrapper<$ty, LANES> + where + SimdI8: LanesAtMost32, + SimdU8: LanesAtMost32, + Mask8: Mask, + SimdI16: LanesAtMost32, + SimdU16: LanesAtMost32, + Mask16: Mask, + SimdI32: LanesAtMost32, + SimdU32: LanesAtMost32, + SimdF32: LanesAtMost32, + Mask32: Mask, + SimdI64: LanesAtMost32, + SimdU64: LanesAtMost32, + SimdF64: LanesAtMost32, + Mask64: Mask, + { + fn leading_zeros(self) -> Self { + todo!() + } + + fn trailing_zeros(self) -> Self { + todo!() + } + + fn count_ones(self) -> Self { + todo!() + } + + fn leading_ones(self) -> Self { + todo!() + } + + fn trailing_ones(self) -> Self { + todo!() + } + + fn count_zeros(self) -> Self { + todo!() + } + } + }; +} + +macro_rules! impl_uint_vector { + ($ty:ident) => { + impl_int_vector!($ty); + impl UInt for Wrapper<$ty, LANES> + where + SimdI8: LanesAtMost32, + SimdU8: LanesAtMost32, + Mask8: Mask, + SimdI16: LanesAtMost32, + SimdU16: LanesAtMost32, + Mask16: Mask, + SimdI32: LanesAtMost32, + SimdU32: LanesAtMost32, + SimdF32: LanesAtMost32, + Mask32: Mask, + SimdI64: LanesAtMost32, + SimdU64: LanesAtMost32, + SimdF64: LanesAtMost32, + Mask64: Mask, + { + } + }; +} + +impl_uint_vector!(SimdU8); +impl_uint_vector!(SimdU16); +impl_uint_vector!(SimdU32); +impl_uint_vector!(SimdU64); + +macro_rules! impl_uint_scalar { + ($ty:ident) => { + impl_int_scalar!($ty); + impl UInt for Wrapper<$ty, LANES> + where + SimdI8: LanesAtMost32, + SimdU8: LanesAtMost32, + Mask8: Mask, + SimdI16: LanesAtMost32, + SimdU16: LanesAtMost32, + Mask16: Mask, + SimdI32: LanesAtMost32, + SimdU32: LanesAtMost32, + SimdF32: LanesAtMost32, + Mask32: Mask, + SimdI64: LanesAtMost32, + SimdU64: LanesAtMost32, + SimdF64: LanesAtMost32, + Mask64: Mask, + { + } + }; +} + +impl_uint_scalar!(u8); +impl_uint_scalar!(u16); +impl_uint_scalar!(u32); +impl_uint_scalar!(u64); + +macro_rules! impl_sint_vector { + ($ty:ident) => { + impl_int_vector!($ty); + impl SInt for Wrapper<$ty, LANES> + where + SimdI8: LanesAtMost32, + SimdU8: LanesAtMost32, + Mask8: Mask, + SimdI16: LanesAtMost32, + SimdU16: LanesAtMost32, + Mask16: Mask, + SimdI32: LanesAtMost32, + SimdU32: LanesAtMost32, + SimdF32: LanesAtMost32, + Mask32: Mask, + SimdI64: LanesAtMost32, + SimdU64: LanesAtMost32, + SimdF64: LanesAtMost32, + Mask64: Mask, + { + } + }; +} + +impl_sint_vector!(SimdI8); +impl_sint_vector!(SimdI16); +impl_sint_vector!(SimdI32); +impl_sint_vector!(SimdI64); + +macro_rules! impl_sint_scalar { + ($ty:ident) => { + impl_int_scalar!($ty); + impl SInt for Wrapper<$ty, LANES> + where + SimdI8: LanesAtMost32, + SimdU8: LanesAtMost32, + Mask8: Mask, + SimdI16: LanesAtMost32, + SimdU16: LanesAtMost32, + Mask16: Mask, + SimdI32: LanesAtMost32, + SimdU32: LanesAtMost32, + SimdF32: LanesAtMost32, + Mask32: Mask, + SimdI64: LanesAtMost32, + SimdU64: LanesAtMost32, + SimdF64: LanesAtMost32, + Mask64: Mask, + { + } + }; +} + +impl_sint_scalar!(i8); +impl_sint_scalar!(i16); +impl_sint_scalar!(i32); +impl_sint_scalar!(i64); + +macro_rules! impl_float { + ($ty:ident, $prim:ident, $uint:ident, $sint:ident) => { + impl Float for Wrapper<$prim, LANES> + where + SimdI8: LanesAtMost32, + SimdU8: LanesAtMost32, + Mask8: Mask, + SimdI16: LanesAtMost32, + SimdU16: LanesAtMost32, + Mask16: Mask, + SimdI32: LanesAtMost32, + SimdU32: LanesAtMost32, + SimdF32: LanesAtMost32, + Mask32: Mask, + SimdI64: LanesAtMost32, + SimdU64: LanesAtMost32, + SimdF64: LanesAtMost32, + Mask64: Mask, + { + type FloatEncoding = $prim; + + type BitsType = Wrapper<<$prim as Float>::BitsType, LANES>; + + type SignedBitsType = Wrapper<<$prim as Float>::SignedBitsType, LANES>; + + fn abs(self) -> Self { + self.0.abs().into() + } + + fn trunc(self) -> Self { + self.0.trunc().into() + } + + fn ceil(self) -> Self { + self.0.ceil().into() + } + + fn floor(self) -> Self { + self.0.floor().into() + } + + fn round(self) -> Self { + self.0.round().into() + } + + fn fma(self, a: Self, b: Self) -> Self { + self.0.fma(a.0, b.0).into() + } + + fn is_finite(self) -> Self::Bool { + self.0.is_finite().into() + } + + fn from_bits(v: Self::BitsType) -> Self { + $prim::from_bits(v.0).into() + } + + fn to_bits(self) -> Self::BitsType { + self.0.to_bits().into() + } + } + + impl Float for Wrapper<$ty, LANES> + where + SimdI8: LanesAtMost32, + SimdU8: LanesAtMost32, + Mask8: Mask, + SimdI16: LanesAtMost32, + SimdU16: LanesAtMost32, + Mask16: Mask, + SimdI32: LanesAtMost32, + SimdU32: LanesAtMost32, + SimdF32: LanesAtMost32, + Mask32: Mask, + SimdI64: LanesAtMost32, + SimdU64: LanesAtMost32, + SimdF64: LanesAtMost32, + Mask64: Mask, + { + type FloatEncoding = $prim; + + type BitsType = Wrapper<$uint, LANES>; + + type SignedBitsType = Wrapper<$sint, LANES>; + + fn abs(self) -> Self { + self.0.abs().into() + } + + fn trunc(self) -> Self { + self.0.trunc().into() + } + + fn ceil(self) -> Self { + self.0.ceil().into() + } + + fn floor(self) -> Self { + self.0.floor().into() + } + + fn round(self) -> Self { + self.0.round().into() + } + + fn fma(self, _a: Self, _b: Self) -> Self { + // FIXME(programmerjake): implement once core_simd gains support: + // https://github.com/rust-lang/stdsimd/issues/102 + todo!() + } + + fn is_finite(self) -> Self::Bool { + self.0.is_finite().into() + } + + fn from_bits(v: Self::BitsType) -> Self { + $ty::::from_bits(v.0).into() + } + + fn to_bits(self) -> Self::BitsType { + self.0.to_bits().into() + } + } + }; +} + +impl_float!(SimdF16, F16, SimdU16, SimdI16); +impl_float!(SimdF32, f32, SimdU32, SimdI32); +impl_float!(SimdF64, f64, SimdU64, SimdI64); + +macro_rules! impl_scalar_convert_to_helper { + ($src:ty => $dest:ty) => { + impl ConvertTo> for Wrapper<$src, LANES> + where + SimdI8: LanesAtMost32, + SimdU8: LanesAtMost32, + Mask8: Mask, + SimdI16: LanesAtMost32, + SimdU16: LanesAtMost32, + Mask16: Mask, + SimdI32: LanesAtMost32, + SimdU32: LanesAtMost32, + SimdF32: LanesAtMost32, + Mask32: Mask, + SimdI64: LanesAtMost32, + SimdU64: LanesAtMost32, + SimdF64: LanesAtMost32, + Mask64: Mask, + { + fn to(self) -> Wrapper<$dest, LANES> { + let v: $dest = self.0.to(); + v.into() + } + } + }; +} + +macro_rules! impl_scalar_convert_to { + ($first:ty $(, $ty:ty)*) => { + $( + impl_scalar_convert_to_helper!($first => $ty); + impl_scalar_convert_to_helper!($ty => $first); + )* + impl_scalar_convert_to![$($ty),*]; + }; + () => {}; +} + +impl_scalar_convert_to![u8, i8, u16, i16, F16, u32, i32, u64, i64, f32, f64]; + +macro_rules! impl_vector_convert_to_helper { + (($(#[From = $From:ident])? $src:ident, $src_prim:ident) => ($(#[From = $From2:ident])? $dest:ident, $dest_prim:ident)) => { + impl ConvertTo, LANES>> + for Wrapper<$src, LANES> + where + SimdI8: LanesAtMost32, + SimdU8: LanesAtMost32, + Mask8: Mask, + SimdI16: LanesAtMost32, + SimdU16: LanesAtMost32, + Mask16: Mask, + SimdI32: LanesAtMost32, + SimdU32: LanesAtMost32, + SimdF32: LanesAtMost32, + Mask32: Mask, + SimdI64: LanesAtMost32, + SimdU64: LanesAtMost32, + SimdF64: LanesAtMost32, + Mask64: Mask, + { + fn to(self) -> Wrapper<$dest, LANES> { + // FIXME(programmerjake): workaround https://github.com/rust-lang/stdsimd/issues/116 + let src: [$src_prim; LANES] = self.0.into(); + let mut dest: [$dest_prim; LANES] = [Default::default(); LANES]; + for i in 0..LANES { + dest[i] = src[i].to(); + } + $dest::::from(dest).into() + } + } + + $(impl $From, LANES>> for Wrapper<$dest, LANES> + where + SimdI8: LanesAtMost32, + SimdU8: LanesAtMost32, + Mask8: Mask, + SimdI16: LanesAtMost32, + SimdU16: LanesAtMost32, + Mask16: Mask, + SimdI32: LanesAtMost32, + SimdU32: LanesAtMost32, + SimdF32: LanesAtMost32, + Mask32: Mask, + SimdI64: LanesAtMost32, + SimdU64: LanesAtMost32, + SimdF64: LanesAtMost32, + Mask64: Mask, + { + fn from(v: Wrapper<$src, LANES>) -> Self { + v.to() + } + })? + }; +} + +macro_rules! impl_vector_convert_to { + ($first:tt $(, $ty:tt)*) => { + $( + impl_vector_convert_to_helper!($first => $ty); + impl_vector_convert_to_helper!($ty => $first); + )* + impl_vector_convert_to![$($ty),*]; + }; + () => {}; +} + +impl_vector_convert_to![ + (SimdU8, u8), + (SimdI8, i8), + (SimdU16, u16), + (SimdI16, i16), + (SimdF16, F16), + (SimdU32, u32), + (SimdI32, i32), + (SimdU64, u64), + (SimdI64, i64), + (SimdF32, f32), + (SimdF64, f64) +]; + +impl_vector_convert_to![ + ( + #[From = From] + Mask8, + bool + ), + ( + #[From = From] + Mask16, + bool + ), + ( + #[From = From] + Mask32, + bool + ), + ( + #[From = From] + Mask64, + bool + ) +]; + +macro_rules! impl_from_helper { + (#[lanes = $LANES:ident] $src:ty => $dest:ty) => { + impl From<$src> for $dest + where + SimdI8: LanesAtMost32, + SimdU8: LanesAtMost32, + Mask8: Mask, + SimdI16: LanesAtMost32, + SimdU16: LanesAtMost32, + Mask16: Mask, + SimdI32: LanesAtMost32, + SimdU32: LanesAtMost32, + SimdF32: LanesAtMost32, + Mask32: Mask, + SimdI64: LanesAtMost32, + SimdU64: LanesAtMost32, + SimdF64: LanesAtMost32, + Mask64: Mask, + { + fn from(v: $src) -> Self { + <$src as ConvertTo<$dest>>::to(v) + } + } + }; +} + +macro_rules! impl_from { + (#[lanes = $LANES:ident] $src:ty => [$($dest:ty),*]) => { + $(impl_from_helper!(#[lanes = $LANES] $src => $dest);)* + }; +} + +macro_rules! impl_froms { + ( + #[lanes = $LANES:ident] + #[u8] $u8:ty; + #[i8] $i8:ty; + #[u16] $u16:ty; + #[i16] $i16:ty; + #[f16] $f16:ty; + #[u32] $u32:ty; + #[i32] $i32:ty; + #[f32] $f32:ty; + #[u64] $u64:ty; + #[i64] $i64:ty; + #[f64] $f64:ty; + ) => { + impl_from!(#[lanes = $LANES] $u8 => [$u16, $i16, $f16, $u32, $i32, $f32, $u64, $i64, $f64]); + impl_from!(#[lanes = $LANES] $u16 => [$u32, $i32, $f32, $u64, $i64, $f64]); + impl_from!(#[lanes = $LANES] $u32 => [$u64, $i64, $f64]); + impl_from!(#[lanes = $LANES] $i8 => [$i16, $f16, $i32, $f32, $i64, $f64]); + impl_from!(#[lanes = $LANES] $i16 => [$i32, $f32, $i64, $f64]); + impl_from!(#[lanes = $LANES] $i32 => [$i64, $f64]); + impl_from!(#[lanes = $LANES] $f16 => [$f32, $f64]); + impl_from!(#[lanes = $LANES] $f32 => [$f64]); + }; +} + +impl_froms! { + #[lanes = LANES] + #[u8] Wrapper; + #[i8] Wrapper; + #[u16] Wrapper; + #[i16] Wrapper; + #[f16] Wrapper; + #[u32] Wrapper; + #[i32] Wrapper; + #[f32] Wrapper; + #[u64] Wrapper; + #[i64] Wrapper; + #[f64] Wrapper; +} + +impl_froms! { + #[lanes = LANES] + #[u8] Wrapper, LANES>; + #[i8] Wrapper, LANES>; + #[u16] Wrapper, LANES>; + #[i16] Wrapper, LANES>; + #[f16] Wrapper, LANES>; + #[u32] Wrapper, LANES>; + #[i32] Wrapper, LANES>; + #[f32] Wrapper, LANES>; + #[u64] Wrapper, LANES>; + #[i64] Wrapper, LANES>; + #[f64] Wrapper, LANES>; +} + +macro_rules! impl_select { + ($mask:ident, $ty:ident) => { + impl Select, LANES>> for Wrapper<$mask, LANES> + where + SimdI8: LanesAtMost32, + SimdU8: LanesAtMost32, + Mask8: Mask, + SimdI16: LanesAtMost32, + SimdU16: LanesAtMost32, + Mask16: Mask, + SimdI32: LanesAtMost32, + SimdU32: LanesAtMost32, + SimdF32: LanesAtMost32, + Mask32: Mask, + SimdI64: LanesAtMost32, + SimdU64: LanesAtMost32, + SimdF64: LanesAtMost32, + Mask64: Mask, + { + fn select( + self, + true_v: Wrapper<$ty, LANES>, + false_v: Wrapper<$ty, LANES>, + ) -> Wrapper<$ty, LANES> { + self.0.select(true_v.0, false_v.0).into() + } + } + }; +} + +impl_select!(Mask8, SimdU8); +impl_select!(Mask8, SimdI8); +impl_select!(Mask16, SimdU16); +impl_select!(Mask16, SimdI16); +impl_select!(Mask32, SimdU32); +impl_select!(Mask32, SimdI32); +impl_select!(Mask32, SimdF32); +impl_select!(Mask64, SimdU64); +impl_select!(Mask64, SimdI64); +impl_select!(Mask64, SimdF64); + +#[cfg(not(feature = "f16"))] +impl Select, LANES>> for Wrapper, LANES> +where + SimdI8: LanesAtMost32, + SimdU8: LanesAtMost32, + Mask8: Mask, + SimdI16: LanesAtMost32, + SimdU16: LanesAtMost32, + Mask16: Mask, + SimdI32: LanesAtMost32, + SimdU32: LanesAtMost32, + SimdF32: LanesAtMost32, + Mask32: Mask, + SimdI64: LanesAtMost32, + SimdU64: LanesAtMost32, + SimdF64: LanesAtMost32, + Mask64: Mask, +{ + fn select( + self, + _true_v: Wrapper, LANES>, + _false_v: Wrapper, LANES>, + ) -> Wrapper, LANES> { + panic_f16_feature_disabled() + } +} + +macro_rules! impl_select_mask { + ($ty:ident) => { + impl Select for Wrapper<$ty, LANES> + where + SimdI8: LanesAtMost32, + SimdU8: LanesAtMost32, + Mask8: Mask, + SimdI16: LanesAtMost32, + SimdU16: LanesAtMost32, + Mask16: Mask, + SimdI32: LanesAtMost32, + SimdU32: LanesAtMost32, + SimdF32: LanesAtMost32, + Mask32: Mask, + SimdI64: LanesAtMost32, + SimdU64: LanesAtMost32, + SimdF64: LanesAtMost32, + Mask64: Mask, + { + fn select(self, true_v: Self, false_v: Self) -> Self { + // FIXME(programmerjake): work around https://github.com/rust-lang/stdsimd/issues/114 + (self & true_v) | (!self & false_v) + } + } + }; +} + +impl_select_mask!(Mask8); +impl_select_mask!(Mask16); +impl_select_mask!(Mask32); +impl_select_mask!(Mask64); + +macro_rules! impl_mask { + ($mask:ident) => { + impl Bool for Wrapper<$mask, LANES> + where + SimdI8: LanesAtMost32, + SimdU8: LanesAtMost32, + Mask8: Mask, + SimdI16: LanesAtMost32, + SimdU16: LanesAtMost32, + Mask16: Mask, + SimdI32: LanesAtMost32, + SimdU32: LanesAtMost32, + SimdF32: LanesAtMost32, + Mask32: Mask, + SimdI64: LanesAtMost32, + SimdU64: LanesAtMost32, + SimdF64: LanesAtMost32, + Mask64: Mask, + { + } + }; +} + +impl_mask!(Mask8); +impl_mask!(Mask16); +impl_mask!(Mask32); +impl_mask!(Mask64); + +impl Bool for Wrapper +where + SimdI8: LanesAtMost32, + SimdU8: LanesAtMost32, + Mask8: Mask, + SimdI16: LanesAtMost32, + SimdU16: LanesAtMost32, + Mask16: Mask, + SimdI32: LanesAtMost32, + SimdU32: LanesAtMost32, + SimdF32: LanesAtMost32, + Mask32: Mask, + SimdI64: LanesAtMost32, + SimdU64: LanesAtMost32, + SimdF64: LanesAtMost32, + Mask64: Mask, +{ +} + +macro_rules! impl_make { + ($ty:ident, $prim:ident) => { + impl Make for Wrapper<$ty, LANES> + where + SimdI8: LanesAtMost32, + SimdU8: LanesAtMost32, + Mask8: Mask, + SimdI16: LanesAtMost32, + SimdU16: LanesAtMost32, + Mask16: Mask, + SimdI32: LanesAtMost32, + SimdU32: LanesAtMost32, + SimdF32: LanesAtMost32, + Mask32: Mask, + SimdI64: LanesAtMost32, + SimdU64: LanesAtMost32, + SimdF64: LanesAtMost32, + Mask64: Mask, + { + type Prim = $prim; + + type Context = StdSimd; + + fn ctx(self) -> Self::Context { + StdSimd::default() + } + + fn make(_ctx: Self::Context, v: Self::Prim) -> Self { + $ty::splat(v).into() + } + } + + impl From> for Wrapper<$ty, LANES> + where + SimdI8: LanesAtMost32, + SimdU8: LanesAtMost32, + Mask8: Mask, + SimdI16: LanesAtMost32, + SimdU16: LanesAtMost32, + Mask16: Mask, + SimdI32: LanesAtMost32, + SimdU32: LanesAtMost32, + SimdF32: LanesAtMost32, + Mask32: Mask, + SimdI64: LanesAtMost32, + SimdU64: LanesAtMost32, + SimdF64: LanesAtMost32, + Mask64: Mask, + { + fn from(v: Wrapper<$prim, LANES>) -> Self { + $ty::splat(v.0).into() + } + } + }; +} + +impl_make!(Mask8, bool); +impl_make!(Mask16, bool); +impl_make!(Mask32, bool); +impl_make!(Mask64, bool); +impl_make!(SimdI8, i8); +impl_make!(SimdI16, i16); +impl_make!(SimdI32, i32); +impl_make!(SimdI64, i64); +impl_make!(SimdU8, u8); +impl_make!(SimdU16, u16); +impl_make!(SimdU32, u32); +impl_make!(SimdU64, u64); +impl_make!(SimdF16, F16); +impl_make!(SimdF32, f32); +impl_make!(SimdF64, f64); + +impl Context for StdSimd +where + SimdI8: LanesAtMost32, + SimdU8: LanesAtMost32, + Mask8: Mask, + SimdI16: LanesAtMost32, + SimdU16: LanesAtMost32, + Mask16: Mask, + SimdI32: LanesAtMost32, + SimdU32: LanesAtMost32, + SimdF32: LanesAtMost32, + Mask32: Mask, + SimdI64: LanesAtMost32, + SimdU64: LanesAtMost32, + SimdF64: LanesAtMost32, + Mask64: Mask, +{ + type Bool = Wrapper; + type U8 = Wrapper; + type I8 = Wrapper; + type U16 = Wrapper; + type I16 = Wrapper; + type F16 = Wrapper; + type U32 = Wrapper; + type I32 = Wrapper; + type F32 = Wrapper; + type U64 = Wrapper; + type I64 = Wrapper; + type F64 = Wrapper; + type VecBool8 = Wrapper, LANES>; + type VecU8 = Wrapper, LANES>; + type VecI8 = Wrapper, LANES>; + type VecBool16 = Wrapper, LANES>; + type VecU16 = Wrapper, LANES>; + type VecI16 = Wrapper, LANES>; + type VecF16 = Wrapper, LANES>; + type VecBool32 = Wrapper, LANES>; + type VecU32 = Wrapper, LANES>; + type VecI32 = Wrapper, LANES>; + type VecF32 = Wrapper, LANES>; + type VecBool64 = Wrapper, LANES>; + type VecU64 = Wrapper, LANES>; + type VecI64 = Wrapper, LANES>; + type VecF64 = Wrapper, LANES>; +} + +pub type ScalarBool = Wrapper; +pub type ScalarU8 = Wrapper; +pub type ScalarI8 = Wrapper; +pub type ScalarU16 = Wrapper; +pub type ScalarI16 = Wrapper; +pub type ScalarF16 = Wrapper; +pub type ScalarU32 = Wrapper; +pub type ScalarI32 = Wrapper; +pub type ScalarF32 = Wrapper; +pub type ScalarU64 = Wrapper; +pub type ScalarI64 = Wrapper; +pub type ScalarF64 = Wrapper; +pub type VecBool8 = Wrapper, LANES>; +pub type VecU8 = Wrapper, LANES>; +pub type VecI8 = Wrapper, LANES>; +pub type VecBool16 = Wrapper, LANES>; +pub type VecU16 = Wrapper, LANES>; +pub type VecI16 = Wrapper, LANES>; +pub type VecF16 = Wrapper, LANES>; +pub type VecBool32 = Wrapper, LANES>; +pub type VecU32 = Wrapper, LANES>; +pub type VecI32 = Wrapper, LANES>; +pub type VecF32 = Wrapper, LANES>; +pub type VecBool64 = Wrapper, LANES>; +pub type VecU64 = Wrapper, LANES>; +pub type VecI64 = Wrapper, LANES>; +pub type VecF64 = Wrapper, LANES>; + +#[cfg(test)] +mod tests { + use super::*; + use crate::algorithms::ilogb::{ + ilogb_f32, ILOGB_NAN_RESULT_F32, ILOGB_OVERFLOW_RESULT_F32, ILOGB_UNDERFLOW_RESULT_F32, + }; + + #[inline(never)] + fn do_ilogb_f32x4(arg: VecF32<4>) -> VecI32<4> { + ilogb_f32(StdSimd::default(), arg) + } + + #[test] + fn test_ilogb_f32x4() { + let ctx = StdSimd::<4>::default(); + assert_eq!( + do_ilogb_f32x4(ctx.make(0f32)), + ctx.make(ILOGB_UNDERFLOW_RESULT_F32) + ); + assert_eq!(do_ilogb_f32x4(ctx.make(1f32)), ctx.make(0)); + assert_eq!(do_ilogb_f32x4(ctx.make(2f32)), ctx.make(1)); + assert_eq!(do_ilogb_f32x4(ctx.make(3f32)), ctx.make(1)); + assert_eq!(do_ilogb_f32x4(ctx.make(3.99999f32)), ctx.make(1)); + assert_eq!(do_ilogb_f32x4(ctx.make(0.5f32)), ctx.make(-1)); + assert_eq!(do_ilogb_f32x4(ctx.make(0.5f32.powi(130))), ctx.make(-130)); + assert_eq!( + do_ilogb_f32x4(ctx.make(f32::INFINITY)), + ctx.make(ILOGB_OVERFLOW_RESULT_F32) + ); + assert_eq!( + do_ilogb_f32x4(ctx.make(f32::NAN)), + ctx.make(ILOGB_NAN_RESULT_F32) + ); + } +} -- 2.30.2