From d77893b257c86d7ddc81f3772b5e143ce768d291 Mon Sep 17 00:00:00 2001
From: Jacob Lifshay <programmerjake@gmail.com>
Date: Wed, 5 May 2021 02:56:23 -0700
Subject: [PATCH] stdsimd bindings work!

---
 Cargo.toml     |    2 +-
 src/f16.rs     |   14 +-
 src/stdsimd.rs | 1396 ++++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 1372 insertions(+), 40 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index 858dbd9..74c1f2b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,7 +12,7 @@ core_simd = { version = "0.1.0", git = "https://github.com/rust-lang/stdsimd", o
 vector-math-proc-macro = { version = "=0.1.0", path = "vector-math-proc-macro" }
 
 [features]
-default = ["f16", "fma"]
+default = ["fma"]
 f16 = ["half"]
 fma = ["std"]
 std = []
diff --git a/src/f16.rs b/src/f16.rs
index ee13d99..5bc1119 100644
--- a/src/f16.rs
+++ b/src/f16.rs
@@ -14,6 +14,12 @@ type F16Impl = u16;
 #[repr(transparent)]
 pub struct F16(F16Impl);
 
+#[cfg(not(feature = "f16"))] // only compiled when the `f16` feature is off
+#[track_caller] // panic location is reported at the caller, not inside this helper
+pub(crate) fn panic_f16_feature_disabled() -> ! { // shared panic used by the f16 stubs
+    panic!("f16 feature is not enabled") // never returns
+}
+
 #[cfg(feature = "f16")]
 macro_rules! f16_impl {
     ($v:expr, [$($vars:ident),*]) => {
@@ -26,11 +32,17 @@ macro_rules! f16_impl {
     ($v:expr, [$($vars:ident),*]) => {
         {
             $(let _ = $vars;)*
-            panic!("f16 feature is not enabled")
+            panic_f16_feature_disabled()
         }
     };
 }
 
+impl Default for F16 { // builds `F16` from the default of its backing `F16Impl`
+    fn default() -> Self {
+        f16_impl!(F16(F16Impl::default()), []) // NOTE(review): the `f16_impl!` arm shown in this patch panics, so this presumably panics when `f16` is off - confirm
+    }
+}
+
 impl From<F16Impl> for F16 {
     fn from(v: F16Impl) -> Self {
         F16(v)
diff --git a/src/stdsimd.rs b/src/stdsimd.rs
index 4e76d8f..046a337 100644
--- a/src/stdsimd.rs
+++ b/src/stdsimd.rs
@@ -1,45 +1,1365 @@
-use crate::{f16::F16, traits::Context};
-use core::marker::PhantomData;
-use core_simd::SimdF32;
+#[cfg(not(feature = "f16"))]
+use crate::f16::panic_f16_feature_disabled;
+use crate::{
+    f16::F16,
+    traits::{Bool, Compare, Context, ConvertTo, Float, Int, Make, SInt, Select, UInt},
+};
+use core::{
+    marker::PhantomData,
+    ops::{
+        Add, AddAssign, BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Div,
+        DivAssign, Mul, MulAssign, Neg, Not, Rem, RemAssign, Shl, ShlAssign, Shr, ShrAssign, Sub,
+        SubAssign,
+    },
+};
+use core_simd::{
+    LanesAtMost32, Mask, Mask16, Mask32, Mask64, Mask8, SimdF32, SimdF64, SimdI16, SimdI32,
+    SimdI64, SimdI8, SimdU16, SimdU32, SimdU64, SimdU8,
+};
+
+#[cfg(not(feature = "f16"))] // stub module, compiled only when the `f16` feature is off
+mod f16 {
+    use super::*;
+
+    #[derive(Copy, Clone, Debug)]
+    #[repr(transparent)]
+    pub struct SimdF16<const LANES: usize>(pub(crate) SimdU16<LANES>) // placeholder f16 vector backed by a `SimdU16`
+    where
+        SimdU16<LANES>: LanesAtMost32;
+
+    impl<const LANES: usize> SimdF16<LANES> // inherent API; every method panics via `panic_f16_feature_disabled`
+    where
+        SimdU16<LANES>: LanesAtMost32,
+        SimdI16<LANES>: LanesAtMost32,
+        Mask16<LANES>: Mask,
+    {
+        pub(crate) fn splat(_v: F16) -> Self {
+            panic_f16_feature_disabled()
+        }
+        pub(crate) fn abs(self) -> Self {
+            panic_f16_feature_disabled()
+        }
+        pub(crate) fn trunc(self) -> Self {
+            panic_f16_feature_disabled()
+        }
+        pub(crate) fn floor(self) -> Self {
+            panic_f16_feature_disabled()
+        }
+        pub(crate) fn ceil(self) -> Self {
+            panic_f16_feature_disabled()
+        }
+        pub(crate) fn round(self) -> Self {
+            panic_f16_feature_disabled()
+        }
+        pub(crate) fn from_bits(_v: SimdU16<LANES>) -> Self {
+            panic_f16_feature_disabled()
+        }
+        pub(crate) fn to_bits(self) -> SimdU16<LANES> {
+            panic_f16_feature_disabled()
+        }
+        pub(crate) fn is_finite(self) -> Mask16<LANES> {
+            panic_f16_feature_disabled()
+        }
+        pub(crate) fn lanes_eq(self, _rhs: Self) -> Mask16<LANES> {
+            panic_f16_feature_disabled()
+        }
+        pub(crate) fn lanes_ne(self, _rhs: Self) -> Mask16<LANES> {
+            panic_f16_feature_disabled()
+        }
+        pub(crate) fn lanes_gt(self, _rhs: Self) -> Mask16<LANES> {
+            panic_f16_feature_disabled()
+        }
+        pub(crate) fn lanes_ge(self, _rhs: Self) -> Mask16<LANES> {
+            panic_f16_feature_disabled()
+        }
+        pub(crate) fn lanes_lt(self, _rhs: Self) -> Mask16<LANES> {
+            panic_f16_feature_disabled()
+        }
+        pub(crate) fn lanes_le(self, _rhs: Self) -> Mask16<LANES> {
+            panic_f16_feature_disabled()
+        }
+    }
+
+    impl<const LANES: usize> From<[F16; LANES]> for SimdF16<LANES> // array -> vector; panics
+    where
+        SimdU16<LANES>: LanesAtMost32,
+    {
+        fn from(_v: [F16; LANES]) -> Self {
+            panic_f16_feature_disabled()
+        }
+    }
+
+    impl<const LANES: usize> From<SimdF16<LANES>> for [F16; LANES] // vector -> array; panics
+    where
+        SimdU16<LANES>: LanesAtMost32,
+    {
+        fn from(_v: SimdF16<LANES>) -> Self {
+            panic_f16_feature_disabled()
+        }
+    }
+
+    macro_rules! impl_f16_bin_op { // generates a panicking binary-operator impl (`$trait::$fn`) for `SimdF16`
+        ($trait:ident, $fn:ident) => {
+            impl<const LANES: usize> $trait for SimdF16<LANES>
+            where
+                SimdU16<LANES>: LanesAtMost32,
+            {
+                type Output = Self;
+
+                fn $fn(self, _rhs: Self) -> Self::Output {
+                    panic_f16_feature_disabled()
+                }
+            }
+        };
+    }
+
+    impl_f16_bin_op!(Add, add);
+    impl_f16_bin_op!(Sub, sub);
+    impl_f16_bin_op!(Mul, mul);
+    impl_f16_bin_op!(Div, div);
+    impl_f16_bin_op!(Rem, rem);
+
+    macro_rules! impl_f16_bin_assign_op { // generates a panicking compound-assignment impl for `SimdF16`
+        ($trait:ident, $fn:ident) => {
+            impl<const LANES: usize> $trait for SimdF16<LANES>
+            where
+                SimdU16<LANES>: LanesAtMost32,
+            {
+                fn $fn(&mut self, _rhs: Self) {
+                    panic_f16_feature_disabled()
+                }
+            }
+        };
+    }
+
+    impl_f16_bin_assign_op!(AddAssign, add_assign);
+    impl_f16_bin_assign_op!(SubAssign, sub_assign);
+    impl_f16_bin_assign_op!(MulAssign, mul_assign);
+    impl_f16_bin_assign_op!(DivAssign, div_assign);
+    impl_f16_bin_assign_op!(RemAssign, rem_assign);
+
+    impl<const LANES: usize> Neg for SimdF16<LANES> // unary minus; panics
+    where
+        SimdU16<LANES>: LanesAtMost32,
+    {
+        type Output = Self;
+
+        fn neg(self) -> Self::Output {
+            panic_f16_feature_disabled()
+        }
+    }
+}
+
+#[cfg(not(feature = "f16"))]
+use f16::SimdF16; // bring the panicking stub type from `mod f16` into scope
+
+#[cfg(feature = "f16")]
+compile_error!("core_simd doesn't yet support f16"); // turning `f16` on makes this file fail to compile
 
 #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, Default)]
 pub struct StdSimd<const LANES: usize>(PhantomData<[(); LANES]>);
 
 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Default)]
-pub struct Scalar<T, const LANES: usize>(pub T, PhantomData<[(); LANES]>);
+#[repr(transparent)]
+pub struct Wrapper<T, const LANES: usize>(pub T, PhantomData<[(); LANES]>); // a `T` tagged with a lane count; the `PhantomData` field only carries `LANES`
 
-impl<T, const LANES: usize> From<T> for Scalar<T, LANES> {
+impl<T, const LANES: usize> From<T> for Wrapper<T, LANES> { // lets any `T` be wrapped with `.into()`
     fn from(v: T) -> Self {
-        Scalar(v, PhantomData)
-    }
-}
-
-/*
-TODO(programmerjake): finish after splitting Context::VecBool
-impl<const LANES: usize> Context for StdSimd<LANES> {
-    type Bool = Scalar<bool>;
-    type U8 = Scalar<u8>;
-    type I8 = Scalar<i8>;
-    type U16 = Scalar<u16>;
-    type I16 = Scalar<i16>;
-    type F16 = Scalar<F16>;
-    type U32 = Scalar<u32>;
-    type I32 = Scalar<i32>;
-    type F32 = Scalar<f32>;
-    type U64 = Scalar<u64>;
-    type I64 = Scalar<i64>;
-    type F64 = Scalar<f64>;
-    type VecBool;
-    type VecU8;
-    type VecI8;
-    type VecU16;
-    type VecI16;
-    type VecF16;
-    type VecU32;
-    type VecI32;
-    type VecF32;
-    type VecU64;
-    type VecI64;
-    type VecF64;
-}
-*/
+        Wrapper(v, PhantomData) // store the value unchanged next to the zero-sized lane marker
+    }
+}
+
+macro_rules! impl_bin_op_for_wrapper { // implements binary operator `$trait` (method `$fn`) for `Wrapper<T, LANES>`
+    ($trait:ident, $fn:ident) => {
+        impl<T: $trait<Output = T>, const LANES: usize> $trait for Wrapper<T, LANES> {
+            type Output = Self;
+
+            fn $fn(self, rhs: Self) -> Self::Output {
+                self.0.$fn(rhs.0).into() // apply the operator to the inner values, then re-wrap via `From<T>`
+            }
+        }
+    };
+}
+
+impl_bin_op_for_wrapper!(Add, add);
+impl_bin_op_for_wrapper!(Sub, sub);
+impl_bin_op_for_wrapper!(Mul, mul);
+impl_bin_op_for_wrapper!(Div, div);
+impl_bin_op_for_wrapper!(Rem, rem);
+impl_bin_op_for_wrapper!(Shl, shl);
+impl_bin_op_for_wrapper!(Shr, shr);
+impl_bin_op_for_wrapper!(BitAnd, bitand);
+impl_bin_op_for_wrapper!(BitOr, bitor);
+impl_bin_op_for_wrapper!(BitXor, bitxor);
+
+macro_rules! impl_bin_op_assign_for_wrapper { // implements compound-assignment trait `$trait` for `Wrapper<T, LANES>`
+    ($trait:ident, $fn:ident) => {
+        impl<T: $trait, const LANES: usize> $trait for Wrapper<T, LANES> {
+            fn $fn(&mut self, rhs: Self) {
+                self.0.$fn(rhs.0); // update the inner value in place with the unwrapped right-hand side
+            }
+        }
+    };
+}
+
+impl_bin_op_assign_for_wrapper!(AddAssign, add_assign);
+impl_bin_op_assign_for_wrapper!(SubAssign, sub_assign);
+impl_bin_op_assign_for_wrapper!(MulAssign, mul_assign);
+impl_bin_op_assign_for_wrapper!(DivAssign, div_assign);
+impl_bin_op_assign_for_wrapper!(RemAssign, rem_assign);
+impl_bin_op_assign_for_wrapper!(ShlAssign, shl_assign);
+impl_bin_op_assign_for_wrapper!(ShrAssign, shr_assign);
+impl_bin_op_assign_for_wrapper!(BitAndAssign, bitand_assign);
+impl_bin_op_assign_for_wrapper!(BitOrAssign, bitor_assign);
+impl_bin_op_assign_for_wrapper!(BitXorAssign, bitxor_assign);
+
+macro_rules! impl_un_op_for_wrapper { // implements unary operator `$trait` (method `$fn`) for `Wrapper<T, LANES>`
+    ($trait:ident, $fn:ident) => {
+        impl<T: $trait<Output = T>, const LANES: usize> $trait for Wrapper<T, LANES> {
+            type Output = Self;
+
+            fn $fn(self) -> Self::Output {
+                self.0.$fn().into() // apply the operator to the inner value, then re-wrap
+            }
+        }
+    };
+}
+
+impl_un_op_for_wrapper!(Not, not);
+impl_un_op_for_wrapper!(Neg, neg);
+
+macro_rules! impl_make_for_scalar { // implements `Make` for `Wrapper<$ty, LANES>` with `StdSimd<LANES>` as context
+    ($ty:ident) => {
+        impl<const LANES: usize> Make for Wrapper<$ty, LANES>
+        where
+            SimdI8<LANES>: LanesAtMost32,
+            SimdU8<LANES>: LanesAtMost32,
+            Mask8<LANES>: Mask,
+            SimdI16<LANES>: LanesAtMost32,
+            SimdU16<LANES>: LanesAtMost32,
+            Mask16<LANES>: Mask,
+            SimdI32<LANES>: LanesAtMost32,
+            SimdU32<LANES>: LanesAtMost32,
+            SimdF32<LANES>: LanesAtMost32,
+            Mask32<LANES>: Mask,
+            SimdI64<LANES>: LanesAtMost32,
+            SimdU64<LANES>: LanesAtMost32,
+            SimdF64<LANES>: LanesAtMost32,
+            Mask64<LANES>: Mask,
+        {
+            type Prim = $ty;
+
+            type Context = StdSimd<LANES>;
+
+            fn ctx(self) -> Self::Context {
+                StdSimd::default() // the context is built fresh; `self` is not inspected
+            }
+
+            fn make(_ctx: Self::Context, v: Self::Prim) -> Self {
+                v.into() // the context argument is unused; the primitive is simply wrapped
+            }
+        }
+    };
+}
+
+impl_make_for_scalar!(bool);
+impl_make_for_scalar!(i8);
+impl_make_for_scalar!(u8);
+impl_make_for_scalar!(i16);
+impl_make_for_scalar!(u16);
+impl_make_for_scalar!(F16);
+impl_make_for_scalar!(i32);
+impl_make_for_scalar!(u32);
+impl_make_for_scalar!(f32);
+impl_make_for_scalar!(i64);
+impl_make_for_scalar!(u64);
+impl_make_for_scalar!(f64);
+
+impl<V, const LANES: usize> Select<V> for Wrapper<bool, LANES> // selection driven by a wrapped scalar `bool`
+where
+    SimdI8<LANES>: LanesAtMost32,
+    SimdU8<LANES>: LanesAtMost32,
+    Mask8<LANES>: Mask,
+    SimdI16<LANES>: LanesAtMost32,
+    SimdU16<LANES>: LanesAtMost32,
+    Mask16<LANES>: Mask,
+    SimdI32<LANES>: LanesAtMost32,
+    SimdU32<LANES>: LanesAtMost32,
+    SimdF32<LANES>: LanesAtMost32,
+    Mask32<LANES>: Mask,
+    SimdI64<LANES>: LanesAtMost32,
+    SimdU64<LANES>: LanesAtMost32,
+    SimdF64<LANES>: LanesAtMost32,
+    Mask64<LANES>: Mask,
+{
+    fn select(self, true_v: V, false_v: V) -> V {
+        self.0.select(true_v, false_v) // forwards to the inner `bool`'s `select`
+    }
+}
+
+macro_rules! impl_scalar_compare { // implements `Compare` for `Wrapper<$ty, LANES>`; results are `Wrapper<bool, LANES>`
+    ($ty:ident) => {
+        impl<const LANES: usize> Compare for Wrapper<$ty, LANES>
+        where
+            SimdI8<LANES>: LanesAtMost32,
+            SimdU8<LANES>: LanesAtMost32,
+            Mask8<LANES>: Mask,
+            SimdI16<LANES>: LanesAtMost32,
+            SimdU16<LANES>: LanesAtMost32,
+            Mask16<LANES>: Mask,
+            SimdI32<LANES>: LanesAtMost32,
+            SimdU32<LANES>: LanesAtMost32,
+            SimdF32<LANES>: LanesAtMost32,
+            Mask32<LANES>: Mask,
+            SimdI64<LANES>: LanesAtMost32,
+            SimdU64<LANES>: LanesAtMost32,
+            SimdF64<LANES>: LanesAtMost32,
+            Mask64<LANES>: Mask,
+        {
+            type Bool = Wrapper<bool, LANES>;
+
+            fn eq(self, rhs: Self) -> Self::Bool { // each method forwards to the same-named method on the inner value
+                self.0.eq(rhs.0).into()
+            }
+
+            fn ne(self, rhs: Self) -> Self::Bool {
+                self.0.ne(rhs.0).into()
+            }
+
+            fn lt(self, rhs: Self) -> Self::Bool {
+                self.0.lt(rhs.0).into()
+            }
+
+            fn gt(self, rhs: Self) -> Self::Bool {
+                self.0.gt(rhs.0).into()
+            }
+
+            fn le(self, rhs: Self) -> Self::Bool {
+                self.0.le(rhs.0).into()
+            }
+
+            fn ge(self, rhs: Self) -> Self::Bool {
+                self.0.ge(rhs.0).into()
+            }
+        }
+    };
+}
+
+impl_scalar_compare!(bool);
+impl_scalar_compare!(i8);
+impl_scalar_compare!(u8);
+impl_scalar_compare!(i16);
+impl_scalar_compare!(u16);
+impl_scalar_compare!(F16);
+impl_scalar_compare!(i32);
+impl_scalar_compare!(u32);
+impl_scalar_compare!(f32);
+impl_scalar_compare!(i64);
+impl_scalar_compare!(u64);
+impl_scalar_compare!(f64);
+
+macro_rules! impl_vector_compare { // implements `Compare` for wrapped vector `$ty<LANES>`; results are wrapped `$mask<LANES>`
+    ($ty:ident, $mask:ident) => {
+        impl<const LANES: usize> Compare for Wrapper<$ty<LANES>, LANES>
+        where
+            SimdI8<LANES>: LanesAtMost32,
+            SimdU8<LANES>: LanesAtMost32,
+            Mask8<LANES>: Mask,
+            SimdI16<LANES>: LanesAtMost32,
+            SimdU16<LANES>: LanesAtMost32,
+            Mask16<LANES>: Mask,
+            SimdI32<LANES>: LanesAtMost32,
+            SimdU32<LANES>: LanesAtMost32,
+            SimdF32<LANES>: LanesAtMost32,
+            Mask32<LANES>: Mask,
+            SimdI64<LANES>: LanesAtMost32,
+            SimdU64<LANES>: LanesAtMost32,
+            SimdF64<LANES>: LanesAtMost32,
+            Mask64<LANES>: Mask,
+        {
+            type Bool = Wrapper<$mask<LANES>, LANES>;
+
+            fn eq(self, rhs: Self) -> Self::Bool { // each method forwards to the vector's matching `lanes_*` method
+                self.0.lanes_eq(rhs.0).into()
+            }
+
+            fn ne(self, rhs: Self) -> Self::Bool {
+                self.0.lanes_ne(rhs.0).into()
+            }
+
+            fn lt(self, rhs: Self) -> Self::Bool {
+                self.0.lanes_lt(rhs.0).into()
+            }
+
+            fn gt(self, rhs: Self) -> Self::Bool {
+                self.0.lanes_gt(rhs.0).into()
+            }
+
+            fn le(self, rhs: Self) -> Self::Bool {
+                self.0.lanes_le(rhs.0).into()
+            }
+
+            fn ge(self, rhs: Self) -> Self::Bool {
+                self.0.lanes_ge(rhs.0).into()
+            }
+        }
+    };
+}
+
+impl_vector_compare!(SimdI8, Mask8);
+impl_vector_compare!(SimdU8, Mask8);
+impl_vector_compare!(SimdI16, Mask16);
+impl_vector_compare!(SimdU16, Mask16);
+impl_vector_compare!(SimdF16, Mask16);
+impl_vector_compare!(SimdI32, Mask32);
+impl_vector_compare!(SimdU32, Mask32);
+impl_vector_compare!(SimdF32, Mask32);
+impl_vector_compare!(SimdI64, Mask64);
+impl_vector_compare!(SimdU64, Mask64);
+impl_vector_compare!(SimdF64, Mask64);
+
+macro_rules! impl_vector_mask_compare { // implements `Compare` for wrapped mask `$ty<LANES>` using only `!`, `&`, `|`, `^`
+    ($ty:ident) => {
+        impl<const LANES: usize> Compare for Wrapper<$ty<LANES>, LANES>
+        where
+            SimdI8<LANES>: LanesAtMost32,
+            SimdU8<LANES>: LanesAtMost32,
+            Mask8<LANES>: Mask,
+            SimdI16<LANES>: LanesAtMost32,
+            SimdU16<LANES>: LanesAtMost32,
+            Mask16<LANES>: Mask,
+            SimdI32<LANES>: LanesAtMost32,
+            SimdU32<LANES>: LanesAtMost32,
+            SimdF32<LANES>: LanesAtMost32,
+            Mask32<LANES>: Mask,
+            SimdI64<LANES>: LanesAtMost32,
+            SimdU64<LANES>: LanesAtMost32,
+            SimdF64<LANES>: LanesAtMost32,
+            Mask64<LANES>: Mask,
+        {
+            type Bool = Self; // comparing masks yields a mask of the same type
+
+            fn eq(self, rhs: Self) -> Self::Bool {
+                !(self ^ rhs) // set where both sides agree
+            }
+            fn ne(self, rhs: Self) -> Self::Bool {
+                self ^ rhs // set where the sides differ
+            }
+            fn lt(self, rhs: Self) -> Self::Bool {
+                !self & rhs // set where `self` is clear and `rhs` is set, i.e. false < true
+            }
+            fn gt(self, rhs: Self) -> Self::Bool {
+                self & !rhs // set where `self` is set and `rhs` is clear
+            }
+            fn le(self, rhs: Self) -> Self::Bool {
+                !self | rhs // complement of `gt`
+            }
+            fn ge(self, rhs: Self) -> Self::Bool {
+                self | !rhs // complement of `lt`
+            }
+        }
+    };
+}
+
+impl_vector_mask_compare!(Mask8);
+impl_vector_mask_compare!(Mask16);
+impl_vector_mask_compare!(Mask32);
+impl_vector_mask_compare!(Mask64);
+
+macro_rules! impl_int_scalar { // implements `Int` for `Wrapper<$ty, LANES>` by calling the inner value's bit-counting methods
+    ($ty:ident) => {
+        impl<const LANES: usize> Int for Wrapper<$ty, LANES>
+        where
+            SimdI8<LANES>: LanesAtMost32,
+            SimdU8<LANES>: LanesAtMost32,
+            Mask8<LANES>: Mask,
+            SimdI16<LANES>: LanesAtMost32,
+            SimdU16<LANES>: LanesAtMost32,
+            Mask16<LANES>: Mask,
+            SimdI32<LANES>: LanesAtMost32,
+            SimdU32<LANES>: LanesAtMost32,
+            SimdF32<LANES>: LanesAtMost32,
+            Mask32<LANES>: Mask,
+            SimdI64<LANES>: LanesAtMost32,
+            SimdU64<LANES>: LanesAtMost32,
+            SimdF64<LANES>: LanesAtMost32,
+            Mask64<LANES>: Mask,
+        {
+            fn leading_zeros(self) -> Self {
+                (self.0.leading_zeros() as $ty).into() // the count is cast back to `$ty` so the result has the input's type
+            }
+
+            fn trailing_zeros(self) -> Self {
+                (self.0.trailing_zeros() as $ty).into()
+            }
+
+            fn count_ones(self) -> Self {
+                (self.0.count_ones() as $ty).into()
+            }
+
+            fn leading_ones(self) -> Self {
+                (self.0.leading_ones() as $ty).into()
+            }
+
+            fn trailing_ones(self) -> Self {
+                (self.0.trailing_ones() as $ty).into()
+            }
+
+            fn count_zeros(self) -> Self {
+                (self.0.count_zeros() as $ty).into()
+            }
+        }
+    };
+}
+
+macro_rules! impl_int_vector { // implements `Int` for wrapped vector `$ty<LANES>`; every method is still a `todo!()` stub
+    ($ty:ident) => {
+        impl<const LANES: usize> Int for Wrapper<$ty<LANES>, LANES>
+        where
+            SimdI8<LANES>: LanesAtMost32,
+            SimdU8<LANES>: LanesAtMost32,
+            Mask8<LANES>: Mask,
+            SimdI16<LANES>: LanesAtMost32,
+            SimdU16<LANES>: LanesAtMost32,
+            Mask16<LANES>: Mask,
+            SimdI32<LANES>: LanesAtMost32,
+            SimdU32<LANES>: LanesAtMost32,
+            SimdF32<LANES>: LanesAtMost32,
+            Mask32<LANES>: Mask,
+            SimdI64<LANES>: LanesAtMost32,
+            SimdU64<LANES>: LanesAtMost32,
+            SimdF64<LANES>: LanesAtMost32,
+            Mask64<LANES>: Mask,
+        {
+            fn leading_zeros(self) -> Self {
+                todo!() // not implemented yet: panics if called
+            }
+
+            fn trailing_zeros(self) -> Self {
+                todo!()
+            }
+
+            fn count_ones(self) -> Self {
+                todo!()
+            }
+
+            fn leading_ones(self) -> Self {
+                todo!()
+            }
+
+            fn trailing_ones(self) -> Self {
+                todo!()
+            }
+
+            fn count_zeros(self) -> Self {
+                todo!()
+            }
+        }
+    };
+}
+
+macro_rules! impl_uint_vector { // marks wrapped vector `$ty<LANES>` as `UInt`, generating its `Int` impl first
+    ($ty:ident) => {
+        impl_int_vector!($ty);
+        impl<const LANES: usize> UInt for Wrapper<$ty<LANES>, LANES>
+        where
+            SimdI8<LANES>: LanesAtMost32,
+            SimdU8<LANES>: LanesAtMost32,
+            Mask8<LANES>: Mask,
+            SimdI16<LANES>: LanesAtMost32,
+            SimdU16<LANES>: LanesAtMost32,
+            Mask16<LANES>: Mask,
+            SimdI32<LANES>: LanesAtMost32,
+            SimdU32<LANES>: LanesAtMost32,
+            SimdF32<LANES>: LanesAtMost32,
+            Mask32<LANES>: Mask,
+            SimdI64<LANES>: LanesAtMost32,
+            SimdU64<LANES>: LanesAtMost32,
+            SimdF64<LANES>: LanesAtMost32,
+            Mask64<LANES>: Mask,
+        {
+        }
+    };
+}
+
+impl_uint_vector!(SimdU8);
+impl_uint_vector!(SimdU16);
+impl_uint_vector!(SimdU32);
+impl_uint_vector!(SimdU64);
+
+macro_rules! impl_uint_scalar { // marks `Wrapper<$ty, LANES>` as `UInt`, generating its `Int` impl first
+    ($ty:ident) => {
+        impl_int_scalar!($ty);
+        impl<const LANES: usize> UInt for Wrapper<$ty, LANES>
+        where
+            SimdI8<LANES>: LanesAtMost32,
+            SimdU8<LANES>: LanesAtMost32,
+            Mask8<LANES>: Mask,
+            SimdI16<LANES>: LanesAtMost32,
+            SimdU16<LANES>: LanesAtMost32,
+            Mask16<LANES>: Mask,
+            SimdI32<LANES>: LanesAtMost32,
+            SimdU32<LANES>: LanesAtMost32,
+            SimdF32<LANES>: LanesAtMost32,
+            Mask32<LANES>: Mask,
+            SimdI64<LANES>: LanesAtMost32,
+            SimdU64<LANES>: LanesAtMost32,
+            SimdF64<LANES>: LanesAtMost32,
+            Mask64<LANES>: Mask,
+        {
+        }
+    };
+}
+
+impl_uint_scalar!(u8);
+impl_uint_scalar!(u16);
+impl_uint_scalar!(u32);
+impl_uint_scalar!(u64);
+
+macro_rules! impl_sint_vector { // marks wrapped vector `$ty<LANES>` as `SInt`, generating its `Int` impl first
+    ($ty:ident) => {
+        impl_int_vector!($ty);
+        impl<const LANES: usize> SInt for Wrapper<$ty<LANES>, LANES>
+        where
+            SimdI8<LANES>: LanesAtMost32,
+            SimdU8<LANES>: LanesAtMost32,
+            Mask8<LANES>: Mask,
+            SimdI16<LANES>: LanesAtMost32,
+            SimdU16<LANES>: LanesAtMost32,
+            Mask16<LANES>: Mask,
+            SimdI32<LANES>: LanesAtMost32,
+            SimdU32<LANES>: LanesAtMost32,
+            SimdF32<LANES>: LanesAtMost32,
+            Mask32<LANES>: Mask,
+            SimdI64<LANES>: LanesAtMost32,
+            SimdU64<LANES>: LanesAtMost32,
+            SimdF64<LANES>: LanesAtMost32,
+            Mask64<LANES>: Mask,
+        {
+        }
+    };
+}
+
+impl_sint_vector!(SimdI8);
+impl_sint_vector!(SimdI16);
+impl_sint_vector!(SimdI32);
+impl_sint_vector!(SimdI64);
+
+macro_rules! impl_sint_scalar { // marks `Wrapper<$ty, LANES>` as `SInt`, generating its `Int` impl first
+    ($ty:ident) => {
+        impl_int_scalar!($ty);
+        impl<const LANES: usize> SInt for Wrapper<$ty, LANES>
+        where
+            SimdI8<LANES>: LanesAtMost32,
+            SimdU8<LANES>: LanesAtMost32,
+            Mask8<LANES>: Mask,
+            SimdI16<LANES>: LanesAtMost32,
+            SimdU16<LANES>: LanesAtMost32,
+            Mask16<LANES>: Mask,
+            SimdI32<LANES>: LanesAtMost32,
+            SimdU32<LANES>: LanesAtMost32,
+            SimdF32<LANES>: LanesAtMost32,
+            Mask32<LANES>: Mask,
+            SimdI64<LANES>: LanesAtMost32,
+            SimdU64<LANES>: LanesAtMost32,
+            SimdF64<LANES>: LanesAtMost32,
+            Mask64<LANES>: Mask,
+        {
+        }
+    };
+}
+
+impl_sint_scalar!(i8);
+impl_sint_scalar!(i16);
+impl_sint_scalar!(i32);
+impl_sint_scalar!(i64);
+
+macro_rules! impl_float { // implements `Float` for both `Wrapper<$prim, LANES>` and `Wrapper<$ty<LANES>, LANES>`
+    ($ty:ident, $prim:ident, $uint:ident, $sint:ident) => {
+        impl<const LANES: usize> Float for Wrapper<$prim, LANES> // scalar case: forwards to `$prim`'s own methods
+        where
+            SimdI8<LANES>: LanesAtMost32,
+            SimdU8<LANES>: LanesAtMost32,
+            Mask8<LANES>: Mask,
+            SimdI16<LANES>: LanesAtMost32,
+            SimdU16<LANES>: LanesAtMost32,
+            Mask16<LANES>: Mask,
+            SimdI32<LANES>: LanesAtMost32,
+            SimdU32<LANES>: LanesAtMost32,
+            SimdF32<LANES>: LanesAtMost32,
+            Mask32<LANES>: Mask,
+            SimdI64<LANES>: LanesAtMost32,
+            SimdU64<LANES>: LanesAtMost32,
+            SimdF64<LANES>: LanesAtMost32,
+            Mask64<LANES>: Mask,
+        {
+            type FloatEncoding = $prim;
+
+            type BitsType = Wrapper<<$prim as Float>::BitsType, LANES>; // bit types are taken from `$prim`'s own `Float` impl
+
+            type SignedBitsType = Wrapper<<$prim as Float>::SignedBitsType, LANES>;
+
+            fn abs(self) -> Self {
+                self.0.abs().into()
+            }
+
+            fn trunc(self) -> Self {
+                self.0.trunc().into()
+            }
+
+            fn ceil(self) -> Self {
+                self.0.ceil().into()
+            }
+
+            fn floor(self) -> Self {
+                self.0.floor().into()
+            }
+
+            fn round(self) -> Self {
+                self.0.round().into()
+            }
+
+            fn fma(self, a: Self, b: Self) -> Self {
+                self.0.fma(a.0, b.0).into()
+            }
+
+            fn is_finite(self) -> Self::Bool {
+                self.0.is_finite().into()
+            }
+
+            fn from_bits(v: Self::BitsType) -> Self {
+                $prim::from_bits(v.0).into()
+            }
+
+            fn to_bits(self) -> Self::BitsType {
+                self.0.to_bits().into()
+            }
+        }
+
+        impl<const LANES: usize> Float for Wrapper<$ty<LANES>, LANES> // vector case: forwards to `$ty<LANES>` methods
+        where
+            SimdI8<LANES>: LanesAtMost32,
+            SimdU8<LANES>: LanesAtMost32,
+            Mask8<LANES>: Mask,
+            SimdI16<LANES>: LanesAtMost32,
+            SimdU16<LANES>: LanesAtMost32,
+            Mask16<LANES>: Mask,
+            SimdI32<LANES>: LanesAtMost32,
+            SimdU32<LANES>: LanesAtMost32,
+            SimdF32<LANES>: LanesAtMost32,
+            Mask32<LANES>: Mask,
+            SimdI64<LANES>: LanesAtMost32,
+            SimdU64<LANES>: LanesAtMost32,
+            SimdF64<LANES>: LanesAtMost32,
+            Mask64<LANES>: Mask,
+        {
+            type FloatEncoding = $prim;
+
+            type BitsType = Wrapper<$uint<LANES>, LANES>; // bit types are the macro's `$uint` / `$sint` vector arguments
+
+            type SignedBitsType = Wrapper<$sint<LANES>, LANES>;
+
+            fn abs(self) -> Self {
+                self.0.abs().into()
+            }
+
+            fn trunc(self) -> Self {
+                self.0.trunc().into()
+            }
+
+            fn ceil(self) -> Self {
+                self.0.ceil().into()
+            }
+
+            fn floor(self) -> Self {
+                self.0.floor().into()
+            }
+
+            fn round(self) -> Self {
+                self.0.round().into()
+            }
+
+            fn fma(self, _a: Self, _b: Self) -> Self { // unimplemented: panics if called
+                // FIXME(programmerjake): implement once core_simd gains support:
+                // https://github.com/rust-lang/stdsimd/issues/102
+                todo!()
+            }
+
+            fn is_finite(self) -> Self::Bool {
+                self.0.is_finite().into()
+            }
+
+            fn from_bits(v: Self::BitsType) -> Self {
+                $ty::<LANES>::from_bits(v.0).into()
+            }
+
+            fn to_bits(self) -> Self::BitsType {
+                self.0.to_bits().into()
+            }
+        }
+    };
+}
+
+impl_float!(SimdF16, F16, SimdU16, SimdI16);
+impl_float!(SimdF32, f32, SimdU32, SimdI32);
+impl_float!(SimdF64, f64, SimdU64, SimdI64);
+
+macro_rules! impl_scalar_convert_to_helper { // implements one direction: `Wrapper<$src>` -> `Wrapper<$dest>` via `ConvertTo`
+    ($src:ty => $dest:ty) => {
+        impl<const LANES: usize> ConvertTo<Wrapper<$dest, LANES>> for Wrapper<$src, LANES>
+        where
+            SimdI8<LANES>: LanesAtMost32,
+            SimdU8<LANES>: LanesAtMost32,
+            Mask8<LANES>: Mask,
+            SimdI16<LANES>: LanesAtMost32,
+            SimdU16<LANES>: LanesAtMost32,
+            Mask16<LANES>: Mask,
+            SimdI32<LANES>: LanesAtMost32,
+            SimdU32<LANES>: LanesAtMost32,
+            SimdF32<LANES>: LanesAtMost32,
+            Mask32<LANES>: Mask,
+            SimdI64<LANES>: LanesAtMost32,
+            SimdU64<LANES>: LanesAtMost32,
+            SimdF64<LANES>: LanesAtMost32,
+            Mask64<LANES>: Mask,
+        {
+            fn to(self) -> Wrapper<$dest, LANES> {
+                let v: $dest = self.0.to(); // the type annotation selects which `ConvertTo` impl of the inner value is used
+                v.into()
+            }
+        }
+    };
+}
+
+macro_rules! impl_scalar_convert_to { // generates conversions in both directions for every pair of distinct listed types
+    ($first:ty $(, $ty:ty)*) => {
+        $(
+            impl_scalar_convert_to_helper!($first => $ty); // pair the head with each remaining type...
+            impl_scalar_convert_to_helper!($ty => $first); // ...in both directions
+        )*
+        impl_scalar_convert_to![$($ty),*]; // recurse on the tail; no same-type conversion is generated here
+    };
+    () => {}; // empty list ends the recursion
+}
+
+impl_scalar_convert_to![u8, i8, u16, i16, F16, u32, i32, u64, i64, f32, f64];
+
+macro_rules! impl_vector_convert_to_helper { // implements `ConvertTo` from wrapped `$src<LANES>` to wrapped `$dest<LANES>`
+    (($(#[From = $From:ident])? $src:ident, $src_prim:ident) => ($(#[From = $From2:ident])? $dest:ident, $dest_prim:ident)) => {
+        impl<const LANES: usize> ConvertTo<Wrapper<$dest<LANES>, LANES>>
+            for Wrapper<$src<LANES>, LANES>
+        where
+            SimdI8<LANES>: LanesAtMost32,
+            SimdU8<LANES>: LanesAtMost32,
+            Mask8<LANES>: Mask,
+            SimdI16<LANES>: LanesAtMost32,
+            SimdU16<LANES>: LanesAtMost32,
+            Mask16<LANES>: Mask,
+            SimdI32<LANES>: LanesAtMost32,
+            SimdU32<LANES>: LanesAtMost32,
+            SimdF32<LANES>: LanesAtMost32,
+            Mask32<LANES>: Mask,
+            SimdI64<LANES>: LanesAtMost32,
+            SimdU64<LANES>: LanesAtMost32,
+            SimdF64<LANES>: LanesAtMost32,
+            Mask64<LANES>: Mask,
+        {
+            fn to(self) -> Wrapper<$dest<LANES>, LANES> {
+                // FIXME(programmerjake): workaround https://github.com/rust-lang/stdsimd/issues/116
+                let src: [$src_prim; LANES] = self.0.into(); // unpack the vector into an array of lanes
+                let mut dest: [$dest_prim; LANES] = [Default::default(); LANES];
+                for i in 0..LANES {
+                    dest[i] = src[i].to(); // convert one lane at a time with the scalar `ConvertTo`
+                }
+                $dest::<LANES>::from(dest).into() // repack the array as a vector and wrap it
+            }
+        }
+
+        $(impl<const LANES: usize> $From<Wrapper<$src<LANES>, LANES>> for Wrapper<$dest<LANES>, LANES> // emitted only when the source carries `#[From = ...]`; `$From2` is matched but unused
+        where
+            SimdI8<LANES>: LanesAtMost32,
+            SimdU8<LANES>: LanesAtMost32,
+            Mask8<LANES>: Mask,
+            SimdI16<LANES>: LanesAtMost32,
+            SimdU16<LANES>: LanesAtMost32,
+            Mask16<LANES>: Mask,
+            SimdI32<LANES>: LanesAtMost32,
+            SimdU32<LANES>: LanesAtMost32,
+            SimdF32<LANES>: LanesAtMost32,
+            Mask32<LANES>: Mask,
+            SimdI64<LANES>: LanesAtMost32,
+            SimdU64<LANES>: LanesAtMost32,
+            SimdF64<LANES>: LanesAtMost32,
+            Mask64<LANES>: Mask,
+        {
+            fn from(v: Wrapper<$src<LANES>, LANES>) -> Self {
+                v.to() // reuse the `ConvertTo` impl generated above
+            }
+        })?
+    };
+}
+
+macro_rules! impl_vector_convert_to { // generates vector conversions in both directions for every pair of distinct entries
+    ($first:tt $(, $ty:tt)*) => {
+        $(
+            impl_vector_convert_to_helper!($first => $ty); // each entry is a parenthesised `(Vector, prim)` group passed as one `tt`
+            impl_vector_convert_to_helper!($ty => $first);
+        )*
+        impl_vector_convert_to![$($ty),*]; // recurse on the tail
+    };
+    () => {}; // empty list ends the recursion
+}
+
+impl_vector_convert_to![
+    (SimdU8, u8),
+    (SimdI8, i8),
+    (SimdU16, u16),
+    (SimdI16, i16),
+    (SimdF16, F16),
+    (SimdU32, u32),
+    (SimdI32, i32),
+    (SimdU64, u64),
+    (SimdI64, i64),
+    (SimdF32, f32),
+    (SimdF64, f64)
+];
+
+impl_vector_convert_to![
+    (
+        #[From = From]
+        Mask8,
+        bool
+    ),
+    (
+        #[From = From]
+        Mask16,
+        bool
+    ),
+    (
+        #[From = From]
+        Mask32,
+        bool
+    ),
+    (
+        #[From = From]
+        Mask64,
+        bool
+    )
+];
+
+macro_rules! impl_from_helper { // implements `From<$src> for $dest` by forwarding to `ConvertTo`
+    (#[lanes = $LANES:ident] $src:ty => $dest:ty) => {
+        impl<const $LANES: usize> From<$src> for $dest
+        where // bounds use the caller-supplied `$LANES` so they always name this impl's own const parameter
+            SimdI8<$LANES>: LanesAtMost32,
+            SimdU8<$LANES>: LanesAtMost32,
+            Mask8<$LANES>: Mask,
+            SimdI16<$LANES>: LanesAtMost32,
+            SimdU16<$LANES>: LanesAtMost32,
+            Mask16<$LANES>: Mask,
+            SimdI32<$LANES>: LanesAtMost32,
+            SimdU32<$LANES>: LanesAtMost32,
+            SimdF32<$LANES>: LanesAtMost32,
+            Mask32<$LANES>: Mask,
+            SimdI64<$LANES>: LanesAtMost32,
+            SimdU64<$LANES>: LanesAtMost32,
+            SimdF64<$LANES>: LanesAtMost32,
+            Mask64<$LANES>: Mask,
+        {
+            fn from(v: $src) -> Self {
+                <$src as ConvertTo<$dest>>::to(v) // fully qualified so the conversion target is unambiguous
+            }
+        }
+    };
+}
+
+macro_rules! impl_from { // fans one source type out to `impl_from_helper!` per destination
+    (#[lanes = $LANES:ident] $src:ty => [$($dest:ty),*]) => {
+        $(impl_from_helper!(#[lanes = $LANES] $src => $dest);)*
+    };
+}
+
+macro_rules! impl_froms { // takes one type per element kind and emits the `From` impls below
+    (
+        #[lanes = $LANES:ident]
+        #[u8] $u8:ty;
+        #[i8] $i8:ty;
+        #[u16] $u16:ty;
+        #[i16] $i16:ty;
+        #[f16] $f16:ty;
+        #[u32] $u32:ty;
+        #[i32] $i32:ty;
+        #[f32] $f32:ty;
+        #[u64] $u64:ty;
+        #[i64] $i64:ty;
+        #[f64] $f64:ty;
+    ) => { // each line: source type => list of destination types that get `From<source>`
+        impl_from!(#[lanes = $LANES] $u8 => [$u16, $i16, $f16, $u32, $i32, $f32, $u64, $i64, $f64]);
+        impl_from!(#[lanes = $LANES] $u16 => [$u32, $i32, $f32, $u64, $i64, $f64]);
+        impl_from!(#[lanes = $LANES] $u32 => [$u64, $i64, $f64]);
+        impl_from!(#[lanes = $LANES] $i8 => [$i16, $f16, $i32, $f32, $i64, $f64]);
+        impl_from!(#[lanes = $LANES] $i16 => [$i32, $f32, $i64, $f64]);
+        impl_from!(#[lanes = $LANES] $i32 => [$i64, $f64]);
+        impl_from!(#[lanes = $LANES] $f16 => [$f32, $f64]);
+        impl_from!(#[lanes = $LANES] $f32 => [$f64]);
+    }; // NOTE(review): lists look limited to value-preserving widenings — confirm intent
+}
+
+impl_froms! {
+    #[lanes = LANES]
+    #[u8] Wrapper<u8, LANES>;
+    #[i8] Wrapper<i8, LANES>;
+    #[u16] Wrapper<u16, LANES>;
+    #[i16] Wrapper<i16, LANES>;
+    #[f16] Wrapper<F16, LANES>;
+    #[u32] Wrapper<u32, LANES>;
+    #[i32] Wrapper<i32, LANES>;
+    #[f32] Wrapper<f32, LANES>;
+    #[u64] Wrapper<u64, LANES>;
+    #[i64] Wrapper<i64, LANES>;
+    #[f64] Wrapper<f64, LANES>;
+}
+
+impl_froms! {
+    #[lanes = LANES]
+    #[u8] Wrapper<SimdU8<LANES>, LANES>;
+    #[i8] Wrapper<SimdI8<LANES>, LANES>;
+    #[u16] Wrapper<SimdU16<LANES>, LANES>;
+    #[i16] Wrapper<SimdI16<LANES>, LANES>;
+    #[f16] Wrapper<SimdF16<LANES>, LANES>;
+    #[u32] Wrapper<SimdU32<LANES>, LANES>;
+    #[i32] Wrapper<SimdI32<LANES>, LANES>;
+    #[f32] Wrapper<SimdF32<LANES>, LANES>;
+    #[u64] Wrapper<SimdU64<LANES>, LANES>;
+    #[i64] Wrapper<SimdI64<LANES>, LANES>;
+    #[f64] Wrapper<SimdF64<LANES>, LANES>;
+}
+
+macro_rules! impl_select { // `Select` on a wrapped `$mask`, choosing between wrapped `$ty` values
+    ($mask:ident, $ty:ident) => {
+        impl<const LANES: usize> Select<Wrapper<$ty<LANES>, LANES>> for Wrapper<$mask<LANES>, LANES>
+        where
+            SimdI8<LANES>: LanesAtMost32,
+            SimdU8<LANES>: LanesAtMost32,
+            Mask8<LANES>: Mask,
+            SimdI16<LANES>: LanesAtMost32,
+            SimdU16<LANES>: LanesAtMost32,
+            Mask16<LANES>: Mask,
+            SimdI32<LANES>: LanesAtMost32,
+            SimdU32<LANES>: LanesAtMost32,
+            SimdF32<LANES>: LanesAtMost32,
+            Mask32<LANES>: Mask,
+            SimdI64<LANES>: LanesAtMost32,
+            SimdU64<LANES>: LanesAtMost32,
+            SimdF64<LANES>: LanesAtMost32,
+            Mask64<LANES>: Mask,
+        {
+            fn select(
+                self,
+                true_v: Wrapper<$ty<LANES>, LANES>,
+                false_v: Wrapper<$ty<LANES>, LANES>,
+            ) -> Wrapper<$ty<LANES>, LANES> {
+                self.0.select(true_v.0, false_v.0).into() // unwrap, use the inner `select`, re-wrap
+            }
+        }
+    };
+}
+
+impl_select!(Mask8, SimdU8);
+impl_select!(Mask8, SimdI8);
+impl_select!(Mask16, SimdU16);
+impl_select!(Mask16, SimdI16);
+impl_select!(Mask32, SimdU32);
+impl_select!(Mask32, SimdI32);
+impl_select!(Mask32, SimdF32);
+impl_select!(Mask64, SimdU64);
+impl_select!(Mask64, SimdI64);
+impl_select!(Mask64, SimdF64);
+
+#[cfg(not(feature = "f16"))] // stub `Select` impl, compiled only when the `f16` feature is off
+impl<const LANES: usize> Select<Wrapper<SimdF16<LANES>, LANES>> for Wrapper<Mask16<LANES>, LANES>
+where
+    SimdI8<LANES>: LanesAtMost32,
+    SimdU8<LANES>: LanesAtMost32,
+    Mask8<LANES>: Mask,
+    SimdI16<LANES>: LanesAtMost32,
+    SimdU16<LANES>: LanesAtMost32,
+    Mask16<LANES>: Mask,
+    SimdI32<LANES>: LanesAtMost32,
+    SimdU32<LANES>: LanesAtMost32,
+    SimdF32<LANES>: LanesAtMost32,
+    Mask32<LANES>: Mask,
+    SimdI64<LANES>: LanesAtMost32,
+    SimdU64<LANES>: LanesAtMost32,
+    SimdF64<LANES>: LanesAtMost32,
+    Mask64<LANES>: Mask,
+{
+    fn select(
+        self,
+        _true_v: Wrapper<SimdF16<LANES>, LANES>,
+        _false_v: Wrapper<SimdF16<LANES>, LANES>,
+    ) -> Wrapper<SimdF16<LANES>, LANES> {
+        panic_f16_feature_disabled() // diverges (`-> !`); both operands are ignored
+    }
+}
+
+macro_rules! impl_select_mask { // `Select<Self>` for a wrapped mask, built from `&`, `|` and `!`
+    ($ty:ident) => {
+        impl<const LANES: usize> Select<Self> for Wrapper<$ty<LANES>, LANES>
+        where
+            SimdI8<LANES>: LanesAtMost32,
+            SimdU8<LANES>: LanesAtMost32,
+            Mask8<LANES>: Mask,
+            SimdI16<LANES>: LanesAtMost32,
+            SimdU16<LANES>: LanesAtMost32,
+            Mask16<LANES>: Mask,
+            SimdI32<LANES>: LanesAtMost32,
+            SimdU32<LANES>: LanesAtMost32,
+            SimdF32<LANES>: LanesAtMost32,
+            Mask32<LANES>: Mask,
+            SimdI64<LANES>: LanesAtMost32,
+            SimdU64<LANES>: LanesAtMost32,
+            SimdF64<LANES>: LanesAtMost32,
+            Mask64<LANES>: Mask,
+        {
+            fn select(self, true_v: Self, false_v: Self) -> Self {
+                // FIXME(programmerjake): work around https://github.com/rust-lang/stdsimd/issues/114
+                (self & true_v) | (!self & false_v) // `true_v` where set, else `false_v` (assuming lane-wise ops)
+            }
+        }
+    };
+}
+
+impl_select_mask!(Mask8);
+impl_select_mask!(Mask16);
+impl_select_mask!(Mask32);
+impl_select_mask!(Mask64);
+
+macro_rules! impl_mask { // implements `Bool` for `Wrapper<$mask<LANES>, LANES>`
+    ($mask:ident) => {
+        impl<const LANES: usize> Bool for Wrapper<$mask<LANES>, LANES>
+        where
+            SimdI8<LANES>: LanesAtMost32,
+            SimdU8<LANES>: LanesAtMost32,
+            Mask8<LANES>: Mask,
+            SimdI16<LANES>: LanesAtMost32,
+            SimdU16<LANES>: LanesAtMost32,
+            Mask16<LANES>: Mask,
+            SimdI32<LANES>: LanesAtMost32,
+            SimdU32<LANES>: LanesAtMost32,
+            SimdF32<LANES>: LanesAtMost32,
+            Mask32<LANES>: Mask,
+            SimdI64<LANES>: LanesAtMost32,
+            SimdU64<LANES>: LanesAtMost32,
+            SimdF64<LANES>: LanesAtMost32,
+            Mask64<LANES>: Mask,
+        { // empty body: this impl defines no items of its own
+        }
+    };
+}
+
+impl_mask!(Mask8);
+impl_mask!(Mask16);
+impl_mask!(Mask32);
+impl_mask!(Mask64);
+
+impl<const LANES: usize> Bool for Wrapper<bool, LANES>
+where
+    SimdI8<LANES>: LanesAtMost32,
+    SimdU8<LANES>: LanesAtMost32,
+    Mask8<LANES>: Mask,
+    SimdI16<LANES>: LanesAtMost32,
+    SimdU16<LANES>: LanesAtMost32,
+    Mask16<LANES>: Mask,
+    SimdI32<LANES>: LanesAtMost32,
+    SimdU32<LANES>: LanesAtMost32,
+    SimdF32<LANES>: LanesAtMost32,
+    Mask32<LANES>: Mask,
+    SimdI64<LANES>: LanesAtMost32,
+    SimdU64<LANES>: LanesAtMost32,
+    SimdF64<LANES>: LanesAtMost32,
+    Mask64<LANES>: Mask,
+{ // scalar `bool` wrapper gets `Bool` too; empty body, no items defined here
+}
+
+macro_rules! impl_make { // `Make` plus scalar-wrapper `From` for `Wrapper<$ty<LANES>, LANES>`
+    ($ty:ident, $prim:ident) => {
+        impl<const LANES: usize> Make for Wrapper<$ty<LANES>, LANES>
+        where
+            SimdI8<LANES>: LanesAtMost32,
+            SimdU8<LANES>: LanesAtMost32,
+            Mask8<LANES>: Mask,
+            SimdI16<LANES>: LanesAtMost32,
+            SimdU16<LANES>: LanesAtMost32,
+            Mask16<LANES>: Mask,
+            SimdI32<LANES>: LanesAtMost32,
+            SimdU32<LANES>: LanesAtMost32,
+            SimdF32<LANES>: LanesAtMost32,
+            Mask32<LANES>: Mask,
+            SimdI64<LANES>: LanesAtMost32,
+            SimdU64<LANES>: LanesAtMost32,
+            SimdF64<LANES>: LanesAtMost32,
+            Mask64<LANES>: Mask,
+        {
+            type Prim = $prim;
+
+            type Context = StdSimd<LANES>;
+
+            fn ctx(self) -> Self::Context {
+                StdSimd::default() // `self` is not consulted; a default context is returned
+            }
+
+            fn make(_ctx: Self::Context, v: Self::Prim) -> Self {
+                $ty::splat(v).into() // build the vector from one primitive via `splat`, then wrap
+            }
+        }
+
+        impl<const LANES: usize> From<Wrapper<$prim, LANES>> for Wrapper<$ty<LANES>, LANES>
+        where
+            SimdI8<LANES>: LanesAtMost32,
+            SimdU8<LANES>: LanesAtMost32,
+            Mask8<LANES>: Mask,
+            SimdI16<LANES>: LanesAtMost32,
+            SimdU16<LANES>: LanesAtMost32,
+            Mask16<LANES>: Mask,
+            SimdI32<LANES>: LanesAtMost32,
+            SimdU32<LANES>: LanesAtMost32,
+            SimdF32<LANES>: LanesAtMost32,
+            Mask32<LANES>: Mask,
+            SimdI64<LANES>: LanesAtMost32,
+            SimdU64<LANES>: LanesAtMost32,
+            SimdF64<LANES>: LanesAtMost32,
+            Mask64<LANES>: Mask,
+        {
+            fn from(v: Wrapper<$prim, LANES>) -> Self {
+                $ty::splat(v.0).into() // same as `make`, starting from the wrapped scalar's inner value
+            }
+        }
+    };
+}
+
+impl_make!(Mask8, bool);
+impl_make!(Mask16, bool);
+impl_make!(Mask32, bool);
+impl_make!(Mask64, bool);
+impl_make!(SimdI8, i8);
+impl_make!(SimdI16, i16);
+impl_make!(SimdI32, i32);
+impl_make!(SimdI64, i64);
+impl_make!(SimdU8, u8);
+impl_make!(SimdU16, u16);
+impl_make!(SimdU32, u32);
+impl_make!(SimdU64, u64);
+impl_make!(SimdF16, F16);
+impl_make!(SimdF32, f32);
+impl_make!(SimdF64, f64);
+
+impl<const LANES: usize> Context for StdSimd<LANES>
+where
+    SimdI8<LANES>: LanesAtMost32,
+    SimdU8<LANES>: LanesAtMost32,
+    Mask8<LANES>: Mask,
+    SimdI16<LANES>: LanesAtMost32,
+    SimdU16<LANES>: LanesAtMost32,
+    Mask16<LANES>: Mask,
+    SimdI32<LANES>: LanesAtMost32,
+    SimdU32<LANES>: LanesAtMost32,
+    SimdF32<LANES>: LanesAtMost32,
+    Mask32<LANES>: Mask,
+    SimdI64<LANES>: LanesAtMost32,
+    SimdU64<LANES>: LanesAtMost32,
+    SimdF64<LANES>: LanesAtMost32,
+    Mask64<LANES>: Mask,
+{ // scalar kinds map to `Wrapper<prim, LANES>`; `Vec*` kinds wrap the matching SIMD/mask type
+    type Bool = Wrapper<bool, LANES>;
+    type U8 = Wrapper<u8, LANES>;
+    type I8 = Wrapper<i8, LANES>;
+    type U16 = Wrapper<u16, LANES>;
+    type I16 = Wrapper<i16, LANES>;
+    type F16 = Wrapper<F16, LANES>;
+    type U32 = Wrapper<u32, LANES>;
+    type I32 = Wrapper<i32, LANES>;
+    type F32 = Wrapper<f32, LANES>;
+    type U64 = Wrapper<u64, LANES>;
+    type I64 = Wrapper<i64, LANES>;
+    type F64 = Wrapper<f64, LANES>;
+    type VecBool8 = Wrapper<Mask8<LANES>, LANES>; // one mask type per element width (8/16/32/64)
+    type VecU8 = Wrapper<SimdU8<LANES>, LANES>;
+    type VecI8 = Wrapper<SimdI8<LANES>, LANES>;
+    type VecBool16 = Wrapper<Mask16<LANES>, LANES>;
+    type VecU16 = Wrapper<SimdU16<LANES>, LANES>;
+    type VecI16 = Wrapper<SimdI16<LANES>, LANES>;
+    type VecF16 = Wrapper<SimdF16<LANES>, LANES>;
+    type VecBool32 = Wrapper<Mask32<LANES>, LANES>;
+    type VecU32 = Wrapper<SimdU32<LANES>, LANES>;
+    type VecI32 = Wrapper<SimdI32<LANES>, LANES>;
+    type VecF32 = Wrapper<SimdF32<LANES>, LANES>;
+    type VecBool64 = Wrapper<Mask64<LANES>, LANES>;
+    type VecU64 = Wrapper<SimdU64<LANES>, LANES>;
+    type VecI64 = Wrapper<SimdI64<LANES>, LANES>;
+    type VecF64 = Wrapper<SimdF64<LANES>, LANES>;
+}
+
+pub type ScalarBool<const LANES: usize> = Wrapper<bool, LANES>; // Scalar*: a wrapped primitive
+pub type ScalarU8<const LANES: usize> = Wrapper<u8, LANES>;
+pub type ScalarI8<const LANES: usize> = Wrapper<i8, LANES>;
+pub type ScalarU16<const LANES: usize> = Wrapper<u16, LANES>;
+pub type ScalarI16<const LANES: usize> = Wrapper<i16, LANES>;
+pub type ScalarF16<const LANES: usize> = Wrapper<F16, LANES>;
+pub type ScalarU32<const LANES: usize> = Wrapper<u32, LANES>;
+pub type ScalarI32<const LANES: usize> = Wrapper<i32, LANES>;
+pub type ScalarF32<const LANES: usize> = Wrapper<f32, LANES>;
+pub type ScalarU64<const LANES: usize> = Wrapper<u64, LANES>;
+pub type ScalarI64<const LANES: usize> = Wrapper<i64, LANES>;
+pub type ScalarF64<const LANES: usize> = Wrapper<f64, LANES>;
+pub type VecBool8<const LANES: usize> = Wrapper<Mask8<LANES>, LANES>; // Vec*: a wrapped SIMD/mask type
+pub type VecU8<const LANES: usize> = Wrapper<SimdU8<LANES>, LANES>;
+pub type VecI8<const LANES: usize> = Wrapper<SimdI8<LANES>, LANES>;
+pub type VecBool16<const LANES: usize> = Wrapper<Mask16<LANES>, LANES>;
+pub type VecU16<const LANES: usize> = Wrapper<SimdU16<LANES>, LANES>;
+pub type VecI16<const LANES: usize> = Wrapper<SimdI16<LANES>, LANES>;
+pub type VecF16<const LANES: usize> = Wrapper<SimdF16<LANES>, LANES>;
+pub type VecBool32<const LANES: usize> = Wrapper<Mask32<LANES>, LANES>;
+pub type VecU32<const LANES: usize> = Wrapper<SimdU32<LANES>, LANES>;
+pub type VecI32<const LANES: usize> = Wrapper<SimdI32<LANES>, LANES>;
+pub type VecF32<const LANES: usize> = Wrapper<SimdF32<LANES>, LANES>;
+pub type VecBool64<const LANES: usize> = Wrapper<Mask64<LANES>, LANES>;
+pub type VecU64<const LANES: usize> = Wrapper<SimdU64<LANES>, LANES>;
+pub type VecI64<const LANES: usize> = Wrapper<SimdI64<LANES>, LANES>;
+pub type VecF64<const LANES: usize> = Wrapper<SimdF64<LANES>, LANES>;
+
+#[cfg(test)]
+mod tests { // exercises `ilogb_f32` through the 4-lane `StdSimd` context
+    use super::*;
+    use crate::algorithms::ilogb::{
+        ilogb_f32, ILOGB_NAN_RESULT_F32, ILOGB_OVERFLOW_RESULT_F32, ILOGB_UNDERFLOW_RESULT_F32,
+    };
+
+    #[inline(never)] // NOTE(review): presumably kept out of line to ease inspecting codegen — confirm
+    fn do_ilogb_f32x4(arg: VecF32<4>) -> VecI32<4> {
+        ilogb_f32(StdSimd::default(), arg)
+    }
+
+    #[test]
+    fn test_ilogb_f32x4() {
+        let ctx = StdSimd::<4>::default(); // every input below is one value repeated via `ctx.make`
+        assert_eq!(
+            do_ilogb_f32x4(ctx.make(0f32)),
+            ctx.make(ILOGB_UNDERFLOW_RESULT_F32)
+        );
+        assert_eq!(do_ilogb_f32x4(ctx.make(1f32)), ctx.make(0));
+        assert_eq!(do_ilogb_f32x4(ctx.make(2f32)), ctx.make(1));
+        assert_eq!(do_ilogb_f32x4(ctx.make(3f32)), ctx.make(1));
+        assert_eq!(do_ilogb_f32x4(ctx.make(3.99999f32)), ctx.make(1));
+        assert_eq!(do_ilogb_f32x4(ctx.make(0.5f32.powi(130))), ctx.make(-130)); // subnormal input
+        assert_eq!(do_ilogb_f32x4(ctx.make(0.5f32.powi(130))), ctx.make(-130));
+        assert_eq!(
+            do_ilogb_f32x4(ctx.make(f32::INFINITY)),
+            ctx.make(ILOGB_OVERFLOW_RESULT_F32)
+        );
+        assert_eq!(
+            do_ilogb_f32x4(ctx.make(f32::NAN)),
+            ctx.make(ILOGB_NAN_RESULT_F32)
+        );
+    }
+}
-- 
2.30.2