src/f16.rs

   1 use crate::{
   2     prim::PrimFloat,
   3     scalar::Value,
   4     traits::{ConvertFrom, ConvertTo, Float},
   5 };
   6 use core::{
   7     fmt,
   8     ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Rem, RemAssign, Sub, SubAssign},
   9 };
  10
  11 #[cfg(feature = "f16")]
  12 use half::f16 as F16Impl;
  13
  14 #[cfg(not(feature = "f16"))]
  15 type F16Impl = u16;
  16
  17 #[derive(Clone, Copy, PartialEq, PartialOrd)]
  18 #[repr(transparent)]
  19 pub struct F16(F16Impl);
  20
  /// Reports that an `F16` operation was requested in a build without the `f16`
  /// feature.
  ///
  /// `#[track_caller]` makes the panic location point at the caller rather than
  /// at this helper, and `#[cold]` tells the optimizer that reaching this
  /// function is the unlikely path.
  ///
  /// # Panics
  ///
  /// Always panics with the message `f16 feature is not enabled`.
  #[cfg(not(feature = "f16"))]
  #[cold]
  #[track_caller]
  pub(crate) fn panic_f16_feature_disabled() -> ! {
      panic!("f16 feature is not enabled")
  }
  26
  // `f16_impl!(expr, [names...])` with the `f16` feature enabled: the macro is a
  // pass-through that expands to `expr` unchanged. The bracketed identifier list
  // is accepted only so call sites look the same in both configurations.
  #[cfg(feature = "f16")]
  macro_rules! f16_impl {
      ($body:expr, [$($unused:ident),*]) => {
          $body
      };
  }
  33
  // `f16_impl!(expr, [names...])` with the `f16` feature disabled: `expr` is
  // dropped entirely (it would not type-check against the `u16` stand-in) and the
  // expansion panics instead. Each listed identifier is mentioned once so that
  // arguments only used inside `expr` do not trigger unused-variable warnings.
  #[cfg(not(feature = "f16"))]
  macro_rules! f16_impl {
      ($body:expr, [$($unused:ident),*]) => {{
          $(
              let _ = $unused;
          )*
          panic_f16_feature_disabled()
      }};
  }
  43
  44 impl fmt::Display for F16 {
  45     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
  46         f16_impl!(self.0.fmt(f), [f])
  47     }
  48 }
  49
  50 impl fmt::LowerExp for F16 {
  51     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
  52         f16_impl!(self.0.fmt(f), [f])
  53     }
  54 }
  55
  56 impl fmt::UpperExp for F16 {
  57     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
  58         f16_impl!(self.0.fmt(f), [f])
  59     }
  60 }
  61
  62 impl fmt::Debug for F16 {
  63     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
  64         f16_impl!(self.0.fmt(f), [f])
  65     }
  66 }
  67
  68 impl Default for F16 {
  69     fn default() -> Self {
  70         f16_impl!(F16(F16Impl::default()), [])
  71     }
  72 }
  73
  74 impl From<F16Impl> for F16 {
  75     fn from(v: F16Impl) -> Self {
  76         F16(v)
  77     }
  78 }
  79
  80 impl From<F16> for F16Impl {
  81     fn from(v: F16) -> Self {
  82         v.0
  83     }
  84 }
  85
  // For every listed source type, generates `From<T> for F16` (delegating to the
  // backing type's own `From<T>`) plus a `ConvertFrom<T>` impl that simply
  // forwards to that `From` impl.
  macro_rules! impl_f16_from {
      ($($src:ident,)*) => {
          $(
              impl From<$src> for F16 {
                  fn from(v: $src) -> Self {
                      f16_impl!(F16(<F16Impl as From<$src>>::from(v)), [v])
                  }
              }

              impl ConvertFrom<$src> for F16 {
                  fn cvt_from(v: $src) -> F16 {
                      <F16 as From<$src>>::from(v)
                  }
              }
          )*
      };
  }
 103
 // For every listed target type, generates `From<F16> for T` (delegating to the
 // backing type's conversion into `T`) plus a `ConvertFrom<F16>` impl that
 // simply forwards to that `From` impl.
 macro_rules! impl_from_f16 {
     ($($dst:ident,)*) => {
         $(
             impl From<F16> for $dst {
                 fn from(v: F16) -> Self {
                     f16_impl!($dst::from(v.0), [v])
                 }
             }

             impl ConvertFrom<F16> for $dst {
                 fn cvt_from(v: F16) -> Self {
                     <$dst as From<F16>>::from(v)
                 }
             }
         )*
     };
 }
 121
 122 impl_f16_from![i8, u8,];
 123
 124 impl_from_f16![f32, f64,];
 125
 // Generates `ConvertFrom<int> for F16` for every listed integer type by
 // converting the integer to `f32` first and then narrowing to `F16`.
 macro_rules! impl_int_to_f16 {
     ($($int_ty:ident),*) => {
         $(
             impl ConvertFrom<$int_ty> for F16 {
                 fn cvt_from(v: $int_ty) -> Self {
                     // Going through f32 is correct: f32 has enough mantissa
                     // bits that f16 overflows to infinity before f32 stops
                     // being able to represent integer values properly.
                     let widened = v as f32;
                     <F16 as ConvertFrom<f32>>::cvt_from(widened)
                 }
             }
         )*
     };
 }
 140
 // Generates `ConvertFrom<F16> for int` for every listed integer type by
 // widening the `F16` to `f32` and then applying an `as` cast to the integer.
 macro_rules! impl_f16_to_int {
     ($($int_ty:ident),*) => {
         $(
             impl ConvertFrom<F16> for $int_ty {
                 fn cvt_from(v: F16) -> Self {
                     let widened: f32 = v.into();
                     widened as $int_ty
                 }
             }
         )*
     };
 }
 152
 153 impl_int_to_f16![i16, u16, i32, u32, i64, u64, i128, u128];
 154 impl_f16_to_int![i8, u8, i16, u16, i32, u32, i64, u64, i128, u128];
 155
 156 impl ConvertFrom<f32> for F16 {
 157     fn cvt_from(v: f32) -> Self {
 158         f16_impl!(F16(F16Impl::from_f32(v)), [v])
 159     }
 160 }
 161
 162 impl ConvertFrom<f64> for F16 {
 163     fn cvt_from(v: f64) -> Self {
 164         f16_impl!(F16(F16Impl::from_f64(v)), [v])
 165     }
 166 }
 167
 168 impl Neg for F16 {
 169     type Output = Self;
 170
 171     fn neg(self) -> Self::Output {
 172         f16_impl!(Self::from_bits(self.to_bits() ^ 0x8000), [])
 173     }
 174 }
 175
 // For each `Op, op, OpAssign, op_assign;` quadruple, implements the binary
 // operator on `F16` by widening both operands to `f32`, applying the `f32`
 // operator and converting the result back with `.to()`. The compound
 // assignment form is then defined in terms of the binary operator.
 macro_rules! impl_bin_op_using_f32 {
     ($($op:ident, $op_fn:ident, $op_assign:ident, $op_assign_fn:ident;)*) => {
         $(
             impl $op for F16 {
                 type Output = Self;

                 fn $op_fn(self, rhs: Self) -> Self::Output {
                     let wide_lhs = f32::from(self);
                     let wide_rhs = f32::from(rhs);
                     $op::$op_fn(wide_lhs, wide_rhs).to()
                 }
             }

             impl $op_assign for F16 {
                 fn $op_assign_fn(&mut self, rhs: Self) {
                     *self = $op::$op_fn(*self, rhs);
                 }
             }
         )*
     };
 }
 195
 196 impl_bin_op_using_f32! {
 197     Add, add, AddAssign, add_assign;
 198     Sub, sub, SubAssign, sub_assign;
 199     Mul, mul, MulAssign, mul_assign;
 200     Div, div, DivAssign, div_assign;
 201     Rem, rem, RemAssign, rem_assign;
 202 }
 203
 204 impl F16 {
 205     pub fn from_bits(v: u16) -> Self {
 206         #[cfg(feature = "f16")]
 207         return F16(F16Impl::from_bits(v));
 208         #[cfg(not(feature = "f16"))]
 209         return F16(v);
 210     }
 211     pub fn to_bits(self) -> u16 {
 212         #[cfg(feature = "f16")]
 213         return self.0.to_bits();
 214         #[cfg(not(feature = "f16"))]
 215         return self.0;
 216     }
 217     pub fn abs(self) -> Self {
 218         f16_impl!(Self::from_bits(self.to_bits() & 0x7FFF), [])
 219     }
 220     pub fn copysign(self, sign: Self) -> Self {
 221         f16_impl!(
 222             Self::from_bits((self.to_bits() & 0x7FFF) | (sign.to_bits() & 0x8000)),
 223             [sign]
 224         )
 225     }
 226     pub fn trunc(self) -> Self {
 227         return PrimFloat::trunc(f32::from(self)).to();
 228     }
 229     pub fn ceil(self) -> Self {
 230         return PrimFloat::ceil(f32::from(self)).to();
 231     }
 232     pub fn floor(self) -> Self {
 233         return PrimFloat::floor(f32::from(self)).to();
 234     }
 235     /// round to nearest, ties to unspecified
 236     pub fn round(self) -> Self {
 237         return PrimFloat::round(f32::from(self)).to();
 238     }
 239     #[cfg(feature = "fma")]
 240     pub fn fma(self, a: Self, b: Self) -> Self {
 241         (f64::from(self) * f64::from(a) + f64::from(b)).to()
 242     }
 243
 244     pub fn is_nan(self) -> bool {
 245         f16_impl!(self.0.is_nan(), [])
 246     }
 247
 248     pub fn is_infinite(self) -> bool {
 249         f16_impl!(self.0.is_infinite(), [])
 250     }
 251
 252     pub fn is_finite(self) -> bool {
 253         f16_impl!(self.0.is_finite(), [])
 254     }
 255 }
 256
 257 impl Float for Value<F16> {
 258     type PrimFloat = F16;
 259     type BitsType = Value<u16>;
 260     type SignedBitsType = Value<i16>;
 261
 262     fn abs(self) -> Self {
 263         Value(self.0.abs())
 264     }
 265
 266     fn trunc(self) -> Self {
 267         Value(self.0.trunc())
 268     }
 269
 270     fn ceil(self) -> Self {
 271         Value(self.0.ceil())
 272     }
 273
 274     fn floor(self) -> Self {
 275         Value(self.0.floor())
 276     }
 277
 278     fn round(self) -> Self {
 279         Value(self.0.round())
 280     }
 281
 282     #[cfg(feature = "fma")]
 283     fn fma(self, a: Self, b: Self) -> Self {
 284         Value(self.0.fma(a.0, b.0))
 285     }
 286
 287     fn is_nan(self) -> Self::Bool {
 288         Value(self.0.is_nan())
 289     }
 290
 291     fn is_infinite(self) -> Self::Bool {
 292         Value(self.0.is_infinite())
 293     }
 294
 295     fn is_finite(self) -> Self::Bool {
 296         Value(self.0.is_finite())
 297     }
 298
 299     fn from_bits(v: Self::BitsType) -> Self {
 300         Value(F16::from_bits(v.0))
 301     }
 302
 303     fn to_bits(self) -> Self::BitsType {
 304         Value(self.0.to_bits())
 305     }
 306 }
 307
 #[cfg(test)]
 mod tests {
     use super::*;
     use core::cmp::Ordering;

     /// `abs` must clear the sign bit and leave all other bits untouched.
     #[test]
     #[cfg_attr(
         not(feature = "f16"),
         should_panic(expected = "f16 feature is not enabled")
     )]
     fn test_abs() {
         let cases: [(u16, u16); 5] = [
             (0x8000, 0),
             (0, 0),
             (0x8ABC, 0xABC),
             (0xFE00, 0x7E00),
             (0x7E00, 0x7E00),
         ];
         for &(input, expected) in &cases {
             assert_eq!(F16::from_bits(input).abs().to_bits(), expected);
         }
     }

     /// `neg` must toggle the sign bit and leave all other bits untouched.
     #[test]
     #[cfg_attr(
         not(feature = "f16"),
         should_panic(expected = "f16 feature is not enabled")
     )]
     fn test_neg() {
         let cases: [(u16, u16); 5] = [
             (0x8000, 0),
             (0, 0x8000),
             (0x8ABC, 0xABC),
             (0xFE00, 0x7E00),
             (0x7E00, 0xFE00),
         ];
         for &(input, expected) in &cases {
             assert_eq!((-F16::from_bits(input)).to_bits(), expected);
         }
     }

     /// Checks `u32 -> F16` against a bit-level reference conversion for every
     /// value below `0x20000`.
     #[test]
     #[cfg_attr(
         not(feature = "f16"),
         should_panic(expected = "f16 feature is not enabled")
     )]
     fn test_int_to_f16() {
         assert_eq!(F16::to_bits(0u32.to()), 0);
         for v in 1..0x20000u32 {
             // Normalize so the most significant set bit sits at bit 31; the
             // top 11 bits are kept and the low 21 bits are rounded away.
             let leading_zeros = v.leading_zeros();
             let normalized = v << leading_zeros;
             let discarded = normalized & 0x1FFFFF;
             // round to nearest, ties to even
             let round_up = match discarded.cmp(&0x100000) {
                 Ordering::Less => false,
                 Ordering::Equal => (normalized & 0x200000) != 0,
                 Ordering::Greater => true,
             };
             let increment = if round_up { 0x200000 } else { 0 };
             let (rounded, carry) = (normalized & !0x1FFFFF).overflowing_add(increment);
             // A carry out of bit 31 means rounding bumped the value up to the
             // next power of two.
             let mantissa = if carry {
                 (rounded >> 22) as u16 + 0x400
             } else {
                 (rounded >> 21) as u16
             };
             assert_eq!((mantissa & !0x3FF), 0x400);
             let exponent = 31 - leading_zeros as u16 + 15 + carry as u16;
             // An exponent field of 0x1F or more cannot hold a finite value, so
             // the expected result is 0x7C00.
             let expected = if exponent < 0x1F {
                 (mantissa & 0x3FF) + (exponent << 10)
             } else {
                 0x7C00
             };
             let actual = F16::to_bits(v.to());
             assert_eq!(
                 actual, expected,
                 "actual = {:#X}, expected = {:#X}, v = {:#X}",
                 actual, expected, v
             );
         }
     }
 }