--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
+2019-02-25  Tamar Christina  <tamar.christina@arm.com>
+
+ * config/aarch64/arm_neon.h (vfmlal_low_u32, vfmlsl_low_u32,
+ vfmlalq_low_u32, vfmlslq_low_u32, vfmlal_high_u32, vfmlsl_high_u32,
+ vfmlalq_high_u32, vfmlslq_high_u32, vfmlal_lane_low_u32,
+ vfmlsl_lane_low_u32, vfmlal_laneq_low_u32, vfmlsl_laneq_low_u32,
+ vfmlalq_lane_low_u32, vfmlslq_lane_low_u32, vfmlalq_laneq_low_u32,
+ vfmlslq_laneq_low_u32, vfmlal_lane_high_u32, vfmlsl_lane_high_u32,
+ vfmlal_laneq_high_u32, vfmlsl_laneq_high_u32, vfmlalq_lane_high_u32,
+ vfmlslq_lane_high_u32, vfmlalq_laneq_high_u32, vfmlslq_laneq_high_u32):
+ Rename ...
+ (vfmlal_low_f16, vfmlsl_low_f16, vfmlalq_low_f16, vfmlslq_low_f16,
+ vfmlal_high_f16, vfmlsl_high_f16, vfmlalq_high_f16, vfmlslq_high_f16,
+ vfmlal_lane_low_f16, vfmlsl_lane_low_f16, vfmlal_laneq_low_f16,
+ vfmlsl_laneq_low_f16, vfmlalq_lane_low_f16, vfmlslq_lane_low_f16,
+ vfmlalq_laneq_low_f16, vfmlslq_laneq_low_f16, vfmlal_lane_high_f16,
+ vfmlsl_lane_high_f16, vfmlal_laneq_high_f16, vfmlsl_laneq_high_f16,
+ vfmlalq_lane_high_f16, vfmlslq_lane_high_f16, vfmlalq_laneq_high_f16,
+ vfmlslq_laneq_high_f16): ... To this.
+
2019-02-25 Alexander Monakov <amonakov@ispras.ru>
PR rtl-optimization/86096
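
(Illustration, not part of the patch.)  The rename is a source-level spelling change only: argument and result types are unchanged, so callers simply switch to the _f16 names.  A minimal sketch of an updated caller follows; the function name is hypothetical, and building it assumes a compiler providing these intrinsics and -march=armv8.2-a+fp16fml.

#include <arm_neon.h>

float32x2_t
accumulate_low (float32x2_t acc, float16x4_t a, float16x4_t b)
{
  /* Widening half-precision fused multiply-accumulate into the
     single-precision lanes of acc, using the low halves of a and b;
     previously spelled vfmlal_low_u32.  */
  return vfmlal_low_f16 (acc, a, b);
}
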
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlal_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
+vfmlal_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
{
return __builtin_aarch64_fmlal_lowv2sf (__r, __a, __b);
}
__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlsl_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
+vfmlsl_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
{
return __builtin_aarch64_fmlsl_lowv2sf (__r, __a, __b);
}
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlalq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
+vfmlalq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
{
return __builtin_aarch64_fmlalq_lowv4sf (__r, __a, __b);
}
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlslq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
+vfmlslq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
{
return __builtin_aarch64_fmlslq_lowv4sf (__r, __a, __b);
}
__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlal_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
+vfmlal_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
{
return __builtin_aarch64_fmlal_highv2sf (__r, __a, __b);
}
__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlsl_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
+vfmlsl_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
{
return __builtin_aarch64_fmlsl_highv2sf (__r, __a, __b);
}
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlalq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
+vfmlalq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
{
return __builtin_aarch64_fmlalq_highv4sf (__r, __a, __b);
}
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlslq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
+vfmlslq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
{
return __builtin_aarch64_fmlslq_highv4sf (__r, __a, __b);
}
__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlal_lane_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
+vfmlal_lane_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
const int __lane)
{
return __builtin_aarch64_fmlal_lane_lowv2sf (__r, __a, __b, __lane);
__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlsl_lane_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
+vfmlsl_lane_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
const int __lane)
{
return __builtin_aarch64_fmlsl_lane_lowv2sf (__r, __a, __b, __lane);
__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlal_laneq_low_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
+vfmlal_laneq_low_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
const int __lane)
{
return __builtin_aarch64_fmlal_laneq_lowv2sf (__r, __a, __b, __lane);
__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlsl_laneq_low_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
+vfmlsl_laneq_low_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
const int __lane)
{
return __builtin_aarch64_fmlsl_laneq_lowv2sf (__r, __a, __b, __lane);
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlalq_lane_low_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
+vfmlalq_lane_low_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
const int __lane)
{
return __builtin_aarch64_fmlalq_lane_lowv4sf (__r, __a, __b, __lane);
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlslq_lane_low_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
+vfmlslq_lane_low_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
const int __lane)
{
return __builtin_aarch64_fmlslq_lane_lowv4sf (__r, __a, __b, __lane);
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlalq_laneq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
+vfmlalq_laneq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
const int __lane)
{
return __builtin_aarch64_fmlalq_laneq_lowv4sf (__r, __a, __b, __lane);
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlslq_laneq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
+vfmlslq_laneq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
const int __lane)
{
return __builtin_aarch64_fmlslq_laneq_lowv4sf (__r, __a, __b, __lane);
__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlal_lane_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
+vfmlal_lane_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
const int __lane)
{
return __builtin_aarch64_fmlal_lane_highv2sf (__r, __a, __b, __lane);
__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlsl_lane_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
+vfmlsl_lane_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
const int __lane)
{
return __builtin_aarch64_fmlsl_lane_highv2sf (__r, __a, __b, __lane);
__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlal_laneq_high_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
+vfmlal_laneq_high_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
const int __lane)
{
return __builtin_aarch64_fmlal_laneq_highv2sf (__r, __a, __b, __lane);
__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlsl_laneq_high_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
+vfmlsl_laneq_high_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
const int __lane)
{
return __builtin_aarch64_fmlsl_laneq_highv2sf (__r, __a, __b, __lane);
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlalq_lane_high_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
+vfmlalq_lane_high_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
const int __lane)
{
return __builtin_aarch64_fmlalq_lane_highv4sf (__r, __a, __b, __lane);
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlslq_lane_high_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
+vfmlslq_lane_high_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
const int __lane)
{
return __builtin_aarch64_fmlslq_lane_highv4sf (__r, __a, __b, __lane);
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlalq_laneq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
+vfmlalq_laneq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
const int __lane)
{
return __builtin_aarch64_fmlalq_laneq_highv4sf (__r, __a, __b, __lane);
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlslq_laneq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
+vfmlslq_laneq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
const int __lane)
{
return __builtin_aarch64_fmlslq_laneq_highv4sf (__r, __a, __b, __lane);
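
(Illustration, not part of the patch.)  The by-lane forms take a constant lane index into the third operand: four half-precision lanes for a float16x4_t, eight for a float16x8_t.  A hypothetical caller of one of the renamed laneq intrinsics, again assuming -march=armv8.2-a+fp16fml:

#include <arm_neon.h>

float32x4_t
fml_step (float32x4_t acc, float16x8_t a, float16x8_t b)
{
  /* Multiply the high half of a by lane 7 of b, widen to single precision
     and accumulate into acc; previously spelled vfmlalq_laneq_high_u32.  */
  return vfmlalq_laneq_high_f16 (acc, a, b, 7);
}
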
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
+2019-02-25  Tamar Christina  <tamar.christina@arm.com>
+
+ * gcc.target/aarch64/fp16_fmul_high.h (test_vfmlal_high_u32,
+ test_vfmlalq_high_u32, test_vfmlsl_high_u32, test_vfmlslq_high_u32):
+ Rename ...
+ (test_vfmlal_high_f16, test_vfmlalq_high_f16, test_vfmlsl_high_f16,
+ test_vfmlslq_high_f16): ... To this.
+ * gcc.target/aarch64/fp16_fmul_lane_high.h (test_vfmlal_lane_high_u32,
+ tets_vfmlsl_lane_high_u32, test_vfmlal_laneq_high_u32,
+ test_vfmlsl_laneq_high_u32, test_vfmlalq_lane_high_u32,
+ test_vfmlslq_lane_high_u32, test_vfmlalq_laneq_high_u32,
+ test_vfmlslq_laneq_high_u32): Rename ...
+ (test_vfmlal_lane_high_f16, tets_vfmlsl_lane_high_f16,
+ test_vfmlal_laneq_high_f16, test_vfmlsl_laneq_high_f16,
+ test_vfmlalq_lane_high_f16, test_vfmlslq_lane_high_f16,
+ test_vfmlalq_laneq_high_f16, test_vfmlslq_laneq_high_f16): ... To this.
+ * gcc.target/aarch64/fp16_fmul_lane_low.h (test_vfmlal_lane_low_u32,
+ test_vfmlsl_lane_low_u32, test_vfmlal_laneq_low_u32,
+ test_vfmlsl_laneq_low_u32, test_vfmlalq_lane_low_u32,
+ test_vfmlslq_lane_low_u32, test_vfmlalq_laneq_low_u32,
+ test_vfmlslq_laneq_low_u32): Rename ...
+ (test_vfmlal_lane_low_f16, test_vfmlsl_lane_low_f16,
+ test_vfmlal_laneq_low_f16, test_vfmlsl_laneq_low_f16,
+ test_vfmlalq_lane_low_f16, test_vfmlslq_lane_low_f16,
+ test_vfmlalq_laneq_low_f16, test_vfmlslq_laneq_low_f16): ... To this.
+ * gcc.target/aarch64/fp16_fmul_low.h (test_vfmlal_low_u32,
+ test_vfmlalq_low_u32, test_vfmlsl_low_u32, test_vfmlslq_low_u32):
+ Rename ...
+ (test_vfmlal_low_f16, test_vfmlalq_low_f16, test_vfmlsl_low_f16,
+ test_vfmlslq_low_f16): ... To this.
+ * lib/target-supports.exp
+ (check_effective_target_arm_fp16fml_neon_ok_nocache): Update test.
+
2019-02-25 Dominique d'Humieres <dominiq@gcc.gnu.org>
PR fortran/89282
#include "arm_neon.h"
float32x2_t
-test_vfmlal_high_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+test_vfmlal_high_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
{
- return vfmlal_high_u32 (r, a, b);
+ return vfmlal_high_f16 (r, a, b);
}
float32x4_t
-test_vfmlalq_high_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+test_vfmlalq_high_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
{
- return vfmlalq_high_u32 (r, a, b);
+ return vfmlalq_high_f16 (r, a, b);
}
float32x2_t
-test_vfmlsl_high_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+test_vfmlsl_high_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
{
- return vfmlsl_high_u32 (r, a, b);
+ return vfmlsl_high_f16 (r, a, b);
}
float32x4_t
-test_vfmlslq_high_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+test_vfmlslq_high_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
{
- return vfmlslq_high_u32 (r, a, b);
+ return vfmlslq_high_f16 (r, a, b);
}
#include "arm_neon.h"
float32x2_t
-test_vfmlal_lane_high_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+test_vfmlal_lane_high_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
{
- return vfmlal_lane_high_u32 (r, a, b, 0);
+ return vfmlal_lane_high_f16 (r, a, b, 0);
}
float32x2_t
-tets_vfmlsl_lane_high_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+tets_vfmlsl_lane_high_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
{
- return vfmlsl_lane_high_u32 (r, a, b, 0);
+ return vfmlsl_lane_high_f16 (r, a, b, 0);
}
float32x2_t
-test_vfmlal_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b)
+test_vfmlal_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b)
{
- return vfmlal_laneq_high_u32 (r, a, b, 6);
+ return vfmlal_laneq_high_f16 (r, a, b, 6);
}
float32x2_t
-test_vfmlsl_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b)
+test_vfmlsl_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b)
{
- return vfmlsl_laneq_high_u32 (r, a, b, 6);
+ return vfmlsl_laneq_high_f16 (r, a, b, 6);
}
float32x4_t
-test_vfmlalq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b)
+test_vfmlalq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b)
{
- return vfmlalq_lane_high_u32 (r, a, b, 1);
+ return vfmlalq_lane_high_f16 (r, a, b, 1);
}
float32x4_t
-test_vfmlslq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b)
+test_vfmlslq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b)
{
- return vfmlslq_lane_high_u32 (r, a, b, 1);
+ return vfmlslq_lane_high_f16 (r, a, b, 1);
}
float32x4_t
-test_vfmlalq_laneq_high_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+test_vfmlalq_laneq_high_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
{
- return vfmlalq_laneq_high_u32 (r, a, b, 7);
+ return vfmlalq_laneq_high_f16 (r, a, b, 7);
}
float32x4_t
-test_vfmlslq_laneq_high_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+test_vfmlslq_laneq_high_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
{
- return vfmlslq_laneq_high_u32 (r, a, b, 7);
+ return vfmlslq_laneq_high_f16 (r, a, b, 7);
}
#include "arm_neon.h"
float32x2_t
-test_vfmlal_lane_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+test_vfmlal_lane_low_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
{
- return vfmlal_lane_low_u32 (r, a, b, 0);
+ return vfmlal_lane_low_f16 (r, a, b, 0);
}
float32x2_t
-test_vfmlsl_lane_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+test_vfmlsl_lane_low_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
{
- return vfmlsl_lane_low_u32 (r, a, b, 0);
+ return vfmlsl_lane_low_f16 (r, a, b, 0);
}
float32x2_t
-test_vfmlal_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b)
+test_vfmlal_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b)
{
- return vfmlal_laneq_low_u32 (r, a, b, 6);
+ return vfmlal_laneq_low_f16 (r, a, b, 6);
}
float32x2_t
-test_vfmlsl_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b)
+test_vfmlsl_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b)
{
- return vfmlsl_laneq_low_u32 (r, a, b, 6);
+ return vfmlsl_laneq_low_f16 (r, a, b, 6);
}
float32x4_t
-test_vfmlalq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b)
+test_vfmlalq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b)
{
- return vfmlalq_lane_low_u32 (r, a, b, 1);
+ return vfmlalq_lane_low_f16 (r, a, b, 1);
}
float32x4_t
-test_vfmlslq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b)
+test_vfmlslq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b)
{
- return vfmlslq_lane_low_u32 (r, a, b, 1);
+ return vfmlslq_lane_low_f16 (r, a, b, 1);
}
float32x4_t
-test_vfmlalq_laneq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+test_vfmlalq_laneq_low_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
{
- return vfmlalq_laneq_low_u32 (r, a, b, 7);
+ return vfmlalq_laneq_low_f16 (r, a, b, 7);
}
float32x4_t
-test_vfmlslq_laneq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+test_vfmlslq_laneq_low_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
{
- return vfmlslq_laneq_low_u32 (r, a, b, 7);
+ return vfmlslq_laneq_low_f16 (r, a, b, 7);
}
#include "arm_neon.h"
float32x2_t
-test_vfmlal_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+test_vfmlal_low_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
{
- return vfmlal_low_u32 (r, a, b);
+ return vfmlal_low_f16 (r, a, b);
}
float32x4_t
-test_vfmlalq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+test_vfmlalq_low_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
{
- return vfmlalq_low_u32 (r, a, b);
+ return vfmlalq_low_f16 (r, a, b);
}
float32x2_t
-test_vfmlsl_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+test_vfmlsl_low_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
{
- return vfmlsl_low_u32 (r, a, b);
+ return vfmlsl_low_f16 (r, a, b);
}
float32x4_t
-test_vfmlslq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+test_vfmlslq_low_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
{
- return vfmlslq_low_u32 (r, a, b);
+ return vfmlslq_low_f16 (r, a, b);
}
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
float32x2_t
foo (float32x2_t r, float16x4_t a, float16x4_t b)
{
- return vfmlal_high_u32 (r, a, b);
+ return vfmlal_high_f16 (r, a, b);
}
} "$flags -march=armv8.2-a+fp16fml"] } {
set et_arm_fp16fml_neon_flags "$flags -march=armv8.2-a+fp16fml"
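
(Illustration, not part of the patch.)  The updated effective-target probe above simply tries to compile a small fp16fml kernel with "-march=armv8.2-a+fp16fml"; a test that depends on it would typically be gated along the lines sketched below.  The dg- directive spellings follow the usual target-supports naming convention and are an assumption here, as is the function name.

/* { dg-do compile } */
/* { dg-require-effective-target arm_fp16fml_neon_ok } */
/* { dg-add-options arm_fp16fml_neon } */

#include <arm_neon.h>

float32x2_t
use_fmlsl (float32x2_t r, float16x4_t a, float16x4_t b)
{
  /* After the rename, tests call the _f16 spelling.  */
  return vfmlsl_high_f16 (r, a, b);
}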