[PATCH 6/17][ARM] Add data processing intrinsics for float16_t.
author     Matthew Wahab <matthew.wahab@arm.com>
           Fri, 23 Sep 2016 09:23:01 +0000 (09:23 +0000)
committer  Matthew Wahab <mwahab@gcc.gnu.org>
           Fri, 23 Sep 2016 09:23:01 +0000 (09:23 +0000)
gcc/
2016-09-23  Matthew Wahab  <matthew.wahab@arm.com>

* config/arm/arm.c (arm_evpc_neon_vuzp): Add support for V8HF and
V4HF modes.
(arm_evpc_neon_vtrn): Likewise.
(arm_evpc_neon_vrev): Likewise.
(arm_evpc_neon_vext): Likewise.
* config/arm/arm_neon.h (vbsl_f16): New.
(vbslq_f16): New.
(vdup_n_f16): New.
(vdupq_n_f16): New.
(vdup_lane_f16): New.
(vdupq_lane_f16): New.
(vext_f16): New.
(vextq_f16): New.
(vmov_n_f16): New.
(vmovq_n_f16): New.
(vrev64_f16): New.
(vrev64q_f16): New.
(vtrn_f16): New.
(vtrnq_f16): New.
(vuzp_f16): New.
(vuzpq_f16): New.
(vzip_f16): New.
(vzipq_f16): New.
* config/arm/arm_neon_builtins.def (vdup_n): New (v8hf, v4hf variants).
(vdup_lane): New (v8hf, v4hf variants).
(vext): New (v8hf, v4hf variants).
(vbsl): New (v8hf, v4hf variants).
* config/arm/iterators.md (VDQWH): New.
(VH): New.
(V_double_vector_mode): Add V8HF and V4HF.  Fix white-space.
(Scalar_mul_8_16): Fix white-space.
(Is_d_reg): Add V4HF and V8HF.
* config/arm/neon.md (neon_vdup_lane<mode>_internal): New.
(neon_vdup_lane<mode>): New.
(neon_vtrn<mode>_internal): Replace VDQW with VDQWH.
(*neon_vtrn<mode>_insn): Likewise.
(neon_vzip<mode>_internal): Likewise.  Also fix white-space.
(*neon_vzip<mode>_insn): Likewise.
(neon_vuzp<mode>_internal): Likewise.
(*neon_vuzp<mode>_insn): Likewise.
* config/arm/vec-common.md (vec_perm_const<mode>): New.

testsuite/
2016-09-23  Matthew Wahab  <matthew.wahab@arm.com>

* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
(FP16_SUPPORTED): New.
(expected-hfloat-16x4): Make conditional on __fp16 support.
(expected-hfloat-16x8): Likewise.
(vdup_n_f16): Disable for non-AArch64 targets.
* gcc.target/aarch64/advsimd-intrinsics/vbsl.c: Add __fp16 tests,
conditional on FP16_SUPPORTED.
* gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vext.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vrev.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc: Add support
for testing __fp16.
* gcc.target/aarch64/advsimd-intrinsics/vtrn.c: Add __fp16 tests,
conditional on FP16_SUPPORTED.
* gcc.target/aarch64/advsimd-intrinsics/vuzp.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vzip.c: Likewise.
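
For reference, a minimal usage sketch of the new data-processing
intrinsics (illustrative only, not part of the patch; assumes a target
with __ARM_FP16_FORMAT_IEEE, e.g. -mfpu=neon-fp16):

    #include <arm_neon.h>

    float16x4_t
    example (float16_t x, float16x4_t a, float16x4_t b)
    {
      float16x4_t v = vdup_n_f16 (x);       /* { x, x, x, x } */
      float16x4_t e = vext_f16 (a, b, 2);   /* { a[2], a[3], b[0], b[1] } */
      float16x4x2_t t = vtrn_f16 (v, e);    /* transpose lane pairs */
      /* Select all lanes from t.val[0]; a degenerate but valid vbsl.  */
      return vbsl_f16 (vdup_n_u16 (0xffff), t.val[0], t.val[1]);
    }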

From-SVN: r240404

18 files changed:
gcc/ChangeLog
gcc/config/arm/arm.c
gcc/config/arm/arm_neon.h
gcc/config/arm/arm_neon_builtins.def
gcc/config/arm/iterators.md
gcc/config/arm/neon.md
gcc/config/arm/vec-common.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c

index d5b764d4a8ae0ecdab4e1e7abf807d4bd039ea1d..1ffee95a749de051d944fd83cd4bfaacf759771d 100644 (file)
@@ -1,3 +1,47 @@
+2016-09-23  Matthew Wahab  <matthew.wahab@arm.com>
+
+       * config/arm/arm.c (arm_evpc_neon_vuzp): Add support for V8HF and
+       V4HF modes.
+       (arm_evpc_neon_vtrn): Likewise.
+       (arm_evpc_neon_vrev): Likewise.
+       (arm_evpc_neon_vext): Likewise.
+       * config/arm/arm_neon.h (vbsl_f16): New.
+       (vbslq_f16): New.
+       (vdup_n_f16): New.
+       (vdupq_n_f16): New.
+       (vdup_lane_f16): New.
+       (vdupq_lane_f16): New.
+       (vext_f16): New.
+       (vextq_f16): New.
+       (vmov_n_f16): New.
+       (vmovq_n_f16): New.
+       (vrev64_f16): New.
+       (vrev64q_f16): New.
+       (vtrn_f16): New.
+       (vtrnq_f16): New.
+       (vuzp_f16): New.
+       (vuzpq_f16): New.
+       (vzip_f16): New.
+       (vzipq_f16): New.
+       * config/arm/arm_neon_builtins.def (vdup_n): New (v8hf, v4hf variants).
+       (vdup_lane): New (v8hf, v4hf variants).
+       (vext): New (v8hf, v4hf variants).
+       (vbsl): New (v8hf, v4hf variants).
+       * config/arm/iterators.md (VDQWH): New.
+       (VH): New.
+       (V_double_vector_mode): Add V8HF and V4HF.  Fix white-space.
+       (Scalar_mul_8_16): Fix white-space.
+       (Is_d_reg): Add V4HF and V8HF.
+       * config/arm/neon.md (neon_vdup_lane<mode>_internal): New.
+       (neon_vdup_lane<mode>): New.
+       (neon_vtrn<mode>_internal): Replace VDQW with VDQWH.
+       (*neon_vtrn<mode>_insn): Likewise.
+       (neon_vzip<mode>_internal): Likewise.  Also fix white-space.
+       (*neon_vzip<mode>_insn): Likewise.
+       (neon_vuzp<mode>_internal): Likewise.
+       (*neon_vuzp<mode>_insn): Likewise.
+       * config/arm/vec-common.md (vec_perm_const<mode>): New.
+
 2016-09-23  Jiong Wang  <jiong.wang@arm.com>
            Matthew Wahab  <matthew.wahab@arm.com>
 
index 04c73d98e34f8cd97aa38ad08b76b766572b1a57..e6b59d54157df27a516106c57baa83ff91f634b3 100644 (file)
@@ -28576,6 +28576,8 @@ arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
     case V8QImode:  gen = gen_neon_vuzpv8qi_internal;  break;
     case V8HImode:  gen = gen_neon_vuzpv8hi_internal;  break;
     case V4HImode:  gen = gen_neon_vuzpv4hi_internal;  break;
+    case V8HFmode:  gen = gen_neon_vuzpv8hf_internal;  break;
+    case V4HFmode:  gen = gen_neon_vuzpv4hf_internal;  break;
     case V4SImode:  gen = gen_neon_vuzpv4si_internal;  break;
     case V2SImode:  gen = gen_neon_vuzpv2si_internal;  break;
     case V2SFmode:  gen = gen_neon_vuzpv2sf_internal;  break;
@@ -28649,6 +28651,8 @@ arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
     case V8QImode:  gen = gen_neon_vzipv8qi_internal;  break;
     case V8HImode:  gen = gen_neon_vzipv8hi_internal;  break;
     case V4HImode:  gen = gen_neon_vzipv4hi_internal;  break;
+    case V8HFmode:  gen = gen_neon_vzipv8hf_internal;  break;
+    case V4HFmode:  gen = gen_neon_vzipv4hf_internal;  break;
     case V4SImode:  gen = gen_neon_vzipv4si_internal;  break;
     case V2SImode:  gen = gen_neon_vzipv2si_internal;  break;
     case V2SFmode:  gen = gen_neon_vzipv2sf_internal;  break;
@@ -28701,6 +28705,8 @@ arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
        case V8QImode:  gen = gen_neon_vrev32v8qi;  break;
        case V8HImode:  gen = gen_neon_vrev64v8hi;  break;
        case V4HImode:  gen = gen_neon_vrev64v4hi;  break;
+       case V8HFmode:  gen = gen_neon_vrev64v8hf;  break;
+       case V4HFmode:  gen = gen_neon_vrev64v4hf;  break;
        default:
          return false;
        }
@@ -28784,6 +28790,8 @@ arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
     case V8QImode:  gen = gen_neon_vtrnv8qi_internal;  break;
     case V8HImode:  gen = gen_neon_vtrnv8hi_internal;  break;
     case V4HImode:  gen = gen_neon_vtrnv4hi_internal;  break;
+    case V8HFmode:  gen = gen_neon_vtrnv8hf_internal;  break;
+    case V4HFmode:  gen = gen_neon_vtrnv4hf_internal;  break;
     case V4SImode:  gen = gen_neon_vtrnv4si_internal;  break;
     case V2SImode:  gen = gen_neon_vtrnv2si_internal;  break;
     case V2SFmode:  gen = gen_neon_vtrnv2sf_internal;  break;
@@ -28859,6 +28867,8 @@ arm_evpc_neon_vext (struct expand_vec_perm_d *d)
     case V8HImode: gen = gen_neon_vextv8hi; break;
     case V2SImode: gen = gen_neon_vextv2si; break;
     case V4SImode: gen = gen_neon_vextv4si; break;
+    case V4HFmode: gen = gen_neon_vextv4hf; break;
+    case V8HFmode: gen = gen_neon_vextv8hf; break;
     case V2SFmode: gen = gen_neon_vextv2sf; break;
     case V4SFmode: gen = gen_neon_vextv4sf; break;
     case V2DImode: gen = gen_neon_vextv2di; break;
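
The arm.c changes above only extend the mode dispatch to V4HF/V8HF; the
permute semantics are unchanged.  As an illustration (not part of the
patch), a scalar model of the VUZP operation the new cases select:
result 0 collects the even-indexed lanes of the concatenation { a, b },
result 1 the odd-indexed lanes.

    void
    vuzp4_model (const float a[4], const float b[4],
                 float r0[4], float r1[4])
    {
      for (int i = 0; i < 2; i++)
        {
          r0[i] = a[2 * i];      r0[i + 2] = b[2 * i];      /* 0, 2, 4, 6 */
          r1[i] = a[2 * i + 1];  r1[i + 2] = b[2 * i + 1];  /* 1, 3, 5, 7 */
        }
    }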
index 32ee06c65f3f86f9a3dbd4ce5845146abbedd225..3bd951703baa8c9e215c4c8ba3ff536acacdfec2 100644 (file)
@@ -14842,6 +14842,181 @@ vmull_high_p64 (poly64x2_t __a, poly64x2_t __b)
 
 #pragma GCC pop_options
 
+  /* Half-precision data processing intrinsics.  */
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vbsl_f16 (uint16x4_t __a, float16x4_t __b, float16x4_t __c)
+{
+  return __builtin_neon_vbslv4hf ((int16x4_t)__a, __b, __c);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vbslq_f16 (uint16x8_t __a, float16x8_t __b, float16x8_t __c)
+{
+  return __builtin_neon_vbslv8hf ((int16x8_t)__a, __b, __c);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vdup_n_f16 (float16_t __a)
+{
+  return __builtin_neon_vdup_nv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vdupq_n_f16 (float16_t __a)
+{
+  return __builtin_neon_vdup_nv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vdup_lane_f16 (float16x4_t __a, const int __b)
+{
+  return __builtin_neon_vdup_lanev4hf (__a, __b);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vdupq_lane_f16 (float16x4_t __a, const int __b)
+{
+  return __builtin_neon_vdup_lanev8hf (__a, __b);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vext_f16 (float16x4_t __a, float16x4_t __b, const int __c)
+{
+  return __builtin_neon_vextv4hf (__a, __b, __c);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vextq_f16 (float16x8_t __a, float16x8_t __b, const int __c)
+{
+  return __builtin_neon_vextv8hf (__a, __b, __c);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vmov_n_f16 (float16_t __a)
+{
+  return __builtin_neon_vdup_nv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vmovq_n_f16 (float16_t __a)
+{
+  return __builtin_neon_vdup_nv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrev64_f16 (float16x4_t __a)
+{
+  return (float16x4_t)__builtin_shuffle (__a, (uint16x4_t){ 3, 2, 1, 0 });
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrev64q_f16 (float16x8_t __a)
+{
+  return
+    (float16x8_t)__builtin_shuffle (__a,
+                                   (uint16x8_t){ 3, 2, 1, 0, 7, 6, 5, 4 });
+}
+
+__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
+vtrn_f16 (float16x4_t __a, float16x4_t __b)
+{
+  float16x4x2_t __rv;
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 5, 1, 7, 3 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 0, 6, 2 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 4, 2, 6 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 1, 5, 3, 7 });
+#endif
+  return __rv;
+}
+
+__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
+vtrnq_f16 (float16x8_t __a, float16x8_t __b)
+{
+  float16x8x2_t __rv;
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b,
+                                  (uint16x8_t){ 9, 1, 11, 3, 13, 5, 15, 7 });
+  __rv.val[1] = __builtin_shuffle (__a, __b,
+                                  (uint16x8_t){ 8, 0, 10, 2, 12, 4, 14, 6 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b,
+                                  (uint16x8_t){ 0, 8, 2, 10, 4, 12, 6, 14 });
+  __rv.val[1] = __builtin_shuffle (__a, __b,
+                                  (uint16x8_t){ 1, 9, 3, 11, 5, 13, 7, 15 });
+#endif
+  return __rv;
+}
+
+__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
+vuzp_f16 (float16x4_t __a, float16x4_t __b)
+{
+  float16x4x2_t __rv;
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 5, 7, 1, 3 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 6, 0, 2 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 2, 4, 6 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 1, 3, 5, 7 });
+#endif
+  return __rv;
+}
+
+__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
+vuzpq_f16 (float16x8_t __a, float16x8_t __b)
+{
+  float16x8x2_t __rv;
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+                                  { 5, 7, 1, 3, 13, 15, 9, 11 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+                                  { 4, 6, 0, 2, 12, 14, 8, 10 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b,
+                                  (uint16x8_t){ 0, 2, 4, 6, 8, 10, 12, 14 });
+  __rv.val[1] = __builtin_shuffle (__a, __b,
+                                  (uint16x8_t){ 1, 3, 5, 7, 9, 11, 13, 15 });
+#endif
+  return __rv;
+}
+
+__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
+vzip_f16 (float16x4_t __a, float16x4_t __b)
+{
+  float16x4x2_t __rv;
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 6, 2, 7, 3 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 0, 5, 1 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 4, 1, 5 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 2, 6, 3, 7 });
+#endif
+  return __rv;
+}
+
+__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
+vzipq_f16 (float16x8_t __a, float16x8_t __b)
+{
+  float16x8x2_t __rv;
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+                                  { 10, 2, 11, 3, 8, 0, 9, 1 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+                                  { 14, 6, 15, 7, 12, 4, 13, 5 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b,
+                                  (uint16x8_t){ 0, 8, 1, 9, 2, 10, 3, 11 });
+  __rv.val[1] = __builtin_shuffle (__a, __b,
+                                  (uint16x8_t){ 4, 12, 5, 13, 6, 14, 7, 15 });
+#endif
+  return __rv;
+}
+
+#endif
+
 #ifdef __cplusplus
 }
 #endif
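
The vtrn/vuzp/vzip intrinsics above are written as __builtin_shuffle
calls, with separate index masks for big- and little-endian because GNU
vector lane numbering follows memory order.  A scalar model of the
little-endian vtrn_f16 masks { 0, 4, 2, 6 } and { 1, 5, 3, 7 }
(illustrative only, not part of the patch):

    void
    vtrn4_model (const float a[4], const float b[4],
                 float r0[4], float r1[4])
    {
      for (int i = 0; i < 4; i += 2)
        {
          r0[i] = a[i];      r0[i + 1] = b[i];      /* even lanes of a, b */
          r1[i] = a[i + 1];  r1[i + 1] = b[i + 1];  /* odd lanes of a, b */
        }
    }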
index d9fac7847704f8558ec11e0438b5d81d16e5833b..a4ba516209c04b3bc9c3fd7b76726feb75cb1c67 100644 (file)
@@ -166,8 +166,10 @@ VAR10 (SETLANE, vset_lane,
 VAR5 (UNOP, vcreate, v8qi, v4hi, v2si, v2sf, di)
 VAR10 (UNOP, vdup_n,
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
+VAR2 (UNOP, vdup_n, v8hf, v4hf)
 VAR10 (GETLANE, vdup_lane,
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
+VAR2 (GETLANE, vdup_lane, v8hf, v4hf)
 VAR6 (COMBINE, vcombine, v8qi, v4hi, v4hf, v2si, v2sf, di)
 VAR6 (UNOP, vget_high, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
 VAR6 (UNOP, vget_low, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
@@ -197,6 +199,7 @@ VAR2 (MAC_N, vmlslu_n, v4hi, v2si)
 VAR2 (MAC_N, vqdmlsl_n, v4hi, v2si)
 VAR10 (SETLANE, vext,
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
+VAR2 (SETLANE, vext, v8hf, v4hf)
 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf)
 VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi)
 VAR2 (UNOP, vrev16, v8qi, v16qi)
@@ -208,6 +211,7 @@ VAR1 (UNOP, vcvtv4sf, v4hf)
 VAR1 (UNOP, vcvtv4hf, v4sf)
 VAR10 (TERNOP, vbsl,
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
+VAR2 (TERNOP, vbsl, v8hf, v4hf)
 VAR2 (UNOP, copysignf, v2sf, v4sf)
 VAR2 (UNOP, vrintn, v2sf, v4sf)
 VAR2 (UNOP, vrinta, v2sf, v4sf)
index aba1023cdd0d9feb5396222a27c1897ca844937e..3f9d9e43e3cbd7f0ebb93939d6fb328fc3c38763 100644 (file)
 ;; All supported vector modes (except those with 64-bit integer elements).
 (define_mode_iterator VDQW [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF])
 
+;; All supported vector modes including 16-bit float modes.
+(define_mode_iterator VDQWH [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF
+                            V8HF V4HF])
+
 ;; Supported integer vector modes (not 64 bit elements).
 (define_mode_iterator VDQIW [V8QI V16QI V4HI V8HI V2SI V4SI])
 
 ;; Modes with 8-bit, 16-bit and 32-bit elements.
 (define_mode_iterator VU [V16QI V8HI V4SI])
 
+;; Vector modes for 16-bit floating-point support.
+(define_mode_iterator VH [V8HF V4HF])
+
 ;; Iterators used for fixed-point support.
 (define_mode_iterator FIXED [QQ HQ SQ UQQ UHQ USQ HA SA UHA USA])
 
 ;; Used for neon_vdup_lane, where the second operand is double-sized
 ;; even when the first one is quad.
 (define_mode_attr V_double_vector_mode [(V16QI "V8QI") (V8HI "V4HI")
-                                        (V4SI "V2SI") (V4SF "V2SF")
-                                        (V8QI "V8QI") (V4HI "V4HI")
-                                        (V2SI "V2SI") (V2SF "V2SF")])
+                                       (V4SI "V2SI") (V4SF "V2SF")
+                                       (V8QI "V8QI") (V4HI "V4HI")
+                                       (V2SI "V2SI") (V2SF "V2SF")
+                                       (V8HF "V4HF") (V4HF "V4HF")])
 
 ;; Mode of result of comparison operations (and bit-select operand 1).
 (define_mode_attr V_cmp_result [(V8QI "V8QI") (V16QI "V16QI")
                  (DI "false") (V2DI "false")])
 
 (define_mode_attr Scalar_mul_8_16 [(V8QI "true") (V16QI "true")
-                   (V4HI "true") (V8HI "true")
-                   (V2SI "false") (V4SI "false")
-                   (V2SF "false") (V4SF "false")
-                   (DI "false") (V2DI "false")])
-
+                                  (V4HI "true") (V8HI "true")
+                                  (V2SI "false") (V4SI "false")
+                                  (V2SF "false") (V4SF "false")
+                                  (DI "false") (V2DI "false")])
 
 (define_mode_attr Is_d_reg [(V8QI "true") (V16QI "false")
                             (V4HI "true") (V8HI  "false")
                             (V2SI "true") (V4SI  "false")
                             (V2SF "true") (V4SF  "false")
-                            (DI   "true") (V2DI  "false")])
+                            (DI   "true") (V2DI  "false")
+                           (V4HF "true") (V8HF  "false")])
 
 (define_mode_attr V_mode_nunits [(V8QI "8") (V16QI "16")
                                 (V4HF "4") (V8HF "8")
index e2fdfbb04621ee6f8603849be089e8bce624214d..c7bb121368736ce4e2956865eed2638fa35538f6 100644 (file)
@@ -3045,6 +3045,28 @@ if (BYTES_BIG_ENDIAN)
   [(set_attr "type" "neon_dup<q>")]
 )
 
+(define_insn "neon_vdup_lane<mode>_internal"
+ [(set (match_operand:VH 0 "s_register_operand" "=w")
+   (vec_duplicate:VH
+    (vec_select:<V_elem>
+     (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
+     (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
+ "TARGET_NEON && TARGET_FP16"
+{
+  if (BYTES_BIG_ENDIAN)
+    {
+      int elt = INTVAL (operands[2]);
+      elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
+      operands[2] = GEN_INT (elt);
+    }
+  if (<Is_d_reg>)
+    return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
+  else
+    return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
+}
+  [(set_attr "type" "neon_dup<q>")]
+)
+
 (define_expand "neon_vdup_lane<mode>"
   [(match_operand:VDQW 0 "s_register_operand" "=w")
    (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
@@ -3064,6 +3086,25 @@ if (BYTES_BIG_ENDIAN)
     DONE;
 })
 
+(define_expand "neon_vdup_lane<mode>"
+  [(match_operand:VH 0 "s_register_operand")
+   (match_operand:<V_double_vector_mode> 1 "s_register_operand")
+   (match_operand:SI 2 "immediate_operand")]
+  "TARGET_NEON && TARGET_FP16"
+{
+  if (BYTES_BIG_ENDIAN)
+    {
+      unsigned int elt = INTVAL (operands[2]);
+      unsigned int reg_nelts
+       = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
+      elt ^= reg_nelts - 1;
+      operands[2] = GEN_INT (elt);
+    }
+  emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
+                                               operands[2]));
+  DONE;
+})
+
 ; Scalar index is ignored, since only zero is valid here.
 (define_expand "neon_vdup_lanedi"
   [(match_operand:DI 0 "s_register_operand" "=w")
@@ -4281,25 +4322,25 @@ if (BYTES_BIG_ENDIAN)
 
 (define_expand "neon_vtrn<mode>_internal"
   [(parallel
-    [(set (match_operand:VDQW 0 "s_register_operand" "")
-         (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
-                       (match_operand:VDQW 2 "s_register_operand" "")]
+    [(set (match_operand:VDQWH 0 "s_register_operand")
+         (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
+                        (match_operand:VDQWH 2 "s_register_operand")]
           UNSPEC_VTRN1))
-     (set (match_operand:VDQW 3 "s_register_operand" "")
-          (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
+     (set (match_operand:VDQWH 3 "s_register_operand")
+         (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
   "TARGET_NEON"
   ""
 )
 
 ;; Note: Different operand numbering to handle tied registers correctly.
 (define_insn "*neon_vtrn<mode>_insn"
-  [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
-        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
-                      (match_operand:VDQW 3 "s_register_operand" "2")]
-                     UNSPEC_VTRN1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=&w")
-         (unspec:VDQW [(match_dup 1) (match_dup 3)]
-                     UNSPEC_VTRN2))]
+  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
+       (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
+                      (match_operand:VDQWH 3 "s_register_operand" "2")]
+        UNSPEC_VTRN1))
+   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
+       (unspec:VDQWH [(match_dup 1) (match_dup 3)]
+        UNSPEC_VTRN2))]
   "TARGET_NEON"
   "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
   [(set_attr "type" "neon_permute<q>")]
@@ -4307,25 +4348,25 @@ if (BYTES_BIG_ENDIAN)
 
 (define_expand "neon_vzip<mode>_internal"
   [(parallel
-    [(set (match_operand:VDQW 0 "s_register_operand" "")
-         (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
-                       (match_operand:VDQW 2 "s_register_operand" "")]
-                      UNSPEC_VZIP1))
-    (set (match_operand:VDQW 3 "s_register_operand" "")
-        (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
+    [(set (match_operand:VDQWH 0 "s_register_operand")
+         (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
+                        (match_operand:VDQWH 2 "s_register_operand")]
+          UNSPEC_VZIP1))
+    (set (match_operand:VDQWH 3 "s_register_operand")
+        (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
   "TARGET_NEON"
   ""
 )
 
 ;; Note: Different operand numbering to handle tied registers correctly.
 (define_insn "*neon_vzip<mode>_insn"
-  [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
-        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
-                      (match_operand:VDQW 3 "s_register_operand" "2")]
-                     UNSPEC_VZIP1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=&w")
-        (unspec:VDQW [(match_dup 1) (match_dup 3)]
-                     UNSPEC_VZIP2))]
+  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
+       (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
+                      (match_operand:VDQWH 3 "s_register_operand" "2")]
+        UNSPEC_VZIP1))
+   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
+       (unspec:VDQWH [(match_dup 1) (match_dup 3)]
+        UNSPEC_VZIP2))]
   "TARGET_NEON"
   "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
   [(set_attr "type" "neon_zip<q>")]
@@ -4333,25 +4374,25 @@ if (BYTES_BIG_ENDIAN)
 
 (define_expand "neon_vuzp<mode>_internal"
   [(parallel
-    [(set (match_operand:VDQW 0 "s_register_operand" "")
-         (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
-                       (match_operand:VDQW 2 "s_register_operand" "")]
+    [(set (match_operand:VDQWH 0 "s_register_operand")
+         (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
+                       (match_operand:VDQWH 2 "s_register_operand")]
           UNSPEC_VUZP1))
-     (set (match_operand:VDQW 3 "s_register_operand" "")
-         (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
+     (set (match_operand:VDQWH 3 "s_register_operand" "")
+         (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
   "TARGET_NEON"
   ""
 )
 
 ;; Note: Different operand numbering to handle tied registers correctly.
 (define_insn "*neon_vuzp<mode>_insn"
-  [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
-        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
-                      (match_operand:VDQW 3 "s_register_operand" "2")]
-                     UNSPEC_VUZP1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=&w")
-        (unspec:VDQW [(match_dup 1) (match_dup 3)]
-                     UNSPEC_VUZP2))]
+  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
+       (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
+                      (match_operand:VDQWH 3 "s_register_operand" "2")]
+        UNSPEC_VUZP1))
+   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
+       (unspec:VDQWH [(match_dup 1) (match_dup 3)]
+        UNSPEC_VUZP2))]
   "TARGET_NEON"
   "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
   [(set_attr "type" "neon_zip<q>")]
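
In the neon_vdup_lane<mode> expander above, the big-endian remapping
elt ^= reg_nelts - 1 is equivalent to the reg_nelts - 1 - elt form used
in the internal pattern (reg_nelts is a power of two), reversing the
lane index within the 64-bit source register.  A quick check of the
remapping for 16-bit elements (illustrative only, not part of the
patch):

    #include <stdio.h>

    int
    main (void)
    {
      unsigned int reg_nelts = 64 / 16;  /* four 16-bit lanes per D reg */
      for (unsigned int elt = 0; elt < reg_nelts; elt++)
        printf ("lane %u -> %u\n", elt, elt ^ (reg_nelts - 1));
      /* Prints: lane 0 -> 3, lane 1 -> 2, lane 2 -> 1, lane 3 -> 0.  */
      return 0;
    }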
index ce98f71164ca329ad3b0a5b5274cf0167017ba29..645b01ead67124dd52eae5564f909f23779e78ee 100644 (file)
     FAIL;
 })
 
+(define_expand "vec_perm_const<mode>"
+  [(match_operand:VH 0 "s_register_operand")
+   (match_operand:VH 1 "s_register_operand")
+   (match_operand:VH 2 "s_register_operand")
+   (match_operand:<V_cmp_result> 3)]
+  "TARGET_NEON"
+{
+  if (arm_expand_vec_perm_const (operands[0], operands[1],
+                                operands[2], operands[3]))
+    DONE;
+  else
+    FAIL;
+})
+
 (define_expand "vec_perm<mode>"
   [(match_operand:VE 0 "s_register_operand" "")
    (match_operand:VE 1 "s_register_operand" "")
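
With the vec_perm_const<mode> expander above wired up for the VH modes,
constant half-float shuffles written in GNU C can now be matched by
arm_expand_vec_perm_const and emitted as single permute instructions.
A sketch (illustrative only; the vrev64.16 expectation assumes
-mfpu=neon-fp16 and little-endian):

    #include <arm_neon.h>

    float16x4_t
    reverse_f16 (float16x4_t a)
    {
      /* Constant mask; expected to match arm_evpc_neon_vrev and emit
         a single vrev64.16.  */
      return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
    }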
index f19e24b883f42a66dbf579a91ff7f4c0b7ebd890..34ef9d3a9119765f2b9132d39fa1b151b0e4e8b9 100644 (file)
@@ -1,3 +1,23 @@
+2016-09-23  Matthew Wahab  <matthew.wahab@arm.com>
+
+       * gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
+       (FP16_SUPPORTED): New.
+       (expected-hfloat-16x4): Make conditional on __fp16 support.
+       (expected-hfloat-16x8): Likewise.
+       (vdup_n_f16): Disable for non-AArch64 targets.
+       * gcc.target/aarch64/advsimd-intrinsics/vbsl.c: Add __fp16 tests,
+       conditional on FP16_SUPPORTED.
+       * gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c: Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c: Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/vext.c: Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/vrev.c: Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc: Add support
+       for testing __fp16.
+       * gcc.target/aarch64/advsimd-intrinsics/vtrn.c: Add __fp16 tests,
+       conditional on FP16_SUPPORTED.
+       * gcc.target/aarch64/advsimd-intrinsics/vuzp.c: Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/vzip.c: Likewise.
+
 2016-09-23  Matthew Wahab  <matthew.wahab@arm.com>
 
        * gcc.target/arm/short-vfp-1.c: New.
index 3363a7208625175d0a9cbb79954902c80aeedabc..1297137350c93c293cececd35b1202306bd4115a 100644 (file)
@@ -16,6 +16,15 @@ extern void *memset(void *, int, size_t);
 extern void *memcpy(void *, const void *, size_t);
 extern size_t strlen(const char *);
 
+/* Helper macro to select FP16 tests.  */
+#if (!defined (__aarch64__)                                            \
+     && (defined (__ARM_FP16_FORMAT_IEEE)                              \
+        || defined (__ARM_FP16_FORMAT_ALTERNATIVE)))
+#define FP16_SUPPORTED (1)
+#else
+#undef FP16_SUPPORTED
+#endif
+
 /* Various string construction helpers.  */
 
 /*
@@ -511,7 +520,9 @@ static void clean_results (void)
 /* Helpers to initialize vectors.  */
 #define VDUP(VAR, Q, T1, T2, W, N, V)                  \
   VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V)
-#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+#if (defined (__aarch64__)                                             \
+     && (defined (__ARM_FP16_FORMAT_IEEE)                              \
+        || defined (__ARM_FP16_FORMAT_ALTERNATIVE)))
 /* Work around that there is no vdup_n_f16 intrinsic.  */
 #define vdup_n_f16(VAL)                \
   __extension__                        \
index c4fdbb45102a5407d71cdc75043e74dd0ebb076e..e9b3dfd1c0233a900ce9889793100a651a7e51cc 100644 (file)
@@ -16,6 +16,10 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffff1 };
 VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
                                        0xf7, 0xf7, 0xf7, 0xf7 };
 VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc09, 0xcb89,
+                                              0xcb09, 0xca89 };
+#endif
 VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800004, 0xc1700004 };
 VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
                                        0xf6, 0xf6, 0xf6, 0xf6,
@@ -43,6 +47,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
                                         0xf7, 0xf7, 0xf7, 0xf7 };
 VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2,
                                         0xfff4, 0xfff4, 0xfff6, 0xfff6 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc09, 0xcb89,
+                                              0xcb09, 0xca89,
+                                              0xca09, 0xc989,
+                                              0xc909, 0xc889 };
+#endif
 VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800001, 0xc1700001,
                                           0xc1600001, 0xc1500001 };
 
@@ -66,6 +76,10 @@ void exec_vbsl (void)
   clean_results ();
 
   TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
+#if defined (FP16_SUPPORTED)
+  VLOAD(vector, buffer, , float, f, 16, 4);
+  VLOAD(vector, buffer, q, float, f, 16, 8);
+#endif
   VLOAD(vector, buffer, , float, f, 32, 2);
   VLOAD(vector, buffer, q, float, f, 32, 4);
 
@@ -80,6 +94,9 @@ void exec_vbsl (void)
   VDUP(vector2, , uint, u, 16, 4, 0xFFF2);
   VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF0);
   VDUP(vector2, , uint, u, 64, 1, 0xFFFFFFF3);
+#if defined (FP16_SUPPORTED)
+  VDUP(vector2, , float, f, 16, 4, -2.4f);   /* -2.4f is 0xC0CD.  */
+#endif
   VDUP(vector2, , float, f, 32, 2, -30.3f);
   VDUP(vector2, , poly, p, 8, 8, 0xF3);
   VDUP(vector2, , poly, p, 16, 4, 0xFFF2);
@@ -94,6 +111,9 @@ void exec_vbsl (void)
   VDUP(vector2, q, uint, u, 64, 2, 0xFFFFFFF3);
   VDUP(vector2, q, poly, p, 8, 16, 0xF3);
   VDUP(vector2, q, poly, p, 16, 8, 0xFFF2);
+#if defined (FP16_SUPPORTED)
+  VDUP(vector2, q, float, f, 16, 8, -2.4f);
+#endif
   VDUP(vector2, q, float, f, 32, 4, -30.4f);
 
   VDUP(vector_first, , uint, u, 8, 8, 0xF4);
@@ -111,10 +131,18 @@ void exec_vbsl (void)
   TEST_VBSL(uint, , poly, p, 16, 4);
   TEST_VBSL(uint, q, poly, p, 8, 16);
   TEST_VBSL(uint, q, poly, p, 16, 8);
+#if defined (FP16_SUPPORTED)
+  TEST_VBSL(uint, , float, f, 16, 4);
+  TEST_VBSL(uint, q, float, f, 16, 8);
+#endif
   TEST_VBSL(uint, , float, f, 32, 2);
   TEST_VBSL(uint, q, float, f, 32, 4);
 
+#if defined (FP16_SUPPORTED)
+  CHECK_RESULTS (TEST_MSG, "");
+#else
   CHECK_RESULTS_NO_FP16 (TEST_MSG, "");
+#endif
 }
 
 int main (void)
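
The expected hfloat values in these tests are IEEE binary16 bit
patterns: 0xcc00 is -16.0, 0xcb80 is -15.0 and 0x4b4d is 14.6 rounded
to half precision (14.6015625).  A decode sketch for normal numbers
(illustrative only, not part of the testsuite):

    #include <math.h>
    #include <stdio.h>

    static float
    half_to_float (unsigned short h)  /* normal binary16 values only */
    {
      int sign = (h >> 15) & 1;
      int exp = (h >> 10) & 0x1f;
      int frac = h & 0x3ff;
      float v = ldexpf (1.0f + frac / 1024.0f, exp - 15);
      return sign ? -v : v;
    }

    int
    main (void)
    {
      /* Prints: -16 -15 14.6016.  */
      printf ("%g %g %g\n", half_to_float (0xcc00),
              half_to_float (0xcb80), half_to_float (0x4b4d));
      return 0;
    }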
index 22d45d56c8e3c517da8cc595f767f8034aeabde8..aef4173326c907a5f487f2520d0e354afbc16fd4 100644 (file)
@@ -19,6 +19,10 @@ VECT_VAR_DECL(expected0,uint,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
                                         0xf0, 0xf0, 0xf0, 0xf0 };
 VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcc00,
+                                               0xcc00, 0xcc00 };
+#endif
 VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 };
 VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
                                         0xf0, 0xf0, 0xf0, 0xf0,
@@ -46,6 +50,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
                                          0xf0, 0xf0, 0xf0, 0xf0 };
 VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
                                          0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcc00,
+                                               0xcc00, 0xcc00,
+                                               0xcc00, 0xcc00,
+                                               0xcc00, 0xcc00 };
+#endif
 VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1800000,
                                            0xc1800000, 0xc1800000 };
 
@@ -63,6 +73,10 @@ VECT_VAR_DECL(expected1,uint,64,1) [] = { 0xfffffffffffffff1 };
 VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
                                         0xf1, 0xf1, 0xf1, 0xf1 };
 VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb80, 0xcb80,
+                                               0xcb80, 0xcb80 };
+#endif
 VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
 VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
                                         0xf1, 0xf1, 0xf1, 0xf1,
@@ -90,6 +104,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
                                          0xf1, 0xf1, 0xf1, 0xf1 };
 VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
                                          0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xcb80, 0xcb80,
+                                               0xcb80, 0xcb80,
+                                               0xcb80, 0xcb80,
+                                               0xcb80, 0xcb80 };
+#endif
 VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
                                            0xc1700000, 0xc1700000 };
 
@@ -107,6 +127,10 @@ VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff2 };
 VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
                                         0xf2, 0xf2, 0xf2, 0xf2 };
 VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xcb00, 0xcb00,
+                                               0xcb00, 0xcb00 };
+#endif
 VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 };
 VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
                                         0xf2, 0xf2, 0xf2, 0xf2,
@@ -134,6 +158,12 @@ VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
                                          0xf2, 0xf2, 0xf2, 0xf2 };
 VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2,
                                          0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xcb00, 0xcb00,
+                                               0xcb00, 0xcb00,
+                                               0xcb00, 0xcb00,
+                                               0xcb00, 0xcb00 };
+#endif
 VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0xc1600000,
                                            0xc1600000, 0xc1600000 };
 
@@ -171,6 +201,9 @@ void exec_vdup_vmov (void)
     TEST_VDUP(, uint, u, 64, 1);
     TEST_VDUP(, poly, p, 8, 8);
     TEST_VDUP(, poly, p, 16, 4);
+#if defined (FP16_SUPPORTED)
+    TEST_VDUP(, float, f, 16, 4);
+#endif
     TEST_VDUP(, float, f, 32, 2);
 
     TEST_VDUP(q, int, s, 8, 16);
@@ -183,8 +216,26 @@ void exec_vdup_vmov (void)
     TEST_VDUP(q, uint, u, 64, 2);
     TEST_VDUP(q, poly, p, 8, 16);
     TEST_VDUP(q, poly, p, 16, 8);
+#if defined (FP16_SUPPORTED)
+    TEST_VDUP(q, float, f, 16, 8);
+#endif
     TEST_VDUP(q, float, f, 32, 4);
 
+#if defined (FP16_SUPPORTED)
+    switch (i) {
+    case 0:
+      CHECK_RESULTS_NAMED (TEST_MSG, expected0, "");
+      break;
+    case 1:
+      CHECK_RESULTS_NAMED (TEST_MSG, expected1, "");
+      break;
+    case 2:
+      CHECK_RESULTS_NAMED (TEST_MSG, expected2, "");
+      break;
+    default:
+      abort();
+    }
+#else
     switch (i) {
     case 0:
       CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, "");
@@ -198,6 +249,7 @@ void exec_vdup_vmov (void)
     default:
       abort();
     }
+#endif
   }
 
   /* Do the same tests with vmov. Use the same expected results.  */
@@ -216,6 +268,9 @@ void exec_vdup_vmov (void)
     TEST_VMOV(, uint, u, 64, 1);
     TEST_VMOV(, poly, p, 8, 8);
     TEST_VMOV(, poly, p, 16, 4);
+#if defined (FP16_SUPPORTED)
+    TEST_VMOV(, float, f, 16, 4);
+#endif
     TEST_VMOV(, float, f, 32, 2);
 
     TEST_VMOV(q, int, s, 8, 16);
@@ -228,8 +283,26 @@ void exec_vdup_vmov (void)
     TEST_VMOV(q, uint, u, 64, 2);
     TEST_VMOV(q, poly, p, 8, 16);
     TEST_VMOV(q, poly, p, 16, 8);
+#if defined (FP16_SUPPORTED)
+    TEST_VMOV(q, float, f, 16, 8);
+#endif
     TEST_VMOV(q, float, f, 32, 4);
 
+#if defined (FP16_SUPPORTED)
+    switch (i) {
+    case 0:
+      CHECK_RESULTS_NAMED (TEST_MSG, expected0, "");
+      break;
+    case 1:
+      CHECK_RESULTS_NAMED (TEST_MSG, expected1, "");
+      break;
+    case 2:
+      CHECK_RESULTS_NAMED (TEST_MSG, expected2, "");
+      break;
+    default:
+      abort();
+    }
+#else
     switch (i) {
     case 0:
       CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, "");
@@ -243,6 +316,8 @@ void exec_vdup_vmov (void)
     default:
       abort();
     }
+#endif
+
   }
 }
 
index ef708dcba17e68ccc0e3540c52bb6507d0562fad..c4b8f14de78cdc1696101e9bc8b9091df34a6afd 100644 (file)
@@ -17,6 +17,10 @@ VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf7, 0xf7, 0xf7, 0xf7,
                                        0xf7, 0xf7, 0xf7, 0xf7 };
 VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 };
 VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xca80, 0xca80,
+                                              0xca80, 0xca80 };
+#endif
 VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
                                        0xf2, 0xf2, 0xf2, 0xf2,
                                        0xf2, 0xf2, 0xf2, 0xf2,
@@ -43,6 +47,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5,
                                         0xf5, 0xf5, 0xf5, 0xf5 };
 VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
                                         0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xca80, 0xca80,
+                                              0xca80, 0xca80,
+                                              0xca80, 0xca80,
+                                              0xca80, 0xca80 };
+#endif
 VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
                                           0xc1700000, 0xc1700000 };
 
@@ -63,6 +73,9 @@ void exec_vdup_lane (void)
   clean_results ();
 
   TEST_MACRO_64BITS_VARIANTS_2_5(VLOAD, vector, buffer);
+#if defined (FP16_SUPPORTED)
+  VLOAD(vector, buffer, , float, f, 16, 4);
+#endif
   VLOAD(vector, buffer, , float, f, 32, 2);
 
   /* Choose lane arbitrarily.  */
@@ -76,6 +89,9 @@ void exec_vdup_lane (void)
   TEST_VDUP_LANE(, uint, u, 64, 1, 1, 0);
   TEST_VDUP_LANE(, poly, p, 8, 8, 8, 7);
   TEST_VDUP_LANE(, poly, p, 16, 4, 4, 3);
+#if defined (FP16_SUPPORTED)
+  TEST_VDUP_LANE(, float, f, 16, 4, 4, 3);
+#endif
   TEST_VDUP_LANE(, float, f, 32, 2, 2, 1);
 
   TEST_VDUP_LANE(q, int, s, 8, 16, 8, 2);
@@ -88,9 +104,16 @@ void exec_vdup_lane (void)
   TEST_VDUP_LANE(q, uint, u, 64, 2, 1, 0);
   TEST_VDUP_LANE(q, poly, p, 8, 16, 8, 5);
   TEST_VDUP_LANE(q, poly, p, 16, 8, 4, 1);
+#if defined (FP16_SUPPORTED)
+  TEST_VDUP_LANE(q, float, f, 16, 8, 4, 3);
+#endif
   TEST_VDUP_LANE(q, float, f, 32, 4, 2, 1);
 
+#if defined (FP16_SUPPORTED)
+  CHECK_RESULTS (TEST_MSG, "");
+#else
   CHECK_RESULTS_NO_FP16 (TEST_MSG, "");
+#endif
 }
 
 int main (void)
index 98f88a69898130dc922e205717d1dcd82c77aa11..908294a991db7daeafe7ba9f29826575c1c7eb02 100644 (file)
@@ -16,6 +16,10 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0xf7, 0x55, 0x55,
                                        0x55, 0x55, 0x55, 0x55 };
 VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcb00, 0xca80,
+                                              0x4b4d, 0x4b4d };
+#endif
 VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0x42066666 };
 VECT_VAR_DECL(expected,int,8,16) [] = { 0xfe, 0xff, 0x11, 0x11,
                                        0x11, 0x11, 0x11, 0x11,
@@ -39,6 +43,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff,
                                         0x55, 0x55, 0x55, 0x55 };
 VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff6, 0xfff7, 0x66, 0x66,
                                         0x66, 0x66, 0x66, 0x66 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xc880, 0x4b4d,
+                                              0x4b4d, 0x4b4d,
+                                              0x4b4d, 0x4b4d,
+                                              0x4b4d, 0x4b4d };
+#endif
 VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1500000, 0x4204cccd,
                                           0x4204cccd, 0x4204cccd };
 
@@ -60,6 +70,10 @@ void exec_vext (void)
   clean_results ();
 
   TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer);
+#ifdef FP16_SUPPORTED
+  VLOAD(vector1, buffer, , float, f, 16, 4);
+  VLOAD(vector1, buffer, q, float, f, 16, 8);
+#endif
   VLOAD(vector1, buffer, , float, f, 32, 2);
   VLOAD(vector1, buffer, q, float, f, 32, 4);
 
@@ -74,6 +88,9 @@ void exec_vext (void)
   VDUP(vector2, , uint, u, 64, 1, 0x88);
   VDUP(vector2, , poly, p, 8, 8, 0x55);
   VDUP(vector2, , poly, p, 16, 4, 0x66);
+#if defined (FP16_SUPPORTED)
+  VDUP (vector2, , float, f, 16, 4, 14.6f);   /* 14.6f is 0x4b4d.  */
+#endif
   VDUP(vector2, , float, f, 32, 2, 33.6f);
 
   VDUP(vector2, q, int, s, 8, 16, 0x11);
@@ -86,6 +103,9 @@ void exec_vext (void)
   VDUP(vector2, q, uint, u, 64, 2, 0x88);
   VDUP(vector2, q, poly, p, 8, 16, 0x55);
   VDUP(vector2, q, poly, p, 16, 8, 0x66);
+#if defined (FP16_SUPPORTED)
+  VDUP (vector2, q, float, f, 16, 8, 14.6f);
+#endif
   VDUP(vector2, q, float, f, 32, 4, 33.2f);
 
   /* Choose arbitrary extract offsets.  */
@@ -99,6 +119,9 @@ void exec_vext (void)
   TEST_VEXT(, uint, u, 64, 1, 0);
   TEST_VEXT(, poly, p, 8, 8, 6);
   TEST_VEXT(, poly, p, 16, 4, 2);
+#if defined (FP16_SUPPORTED)
+  TEST_VEXT(, float, f, 16, 4, 2);
+#endif
   TEST_VEXT(, float, f, 32, 2, 1);
 
   TEST_VEXT(q, int, s, 8, 16, 14);
@@ -111,9 +134,16 @@ void exec_vext (void)
   TEST_VEXT(q, uint, u, 64, 2, 1);
   TEST_VEXT(q, poly, p, 8, 16, 12);
   TEST_VEXT(q, poly, p, 16, 8, 6);
+#if defined (FP16_SUPPORTED)
+  TEST_VEXT(q, float, f, 16, 8, 7);
+#endif
   TEST_VEXT(q, float, f, 32, 4, 3);
 
+#if defined (FP16_SUPPORTED)
+  CHECK_RESULTS (TEST_MSG, "");
+#else
   CHECK_RESULTS_NO_FP16 (TEST_MSG, "");
+#endif
 }
 
 int main (void)
index 3b574da403a26ed90eea2e9112c46335415175cd..0c01318a3032f34a1d37ad73ed2e8821fc30ddf6 100644 (file)
@@ -63,6 +63,10 @@ VECT_VAR_DECL(expected_vrev64,uint,32,2) [] = { 0xfffffff1, 0xfffffff0 };
 VECT_VAR_DECL(expected_vrev64,poly,8,8) [] = { 0xf7, 0xf6, 0xf5, 0xf4,
                                               0xf3, 0xf2, 0xf1, 0xf0 };
 VECT_VAR_DECL(expected_vrev64,poly,16,4) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected_vrev64, hfloat, 16, 4) [] = { 0xca80, 0xcb00,
+                                                     0xcb80, 0xcc00 };
+#endif
 VECT_VAR_DECL(expected_vrev64,hfloat,32,2) [] = { 0xc1700000, 0xc1800000 };
 VECT_VAR_DECL(expected_vrev64,int,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4,
                                               0xf3, 0xf2, 0xf1, 0xf0,
@@ -86,6 +90,12 @@ VECT_VAR_DECL(expected_vrev64,poly,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4,
                                                0xfb, 0xfa, 0xf9, 0xf8 };
 VECT_VAR_DECL(expected_vrev64,poly,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0,
                                                0xfff7, 0xfff6, 0xfff5, 0xfff4 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected_vrev64, hfloat, 16, 8) [] = { 0xca80, 0xcb00,
+                                                     0xcb80, 0xcc00,
+                                                     0xc880, 0xc900,
+                                                     0xc980, 0xca00 };
+#endif
 VECT_VAR_DECL(expected_vrev64,hfloat,32,4) [] = { 0xc1700000, 0xc1800000,
                                                  0xc1500000, 0xc1600000 };
 
@@ -104,6 +114,10 @@ void exec_vrev (void)
 
   /* Initialize input "vector" from "buffer".  */
   TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
+#if defined (FP16_SUPPORTED)
+  VLOAD (vector, buffer, , float, f, 16, 4);
+  VLOAD (vector, buffer, q, float, f, 16, 8);
+#endif
   VLOAD(vector, buffer, , float, f, 32, 2);
   VLOAD(vector, buffer, q, float, f, 32, 4);
 
@@ -187,6 +201,12 @@ void exec_vrev (void)
   CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_vrev64, "");
   CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_vrev64, "");
 
+#if defined (FP16_SUPPORTED)
+  TEST_VREV (, float, f, 16, 4, 64);
+  TEST_VREV (q, float, f, 16, 8, 64);
+  CHECK_FP(TEST_MSG, float, 16, 4, PRIx32, expected_vrev64, "");
+  CHECK_FP(TEST_MSG, float, 16, 8, PRIx32, expected_vrev64, "");
+#endif
   TEST_VREV(, float, f, 32, 2, 64);
   TEST_VREV(q, float, f, 32, 4, 64);
   CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_vrev64, "");
index b55a205d7ac4dfd09961e05b6eba6ab383cb0fd7..ad5bf315b6c9468ea5b038bb9413a51efda80872 100644 (file)
@@ -53,9 +53,17 @@ void FNNAME (INSN_NAME) (void)
   DECL_VSHUFFLE(float, 32, 4)
 
   DECL_ALL_VSHUFFLE();
+#if defined (FP16_SUPPORTED)
+  DECL_VSHUFFLE (float, 16, 4);
+  DECL_VSHUFFLE (float, 16, 8);
+#endif
 
   /* Initialize input "vector" from "buffer".  */
   TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer);
+#if defined (FP16_SUPPORTED)
+  VLOAD (vector1, buffer, , float, f, 16, 4);
+  VLOAD (vector1, buffer, q, float, f, 16, 8);
+#endif
   VLOAD(vector1, buffer, , float, f, 32, 2);
   VLOAD(vector1, buffer, q, float, f, 32, 4);
 
@@ -68,6 +76,9 @@ void FNNAME (INSN_NAME) (void)
   VDUP(vector2, , uint, u, 32, 2, 0x77);
   VDUP(vector2, , poly, p, 8, 8, 0x55);
   VDUP(vector2, , poly, p, 16, 4, 0x66);
+#if defined (FP16_SUPPORTED)
+  VDUP (vector2, , float, f, 16, 4, 14.6f);   /* 14.6f is 0x4b4d.  */
+#endif
   VDUP(vector2, , float, f, 32, 2, 33.6f);
 
   VDUP(vector2, q, int, s, 8, 16, 0x11);
@@ -78,8 +89,11 @@ void FNNAME (INSN_NAME) (void)
   VDUP(vector2, q, uint, u, 32, 4, 0x77);
   VDUP(vector2, q, poly, p, 8, 16, 0x55);
   VDUP(vector2, q, poly, p, 16, 8, 0x66);
+#if defined (FP16_SUPPORTED)
+  VDUP (vector2, q, float, f, 16, 8, 14.6f);
+#endif
   VDUP(vector2, q, float, f, 32, 4, 33.8f);
-  
+
 #define TEST_ALL_VSHUFFLE(INSN)                                \
   TEST_VSHUFFLE(INSN, , int, s, 8, 8);                 \
   TEST_VSHUFFLE(INSN, , int, s, 16, 4);                        \
@@ -100,6 +114,10 @@ void FNNAME (INSN_NAME) (void)
   TEST_VSHUFFLE(INSN, q, poly, p, 16, 8);              \
   TEST_VSHUFFLE(INSN, q, float, f, 32, 4)
 
+#define TEST_VSHUFFLE_FP16(INSN)               \
+  TEST_VSHUFFLE(INSN, , float, f, 16, 4);      \
+  TEST_VSHUFFLE(INSN, q, float, f, 16, 8);
+
 #define TEST_ALL_EXTRA_CHUNKS()                        \
   TEST_EXTRA_CHUNK(int, 8, 8, 1);              \
   TEST_EXTRA_CHUNK(int, 16, 4, 1);             \
@@ -143,17 +161,37 @@ void FNNAME (INSN_NAME) (void)
     CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment);           \
     CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment);          \
     CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment);      \
-  }                                                                    \
+  }
+
+#define CHECK_RESULTS_VSHUFFLE_FP16(test_name,EXPECTED,comment)                \
+  {                                                                    \
+    CHECK_FP (test_name, float, 16, 4, PRIx16, EXPECTED, comment);     \
+    CHECK_FP (test_name, float, 16, 8, PRIx16, EXPECTED, comment);     \
+  }
 
   clean_results ();
 
   /* Execute the tests.  */
   TEST_ALL_VSHUFFLE(INSN_NAME);
+#if defined (FP16_SUPPORTED)
+  TEST_VSHUFFLE_FP16 (INSN_NAME);
+#endif
 
   CHECK_RESULTS_VSHUFFLE (TEST_MSG, expected0, "(chunk 0)");
+#if defined (FP16_SUPPORTED)
+  CHECK_RESULTS_VSHUFFLE_FP16 (TEST_MSG, expected0, "(chunk 0)");
+#endif
 
   TEST_ALL_EXTRA_CHUNKS();
+#if defined (FP16_SUPPORTED)
+  TEST_EXTRA_CHUNK (float, 16, 4, 1);
+  TEST_EXTRA_CHUNK (float, 16, 8, 1);
+#endif
+
   CHECK_RESULTS_VSHUFFLE (TEST_MSG, expected1, "(chunk 1)");
+#if defined (FP16_SUPPORTED)
+  CHECK_RESULTS_VSHUFFLE_FP16 (TEST_MSG, expected1, "(chunk 1)");
+#endif
 }
 
 int main (void)
index 2c4a09c1c4e00282db97e216438ff2248d6f7477..ea2d8d8f8bb5363efded15c4d176af314c07cbda 100644 (file)
@@ -15,6 +15,10 @@ VECT_VAR_DECL(expected0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
 VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf1, 0x55, 0x55,
                                         0xf2, 0xf3, 0x55, 0x55 };
 VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0x66 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb80,
+                                               0x4b4d, 0x4b4d };
+#endif
 VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
 VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf1, 0x11, 0x11,
                                         0xf2, 0xf3, 0x11, 0x11,
@@ -36,6 +40,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf1, 0x55, 0x55,
                                          0xf6, 0xf7, 0x55, 0x55 };
 VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff1, 0x66, 0x66,
                                          0xfff2, 0xfff3, 0x66, 0x66 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcb80,
+                                               0x4b4d, 0x4b4d,
+                                               0xcb00, 0xca80,
+                                               0x4b4d, 0x4b4d };
+#endif
 VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
                                            0x42073333, 0x42073333 };
 
@@ -51,6 +61,10 @@ VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 };
 VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf4, 0xf5, 0x55, 0x55,
                                         0xf6, 0xf7, 0x55, 0x55 };
 VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb00, 0xca80,
+                                               0x4b4d, 0x4b4d };
+#endif
 VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 };
 VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf8, 0xf9, 0x11, 0x11,
                                         0xfa, 0xfb, 0x11, 0x11,
@@ -72,6 +86,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf8, 0xf9, 0x55, 0x55,
                                          0xfe, 0xff, 0x55, 0x55 };
 VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff4, 0xfff5, 0x66, 0x66,
                                          0xfff6, 0xfff7, 0x66, 0x66 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xca00, 0xc980,
+                                               0x4b4d, 0x4b4d,
+                                               0xc900, 0xc880,
+                                               0x4b4d, 0x4b4d };
+#endif
 VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1600000, 0xc1500000,
                                            0x42073333, 0x42073333 };
 
index ab6e57693ac7114a2f7e15eb792e5cf2b6f11c3f..43b49cacd6ba1944f68401705491d2f84cbd1341 100644 (file)
@@ -19,6 +19,10 @@ VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
                                         0xf4, 0xf5, 0xf6, 0xf7 };
 VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1,
                                          0xfff2, 0xfff3 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb80,
+                                               0xcb00, 0xca80 };
+#endif
 VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
 VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
                                         0xf4, 0xf5, 0xf6, 0xf7,
@@ -48,6 +52,12 @@ VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff1,
                                          0xfff2, 0xfff3,
                                          0xfff4, 0xfff5,
                                          0xfff6, 0xfff7 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcb80,
+                                               0xcb00, 0xca80,
+                                               0xca00, 0xc980,
+                                               0xc900, 0xc880 };
+#endif
 VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
                                            0xc1600000, 0xc1500000 };
 
@@ -63,6 +73,10 @@ VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 };
 VECT_VAR_DECL(expected1,poly,8,8) [] = { 0x55, 0x55, 0x55, 0x55,
                                         0x55, 0x55, 0x55, 0x55 };
 VECT_VAR_DECL(expected1,poly,16,4) [] = { 0x66, 0x66, 0x66, 0x66 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0x4b4d, 0x4b4d,
+                                               0x4b4d, 0x4b4d };
+#endif
 VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 };
 VECT_VAR_DECL(expected1,int,8,16) [] = { 0x11, 0x11, 0x11, 0x11,
                                         0x11, 0x11, 0x11, 0x11,
@@ -84,6 +98,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0x55, 0x55, 0x55, 0x55,
                                          0x55, 0x55, 0x55, 0x55 };
 VECT_VAR_DECL(expected1,poly,16,8) [] = { 0x66, 0x66, 0x66, 0x66,
                                          0x66, 0x66, 0x66, 0x66 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0x4b4d, 0x4b4d,
+                                               0x4b4d, 0x4b4d,
+                                               0x4b4d, 0x4b4d,
+                                               0x4b4d, 0x4b4d };
+#endif
 VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0x42073333, 0x42073333,
                                            0x42073333, 0x42073333 };
 
index b5fe51644f58759127ccb364024aead2a4cebb15..20f4f5dbd3d66a1da1e6aaabec73b32ed26be1c0 100644 (file)
@@ -18,6 +18,10 @@ VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf4, 0x55, 0x55,
                                         0xf1, 0xf5, 0x55, 0x55 };
 VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff2,
                                          0x66, 0x66 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb00,
+                                               0x4b4d, 0x4b4d };
+#endif
 VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
 VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf8, 0x11, 0x11,
                                         0xf1, 0xf9, 0x11, 0x11,
@@ -41,6 +45,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf8, 0x55, 0x55,
                                          0xf3, 0xfb, 0x55, 0x55 };
 VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff4, 0x66, 0x66,
                                          0xfff1, 0xfff5, 0x66, 0x66 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xca00,
+                                               0x4b4d, 0x4b4d,
+                                               0xcb80, 0xc980,
+                                               0x4b4d, 0x4b4d };
+#endif
 VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1600000,
                                            0x42073333, 0x42073333 };
 
@@ -59,6 +69,10 @@ VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf2, 0xf6, 0x55, 0x55,
                                         0xf3, 0xf7, 0x55, 0x55 };
 VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff3,
                                          0x66, 0x66 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb80, 0xca80,
+                                               0x4b4d, 0x4b4d };
+#endif
 VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 };
 VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf4, 0xfc, 0x11, 0x11,
                                         0xf5, 0xfd, 0x11, 0x11,
@@ -82,6 +96,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf4, 0xfc, 0x55, 0x55,
                                          0xf7, 0xff, 0x55, 0x55 };
 VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff2, 0xfff6, 0x66, 0x66,
                                          0xfff3, 0xfff7, 0x66, 0x66 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xcb00, 0xc900,
+                                               0x4b4d, 0x4b4d,
+                                               0xca80, 0xc880,
+                                               0x4b4d, 0x4b4d };
+#endif
 VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1500000,
                                            0x42073333, 0x42073333 };