+2016-09-23 Matthew Wahab <matthew.wahab@arm.com>
+
+ * config/arm/arm.c (arm_evpc_neon_vuzp): Add support for V8HF and
+ V4HF modes.
+ (arm_evpc_neon_vtrn): Likewise.
+ (arm_evpc_neon_vrev): Likewise.
+ (arm_evpc_neon_vext): Likewise.
+ * config/arm/arm_neon.h (vbsl_f16): New.
+ (vbslq_f16): New.
+ (vdup_n_f16): New.
+ (vdupq_n_f16): New.
+ (vdup_lane_f16): New.
+ (vdupq_lane_f16): New.
+ (vext_f16): New.
+ (vextq_f16): New.
+ (vmov_n_f16): New.
+ (vmovq_n_f16): New.
+ (vrev64_f16): New.
+ (vrev64q_f16): New.
+ (vtrn_f16): New.
+ (vtrnq_f16): New.
+ (vuzp_f16): New.
+ (vuzpq_f16): New.
+ (vzip_f16): New.
+ (vzipq_f16): New.
+ * config/arm/arm_neon_builtins.def (vdup_n): New (v8hf, v4hf variants).
+ (vdup_lane): New (v8hf, v4hf variants).
+ (vext): New (v8hf, v4hf variants).
+ (vbsl): New (v8hf, v4hf variants).
+ * config/arm/iterators.md (VDQWH): New.
+ (VH): New.
+ (V_double_vector_mode): Add V8HF and V4HF. Fix white-space.
+ (Scalar_mul_8_16): Fix white-space.
+ (Is_d_reg): Add V4HF and V8HF.
+ * config/arm/neon.md (neon_vdup_lane<mode>_internal): New.
+ (neon_vdup_lane<mode>): New.
+ (neon_vtrn<mode>_internal): Replace VDQW with VDQWH.
+ (*neon_vtrn<mode>_insn): Likewise.
+ (neon_vzip<mode>_internal): Likewise. Also fix white-space.
+ (*neon_vzip<mode>_insn): Likewise.
+ (neon_vuzp<mode>_internal): Likewise.
+ (*neon_vuzp<mode>_insn): Likewise.
+ * config/arm/vec-common.md (vec_perm_const<mode>): New.
+
2016-09-23 Jiong Wang <jiong.wang@arm.com>
Matthew Wahab <matthew.wahab@arm.com>
case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
+ case V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
+ case V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
+ case V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
+ case V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
case V4SImode: gen = gen_neon_vzipv4si_internal; break;
case V2SImode: gen = gen_neon_vzipv2si_internal; break;
case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
case V8QImode: gen = gen_neon_vrev32v8qi; break;
case V8HImode: gen = gen_neon_vrev64v8hi; break;
case V4HImode: gen = gen_neon_vrev64v4hi; break;
+ case V8HFmode: gen = gen_neon_vrev64v8hf; break;
+ case V4HFmode: gen = gen_neon_vrev64v4hf; break;
default:
return false;
}
case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
+ case V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
+ case V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
case V8HImode: gen = gen_neon_vextv8hi; break;
case V2SImode: gen = gen_neon_vextv2si; break;
case V4SImode: gen = gen_neon_vextv4si; break;
+ case V4HFmode: gen = gen_neon_vextv4hf; break;
+ case V8HFmode: gen = gen_neon_vextv8hf; break;
case V2SFmode: gen = gen_neon_vextv2sf; break;
case V4SFmode: gen = gen_neon_vextv4sf; break;
case V2DImode: gen = gen_neon_vextv2di; break;
#pragma GCC pop_options
+ /* Half-precision data processing intrinsics. */
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vbsl_f16 (uint16x4_t __a, float16x4_t __b, float16x4_t __c)
+{
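+ /* The vbsl builtin takes the selector in a signed-integer vector type,
+    hence the cast from uint16x4_t.  */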
+ return __builtin_neon_vbslv4hf ((int16x4_t)__a, __b, __c);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vbslq_f16 (uint16x8_t __a, float16x8_t __b, float16x8_t __c)
+{
+ return __builtin_neon_vbslv8hf ((int16x8_t)__a, __b, __c);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vdup_n_f16 (float16_t __a)
+{
+ return __builtin_neon_vdup_nv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vdupq_n_f16 (float16_t __a)
+{
+ return __builtin_neon_vdup_nv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vdup_lane_f16 (float16x4_t __a, const int __b)
+{
+ return __builtin_neon_vdup_lanev4hf (__a, __b);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vdupq_lane_f16 (float16x4_t __a, const int __b)
+{
+ return __builtin_neon_vdup_lanev8hf (__a, __b);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vext_f16 (float16x4_t __a, float16x4_t __b, const int __c)
+{
+ return __builtin_neon_vextv4hf (__a, __b, __c);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vextq_f16 (float16x8_t __a, float16x8_t __b, const int __c)
+{
+ return __builtin_neon_vextv8hf (__a, __b, __c);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vmov_n_f16 (float16_t __a)
+{
+ return __builtin_neon_vdup_nv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vmovq_n_f16 (float16_t __a)
+{
+ return __builtin_neon_vdup_nv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrev64_f16 (float16x4_t __a)
+{
+ return (float16x4_t)__builtin_shuffle (__a, (uint16x4_t){ 3, 2, 1, 0 });
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrev64q_f16 (float16x8_t __a)
+{
+ return
+ (float16x8_t)__builtin_shuffle (__a,
+ (uint16x8_t){ 3, 2, 1, 0, 7, 6, 5, 4 });
+}
+
+__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
+vtrn_f16 (float16x4_t __a, float16x4_t __b)
+{
+ float16x4x2_t __rv;
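+ /* The __builtin_shuffle masks use GCC's lane numbering, which is reversed
+    on big-endian, so a different mask is needed there to produce the same
+    VTRN result.  */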
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 5, 1, 7, 3 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 0, 6, 2 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 4, 2, 6 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 1, 5, 3, 7 });
+#endif
+ return __rv;
+}
+
+__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
+vtrnq_f16 (float16x8_t __a, float16x8_t __b)
+{
+ float16x8x2_t __rv;
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b,
+ (uint16x8_t){ 9, 1, 11, 3, 13, 5, 15, 7 });
+ __rv.val[1] = __builtin_shuffle (__a, __b,
+ (uint16x8_t){ 8, 0, 10, 2, 12, 4, 14, 6 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b,
+ (uint16x8_t){ 0, 8, 2, 10, 4, 12, 6, 14 });
+ __rv.val[1] = __builtin_shuffle (__a, __b,
+ (uint16x8_t){ 1, 9, 3, 11, 5, 13, 7, 15 });
+#endif
+ return __rv;
+}
+
+__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
+vuzp_f16 (float16x4_t __a, float16x4_t __b)
+{
+ float16x4x2_t __rv;
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 5, 7, 1, 3 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 6, 0, 2 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 2, 4, 6 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 1, 3, 5, 7 });
+#endif
+ return __rv;
+}
+
+__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
+vuzpq_f16 (float16x8_t __a, float16x8_t __b)
+{
+ float16x8x2_t __rv;
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 5, 7, 1, 3, 13, 15, 9, 11 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 4, 6, 0, 2, 12, 14, 8, 10 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b,
+ (uint16x8_t){ 0, 2, 4, 6, 8, 10, 12, 14 });
+ __rv.val[1] = __builtin_shuffle (__a, __b,
+ (uint16x8_t){ 1, 3, 5, 7, 9, 11, 13, 15 });
+#endif
+ return __rv;
+}
+
+__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
+vzip_f16 (float16x4_t __a, float16x4_t __b)
+{
+ float16x4x2_t __rv;
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 6, 2, 7, 3 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 0, 5, 1 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 4, 1, 5 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 2, 6, 3, 7 });
+#endif
+ return __rv;
+}
+
+__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
+vzipq_f16 (float16x8_t __a, float16x8_t __b)
+{
+ float16x8x2_t __rv;
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 10, 2, 11, 3, 8, 0, 9, 1 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 14, 6, 15, 7, 12, 4, 13, 5 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b,
+ (uint16x8_t){ 0, 8, 1, 9, 2, 10, 3, 11 });
+ __rv.val[1] = __builtin_shuffle (__a, __b,
+ (uint16x8_t){ 4, 12, 5, 13, 6, 14, 7, 15 });
+#endif
+ return __rv;
+}
+
+#endif
+
#ifdef __cplusplus
}
#endif
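
For illustration (an editorial sketch, not part of the patch): a minimal use
of the new half-precision intrinsics, assuming a target where
__ARM_FP16_FORMAT_IEEE is defined (e.g. built with
-mfpu=neon-fp16 -mfp16-format=ieee):

#include <arm_neon.h>

/* Sketch only: duplicate a scalar across lanes, splice in two lanes of v,
   transpose against v, then reverse.  Exercises vdup_n_f16, vext_f16,
   vtrn_f16 and vrev64_f16.  */
float16x4_t
demo_f16 (float16_t x, float16x4_t v)
{
  float16x4_t a = vdup_n_f16 (x);      /* { x, x, x, x }  */
  float16x4_t e = vext_f16 (a, v, 2);  /* { x, x, v[0], v[1] }  */
  float16x4x2_t t = vtrn_f16 (e, v);   /* interleave even lanes of e and v  */
  return vrev64_f16 (t.val[0]);        /* reverse the four lanes  */
}
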
VAR5 (UNOP, vcreate, v8qi, v4hi, v2si, v2sf, di)
VAR10 (UNOP, vdup_n,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
+VAR2 (UNOP, vdup_n, v8hf, v4hf)
VAR10 (GETLANE, vdup_lane,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
+VAR2 (GETLANE, vdup_lane, v8hf, v4hf)
VAR6 (COMBINE, vcombine, v8qi, v4hi, v4hf, v2si, v2sf, di)
VAR6 (UNOP, vget_high, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
VAR6 (UNOP, vget_low, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
VAR2 (MAC_N, vqdmlsl_n, v4hi, v2si)
VAR10 (SETLANE, vext,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
+VAR2 (SETLANE, vext, v8hf, v4hf)
VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf)
VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi)
VAR2 (UNOP, vrev16, v8qi, v16qi)
VAR1 (UNOP, vcvtv4hf, v4sf)
VAR10 (TERNOP, vbsl,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
+VAR2 (TERNOP, vbsl, v8hf, v4hf)
VAR2 (UNOP, copysignf, v2sf, v4sf)
VAR2 (UNOP, vrintn, v2sf, v4sf)
VAR2 (UNOP, vrinta, v2sf, v4sf)
;; All supported vector modes (except those with 64-bit integer elements).
(define_mode_iterator VDQW [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF])
+;; All supported vector modes including 16-bit float modes.
+(define_mode_iterator VDQWH [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF
+ V8HF V4HF])
+
;; Supported integer vector modes (not 64 bit elements).
(define_mode_iterator VDQIW [V8QI V16QI V4HI V8HI V2SI V4SI])
;; Modes with 8-bit, 16-bit and 32-bit elements.
(define_mode_iterator VU [V16QI V8HI V4SI])
+;; Vector modes for 16-bit floating-point support.
+(define_mode_iterator VH [V8HF V4HF])
+
;; Iterators used for fixed-point support.
(define_mode_iterator FIXED [QQ HQ SQ UQQ UHQ USQ HA SA UHA USA])
;; Used for neon_vdup_lane, where the second operand is double-sized
;; even when the first one is quad.
(define_mode_attr V_double_vector_mode [(V16QI "V8QI") (V8HI "V4HI")
- (V4SI "V2SI") (V4SF "V2SF")
- (V8QI "V8QI") (V4HI "V4HI")
- (V2SI "V2SI") (V2SF "V2SF")])
+ (V4SI "V2SI") (V4SF "V2SF")
+ (V8QI "V8QI") (V4HI "V4HI")
+ (V2SI "V2SI") (V2SF "V2SF")
+ (V8HF "V4HF") (V4HF "V4HF")])
;; Mode of result of comparison operations (and bit-select operand 1).
(define_mode_attr V_cmp_result [(V8QI "V8QI") (V16QI "V16QI")
(DI "false") (V2DI "false")])
(define_mode_attr Scalar_mul_8_16 [(V8QI "true") (V16QI "true")
- (V4HI "true") (V8HI "true")
- (V2SI "false") (V4SI "false")
- (V2SF "false") (V4SF "false")
- (DI "false") (V2DI "false")])
-
+ (V4HI "true") (V8HI "true")
+ (V2SI "false") (V4SI "false")
+ (V2SF "false") (V4SF "false")
+ (DI "false") (V2DI "false")])
(define_mode_attr Is_d_reg [(V8QI "true") (V16QI "false")
(V4HI "true") (V8HI "false")
(V2SI "true") (V4SI "false")
(V2SF "true") (V4SF "false")
- (DI "true") (V2DI "false")])
+ (DI "true") (V2DI "false")
+ (V4HF "true") (V8HF "false")])
(define_mode_attr V_mode_nunits [(V8QI "8") (V16QI "16")
(V4HF "4") (V8HF "8")
[(set_attr "type" "neon_dup<q>")]
)
+(define_insn "neon_vdup_lane<mode>_internal"
+ [(set (match_operand:VH 0 "s_register_operand" "=w")
+ (vec_duplicate:VH
+ (vec_select:<V_elem>
+ (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
+ "TARGET_NEON && TARGET_FP16"
+{
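+ /* On big-endian, GCC's lane numbering within the double-width source is
+    reversed relative to the architectural numbering used by VDUP, so remap
+    the index.  */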
+ if (BYTES_BIG_ENDIAN)
+ {
+ int elt = INTVAL (operands[2]);
+ elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
+ operands[2] = GEN_INT (elt);
+ }
+ if (<Is_d_reg>)
+ return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
+ else
+ return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
+}
+ [(set_attr "type" "neon_dup<q>")]
+)
+
(define_expand "neon_vdup_lane<mode>"
[(match_operand:VDQW 0 "s_register_operand" "=w")
(match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
DONE;
})
+(define_expand "neon_vdup_lane<mode>"
+ [(match_operand:VH 0 "s_register_operand")
+ (match_operand:<V_double_vector_mode> 1 "s_register_operand")
+ (match_operand:SI 2 "immediate_operand")]
+ "TARGET_NEON && TARGET_FP16"
+{
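+ /* Remap the lane index for big-endian, as in the _internal pattern above;
+    XOR with (reg_nelts - 1) equals (reg_nelts - 1 - elt) since reg_nelts
+    is a power of two.  */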
+ if (BYTES_BIG_ENDIAN)
+ {
+ unsigned int elt = INTVAL (operands[2]);
+ unsigned int reg_nelts
+ = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
+ elt ^= reg_nelts - 1;
+ operands[2] = GEN_INT (elt);
+ }
+ emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
+ operands[2]));
+ DONE;
+})
+
; Scalar index is ignored, since only zero is valid here.
(define_expand "neon_vdup_lanedi"
[(match_operand:DI 0 "s_register_operand" "=w")
(define_expand "neon_vtrn<mode>_internal"
[(parallel
- [(set (match_operand:VDQW 0 "s_register_operand" "")
- (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
- (match_operand:VDQW 2 "s_register_operand" "")]
+ [(set (match_operand:VDQWH 0 "s_register_operand")
+ (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
+ (match_operand:VDQWH 2 "s_register_operand")]
UNSPEC_VTRN1))
- (set (match_operand:VDQW 3 "s_register_operand" "")
- (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
+ (set (match_operand:VDQWH 3 "s_register_operand")
+ (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
"TARGET_NEON"
""
)
;; Note: Different operand numbering to handle tied registers correctly.
(define_insn "*neon_vtrn<mode>_insn"
- [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
- (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
- (match_operand:VDQW 3 "s_register_operand" "2")]
- UNSPEC_VTRN1))
- (set (match_operand:VDQW 2 "s_register_operand" "=&w")
- (unspec:VDQW [(match_dup 1) (match_dup 3)]
- UNSPEC_VTRN2))]
+ [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
+ (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
+ (match_operand:VDQWH 3 "s_register_operand" "2")]
+ UNSPEC_VTRN1))
+ (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
+ (unspec:VDQWH [(match_dup 1) (match_dup 3)]
+ UNSPEC_VTRN2))]
"TARGET_NEON"
"vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
[(set_attr "type" "neon_permute<q>")]
(define_expand "neon_vzip<mode>_internal"
[(parallel
- [(set (match_operand:VDQW 0 "s_register_operand" "")
- (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
- (match_operand:VDQW 2 "s_register_operand" "")]
- UNSPEC_VZIP1))
- (set (match_operand:VDQW 3 "s_register_operand" "")
- (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
+ [(set (match_operand:VDQWH 0 "s_register_operand")
+ (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
+ (match_operand:VDQWH 2 "s_register_operand")]
+ UNSPEC_VZIP1))
+ (set (match_operand:VDQWH 3 "s_register_operand")
+ (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
"TARGET_NEON"
""
)
;; Note: Different operand numbering to handle tied registers correctly.
(define_insn "*neon_vzip<mode>_insn"
- [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
- (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
- (match_operand:VDQW 3 "s_register_operand" "2")]
- UNSPEC_VZIP1))
- (set (match_operand:VDQW 2 "s_register_operand" "=&w")
- (unspec:VDQW [(match_dup 1) (match_dup 3)]
- UNSPEC_VZIP2))]
+ [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
+ (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
+ (match_operand:VDQWH 3 "s_register_operand" "2")]
+ UNSPEC_VZIP1))
+ (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
+ (unspec:VDQWH [(match_dup 1) (match_dup 3)]
+ UNSPEC_VZIP2))]
"TARGET_NEON"
"vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
[(set_attr "type" "neon_zip<q>")]
(define_expand "neon_vuzp<mode>_internal"
[(parallel
- [(set (match_operand:VDQW 0 "s_register_operand" "")
- (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
- (match_operand:VDQW 2 "s_register_operand" "")]
+ [(set (match_operand:VDQWH 0 "s_register_operand")
+ (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
+ (match_operand:VDQWH 2 "s_register_operand")]
UNSPEC_VUZP1))
- (set (match_operand:VDQW 3 "s_register_operand" "")
- (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
+ (set (match_operand:VDQWH 3 "s_register_operand")
+ (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
"TARGET_NEON"
""
)
;; Note: Different operand numbering to handle tied registers correctly.
(define_insn "*neon_vuzp<mode>_insn"
- [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
- (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
- (match_operand:VDQW 3 "s_register_operand" "2")]
- UNSPEC_VUZP1))
- (set (match_operand:VDQW 2 "s_register_operand" "=&w")
- (unspec:VDQW [(match_dup 1) (match_dup 3)]
- UNSPEC_VUZP2))]
+ [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
+ (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
+ (match_operand:VDQWH 3 "s_register_operand" "2")]
+ UNSPEC_VUZP1))
+ (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
+ (unspec:VDQWH [(match_dup 1) (match_dup 3)]
+ UNSPEC_VUZP2))]
"TARGET_NEON"
"vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
[(set_attr "type" "neon_zip<q>")]
FAIL;
})
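+;; Standard vec_perm_const pattern for the 16-bit float modes; constant
+;; permutes are handled via arm_expand_vec_perm_const.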
+(define_expand "vec_perm_const<mode>"
+ [(match_operand:VH 0 "s_register_operand")
+ (match_operand:VH 1 "s_register_operand")
+ (match_operand:VH 2 "s_register_operand")
+ (match_operand:<V_cmp_result> 3)]
+ "TARGET_NEON"
+{
+ if (arm_expand_vec_perm_const (operands[0], operands[1],
+ operands[2], operands[3]))
+ DONE;
+ else
+ FAIL;
+})
+
(define_expand "vec_perm<mode>"
[(match_operand:VE 0 "s_register_operand" "")
(match_operand:VE 1 "s_register_operand" "")
+2016-09-23 Matthew Wahab <matthew.wahab@arm.com>
+
+ * gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
+ (FP16_SUPPORTED): New.
+ (expected-hfloat-16x4): Make conditional on __fp16 support.
+ (expected-hfloat-16x8): Likewise.
+ (vdup_n_f16): Disable for non-AArch64 targets.
+ * gcc.target/aarch64/advsimd-intrinsics/vbsl.c: Add __fp16 tests,
+ conditional on FP16_SUPPORTED.
+ * gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c: Likewise.
+ * gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c: Likewise.
+ * gcc.target/aarch64/advsimd-intrinsics/vext.c: Likewise.
+ * gcc.target/aarch64/advsimd-intrinsics/vrev.c: Likewise.
+ * gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc: Add support
+ for testing __fp16.
+ * gcc.target/aarch64/advsimd-intrinsics/vtrn.c: Add __fp16 tests,
+ conditional on FP16_SUPPORTED.
+ * gcc.target/aarch64/advsimd-intrinsics/vuzp.c: Likewise.
+ * gcc.target/aarch64/advsimd-intrinsics/vzip.c: Likewise.
+
2016-09-23 Matthew Wahab <matthew.wahab@arm.com>
* gcc.target/arm/short-vfp-1.c: New.
extern void *memcpy(void *, const void *, size_t);
extern size_t strlen(const char *);
+/* Helper macro to select FP16 tests. */
+#if (!defined (__aarch64__) \
+ && (defined (__ARM_FP16_FORMAT_IEEE) \
+ || defined (__ARM_FP16_FORMAT_ALTERNATIVE)))
+#define FP16_SUPPORTED (1)
+#else
+#undef FP16_SUPPORTED
+#endif
+
/* Various string construction helpers. */
/*
/* Helpers to initialize vectors. */
#define VDUP(VAR, Q, T1, T2, W, N, V) \
VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V)
-#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+#if (defined (__aarch64__) \
+ && (defined (__ARM_FP16_FORMAT_IEEE) \
+ || defined (__ARM_FP16_FORMAT_ALTERNATIVE)))
/* Work around that there is no vdup_n_f16 intrinsic. */
#define vdup_n_f16(VAL) \
__extension__ \
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
0xf7, 0xf7, 0xf7, 0xf7 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc09, 0xcb89,
+ 0xcb09, 0xca89 };
+#endif
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800004, 0xc1700004 };
VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf6, 0xf6, 0xf6, 0xf6,
0xf7, 0xf7, 0xf7, 0xf7 };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2,
0xfff4, 0xfff4, 0xfff6, 0xfff6 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc09, 0xcb89,
+ 0xcb09, 0xca89,
+ 0xca09, 0xc989,
+ 0xc909, 0xc889 };
+#endif
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800001, 0xc1700001,
0xc1600001, 0xc1500001 };
clean_results ();
TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
+#if defined (FP16_SUPPORTED)
+ VLOAD(vector, buffer, , float, f, 16, 4);
+ VLOAD(vector, buffer, q, float, f, 16, 8);
+#endif
VLOAD(vector, buffer, , float, f, 32, 2);
VLOAD(vector, buffer, q, float, f, 32, 4);
VDUP(vector2, , uint, u, 16, 4, 0xFFF2);
VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF0);
VDUP(vector2, , uint, u, 64, 1, 0xFFFFFFF3);
+#if defined (FP16_SUPPORTED)
+ VDUP(vector2, , float, f, 16, 4, -2.4f); /* -2.4f is 0xC0CD. */
+#endif
VDUP(vector2, , float, f, 32, 2, -30.3f);
VDUP(vector2, , poly, p, 8, 8, 0xF3);
VDUP(vector2, , poly, p, 16, 4, 0xFFF2);
VDUP(vector2, q, uint, u, 64, 2, 0xFFFFFFF3);
VDUP(vector2, q, poly, p, 8, 16, 0xF3);
VDUP(vector2, q, poly, p, 16, 8, 0xFFF2);
+#if defined (FP16_SUPPORTED)
+ VDUP(vector2, q, float, f, 16, 8, -2.4f);
+#endif
VDUP(vector2, q, float, f, 32, 4, -30.4f);
VDUP(vector_first, , uint, u, 8, 8, 0xF4);
TEST_VBSL(uint, , poly, p, 16, 4);
TEST_VBSL(uint, q, poly, p, 8, 16);
TEST_VBSL(uint, q, poly, p, 16, 8);
+#if defined (FP16_SUPPORTED)
+ TEST_VBSL(uint, , float, f, 16, 4);
+ TEST_VBSL(uint, q, float, f, 16, 8);
+#endif
TEST_VBSL(uint, , float, f, 32, 2);
TEST_VBSL(uint, q, float, f, 32, 4);
+#if defined (FP16_SUPPORTED)
+ CHECK_RESULTS (TEST_MSG, "");
+#else
CHECK_RESULTS_NO_FP16 (TEST_MSG, "");
+#endif
}
int main (void)
VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0 };
VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcc00,
+ 0xcc00, 0xcc00 };
+#endif
VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 };
VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0 };
VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcc00,
+ 0xcc00, 0xcc00,
+ 0xcc00, 0xcc00,
+ 0xcc00, 0xcc00 };
+#endif
VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1800000,
0xc1800000, 0xc1800000 };
VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
0xf1, 0xf1, 0xf1, 0xf1 };
VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb80, 0xcb80,
+ 0xcb80, 0xcb80 };
+#endif
VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
0xf1, 0xf1, 0xf1, 0xf1,
0xf1, 0xf1, 0xf1, 0xf1 };
VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xcb80, 0xcb80,
+ 0xcb80, 0xcb80,
+ 0xcb80, 0xcb80,
+ 0xcb80, 0xcb80 };
+#endif
VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
0xc1700000, 0xc1700000 };
VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2 };
VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xcb00, 0xcb00,
+ 0xcb00, 0xcb00 };
+#endif
VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 };
VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2 };
VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2,
0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xcb00, 0xcb00,
+ 0xcb00, 0xcb00,
+ 0xcb00, 0xcb00,
+ 0xcb00, 0xcb00 };
+#endif
VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0xc1600000,
0xc1600000, 0xc1600000 };
TEST_VDUP(, uint, u, 64, 1);
TEST_VDUP(, poly, p, 8, 8);
TEST_VDUP(, poly, p, 16, 4);
+#if defined (FP16_SUPPORTED)
+ TEST_VDUP(, float, f, 16, 4);
+#endif
TEST_VDUP(, float, f, 32, 2);
TEST_VDUP(q, int, s, 8, 16);
TEST_VDUP(q, uint, u, 64, 2);
TEST_VDUP(q, poly, p, 8, 16);
TEST_VDUP(q, poly, p, 16, 8);
+#if defined (FP16_SUPPORTED)
+ TEST_VDUP(q, float, f, 16, 8);
+#endif
TEST_VDUP(q, float, f, 32, 4);
+#if defined (FP16_SUPPORTED)
+ switch (i) {
+ case 0:
+ CHECK_RESULTS_NAMED (TEST_MSG, expected0, "");
+ break;
+ case 1:
+ CHECK_RESULTS_NAMED (TEST_MSG, expected1, "");
+ break;
+ case 2:
+ CHECK_RESULTS_NAMED (TEST_MSG, expected2, "");
+ break;
+ default:
+ abort();
+ }
+#else
switch (i) {
case 0:
CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, "");
default:
abort();
}
+#endif
}
/* Do the same tests with vmov. Use the same expected results. */
TEST_VMOV(, uint, u, 64, 1);
TEST_VMOV(, poly, p, 8, 8);
TEST_VMOV(, poly, p, 16, 4);
+#if defined (FP16_SUPPORTED)
+ TEST_VMOV(, float, f, 16, 4);
+#endif
TEST_VMOV(, float, f, 32, 2);
TEST_VMOV(q, int, s, 8, 16);
TEST_VMOV(q, uint, u, 64, 2);
TEST_VMOV(q, poly, p, 8, 16);
TEST_VMOV(q, poly, p, 16, 8);
+#if defined (FP16_SUPPORTED)
+ TEST_VMOV(q, float, f, 16, 8);
+#endif
TEST_VMOV(q, float, f, 32, 4);
+#if defined (FP16_SUPPORTED)
+ switch (i) {
+ case 0:
+ CHECK_RESULTS_NAMED (TEST_MSG, expected0, "");
+ break;
+ case 1:
+ CHECK_RESULTS_NAMED (TEST_MSG, expected1, "");
+ break;
+ case 2:
+ CHECK_RESULTS_NAMED (TEST_MSG, expected2, "");
+ break;
+ default:
+ abort();
+ }
+#else
switch (i) {
case 0:
CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, "");
default:
abort();
}
+#endif
+
}
}
0xf7, 0xf7, 0xf7, 0xf7 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xca80, 0xca80,
+ 0xca80, 0xca80 };
+#endif
VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2,
0xf5, 0xf5, 0xf5, 0xf5 };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xca80, 0xca80,
+ 0xca80, 0xca80,
+ 0xca80, 0xca80,
+ 0xca80, 0xca80 };
+#endif
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
0xc1700000, 0xc1700000 };
clean_results ();
TEST_MACRO_64BITS_VARIANTS_2_5(VLOAD, vector, buffer);
+#if defined (FP16_SUPPORTED)
+ VLOAD(vector, buffer, , float, f, 16, 4);
+#endif
VLOAD(vector, buffer, , float, f, 32, 2);
/* Choose lane arbitrarily. */
TEST_VDUP_LANE(, uint, u, 64, 1, 1, 0);
TEST_VDUP_LANE(, poly, p, 8, 8, 8, 7);
TEST_VDUP_LANE(, poly, p, 16, 4, 4, 3);
+#if defined (FP16_SUPPORTED)
+ TEST_VDUP_LANE(, float, f, 16, 4, 4, 3);
+#endif
TEST_VDUP_LANE(, float, f, 32, 2, 2, 1);
TEST_VDUP_LANE(q, int, s, 8, 16, 8, 2);
TEST_VDUP_LANE(q, uint, u, 64, 2, 1, 0);
TEST_VDUP_LANE(q, poly, p, 8, 16, 8, 5);
TEST_VDUP_LANE(q, poly, p, 16, 8, 4, 1);
+#if defined (FP16_SUPPORTED)
+ TEST_VDUP_LANE(q, float, f, 16, 8, 4, 3);
+#endif
TEST_VDUP_LANE(q, float, f, 32, 4, 2, 1);
+#if defined (FP16_SUPPORTED)
+ CHECK_RESULTS (TEST_MSG, "");
+#else
CHECK_RESULTS_NO_FP16 (TEST_MSG, "");
+#endif
}
int main (void)
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0xf7, 0x55, 0x55,
0x55, 0x55, 0x55, 0x55 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcb00, 0xca80,
+ 0x4b4d, 0x4b4d };
+#endif
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0x42066666 };
VECT_VAR_DECL(expected,int,8,16) [] = { 0xfe, 0xff, 0x11, 0x11,
0x11, 0x11, 0x11, 0x11,
0x55, 0x55, 0x55, 0x55 };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff6, 0xfff7, 0x66, 0x66,
0x66, 0x66, 0x66, 0x66 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xc880, 0x4b4d,
+ 0x4b4d, 0x4b4d,
+ 0x4b4d, 0x4b4d,
+ 0x4b4d, 0x4b4d };
+#endif
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1500000, 0x4204cccd,
0x4204cccd, 0x4204cccd };
clean_results ();
TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer);
+#if defined (FP16_SUPPORTED)
+ VLOAD(vector1, buffer, , float, f, 16, 4);
+ VLOAD(vector1, buffer, q, float, f, 16, 8);
+#endif
VLOAD(vector1, buffer, , float, f, 32, 2);
VLOAD(vector1, buffer, q, float, f, 32, 4);
VDUP(vector2, , uint, u, 64, 1, 0x88);
VDUP(vector2, , poly, p, 8, 8, 0x55);
VDUP(vector2, , poly, p, 16, 4, 0x66);
+#if defined (FP16_SUPPORTED)
+ VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. */
+#endif
VDUP(vector2, , float, f, 32, 2, 33.6f);
VDUP(vector2, q, int, s, 8, 16, 0x11);
VDUP(vector2, q, uint, u, 64, 2, 0x88);
VDUP(vector2, q, poly, p, 8, 16, 0x55);
VDUP(vector2, q, poly, p, 16, 8, 0x66);
+#if defined (FP16_SUPPORTED)
+ VDUP (vector2, q, float, f, 16, 8, 14.6f);
+#endif
VDUP(vector2, q, float, f, 32, 4, 33.2f);
/* Choose arbitrary extract offsets. */
TEST_VEXT(, uint, u, 64, 1, 0);
TEST_VEXT(, poly, p, 8, 8, 6);
TEST_VEXT(, poly, p, 16, 4, 2);
+#if defined (FP16_SUPPORTED)
+ TEST_VEXT(, float, f, 16, 4, 2);
+#endif
TEST_VEXT(, float, f, 32, 2, 1);
TEST_VEXT(q, int, s, 8, 16, 14);
TEST_VEXT(q, uint, u, 64, 2, 1);
TEST_VEXT(q, poly, p, 8, 16, 12);
TEST_VEXT(q, poly, p, 16, 8, 6);
+#if defined (FP16_SUPPORTED)
+ TEST_VEXT(q, float, f, 16, 8, 7);
+#endif
TEST_VEXT(q, float, f, 32, 4, 3);
+#if defined (FP16_SUPPORTED)
+ CHECK_RESULTS (TEST_MSG, "");
+#else
CHECK_RESULTS_NO_FP16 (TEST_MSG, "");
+#endif
}
int main (void)
VECT_VAR_DECL(expected_vrev64,poly,8,8) [] = { 0xf7, 0xf6, 0xf5, 0xf4,
0xf3, 0xf2, 0xf1, 0xf0 };
VECT_VAR_DECL(expected_vrev64,poly,16,4) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected_vrev64, hfloat, 16, 4) [] = { 0xca80, 0xcb00,
+ 0xcb80, 0xcc00 };
+#endif
VECT_VAR_DECL(expected_vrev64,hfloat,32,2) [] = { 0xc1700000, 0xc1800000 };
VECT_VAR_DECL(expected_vrev64,int,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4,
0xf3, 0xf2, 0xf1, 0xf0,
0xfb, 0xfa, 0xf9, 0xf8 };
VECT_VAR_DECL(expected_vrev64,poly,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0,
0xfff7, 0xfff6, 0xfff5, 0xfff4 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected_vrev64, hfloat, 16, 8) [] = { 0xca80, 0xcb00,
+ 0xcb80, 0xcc00,
+ 0xc880, 0xc900,
+ 0xc980, 0xca00 };
+#endif
VECT_VAR_DECL(expected_vrev64,hfloat,32,4) [] = { 0xc1700000, 0xc1800000,
0xc1500000, 0xc1600000 };
/* Initialize input "vector" from "buffer". */
TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
+#if defined (FP16_SUPPORTED)
+ VLOAD (vector, buffer, , float, f, 16, 4);
+ VLOAD (vector, buffer, q, float, f, 16, 8);
+#endif
VLOAD(vector, buffer, , float, f, 32, 2);
VLOAD(vector, buffer, q, float, f, 32, 4);
CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_vrev64, "");
CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_vrev64, "");
+#if defined (FP16_SUPPORTED)
+ TEST_VREV (, float, f, 16, 4, 64);
+ TEST_VREV (q, float, f, 16, 8, 64);
+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_vrev64, "");
+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_vrev64, "");
+#endif
TEST_VREV(, float, f, 32, 2, 64);
TEST_VREV(q, float, f, 32, 4, 64);
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_vrev64, "");
DECL_VSHUFFLE(float, 32, 4)
DECL_ALL_VSHUFFLE();
+#if defined (FP16_SUPPORTED)
+ DECL_VSHUFFLE (float, 16, 4);
+ DECL_VSHUFFLE (float, 16, 8);
+#endif
/* Initialize input "vector" from "buffer". */
TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer);
+#if defined (FP16_SUPPORTED)
+ VLOAD (vector1, buffer, , float, f, 16, 4);
+ VLOAD (vector1, buffer, q, float, f, 16, 8);
+#endif
VLOAD(vector1, buffer, , float, f, 32, 2);
VLOAD(vector1, buffer, q, float, f, 32, 4);
VDUP(vector2, , uint, u, 32, 2, 0x77);
VDUP(vector2, , poly, p, 8, 8, 0x55);
VDUP(vector2, , poly, p, 16, 4, 0x66);
+#if defined (FP16_SUPPORTED)
+ VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. */
+#endif
VDUP(vector2, , float, f, 32, 2, 33.6f);
VDUP(vector2, q, int, s, 8, 16, 0x11);
VDUP(vector2, q, uint, u, 32, 4, 0x77);
VDUP(vector2, q, poly, p, 8, 16, 0x55);
VDUP(vector2, q, poly, p, 16, 8, 0x66);
+#if defined (FP16_SUPPORTED)
+ VDUP (vector2, q, float, f, 16, 8, 14.6f);
+#endif
VDUP(vector2, q, float, f, 32, 4, 33.8f);
-
+
#define TEST_ALL_VSHUFFLE(INSN) \
TEST_VSHUFFLE(INSN, , int, s, 8, 8); \
TEST_VSHUFFLE(INSN, , int, s, 16, 4); \
TEST_VSHUFFLE(INSN, q, poly, p, 16, 8); \
TEST_VSHUFFLE(INSN, q, float, f, 32, 4)
+#define TEST_VSHUFFLE_FP16(INSN) \
+ TEST_VSHUFFLE(INSN, , float, f, 16, 4); \
+ TEST_VSHUFFLE(INSN, q, float, f, 16, 8);
+
#define TEST_ALL_EXTRA_CHUNKS() \
TEST_EXTRA_CHUNK(int, 8, 8, 1); \
TEST_EXTRA_CHUNK(int, 16, 4, 1); \
CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \
CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \
CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \
- } \
+ }
+
+#define CHECK_RESULTS_VSHUFFLE_FP16(test_name,EXPECTED,comment) \
+ { \
+ CHECK_FP (test_name, float, 16, 4, PRIx16, EXPECTED, comment); \
+ CHECK_FP (test_name, float, 16, 8, PRIx16, EXPECTED, comment); \
+ }
clean_results ();
/* Execute the tests. */
TEST_ALL_VSHUFFLE(INSN_NAME);
+#if defined (FP16_SUPPORTED)
+ TEST_VSHUFFLE_FP16 (INSN_NAME);
+#endif
CHECK_RESULTS_VSHUFFLE (TEST_MSG, expected0, "(chunk 0)");
+#if defined (FP16_SUPPORTED)
+ CHECK_RESULTS_VSHUFFLE_FP16 (TEST_MSG, expected0, "(chunk 0)");
+#endif
TEST_ALL_EXTRA_CHUNKS();
+#if defined (FP16_SUPPORTED)
+ TEST_EXTRA_CHUNK (float, 16, 4, 1);
+ TEST_EXTRA_CHUNK (float, 16, 8, 1);
+#endif
+
CHECK_RESULTS_VSHUFFLE (TEST_MSG, expected1, "(chunk 1)");
+#if defined (FP16_SUPPORTED)
+ CHECK_RESULTS_VSHUFFLE_FP16 (TEST_MSG, expected1, "(chunk 1)");
+#endif
}
int main (void)
VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf1, 0x55, 0x55,
0xf2, 0xf3, 0x55, 0x55 };
VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0x66 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb80,
+ 0x4b4d, 0x4b4d };
+#endif
VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf1, 0x11, 0x11,
0xf2, 0xf3, 0x11, 0x11,
0xf6, 0xf7, 0x55, 0x55 };
VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff1, 0x66, 0x66,
0xfff2, 0xfff3, 0x66, 0x66 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcb80,
+ 0x4b4d, 0x4b4d,
+ 0xcb00, 0xca80,
+ 0x4b4d, 0x4b4d };
+#endif
VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
0x42073333, 0x42073333 };
VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf4, 0xf5, 0x55, 0x55,
0xf6, 0xf7, 0x55, 0x55 };
VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb00, 0xca80,
+ 0x4b4d, 0x4b4d };
+#endif
VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 };
VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf8, 0xf9, 0x11, 0x11,
0xfa, 0xfb, 0x11, 0x11,
0xfe, 0xff, 0x55, 0x55 };
VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff4, 0xfff5, 0x66, 0x66,
0xfff6, 0xfff7, 0x66, 0x66 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xca00, 0xc980,
+ 0x4b4d, 0x4b4d,
+ 0xc900, 0xc880,
+ 0x4b4d, 0x4b4d };
+#endif
VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1600000, 0xc1500000,
0x42073333, 0x42073333 };
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1,
0xfff2, 0xfff3 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb80,
+ 0xcb00, 0xca80 };
+#endif
VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7,
0xfff2, 0xfff3,
0xfff4, 0xfff5,
0xfff6, 0xfff7 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcb80,
+ 0xcb00, 0xca80,
+ 0xca00, 0xc980,
+ 0xc900, 0xc880 };
+#endif
VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
0xc1600000, 0xc1500000 };
VECT_VAR_DECL(expected1,poly,8,8) [] = { 0x55, 0x55, 0x55, 0x55,
0x55, 0x55, 0x55, 0x55 };
VECT_VAR_DECL(expected1,poly,16,4) [] = { 0x66, 0x66, 0x66, 0x66 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0x4b4d, 0x4b4d,
+ 0x4b4d, 0x4b4d };
+#endif
VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 };
VECT_VAR_DECL(expected1,int,8,16) [] = { 0x11, 0x11, 0x11, 0x11,
0x11, 0x11, 0x11, 0x11,
0x55, 0x55, 0x55, 0x55 };
VECT_VAR_DECL(expected1,poly,16,8) [] = { 0x66, 0x66, 0x66, 0x66,
0x66, 0x66, 0x66, 0x66 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0x4b4d, 0x4b4d,
+ 0x4b4d, 0x4b4d,
+ 0x4b4d, 0x4b4d,
+ 0x4b4d, 0x4b4d };
+#endif
VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0x42073333, 0x42073333,
0x42073333, 0x42073333 };
0xf1, 0xf5, 0x55, 0x55 };
VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff2,
0x66, 0x66 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb00,
+ 0x4b4d, 0x4b4d };
+#endif
VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf8, 0x11, 0x11,
0xf1, 0xf9, 0x11, 0x11,
0xf3, 0xfb, 0x55, 0x55 };
VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff4, 0x66, 0x66,
0xfff1, 0xfff5, 0x66, 0x66 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xca00,
+ 0x4b4d, 0x4b4d,
+ 0xcb80, 0xc980,
+ 0x4b4d, 0x4b4d };
+#endif
VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1600000,
0x42073333, 0x42073333 };
0xf3, 0xf7, 0x55, 0x55 };
VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff3,
0x66, 0x66 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb80, 0xca80,
+ 0x4b4d, 0x4b4d };
+#endif
VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 };
VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf4, 0xfc, 0x11, 0x11,
0xf5, 0xfd, 0x11, 0x11,
0xf7, 0xff, 0x55, 0x55 };
VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff2, 0xfff6, 0x66, 0x66,
0xfff3, 0xfff7, 0x66, 0x66 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xcb00, 0xc900,
+ 0x4b4d, 0x4b4d,
+ 0xca80, 0xc880,
+ 0x4b4d, 0x4b4d };
+#endif
VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1500000,
0x42073333, 0x42073333 };