From: James Greenhalgh
Date: Fri, 9 Aug 2013 09:28:51 +0000 (+0000)
Subject: [AArch64] Fixup the vget_lane RTL patterns and intrinsics
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=66adb8eb440d94f72f9973f63b1aac722eb1201d;p=gcc.git

[AArch64] Fixup the vget_lane RTL patterns and intrinsics

gcc/
	* config/aarch64/aarch64-simd-builtins.def (get_lane_signed): Remove.
	(get_lane_unsigned): Likewise.
	(dup_lane_scalar): Likewise.
	(get_lane): enable for VALL.
	* config/aarch64/aarch64-simd.md
	(aarch64_dup_lane_scalar): Remove.
	(aarch64_get_lane_signed): Likewise.
	(aarch64_get_lane_unsigned): Likewise.
	(aarch64_get_lane_extend): New.
	(aarch64_get_lane_zero_extendsi): Likewise.
	(aarch64_get_lane): Enable for all vector modes.
	(aarch64_get_lanedi): Remove misleading constraints.
	* config/aarch64/arm_neon.h
	(__aarch64_vget_lane_any): Define.
	(__aarch64_vget_lane_<8,16,32,64>): Likewise.
	(vget_lane_<8,16,32,64>): Use __aarch64_vget_lane macros.
	(vdup_lane_<8,16,32,64>): Likewise.
	* config/aarch64/iterators.md (VDQQH): New.
	(VDQQHS): Likewise.
	(vwcore): Likewise.

gcc/testsuite/
	* gcc.target/aarch64/scalar_intrinsics.c: Update expected
	output of vdup intrinsics.

From-SVN: r201624
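For context, the user-visible intrinsics this rework touches are the
vget_lane/vgetq_lane families and the scalar vdup<b,h,s,d>_lane intrinsics in
arm_neon.h.  A minimal usage sketch follows (not part of the patch; the
function names are illustrative, and the expected instruction selection is
inferred from the new patterns rather than taken from a compiler run):

#include <arm_neon.h>

/* Plain lane read: with the reworked "aarch64_get_lane" pattern the value
   can either stay in the SIMD register file (dup) or move to a core
   register (umov), depending on how the result is used.  */
uint8_t
use_lane_u8 (uint8x8_t v)
{
  return vget_lane_u8 (v, 3);
}

/* Sign-extended use: the new *aarch64_get_lane_extend pattern is intended
   to let the lane extract and the extension fold into a single smov.  */
int32_t
use_lane_s16_widened (int16x4_t v)
{
  return vget_lane_s16 (v, 2);
}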
---

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8ad53a5be45..8d80204803e 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,26 @@
+2013-08-09  James Greenhalgh
+
+	* config/aarch64/aarch64-simd-builtins.def (get_lane_signed): Remove.
+	(get_lane_unsigned): Likewise.
+	(dup_lane_scalar): Likewise.
+	(get_lane): enable for VALL.
+	* config/aarch64/aarch64-simd.md
+	(aarch64_dup_lane_scalar): Remove.
+	(aarch64_get_lane_signed): Likewise.
+	(aarch64_get_lane_unsigned): Likewise.
+	(aarch64_get_lane_extend): New.
+	(aarch64_get_lane_zero_extendsi): Likewise.
+	(aarch64_get_lane): Enable for all vector modes.
+	(aarch64_get_lanedi): Remove misleading constraints.
+	* config/aarch64/arm_neon.h
+	(__aarch64_vget_lane_any): Define.
+	(__aarch64_vget_lane_<8,16,32,64>): Likewise.
+	(vget_lane_<8,16,32,64>): Use __aarch64_vget_lane macros.
+	(vdup_lane_<8,16,32,64>): Likewise.
+	* config/aarch64/iterators.md (VDQQH): New.
+	(VDQQHS): Likewise.
+	(vwcore): Likewise.
+
 2013-08-09  Eric Botcazou
 
 	* configure.ac: Add GAS check for LEON instructions on SPARC.

diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 55dead6e404..4046d7a7001 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -40,10 +40,6 @@
    10 - CODE_FOR_.  */
 
   BUILTIN_VD_RE (CREATE, create, 0)
-  BUILTIN_VQ_S (GETLANE, get_lane_signed, 0)
-  BUILTIN_VDQ (GETLANE, get_lane_unsigned, 0)
-  BUILTIN_VDQF (GETLANE, get_lane, 0)
-  VAR1 (GETLANE, get_lane, 0, di)
   BUILTIN_VDC (COMBINE, combine, 0)
   BUILTIN_VB (BINOP, pmul, 0)
   BUILTIN_VDQF (UNOP, sqrt, 2)
@@ -51,6 +47,9 @@
   VAR1 (UNOP, addp, 0, di)
   VAR1 (UNOP, clz, 2, v4si)
 
+  BUILTIN_VALL (GETLANE, get_lane, 0)
+  VAR1 (GETLANE, get_lane, 0, di)
+
   BUILTIN_VD_RE (REINTERP, reinterpretdi, 0)
   BUILTIN_VDC (REINTERP, reinterpretv8qi, 0)
   BUILTIN_VDC (REINTERP, reinterpretv4hi, 0)
@@ -64,7 +63,6 @@
   BUILTIN_VQ (REINTERP, reinterpretv2df, 0)
 
   BUILTIN_VDQ_I (BINOP, dup_lane, 0)
-  BUILTIN_VDQ_I (BINOP, dup_lane_scalar, 0)
   /* Implemented by aarch64_qshl.  */
   BUILTIN_VSDQ_I (BINOP, sqshl, 0)
   BUILTIN_VSDQ_I (BINOP, uqshl, 0)

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 3c76032499d..982373099f7 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -357,20 +357,6 @@
    (set_attr "simd_mode" "")]
 )
 
-(define_insn "aarch64_dup_lane_scalar"
-  [(set (match_operand: 0 "register_operand" "=w, r")
-	(vec_select:
-	  (match_operand:VDQ 1 "register_operand" "w, w")
-	  (parallel [(match_operand:SI 2 "immediate_operand" "i, i")])
-	))]
-  "TARGET_SIMD"
-  "@
-   dup\\t%0, %1.[%2]
-   umov\\t%0, %1.[%2]"
-  [(set_attr "simd_type" "simd_dup, simd_movgp")
-   (set_attr "simd_mode" "")]
-)
-
 (define_insn "aarch64_simd_dup"
   [(set (match_operand:VDQF 0 "register_operand" "=w")
	(vec_duplicate:VDQF (match_operand: 1 "register_operand" "w")))]
@@ -2147,45 +2133,50 @@
   DONE;
 })
 
-(define_insn "aarch64_get_lane_signed"
-  [(set (match_operand: 0 "register_operand" "=r")
-	(sign_extend:
+;; Lane extraction with sign extension to general purpose register.
+(define_insn "*aarch64_get_lane_extend"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+	(sign_extend:GPI
 	  (vec_select:
-	    (match_operand:VQ_S 1 "register_operand" "w")
+	    (match_operand:VDQQH 1 "register_operand" "w")
 	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
   "TARGET_SIMD"
-  "smov\\t%0, %1.[%2]"
+  "smov\\t%0, %1.[%2]"
   [(set_attr "simd_type" "simd_movgp")
-   (set_attr "simd_mode" "")]
+   (set_attr "simd_mode" "")]
 )
 
-(define_insn "aarch64_get_lane_unsigned"
-  [(set (match_operand: 0 "register_operand" "=r")
-	(zero_extend:
+(define_insn "*aarch64_get_lane_zero_extendsi"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(zero_extend:SI
 	  (vec_select:
-	    (match_operand:VDQ 1 "register_operand" "w")
+	    (match_operand:VDQQH 1 "register_operand" "w")
 	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
   "TARGET_SIMD"
-  "umov\\t%0, %1.[%2]"
+  "umov\\t%w0, %1.[%2]"
   [(set_attr "simd_type" "simd_movgp")
    (set_attr "simd_mode" "")]
 )
 
+;; Lane extraction of a value, neither sign nor zero extension
+;; is guaranteed so upper bits should be considered undefined.
 (define_insn "aarch64_get_lane"
-  [(set (match_operand: 0 "register_operand" "=w")
+  [(set (match_operand: 0 "register_operand" "=r, w")
 	(vec_select:
-	  (match_operand:VDQF 1 "register_operand" "w")
-	  (parallel [(match_operand:SI 2 "immediate_operand" "i")])))]
+	  (match_operand:VALL 1 "register_operand" "w, w")
+	  (parallel [(match_operand:SI 2 "immediate_operand" "i, i")])))]
   "TARGET_SIMD"
-  "mov\\t%0.[0], %1.[%2]"
-  [(set_attr "simd_type" "simd_ins")
+  "@
+   umov\\t%0, %1.[%2]
+   dup\\t%0, %1.[%2]"
+  [(set_attr "simd_type" "simd_movgp, simd_dup")
   (set_attr "simd_mode" "")]
 )
 
 (define_expand "aarch64_get_lanedi"
-  [(match_operand:DI 0 "register_operand" "=r")
-   (match_operand:DI 1 "register_operand" "w")
-   (match_operand:SI 2 "immediate_operand" "i")]
+  [(match_operand:DI 0 "register_operand")
+   (match_operand:DI 1 "register_operand")
+   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
 {
   aarch64_simd_lane_bounds (operands[2], 0, 1);
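The three patterns in the aarch64-simd.md hunk above split the work as
follows: *aarch64_get_lane_extend handles sign-extending extracts (smov),
*aarch64_get_lane_zero_extendsi handles zero-extending extracts to SImode
(umov), and aarch64_get_lane performs a plain extract whose upper bits are
undefined, with one alternative per register file.  A rough illustration in
C (the expected instructions are sketched from the pattern templates, not
from an actual compilation):

#include <arm_neon.h>

/* Zero-extended narrow lane read; expected to match the
   *aarch64_get_lane_zero_extendsi pattern, i.e. a single
   "umov wN, vM.h[1]" that already clears the upper bits.  */
uint32_t
lane_zext (uint16x8_t v)
{
  return vgetq_lane_u16 (v, 1);
}

/* Lane read consumed in the FP/SIMD register file; the "=w" alternative
   of aarch64_get_lane allows "dup sN, vM.s[3]" instead of a round trip
   through a general purpose register.  */
float32_t
lane_keep_fp (float32x4_t v, float32_t x)
{
  return vgetq_lane_f32 (v, 3) + x;
}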
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 99cf123e29e..73a5400831d 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -446,7 +446,66 @@ typedef struct poly16x8x4_t
   poly16x8_t val[4];
 } poly16x8x4_t;
 
-
+/* vget_lane internal macros.  */
+
+#define __aarch64_vget_lane_any(__size, __cast_ret, __cast_a, __a, __b) \
+  (__cast_ret \
+     __builtin_aarch64_get_lane##__size (__cast_a __a, __b))
+
+#define __aarch64_vget_lane_f32(__a, __b) \
+  __aarch64_vget_lane_any (v2sf, , , __a, __b)
+#define __aarch64_vget_lane_f64(__a, __b) (__a)
+
+#define __aarch64_vget_lane_p8(__a, __b) \
+  __aarch64_vget_lane_any (v8qi, (poly8_t), (int8x8_t), __a, __b)
+#define __aarch64_vget_lane_p16(__a, __b) \
+  __aarch64_vget_lane_any (v4hi, (poly16_t), (int16x4_t), __a, __b)
+
+#define __aarch64_vget_lane_s8(__a, __b) \
+  __aarch64_vget_lane_any (v8qi, , ,__a, __b)
+#define __aarch64_vget_lane_s16(__a, __b) \
+  __aarch64_vget_lane_any (v4hi, , ,__a, __b)
+#define __aarch64_vget_lane_s32(__a, __b) \
+  __aarch64_vget_lane_any (v2si, , ,__a, __b)
+#define __aarch64_vget_lane_s64(__a, __b) (__a)
+
+#define __aarch64_vget_lane_u8(__a, __b) \
+  __aarch64_vget_lane_any (v8qi, (uint8_t), (int8x8_t), __a, __b)
+#define __aarch64_vget_lane_u16(__a, __b) \
+  __aarch64_vget_lane_any (v4hi, (uint16_t), (int16x4_t), __a, __b)
+#define __aarch64_vget_lane_u32(__a, __b) \
+  __aarch64_vget_lane_any (v2si, (uint32_t), (int32x2_t), __a, __b)
+#define __aarch64_vget_lane_u64(__a, __b) (__a)
+
+#define __aarch64_vgetq_lane_f32(__a, __b) \
+  __aarch64_vget_lane_any (v4sf, , , __a, __b)
+#define __aarch64_vgetq_lane_f64(__a, __b) \
+  __aarch64_vget_lane_any (v2df, , , __a, __b)
+
+#define __aarch64_vgetq_lane_p8(__a, __b) \
+  __aarch64_vget_lane_any (v16qi, (poly8_t), (int8x16_t), __a, __b)
+#define __aarch64_vgetq_lane_p16(__a, __b) \
+  __aarch64_vget_lane_any (v8hi, (poly16_t), (int16x8_t), __a, __b)
+
+#define __aarch64_vgetq_lane_s8(__a, __b) \
+  __aarch64_vget_lane_any (v16qi, , ,__a, __b)
+#define __aarch64_vgetq_lane_s16(__a, __b) \
+  __aarch64_vget_lane_any (v8hi, , ,__a, __b)
+#define __aarch64_vgetq_lane_s32(__a, __b) \
+  __aarch64_vget_lane_any (v4si, , ,__a, __b)
+#define __aarch64_vgetq_lane_s64(__a, __b) \
+  __aarch64_vget_lane_any (v2di, , ,__a, __b)
+
+#define __aarch64_vgetq_lane_u8(__a, __b) \
+  __aarch64_vget_lane_any (v16qi, (uint8_t), (int8x16_t), __a, __b)
+#define __aarch64_vgetq_lane_u16(__a, __b) \
+  __aarch64_vget_lane_any (v8hi, (uint16_t), (int16x8_t), __a, __b)
+#define __aarch64_vgetq_lane_u32(__a, __b) \
+  __aarch64_vget_lane_any (v4si, (uint32_t), (int32x4_t), __a, __b)
+#define __aarch64_vgetq_lane_u64(__a, __b) \
+  __aarch64_vget_lane_any (v2di, (uint64_t), (int64x2_t), __a, __b)
+
+/* vadd */
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vadd_s8 (int8x8_t __a, int8x8_t __b)
 {
@@ -2307,155 +2366,156 @@ vcreate_p16 (uint64_t __a)
   return (poly16x4_t) __a;
 }
 
+/* vget_lane  */
+
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+vget_lane_f32 (float32x2_t __a, const int __b)
+{
+  return __aarch64_vget_lane_f32 (__a, __b);
+}
+
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
+vget_lane_f64 (float64x1_t __a, const int __b)
+{
+  return __aarch64_vget_lane_f64 (__a, __b);
+}
+
+__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
+vget_lane_p8 (poly8x8_t __a, const int __b)
+{
+  return __aarch64_vget_lane_p8 (__a, __b);
+}
+
+__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
+vget_lane_p16 (poly16x4_t __a, const int __b)
+{
+  return __aarch64_vget_lane_p16 (__a, __b);
+}
+
 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
 vget_lane_s8 (int8x8_t __a, const int __b)
 {
-  return (int8_t) __builtin_aarch64_get_lane_signedv8qi (__a, __b);
+  return __aarch64_vget_lane_s8 (__a, __b);
 }
 
 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
 vget_lane_s16 (int16x4_t __a, const int __b)
 {
-  return (int16_t) __builtin_aarch64_get_lane_signedv4hi (__a, __b);
+  return __aarch64_vget_lane_s16 (__a, __b);
 }
 
 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
 vget_lane_s32 (int32x2_t __a, const int __b)
 {
-  return (int32_t) __builtin_aarch64_get_lane_signedv2si (__a, __b);
+  return __aarch64_vget_lane_s32 (__a, __b);
 }
 
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
-vget_lane_f32 (float32x2_t __a, const int __b)
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vget_lane_s64 (int64x1_t __a, const int __b)
 {
-  return (float32_t) __builtin_aarch64_get_lanev2sf (__a, __b);
+  return __aarch64_vget_lane_s64 (__a, __b);
 }
 
 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
 vget_lane_u8 (uint8x8_t __a, const int __b)
 {
-  return (uint8_t) __builtin_aarch64_get_lane_unsignedv8qi ((int8x8_t) __a,
-							    __b);
+  return __aarch64_vget_lane_u8 (__a, __b);
 }
 
 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
 vget_lane_u16 (uint16x4_t __a, const int __b)
 {
-  return (uint16_t) __builtin_aarch64_get_lane_unsignedv4hi ((int16x4_t) __a,
-							     __b);
+  return __aarch64_vget_lane_u16 (__a, __b);
 }
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
 vget_lane_u32 (uint32x2_t __a, const int __b)
 {
-  return (uint32_t) __builtin_aarch64_get_lane_unsignedv2si ((int32x2_t) __a,
-							     __b);
+  return __aarch64_vget_lane_u32 (__a, __b);
 }
 
-__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
-vget_lane_p8 (poly8x8_t __a, const int __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vget_lane_u64 (uint64x1_t __a, const int __b)
 {
-  return (poly8_t) __builtin_aarch64_get_lane_unsignedv8qi ((int8x8_t) __a,
-							    __b);
+  return __aarch64_vget_lane_u64 (__a, __b);
 }
 
-__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
-vget_lane_p16 (poly16x4_t __a, const int __b)
+/* vgetq_lane  */
+
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+vgetq_lane_f32 (float32x4_t __a, const int __b)
 {
-  return (poly16_t) __builtin_aarch64_get_lane_unsignedv4hi ((int16x4_t) __a,
-							     __b);
+  return __aarch64_vgetq_lane_f32 (__a, __b);
 }
 
-__extension__ static __inline int64_t __attribute__ ((__always_inline__))
-vget_lane_s64 (int64x1_t __a, const int __b)
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
+vgetq_lane_f64 (float64x2_t __a, const int __b)
 {
-  return (int64_t) __builtin_aarch64_get_lanedi (__a, __b);
+  return __aarch64_vgetq_lane_f64 (__a, __b);
 }
 
-__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
-vget_lane_u64 (uint64x1_t __a, const int __b)
+__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
+vgetq_lane_p8 (poly8x16_t __a, const int __b)
+{
+  return __aarch64_vgetq_lane_p8 (__a, __b);
+}
+
+__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
+vgetq_lane_p16 (poly16x8_t __a, const int __b)
 {
-  return (uint64_t) __builtin_aarch64_get_lanedi ((int64x1_t) __a, __b);
+  return __aarch64_vgetq_lane_p16 (__a, __b);
 }
 
 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
 vgetq_lane_s8 (int8x16_t __a, const int __b)
 {
-  return (int8_t) __builtin_aarch64_get_lane_signedv16qi (__a, __b);
+  return __aarch64_vgetq_lane_s8 (__a, __b);
 }
 
 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
 vgetq_lane_s16 (int16x8_t __a, const int __b)
 {
-  return (int16_t) __builtin_aarch64_get_lane_signedv8hi (__a, __b);
+  return __aarch64_vgetq_lane_s16 (__a, __b);
 }
 
 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
 vgetq_lane_s32 (int32x4_t __a, const int __b)
 {
-  return (int32_t) __builtin_aarch64_get_lane_signedv4si (__a, __b);
-}
-
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
-vgetq_lane_f32 (float32x4_t __a, const int __b)
-{
-  return (float32_t) __builtin_aarch64_get_lanev4sf (__a, __b);
+  return __aarch64_vgetq_lane_s32 (__a, __b);
 }
 
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
-vgetq_lane_f64 (float64x2_t __a, const int __b)
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vgetq_lane_s64 (int64x2_t __a, const int __b)
 {
-  return (float64_t) __builtin_aarch64_get_lanev2df (__a, __b);
+  return __aarch64_vgetq_lane_s64 (__a, __b);
 }
 
 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
 vgetq_lane_u8 (uint8x16_t __a, const int __b)
 {
-  return (uint8_t) __builtin_aarch64_get_lane_unsignedv16qi ((int8x16_t) __a,
-							     __b);
+  return __aarch64_vgetq_lane_u8 (__a, __b);
 }
 
 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
 vgetq_lane_u16 (uint16x8_t __a, const int __b)
 {
-  return (uint16_t) __builtin_aarch64_get_lane_unsignedv8hi ((int16x8_t) __a,
-							     __b);
+  return __aarch64_vgetq_lane_u16 (__a, __b);
 }
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
 vgetq_lane_u32 (uint32x4_t __a, const int __b)
 {
-  return (uint32_t) __builtin_aarch64_get_lane_unsignedv4si ((int32x4_t) __a,
-							     __b);
-}
-
-__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
-vgetq_lane_p8 (poly8x16_t __a, const int __b)
-{
-  return (poly8_t) __builtin_aarch64_get_lane_unsignedv16qi ((int8x16_t) __a,
-							     __b);
-}
-
-__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
-vgetq_lane_p16 (poly16x8_t __a, const int __b)
-{
-  return (poly16_t) __builtin_aarch64_get_lane_unsignedv8hi ((int16x8_t) __a,
-							     __b);
-}
-
-__extension__ static __inline int64_t __attribute__ ((__always_inline__))
-vgetq_lane_s64 (int64x2_t __a, const int __b)
-{
-  return __builtin_aarch64_get_lane_unsignedv2di (__a, __b);
+  return __aarch64_vgetq_lane_u32 (__a, __b);
 }
 
 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
 vgetq_lane_u64 (uint64x2_t __a, const int __b)
 {
-  return (uint64_t) __builtin_aarch64_get_lane_unsignedv2di ((int64x2_t) __a,
-							     __b);
+  return __aarch64_vgetq_lane_u64 (__a, __b);
 }
 
+/* vreinterpret  */
+
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
 vreinterpret_p8_s8 (int8x8_t __a)
 {
@@ -6724,18 +6784,6 @@ vget_high_u64 (uint64x2_t a)
   return result;
 }
 
-#define vget_lane_f64(a, b) \
-  __extension__ \
-    ({ \
-       float64x1_t a_ = (a); \
-       float64_t result; \
-       __asm__ ("umov %x0, %1.d[%2]" \
-		: "=r"(result) \
-		: "w"(a_), "i"(b) \
-		: /* No clobbers */); \
-       result; \
-     })
-
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vget_low_f32 (float32x4_t a)
 {
@@ -19732,49 +19780,49 @@ vcvtpq_u64_f64 (float64x2_t __a)
 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
 vdupb_lane_s8 (int8x16_t a, int const b)
 {
-  return __builtin_aarch64_dup_lane_scalarv16qi (a, b);
+  return __aarch64_vget_laneq_s8 (a, b);
 }
 
 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
 vdupb_lane_u8 (uint8x16_t a, int const b)
 {
-  return (uint8x1_t) __builtin_aarch64_dup_lane_scalarv16qi ((int8x16_t) a, b);
+  return __aarch64_vget_laneq_u8 (a, b);
 }
 
 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
 vduph_lane_s16 (int16x8_t a, int const b)
 {
-  return __builtin_aarch64_dup_lane_scalarv8hi (a, b);
+  return __aarch64_vget_laneq_s16 (a, b);
 }
 
 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
 vduph_lane_u16 (uint16x8_t a, int const b)
 {
-  return (uint16x1_t) __builtin_aarch64_dup_lane_scalarv8hi ((int16x8_t) a, b);
+  return __aarch64_vget_laneq_u16 (a, b);
 }
 
 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
 vdups_lane_s32 (int32x4_t a, int const b)
 {
-  return __builtin_aarch64_dup_lane_scalarv4si (a, b);
+  return __aarch64_vget_laneq_s32 (a, b);
 }
 
 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
 vdups_lane_u32 (uint32x4_t a, int const b)
 {
-  return (uint32x1_t) __builtin_aarch64_dup_lane_scalarv4si ((int32x4_t) a, b);
+  return __aarch64_vget_laneq_u32 (a, b);
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vdupd_lane_s64 (int64x2_t a, int const b)
 {
-  return __builtin_aarch64_dup_lane_scalarv2di (a, b);
+  return __aarch64_vget_laneq_s64 (a, b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vdupd_lane_u64 (uint64x2_t a, int const b)
 {
-  return (uint64x1_t) __builtin_aarch64_dup_lane_scalarv2di ((int64x2_t) a, b);
+  return __aarch64_vget_laneq_s64 (a, b);
 }
 
 /* vld1 */
@@ -25581,4 +25629,31 @@ __INTERLEAVE_LIST (zip)
 
 /* End of optimal implementations in approved order.  */
 
+#undef __aarch64_vget_lane_any
+#undef __aarch64_vget_lane_f32
+#undef __aarch64_vget_lane_f64
+#undef __aarch64_vget_lane_p8
+#undef __aarch64_vget_lane_p16
+#undef __aarch64_vget_lane_s8
+#undef __aarch64_vget_lane_s16
+#undef __aarch64_vget_lane_s32
+#undef __aarch64_vget_lane_s64
+#undef __aarch64_vget_lane_u8
+#undef __aarch64_vget_lane_u16
+#undef __aarch64_vget_lane_u32
+#undef __aarch64_vget_lane_u64
+
+#undef __aarch64_vgetq_lane_f32
+#undef __aarch64_vgetq_lane_f64
+#undef __aarch64_vgetq_lane_p8
+#undef __aarch64_vgetq_lane_p16
+#undef __aarch64_vgetq_lane_s8
+#undef __aarch64_vgetq_lane_s16
+#undef __aarch64_vgetq_lane_s32
+#undef __aarch64_vgetq_lane_s64
+#undef __aarch64_vgetq_lane_u8
+#undef __aarch64_vgetq_lane_u16
+#undef __aarch64_vgetq_lane_u32
+#undef __aarch64_vgetq_lane_u64
+
 #endif
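The __aarch64_vget_lane_any helper added to arm_neon.h above is a plain
token-pasting layer: the first argument selects the machine-mode suffix of
the underlying builtin, and the two cast arguments re-type the result and
the input for the unsigned and poly variants.  A hand-expanded sketch of
one instance (written out by hand, not taken from preprocessor output; the
wrapper name is made up for illustration):

#include <arm_neon.h>

/* What vget_lane_u16 reduces to after the macro layer: the cast
   arguments of __aarch64_vget_lane_any become the two casts seen here,
   and "v4hi" is pasted onto __builtin_aarch64_get_lane to pick the
   V4HImode builtin enabled by BUILTIN_VALL (GETLANE, get_lane, 0).  */
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vget_lane_u16_expanded (uint16x4_t __a, const int __b)
{
  return (uint16_t) __builtin_aarch64_get_lanev4hi ((int16x4_t) __a, __b);
}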
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 3ec889f28fd..37b6cbc8dc8 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -134,9 +134,15 @@
 ;; Vector modes except double int.
 (define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF])
 
+;; Vector modes for Q and H types.
+(define_mode_iterator VDQQH [V8QI V16QI V4HI V8HI])
+
 ;; Vector modes for H and S types.
 (define_mode_iterator VDQHS [V4HI V8HI V2SI V4SI])
 
+;; Vector modes for Q, H and S types.
+(define_mode_iterator VDQQHS [V8QI V16QI V4HI V8HI V2SI V4SI])
+
 ;; Vector and scalar integer modes for H and S
 (define_mode_iterator VSDQ_HSI [V4HI V8HI V2SI V4SI HI SI])
 
@@ -453,6 +459,15 @@
 			(V2SF "s") (V4SF "s")
 			(V2DF "d")])
 
+;; Corresponding core element mode for each vector mode.  This is a
+;; variation on mapping FP modes to GP regs.
+(define_mode_attr vwcore [(V8QI "w") (V16QI "w")
+			  (V4HI "w") (V8HI "w")
+			  (V2SI "w") (V4SI "w")
+			  (DI   "x") (V2DI "x")
+			  (V2SF "w") (V4SF "w")
+			  (V2DF "x")])
+
 ;; Double vector types for ALLX.
 (define_mode_attr Vallxd [(QI "8b") (HI "4h") (SI "2s")])
 
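The new vwcore attribute gives each vector mode the name prefix of the
general purpose register that one of its lanes fits in: "w" for 32-bit and
narrower elements, "x" for 64-bit elements, so a single output template can
print the right core register.  A small C illustration (the expected
instructions are taken from the AArch64 ISA, not from a compiler run):

#include <arm_neon.h>

/* A 64-bit lane moved to the integer register file needs the full
   x-register form, e.g. "umov x0, v0.d[1]" ...  */
uint64_t
lane_d1 (uint64x2_t v)
{
  return vgetq_lane_u64 (v, 1);
}

/* ... while a 32-bit lane uses the w-register form, e.g.
   "umov w0, v0.s[2]".  */
uint32_t
lane_s2 (uint32x4_t v)
{
  return vgetq_lane_u32 (v, 2);
}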
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index a777c7d5eae..1e682b96fd8 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2013-08-09  James Greenhalgh
+
+	* gcc.target/aarch64/scalar_intrinsics.c: Update expected
+	output of vdup intrinsics.
+
 2013-08-09  Zhenqiang Chen
 
 	* gcc.target/arm/lp1189445.c: New testcase.

diff --git a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
index 3d902f6342d..d84bfeb55e9 100644
--- a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
+++ b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
@@ -193,7 +193,7 @@ test_vcltzd_s64 (int64x1_t a)
   return res;
 }
 
-/* { dg-final { scan-assembler-times "aarch64_dup_lane_scalarv16qi" 2 } } */
+/* { dg-final { scan-assembler-times "aarch64_get_lanev16qi" 2 } } */
 
 int8x1_t
 test_vdupb_lane_s8 (int8x16_t a)
@@ -207,7 +207,7 @@ test_vdupb_lane_u8 (uint8x16_t a)
   return vdupb_lane_u8 (a, 2);
 }
 
-/* { dg-final { scan-assembler-times "aarch64_dup_lane_scalarv8hi" 2 } } */
+/* { dg-final { scan-assembler-times "aarch64_get_lanev8hi" 2 } } */
 
 int16x1_t
 test_vduph_lane_s16 (int16x8_t a)
@@ -221,7 +221,7 @@ test_vduph_lane_u16 (uint16x8_t a)
   return vduph_lane_u16 (a, 2);
 }
 
-/* { dg-final { scan-assembler-times "aarch64_dup_lane_scalarv4si" 2 } } */
+/* { dg-final { scan-assembler-times "aarch64_get_lanev4si" 2 } } */
 
 int32x1_t
 test_vdups_lane_s32 (int32x4_t a)
@@ -235,7 +235,7 @@ test_vdups_lane_u32 (uint32x4_t a)
   return vdups_lane_u32 (a, 2);
 }
 
-/* { dg-final { scan-assembler-times "aarch64_dup_lane_scalarv2di" 2 } } */
+/* { dg-final { scan-assembler-times "aarch64_get_lanev2di" 2 } } */
 
 int64x1_t
 test_vdupd_lane_s64 (int64x2_t a)
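The updated scan strings above match RTL pattern names as they appear in
annotated assembler output, tracking the fact that the scalar vdup
intrinsics now expand through the generic get_lane builtins.  A standalone
check in the same style could look like this (a sketch only: the test body
is hypothetical, and it assumes compilation with an option such as -dp so
that pattern names are emitted into the assembly, as the existing
scan-assembler-times directives imply):

/* { dg-do compile } */
/* { dg-options "-O2 -dp" } */

#include <arm_neon.h>

int16x1_t
check_vduph_lane_s16 (int16x8_t a)
{
  return vduph_lane_s16 (a, 2);
}

/* After this patch the expected pattern name is aarch64_get_lanev8hi
   rather than aarch64_dup_lane_scalarv8hi.  */
/* { dg-final { scan-assembler "aarch64_get_lanev8hi" } } */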