From 467e6f1bb18878c330e1dc88888b133e85524c46 Mon Sep 17 00:00:00 2001 From: Kugan Vivekanandarajah Date: Wed, 27 Dec 2017 11:47:45 +0000 Subject: [PATCH] aarch64-simd.md (aarch64_ld1x2): New. gcc/ChangeLog: 2017-12-27 Kugan Vivekanandarajah * config/aarch64/aarch64-simd.md (aarch64_ld1x2): New. (aarch64_ld1x2): Likewise. (aarch64_simd_ld1_x2): Likewise. (aarch64_simd_ld1_x2): Likewise. * config/aarch64/arm_neon.h (vld1_u8_x2): New. (vld1_s8_x2): Likewise. (vld1_u16_x2): Likewise. (vld1_s16_x2): Likewise. (vld1_u32_x2): Likewise. (vld1_s32_x2): Likewise. (vld1_u64_x2): Likewise. (vld1_s64_x2): Likewise. (vld1_f16_x2): Likewise. (vld1_f32_x2): Likewise. (vld1_f64_x2): Likewise. (vld1_p8_x2): Likewise. (vld1_p16_x2): Likewise. (vld1_p64_x2): Likewise. (vld1q_u8_x2): Likewise. (vld1q_s8_x2): Likewise. (vld1q_u16_x2): Likewise. (vld1q_s16_x2): Likewise. (vld1q_u32_x2): Likewise. (vld1q_s32_x2): Likewise. (vld1q_u64_x2): Likewise. (vld1q_s64_x2): Likewise. (vld1q_f16_x2): Likewise. (vld1q_f32_x2): Likewise. (vld1q_f64_x2): Likewise. (vld1q_p8_x2): Likewise. (vld1q_p16_x2): Likewise. (vld1q_p64_x2): Likewise. gcc/testsuite/ChangeLog: 2017-12-27 Kugan Vivekanandarajah * gcc.target/aarch64/advsimd-intrinsics/vld1x2.c: New test. From-SVN: r256010 --- gcc/ChangeLog | 35 ++ gcc/config/aarch64/aarch64-simd-builtins.def | 6 +- gcc/config/aarch64/aarch64-simd.md | 48 +++ gcc/config/aarch64/arm_neon.h | 336 ++++++++++++++++++ gcc/testsuite/ChangeLog | 4 + .../aarch64/advsimd-intrinsics/vld1x2.c | 71 ++++ 6 files changed, 499 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5747358acef..4130edafea6 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,38 @@ +2017-12-27 Kugan Vivekanandarajah + + * config/aarch64/aarch64-simd.md (aarch64_ld1x2): New. + (aarch64_ld1x2): Likewise. + (aarch64_simd_ld1_x2): Likewise. + (aarch64_simd_ld1_x2): Likewise. + * config/aarch64/arm_neon.h (vld1_u8_x2): New. + (vld1_s8_x2): Likewise. + (vld1_u16_x2): Likewise. + (vld1_s16_x2): Likewise. + (vld1_u32_x2): Likewise. + (vld1_s32_x2): Likewise. + (vld1_u64_x2): Likewise. + (vld1_s64_x2): Likewise. + (vld1_f16_x2): Likewise. + (vld1_f32_x2): Likewise. + (vld1_f64_x2): Likewise. + (vld1_p8_x2): Likewise. + (vld1_p16_x2): Likewise. + (vld1_p64_x2): Likewise. + (vld1q_u8_x2): Likewise. + (vld1q_s8_x2): Likewise. + (vld1q_u16_x2): Likewise. + (vld1q_s16_x2): Likewise. + (vld1q_u32_x2): Likewise. + (vld1q_s32_x2): Likewise. + (vld1q_u64_x2): Likewise. + (vld1q_s64_x2): Likewise. + (vld1q_f16_x2): Likewise. + (vld1q_f32_x2): Likewise. + (vld1q_f64_x2): Likewise. + (vld1q_p8_x2): Likewise. + (vld1q_p16_x2): Likewise. + (vld1q_p64_x2): Likewise. + 2017-12-27 Martin Liska PR tree-optimization/83552 diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 52d01342372..94afff94f7c 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -86,6 +86,10 @@ VAR1 (SETREGP, set_qregoi, 0, v2di) VAR1 (SETREGP, set_qregci, 0, v2di) VAR1 (SETREGP, set_qregxi, 0, v2di) + /* Implemented by aarch64_ld1x2. */ + BUILTIN_VQ (LOADSTRUCT, ld1x2, 0) + /* Implemented by aarch64_ld1x2. */ + BUILTIN_VDC (LOADSTRUCT, ld1x2, 0) /* Implemented by aarch64_ld. */ BUILTIN_VDC (LOADSTRUCT, ld2, 0) BUILTIN_VDC (LOADSTRUCT, ld3, 0) @@ -571,4 +575,4 @@ BUILTIN_GPI (UNOP, fix_truncdf, 2) BUILTIN_GPI_I16 (UNOPUS, fixuns_trunchf, 2) BUILTIN_GPI (UNOPUS, fixuns_truncsf, 2) - BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2) \ No newline at end of file + BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index f90f74fe7fd..84c4f8286c0 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -5296,6 +5296,33 @@ DONE; }) +(define_expand "aarch64_ld1x2" + [(match_operand:OI 0 "register_operand" "=w") + (match_operand:DI 1 "register_operand" "r") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" +{ + machine_mode mode = OImode; + rtx mem = gen_rtx_MEM (mode, operands[1]); + + emit_insn (gen_aarch64_simd_ld1_x2 (operands[0], mem)); + DONE; +}) + +(define_expand "aarch64_ld1x2" + [(match_operand:OI 0 "register_operand" "=w") + (match_operand:DI 1 "register_operand" "r") + (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" +{ + machine_mode mode = OImode; + rtx mem = gen_rtx_MEM (mode, operands[1]); + + emit_insn (gen_aarch64_simd_ld1_x2 (operands[0], mem)); + DONE; +}) + + (define_expand "aarch64_ld_lane" [(match_operand:VSTRUCT 0 "register_operand" "=w") (match_operand:DI 1 "register_operand" "w") @@ -5692,6 +5719,27 @@ [(set_attr "type" "neon_load1_all_lanes")] ) +(define_insn "aarch64_simd_ld1_x2" + [(set (match_operand:OI 0 "register_operand" "=w") + (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_LD1))] + "TARGET_SIMD" + "ld1\\t{%S0. - %T0.}, %1" + [(set_attr "type" "neon_load1_2reg")] +) + +(define_insn "aarch64_simd_ld1_x2" + [(set (match_operand:OI 0 "register_operand" "=w") + (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv") + (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_LD1))] + "TARGET_SIMD" + "ld1\\t{%S0. - %T0.}, %1" + [(set_attr "type" "neon_load1_2reg")] +) + + (define_insn "aarch64_frecpe" [(set (match_operand:VHSDF 0 "register_operand" "=w") (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")] diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 96e740f91a7..6d6ba4fae3e 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -17228,6 +17228,342 @@ vld1q_u8 (const uint8_t *a) __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a); } +__extension__ extern __inline uint8x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u8_x2 (const uint8_t *__a) +{ + uint8x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); + ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); + return ret; +} + +__extension__ extern __inline int8x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s8_x2 (const int8_t *__a) +{ + int8x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); + ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); + return ret; +} + +__extension__ extern __inline uint16x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u16_x2 (const uint16_t *__a) +{ + uint16x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); + ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); + return ret; +} + +__extension__ extern __inline int16x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s16_x2 (const int16_t *__a) +{ + int16x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); + ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); + return ret; +} + +__extension__ extern __inline uint32x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u32_x2 (const uint32_t *__a) +{ + uint32x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); + ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); + return ret; +} + +__extension__ extern __inline int32x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s32_x2 (const int32_t *__a) +{ + int32x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); + ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); + return ret; +} + +__extension__ extern __inline uint64x1x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u64_x2 (const uint64_t *__a) +{ + uint64x1x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); + ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); + return ret; +} + +__extension__ extern __inline int64x1x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s64_x2 (const int64_t *__a) +{ + int64x1x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); + ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); + return ret; +} + +__extension__ extern __inline float16x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_f16_x2 (const float16_t *__a) +{ + float16x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v4hf ((const __builtin_aarch64_simd_hf *) __a); + ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 0); + ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 1); + return ret; +} + +__extension__ extern __inline float32x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_f32_x2 (const float32_t *__a) +{ + float32x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v2sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0); + ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1); + return ret; +} + +__extension__ extern __inline float64x1x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_f64_x2 (const float64_t *__a) +{ + float64x1x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2df ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)}; + ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)}; + return ret; +} + +__extension__ extern __inline poly8x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_p8_x2 (const poly8_t *__a) +{ + poly8x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); + ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); + return ret; +} + +__extension__ extern __inline poly16x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_p16_x2 (const poly16_t *__a) +{ + poly16x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); + ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); + return ret; +} + +__extension__ extern __inline poly64x1x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_p64_x2 (const poly64_t *__a) +{ + poly64x1x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); + ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); + return ret; +} + +__extension__ extern __inline uint8x16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u8_x2 (const uint8_t *__a) +{ + uint8x16x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); + ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); + return ret; +} + +__extension__ extern __inline int8x16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s8_x2 (const int8_t *__a) +{ + int8x16x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); + ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); + return ret; +} + +__extension__ extern __inline uint16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u16_x2 (const uint16_t *__a) +{ + uint16x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); + ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); + return ret; +} + +__extension__ extern __inline int16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s16_x2 (const int16_t *__a) +{ + int16x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); + ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); + return ret; +} + +__extension__ extern __inline uint32x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u32_x2 (const uint32_t *__a) +{ + uint32x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); + ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); + return ret; +} + +__extension__ extern __inline int32x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s32_x2 (const int32_t *__a) +{ + int32x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); + ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); + return ret; +} + +__extension__ extern __inline uint64x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u64_x2 (const uint64_t *__a) +{ + uint64x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); + ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); + return ret; +} + +__extension__ extern __inline int64x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s64_x2 (const int64_t *__a) +{ + int64x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); + ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); + return ret; +} + +__extension__ extern __inline float16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_f16_x2 (const float16_t *__a) +{ + float16x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v8hf ((const __builtin_aarch64_simd_hf *) __a); + ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 0); + ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 1); + return ret; +} + +__extension__ extern __inline float32x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_f32_x2 (const float32_t *__a) +{ + float32x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v4sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0); + ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1); + return ret; +} + +__extension__ extern __inline float64x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_f64_x2 (const float64_t *__a) +{ + float64x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v2df ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0); + ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1); + return ret; +} + +__extension__ extern __inline poly8x16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_p8_x2 (const poly8_t *__a) +{ + poly8x16x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); + ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); + return ret; +} + +__extension__ extern __inline poly16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_p16_x2 (const poly16_t *__a) +{ + poly16x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); + ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); + return ret; +} + +__extension__ extern __inline poly64x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_p64_x2 (const poly64_t *__a) +{ + poly64x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); + ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); + return ret; +} + __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u16 (const uint16_t *a) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c03e6e1a440..53f61f105dd 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2017-12-27 Kugan Vivekanandarajah + + * gcc.target/aarch64/advsimd-intrinsics/vld1x2.c: New test. + 2017-12-27 Martin Liska PR tree-optimization/83552 diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x2.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x2.c new file mode 100644 index 00000000000..0a43d0daf8c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x2.c @@ -0,0 +1,71 @@ +/* { dg-do run } */ +/* { dg-options "-O3" } */ + +#include + +extern void abort (void); + +#define TESTMETH(BASE, ELTS, SUFFIX) \ +int __attribute__ ((noinline)) \ +test_vld##SUFFIX##_x2 () \ +{ \ + BASE##_t data[ELTS * 2]; \ + BASE##_t temp[ELTS * 2]; \ + BASE##x##ELTS##x##2##_t vectors; \ + int i,j; \ + for (i = 0; i < ELTS * 2; i++) \ + data [i] = (BASE##_t) 2*i + 1; \ + asm volatile ("" : : : "memory"); \ + vectors = vld1##SUFFIX##_x2 (data); \ + vst1##SUFFIX (temp, vectors.val[0]); \ + vst1##SUFFIX (&temp[ELTS], vectors.val[1]); \ + asm volatile ("" : : : "memory"); \ + for (j = 0; j < ELTS * 2; j++) \ + if (temp[j] != data[j]) \ + return 1; \ + return 0; \ +} + +#define VARIANTS(VARIANT) \ +VARIANT (uint8, 8, _u8) \ +VARIANT (uint16, 4, _u16) \ +VARIANT (uint32, 2, _u32) \ +VARIANT (uint64, 1, _u64) \ +VARIANT (int8, 8, _s8) \ +VARIANT (int16, 4, _s16) \ +VARIANT (int32, 2, _s32) \ +VARIANT (int64, 1, _s64) \ +VARIANT (poly8, 8, _p8) \ +VARIANT (poly16, 4, _p16) \ +VARIANT (float16, 4, _f16) \ +VARIANT (float32, 2, _f32) \ +VARIANT (float64, 1, _f64) \ +VARIANT (uint8, 16, q_u8) \ +VARIANT (uint16, 8, q_u16) \ +VARIANT (uint32, 4, q_u32) \ +VARIANT (uint64, 2, q_u64) \ +VARIANT (int8, 16, q_s8) \ +VARIANT (int16, 8, q_s16) \ +VARIANT (int32, 4, q_s32) \ +VARIANT (int64, 2, q_s64) \ +VARIANT (poly8, 16, q_p8) \ +VARIANT (poly16, 8, q_p16) \ +VARIANT (float16, 8, q_f16) \ +VARIANT (float32, 4, q_f32) \ +VARIANT (float64, 2, q_f64) + +/* Tests of vld1_x2 and vld1q_x2. */ +VARIANTS (TESTMETH) + +#define CHECK(BASE, ELTS, SUFFIX) \ + if (test_vld##SUFFIX##_x2 () != 0) \ + abort (); + +int +main (int argc, char **argv) +{ + VARIANTS (CHECK) + + return 0; +} + -- 2.30.2