From 391625888d4d97f9016ab9ac04acc55d81f0c26f Mon Sep 17 00:00:00 2001
From: Sylvia Taylor <sylvia.taylor@arm.com>
Date: Thu, 22 Aug 2019 11:28:26 +0000
Subject: [PATCH] [aarch64]: add intrinsics for vld1(q)_x4 and vst1(q)_x4

This patch adds the intrinsic functions for:
- vld1_<mode>_x4
- vst1_<mode>_x4
- vld1q_<mode>_x4
- vst1q_<mode>_x4

Bootstrapped and tested on aarch64-none-linux-gnu.

Committed on behalf of Sylvia Taylor.

2019-08-22  Sylvia Taylor  <sylvia.taylor@arm.com>

	* config/aarch64/aarch64-simd-builtins.def:
	(ld1x4): New.
	(st1x4): Likewise.
	* config/aarch64/aarch64-simd.md:
	(aarch64_ld1x4<VALLDIF:mode>): New pattern.
	(aarch64_st1x4<VALLDIF:mode>): Likewise.
	(aarch64_ld1_x4_<VALLDIF:mode>): Likewise.
	(aarch64_st1_x4_<VALLDIF:mode>): Likewise.
	* config/aarch64/arm_neon.h:
	(vld1_s8_x4): New function.
	(vld1q_s8_x4): Likewise.
	(vld1_s16_x4): Likewise.
	(vld1q_s16_x4): Likewise.
	(vld1_s32_x4): Likewise.
	(vld1q_s32_x4): Likewise.
	(vld1_u8_x4): Likewise.
	(vld1q_u8_x4): Likewise.
	(vld1_u16_x4): Likewise.
	(vld1q_u16_x4): Likewise.
	(vld1_u32_x4): Likewise.
	(vld1q_u32_x4): Likewise.
	(vld1_f16_x4): Likewise.
	(vld1q_f16_x4): Likewise.
	(vld1_f32_x4): Likewise.
	(vld1q_f32_x4): Likewise.
	(vld1_p8_x4): Likewise.
	(vld1q_p8_x4): Likewise.
	(vld1_p16_x4): Likewise.
	(vld1q_p16_x4): Likewise.
	(vld1_s64_x4): Likewise.
	(vld1_u64_x4): Likewise.
	(vld1_p64_x4): Likewise.
	(vld1q_s64_x4): Likewise.
	(vld1q_u64_x4): Likewise.
	(vld1q_p64_x4): Likewise.
	(vld1_f64_x4): Likewise.
	(vld1q_f64_x4): Likewise.
	(vst1_s8_x4): Likewise.
	(vst1q_s8_x4): Likewise.
	(vst1_s16_x4): Likewise.
	(vst1q_s16_x4): Likewise.
	(vst1_s32_x4): Likewise.
	(vst1q_s32_x4): Likewise.
	(vst1_u8_x4): Likewise.
	(vst1q_u8_x4): Likewise.
	(vst1_u16_x4): Likewise.
	(vst1q_u16_x4): Likewise.
	(vst1_u32_x4): Likewise.
	(vst1q_u32_x4): Likewise.
	(vst1_f16_x4): Likewise.
	(vst1q_f16_x4): Likewise.
	(vst1_f32_x4): Likewise.
	(vst1q_f32_x4): Likewise.
	(vst1_p8_x4): Likewise.
	(vst1q_p8_x4): Likewise.
	(vst1_p16_x4): Likewise.
	(vst1q_p16_x4): Likewise.
	(vst1_s64_x4): Likewise.
	(vst1_u64_x4): Likewise.
	(vst1_p64_x4): Likewise.
	(vst1q_s64_x4): Likewise.
	(vst1q_u64_x4): Likewise.
	(vst1q_p64_x4): Likewise.
	(vst1_f64_x4): Likewise.
	(vst1q_f64_x4): Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vld1x4.c: New test.
	* gcc.target/aarch64/advsimd-intrinsics/vst1x4.c: New test.

From-SVN: r274820
---
 gcc/ChangeLog                                |  68 +++
 gcc/config/aarch64/aarch64-simd-builtins.def |   6 +
 gcc/config/aarch64/aarch64-simd.md           |  44 ++
 gcc/config/aarch64/arm_neon.h                | 508 ++++++++++++++++++
 gcc/testsuite/ChangeLog                      |   5 +
 .../aarch64/advsimd-intrinsics/vld1x4.c      |  83 +++
 .../aarch64/advsimd-intrinsics/vst1x4.c      |  83 +++
 7 files changed, 797 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1x4.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 603687eae08..271786f27b3 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,71 @@
+2019-08-22  Sylvia Taylor  <sylvia.taylor@arm.com>
+
+	* config/aarch64/aarch64-simd-builtins.def:
+	(ld1x4): New.
+	(st1x4): Likewise.
+	* config/aarch64/aarch64-simd.md:
+	(aarch64_ld1x4<VALLDIF:mode>): New pattern.
+	(aarch64_st1x4<VALLDIF:mode>): Likewise.
+	(aarch64_ld1_x4_<VALLDIF:mode>): Likewise.
+	(aarch64_st1_x4_<VALLDIF:mode>): Likewise.
+	* config/aarch64/arm_neon.h:
+	(vld1_s8_x4): New function.
+	(vld1q_s8_x4): Likewise.
+	(vld1_s16_x4): Likewise.
+	(vld1q_s16_x4): Likewise.
+	(vld1_s32_x4): Likewise.
+	(vld1q_s32_x4): Likewise.
+	(vld1_u8_x4): Likewise.
+	(vld1q_u8_x4): Likewise.
+	(vld1_u16_x4): Likewise.
+	(vld1q_u16_x4): Likewise.
+	(vld1_u32_x4): Likewise.
+	(vld1q_u32_x4): Likewise.
+	(vld1_f16_x4): Likewise.
+	(vld1q_f16_x4): Likewise.
+	(vld1_f32_x4): Likewise.
+	(vld1q_f32_x4): Likewise.
+	(vld1_p8_x4): Likewise.
+	(vld1q_p8_x4): Likewise.
+	(vld1_p16_x4): Likewise.
+	(vld1q_p16_x4): Likewise.
+	(vld1_s64_x4): Likewise.
+	(vld1_u64_x4): Likewise.
+	(vld1_p64_x4): Likewise.
+	(vld1q_s64_x4): Likewise.
+	(vld1q_u64_x4): Likewise.
+	(vld1q_p64_x4): Likewise.
+	(vld1_f64_x4): Likewise.
+	(vld1q_f64_x4): Likewise.
+	(vst1_s8_x4): Likewise.
+	(vst1q_s8_x4): Likewise.
+	(vst1_s16_x4): Likewise.
+	(vst1q_s16_x4): Likewise.
+	(vst1_s32_x4): Likewise.
+	(vst1q_s32_x4): Likewise.
+	(vst1_u8_x4): Likewise.
+	(vst1q_u8_x4): Likewise.
+	(vst1_u16_x4): Likewise.
+	(vst1q_u16_x4): Likewise.
+	(vst1_u32_x4): Likewise.
+	(vst1q_u32_x4): Likewise.
+	(vst1_f16_x4): Likewise.
+	(vst1q_f16_x4): Likewise.
+	(vst1_f32_x4): Likewise.
+	(vst1q_f32_x4): Likewise.
+	(vst1_p8_x4): Likewise.
+	(vst1q_p8_x4): Likewise.
+	(vst1_p16_x4): Likewise.
+	(vst1q_p16_x4): Likewise.
+	(vst1_s64_x4): Likewise.
+	(vst1_u64_x4): Likewise.
+	(vst1_p64_x4): Likewise.
+	(vst1q_s64_x4): Likewise.
+	(vst1q_u64_x4): Likewise.
+	(vst1q_p64_x4): Likewise.
+	(vst1_f64_x4): Likewise.
+	(vst1q_f64_x4): Likewise.
+
 2019-08-22  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
 
 	* config/aarch64/aarch64-sve.md (vcond_mask<mode><vpred>): Add "@".
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 01518fec69b..779111a486d 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -465,12 +465,18 @@
   /* Implemented by aarch64_ld1x3<VALLDIF:mode>.  */
   BUILTIN_VALLDIF (LOADSTRUCT, ld1x3, 0)
 
+  /* Implemented by aarch64_ld1x4<VALLDIF:mode>.  */
+  BUILTIN_VALLDIF (LOADSTRUCT, ld1x4, 0)
+
   /* Implemented by aarch64_st1x2<VALLDIF:mode>.  */
   BUILTIN_VALLDIF (STORESTRUCT, st1x2, 0)
 
   /* Implemented by aarch64_st1x3<VALLDIF:mode>.  */
   BUILTIN_VALLDIF (STORESTRUCT, st1x3, 0)
 
+  /* Implemented by aarch64_st1x4<VALLDIF:mode>.  */
+  BUILTIN_VALLDIF (STORESTRUCT, st1x4, 0)
+
   /* Implemented by fma<mode>4.  */
   BUILTIN_VHSDF (TERNOP, fma, 4)
   VAR1 (TERNOP, fma, 4, hf)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e33a00967a9..6f7fb1c9b0c 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -5284,6 +5284,28 @@
   [(set_attr "type" "neon_load1_3reg")]
 )
 
+(define_expand "aarch64_ld1x4<VALLDIF:mode>"
+  [(match_operand:XI 0 "register_operand" "=w")
+   (match_operand:DI 1 "register_operand" "r")
+   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_SIMD"
+{
+  rtx mem = gen_rtx_MEM (XImode, operands[1]);
+  emit_insn (gen_aarch64_ld1_x4_<VALLDIF:mode> (operands[0], mem));
+  DONE;
+})
+
+(define_insn "aarch64_ld1_x4_<VALLDIF:mode>"
+  [(set (match_operand:XI 0 "register_operand" "=w")
+	(unspec:XI
+	  [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
+	   (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
+	  UNSPEC_LD1))]
+  "TARGET_SIMD"
+  "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
+  [(set_attr "type" "neon_load1_4reg")]
+)
+
 (define_expand "aarch64_st1x2<VALLDIF:mode>"
   [(match_operand:DI 0 "register_operand")
    (match_operand:OI 1 "register_operand")
@@ -5326,6 +5348,28 @@
   [(set_attr "type" "neon_store1_3reg")]
 )
 
+(define_expand "aarch64_st1x4<VALLDIF:mode>"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:XI 1 "register_operand" "")
+   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_SIMD"
+{
+  rtx mem = gen_rtx_MEM (XImode, operands[0]);
+  emit_insn (gen_aarch64_st1_x4_<VALLDIF:mode> (mem, operands[1]));
+  DONE;
+})
+
+(define_insn "aarch64_st1_x4_<VALLDIF:mode>"
+  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
+	(unspec:XI
+	  [(match_operand:XI 1 "register_operand" "w")
+	   (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
+	  UNSPEC_ST1))]
+  "TARGET_SIMD"
+  "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
+  [(set_attr "type" "neon_store1_4reg")]
+)
+
 (define_insn "*aarch64_mov<mode>"
   [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
	(match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 314ef30187d..9ecc00c46b9 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -17968,6 +17968,288 @@ vld1q_u64 (const uint64_t *a)
   __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
 }
 
+/* vld1(q)_x4.  */
+
+__extension__ extern __inline int8x8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_s8_x4 (const int8_t *__a)
+{
+  union { int8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4v8qi ((const __builtin_aarch64_simd_qi *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline int8x16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_s8_x4 (const int8_t *__a)
+{
+  union { int8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4v16qi ((const __builtin_aarch64_simd_qi *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline int16x4x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_s16_x4 (const int16_t *__a)
+{
+  union { int16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4v4hi ((const __builtin_aarch64_simd_hi *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline int16x8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_s16_x4 (const int16_t *__a)
+{
+  union { int16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4v8hi ((const __builtin_aarch64_simd_hi *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline int32x2x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_s32_x4 (const int32_t *__a)
+{
+  union { int32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4v2si ((const __builtin_aarch64_simd_si *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline int32x4x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_s32_x4 (const int32_t *__a)
+{
+  union { int32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4v4si ((const __builtin_aarch64_simd_si *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline uint8x8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_u8_x4 (const uint8_t *__a)
+{
+  union { uint8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4v8qi ((const __builtin_aarch64_simd_qi *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline uint8x16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_u8_x4 (const uint8_t *__a)
+{
+  union { uint8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4v16qi ((const __builtin_aarch64_simd_qi *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline uint16x4x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_u16_x4 (const uint16_t *__a)
+{
+  union { uint16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4v4hi ((const __builtin_aarch64_simd_hi *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline uint16x8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_u16_x4 (const uint16_t *__a)
+{
+  union { uint16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4v8hi ((const __builtin_aarch64_simd_hi *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline uint32x2x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_u32_x4 (const uint32_t *__a)
+{
+  union { uint32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4v2si ((const __builtin_aarch64_simd_si *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline uint32x4x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_u32_x4 (const uint32_t *__a)
+{
+  union { uint32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4v4si ((const __builtin_aarch64_simd_si *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline float16x4x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_f16_x4 (const float16_t *__a)
+{
+  union { float16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4v4hf ((const __builtin_aarch64_simd_hf *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline float16x8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_f16_x4 (const float16_t *__a)
+{
+  union { float16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4v8hf ((const __builtin_aarch64_simd_hf *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline float32x2x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_f32_x4 (const float32_t *__a)
+{
+  union { float32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4v2sf ((const __builtin_aarch64_simd_sf *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline float32x4x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_f32_x4 (const float32_t *__a)
+{
+  union { float32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4v4sf ((const __builtin_aarch64_simd_sf *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline poly8x8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_p8_x4 (const poly8_t *__a)
+{
+  union { poly8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4v8qi ((const __builtin_aarch64_simd_qi *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline poly8x16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_p8_x4 (const poly8_t *__a)
+{
+  union { poly8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4v16qi ((const __builtin_aarch64_simd_qi *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline poly16x4x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_p16_x4 (const poly16_t *__a)
+{
+  union { poly16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4v4hi ((const __builtin_aarch64_simd_hi *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline poly16x8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_p16_x4 (const poly16_t *__a)
+{
+  union { poly16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4v8hi ((const __builtin_aarch64_simd_hi *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline int64x1x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_s64_x4 (const int64_t *__a)
+{
+  union { int64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4di ((const __builtin_aarch64_simd_di *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline uint64x1x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_u64_x4 (const uint64_t *__a)
+{
+  union { uint64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4di ((const __builtin_aarch64_simd_di *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline poly64x1x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_p64_x4 (const poly64_t *__a)
+{
+  union { poly64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4di ((const __builtin_aarch64_simd_di *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline int64x2x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_s64_x4 (const int64_t *__a)
+{
+  union { int64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4v2di ((const __builtin_aarch64_simd_di *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline uint64x2x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_u64_x4 (const uint64_t *__a)
+{
+  union { uint64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4v2di ((const __builtin_aarch64_simd_di *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline poly64x2x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_p64_x4 (const poly64_t *__a)
+{
+  union { poly64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4v2di ((const __builtin_aarch64_simd_di *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline float64x1x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_f64_x4 (const float64_t *__a)
+{
+  union { float64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4df ((const __builtin_aarch64_simd_df *) __a);
+  return __au.__i;
+}
+
+__extension__ extern __inline float64x2x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_f64_x4 (const float64_t *__a)
+{
+  union { float64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+    = __builtin_aarch64_ld1x4v2df ((const __builtin_aarch64_simd_df *) __a);
+  return __au.__i;
+}
+
 /* vld1_dup */
 
 __extension__ extern __inline float16x4_t
@@ -28596,6 +28878,232 @@ vst1q_p64_x3 (poly64_t * __a, poly64x2x3_t val)
   __builtin_aarch64_st1x3v2di ((__builtin_aarch64_simd_di *) __a, __o);
 }
 
+/* vst1(q)_x4.  */
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1_s8_x4 (int8_t * __a, int8x8x4_t val)
+{
+  union { int8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v8qi ((__builtin_aarch64_simd_qi *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1q_s8_x4 (int8_t * __a, int8x16x4_t val)
+{
+  union { int8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v16qi ((__builtin_aarch64_simd_qi *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1_s16_x4 (int16_t * __a, int16x4x4_t val)
+{
+  union { int16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v4hi ((__builtin_aarch64_simd_hi *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1q_s16_x4 (int16_t * __a, int16x8x4_t val)
+{
+  union { int16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v8hi ((__builtin_aarch64_simd_hi *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1_s32_x4 (int32_t * __a, int32x2x4_t val)
+{
+  union { int32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v2si ((__builtin_aarch64_simd_si *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1q_s32_x4 (int32_t * __a, int32x4x4_t val)
+{
+  union { int32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v4si ((__builtin_aarch64_simd_si *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1_u8_x4 (uint8_t * __a, uint8x8x4_t val)
+{
+  union { uint8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v8qi ((__builtin_aarch64_simd_qi *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1q_u8_x4 (uint8_t * __a, uint8x16x4_t val)
+{
+  union { uint8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v16qi ((__builtin_aarch64_simd_qi *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1_u16_x4 (uint16_t * __a, uint16x4x4_t val)
+{
+  union { uint16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v4hi ((__builtin_aarch64_simd_hi *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1q_u16_x4 (uint16_t * __a, uint16x8x4_t val)
+{
+  union { uint16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v8hi ((__builtin_aarch64_simd_hi *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1_u32_x4 (uint32_t * __a, uint32x2x4_t val)
+{
+  union { uint32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v2si ((__builtin_aarch64_simd_si *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1q_u32_x4 (uint32_t * __a, uint32x4x4_t val)
+{
+  union { uint32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v4si ((__builtin_aarch64_simd_si *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1_f16_x4 (float16_t * __a, float16x4x4_t val)
+{
+  union { float16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v4hf ((__builtin_aarch64_simd_hf *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1q_f16_x4 (float16_t * __a, float16x8x4_t val)
+{
+  union { float16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v8hf ((__builtin_aarch64_simd_hf *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1_f32_x4 (float32_t * __a, float32x2x4_t val)
+{
+  union { float32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v2sf ((__builtin_aarch64_simd_sf *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1q_f32_x4 (float32_t * __a, float32x4x4_t val)
+{
+  union { float32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v4sf ((__builtin_aarch64_simd_sf *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1_p8_x4 (poly8_t * __a, poly8x8x4_t val)
+{
+  union { poly8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v8qi ((__builtin_aarch64_simd_qi *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1q_p8_x4 (poly8_t * __a, poly8x16x4_t val)
+{
+  union { poly8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v16qi ((__builtin_aarch64_simd_qi *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1_p16_x4 (poly16_t * __a, poly16x4x4_t val)
+{
+  union { poly16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v4hi ((__builtin_aarch64_simd_hi *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1q_p16_x4 (poly16_t * __a, poly16x8x4_t val)
+{
+  union { poly16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v8hi ((__builtin_aarch64_simd_hi *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1_s64_x4 (int64_t * __a, int64x1x4_t val)
+{
+  union { int64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4di ((__builtin_aarch64_simd_di *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1_u64_x4 (uint64_t * __a, uint64x1x4_t val)
+{
+  union { uint64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4di ((__builtin_aarch64_simd_di *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1_p64_x4 (poly64_t * __a, poly64x1x4_t val)
+{
+  union { poly64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4di ((__builtin_aarch64_simd_di *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1q_s64_x4 (int64_t * __a, int64x2x4_t val)
+{
+  union { int64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v2di ((__builtin_aarch64_simd_di *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1q_u64_x4 (uint64_t * __a, uint64x2x4_t val)
+{
+  union { uint64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v2di ((__builtin_aarch64_simd_di *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1q_p64_x4 (poly64_t * __a, poly64x2x4_t val)
+{
+  union { poly64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v2di ((__builtin_aarch64_simd_di *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1_f64_x4 (float64_t * __a, float64x1x4_t val)
+{
+  union { float64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4df ((__builtin_aarch64_simd_df *) __a, __u.__o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1q_f64_x4 (float64_t * __a, float64x2x4_t val)
+{
+  union { float64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v2df ((__builtin_aarch64_simd_df *) __a, __u.__o);
+}
+
 /* vstn */
 
 __extension__ extern __inline void
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 0d141dc8f47..03fd8328577 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2019-08-22  Sylvia Taylor  <sylvia.taylor@arm.com>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vld1x4.c: New test.
+	* gcc.target/aarch64/advsimd-intrinsics/vst1x4.c: New test.
+
 2019-08-22  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
 	    Richard Sandiford  <richard.sandiford@arm.com>
 
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x4.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x4.c
new file mode 100644
index 00000000000..451a0afc6aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x4.c
@@ -0,0 +1,83 @@
+/* We haven't implemented these intrinsics for arm yet.  */
+/* { dg-xfail-if "" { arm*-*-* } } */
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+
+extern void abort (void);
+
+#define TESTMETH(BASE, ELTS, SUFFIX)			\
+int __attribute__ ((noinline))				\
+test_vld1##SUFFIX##_x4 ()				\
+{							\
+  BASE##_t data[ELTS * 4];				\
+  BASE##_t temp[ELTS * 4];				\
+  BASE##x##ELTS##x##4##_t vectors;			\
+  int i,j;						\
+  for (i = 0; i < ELTS * 4; i++)			\
+    data [i] = (BASE##_t) 4*i;				\
+  asm volatile ("" : : : "memory");			\
+  vectors = vld1##SUFFIX##_x4 (data);			\
+  vst1##SUFFIX (temp, vectors.val[0]);			\
+  vst1##SUFFIX (&temp[ELTS], vectors.val[1]);		\
+  vst1##SUFFIX (&temp[ELTS * 2], vectors.val[2]);	\
+  vst1##SUFFIX (&temp[ELTS * 3], vectors.val[3]);	\
+  asm volatile ("" : : : "memory");			\
+  for (j = 0; j < ELTS * 4; j++)			\
+    if (temp[j] != data[j])				\
+      return 1;						\
+  return 0;						\
+}
+
+#define VARIANTS_1(VARIANT)	\
+VARIANT (uint8, 8, _u8)		\
+VARIANT (uint16, 4, _u16)	\
+VARIANT (uint32, 2, _u32)	\
+VARIANT (uint64, 1, _u64)	\
+VARIANT (int8, 8, _s8)		\
+VARIANT (int16, 4, _s16)	\
+VARIANT (int32, 2, _s32)	\
+VARIANT (int64, 1, _s64)	\
+VARIANT (poly8, 8, _p8)		\
+VARIANT (poly16, 4, _p16)	\
+VARIANT (poly64, 1, _p64)	\
+VARIANT (float16, 4, _f16)	\
+VARIANT (float32, 2, _f32)	\
+VARIANT (uint8, 16, q_u8)	\
+VARIANT (uint16, 8, q_u16)	\
+VARIANT (uint32, 4, q_u32)	\
+VARIANT (uint64, 2, q_u64)	\
+VARIANT (int8, 16, q_s8)	\
+VARIANT (int16, 8, q_s16)	\
+VARIANT (int32, 4, q_s32)	\
+VARIANT (int64, 2, q_s64)	\
+VARIANT (poly8, 16, q_p8)	\
+VARIANT (poly16, 8, q_p16)	\
+VARIANT (poly64, 2, q_p64)	\
+VARIANT (float16, 8, q_f16)	\
+VARIANT (float32, 4, q_f32)
+
+#ifdef __aarch64__
+#define VARIANTS(VARIANT) VARIANTS_1(VARIANT)	\
+VARIANT (float64, 1, _f64)	\
+VARIANT (float64, 2, q_f64)
+#else
+#define VARIANTS(VARIANT) VARIANTS_1(VARIANT)
+#endif
+
+/* Tests of vld1_x4 and vld1q_x4.  */
+VARIANTS (TESTMETH)
+
+#define CHECKS(BASE, ELTS, SUFFIX)	\
+  if (test_vld1##SUFFIX##_x4 () != 0)	\
+    fprintf (stderr, "test_vld1##SUFFIX##_x4");
+
+int
+main (int argc, char **argv)
+{
+  VARIANTS (CHECKS)
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1x4.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1x4.c
new file mode 100644
index 00000000000..1f17b5342de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1x4.c
@@ -0,0 +1,83 @@
+/* We haven't implemented these intrinsics for arm yet.  */
+/* { dg-xfail-if "" { arm*-*-* } } */
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+
+extern void abort (void);
+
+#define TESTMETH(BASE, ELTS, SUFFIX)			\
+int __attribute__ ((noinline))				\
+test_vst1##SUFFIX##_x4 ()				\
+{							\
+  BASE##_t data[ELTS * 4];				\
+  BASE##_t temp[ELTS * 4];				\
+  BASE##x##ELTS##x##4##_t vectors;			\
+  int i,j;						\
+  for (i = 0; i < ELTS * 4; i++)			\
+    data [i] = (BASE##_t) 4*i;				\
+  asm volatile ("" : : : "memory");			\
+  vectors.val[0] = vld1##SUFFIX (data);			\
+  vectors.val[1] = vld1##SUFFIX (&data[ELTS]);		\
+  vectors.val[2] = vld1##SUFFIX (&data[ELTS * 2]);	\
+  vectors.val[3] = vld1##SUFFIX (&data[ELTS * 3]);	\
+  vst1##SUFFIX##_x4 (temp, vectors);			\
+  asm volatile ("" : : : "memory");			\
+  for (j = 0; j < ELTS * 4; j++)			\
+    if (temp[j] != data[j])				\
+      return 1;						\
+  return 0;						\
+}
+
+#define VARIANTS_1(VARIANT)	\
+VARIANT (uint8, 8, _u8)		\
+VARIANT (uint16, 4, _u16)	\
+VARIANT (uint32, 2, _u32)	\
+VARIANT (uint64, 1, _u64)	\
+VARIANT (int8, 8, _s8)		\
+VARIANT (int16, 4, _s16)	\
+VARIANT (int32, 2, _s32)	\
+VARIANT (int64, 1, _s64)	\
+VARIANT (poly8, 8, _p8)		\
+VARIANT (poly16, 4, _p16)	\
+VARIANT (poly64, 1, _p64)	\
+VARIANT (float16, 4, _f16)	\
+VARIANT (float32, 2, _f32)	\
+VARIANT (uint8, 16, q_u8)	\
+VARIANT (uint16, 8, q_u16)	\
+VARIANT (uint32, 4, q_u32)	\
+VARIANT (uint64, 2, q_u64)	\
+VARIANT (int8, 16, q_s8)	\
+VARIANT (int16, 8, q_s16)	\
+VARIANT (int32, 4, q_s32)	\
+VARIANT (int64, 2, q_s64)	\
+VARIANT (poly8, 16, q_p8)	\
+VARIANT (poly16, 8, q_p16)	\
+VARIANT (poly64, 2, q_p64)	\
+VARIANT (float16, 8, q_f16)	\
+VARIANT (float32, 4, q_f32)
+
+#ifdef __aarch64__
+#define VARIANTS(VARIANT) VARIANTS_1(VARIANT)	\
+VARIANT (float64, 1, _f64)	\
+VARIANT (float64, 2, q_f64)
+#else
+#define VARIANTS(VARIANT) VARIANTS_1(VARIANT)
+#endif
+
+/* Tests of vst1_x4 and vst1q_x4.  */
+VARIANTS (TESTMETH)
+
+#define CHECKS(BASE, ELTS, SUFFIX)	\
+  if (test_vst1##SUFFIX##_x4 () != 0)	\
+    fprintf (stderr, "test_vst1##SUFFIX##_x4");
+
+int
+main (int argc, char **argv)
+{
+  VARIANTS (CHECKS)
+
+  return 0;
+}
-- 
2.30.2
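
For reference, a minimal usage sketch of the new intrinsics (not part of the
patch; the copy64 helper and file name are illustrative only). It loads 64
bytes into four q-registers with a single vld1q_u8_x4 and writes them back
with a single vst1q_u8_x4; with the patterns above, each side should compile
to one instruction of the form ld1 {v0.16b - v3.16b}, [x0] / st1 likewise.

/* copy64.c -- illustrative sketch; build with an aarch64 gcc,
   e.g. aarch64-none-linux-gnu-gcc -O2 copy64.c.  */
#include <arm_neon.h>
#include <stdio.h>

static void
copy64 (uint8_t *dst, const uint8_t *src)
{
  /* One 64-byte load into four 128-bit registers...  */
  uint8x16x4_t v = vld1q_u8_x4 (src);
  /* ...and one 64-byte store back.  */
  vst1q_u8_x4 (dst, v);
}

int
main (void)
{
  uint8_t src[64], dst[64];
  for (int i = 0; i < 64; i++)
    src[i] = (uint8_t) i;
  copy64 (dst, src);
  for (int i = 0; i < 64; i++)
    if (dst[i] != src[i])
      {
	fprintf (stderr, "mismatch at %d\n", i);
	return 1;
      }
  return 0;
}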