From 4cc23303bad126f844a14f88c344317e6cf4c3dc Mon Sep 17 00:00:00 2001 From: Srinath Parvathaneni Date: Wed, 18 Mar 2020 18:58:48 +0000 Subject: [PATCH] [ARM][GCC][6/5x]: Remaining MVE load intrinsics which loads half word and word or double word from memory. This patch supports the following Remaining MVE ACLE load intrinsics which load an halfword, word or double word from memory. vldrdq_gather_base_s64, vldrdq_gather_base_u64, vldrdq_gather_base_z_s64, vldrdq_gather_base_z_u64, vldrdq_gather_offset_s64, vldrdq_gather_offset_u64, vldrdq_gather_offset_z_s64, vldrdq_gather_offset_z_u64, vldrdq_gather_shifted_offset_s64, vldrdq_gather_shifted_offset_u64, vldrdq_gather_shifted_offset_z_s64, vldrdq_gather_shifted_offset_z_u64, vldrhq_gather_offset_f16, vldrhq_gather_offset_z_f16, vldrhq_gather_shifted_offset_f16, vldrhq_gather_shifted_offset_z_f16, vldrwq_gather_base_f32, vldrwq_gather_base_z_f32, vldrwq_gather_offset_f32, vldrwq_gather_offset_s32, vldrwq_gather_offset_u32, vldrwq_gather_offset_z_f32, vldrwq_gather_offset_z_s32, vldrwq_gather_offset_z_u32, vldrwq_gather_shifted_offset_f32, vldrwq_gather_shifted_offset_s32, vldrwq_gather_shifted_offset_u32, vldrwq_gather_shifted_offset_z_f32, vldrwq_gather_shifted_offset_z_s32, vldrwq_gather_shifted_offset_z_u32. Please refer to M-profile Vector Extension (MVE) intrinsics [1] for more details. [1] https://developer.arm.com/architectures/instruction-sets/simd-isas/helium/mve-intrinsics 2020-03-18 Andre Vieira Mihail Ionescu Srinath Parvathaneni * config/arm/arm_mve.h (vld1q_s8): Define macro. (vld1q_s32): Likewise. (vld1q_s16): Likewise. (vld1q_u8): Likewise. (vld1q_u32): Likewise. (vld1q_u16): Likewise. (vldrhq_gather_offset_s32): Likewise. (vldrhq_gather_offset_s16): Likewise. (vldrhq_gather_offset_u32): Likewise. (vldrhq_gather_offset_u16): Likewise. (vldrhq_gather_offset_z_s32): Likewise. (vldrhq_gather_offset_z_s16): Likewise. (vldrhq_gather_offset_z_u32): Likewise. (vldrhq_gather_offset_z_u16): Likewise. (vldrhq_gather_shifted_offset_s32): Likewise. (vldrhq_gather_shifted_offset_s16): Likewise. (vldrhq_gather_shifted_offset_u32): Likewise. (vldrhq_gather_shifted_offset_u16): Likewise. (vldrhq_gather_shifted_offset_z_s32): Likewise. (vldrhq_gather_shifted_offset_z_s16): Likewise. (vldrhq_gather_shifted_offset_z_u32): Likewise. (vldrhq_gather_shifted_offset_z_u16): Likewise. (vldrhq_s32): Likewise. (vldrhq_s16): Likewise. (vldrhq_u32): Likewise. (vldrhq_u16): Likewise. (vldrhq_z_s32): Likewise. (vldrhq_z_s16): Likewise. (vldrhq_z_u32): Likewise. (vldrhq_z_u16): Likewise. (vldrwq_s32): Likewise. (vldrwq_u32): Likewise. (vldrwq_z_s32): Likewise. (vldrwq_z_u32): Likewise. (vld1q_f32): Likewise. (vld1q_f16): Likewise. (vldrhq_f16): Likewise. (vldrhq_z_f16): Likewise. (vldrwq_f32): Likewise. (vldrwq_z_f32): Likewise. (__arm_vld1q_s8): Define intrinsic. (__arm_vld1q_s32): Likewise. (__arm_vld1q_s16): Likewise. (__arm_vld1q_u8): Likewise. (__arm_vld1q_u32): Likewise. (__arm_vld1q_u16): Likewise. (__arm_vldrhq_gather_offset_s32): Likewise. (__arm_vldrhq_gather_offset_s16): Likewise. (__arm_vldrhq_gather_offset_u32): Likewise. (__arm_vldrhq_gather_offset_u16): Likewise. (__arm_vldrhq_gather_offset_z_s32): Likewise. (__arm_vldrhq_gather_offset_z_s16): Likewise. (__arm_vldrhq_gather_offset_z_u32): Likewise. (__arm_vldrhq_gather_offset_z_u16): Likewise. (__arm_vldrhq_gather_shifted_offset_s32): Likewise. (__arm_vldrhq_gather_shifted_offset_s16): Likewise. (__arm_vldrhq_gather_shifted_offset_u32): Likewise. (__arm_vldrhq_gather_shifted_offset_u16): Likewise. (__arm_vldrhq_gather_shifted_offset_z_s32): Likewise. (__arm_vldrhq_gather_shifted_offset_z_s16): Likewise. (__arm_vldrhq_gather_shifted_offset_z_u32): Likewise. (__arm_vldrhq_gather_shifted_offset_z_u16): Likewise. (__arm_vldrhq_s32): Likewise. (__arm_vldrhq_s16): Likewise. (__arm_vldrhq_u32): Likewise. (__arm_vldrhq_u16): Likewise. (__arm_vldrhq_z_s32): Likewise. (__arm_vldrhq_z_s16): Likewise. (__arm_vldrhq_z_u32): Likewise. (__arm_vldrhq_z_u16): Likewise. (__arm_vldrwq_s32): Likewise. (__arm_vldrwq_u32): Likewise. (__arm_vldrwq_z_s32): Likewise. (__arm_vldrwq_z_u32): Likewise. (__arm_vld1q_f32): Likewise. (__arm_vld1q_f16): Likewise. (__arm_vldrwq_f32): Likewise. (__arm_vldrwq_z_f32): Likewise. (__arm_vldrhq_z_f16): Likewise. (__arm_vldrhq_f16): Likewise. (vld1q): Define polymorphic variant. (vldrhq_gather_offset): Likewise. (vldrhq_gather_offset_z): Likewise. (vldrhq_gather_shifted_offset): Likewise. (vldrhq_gather_shifted_offset_z): Likewise. * config/arm/arm_mve_builtins.def (LDRU): Use builtin qualifier. (LDRS): Likewise. (LDRU_Z): Likewise. (LDRS_Z): Likewise. (LDRGU_Z): Likewise. (LDRGU): Likewise. (LDRGS_Z): Likewise. (LDRGS): Likewise. * config/arm/mve.md (MVE_H_ELEM): Define mode iterator. (V_sz_elem1): Likewise. (VLD1Q): Define iterator. (VLDRHGOQ): Likewise. (VLDRHGSOQ): Likewise. (VLDRHQ): Likewise. (VLDRWQ): Likewise. (mve_vldrhq_fv8hf): Define RTL pattern. (mve_vldrhq_gather_offset_): Likewise. (mve_vldrhq_gather_offset_z_): Likewise. (mve_vldrhq_gather_shifted_offset_): Likewise. (mve_vldrhq_gather_shifted_offset_z_): Likewise. (mve_vldrhq_): Likewise. (mve_vldrhq_z_fv8hf): Likewise. (mve_vldrhq_z_): Likewise. (mve_vldrwq_fv4sf): Likewise. (mve_vldrwq_v4si): Likewise. (mve_vldrwq_z_fv4sf): Likewise. (mve_vldrwq_z_v4si): Likewise. (mve_vld1q_f): Define RTL expand pattern. (mve_vld1q_): Likewise. gcc/testsuite/ChangeLog: 2020-03-18 Andre Vieira Mihail Ionescu Srinath Parvathaneni * gcc.target/arm/mve/intrinsics/vld1q_f16.c: New test. * gcc.target/arm/mve/intrinsics/vld1q_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vld1q_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vld1q_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vld1q_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vld1q_u16.c: Likewise. * gcc.target/arm/mve/intrinsics/vld1q_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vld1q_u8.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrhq_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrhq_gather_offset_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrhq_gather_offset_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrhq_gather_offset_u16.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrhq_gather_offset_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrhq_gather_offset_z_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrhq_gather_offset_z_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrhq_gather_offset_z_u16.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrhq_gather_offset_z_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrhq_gather_shifted_offset_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrhq_gather_shifted_offset_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrhq_gather_shifted_offset_u16.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrhq_gather_shifted_offset_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrhq_gather_shifted_offset_z_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrhq_gather_shifted_offset_z_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrhq_gather_shifted_offset_z_u16.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrhq_gather_shifted_offset_z_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrhq_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrhq_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrhq_u16.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrhq_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrhq_z_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrhq_z_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrhq_z_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrhq_z_u16.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrhq_z_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_z_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_z_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_z_u32.c: Likewise. --- gcc/ChangeLog | 119 +++++ gcc/config/arm/arm_mve.h | 359 +++++++++++++++ gcc/config/arm/arm_mve_builtins.def | 30 ++ gcc/config/arm/mve.md | 432 +++++++++++++++++- gcc/testsuite/ChangeLog | 45 ++ .../mve/intrinsics/vldrdq_gather_base_s64.c | 13 + .../mve/intrinsics/vldrdq_gather_base_u64.c | 13 + .../mve/intrinsics/vldrdq_gather_base_z_s64.c | 13 + .../mve/intrinsics/vldrdq_gather_base_z_u64.c | 13 + .../mve/intrinsics/vldrdq_gather_offset_s64.c | 21 + .../mve/intrinsics/vldrdq_gather_offset_u64.c | 21 + .../intrinsics/vldrdq_gather_offset_z_s64.c | 21 + .../intrinsics/vldrdq_gather_offset_z_u64.c | 21 + .../vldrdq_gather_shifted_offset_s64.c | 21 + .../vldrdq_gather_shifted_offset_u64.c | 21 + .../vldrdq_gather_shifted_offset_z_s64.c | 21 + .../vldrdq_gather_shifted_offset_z_u64.c | 21 + .../mve/intrinsics/vldrhq_gather_offset_f16.c | 21 + .../intrinsics/vldrhq_gather_offset_z_f16.c | 21 + .../vldrhq_gather_shifted_offset_f16.c | 21 + .../vldrhq_gather_shifted_offset_z_f16.c | 21 + .../mve/intrinsics/vldrwq_gather_base_f32.c | 13 + .../mve/intrinsics/vldrwq_gather_base_z_f32.c | 13 + .../mve/intrinsics/vldrwq_gather_offset_f32.c | 21 + .../mve/intrinsics/vldrwq_gather_offset_s32.c | 21 + .../mve/intrinsics/vldrwq_gather_offset_u32.c | 21 + .../intrinsics/vldrwq_gather_offset_z_f32.c | 21 + .../intrinsics/vldrwq_gather_offset_z_s32.c | 21 + .../intrinsics/vldrwq_gather_offset_z_u32.c | 21 + .../vldrwq_gather_shifted_offset_f32.c | 21 + .../vldrwq_gather_shifted_offset_s32.c | 21 + .../vldrwq_gather_shifted_offset_u32.c | 21 + .../vldrwq_gather_shifted_offset_z_f32.c | 21 + .../vldrwq_gather_shifted_offset_z_s32.c | 21 + .../vldrwq_gather_shifted_offset_z_u32.c | 21 + 35 files changed, 1565 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_s64.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_u64.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_z_s64.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_z_u64.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_offset_s64.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_offset_u64.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_offset_z_s64.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_offset_z_u64.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_shifted_offset_s64.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_shifted_offset_u64.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_shifted_offset_z_s64.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_shifted_offset_z_u64.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrhq_gather_offset_f16.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrhq_gather_offset_z_f16.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrhq_gather_shifted_offset_f16.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrhq_gather_shifted_offset_z_f16.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_f32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_z_f32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_offset_f32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_offset_s32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_offset_u32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_offset_z_f32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_offset_z_s32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_offset_z_u32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_shifted_offset_f32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_shifted_offset_s32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_shifted_offset_u32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_shifted_offset_z_f32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_shifted_offset_z_s32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_shifted_offset_z_u32.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 657683d21cd..2a29c2dac13 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -117,6 +117,125 @@ (mve_vld1q_f): Define RTL expand pattern. (mve_vld1q_): Likewise. +2020-03-18 Andre Vieira + Mihail Ionescu + Srinath Parvathaneni + + * config/arm/arm_mve.h (vld1q_s8): Define macro. + (vld1q_s32): Likewise. + (vld1q_s16): Likewise. + (vld1q_u8): Likewise. + (vld1q_u32): Likewise. + (vld1q_u16): Likewise. + (vldrhq_gather_offset_s32): Likewise. + (vldrhq_gather_offset_s16): Likewise. + (vldrhq_gather_offset_u32): Likewise. + (vldrhq_gather_offset_u16): Likewise. + (vldrhq_gather_offset_z_s32): Likewise. + (vldrhq_gather_offset_z_s16): Likewise. + (vldrhq_gather_offset_z_u32): Likewise. + (vldrhq_gather_offset_z_u16): Likewise. + (vldrhq_gather_shifted_offset_s32): Likewise. + (vldrhq_gather_shifted_offset_s16): Likewise. + (vldrhq_gather_shifted_offset_u32): Likewise. + (vldrhq_gather_shifted_offset_u16): Likewise. + (vldrhq_gather_shifted_offset_z_s32): Likewise. + (vldrhq_gather_shifted_offset_z_s16): Likewise. + (vldrhq_gather_shifted_offset_z_u32): Likewise. + (vldrhq_gather_shifted_offset_z_u16): Likewise. + (vldrhq_s32): Likewise. + (vldrhq_s16): Likewise. + (vldrhq_u32): Likewise. + (vldrhq_u16): Likewise. + (vldrhq_z_s32): Likewise. + (vldrhq_z_s16): Likewise. + (vldrhq_z_u32): Likewise. + (vldrhq_z_u16): Likewise. + (vldrwq_s32): Likewise. + (vldrwq_u32): Likewise. + (vldrwq_z_s32): Likewise. + (vldrwq_z_u32): Likewise. + (vld1q_f32): Likewise. + (vld1q_f16): Likewise. + (vldrhq_f16): Likewise. + (vldrhq_z_f16): Likewise. + (vldrwq_f32): Likewise. + (vldrwq_z_f32): Likewise. + (__arm_vld1q_s8): Define intrinsic. + (__arm_vld1q_s32): Likewise. + (__arm_vld1q_s16): Likewise. + (__arm_vld1q_u8): Likewise. + (__arm_vld1q_u32): Likewise. + (__arm_vld1q_u16): Likewise. + (__arm_vldrhq_gather_offset_s32): Likewise. + (__arm_vldrhq_gather_offset_s16): Likewise. + (__arm_vldrhq_gather_offset_u32): Likewise. + (__arm_vldrhq_gather_offset_u16): Likewise. + (__arm_vldrhq_gather_offset_z_s32): Likewise. + (__arm_vldrhq_gather_offset_z_s16): Likewise. + (__arm_vldrhq_gather_offset_z_u32): Likewise. + (__arm_vldrhq_gather_offset_z_u16): Likewise. + (__arm_vldrhq_gather_shifted_offset_s32): Likewise. + (__arm_vldrhq_gather_shifted_offset_s16): Likewise. + (__arm_vldrhq_gather_shifted_offset_u32): Likewise. + (__arm_vldrhq_gather_shifted_offset_u16): Likewise. + (__arm_vldrhq_gather_shifted_offset_z_s32): Likewise. + (__arm_vldrhq_gather_shifted_offset_z_s16): Likewise. + (__arm_vldrhq_gather_shifted_offset_z_u32): Likewise. + (__arm_vldrhq_gather_shifted_offset_z_u16): Likewise. + (__arm_vldrhq_s32): Likewise. + (__arm_vldrhq_s16): Likewise. + (__arm_vldrhq_u32): Likewise. + (__arm_vldrhq_u16): Likewise. + (__arm_vldrhq_z_s32): Likewise. + (__arm_vldrhq_z_s16): Likewise. + (__arm_vldrhq_z_u32): Likewise. + (__arm_vldrhq_z_u16): Likewise. + (__arm_vldrwq_s32): Likewise. + (__arm_vldrwq_u32): Likewise. + (__arm_vldrwq_z_s32): Likewise. + (__arm_vldrwq_z_u32): Likewise. + (__arm_vld1q_f32): Likewise. + (__arm_vld1q_f16): Likewise. + (__arm_vldrwq_f32): Likewise. + (__arm_vldrwq_z_f32): Likewise. + (__arm_vldrhq_z_f16): Likewise. + (__arm_vldrhq_f16): Likewise. + (vld1q): Define polymorphic variant. + (vldrhq_gather_offset): Likewise. + (vldrhq_gather_offset_z): Likewise. + (vldrhq_gather_shifted_offset): Likewise. + (vldrhq_gather_shifted_offset_z): Likewise. + * config/arm/arm_mve_builtins.def (LDRU): Use builtin qualifier. + (LDRS): Likewise. + (LDRU_Z): Likewise. + (LDRS_Z): Likewise. + (LDRGU_Z): Likewise. + (LDRGU): Likewise. + (LDRGS_Z): Likewise. + (LDRGS): Likewise. + * config/arm/mve.md (MVE_H_ELEM): Define mode iterator. + (V_sz_elem1): Likewise. + (VLD1Q): Define iterator. + (VLDRHGOQ): Likewise. + (VLDRHGSOQ): Likewise. + (VLDRHQ): Likewise. + (VLDRWQ): Likewise. + (mve_vldrhq_fv8hf): Define RTL pattern. + (mve_vldrhq_gather_offset_): Likewise. + (mve_vldrhq_gather_offset_z_): Likewise. + (mve_vldrhq_gather_shifted_offset_): Likewise. + (mve_vldrhq_gather_shifted_offset_z_): Likewise. + (mve_vldrhq_): Likewise. + (mve_vldrhq_z_fv8hf): Likewise. + (mve_vldrhq_z_): Likewise. + (mve_vldrwq_fv4sf): Likewise. + (mve_vldrwq_v4si): Likewise. + (mve_vldrwq_z_fv4sf): Likewise. + (mve_vldrwq_z_v4si): Likewise. + (mve_vld1q_f): Define RTL expand pattern. + (mve_vld1q_): Likewise. + 2020-03-18 Andre Vieira Mihail Ionescu Srinath Parvathaneni diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index 9991e25c8d6..89cdc5bf858 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -1798,6 +1798,36 @@ typedef struct { uint8x16_t val[4]; } uint8x16x4_t; #define vldrhq_z_f16(__base, __p) __arm_vldrhq_z_f16(__base, __p) #define vldrwq_f32(__base) __arm_vldrwq_f32(__base) #define vldrwq_z_f32(__base, __p) __arm_vldrwq_z_f32(__base, __p) +#define vldrdq_gather_base_s64(__addr, __offset) __arm_vldrdq_gather_base_s64(__addr, __offset) +#define vldrdq_gather_base_u64(__addr, __offset) __arm_vldrdq_gather_base_u64(__addr, __offset) +#define vldrdq_gather_base_z_s64(__addr, __offset, __p) __arm_vldrdq_gather_base_z_s64(__addr, __offset, __p) +#define vldrdq_gather_base_z_u64(__addr, __offset, __p) __arm_vldrdq_gather_base_z_u64(__addr, __offset, __p) +#define vldrdq_gather_offset_s64(__base, __offset) __arm_vldrdq_gather_offset_s64(__base, __offset) +#define vldrdq_gather_offset_u64(__base, __offset) __arm_vldrdq_gather_offset_u64(__base, __offset) +#define vldrdq_gather_offset_z_s64(__base, __offset, __p) __arm_vldrdq_gather_offset_z_s64(__base, __offset, __p) +#define vldrdq_gather_offset_z_u64(__base, __offset, __p) __arm_vldrdq_gather_offset_z_u64(__base, __offset, __p) +#define vldrdq_gather_shifted_offset_s64(__base, __offset) __arm_vldrdq_gather_shifted_offset_s64(__base, __offset) +#define vldrdq_gather_shifted_offset_u64(__base, __offset) __arm_vldrdq_gather_shifted_offset_u64(__base, __offset) +#define vldrdq_gather_shifted_offset_z_s64(__base, __offset, __p) __arm_vldrdq_gather_shifted_offset_z_s64(__base, __offset, __p) +#define vldrdq_gather_shifted_offset_z_u64(__base, __offset, __p) __arm_vldrdq_gather_shifted_offset_z_u64(__base, __offset, __p) +#define vldrhq_gather_offset_f16(__base, __offset) __arm_vldrhq_gather_offset_f16(__base, __offset) +#define vldrhq_gather_offset_z_f16(__base, __offset, __p) __arm_vldrhq_gather_offset_z_f16(__base, __offset, __p) +#define vldrhq_gather_shifted_offset_f16(__base, __offset) __arm_vldrhq_gather_shifted_offset_f16(__base, __offset) +#define vldrhq_gather_shifted_offset_z_f16(__base, __offset, __p) __arm_vldrhq_gather_shifted_offset_z_f16(__base, __offset, __p) +#define vldrwq_gather_base_f32(__addr, __offset) __arm_vldrwq_gather_base_f32(__addr, __offset) +#define vldrwq_gather_base_z_f32(__addr, __offset, __p) __arm_vldrwq_gather_base_z_f32(__addr, __offset, __p) +#define vldrwq_gather_offset_f32(__base, __offset) __arm_vldrwq_gather_offset_f32(__base, __offset) +#define vldrwq_gather_offset_s32(__base, __offset) __arm_vldrwq_gather_offset_s32(__base, __offset) +#define vldrwq_gather_offset_u32(__base, __offset) __arm_vldrwq_gather_offset_u32(__base, __offset) +#define vldrwq_gather_offset_z_f32(__base, __offset, __p) __arm_vldrwq_gather_offset_z_f32(__base, __offset, __p) +#define vldrwq_gather_offset_z_s32(__base, __offset, __p) __arm_vldrwq_gather_offset_z_s32(__base, __offset, __p) +#define vldrwq_gather_offset_z_u32(__base, __offset, __p) __arm_vldrwq_gather_offset_z_u32(__base, __offset, __p) +#define vldrwq_gather_shifted_offset_f32(__base, __offset) __arm_vldrwq_gather_shifted_offset_f32(__base, __offset) +#define vldrwq_gather_shifted_offset_s32(__base, __offset) __arm_vldrwq_gather_shifted_offset_s32(__base, __offset) +#define vldrwq_gather_shifted_offset_u32(__base, __offset) __arm_vldrwq_gather_shifted_offset_u32(__base, __offset) +#define vldrwq_gather_shifted_offset_z_f32(__base, __offset, __p) __arm_vldrwq_gather_shifted_offset_z_f32(__base, __offset, __p) +#define vldrwq_gather_shifted_offset_z_s32(__base, __offset, __p) __arm_vldrwq_gather_shifted_offset_z_s32(__base, __offset, __p) +#define vldrwq_gather_shifted_offset_z_u32(__base, __offset, __p) __arm_vldrwq_gather_shifted_offset_z_u32(__base, __offset, __p) #endif __extension__ extern __inline void @@ -11722,6 +11752,147 @@ __arm_vldrwq_z_u32 (uint32_t const * __base, mve_pred16_t __p) return __builtin_mve_vldrwq_z_uv4si ((__builtin_neon_si *) __base, __p); } +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrdq_gather_base_s64 (uint64x2_t __addr, const int __offset) +{ + return __builtin_mve_vldrdq_gather_base_sv2di (__addr, __offset); +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrdq_gather_base_u64 (uint64x2_t __addr, const int __offset) +{ + return __builtin_mve_vldrdq_gather_base_uv2di (__addr, __offset); +} + +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrdq_gather_base_z_s64 (uint64x2_t __addr, const int __offset, mve_pred16_t __p) +{ + return __builtin_mve_vldrdq_gather_base_z_sv2di (__addr, __offset, __p); +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrdq_gather_base_z_u64 (uint64x2_t __addr, const int __offset, mve_pred16_t __p) +{ + return __builtin_mve_vldrdq_gather_base_z_uv2di (__addr, __offset, __p); +} + +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrdq_gather_offset_s64 (int64_t const * __base, uint64x2_t __offset) +{ + return __builtin_mve_vldrdq_gather_offset_sv2di ((__builtin_neon_di *) __base, __offset); +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrdq_gather_offset_u64 (uint64_t const * __base, uint64x2_t __offset) +{ + return __builtin_mve_vldrdq_gather_offset_uv2di ((__builtin_neon_di *) __base, __offset); +} + +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrdq_gather_offset_z_s64 (int64_t const * __base, uint64x2_t __offset, mve_pred16_t __p) +{ + return __builtin_mve_vldrdq_gather_offset_z_sv2di ((__builtin_neon_di *) __base, __offset, __p); +} + + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrdq_gather_offset_z_u64 (uint64_t const * __base, uint64x2_t __offset, mve_pred16_t __p) +{ + return __builtin_mve_vldrdq_gather_offset_z_uv2di ((__builtin_neon_di *) __base, __offset, __p); +} + +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrdq_gather_shifted_offset_s64 (int64_t const * __base, uint64x2_t __offset) +{ + return __builtin_mve_vldrdq_gather_shifted_offset_sv2di ((__builtin_neon_di *) __base, __offset); +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrdq_gather_shifted_offset_u64 (uint64_t const * __base, uint64x2_t __offset) +{ + return __builtin_mve_vldrdq_gather_shifted_offset_uv2di ((__builtin_neon_di *) __base, __offset); +} + +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrdq_gather_shifted_offset_z_s64 (int64_t const * __base, uint64x2_t __offset, mve_pred16_t __p) +{ + return __builtin_mve_vldrdq_gather_shifted_offset_z_sv2di ((__builtin_neon_di *) __base, __offset, __p); +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrdq_gather_shifted_offset_z_u64 (uint64_t const * __base, uint64x2_t __offset, mve_pred16_t __p) +{ + return __builtin_mve_vldrdq_gather_shifted_offset_z_uv2di ((__builtin_neon_di *) __base, __offset, __p); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_offset_s32 (int32_t const * __base, uint32x4_t __offset) +{ + return __builtin_mve_vldrwq_gather_offset_sv4si ((__builtin_neon_si *) __base, __offset); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_offset_u32 (uint32_t const * __base, uint32x4_t __offset) +{ + return __builtin_mve_vldrwq_gather_offset_uv4si ((__builtin_neon_si *) __base, __offset); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_offset_z_s32 (int32_t const * __base, uint32x4_t __offset, mve_pred16_t __p) +{ + return __builtin_mve_vldrwq_gather_offset_z_sv4si ((__builtin_neon_si *) __base, __offset, __p); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_offset_z_u32 (uint32_t const * __base, uint32x4_t __offset, mve_pred16_t __p) +{ + return __builtin_mve_vldrwq_gather_offset_z_uv4si ((__builtin_neon_si *) __base, __offset, __p); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_shifted_offset_s32 (int32_t const * __base, uint32x4_t __offset) +{ + return __builtin_mve_vldrwq_gather_shifted_offset_sv4si ((__builtin_neon_si *) __base, __offset); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_shifted_offset_u32 (uint32_t const * __base, uint32x4_t __offset) +{ + return __builtin_mve_vldrwq_gather_shifted_offset_uv4si ((__builtin_neon_si *) __base, __offset); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_shifted_offset_z_s32 (int32_t const * __base, uint32x4_t __offset, mve_pred16_t __p) +{ + return __builtin_mve_vldrwq_gather_shifted_offset_z_sv4si ((__builtin_neon_si *) __base, __offset, __p); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_shifted_offset_z_u32 (uint32_t const * __base, uint32x4_t __offset, mve_pred16_t __p) +{ + return __builtin_mve_vldrwq_gather_shifted_offset_z_uv4si ((__builtin_neon_si *) __base, __offset, __p); +} + #if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */ __extension__ extern __inline void @@ -13905,6 +14076,77 @@ __arm_vldrhq_f16 (float16_t const * __base) { return __builtin_mve_vldrhq_fv8hf((__builtin_neon_hi *) __base); } + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrhq_gather_offset_f16 (float16_t const * __base, uint16x8_t __offset) +{ + return __builtin_mve_vldrhq_gather_offset_fv8hf((__builtin_neon_hi *) __base, __offset); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrhq_gather_offset_z_f16 (float16_t const * __base, uint16x8_t __offset, mve_pred16_t __p) +{ + return __builtin_mve_vldrhq_gather_offset_z_fv8hf((__builtin_neon_hi *) __base, __offset, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrhq_gather_shifted_offset_f16 (float16_t const * __base, uint16x8_t __offset) +{ + return __builtin_mve_vldrhq_gather_shifted_offset_fv8hf (__base, __offset); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrhq_gather_shifted_offset_z_f16 (float16_t const * __base, uint16x8_t __offset, mve_pred16_t __p) +{ + return __builtin_mve_vldrhq_gather_shifted_offset_z_fv8hf (__base, __offset, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_base_f32 (uint32x4_t __addr, const int __offset) +{ + return __builtin_mve_vldrwq_gather_base_fv4sf (__addr, __offset); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_base_z_f32 (uint32x4_t __addr, const int __offset, mve_pred16_t __p) +{ + return __builtin_mve_vldrwq_gather_base_z_fv4sf (__addr, __offset, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_offset_f32 (float32_t const * __base, uint32x4_t __offset) +{ + return __builtin_mve_vldrwq_gather_offset_fv4sf((__builtin_neon_si *) __base, __offset); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_offset_z_f32 (float32_t const * __base, uint32x4_t __offset, mve_pred16_t __p) +{ + return __builtin_mve_vldrwq_gather_offset_z_fv4sf((__builtin_neon_si *) __base, __offset, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_shifted_offset_f32 (float32_t const * __base, uint32x4_t __offset) +{ + return __builtin_mve_vldrwq_gather_shifted_offset_fv4sf (__base, __offset); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_shifted_offset_z_f32 (float32_t const * __base, uint32x4_t __offset, mve_pred16_t __p) +{ + return __builtin_mve_vldrwq_gather_shifted_offset_z_fv4sf (__base, __offset, __p); +} + #endif enum { @@ -16384,6 +16626,74 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_float16_t_const_ptr]: __arm_vld1q_f16 (__ARM_mve_coerce(__p0, float16_t const *)), \ int (*)[__ARM_mve_type_float32_t_const_ptr]: __arm_vld1q_f32 (__ARM_mve_coerce(__p0, float32_t const *)));}) +#define vldrhq_gather_offset(p0,p1) __arm_vldrhq_gather_offset(p0,p1) +#define __arm_vldrhq_gather_offset(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int16_t_const_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_s16 (__ARM_mve_coerce(__p0, int16_t const *), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_int16_t_const_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_offset_s32 (__ARM_mve_coerce(__p0, int16_t const *), __ARM_mve_coerce(__p1, uint32x4_t)), \ + int (*)[__ARM_mve_type_uint16_t_const_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_u16 (__ARM_mve_coerce(__p0, uint16_t const *), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint16_t_const_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_offset_u32 (__ARM_mve_coerce(__p0, uint16_t const *), __ARM_mve_coerce(__p1, uint32x4_t)), \ + int (*)[__ARM_mve_type_float16_t_const_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_f16 (__ARM_mve_coerce(__p0, float16_t const *), __ARM_mve_coerce(__p1, uint16x8_t)));}) + +#define vldrhq_gather_offset_z(p0,p1,p2) __arm_vldrhq_gather_offset_z(p0,p1,p2) +#define __arm_vldrhq_gather_offset_z(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int16_t_const_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_z_s16 (__ARM_mve_coerce(__p0, int16_t const *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ + int (*)[__ARM_mve_type_int16_t_const_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_offset_z_s32 (__ARM_mve_coerce(__p0, int16_t const *), __ARM_mve_coerce(__p1, uint32x4_t), p2), \ + int (*)[__ARM_mve_type_uint16_t_const_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_z_u16 (__ARM_mve_coerce(__p0, uint16_t const *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ + int (*)[__ARM_mve_type_uint16_t_const_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_offset_z_u32 (__ARM_mve_coerce(__p0, uint16_t const *), __ARM_mve_coerce(__p1, uint32x4_t), p2), \ + int (*)[__ARM_mve_type_float16_t_const_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_z_f16 (__ARM_mve_coerce(__p0, float16_t const *), __ARM_mve_coerce(__p1, uint16x8_t), p2));}) + +#define vldrhq_gather_shifted_offset(p0,p1) __arm_vldrhq_gather_shifted_offset(p0,p1) +#define __arm_vldrhq_gather_shifted_offset(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int16_t_const_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_shifted_offset_s16 (__ARM_mve_coerce(__p0, int16_t const *), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_int16_t_const_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_shifted_offset_s32 (__ARM_mve_coerce(__p0, int16_t const *), __ARM_mve_coerce(__p1, uint32x4_t)), \ + int (*)[__ARM_mve_type_uint16_t_const_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_shifted_offset_u16 (__ARM_mve_coerce(__p0, uint16_t const *), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint16_t_const_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_shifted_offset_u32 (__ARM_mve_coerce(__p0, uint16_t const *), __ARM_mve_coerce(__p1, uint32x4_t)), \ + int (*)[__ARM_mve_type_float16_t_const_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_shifted_offset_f16 (__ARM_mve_coerce(__p0, float16_t const *), __ARM_mve_coerce(__p1, uint16x8_t)));}) + +#define vldrhq_gather_shifted_offset_z(p0,p1,p2) __arm_vldrhq_gather_shifted_offset_z(p0,p1,p2) +#define __arm_vldrhq_gather_shifted_offset_z(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int16_t_const_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_shifted_offset_z_s16 (__ARM_mve_coerce(__p0, int16_t const *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ + int (*)[__ARM_mve_type_int16_t_const_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_shifted_offset_z_s32 (__ARM_mve_coerce(__p0, int16_t const *), __ARM_mve_coerce(__p1, uint32x4_t), p2), \ + int (*)[__ARM_mve_type_uint16_t_const_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_shifted_offset_z_u16 (__ARM_mve_coerce(__p0, uint16_t const *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ + int (*)[__ARM_mve_type_uint16_t_const_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_shifted_offset_z_u32 (__ARM_mve_coerce(__p0, uint16_t const *), __ARM_mve_coerce(__p1, uint32x4_t), p2), \ + int (*)[__ARM_mve_type_float16_t_const_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_shifted_offset_z_f16 (__ARM_mve_coerce(__p0, float16_t const *), __ARM_mve_coerce(__p1, uint16x8_t), p2));}) + +#define vldrwq_gather_offset(p0,p1) __arm_vldrwq_gather_offset(p0,p1) +#define __arm_vldrwq_gather_offset(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int32_t_const_ptr]: __arm_vldrwq_gather_offset_s32 (__ARM_mve_coerce(__p0, int32_t const *), p1), \ + int (*)[__ARM_mve_type_uint32_t_const_ptr]: __arm_vldrwq_gather_offset_u32 (__ARM_mve_coerce(__p0, uint32_t const *), p1), \ + int (*)[__ARM_mve_type_float32_t_const_ptr]: __arm_vldrwq_gather_offset_f32 (__ARM_mve_coerce(__p0, float32_t const *), p1));}) + +#define vldrwq_gather_offset_z(p0,p1,p2) __arm_vldrwq_gather_offset_z(p0,p1,p2) +#define __arm_vldrwq_gather_offset_z(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int32_t_const_ptr]: __arm_vldrwq_gather_offset_z_s32 (__ARM_mve_coerce(__p0, int32_t const *), p1, p2), \ + int (*)[__ARM_mve_type_uint32_t_const_ptr]: __arm_vldrwq_gather_offset_z_u32 (__ARM_mve_coerce(__p0, uint32_t const *), p1, p2), \ + int (*)[__ARM_mve_type_float32_t_const_ptr]: __arm_vldrwq_gather_offset_z_f32 (__ARM_mve_coerce(__p0, float32_t const *), p1, p2));}) + +#define vldrwq_gather_shifted_offset(p0,p1) __arm_vldrwq_gather_shifted_offset(p0,p1) +#define __arm_vldrwq_gather_shifted_offset(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int32_t_const_ptr]: __arm_vldrwq_gather_shifted_offset_s32 (__ARM_mve_coerce(__p0, int32_t const *), p1), \ + int (*)[__ARM_mve_type_uint32_t_const_ptr]: __arm_vldrwq_gather_shifted_offset_u32 (__ARM_mve_coerce(__p0, uint32_t const *), p1), \ + int (*)[__ARM_mve_type_float32_t_const_ptr]: __arm_vldrwq_gather_shifted_offset_f32 (__ARM_mve_coerce(__p0, float32_t const *), p1));}) + +#define vldrwq_gather_shifted_offset_z(p0,p1,p2) __arm_vldrwq_gather_shifted_offset_z(p0,p1,p2) +#define __arm_vldrwq_gather_shifted_offset_z(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int32_t_const_ptr]: __arm_vldrwq_gather_shifted_offset_z_s32 (__ARM_mve_coerce(__p0, int32_t const *), p1, p2), \ + int (*)[__ARM_mve_type_uint32_t_const_ptr]: __arm_vldrwq_gather_shifted_offset_z_u32 (__ARM_mve_coerce(__p0, uint32_t const *), p1, p2), \ + int (*)[__ARM_mve_type_float32_t_const_ptr]: __arm_vldrwq_gather_shifted_offset_z_f32 (__ARM_mve_coerce(__p0, float32_t const *), p1, p2));}) + #else /* MVE Integer. */ #define vst4q(p0,p1) __arm_vst4q(p0,p1) @@ -18526,8 +18836,57 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint16_t_const_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_shifted_offset_z_u16 (__ARM_mve_coerce(__p0, uint16_t const *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ int (*)[__ARM_mve_type_uint16_t_const_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_shifted_offset_z_u32 (__ARM_mve_coerce(__p0, uint16_t const *), __ARM_mve_coerce(__p1, uint32x4_t), p2));}) +#define vldrwq_gather_offset(p0,p1) __arm_vldrwq_gather_offset(p0,p1) +#define __arm_vldrwq_gather_offset(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int32_t_const_ptr]: __arm_vldrwq_gather_offset_s32 (__ARM_mve_coerce(__p0, int32_t const *), p1), \ + int (*)[__ARM_mve_type_uint32_t_const_ptr]: __arm_vldrwq_gather_offset_u32 (__ARM_mve_coerce(__p0, uint32_t const *), p1));}) + +#define vldrwq_gather_offset_z(p0,p1,p2) __arm_vldrwq_gather_offset_z(p0,p1,p2) +#define __arm_vldrwq_gather_offset_z(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int32_t_const_ptr]: __arm_vldrwq_gather_offset_z_s32 (__ARM_mve_coerce(__p0, int32_t const *), p1, p2), \ + int (*)[__ARM_mve_type_uint32_t_const_ptr]: __arm_vldrwq_gather_offset_z_u32 (__ARM_mve_coerce(__p0, uint32_t const *), p1, p2));}) + +#define vldrwq_gather_shifted_offset(p0,p1) __arm_vldrwq_gather_shifted_offset(p0,p1) +#define __arm_vldrwq_gather_shifted_offset(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int32_t_const_ptr]: __arm_vldrwq_gather_shifted_offset_s32 (__ARM_mve_coerce(__p0, int32_t const *), p1), \ + int (*)[__ARM_mve_type_uint32_t_const_ptr]: __arm_vldrwq_gather_shifted_offset_u32 (__ARM_mve_coerce(__p0, uint32_t const *), p1));}) + +#define vldrwq_gather_shifted_offset_z(p0,p1,p2) __arm_vldrwq_gather_shifted_offset_z(p0,p1,p2) +#define __arm_vldrwq_gather_shifted_offset_z(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int32_t_const_ptr]: __arm_vldrwq_gather_shifted_offset_z_s32 (__ARM_mve_coerce(__p0, int32_t const *), p1, p2), \ + int (*)[__ARM_mve_type_uint32_t_const_ptr]: __arm_vldrwq_gather_shifted_offset_z_u32 (__ARM_mve_coerce(__p0, uint32_t const *), p1, p2));}) + #endif /* MVE Integer. */ +#define vldrdq_gather_offset(p0,p1) __arm_vldrdq_gather_offset(p0,p1) +#define __arm_vldrdq_gather_offset(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int64_t_const_ptr]: __arm_vldrdq_gather_offset_s64 (__ARM_mve_coerce(__p0, int64_t const *), p1), \ + int (*)[__ARM_mve_type_uint64_t_const_ptr]: __arm_vldrdq_gather_offset_u64 (__ARM_mve_coerce(__p0, uint64_t const *), p1));}) + +#define vldrdq_gather_offset_z(p0,p1,p2) __arm_vldrdq_gather_offset_z(p0,p1,p2) +#define __arm_vldrdq_gather_offset_z(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int64_t_const_ptr]: __arm_vldrdq_gather_offset_z_s64 (__ARM_mve_coerce(__p0, int64_t const *), p1, p2), \ + int (*)[__ARM_mve_type_uint64_t_const_ptr]: __arm_vldrdq_gather_offset_z_u64 (__ARM_mve_coerce(__p0, uint64_t const *), p1, p2));}) + +#define vldrdq_gather_shifted_offset(p0,p1) __arm_vldrdq_gather_shifted_offset(p0,p1) +#define __arm_vldrdq_gather_shifted_offset(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int64_t_const_ptr]: __arm_vldrdq_gather_shifted_offset_s64 (__ARM_mve_coerce(__p0, int64_t const *), p1), \ + int (*)[__ARM_mve_type_uint64_t_const_ptr]: __arm_vldrdq_gather_shifted_offset_u64 (__ARM_mve_coerce(__p0, uint64_t const *), p1));}) + +#define vldrdq_gather_shifted_offset_z(p0,p1,p2) __arm_vldrdq_gather_shifted_offset_z(p0,p1,p2) +#define __arm_vldrdq_gather_shifted_offset_z(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int64_t_const_ptr]: __arm_vldrdq_gather_shifted_offset_z_s64 (__ARM_mve_coerce(__p0, int64_t const *), p1, p2), \ + int (*)[__ARM_mve_type_uint64_t_const_ptr]: __arm_vldrdq_gather_shifted_offset_z_u64 (__ARM_mve_coerce(__p0, uint64_t const *), p1, p2));}) + + #define vldrbq_gather_offset_z(p0,p1,p2) __arm_vldrbq_gather_offset_z(p0,p1,p2) #define __arm_vldrbq_gather_offset_z(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def index bafc953a5e5..fc303612d13 100644 --- a/gcc/config/arm/arm_mve_builtins.def +++ b/gcc/config/arm/arm_mve_builtins.def @@ -732,3 +732,33 @@ VAR1 (LDRU, vldrwq_u, v4si) VAR1 (LDRS_Z, vldrwq_z_f, v4sf) VAR1 (LDRS_Z, vldrwq_z_s, v4si) VAR1 (LDRU_Z, vldrwq_z_u, v4si) +VAR1 (LDRGBS, vldrdq_gather_base_s, v2di) +VAR1 (LDRGBS, vldrwq_gather_base_f, v4sf) +VAR1 (LDRGBS_Z, vldrdq_gather_base_z_s, v2di) +VAR1 (LDRGBS_Z, vldrwq_gather_base_z_f, v4sf) +VAR1 (LDRGBU, vldrdq_gather_base_u, v2di) +VAR1 (LDRGBU_Z, vldrdq_gather_base_z_u, v2di) +VAR1 (LDRGS, vldrdq_gather_offset_s, v2di) +VAR1 (LDRGS, vldrdq_gather_shifted_offset_s, v2di) +VAR1 (LDRGS, vldrhq_gather_offset_f, v8hf) +VAR1 (LDRGS, vldrhq_gather_shifted_offset_f, v8hf) +VAR1 (LDRGS, vldrwq_gather_offset_f, v4sf) +VAR1 (LDRGS, vldrwq_gather_offset_s, v4si) +VAR1 (LDRGS, vldrwq_gather_shifted_offset_f, v4sf) +VAR1 (LDRGS, vldrwq_gather_shifted_offset_s, v4si) +VAR1 (LDRGS_Z, vldrdq_gather_offset_z_s, v2di) +VAR1 (LDRGS_Z, vldrdq_gather_shifted_offset_z_s, v2di) +VAR1 (LDRGS_Z, vldrhq_gather_offset_z_f, v8hf) +VAR1 (LDRGS_Z, vldrhq_gather_shifted_offset_z_f, v8hf) +VAR1 (LDRGS_Z, vldrwq_gather_offset_z_f, v4sf) +VAR1 (LDRGS_Z, vldrwq_gather_offset_z_s, v4si) +VAR1 (LDRGS_Z, vldrwq_gather_shifted_offset_z_f, v4sf) +VAR1 (LDRGS_Z, vldrwq_gather_shifted_offset_z_s, v4si) +VAR1 (LDRGU, vldrdq_gather_offset_u, v2di) +VAR1 (LDRGU, vldrdq_gather_shifted_offset_u, v2di) +VAR1 (LDRGU, vldrwq_gather_offset_u, v4si) +VAR1 (LDRGU, vldrwq_gather_shifted_offset_u, v4si) +VAR1 (LDRGU_Z, vldrdq_gather_offset_z_u, v2di) +VAR1 (LDRGU_Z, vldrdq_gather_shifted_offset_z_u, v2di) +VAR1 (LDRGU_Z, vldrwq_gather_offset_z_u, v4si) +VAR1 (LDRGU_Z, vldrwq_gather_shifted_offset_z_u, v4si) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index 89ff2e269e5..b0c0b872b43 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -197,7 +197,11 @@ VLDRBQGO_U VLDRBQ_S VLDRBQ_U VLDRWQGB_S VLDRWQGB_U VLD1Q_F VLD1Q_S VLD1Q_U VLDRHQ_F VLDRHQGO_S VLDRHQGO_U VLDRHQGSO_S VLDRHQGSO_U VLDRHQ_S VLDRHQ_U - VLDRWQ_F VLDRWQ_S VLDRWQ_U]) + VLDRWQ_F VLDRWQ_S VLDRWQ_U VLDRDQGB_S VLDRDQGB_U + VLDRDQGO_S VLDRDQGO_U VLDRDQGSO_S VLDRDQGSO_U + VLDRHQGO_F VLDRHQGSO_F VLDRWQGB_F VLDRWQGO_F + VLDRWQGO_S VLDRWQGO_U VLDRWQGSO_F VLDRWQGSO_S + VLDRWQGSO_U]) (define_mode_attr MVE_CNVT [(V8HI "V8HF") (V4SI "V4SF") (V8HF "V8HI") (V4SF "V4SI")]) @@ -356,7 +360,10 @@ (VLD1Q_S "s") (VLD1Q_U "u") (VLDRHQGO_S "s") (VLDRHQGO_U "u") (VLDRHQGSO_S "s") (VLDRHQGSO_U "u") (VLDRHQ_S "s") (VLDRHQ_U "u") (VLDRWQ_S "s") - (VLDRWQ_U "u")]) + (VLDRWQ_U "u") (VLDRDQGB_S "s") (VLDRDQGB_U "u") + (VLDRDQGO_S "s") (VLDRDQGO_U "u") (VLDRDQGSO_S "s") + (VLDRDQGSO_U "u") (VLDRWQGO_S "s") (VLDRWQGO_U "u") + (VLDRWQGSO_S "s") (VLDRWQGSO_U "u")]) (define_int_attr mode1 [(VCTP8Q "8") (VCTP16Q "16") (VCTP32Q "32") (VCTP64Q "64") (VCTP8Q_M "8") (VCTP16Q_M "16") @@ -590,6 +597,11 @@ (define_int_iterator VLDRHGSOQ [VLDRHQGSO_S VLDRHQGSO_U]) (define_int_iterator VLDRHQ [VLDRHQ_S VLDRHQ_U]) (define_int_iterator VLDRWQ [VLDRWQ_S VLDRWQ_U]) +(define_int_iterator VLDRDGBQ [VLDRDQGB_S VLDRDQGB_U]) +(define_int_iterator VLDRDGOQ [VLDRDQGO_S VLDRDQGO_U]) +(define_int_iterator VLDRDGSOQ [VLDRDQGSO_S VLDRDQGSO_U]) +(define_int_iterator VLDRWGOQ [VLDRWQGO_S VLDRWQGO_U]) +(define_int_iterator VLDRWGSOQ [VLDRWQGSO_S VLDRWQGSO_U]) (define_insn "*mve_mov" [(set (match_operand:MVE_types 0 "nonimmediate_operand" "=w,w,r,w,w,r,w,Us") @@ -8496,3 +8508,419 @@ emit_insn (gen_mve_vldrq_(operands[0],operands[1])); DONE; }) + +;; +;; [vldrdq_gather_base_s vldrdq_gather_base_u] +;; +(define_insn "mve_vldrdq_gather_base_v2di" + [(set (match_operand:V2DI 0 "s_register_operand" "=&w") + (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + VLDRDGBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[1]; + ops[2] = operands[2]; + output_asm_insn ("vldrd.64\t%q0, [%q1, %2]",ops); + return ""; +} + [(set_attr "length" "4")]) + +;; +;; [vldrdq_gather_base_z_s vldrdq_gather_base_z_u] +;; +(define_insn "mve_vldrdq_gather_base_z_v2di" + [(set (match_operand:V2DI 0 "s_register_operand" "=&w") + (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VLDRDGBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[1]; + ops[2] = operands[2]; + output_asm_insn ("vpst\n\tvldrdt.u64\t%q0, [%q1, %2]",ops); + return ""; +} + [(set_attr "length" "8")]) + +;; +;; [vldrdq_gather_offset_s vldrdq_gather_offset_u] +;; +(define_insn "mve_vldrdq_gather_offset_v2di" + [(set (match_operand:V2DI 0 "s_register_operand" "=&w") + (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "Us") + (match_operand:V2DI 2 "s_register_operand" "w")] + VLDRDGOQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[1]; + ops[2] = operands[2]; + output_asm_insn ("vldrd.u64\t%q0, [%m1, %q2]",ops); + return ""; +} + [(set_attr "length" "4")]) + +;; +;; [vldrdq_gather_offset_z_s vldrdq_gather_offset_z_u] +;; +(define_insn "mve_vldrdq_gather_offset_z_v2di" + [(set (match_operand:V2DI 0 "s_register_operand" "=&w") + (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "Us") + (match_operand:V2DI 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VLDRDGOQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[1]; + ops[2] = operands[2]; + output_asm_insn ("vpst\n\tvldrdt.u64\t%q0, [%m1, %q2]",ops); + return ""; +} + [(set_attr "length" "8")]) + +;; +;; [vldrdq_gather_shifted_offset_s vldrdq_gather_shifted_offset_u] +;; +(define_insn "mve_vldrdq_gather_shifted_offset_v2di" + [(set (match_operand:V2DI 0 "s_register_operand" "=&w") + (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "Us") + (match_operand:V2DI 2 "s_register_operand" "w")] + VLDRDGSOQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[1]; + ops[2] = operands[2]; + output_asm_insn ("vldrd.u64\t%q0, [%m1, %q2, uxtw #3]",ops); + return ""; +} + [(set_attr "length" "4")]) + +;; +;; [vldrdq_gather_shifted_offset_z_s vldrdq_gather_shifted_offset_z_u] +;; +(define_insn "mve_vldrdq_gather_shifted_offset_z_v2di" + [(set (match_operand:V2DI 0 "s_register_operand" "=&w") + (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "Us") + (match_operand:V2DI 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VLDRDGSOQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[1]; + ops[2] = operands[2]; + output_asm_insn ("vpst\n\tvldrdt.u64\t%q0, [%m1, %q2, uxtw #3]",ops); + return ""; +} + [(set_attr "length" "8")]) + +;; +;; [vldrhq_gather_offset_f] +;; +(define_insn "mve_vldrhq_gather_offset_fv8hf" + [(set (match_operand:V8HF 0 "s_register_operand" "=&w") + (unspec:V8HF [(match_operand:V8HI 1 "memory_operand" "Us") + (match_operand:V8HI 2 "s_register_operand" "w")] + VLDRHQGO_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[1]; + ops[2] = operands[2]; + output_asm_insn ("vldrh.f16\t%q0, [%m1, %q2]",ops); + return ""; +} + [(set_attr "length" "4")]) + +;; +;; [vldrhq_gather_offset_z_f] +;; +(define_insn "mve_vldrhq_gather_offset_z_fv8hf" + [(set (match_operand:V8HF 0 "s_register_operand" "=&w") + (unspec:V8HF [(match_operand:V8HI 1 "memory_operand" "Us") + (match_operand:V8HI 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VLDRHQGO_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ops[4]; + ops[0] = operands[0]; + ops[1] = operands[1]; + ops[2] = operands[2]; + ops[3] = operands[3]; + output_asm_insn ("vpst\n\tvldrht.f16\t%q0, [%m1, %q2]",ops); + return ""; +} + [(set_attr "length" "8")]) + +;; +;; [vldrhq_gather_shifted_offset_f] +;; +(define_insn "mve_vldrhq_gather_shifted_offset_fv8hf" + [(set (match_operand:V8HF 0 "s_register_operand" "=&w") + (unspec:V8HF [(match_operand:V8HI 1 "memory_operand" "Us") + (match_operand:V8HI 2 "s_register_operand" "w")] + VLDRHQGSO_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[1]; + ops[2] = operands[2]; + output_asm_insn ("vldrh.f16\t%q0, [%m1, %q2, uxtw #1]",ops); + return ""; +} + [(set_attr "length" "4")]) + +;; +;; [vldrhq_gather_shifted_offset_z_f] +;; +(define_insn "mve_vldrhq_gather_shifted_offset_z_fv8hf" + [(set (match_operand:V8HF 0 "s_register_operand" "=&w") + (unspec:V8HF [(match_operand:V8HI 1 "memory_operand" "Us") + (match_operand:V8HI 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VLDRHQGSO_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ops[4]; + ops[0] = operands[0]; + ops[1] = operands[1]; + ops[2] = operands[2]; + ops[3] = operands[3]; + output_asm_insn ("vpst\n\tvldrht.f16\t%q0, [%m1, %q2, uxtw #1]",ops); + return ""; +} + [(set_attr "length" "8")]) + +;; +;; [vldrwq_gather_base_f] +;; +(define_insn "mve_vldrwq_gather_base_fv4sf" + [(set (match_operand:V4SF 0 "s_register_operand" "=&w") + (unspec:V4SF [(match_operand:V4SI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + VLDRWQGB_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[1]; + ops[2] = operands[2]; + output_asm_insn ("vldrw.u32\t%q0, [%q1, %2]",ops); + return ""; +} + [(set_attr "length" "4")]) + +;; +;; [vldrwq_gather_base_z_f] +;; +(define_insn "mve_vldrwq_gather_base_z_fv4sf" + [(set (match_operand:V4SF 0 "s_register_operand" "=&w") + (unspec:V4SF [(match_operand:V4SI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VLDRWQGB_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[1]; + ops[2] = operands[2]; + output_asm_insn ("vpst\n\tvldrwt.u32\t%q0, [%q1, %2]",ops); + return ""; +} + [(set_attr "length" "8")]) + +;; +;; [vldrwq_gather_offset_f] +;; +(define_insn "mve_vldrwq_gather_offset_fv4sf" + [(set (match_operand:V4SF 0 "s_register_operand" "=&w") + (unspec:V4SF [(match_operand:V4SI 1 "memory_operand" "Us") + (match_operand:V4SI 2 "s_register_operand" "w")] + VLDRWQGO_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[1]; + ops[2] = operands[2]; + output_asm_insn ("vldrw.u32\t%q0, [%m1, %q2]",ops); + return ""; +} + [(set_attr "length" "4")]) + +;; +;; [vldrwq_gather_offset_s vldrwq_gather_offset_u] +;; +(define_insn "mve_vldrwq_gather_offset_v4si" + [(set (match_operand:V4SI 0 "s_register_operand" "=&w") + (unspec:V4SI [(match_operand:V4SI 1 "memory_operand" "Us") + (match_operand:V4SI 2 "s_register_operand" "w")] + VLDRWGOQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[1]; + ops[2] = operands[2]; + output_asm_insn ("vldrw.u32\t%q0, [%m1, %q2]",ops); + return ""; +} + [(set_attr "length" "4")]) + +;; +;; [vldrwq_gather_offset_z_f] +;; +(define_insn "mve_vldrwq_gather_offset_z_fv4sf" + [(set (match_operand:V4SF 0 "s_register_operand" "=&w") + (unspec:V4SF [(match_operand:V4SI 1 "memory_operand" "Us") + (match_operand:V4SI 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VLDRWQGO_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ops[4]; + ops[0] = operands[0]; + ops[1] = operands[1]; + ops[2] = operands[2]; + ops[3] = operands[3]; + output_asm_insn ("vpst\n\tvldrwt.u32\t%q0, [%m1, %q2]",ops); + return ""; +} + [(set_attr "length" "8")]) + +;; +;; [vldrwq_gather_offset_z_s vldrwq_gather_offset_z_u] +;; +(define_insn "mve_vldrwq_gather_offset_z_v4si" + [(set (match_operand:V4SI 0 "s_register_operand" "=&w") + (unspec:V4SI [(match_operand:V4SI 1 "memory_operand" "Us") + (match_operand:V4SI 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VLDRWGOQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[4]; + ops[0] = operands[0]; + ops[1] = operands[1]; + ops[2] = operands[2]; + ops[3] = operands[3]; + output_asm_insn ("vpst\n\tvldrwt.u32\t%q0, [%m1, %q2]",ops); + return ""; +} + [(set_attr "length" "8")]) + +;; +;; [vldrwq_gather_shifted_offset_f] +;; +(define_insn "mve_vldrwq_gather_shifted_offset_fv4sf" + [(set (match_operand:V4SF 0 "s_register_operand" "=&w") + (unspec:V4SF [(match_operand:V4SI 1 "memory_operand" "Us") + (match_operand:V4SI 2 "s_register_operand" "w")] + VLDRWQGSO_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[1]; + ops[2] = operands[2]; + output_asm_insn ("vldrw.u32\t%q0, [%m1, %q2, uxtw #2]",ops); + return ""; +} + [(set_attr "length" "4")]) + +;; +;; [vldrwq_gather_shifted_offset_s vldrwq_gather_shifted_offset_u] +;; +(define_insn "mve_vldrwq_gather_shifted_offset_v4si" + [(set (match_operand:V4SI 0 "s_register_operand" "=&w") + (unspec:V4SI [(match_operand:V4SI 1 "memory_operand" "Us") + (match_operand:V4SI 2 "s_register_operand" "w")] + VLDRWGSOQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[1]; + ops[2] = operands[2]; + output_asm_insn ("vldrw.u32\t%q0, [%m1, %q2, uxtw #2]",ops); + return ""; +} + [(set_attr "length" "4")]) + +;; +;; [vldrwq_gather_shifted_offset_z_f] +;; +(define_insn "mve_vldrwq_gather_shifted_offset_z_fv4sf" + [(set (match_operand:V4SF 0 "s_register_operand" "=&w") + (unspec:V4SF [(match_operand:V4SI 1 "memory_operand" "Us") + (match_operand:V4SI 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VLDRWQGSO_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ops[4]; + ops[0] = operands[0]; + ops[1] = operands[1]; + ops[2] = operands[2]; + ops[3] = operands[3]; + output_asm_insn ("vpst\n\tvldrwt.u32\t%q0, [%m1, %q2, uxtw #2]",ops); + return ""; +} + [(set_attr "length" "8")]) + +;; +;; [vldrwq_gather_shifted_offset_z_s vldrwq_gather_shifted_offset_z_u] +;; +(define_insn "mve_vldrwq_gather_shifted_offset_z_v4si" + [(set (match_operand:V4SI 0 "s_register_operand" "=&w") + (unspec:V4SI [(match_operand:V4SI 1 "memory_operand" "Us") + (match_operand:V4SI 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VLDRWGSOQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[4]; + ops[0] = operands[0]; + ops[1] = operands[1]; + ops[2] = operands[2]; + ops[3] = operands[3]; + output_asm_insn ("vpst\n\tvldrwt.u32\t%q0, [%m1, %q2, uxtw #2]",ops); + return ""; +} + [(set_attr "length" "8")]) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index a86d0e66aac..3b689b15c47 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -43,6 +43,51 @@ * gcc.target/arm/mve/intrinsics/vldrwq_z_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_z_u32.c: Likewise. +2020-03-18 Andre Vieira + Mihail Ionescu + Srinath Parvathaneni + + * gcc.target/arm/mve/intrinsics/vld1q_f16.c: New test. + * gcc.target/arm/mve/intrinsics/vld1q_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld1q_s16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld1q_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld1q_s8.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld1q_u16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld1q_u32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld1q_u8.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrhq_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrhq_gather_offset_s16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrhq_gather_offset_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrhq_gather_offset_u16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrhq_gather_offset_u32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrhq_gather_offset_z_s16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrhq_gather_offset_z_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrhq_gather_offset_z_u16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrhq_gather_offset_z_u32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrhq_gather_shifted_offset_s16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrhq_gather_shifted_offset_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrhq_gather_shifted_offset_u16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrhq_gather_shifted_offset_u32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrhq_gather_shifted_offset_z_s16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrhq_gather_shifted_offset_z_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrhq_gather_shifted_offset_z_u16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrhq_gather_shifted_offset_z_u32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrhq_s16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrhq_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrhq_u16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrhq_u32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrhq_z_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrhq_z_s16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrhq_z_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrhq_z_u16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrhq_z_u32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrwq_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrwq_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrwq_u32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrwq_z_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrwq_z_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrwq_z_u32.c: Likewise. + 2020-03-18 Andre Vieira Mihail Ionescu Srinath Parvathaneni diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_s64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_s64.c new file mode 100644 index 00000000000..0116d35828d --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_s64.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +int64x2_t +foo (uint64x2_t addr) +{ + return vldrdq_gather_base_s64 (addr, 8); +} + +/* { dg-final { scan-assembler "vldrd.64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_u64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_u64.c new file mode 100644 index 00000000000..191e5deb4cf --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_u64.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +uint64x2_t +foo (uint64x2_t addr) +{ + return vldrdq_gather_base_u64 (addr, 8); +} + +/* { dg-final { scan-assembler "vldrd.64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_z_s64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_z_s64.c new file mode 100644 index 00000000000..9193b419b4e --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_z_s64.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +int64x2_t +foo (uint64x2_t addr, mve_pred16_t p) +{ + return vldrdq_gather_base_z_s64 (addr, 8, p); +} + +/* { dg-final { scan-assembler "vldrdt.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_z_u64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_z_u64.c new file mode 100644 index 00000000000..9f156751f55 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_z_u64.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +uint64x2_t +foo (uint64x2_t addr, mve_pred16_t p) +{ + return vldrdq_gather_base_z_u64 (addr, 8, p); +} + +/* { dg-final { scan-assembler "vldrdt.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_offset_s64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_offset_s64.c new file mode 100644 index 00000000000..00547a4b7c9 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_offset_s64.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +int64x2_t +foo (int64_t const * base, uint64x2_t offset) +{ + return vldrdq_gather_offset_s64 (base, offset); +} + +/* { dg-final { scan-assembler "vldrd.u64" } } */ + +int64x2_t +foo1 (int64_t const * base, uint64x2_t offset) +{ + return vldrdq_gather_offset (base, offset); +} + +/* { dg-final { scan-assembler "vldrd.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_offset_u64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_offset_u64.c new file mode 100644 index 00000000000..af59f950947 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_offset_u64.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +uint64x2_t +foo (uint64_t const * base, uint64x2_t offset) +{ + return vldrdq_gather_offset_u64 (base, offset); +} + +/* { dg-final { scan-assembler "vldrd.u64" } } */ + +uint64x2_t +foo1 (uint64_t const * base, uint64x2_t offset) +{ + return vldrdq_gather_offset (base, offset); +} + +/* { dg-final { scan-assembler "vldrd.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_offset_z_s64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_offset_z_s64.c new file mode 100644 index 00000000000..7818470d568 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_offset_z_s64.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +int64x2_t +foo (int64_t const * base, uint64x2_t offset, mve_pred16_t p) +{ + return vldrdq_gather_offset_z_s64 (base, offset, p); +} + +/* { dg-final { scan-assembler "vldrdt.u64" } } */ + +int64x2_t +foo1 (int64_t const * base, uint64x2_t offset, mve_pred16_t p) +{ + return vldrdq_gather_offset_z (base, offset, p); +} + +/* { dg-final { scan-assembler "vldrdt.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_offset_z_u64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_offset_z_u64.c new file mode 100644 index 00000000000..440941026ef --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_offset_z_u64.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +uint64x2_t +foo (uint64_t const * base, uint64x2_t offset, mve_pred16_t p) +{ + return vldrdq_gather_offset_z_u64 (base, offset, p); +} + +/* { dg-final { scan-assembler "vldrdt.u64" } } */ + +uint64x2_t +foo1 (uint64_t const * base, uint64x2_t offset, mve_pred16_t p) +{ + return vldrdq_gather_offset_z (base, offset, p); +} + +/* { dg-final { scan-assembler "vldrdt.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_shifted_offset_s64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_shifted_offset_s64.c new file mode 100644 index 00000000000..6dac7c2f89e --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_shifted_offset_s64.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +int64x2_t +foo (int64_t const * base, uint64x2_t offset) +{ + return vldrdq_gather_shifted_offset_s64 (base, offset); +} + +/* { dg-final { scan-assembler "vldrd.u64" } } */ + +int64x2_t +foo1 (int64_t const * base, uint64x2_t offset) +{ + return vldrdq_gather_shifted_offset (base, offset); +} + +/* { dg-final { scan-assembler "vldrd.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_shifted_offset_u64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_shifted_offset_u64.c new file mode 100644 index 00000000000..b33efc29626 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_shifted_offset_u64.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +uint64x2_t +foo (uint64_t const * base, uint64x2_t offset) +{ + return vldrdq_gather_shifted_offset_u64 (base, offset); +} + +/* { dg-final { scan-assembler "vldrd.u64" } } */ + +uint64x2_t +foo1 (uint64_t const * base, uint64x2_t offset) +{ + return vldrdq_gather_shifted_offset (base, offset); +} + +/* { dg-final { scan-assembler "vldrd.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_shifted_offset_z_s64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_shifted_offset_z_s64.c new file mode 100644 index 00000000000..9a0572e402d --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_shifted_offset_z_s64.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +int64x2_t +foo (int64_t const * base, uint64x2_t offset, mve_pred16_t p) +{ + return vldrdq_gather_shifted_offset_z_s64 (base, offset, p); +} + +/* { dg-final { scan-assembler "vldrdt.u64" } } */ + +int64x2_t +foo1 (int64_t const * base, uint64x2_t offset, mve_pred16_t p) +{ + return vldrdq_gather_shifted_offset_z (base, offset, p); +} + +/* { dg-final { scan-assembler "vldrdt.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_shifted_offset_z_u64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_shifted_offset_z_u64.c new file mode 100644 index 00000000000..50a2cd16196 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_shifted_offset_z_u64.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +uint64x2_t +foo (uint64_t const * base, uint64x2_t offset, mve_pred16_t p) +{ + return vldrdq_gather_shifted_offset_z_u64 (base, offset, p); +} + +/* { dg-final { scan-assembler "vldrdt.u64" } } */ + +uint64x2_t +foo1 (uint64_t const * base, uint64x2_t offset, mve_pred16_t p) +{ + return vldrdq_gather_shifted_offset_z (base, offset, p); +} + +/* { dg-final { scan-assembler "vldrdt.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrhq_gather_offset_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrhq_gather_offset_f16.c new file mode 100644 index 00000000000..a915959d016 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrhq_gather_offset_f16.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve.fp -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16_t const * base, uint16x8_t offset) +{ + return vldrhq_gather_offset_f16 (base, offset); +} + +/* { dg-final { scan-assembler "vldrh.f16" } } */ + +float16x8_t +foo1 (float16_t const * base, uint16x8_t offset) +{ + return vldrhq_gather_offset (base, offset); +} + +/* { dg-final { scan-assembler "vldrh.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrhq_gather_offset_z_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrhq_gather_offset_z_f16.c new file mode 100644 index 00000000000..fdc6762c082 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrhq_gather_offset_z_f16.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve.fp -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16_t const * base, uint16x8_t offset, mve_pred16_t p) +{ + return vldrhq_gather_offset_z_f16 (base, offset, p); +} + +/* { dg-final { scan-assembler "vldrht.f16" } } */ + +float16x8_t +foo1 (float16_t const * base, uint16x8_t offset, mve_pred16_t p) +{ + return vldrhq_gather_offset_z (base, offset, p); +} + +/* { dg-final { scan-assembler "vldrht.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrhq_gather_shifted_offset_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrhq_gather_shifted_offset_f16.c new file mode 100644 index 00000000000..ba9d0f2279a --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrhq_gather_shifted_offset_f16.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve.fp -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16_t const * base, uint16x8_t offset) +{ + return vldrhq_gather_shifted_offset_f16 (base, offset); +} + +/* { dg-final { scan-assembler "vldrh.f16" } } */ + +float16x8_t +foo1 (float16_t const * base, uint16x8_t offset) +{ + return vldrhq_gather_shifted_offset (base, offset); +} + +/* { dg-final { scan-assembler "vldrh.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrhq_gather_shifted_offset_z_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrhq_gather_shifted_offset_z_f16.c new file mode 100644 index 00000000000..561669f0a2a --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrhq_gather_shifted_offset_z_f16.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve.fp -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16_t const * base, uint16x8_t offset, mve_pred16_t p) +{ + return vldrhq_gather_shifted_offset_z_f16 (base, offset, p); +} + +/* { dg-final { scan-assembler "vldrht.f16" } } */ + +float16x8_t +foo1 (float16_t const * base, uint16x8_t offset, mve_pred16_t p) +{ + return vldrhq_gather_shifted_offset_z (base, offset, p); +} + +/* { dg-final { scan-assembler "vldrht.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_f32.c new file mode 100644 index 00000000000..b398bab5e23 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_f32.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve.fp -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +float32x4_t +foo (uint32x4_t addr) +{ + return vldrwq_gather_base_f32 (addr, 4); +} + +/* { dg-final { scan-assembler "vldrw.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_z_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_z_f32.c new file mode 100644 index 00000000000..bc219c7f0b5 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_z_f32.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve.fp -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +float32x4_t +foo (uint32x4_t addr, mve_pred16_t p) +{ + return vldrwq_gather_base_z_f32 (addr, 4, p); +} + +/* { dg-final { scan-assembler "vldrwt.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_offset_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_offset_f32.c new file mode 100644 index 00000000000..2e3b94fbb0e --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_offset_f32.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve.fp -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32_t const * base, uint32x4_t offset) +{ + return vldrwq_gather_offset_f32 (base, offset); +} + +/* { dg-final { scan-assembler "vldrw.u32" } } */ + +float32x4_t +foo1 (float32_t const * base, uint32x4_t offset) +{ + return vldrwq_gather_offset (base, offset); +} + +/* { dg-final { scan-assembler "vldrw.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_offset_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_offset_s32.c new file mode 100644 index 00000000000..fe5d51ccfa5 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_offset_s32.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +int32x4_t +foo (int32_t const * base, uint32x4_t offset) +{ + return vldrwq_gather_offset_s32 (base, offset); +} + +/* { dg-final { scan-assembler "vldrw.u32" } } */ + +int32x4_t +foo1 (int32_t const * base, uint32x4_t offset) +{ + return vldrwq_gather_offset (base, offset); +} + +/* { dg-final { scan-assembler "vldrw.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_offset_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_offset_u32.c new file mode 100644 index 00000000000..89ec3981300 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_offset_u32.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +uint32x4_t +foo (uint32_t const * base, uint32x4_t offset) +{ + return vldrwq_gather_offset_u32 (base, offset); +} + +/* { dg-final { scan-assembler "vldrw.u32" } } */ + +uint32x4_t +foo1 (uint32_t const * base, uint32x4_t offset) +{ + return vldrwq_gather_offset (base, offset); +} + +/* { dg-final { scan-assembler "vldrw.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_offset_z_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_offset_z_f32.c new file mode 100644 index 00000000000..c85a0c2c7f8 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_offset_z_f32.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve.fp -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32_t const * base, uint32x4_t offset, mve_pred16_t p) +{ + return vldrwq_gather_offset_z_f32 (base, offset, p); +} + +/* { dg-final { scan-assembler "vldrwt.u32" } } */ + +float32x4_t +foo1 (float32_t const * base, uint32x4_t offset, mve_pred16_t p) +{ + return vldrwq_gather_offset_z (base, offset, p); +} + +/* { dg-final { scan-assembler "vldrwt.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_offset_z_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_offset_z_s32.c new file mode 100644 index 00000000000..e128b434bd6 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_offset_z_s32.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +int32x4_t +foo (int32_t const * base, uint32x4_t offset, mve_pred16_t p) +{ + return vldrwq_gather_offset_z_s32 (base, offset, p); +} + +/* { dg-final { scan-assembler "vldrwt.u32" } } */ + +int32x4_t +foo1 (int32_t const * base, uint32x4_t offset, mve_pred16_t p) +{ + return vldrwq_gather_offset_z (base, offset, p); +} + +/* { dg-final { scan-assembler "vldrwt.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_offset_z_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_offset_z_u32.c new file mode 100644 index 00000000000..b183b9afefd --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_offset_z_u32.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +uint32x4_t +foo (uint32_t const * base, uint32x4_t offset, mve_pred16_t p) +{ + return vldrwq_gather_offset_z_u32 (base, offset, p); +} + +/* { dg-final { scan-assembler "vldrwt.u32" } } */ + +uint32x4_t +foo1 (uint32_t const * base, uint32x4_t offset, mve_pred16_t p) +{ + return vldrwq_gather_offset_z (base, offset, p); +} + +/* { dg-final { scan-assembler "vldrwt.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_shifted_offset_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_shifted_offset_f32.c new file mode 100644 index 00000000000..67a42f72dd2 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_shifted_offset_f32.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve.fp -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32_t const * base, uint32x4_t offset) +{ + return vldrwq_gather_shifted_offset_f32 (base, offset); +} + +/* { dg-final { scan-assembler "vldrw.u32" } } */ + +float32x4_t +foo1 (float32_t const * base, uint32x4_t offset) +{ + return vldrwq_gather_shifted_offset (base, offset); +} + +/* { dg-final { scan-assembler "vldrw.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_shifted_offset_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_shifted_offset_s32.c new file mode 100644 index 00000000000..283d0a512e5 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_shifted_offset_s32.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +int32x4_t +foo (int32_t const * base, uint32x4_t offset) +{ + return vldrwq_gather_shifted_offset_s32 (base, offset); +} + +/* { dg-final { scan-assembler "vldrw.u32" } } */ + +int32x4_t +foo1 (int32_t const * base, uint32x4_t offset) +{ + return vldrwq_gather_shifted_offset (base, offset); +} + +/* { dg-final { scan-assembler "vldrw.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_shifted_offset_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_shifted_offset_u32.c new file mode 100644 index 00000000000..4783fae28e6 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_shifted_offset_u32.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +uint32x4_t +foo (uint32_t const * base, uint32x4_t offset) +{ + return vldrwq_gather_shifted_offset_u32 (base, offset); +} + +/* { dg-final { scan-assembler "vldrw.u32" } } */ + +uint32x4_t +foo1 (uint32_t const * base, uint32x4_t offset) +{ + return vldrwq_gather_shifted_offset (base, offset); +} + +/* { dg-final { scan-assembler "vldrw.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_shifted_offset_z_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_shifted_offset_z_f32.c new file mode 100644 index 00000000000..c1443854aee --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_shifted_offset_z_f32.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve.fp -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32_t const * base, uint32x4_t offset, mve_pred16_t p) +{ + return vldrwq_gather_shifted_offset_z_f32 (base, offset, p); +} + +/* { dg-final { scan-assembler "vldrwt.u32" } } */ + +float32x4_t +foo1 (float32_t const * base, uint32x4_t offset, mve_pred16_t p) +{ + return vldrwq_gather_shifted_offset_z (base, offset, p); +} + +/* { dg-final { scan-assembler "vldrwt.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_shifted_offset_z_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_shifted_offset_z_s32.c new file mode 100644 index 00000000000..b537998da59 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_shifted_offset_z_s32.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +int32x4_t +foo (int32_t const * base, uint32x4_t offset, mve_pred16_t p) +{ + return vldrwq_gather_shifted_offset_z_s32 (base, offset, p); +} + +/* { dg-final { scan-assembler "vldrwt.u32" } } */ + +int32x4_t +foo1 (int32_t const * base, uint32x4_t offset, mve_pred16_t p) +{ + return vldrwq_gather_shifted_offset_z (base, offset, p); +} + +/* { dg-final { scan-assembler "vldrwt.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_shifted_offset_z_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_shifted_offset_z_u32.c new file mode 100644 index 00000000000..a3d4fde827c --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_shifted_offset_z_u32.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +uint32x4_t +foo (uint32_t const * base, uint32x4_t offset, mve_pred16_t p) +{ + return vldrwq_gather_shifted_offset_z_u32 (base, offset, p); +} + +/* { dg-final { scan-assembler "vldrwt.u32" } } */ + +uint32x4_t +foo1 (uint32_t const * base, uint32x4_t offset, mve_pred16_t p) +{ + return vldrwq_gather_shifted_offset_z (base, offset, p); +} + +/* { dg-final { scan-assembler "vldrwt.u32" } } */ -- 2.30.2