From 53cd0ac643ed1fcb507ceb01dc531da1868f88d7 Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Thu, 26 Sep 2019 10:46:14 +0000 Subject: [PATCH] [arm] Implement non-GE-setting SIMD32 intrinsics This patch is part of a series to implement the SIMD32 ACLE intrinsics [1]. The interesting parts implementation-wise involve adding support for setting and reading the Q bit for saturation and the GE-bits for the packed SIMD instructions. That will come in a later patch. For now, this patch implements the other intrinsics that don't need anything special; just a mapping from arm_acle.h function to builtin to RTL expander+unspec. I've compressed as many as I could with iterators so that we end up needing only 3 new define_insns. Bootstrapped and tested on arm-none-linux-gnueabihf. [1] https://developer.arm.com/docs/101028/latest/data-processing-intrinsics * config/arm/arm.md (arm_<simd32_op>): New define_insn. (arm_<sup>xtb16): Likewise. (arm_usada8): Likewise. * config/arm/arm_acle.h (__qadd8, __qsub8, __shadd8, __shsub8, __uhadd8, __uhsub8, __uqadd8, __uqsub8, __qadd16, __qasx, __qsax, __qsub16, __shadd16, __shasx, __shsax, __shsub16, __uhadd16, __uhasx, __uhsax, __uhsub16, __uqadd16, __uqasx, __uqsax, __uqsub16, __sxtab16, __sxtb16, __uxtab16, __uxtb16): Define. * config/arm/arm_acle_builtins.def: Define builtins for the above. * config/arm/unspecs.md: Define unspecs for the above. * config/arm/iterators.md (SIMD32_NOGE_BINOP): New int_iterator. (USXTB16): Likewise. (simd32_op): New int_attribute. (sup): Handle UNSPEC_SXTB16, UNSPEC_UXTB16. * doc/sourcebuild.texi (arm_simd32_ok): Document. * lib/target-supports.exp (check_effective_target_arm_simd32_ok_nocache): New procedure. (check_effective_target_arm_simd32_ok): Likewise. (add_options_for_arm_simd32): Likewise. * gcc.target/arm/acle/simd32.c: New test. 
From-SVN: r276146 --- gcc/ChangeLog | 18 ++ gcc/config/arm/arm.md | 30 +++ gcc/config/arm/arm_acle.h | 232 +++++++++++++++++++ gcc/config/arm/arm_acle_builtins.def | 33 +++ gcc/config/arm/iterators.md | 30 +++ gcc/config/arm/unspecs.md | 34 +++ gcc/doc/sourcebuild.texi | 7 + gcc/testsuite/ChangeLog | 8 + gcc/testsuite/gcc.target/arm/acle/simd32.c | 246 +++++++++++++++++++++ gcc/testsuite/lib/target-supports.exp | 39 ++++ 10 files changed, 677 insertions(+) create mode 100644 gcc/testsuite/gcc.target/arm/acle/simd32.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6bbcfe5ab20..fad363b93db 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,21 @@ +2019-09-26 Kyrylo Tkachov + + * config/arm/arm.md (arm_<simd32_op>): New define_insn. + (arm_<sup>xtb16): Likewise. + (arm_usada8): Likewise. + * config/arm/arm_acle.h (__qadd8, __qsub8, __shadd8, __shsub8, + __uhadd8, __uhsub8, __uqadd8, __uqsub8, __qadd16, __qasx, __qsax, + __qsub16, __shadd16, __shasx, __shsax, __shsub16, __uhadd16, __uhasx, + __uhsax, __uhsub16, __uqadd16, __uqasx, __uqsax, __uqsub16, __sxtab16, + __sxtb16, __uxtab16, __uxtb16): Define. + * config/arm/arm_acle_builtins.def: Define builtins for the above. + * config/arm/unspecs.md: Define unspecs for the above. + * config/arm/iterators.md (SIMD32_NOGE_BINOP): New int_iterator. + (USXTB16): Likewise. + (simd32_op): New int_attribute. + (sup): Handle UNSPEC_SXTB16, UNSPEC_UXTB16. + * doc/sourcebuild.texi (arm_simd32_ok): Document. 
+ 2019-09-26 Martin Jambor * ipa-sra.c (verify_splitting_accesses): Fix quoting in a call to diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index d607f88cb05..99e4acde55e 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -5058,6 +5058,36 @@ (set_attr "predicable" "yes")] ) +(define_insn "arm_<sup>xtb16" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (unspec:SI + [(match_operand:SI 1 "s_register_operand" "r")] USXTB16))] + "TARGET_INT_SIMD" + "<sup>xtb16%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "alu_dsp_reg")]) + +(define_insn "arm_<simd32_op>" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (unspec:SI + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "s_register_operand" "r")] SIMD32_NOGE_BINOP))] + "TARGET_INT_SIMD" + "<simd32_op>%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "alu_dsp_reg")]) + +(define_insn "arm_usada8" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (unspec:SI + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "s_register_operand" "r")] UNSPEC_USADA8))] + "TARGET_INT_SIMD" + "usada8%?\\t%0, %1, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "alu_dsp_reg")]) + (define_expand "extendsfdf2" [(set (match_operand:DF 0 "s_register_operand") (float_extend:DF (match_operand:SF 1 "s_register_operand")))] diff --git a/gcc/config/arm/arm_acle.h b/gcc/config/arm/arm_acle.h index 6857ab1787d..9c6f12d5566 100644 --- a/gcc/config/arm/arm_acle.h +++ b/gcc/config/arm/arm_acle.h @@ -173,6 +173,238 @@ __arm_mrrc2 (const unsigned int __coproc, const unsigned int __opc1, #endif /* __ARM_ARCH >= 5. */ #endif /* (!__thumb__ || __thumb2__) && __ARM_ARCH >= 4. 
*/ +#ifdef __ARM_FEATURE_SIMD32 +typedef int32_t int16x2_t; +typedef uint32_t uint16x2_t; +typedef int32_t int8x4_t; +typedef uint32_t uint8x4_t; + +__extension__ extern __inline int16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__sxtab16 (int16x2_t __a, int8x4_t __b) +{ + return __builtin_arm_sxtab16 (__a, __b); +} + +__extension__ extern __inline int16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__sxtb16 (int8x4_t __a) +{ + return __builtin_arm_sxtb16 (__a); +} + +__extension__ extern __inline uint16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__uxtab16 (uint16x2_t __a, uint8x4_t __b) +{ + return __builtin_arm_uxtab16 (__a, __b); +} + +__extension__ extern __inline uint16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__uxtb16 (uint8x4_t __a) +{ + return __builtin_arm_uxtb16 (__a); +} + +__extension__ extern __inline int8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__qadd8 (int8x4_t __a, int8x4_t __b) +{ + return __builtin_arm_qadd8 (__a, __b); +} + +__extension__ extern __inline int8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__qsub8 (int8x4_t __a, int8x4_t __b) +{ + return __builtin_arm_qsub8 (__a, __b); +} + +__extension__ extern __inline int8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__shadd8 (int8x4_t __a, int8x4_t __b) +{ + return __builtin_arm_shadd8 (__a, __b); +} + +__extension__ extern __inline int8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__shsub8 (int8x4_t __a, int8x4_t __b) +{ + return __builtin_arm_shsub8 (__a, __b); +} + +__extension__ extern __inline uint8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__uhadd8 (uint8x4_t __a, uint8x4_t __b) +{ + return __builtin_arm_uhadd8 (__a, __b); +} + +__extension__ extern __inline uint8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) +__uhsub8 (uint8x4_t __a, uint8x4_t __b) +{ + return __builtin_arm_uhsub8 (__a, __b); +} + +__extension__ extern __inline uint8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__uqadd8 (uint8x4_t __a, uint8x4_t __b) +{ + return __builtin_arm_uqadd8 (__a, __b); +} + +__extension__ extern __inline uint8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__uqsub8 (uint8x4_t __a, uint8x4_t __b) +{ + return __builtin_arm_uqsub8 (__a, __b); +} + +__extension__ extern __inline int16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__qadd16 (int16x2_t __a, int16x2_t __b) +{ + return __builtin_arm_qadd16 (__a, __b); +} + +__extension__ extern __inline int16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__qasx (int16x2_t __a, int16x2_t __b) +{ + return __builtin_arm_qasx (__a, __b); +} + +__extension__ extern __inline int16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__qsax (int16x2_t __a, int16x2_t __b) +{ + return __builtin_arm_qsax (__a, __b); +} + +__extension__ extern __inline int16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__qsub16 (int16x2_t __a, int16x2_t __b) +{ + return __builtin_arm_qsub16 (__a, __b); +} + +__extension__ extern __inline int16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__shadd16 (int16x2_t __a, int16x2_t __b) +{ + return __builtin_arm_shadd16 (__a, __b); +} + +__extension__ extern __inline int16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__shasx (int16x2_t __a, int16x2_t __b) +{ + return __builtin_arm_shasx (__a, __b); +} + +__extension__ extern __inline int16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__shsax (int16x2_t __a, int16x2_t __b) +{ + return __builtin_arm_shsax (__a, __b); +} + +__extension__ extern __inline int16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) +__shsub16 (int16x2_t __a, int16x2_t __b) +{ + return __builtin_arm_shsub16 (__a, __b); +} + +__extension__ extern __inline uint16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__uhadd16 (uint16x2_t __a, uint16x2_t __b) +{ + return __builtin_arm_uhadd16 (__a, __b); +} + +__extension__ extern __inline uint16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__uhasx (uint16x2_t __a, uint16x2_t __b) +{ + return __builtin_arm_uhasx (__a, __b); +} + +__extension__ extern __inline uint16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__uhsax (uint16x2_t __a, uint16x2_t __b) +{ + return __builtin_arm_uhsax (__a, __b); +} + +__extension__ extern __inline uint16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__uhsub16 (uint16x2_t __a, uint16x2_t __b) +{ + return __builtin_arm_uhsub16 (__a, __b); +} + +__extension__ extern __inline uint16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__uqadd16 (uint16x2_t __a, uint16x2_t __b) +{ + return __builtin_arm_uqadd16 (__a, __b); +} + +__extension__ extern __inline uint16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__uqasx (uint16x2_t __a, uint16x2_t __b) +{ + return __builtin_arm_uqasx (__a, __b); +} + +__extension__ extern __inline uint16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__uqsax (uint16x2_t __a, uint16x2_t __b) +{ + return __builtin_arm_uqsax (__a, __b); +} + +__extension__ extern __inline uint16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__uqsub16 (uint16x2_t __a, uint16x2_t __b) +{ + return __builtin_arm_uqsub16 (__a, __b); +} + +__extension__ extern __inline int32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__smusd (int16x2_t __a, int16x2_t __b) +{ + return __builtin_arm_smusd (__a, __b); +} + +__extension__ extern __inline int32_t +__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) +__smusdx (int16x2_t __a, int16x2_t __b) +{ + return __builtin_arm_smusdx (__a, __b); +} + +__extension__ extern __inline uint32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__usad8 (uint8x4_t __a, uint8x4_t __b) +{ + return __builtin_arm_usad8 (__a, __b); +} + +__extension__ extern __inline uint32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__usada8 (uint8x4_t __a, uint8x4_t __b, uint32_t __c) +{ + return __builtin_arm_usada8 (__a, __b, __c); +} + +#endif + #pragma GCC push_options #ifdef __ARM_FEATURE_CRC32 #ifdef __ARM_FP diff --git a/gcc/config/arm/arm_acle_builtins.def b/gcc/config/arm/arm_acle_builtins.def index b2438d66da2..c675fc46dae 100644 --- a/gcc/config/arm/arm_acle_builtins.def +++ b/gcc/config/arm/arm_acle_builtins.def @@ -42,3 +42,36 @@ VAR1 (MCRR, mcrr, void) VAR1 (MCRR, mcrr2, void) VAR1 (MRRC, mrrc, di) VAR1 (MRRC, mrrc2, di) + +VAR1 (BINOP, sxtab16, si) +VAR1 (UBINOP, uxtab16, si) +VAR1 (UNOP, sxtb16, si) +VAR1 (BSWAP, uxtb16, si) +VAR1 (BINOP, qadd8, si) +VAR1 (BINOP, qsub8, si) +VAR1 (BINOP, shadd8, si) +VAR1 (BINOP, shsub8, si) +VAR1 (UBINOP, uhadd8, si) +VAR1 (UBINOP, uhsub8, si) +VAR1 (UBINOP, uqadd8, si) +VAR1 (UBINOP, uqsub8, si) +VAR1 (BINOP, qadd16, si) +VAR1 (BINOP, qasx, si) +VAR1 (BINOP, qsax, si) +VAR1 (BINOP, qsub16, si) +VAR1 (BINOP, shadd16, si) +VAR1 (BINOP, shasx, si) +VAR1 (BINOP, shsax, si) +VAR1 (BINOP, shsub16, si) +VAR1 (UBINOP, uhadd16, si) +VAR1 (UBINOP, uhasx, si) +VAR1 (UBINOP, uhsax, si) +VAR1 (UBINOP, uhsub16, si) +VAR1 (UBINOP, uqadd16, si) +VAR1 (UBINOP, uqasx, si) +VAR1 (UBINOP, uqsax, si) +VAR1 (UBINOP, uqsub16, si) +VAR1 (BINOP, smusd, si) +VAR1 (BINOP, smusdx, si) +VAR1 (UBINOP, usad8, si) +VAR1 (UTERNOP, usada8, si) diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 2d8ef3f5fbd..538f5bf6b01 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -430,6 
+430,19 @@ (define_int_iterator CRYPTO_SELECTING [UNSPEC_SHA1C UNSPEC_SHA1M UNSPEC_SHA1P]) +(define_int_iterator USXTB16 [UNSPEC_SXTB16 UNSPEC_UXTB16]) +(define_int_iterator SIMD32_NOGE_BINOP + [UNSPEC_QADD8 UNSPEC_QSUB8 UNSPEC_SHADD8 + UNSPEC_SHSUB8 UNSPEC_UHADD8 UNSPEC_UHSUB8 + UNSPEC_UQADD8 UNSPEC_UQSUB8 + UNSPEC_QADD16 UNSPEC_QASX UNSPEC_QSAX + UNSPEC_QSUB16 UNSPEC_SHADD16 UNSPEC_SHASX + UNSPEC_SHSAX UNSPEC_SHSUB16 UNSPEC_UHADD16 + UNSPEC_UHASX UNSPEC_UHSAX UNSPEC_UHSUB16 + UNSPEC_UQADD16 UNSPEC_UQASX UNSPEC_UQSAX + UNSPEC_UQSUB16 UNSPEC_SMUSD UNSPEC_SMUSDX + UNSPEC_SXTAB16 UNSPEC_UXTAB16 UNSPEC_USAD8]) + (define_int_iterator VQRDMLH_AS [UNSPEC_VQRDMLAH UNSPEC_VQRDMLSH]) (define_int_iterator VFM_LANE_AS [UNSPEC_VFMA_LANE UNSPEC_VFMS_LANE]) @@ -835,6 +848,7 @@ ;; Mapping between vector UNSPEC operations and the signed ('s'), ;; unsigned ('u'), poly ('p') or float ('f') nature of their data type. (define_int_attr sup [ + (UNSPEC_SXTB16 "s") (UNSPEC_UXTB16 "u") (UNSPEC_VADDL_S "s") (UNSPEC_VADDL_U "u") (UNSPEC_VADDW_S "s") (UNSPEC_VADDW_U "u") (UNSPEC_VRHADD_S "s") (UNSPEC_VRHADD_U "u") @@ -1023,6 +1037,22 @@ (UNSPEC_VCMLA180 "180") (UNSPEC_VCMLA270 "270")]) +(define_int_attr simd32_op [(UNSPEC_QADD8 "qadd8") (UNSPEC_QSUB8 "qsub8") + (UNSPEC_SHADD8 "shadd8") (UNSPEC_SHSUB8 "shsub8") + (UNSPEC_UHADD8 "uhadd8") (UNSPEC_UHSUB8 "uhsub8") + (UNSPEC_UQADD8 "uqadd8") (UNSPEC_UQSUB8 "uqsub8") + (UNSPEC_QADD16 "qadd16") (UNSPEC_QASX "qasx") + (UNSPEC_QSAX "qsax") (UNSPEC_QSUB16 "qsub16") + (UNSPEC_SHADD16 "shadd16") (UNSPEC_SHASX "shasx") + (UNSPEC_SHSAX "shsax") (UNSPEC_SHSUB16 "shsub16") + (UNSPEC_UHADD16 "uhadd16") (UNSPEC_UHASX "uhasx") + (UNSPEC_UHSAX "uhsax") (UNSPEC_UHSUB16 "uhsub16") + (UNSPEC_UQADD16 "uqadd16") (UNSPEC_UQASX "uqasx") + (UNSPEC_UQSAX "uqsax") (UNSPEC_UQSUB16 "uqsub16") + (UNSPEC_SMUSD "smusd") (UNSPEC_SMUSDX "smusdx") + (UNSPEC_SXTAB16 "sxtab16") (UNSPEC_UXTAB16 "uxtab16") + (UNSPEC_USAD8 "usad8")]) + ;; Both kinds of return insn. 
(define_code_iterator RETURNS [return simple_return]) (define_code_attr return_str [(return "") (simple_return "simple_")]) diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index a9f99d04a8a..08a6cd77ce0 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -90,8 +90,42 @@ UNSPEC_SP_TEST ; Represent the testing of stack protector's canary ; against the guard. UNSPEC_PIC_RESTORE ; Use to restore fdpic register + + UNSPEC_SXTAB16 ; Represent the SXTAB16 operation. + UNSPEC_UXTAB16 ; Represent the UXTAB16 operation. + UNSPEC_SXTB16 ; Represent the SXTB16 operation. + UNSPEC_UXTB16 ; Represent the UXTB16 operation. + UNSPEC_QADD8 ; Represent the QADD8 operation. + UNSPEC_QSUB8 ; Represent the QSUB8 operation. + UNSPEC_SHADD8 ; Represent the SHADD8 operation. + UNSPEC_SHSUB8 ; Represent the SHSUB8 operation. + UNSPEC_UHADD8 ; Represent the UHADD8 operation. + UNSPEC_UHSUB8 ; Represent the UHSUB8 operation. + UNSPEC_UQADD8 ; Represent the UQADD8 operation. + UNSPEC_UQSUB8 ; Represent the UQSUB8 operation. + UNSPEC_QADD16 ; Represent the QADD16 operation. + UNSPEC_QASX ; Represent the QASX operation. + UNSPEC_QSAX ; Represent the QSAX operation. + UNSPEC_QSUB16 ; Represent the QSUB16 operation. + UNSPEC_SHADD16 ; Represent the SHADD16 operation. + UNSPEC_SHASX ; Represent the SHASX operation. + UNSPEC_SHSAX ; Represent the SHSAX operation. + UNSPEC_SHSUB16 ; Represent the SHSUB16 operation. + UNSPEC_UHADD16 ; Represent the UHADD16 operation. + UNSPEC_UHASX ; Represent the UHASX operation. + UNSPEC_UHSAX ; Represent the UHSAX operation. + UNSPEC_UHSUB16 ; Represent the UHSUB16 operation. + UNSPEC_UQADD16 ; Represent the UQADD16 operation. + UNSPEC_UQASX ; Represent the UQASX operation. + UNSPEC_UQSAX ; Represent the UQSAX operation. + UNSPEC_UQSUB16 ; Represent the UQSUB16 operation. + UNSPEC_SMUSD ; Represent the SMUSD operation. + UNSPEC_SMUSDX ; Represent the SMUSDX operation. + UNSPEC_USAD8 ; Represent the USAD8 operation. 
+ UNSPEC_USADA8 ; Represent the USADA8 operation. ]) + (define_c_enum "unspec" [ UNSPEC_WADDC ; Used by the intrinsic form of the iWMMXt WADDC instruction. UNSPEC_WABS ; Used by the intrinsic form of the iWMMXt WABS instruction. diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index 4ace224a8ff..9b98f013263 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -1900,6 +1900,13 @@ in @ref{arm_coproc2_ok} in addition the following: @code{MCRR} and @code{MRRC}. @item arm_coproc4_ok ARM target supports all the coprocessor instructions also listed as supported in @ref{arm_coproc3_ok} in addition the following: @code{MCRR2} and @code{MRRC2}. + +@item arm_simd32_ok +@anchor{arm_simd32_ok} +ARM target supports options suitable for accessing the SIMD32 intrinsics from +@code{arm_acle.h}. +Some multilibs may be incompatible with these options. + @end table @subsubsection AArch64-specific attributes diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 0274587a244..d3a0af6ced1 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2019-09-26 Kyrylo Tkachov + + * lib/target-supports.exp + (check_effective_target_arm_simd32_ok_nocache): New procedure. + (check_effective_target_arm_simd32_ok): Likewise. + (add_options_for_arm_simd32): Likewise. + * gcc.target/arm/acle/simd32.c: New test. + 2019-09-26 Richard Sandiford * gcc.target/arm/fp16-compile-alt-3.c: Expect (__fp16) -2.0 diff --git a/gcc/testsuite/gcc.target/arm/acle/simd32.c b/gcc/testsuite/gcc.target/arm/acle/simd32.c new file mode 100644 index 00000000000..f5c116d1396 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/acle/simd32.c @@ -0,0 +1,246 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_simd32_ok } */ +/* { dg-add-options arm_simd32 } */ + +#include <arm_acle.h> + +int16x2_t +test_sxtab16 (int16x2_t a, int8x4_t b) +{ + return __sxtab16 (a, b); +} + +/* { dg-final { scan-assembler-times "sxtab16\t...?, ...?, ...?" 
1 } } */ + + +int16x2_t +test_sxtb16 (int8x4_t a) +{ + return __sxtb16 (a); +} + +/* { dg-final { scan-assembler-times "sxtb16\t...?, ...?" 1 } } */ + +int8x4_t +test_qadd8 (int8x4_t a, int8x4_t b) +{ + return __qadd8 (a, b); +} + +/* { dg-final { scan-assembler-times "\tqadd8\t...?, ...?, ...?" 1 } } */ + +int8x4_t +test_qsub8 (int8x4_t a, int8x4_t b) +{ + return __qsub8 (a, b); +} + +/* { dg-final { scan-assembler-times "\tqsub8\t...?, ...?, ...?" 1 } } */ + +int8x4_t +test_shadd8 (int8x4_t a, int8x4_t b) +{ + return __shadd8 (a, b); +} + +/* { dg-final { scan-assembler-times "\tshadd8\t...?, ...?, ...?" 1 } } */ + +int8x4_t +test_shsub8 (int8x4_t a, int8x4_t b) +{ + return __shsub8 (a, b); +} + +/* { dg-final { scan-assembler-times "\tshsub8\t...?, ...?, ...?" 1 } } */ + +uint8x4_t +test_uhadd8 (uint8x4_t a, uint8x4_t b) +{ + return __uhadd8 (a, b); +} + +/* { dg-final { scan-assembler-times "\tuhadd8\t...?, ...?, ...?" 1 } } */ + +uint8x4_t +test_uhsub8 (uint8x4_t a, uint8x4_t b) +{ + return __uhsub8 (a, b); +} + +/* { dg-final { scan-assembler-times "\tuhsub8\t...?, ...?, ...?" 1 } } */ + +uint8x4_t +test_uqadd8 (uint8x4_t a, uint8x4_t b) +{ + return __uqadd8 (a, b); +} + +/* { dg-final { scan-assembler-times "\tuqadd8\t...?, ...?, ...?" 1 } } */ + +uint8x4_t +test_uqsub8 (uint8x4_t a, uint8x4_t b) +{ + return __uqsub8 (a, b); +} + +/* { dg-final { scan-assembler-times "\tuqsub8\t...?, ...?, ...?" 1 } } */ + +int16x2_t +test_qadd16 (int16x2_t a, int16x2_t b) +{ + return __qadd16 (a, b); +} + +/* { dg-final { scan-assembler-times "\tqadd16\t...?, ...?, ...?" 1 } } */ + +int16x2_t +test_qasx (int16x2_t a, int16x2_t b) +{ + return __qasx (a, b); +} + +/* { dg-final { scan-assembler-times "\tqasx\t...?, ...?, ...?" 1 } } */ + +int16x2_t +test_qsax (int16x2_t a, int16x2_t b) +{ + return __qsax (a, b); +} + +/* { dg-final { scan-assembler-times "\tqsax\t...?, ...?, ...?" 
1 } } */ + +int16x2_t +test_qsub16 (int16x2_t a, int16x2_t b) +{ + return __qsub16 (a, b); +} + +/* { dg-final { scan-assembler-times "\tqsub16\t...?, ...?, ...?" 1 } } */ + +int16x2_t +test_shadd16 (int16x2_t a, int16x2_t b) +{ + return __shadd16 (a, b); +} + +/* { dg-final { scan-assembler-times "\tshadd16\t...?, ...?, ...?" 1 } } */ + +int16x2_t +test_shasx (int16x2_t a, int16x2_t b) +{ + return __shasx (a, b); +} + +/* { dg-final { scan-assembler-times "\tshasx\t...?, ...?, ...?" 1 } } */ + +int16x2_t +test_shsax (int16x2_t a, int16x2_t b) +{ + return __shsax (a, b); +} + +/* { dg-final { scan-assembler-times "\tshsax\t...?, ...?, ...?" 1 } } */ + +int16x2_t +test_shsub16 (int16x2_t a, int16x2_t b) +{ + return __shsub16 (a, b); +} + +/* { dg-final { scan-assembler-times "\tshsub16\t...?, ...?, ...?" 1 } } */ + +uint16x2_t +test_uhadd16 (uint16x2_t a, uint16x2_t b) +{ + return __uhadd16 (a, b); +} + +/* { dg-final { scan-assembler-times "\tuhadd16\t...?, ...?, ...?" 1 } } */ + +uint16x2_t +test_uhasx (uint16x2_t a, uint16x2_t b) +{ + return __uhasx (a, b); +} + +/* { dg-final { scan-assembler-times "\tuhasx\t...?, ...?, ...?" 1 } } */ + +uint16x2_t +test_uhsax (uint16x2_t a, uint16x2_t b) +{ + return __uhsax (a, b); +} + +/* { dg-final { scan-assembler-times "\tuhsax\t...?, ...?, ...?" 1 } } */ + +uint16x2_t +test_uhsub16 (uint16x2_t a, uint16x2_t b) +{ + return __uhsub16 (a, b); +} + +/* { dg-final { scan-assembler-times "\tuhsub16\t...?, ...?, ...?" 1 } } */ + +uint16x2_t +test_uqadd16 (uint16x2_t a, uint16x2_t b) +{ + return __uqadd16 (a, b); +} + +/* { dg-final { scan-assembler-times "\tuqadd16\t...?, ...?, ...?" 1 } } */ + +uint16x2_t +test_uqasx (uint16x2_t a, uint16x2_t b) +{ + return __uqasx (a, b); +} + +/* { dg-final { scan-assembler-times "\tuqasx\t...?, ...?, ...?" 1 } } */ + +uint16x2_t +test_uqsax (uint16x2_t a, uint16x2_t b) +{ + return __uqsax (a, b); +} + +/* { dg-final { scan-assembler-times "\tuqsax\t...?, ...?, ...?" 
1 } } */ + +uint16x2_t +test_uqsub16 (uint16x2_t a, uint16x2_t b) +{ + return __uqsub16 (a, b); +} + +/* { dg-final { scan-assembler-times "\tuqsub16\t...?, ...?, ...?" 1 } } */ + +int32_t +test_smusd (int16x2_t a, int16x2_t b) +{ + return __smusd (a, b); +} + +/* { dg-final { scan-assembler-times "\tsmusd\t...?, ...?, ...?" 1 } } */ + +int32_t +test_smusdx (int16x2_t a, int16x2_t b) +{ + return __smusdx (a, b); +} + +/* { dg-final { scan-assembler-times "\tsmusdx\t...?, ...?, ...?" 1 } } */ + +uint32_t +test_usad8 (uint8x4_t a, uint8x4_t b) +{ + return __usad8 (a, b); +} + +/* { dg-final { scan-assembler-times "\tusad8\t...?, ...?, ...?" 1 } } */ + +uint32_t +test_usada8 (uint8x4_t a, uint8x4_t b, uint32_t c) +{ + return __usada8 (a, b, c); +} + +/* { dg-final { scan-assembler-times "\tusada8\t...?, ...?, ...?, ...?" 1 } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 6a1aaca9691..0268acd91d8 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -3806,6 +3806,45 @@ proc check_effective_target_arm_neon_ok { } { check_effective_target_arm_neon_ok_nocache] } + +# Return 1 if this is an ARM target supporting the SIMD32 intrinsics +# from arm_acle.h. Some multilibs may be incompatible with these options. +# Also set et_arm_simd32_flags to the best options to add. +# arm_acle.h includes stdint.h which can cause trouble with incompatible +# -mfloat-abi= options. 
+ +proc check_effective_target_arm_simd32_ok_nocache { } { + global et_arm_simd32_flags + set et_arm_simd32_flags "" + foreach flags {"" "-march=armv6" "-march=armv6 -mfloat-abi=softfp" "-march=armv6 -mfloat-abi=hard"} { + if { [check_no_compiler_messages_nocache arm_simd32_ok object { + #include <arm_acle.h> + int dummy; + #ifndef __ARM_FEATURE_SIMD32 + #error not SIMD32 + #endif + } "$flags"] } { + set et_arm_simd32_flags $flags + return 1 + } + } + + return 0 +} + +proc check_effective_target_arm_simd32_ok { } { + return [check_cached_effective_target arm_simd32_ok \ + check_effective_target_arm_simd32_ok_nocache] +} + +proc add_options_for_arm_simd32 { flags } { + if { ! [check_effective_target_arm_simd32_ok] } { + return "$flags" + } + global et_arm_simd32_flags + return "$flags $et_arm_simd32_flags" +} + # Return 1 if this is an ARM target supporting -mfpu=neon without any # -mfloat-abi= option. Useful in tests where add_options is not # supported (such as lto tests). -- 2.30.2