From: Tamar Christina Date: Mon, 21 May 2018 10:33:30 +0000 (+0000) Subject: Add missing AArch64 NEON intrinsics for Armv8.2-a to Armv8.4-a X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=d21052ebd7ac9d545a26dde3229c57f872c1d5f3;p=gcc.git Add missing AArch64 NEON intrinsics for Armv8.2-a to Armv8.4-a This patch adds the missing neon intrinsics for all 128 bit vector Integer modes for the three-way XOR (EOR3) and bit-clear-and-XOR (BCAX) instructions for Armv8.2-a to Armv8.4-a. gcc/ 2018-05-21 Tamar Christina * config/aarch64/aarch64-simd.md (aarch64_eor3qv8hi): Change to eor3q<mode>4. (aarch64_bcaxqv8hi): Change to bcaxq<mode>4. * config/aarch64/aarch64-simd-builtins.def (veor3q_u8, veor3q_u32, veor3q_u64, veor3q_s8, veor3q_s16, veor3q_s32, veor3q_s64, vbcaxq_u8, vbcaxq_u32, vbcaxq_u64, vbcaxq_s8, vbcaxq_s16, vbcaxq_s32, vbcaxq_s64): New. * config/aarch64/arm_neon.h: Likewise. * config/aarch64/iterators.md (VQ_I): New. gcc/testsuite/ 2018-05-21 Tamar Christina * gcc.target/aarch64/sha3.h (veor3q_u8, veor3q_u32, veor3q_u64, veor3q_s8, veor3q_s16, veor3q_s32, veor3q_s64, vbcaxq_u8, vbcaxq_u32, vbcaxq_u64, vbcaxq_s8, vbcaxq_s16, vbcaxq_s32, vbcaxq_s64): New. * gcc.target/aarch64/sha3_1.c: Likewise. * gcc.target/aarch64/sha3_2.c: Likewise. * gcc.target/aarch64/sha3_3.c: Likewise. From-SVN: r260435 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 157a899b9f6..27b5c60af16 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2018-05-21 Tamar Christina + + * config/aarch64/aarch64-simd.md (aarch64_eor3qv8hi): Change to + eor3q<mode>4. + (aarch64_bcaxqv8hi): Change to bcaxq<mode>4. + * config/aarch64/aarch64-simd-builtins.def (veor3q_u8, veor3q_u32, + veor3q_u64, veor3q_s8, veor3q_s16, veor3q_s32, veor3q_s64, vbcaxq_u8, + vbcaxq_u32, vbcaxq_u64, vbcaxq_s8, vbcaxq_s16, vbcaxq_s32, + vbcaxq_s64): New. + * config/aarch64/arm_neon.h: Likewise. + * config/aarch64/iterators.md (VQ_I): New. 
+ 2018-05-21 Alexey Brodkin * config.gcc: Add arc/t-multilib-linux to tmake_file for diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index b383f2485e5..439d4837fe7 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -599,14 +599,16 @@ VAR1 (BINOPU, crypto_sha512su0q, 0, v2di) /* Implemented by aarch64_crypto_sha512su1qv2di. */ VAR1 (TERNOPU, crypto_sha512su1q, 0, v2di) - /* Implemented by aarch64_eor3qv8hi. */ - VAR1 (TERNOPU, eor3q, 0, v8hi) + /* Implemented by eor3q<mode>4. */ + BUILTIN_VQ_I (TERNOPU, eor3q, 4) + BUILTIN_VQ_I (TERNOP, eor3q, 4) /* Implemented by aarch64_rax1qv2di. */ VAR1 (BINOPU, rax1q, 0, v2di) /* Implemented by aarch64_xarqv2di. */ VAR1 (TERNOPUI, xarq, 0, v2di) - /* Implemented by aarch64_bcaxqv8hi. */ - VAR1 (TERNOPU, bcaxq, 0, v8hi) + /* Implemented by bcaxq<mode>4. */ + BUILTIN_VQ_I (TERNOPU, bcaxq, 4) + BUILTIN_VQ_I (TERNOP, bcaxq, 4) /* Implemented by aarch64_fmll_low. 
*/ VAR1 (TERNOP, fmlal_low, 0, v2sf) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 2ebd256329c..c53a774f005 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -5926,13 +5926,13 @@ ;; sha3 -(define_insn "aarch64_eor3qv8hi" - [(set (match_operand:V8HI 0 "register_operand" "=w") - (xor:V8HI - (xor:V8HI - (match_operand:V8HI 2 "register_operand" "%w") - (match_operand:V8HI 3 "register_operand" "w")) - (match_operand:V8HI 1 "register_operand" "w")))] +(define_insn "eor3q<mode>4" + [(set (match_operand:VQ_I 0 "register_operand" "=w") + (xor:VQ_I + (xor:VQ_I + (match_operand:VQ_I 2 "register_operand" "w") + (match_operand:VQ_I 3 "register_operand" "w")) + (match_operand:VQ_I 1 "register_operand" "w")))] "TARGET_SIMD && TARGET_SHA3" "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b" [(set_attr "type" "crypto_sha3")] @@ -5962,13 +5962,13 @@ [(set_attr "type" "crypto_sha3")] ) -(define_insn "aarch64_bcaxqv8hi" - [(set (match_operand:V8HI 0 "register_operand" "=w") - (xor:V8HI - (and:V8HI - (not:V8HI (match_operand:V8HI 3 "register_operand" "w")) - (match_operand:V8HI 2 "register_operand" "w")) - (match_operand:V8HI 1 "register_operand" "w")))] +(define_insn "bcaxq<mode>4" + [(set (match_operand:VQ_I 0 "register_operand" "=w") + (xor:VQ_I + (and:VQ_I + (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w")) + (match_operand:VQ_I 2 "register_operand" "w")) + (match_operand:VQ_I 1 "register_operand" "w")))] "TARGET_SIMD && TARGET_SHA3" "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b" [(set_attr "type" "crypto_sha3")] diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index c45c29ae815..4ff76b41339 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -32068,6 +32068,13 @@ vsha512su1q_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) return __builtin_aarch64_crypto_sha512su1qv2di_uuuu (__a, __b, __c); } +__extension__ extern __inline uint8x16_t +__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) +veor3q_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) +{ + return __builtin_aarch64_eor3qv16qi_uuuu (__a, __b, __c); +} + __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) veor3q_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) @@ -32075,6 +32082,49 @@ veor3q_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) return __builtin_aarch64_eor3qv8hi_uuuu (__a, __b, __c); } +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +veor3q_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) +{ + return __builtin_aarch64_eor3qv4si_uuuu (__a, __b, __c); +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +veor3q_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) +{ + return __builtin_aarch64_eor3qv2di_uuuu (__a, __b, __c); +} + + +__extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +veor3q_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c) +{ + return __builtin_aarch64_eor3qv16qi (__a, __b, __c); +} + +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +veor3q_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return __builtin_aarch64_eor3qv8hi (__a, __b, __c); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +veor3q_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return __builtin_aarch64_eor3qv4si (__a, __b, __c); +} + +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +veor3q_s64 (int64x2_t __a, int64x2_t __b, int64x2_t __c) +{ + return __builtin_aarch64_eor3qv2di (__a, __b, __c); +} + __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) vrax1q_u64 (uint64x2_t __a, uint64x2_t __b) @@ -32089,12 +32139,63 @@ vxarq_u64 (uint64x2_t __a, uint64x2_t __b, const int imm6) return __builtin_aarch64_xarqv2di_uuus (__a, __b,imm6); } +__extension__ extern __inline uint8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vbcaxq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) +{ + return __builtin_aarch64_bcaxqv16qi_uuuu (__a, __b, __c); +} + __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbcaxq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) { return __builtin_aarch64_bcaxqv8hi_uuuu (__a, __b, __c); } + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vbcaxq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) +{ + return __builtin_aarch64_bcaxqv4si_uuuu (__a, __b, __c); +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vbcaxq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) +{ + return __builtin_aarch64_bcaxqv2di_uuuu (__a, __b, __c); +} + +__extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vbcaxq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c) +{ + return __builtin_aarch64_bcaxqv16qi (__a, __b, __c); +} + +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vbcaxq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return __builtin_aarch64_bcaxqv8hi (__a, __b, __c); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vbcaxq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return __builtin_aarch64_bcaxqv4si (__a, __b, __c); +} + +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vbcaxq_s64 (int64x2_t __a, int64x2_t __b, 
int64x2_t __c) +{ + return __builtin_aarch64_bcaxqv2di (__a, __b, __c); +} + + #pragma GCC pop_options #pragma GCC push_options diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index bcedb86291f..ae4ec9d1565 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -78,6 +78,9 @@ ;; Quad vector modes. (define_mode_iterator VQ [V16QI V8HI V4SI V2DI V8HF V4SF V2DF]) +;; Quad integer vector modes. +(define_mode_iterator VQ_I [V16QI V8HI V4SI V2DI]) + ;; VQ without 2 element modes. (define_mode_iterator VQ_NO2E [V16QI V8HI V4SI V8HF V4SF]) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 250f7a247ed..6f917cbe36e 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,13 @@ +2018-05-21 Tamar Christina + + * gcc.target/aarch64/sha3.h (veor3q_u8, veor3q_u32, + veor3q_u64, veor3q_s8, veor3q_s16, veor3q_s32, veor3q_s64, vbcaxq_u8, + vbcaxq_u32, vbcaxq_u64, vbcaxq_s8, vbcaxq_s16, vbcaxq_s32, + vbcaxq_s64): New. + * gcc.target/aarch64/sha3_1.c: Likewise. + * gcc.target/aarch64/sha3_2.c: Likewise. + * gcc.target/aarch64/sha3_3.c: Likewise. 
+ 2018-05-21 Janus Weil PR fortran/85841 diff --git a/gcc/testsuite/gcc.target/aarch64/sha3.h b/gcc/testsuite/gcc.target/aarch64/sha3.h index 76dd1931dff..c8537c25196 100644 --- a/gcc/testsuite/gcc.target/aarch64/sha3.h +++ b/gcc/testsuite/gcc.target/aarch64/sha3.h @@ -1,10 +1,26 @@ #include "arm_neon.h" -uint16x8_t -test_veor3q_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) -{ - return veor3q_u16 (a, b, c); -} +#define TEST_VEOR3(T, S) T \ +test_veor3q_ ## S (T a, T b, T c) \ +{ \ + return veor3q_ ## S (a, b, c); \ +} \ + +#define TEST_VBCAX(T, S) T \ +test_vbcaxq_ ## S (T a, T b, T c) \ +{ \ + return vbcaxq_ ## S (a, b, c); \ +} \ + + +TEST_VEOR3 (uint8x16_t, u8) +TEST_VEOR3 (uint16x8_t, u16) +TEST_VEOR3 (uint32x4_t, u32) +TEST_VEOR3 (uint64x2_t, u64) +TEST_VEOR3 (int8x16_t, s8) +TEST_VEOR3 (int16x8_t, s16) +TEST_VEOR3 (int32x4_t, s32) +TEST_VEOR3 (int64x2_t, s64) uint64x2_t test_vrax1q_u64 (uint64x2_t a, uint64x2_t b) @@ -18,8 +34,12 @@ test_vxarq_u64 (uint64x2_t a, uint64x2_t b) return vxarq_u64 (a, b, 15); } -uint16x8_t -test_vbcaxq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) -{ - return vbcaxq_u16 (a, b, c); -} +TEST_VBCAX (uint8x16_t, u8) +TEST_VBCAX (uint16x8_t, u16) +TEST_VBCAX (uint32x4_t, u32) +TEST_VBCAX (uint64x2_t, u64) +TEST_VBCAX (int8x16_t, s8) +TEST_VBCAX (int16x8_t, s16) +TEST_VBCAX (int32x4_t, s32) +TEST_VBCAX (int64x2_t, s64) + diff --git a/gcc/testsuite/gcc.target/aarch64/sha3_1.c b/gcc/testsuite/gcc.target/aarch64/sha3_1.c index 879eadd875e..0727ce77028 100644 --- a/gcc/testsuite/gcc.target/aarch64/sha3_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sha3_1.c @@ -4,7 +4,7 @@ #include "sha3.h" -/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */ +/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 8 } } */ /* { dg-final { scan-assembler-times "rax1\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */ /* { dg-final { 
scan-assembler-times "xar\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d, 15" 1 } } */ -/* { dg-final { scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */ +/* { dg-final { scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 8 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sha3_2.c b/gcc/testsuite/gcc.target/aarch64/sha3_2.c index 2afe28c4744..2d051161133 100644 --- a/gcc/testsuite/gcc.target/aarch64/sha3_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sha3_2.c @@ -3,7 +3,7 @@ #include "sha3.h" -/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */ +/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 8 } } */ /* { dg-final { scan-assembler-times "rax1\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */ /* { dg-final { scan-assembler-times "xar\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d, 15" 1 } } */ -/* { dg-final { scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */ +/* { dg-final { scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 8 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sha3_3.c b/gcc/testsuite/gcc.target/aarch64/sha3_3.c index 8915c805c3e..8d8ee77c293 100644 --- a/gcc/testsuite/gcc.target/aarch64/sha3_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sha3_3.c @@ -3,7 +3,7 @@ #include "sha3.h" -/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */ +/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 8 } } */ /* { dg-final { scan-assembler-times "rax1\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */ /* { dg-final { scan-assembler-times "xar\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d, 15" 1 } } */ -/* { dg-final { 
scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */ +/* { dg-final { scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 8 } } */