Add missing AArch64 NEON instrinctics for Armv8.2-a to Armv8.4-a

author Tamar Christina <tamar.christina@arm.com>

Mon, 21 May 2018 10:33:30 +0000 (10:33 +0000)

committer Tamar Christina <tnfchris@gcc.gnu.org>

Mon, 21 May 2018 10:33:30 +0000 (10:33 +0000)
author Tamar Christina <tamar.christina@arm.com>
Mon, 21 May 2018 10:33:30 +0000 (10:33 +0000)
committer Tamar Christina <tnfchris@gcc.gnu.org>
Mon, 21 May 2018 10:33:30 +0000 (10:33 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 157a899b9f6c121c3ea7f752f8cf44d3a8a27001..27b5c60af169531f4891f30c79570b1a2101422b 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2018-05-21  Tamar Christina  <tamar.christina@arm.com>
+
+       * config/aarch64/aarch64-simd.md (aarch64_eor3qv8hi): Change to
+       eor3q<mode>4.
+       (aarch64_bcaxqv8hi): Change to bcaxq<mode>4.
+       * config/aarch64/aarch64-simd-builtins.def (veor3q_u8, veor3q_u32,
+       veor3q_u64, veor3q_s8, veor3q_s16, veor3q_s32, veor3q_s64, vbcaxq_u8,
+       vbcaxq_u32, vbcaxq_u64, vbcaxq_s8, vbcaxq_s16, vbcaxq_s32,
+       vbcaxq_s64): New.
+       * config/aarch64/arm_neon.h: Likewise.
+       * config/aarch64/iterators.md (VQ_I): New.
+
  2018-05-21  Alexey Brodkin <abrodkin@synopsys.com>
  
         * config.gcc: Add arc/t-multilib-linux to tmake_file for
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def

index b383f2485e5a287c6d833122d6be0c9ff2ef72a2..439d4837fe724b33d4c1bd834570fb464f47eb5b 100644 (file)
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -599,14 +599,16 @@
    VAR1 (BINOPU, crypto_sha512su0q, 0, v2di)
    /* Implemented by aarch64_crypto_sha512su1qv2di.  */
    VAR1 (TERNOPU, crypto_sha512su1q, 0, v2di)
-  /* Implemented by aarch64_eor3qv8hi.  */
-  VAR1 (TERNOPU, eor3q, 0, v8hi)
+  /* Implemented by eor3q<mode>4.  */
+  BUILTIN_VQ_I (TERNOPU, eor3q, 4)
+  BUILTIN_VQ_I (TERNOP, eor3q, 4)
    /* Implemented by aarch64_rax1qv2di.  */
    VAR1 (BINOPU, rax1q, 0, v2di)
    /* Implemented by aarch64_xarqv2di.  */
    VAR1 (TERNOPUI, xarq, 0, v2di)
-  /* Implemented by aarch64_bcaxqv8hi.  */
-  VAR1 (TERNOPU, bcaxq, 0, v8hi)
+  /* Implemented by bcaxq<mode>4.  */
+  BUILTIN_VQ_I (TERNOPU, bcaxq, 4)
+  BUILTIN_VQ_I (TERNOP, bcaxq, 4)
  
    /* Implemented by aarch64_fml<f16mac1>l<f16quad>_low<mode>.  */
    VAR1 (TERNOP, fmlal_low, 0, v2sf)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md

index 2ebd256329c1a6a6b790d16955cbcee3feca456c..c53a774f00538ed9c118d0338da590ed6ba603d9 100644 (file)
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -5926,13 +5926,13 @@
  
  ;; sha3
  
-(define_insn "aarch64_eor3qv8hi"
-  [(set (match_operand:V8HI 0 "register_operand" "=w")
-       (xor:V8HI
-        (xor:V8HI
-         (match_operand:V8HI 2 "register_operand" "%w")
-         (match_operand:V8HI 3 "register_operand" "w"))
-        (match_operand:V8HI 1 "register_operand" "w")))]
+(define_insn "eor3q<mode>4"
+  [(set (match_operand:VQ_I 0 "register_operand" "=w")
+       (xor:VQ_I
+        (xor:VQ_I
+         (match_operand:VQ_I 2 "register_operand" "w")
+         (match_operand:VQ_I 3 "register_operand" "w"))
+        (match_operand:VQ_I 1 "register_operand" "w")))]
    "TARGET_SIMD && TARGET_SHA3"
    "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
    [(set_attr "type" "crypto_sha3")]
@@ -5962,13 +5962,13 @@
    [(set_attr "type" "crypto_sha3")]
  )
  
-(define_insn "aarch64_bcaxqv8hi"
-  [(set (match_operand:V8HI 0 "register_operand" "=w")
-       (xor:V8HI
-        (and:V8HI
-         (not:V8HI (match_operand:V8HI 3 "register_operand" "w"))
-         (match_operand:V8HI 2 "register_operand" "w"))
-        (match_operand:V8HI 1 "register_operand" "w")))]
+(define_insn "bcaxq<mode>4"
+  [(set (match_operand:VQ_I 0 "register_operand" "=w")
+       (xor:VQ_I
+        (and:VQ_I
+         (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
+         (match_operand:VQ_I 2 "register_operand" "w"))
+        (match_operand:VQ_I 1 "register_operand" "w")))]
    "TARGET_SIMD && TARGET_SHA3"
    "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
    [(set_attr "type" "crypto_sha3")]
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h

index c45c29ae815c9ff373eb2f57a77ebeda910a30cf..4ff76b4133959ae598468dff2554db37a0d07a62 100644 (file)
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -32068,6 +32068,13 @@ vsha512su1q_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
    return __builtin_aarch64_crypto_sha512su1qv2di_uuuu (__a, __b, __c);
  }
  
+__extension__ extern __inline uint8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+veor3q_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
+{
+  return __builtin_aarch64_eor3qv16qi_uuuu (__a, __b, __c);
+}
+
  __extension__ extern __inline uint16x8_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  veor3q_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
@@ -32075,6 +32082,49 @@ veor3q_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
    return __builtin_aarch64_eor3qv8hi_uuuu (__a, __b, __c);
  }
  
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+veor3q_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
+{
+  return __builtin_aarch64_eor3qv4si_uuuu (__a, __b, __c);
+}
+
+__extension__ extern __inline uint64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+veor3q_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
+{
+  return __builtin_aarch64_eor3qv2di_uuuu (__a, __b, __c);
+}
+
+
+__extension__ extern __inline int8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+veor3q_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c)
+{
+  return __builtin_aarch64_eor3qv16qi (__a, __b, __c);
+}
+
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+veor3q_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
+{
+  return __builtin_aarch64_eor3qv8hi (__a, __b, __c);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+veor3q_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
+{
+  return __builtin_aarch64_eor3qv4si (__a, __b, __c);
+}
+
+__extension__ extern __inline int64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+veor3q_s64 (int64x2_t __a, int64x2_t __b, int64x2_t __c)
+{
+  return __builtin_aarch64_eor3qv2di (__a, __b, __c);
+}
+
  __extension__ extern __inline uint64x2_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  vrax1q_u64 (uint64x2_t __a, uint64x2_t __b)
@@ -32089,12 +32139,63 @@ vxarq_u64 (uint64x2_t __a, uint64x2_t __b, const int imm6)
    return __builtin_aarch64_xarqv2di_uuus (__a, __b,imm6);
  }
  
+__extension__ extern __inline uint8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbcaxq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
+{
+  return __builtin_aarch64_bcaxqv16qi_uuuu (__a, __b, __c);
+}
+
  __extension__ extern __inline uint16x8_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  vbcaxq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
  {
    return __builtin_aarch64_bcaxqv8hi_uuuu (__a, __b, __c);
  }
+
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbcaxq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
+{
+  return __builtin_aarch64_bcaxqv4si_uuuu (__a, __b, __c);
+}
+
+__extension__ extern __inline uint64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbcaxq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
+{
+  return __builtin_aarch64_bcaxqv2di_uuuu (__a, __b, __c);
+}
+
+__extension__ extern __inline int8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbcaxq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c)
+{
+  return __builtin_aarch64_bcaxqv16qi (__a, __b, __c);
+}
+
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbcaxq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
+{
+  return __builtin_aarch64_bcaxqv8hi (__a, __b, __c);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbcaxq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
+{
+  return __builtin_aarch64_bcaxqv4si (__a, __b, __c);
+}
+
+__extension__ extern __inline int64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbcaxq_s64 (int64x2_t __a, int64x2_t __b, int64x2_t __c)
+{
+  return __builtin_aarch64_bcaxqv2di (__a, __b, __c);
+}
+
+
  #pragma GCC pop_options
  
  #pragma GCC push_options
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md

index bcedb86291fae367abd50f98c8b76c304d195051..ae4ec9d1565cb32d5d450969175183e6dd53190c 100644 (file)
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -78,6 +78,9 @@
  ;; Quad vector modes.
  (define_mode_iterator VQ [V16QI V8HI V4SI V2DI V8HF V4SF V2DF])
  
+;; Quad integer vector modes.
+(define_mode_iterator VQ_I [V16QI V8HI V4SI V2DI])
+
  ;; VQ without 2 element modes.
  (define_mode_iterator VQ_NO2E [V16QI V8HI V4SI V8HF V4SF])
  
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 250f7a247edc8ec23f7d2dae9bf043351386aecf..6f917cbe36e7735c6fc9b4a5fe1c075bff887c32 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,13 @@
+2018-05-21  Tamar Christina  <tamar.christina@arm.com>
+
+       * gcc.target/gcc.target/aarch64/sha3.h (veor3q_u8, veor3q_u32,
+       veor3q_u64, veor3q_s8, veor3q_s16, veor3q_s32, veor3q_s64, vbcaxq_u8,
+       vbcaxq_u32, vbcaxq_u64, vbcaxq_s8, vbcaxq_s16, vbcaxq_s32,
+       vbcaxq_s64): New.
+       * gcc.target/gcc.target/aarch64/sha3_1.c: Likewise.
+       * gcc.target/gcc.target/aarch64/sha3_1.c: Likewise.
+       * gcc.target/gcc.target/aarch64/sha3_1.c: Likewise.
+
  2018-05-21  Janus Weil  <janus@gcc.gnu.org>
  
         PR fortran/85841
diff --git a/gcc/testsuite/gcc.target/aarch64/sha3.h b/gcc/testsuite/gcc.target/aarch64/sha3.h

index 76dd1931dffbf60c521e824a0c5d51d9aa08c9f0..c8537c251963317258237f5346b9ff3a7282de5a 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sha3.h
+++ b/gcc/testsuite/gcc.target/aarch64/sha3.h
@@ -1,10 +1,26 @@
  #include "arm_neon.h"
  
-uint16x8_t
-test_veor3q_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
-{
-  return veor3q_u16 (a, b, c);
-}
+#define TEST_VEOR3(T, S) T \
+test_veor3q_ ## S (T a, T b, T c) \
+{ \
+  return veor3q_ ## S (a, b, c); \
+} \
+
+#define TEST_VBCAX(T, S) T \
+test_vbcaxq_ ## S (T a, T b, T c) \
+{ \
+  return vbcaxq_ ## S (a, b, c); \
+} \
+
+
+TEST_VEOR3 (uint8x16_t, u8)
+TEST_VEOR3 (uint16x8_t, u16)
+TEST_VEOR3 (uint32x4_t, u32)
+TEST_VEOR3 (uint64x2_t, u64)
+TEST_VEOR3 (int8x16_t, s8)
+TEST_VEOR3 (int16x8_t, s16)
+TEST_VEOR3 (int32x4_t, s32)
+TEST_VEOR3 (int64x2_t, s64)
  
  uint64x2_t
  test_vrax1q_u64 (uint64x2_t a, uint64x2_t b)
@@ -18,8 +34,12 @@ test_vxarq_u64 (uint64x2_t a, uint64x2_t b)
    return vxarq_u64 (a, b, 15);
  }
  
-uint16x8_t
-test_vbcaxq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
-{
-  return vbcaxq_u16 (a, b, c);
-}
+TEST_VBCAX (uint8x16_t, u8)
+TEST_VBCAX (uint16x8_t, u16)
+TEST_VBCAX (uint32x4_t, u32)
+TEST_VBCAX (uint64x2_t, u64)
+TEST_VBCAX (int8x16_t, s8)
+TEST_VBCAX (int16x8_t, s16)
+TEST_VBCAX (int32x4_t, s32)
+TEST_VBCAX (int64x2_t, s64)
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sha3_1.c b/gcc/testsuite/gcc.target/aarch64/sha3_1.c

index 879eadd875e899c70b32680d40bdb3de419f00a1..0727ce770283844ea69fe4ccdd858e03f9396bc8 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sha3_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sha3_1.c
@@ -4,7 +4,7 @@
  #include "sha3.h"
  
  
-/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 8 } } */
  /* { dg-final { scan-assembler-times "rax1\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */
  /* { dg-final { scan-assembler-times "xar\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d, 15" 1 } } */
-/* { dg-final { scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 8 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sha3_2.c b/gcc/testsuite/gcc.target/aarch64/sha3_2.c

index 2afe28c47445af53194427912b9d6a9de9b5ff04..2d05116113371b37f0788334cf94f6b35ed9d43f 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sha3_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sha3_2.c
@@ -3,7 +3,7 @@
  
  #include "sha3.h"
  
-/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 8 } } */
  /* { dg-final { scan-assembler-times "rax1\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */
  /* { dg-final { scan-assembler-times "xar\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d, 15" 1 } } */
-/* { dg-final { scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 8 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sha3_3.c b/gcc/testsuite/gcc.target/aarch64/sha3_3.c

index 8915c805c3e55cb46691602dcf6a3627a28cb3c2..8d8ee77c2934610212286859297708528ec85ad8 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sha3_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sha3_3.c
@@ -3,7 +3,7 @@
  
  #include "sha3.h"
  
-/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 8 } } */
  /* { dg-final { scan-assembler-times "rax1\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */
  /* { dg-final { scan-assembler-times "xar\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d, 15" 1 } } */
-/* { dg-final { scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 8 } } */
author	Tamar Christina <tamar.christina@arm.com>
	Mon, 21 May 2018 10:33:30 +0000 (10:33 +0000)
committer	Tamar Christina <tnfchris@gcc.gnu.org>
	Mon, 21 May 2018 10:33:30 +0000 (10:33 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/config/aarch64/aarch64-simd-builtins.def		patch \| blob \| history
gcc/config/aarch64/aarch64-simd.md		patch \| blob \| history
gcc/config/aarch64/arm_neon.h		patch \| blob \| history
gcc/config/aarch64/iterators.md		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/sha3.h		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/sha3_1.c		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/sha3_2.c		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/sha3_3.c		patch \| blob \| history