AArch64: Implement missing _p64 intrinsics for vector permutes

author Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Wed, 23 Sep 2020 10:07:50 +0000 (11:07 +0100)

committer Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Wed, 23 Sep 2020 10:07:50 +0000 (11:07 +0100)

diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 32b0877e2826c4680f1ed0564b6fa360351bcd35..e8c130f5e80696778b553572cf023cc880a2233e 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -30568,6 +30568,17 @@ vtrn1q_u32 (uint32x4_t __a, uint32x4_t __b)
  #endif
  }
  
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vtrn1q_p64 (poly64x2_t __a, poly64x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (poly64x2_t) {3, 1});
+#else
+  return __builtin_shuffle (__a, __b, (poly64x2_t) {0, 2});
+#endif
+}
+
  __extension__ extern __inline uint64x2_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  vtrn1q_u64 (uint64x2_t __a, uint64x2_t __b)
@@ -30838,6 +30849,18 @@ vtrn2q_u64 (uint64x2_t __a, uint64x2_t __b)
  #endif
  }
  
+
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vtrn2q_p64 (poly64x2_t __a, poly64x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (poly64x2_t) {2, 0});
+#else
+  return __builtin_shuffle (__a, __b, (poly64x2_t) {1, 3});
+#endif
+}
+
  __extension__ extern __inline float16x4x2_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  vtrn_f16 (float16x4_t __a, float16x4_t __b)
@@ -31484,6 +31507,17 @@ vuzp1q_u64 (uint64x2_t __a, uint64x2_t __b)
  #endif
  }
  
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vuzp1q_p64 (poly64x2_t __a, poly64x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (poly64x2_t) {3, 1});
+#else
+  return __builtin_shuffle (__a, __b, (poly64x2_t) {0, 2});
+#endif
+}
+
  __extension__ extern __inline float16x4_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  vuzp2_f16 (float16x4_t __a, float16x4_t __b)
@@ -31743,6 +31777,17 @@ vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b)
  #endif
  }
  
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vuzp2q_p64 (poly64x2_t __a, poly64x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (poly64x2_t) {2, 0});
+#else
+  return __builtin_shuffle (__a, __b, (poly64x2_t) {1, 3});
+#endif
+}
+
  __INTERLEAVE_LIST (uzp)
  
  /* vzip */
@@ -32011,6 +32056,17 @@ vzip1q_u64 (uint64x2_t __a, uint64x2_t __b)
  #endif
  }
  
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vzip1q_p64 (poly64x2_t __a, poly64x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (poly64x2_t) {3, 1});
+#else
+  return __builtin_shuffle (__a, __b, (poly64x2_t) {0, 2});
+#endif
+}
+
  __extension__ extern __inline float16x4_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  vzip2_f16 (float16x4_t __a, float16x4_t __b)
@@ -32275,6 +32331,17 @@ vzip2q_u64 (uint64x2_t __a, uint64x2_t __b)
  #endif
  }
  
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vzip2q_p64 (poly64x2_t __a, poly64x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (poly64x2_t) {2, 0});
+#else
+  return __builtin_shuffle (__a, __b, (poly64x2_t) {1, 3});
+#endif
+}
+
  __INTERLEAVE_LIST (zip)
  
  #undef __INTERLEAVE_LIST
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/trn_zip_p64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/trn_zip_p64_1.c
new file mode 100644
index 0000000..a47321d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/trn_zip_p64_1.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+poly64x2_t
+foo (poly64x2_t a, poly64x2_t b)
+{
+  return vtrn1q_p64 (a, b);
+}
+
+poly64x2_t
+foo1 (poly64x2_t a, poly64x2_t b)
+{
+  return vtrn2q_p64 (a, b);
+}
+
+poly64x2_t
+foo2 (poly64x2_t a, poly64x2_t b)
+{
+  return vuzp1q_p64 (a, b);
+}
+
+poly64x2_t
+foo3 (poly64x2_t a, poly64x2_t b)
+{
+  return vuzp2q_p64 (a, b);
+}
+
+poly64x2_t
+foo4 (poly64x2_t a, poly64x2_t b)
+{
+  return vzip1q_p64 (a, b);
+}
+
+poly64x2_t
+foo5 (poly64x2_t a, poly64x2_t b)
+{
+  return vzip2q_p64 (a, b);
+}
+
+/* { dg-final { scan-assembler-times {zip1\tv0.2d, v0.2d, v1.2d} 3 } } */
+/* { dg-final { scan-assembler-times {zip2\tv0.2d, v0.2d, v1.2d} 3 } } */
+
author	Kyrylo Tkachov <kyrylo.tkachov@arm.com>
	Wed, 23 Sep 2020 10:07:50 +0000 (11:07 +0100)
committer	Kyrylo Tkachov <kyrylo.tkachov@arm.com>
	Wed, 23 Sep 2020 10:07:50 +0000 (11:07 +0100)
Files changed:
gcc/config/aarch64/arm_neon.h (modified)
gcc/testsuite/gcc.target/aarch64/simd/trn_zip_p64_1.c (new file with mode: 0644)