[AArch64] Add NEON intrinsics vqrdmlah and vqrdmlsh.

author Matthew Wahab <matthew.wahab@arm.com>

Thu, 26 Nov 2015 15:13:02 +0000 (15:13 +0000)

committer Matthew Wahab <mwahab@gcc.gnu.org>

Thu, 26 Nov 2015 15:13:02 +0000 (15:13 +0000)
author Matthew Wahab <matthew.wahab@arm.com>
Thu, 26 Nov 2015 15:13:02 +0000 (15:13 +0000)
committer Matthew Wahab <mwahab@gcc.gnu.org>
Thu, 26 Nov 2015 15:13:02 +0000 (15:13 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 8cce38ea59ec2c867fb0d36e7c877ffceb9133fd..e169551de26af86116dc608b21791bb8490d7e2c 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2015-11-26  Matthew Wahab  <matthew.wahab@arm.com>
+
+       * gcc/config/aarch64/arm_neon.h (vqrdmlah_s16, vqrdmlah_s32): New.
+       (vqrdmlahq_s16, vqrdmlahq_s32): New.
+       (vqrdmlsh_s16, vqrdmlsh_s32): New.
+       (vqrdmlshq_s16, vqrdmlshq_s32): New.
+
  2015-11-26  Matthew Wahab  <matthew.wahab@arm.com>
  
         * config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Add
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h

index 138b108ff36cf63f5cd42aa4547c0291ea6cf2c7..63f1627ce4cec01b4892b59268ca7521d256dffa 100644 (file)
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -11213,6 +11213,59 @@ vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
    return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c);
  }
  
+/* ARMv8.1 instrinsics.  */
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.1-a")
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqrdmlah_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
+{
+  return __builtin_aarch64_sqrdmlahv4hi (__a, __b, __c);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqrdmlah_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
+{
+  return __builtin_aarch64_sqrdmlahv2si (__a, __b, __c);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqrdmlahq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
+{
+  return __builtin_aarch64_sqrdmlahv8hi (__a, __b, __c);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqrdmlahq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
+{
+  return __builtin_aarch64_sqrdmlahv4si (__a, __b, __c);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqrdmlsh_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
+{
+  return __builtin_aarch64_sqrdmlshv4hi (__a, __b, __c);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqrdmlsh_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
+{
+  return __builtin_aarch64_sqrdmlshv2si (__a, __b, __c);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqrdmlshq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
+{
+  return __builtin_aarch64_sqrdmlshv8hi (__a, __b, __c);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqrdmlshq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
+{
+  return __builtin_aarch64_sqrdmlshv4si (__a, __b, __c);
+}
+#pragma GCC pop_options
+
  #pragma GCC push_options
  #pragma GCC target ("+nothing+crypto")
  /* vaes  */
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index cf0059704044347e61ed7250066c5dff9c78310b..f326a3fecd8b3a06f14121dfb62f5e292374e7d0 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2015-11-26  Matthew Wahab  <matthew.wahab@arm.com>
+
+       * gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc: New file,
+       support code for vqrdml{as}h tests.
+       * gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c: New.
+       * gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c: New.
+
  2015-11-26  Matthew Wahab  <matthew.wahab@arm.com>
  
         * lib/target-supports.exp (add_options_for_arm_v8_1a_neon): New.
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc

new file mode 100644 (file)

index 0000000..a504ca6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc
@@ -0,0 +1,138 @@
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1 (NAME)
+
+void FNNAME (INSN) (void)
+{
+  /* vector_res = vqrdmlah (vector, vector2, vector3, vector4),
+     then store the result.  */
+#define TEST_VQRDMLAH2(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
+  Set_Neon_Cumulative_Sat (0, VECT_VAR (vector_res, T1, W, N));                \
+  VECT_VAR (vector_res, T1, W, N) =                                    \
+    INSN##Q##_##T2##W (VECT_VAR (vector, T1, W, N),                    \
+                      VECT_VAR (vector2, T1, W, N),                    \
+                      VECT_VAR (vector3, T1, W, N));                   \
+  vst1##Q##_##T2##W (VECT_VAR (result, T1, W, N),                      \
+                    VECT_VAR (vector_res, T1, W, N));                  \
+  CHECK_CUMULATIVE_SAT (TEST_MSG, T1, W, N,                            \
+                       EXPECTED_CUMULATIVE_SAT, CMT)
+
+  /* Two auxliary macros are necessary to expand INSN.  */
+#define TEST_VQRDMLAH1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
+  TEST_VQRDMLAH2 (INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+#define TEST_VQRDMLAH(Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)   \
+  TEST_VQRDMLAH1 (INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+  DECL_VARIABLE (vector, int, 16, 4);
+  DECL_VARIABLE (vector, int, 32, 2);
+  DECL_VARIABLE (vector, int, 16, 8);
+  DECL_VARIABLE (vector, int, 32, 4);
+
+  DECL_VARIABLE (vector_res, int, 16, 4);
+  DECL_VARIABLE (vector_res, int, 32, 2);
+  DECL_VARIABLE (vector_res, int, 16, 8);
+  DECL_VARIABLE (vector_res, int, 32, 4);
+
+  DECL_VARIABLE (vector2, int, 16, 4);
+  DECL_VARIABLE (vector2, int, 32, 2);
+  DECL_VARIABLE (vector2, int, 16, 8);
+  DECL_VARIABLE (vector2, int, 32, 4);
+
+  DECL_VARIABLE (vector3, int, 16, 4);
+  DECL_VARIABLE (vector3, int, 32, 2);
+  DECL_VARIABLE (vector3, int, 16, 8);
+  DECL_VARIABLE (vector3, int, 32, 4);
+
+  clean_results ();
+
+  VLOAD (vector, buffer, , int, s, 16, 4);
+  VLOAD (vector, buffer, , int, s, 32, 2);
+  VLOAD (vector, buffer, q, int, s, 16, 8);
+  VLOAD (vector, buffer, q, int, s, 32, 4);
+
+  /* Initialize vector2.  */
+  VDUP (vector2, , int, s, 16, 4, 0x5555);
+  VDUP (vector2, , int, s, 32, 2, 0xBB);
+  VDUP (vector2, q, int, s, 16, 8, 0xBB);
+  VDUP (vector2, q, int, s, 32, 4, 0x22);
+
+  /* Initialize vector3.  */
+  VDUP (vector3, , int, s, 16, 4, 0x5555);
+  VDUP (vector3, , int, s, 32, 2, 0xBB);
+  VDUP (vector3, q, int, s, 16, 8, 0x33);
+  VDUP (vector3, q, int, s, 32, 4, 0x22);
+
+#define CMT ""
+  TEST_VQRDMLAH ( , int, s, 16, 4, expected_cumulative_sat, CMT);
+  TEST_VQRDMLAH ( , int, s, 32, 2, expected_cumulative_sat, CMT);
+  TEST_VQRDMLAH (q, int, s, 16, 8, expected_cumulative_sat, CMT);
+  TEST_VQRDMLAH (q, int, s, 32, 4, expected_cumulative_sat, CMT);
+
+  CHECK (TEST_MSG, int, 16, 4, PRIx16, expected, CMT);
+  CHECK (TEST_MSG, int, 32, 2, PRIx32, expected, CMT);
+  CHECK (TEST_MSG, int, 16, 8, PRIx16, expected, CMT);
+  CHECK (TEST_MSG, int, 32, 4, PRIx32, expected, CMT);
+
+  /* Now use input values such that the multiplication causes
+     saturation.  */
+#define TEST_MSG_MUL " (check mul cumulative saturation)"
+  VDUP (vector, , int, s, 16, 4, 0x8000);
+  VDUP (vector, , int, s, 32, 2, 0x80000000);
+  VDUP (vector, q, int, s, 16, 8, 0x8000);
+  VDUP (vector, q, int, s, 32, 4, 0x80000000);
+  VDUP (vector2, , int, s, 16, 4, 0x8000);
+  VDUP (vector2, , int, s, 32, 2, 0x80000000);
+  VDUP (vector2, q, int, s, 16, 8, 0x8000);
+  VDUP (vector2, q, int, s, 32, 4, 0x80000000);
+  VDUP (vector3, , int, s, 16, 4, 0x8000);
+  VDUP (vector3, , int, s, 32, 2, 0x80000000);
+  VDUP (vector3, q, int, s, 16, 8, 0x8000);
+  VDUP (vector3, q, int, s, 32, 4, 0x80000000);
+
+  TEST_VQRDMLAH ( , int, s, 16, 4, expected_cumulative_sat_mul, TEST_MSG_MUL);
+  TEST_VQRDMLAH ( , int, s, 32, 2, expected_cumulative_sat_mul, TEST_MSG_MUL);
+  TEST_VQRDMLAH (q, int, s, 16, 8, expected_cumulative_sat_mul, TEST_MSG_MUL);
+  TEST_VQRDMLAH (q, int, s, 32, 4, expected_cumulative_sat_mul, TEST_MSG_MUL);
+
+  CHECK (TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL);
+  CHECK (TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL);
+  CHECK (TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL);
+  CHECK (TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL);
+
+  /* Use input values where rounding produces a result equal to the
+     saturation value, but does not set the saturation flag.  */
+#define TEST_MSG_ROUND " (check rounding)"
+  VDUP (vector, , int, s, 16, 4, 0x8000);
+  VDUP (vector, , int, s, 32, 2, 0x80000000);
+  VDUP (vector, q, int, s, 16, 8, 0x8000);
+  VDUP (vector, q, int, s, 32, 4, 0x80000000);
+  VDUP (vector2, , int, s, 16, 4, 0x8001);
+  VDUP (vector2, , int, s, 32, 2, 0x80000001);
+  VDUP (vector2, q, int, s, 16, 8, 0x8001);
+  VDUP (vector2, q, int, s, 32, 4, 0x80000001);
+  VDUP (vector3, , int, s, 16, 4, 0x8001);
+  VDUP (vector3, , int, s, 32, 2, 0x80000001);
+  VDUP (vector3, q, int, s, 16, 8, 0x8001);
+  VDUP (vector3, q, int, s, 32, 4, 0x80000001);
+
+  TEST_VQRDMLAH ( , int, s, 16, 4, expected_cumulative_sat_round, \
+                TEST_MSG_ROUND);
+  TEST_VQRDMLAH ( , int, s, 32, 2, expected_cumulative_sat_round, \
+                TEST_MSG_ROUND);
+  TEST_VQRDMLAH (q, int, s, 16, 8, expected_cumulative_sat_round, \
+                TEST_MSG_ROUND);
+  TEST_VQRDMLAH (q, int, s, 32, 4, expected_cumulative_sat_round, \
+                TEST_MSG_ROUND);
+
+  CHECK (TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND);
+  CHECK (TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND);
+  CHECK (TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND);
+  CHECK (TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND);
+}
+
+int
+main (void)
+{
+  FNNAME (INSN) ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c

new file mode 100644 (file)

index 0000000..148d94c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c
@@ -0,0 +1,57 @@
+/* { dg-require-effective-target arm_v8_1a_neon_hw } */
+/* { dg-add-options arm_v8_1a_neon } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected values of cumulative_saturation flag.  */
+int VECT_VAR (expected_cumulative_sat, int, 16, 4) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 32, 2) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 16, 8) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 32, 4) = 0;
+
+/* Expected results.  */
+VECT_VAR_DECL (expected, int, 16, 4) [] = { 0x38d3, 0x38d4, 0x38d5, 0x38d6 };
+VECT_VAR_DECL (expected, int, 32, 2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL (expected, int, 16, 8) [] = { 0xfff0,  0xfff1, 0xfff2,  0xfff3,
+                                           0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL (expected, int, 32, 4) [] = { 0xfffffff0, 0xfffffff1,
+                                           0xfffffff2, 0xfffffff3 };
+
+/* Expected values of cumulative_saturation flag when multiplication
+   saturates.  */
+int VECT_VAR (expected_cumulative_sat_mul, int, 16, 4) = 0;
+int VECT_VAR (expected_cumulative_sat_mul, int, 32, 2) = 0;
+int VECT_VAR (expected_cumulative_sat_mul, int, 16, 8) = 0;
+int VECT_VAR (expected_cumulative_sat_mul, int, 32, 4) = 0;
+
+/* Expected results when multiplication saturates.  */
+VECT_VAR_DECL (expected_mul, int, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL (expected_mul, int, 32, 2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL (expected_mul, int, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0,
+                                               0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL (expected_mul, int, 32, 4) [] = { 0x0, 0x0, 0x0, 0x0 };
+
+/* Expected values of cumulative_saturation flag when rounding
+   should not cause saturation.  */
+int VECT_VAR (expected_cumulative_sat_round, int, 16, 4) = 0;
+int VECT_VAR (expected_cumulative_sat_round, int, 32, 2) = 0;
+int VECT_VAR (expected_cumulative_sat_round, int, 16, 8) = 0;
+int VECT_VAR (expected_cumulative_sat_round, int, 32, 4) = 0;
+
+/* Expected results when rounding should not cause saturation.  */
+VECT_VAR_DECL (expected_round, int, 16, 4) [] = { 0xfffe, 0xfffe,
+                                                 0xfffe, 0xfffe };
+VECT_VAR_DECL (expected_round, int, 32, 2) [] = { 0xfffffffe, 0xfffffffe };
+VECT_VAR_DECL (expected_round, int, 16, 8) [] = { 0xfffe, 0xfffe,
+                                                 0xfffe, 0xfffe,
+                                                 0xfffe, 0xfffe,
+                                                 0xfffe, 0xfffe };
+VECT_VAR_DECL (expected_round, int, 32, 4) [] = { 0xfffffffe, 0xfffffffe,
+                                                 0xfffffffe, 0xfffffffe };
+
+#define INSN vqrdmlah
+#define TEST_MSG "VQRDMLAH"
+
+#include "vqrdmlXh.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c

new file mode 100644 (file)

index 0000000..91c3b34
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c
@@ -0,0 +1,61 @@
+/* { dg-require-effective-target arm_v8_1a_neon_hw } */
+/* { dg-add-options arm_v8_1a_neon } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected values of cumulative_saturation flag.  */
+int VECT_VAR (expected_cumulative_sat, int, 16, 4) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 32, 2) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 16, 8) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 32, 4) = 0;
+
+/* Expected results.  */
+VECT_VAR_DECL (expected, int, 16, 4) [] = { 0xc70d, 0xc70e, 0xc70f, 0xc710 };
+VECT_VAR_DECL (expected, int, 32, 2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL (expected, int, 16, 8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+                                           0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL (expected, int, 32, 4) [] = { 0xfffffff0, 0xfffffff1,
+                                           0xfffffff2, 0xfffffff3 };
+
+/* Expected values of cumulative_saturation flag when multiplication
+   saturates.  */
+int VECT_VAR (expected_cumulative_sat_mul, int, 16, 4) = 1;
+int VECT_VAR (expected_cumulative_sat_mul, int, 32, 2) = 1;
+int VECT_VAR (expected_cumulative_sat_mul, int, 16, 8) = 1;
+int VECT_VAR (expected_cumulative_sat_mul, int, 32, 4) = 1;
+
+/* Expected results when multiplication saturates.  */
+VECT_VAR_DECL (expected_mul, int, 16, 4) [] = { 0x8000, 0x8000,
+                                               0x8000, 0x8000 };
+VECT_VAR_DECL (expected_mul, int, 32, 2) [] = { 0x80000000, 0x80000000 };
+VECT_VAR_DECL (expected_mul, int, 16, 8) [] = { 0x8000, 0x8000,
+                                               0x8000, 0x8000,
+                                               0x8000, 0x8000,
+                                               0x8000, 0x8000 };
+VECT_VAR_DECL (expected_mul, int, 32, 4) [] = { 0x80000000, 0x80000000,
+                                               0x80000000, 0x80000000 };
+
+/* Expected values of cumulative_saturation flag when rounding
+   should not cause saturation.  */
+int VECT_VAR (expected_cumulative_sat_round, int, 16, 4) = 1;
+int VECT_VAR (expected_cumulative_sat_round, int, 32, 2) = 1;
+int VECT_VAR (expected_cumulative_sat_round, int, 16, 8) = 1;
+int VECT_VAR (expected_cumulative_sat_round, int, 32, 4) = 1;
+
+/* Expected results when rounding should not cause saturation.  */
+VECT_VAR_DECL (expected_round, int, 16, 4) [] = { 0x8000, 0x8000,
+                                                 0x8000, 0x8000 };
+VECT_VAR_DECL (expected_round, int, 32, 2) [] = { 0x80000000, 0x80000000 };
+VECT_VAR_DECL (expected_round, int, 16, 8) [] = { 0x8000, 0x8000,
+                                                 0x8000, 0x8000,
+                                                 0x8000, 0x8000,
+                                                 0x8000, 0x8000 };
+VECT_VAR_DECL (expected_round, int, 32, 4) [] = { 0x80000000, 0x80000000,
+                                                 0x80000000, 0x80000000 };
+
+#define INSN vqrdmlsh
+#define TEST_MSG "VQRDMLSH"
+
+#include "vqrdmlXh.inc"
author	Matthew Wahab <matthew.wahab@arm.com>
	Thu, 26 Nov 2015 15:13:02 +0000 (15:13 +0000)
committer	Matthew Wahab <mwahab@gcc.gnu.org>
	Thu, 26 Nov 2015 15:13:02 +0000 (15:13 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/config/aarch64/arm_neon.h		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c	[new file with mode: 0644]	patch \| blob