[AArch64] Add a couple of SVE ACLE comparison folds
author: Richard Sandiford <richard.sandiford@arm.com>
Mon, 2 Dec 2019 17:39:06 +0000 (17:39 +0000)
committer: Richard Sandiford <rsandifo@gcc.gnu.org>
Mon, 2 Dec 2019 17:39:06 +0000 (17:39 +0000)
When writing vector-length specific SVE code, it's useful to be able
to store an svbool_t predicate in a GNU vector of unsigned chars.
This patch makes sure that there is no overhead when converting
to that form and then immediately reading it back again.

2019-12-02  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
* config/aarch64/aarch64-sve-builtins.h
(gimple_folder::force_vector): Declare.
* config/aarch64/aarch64-sve-builtins.cc
(gimple_folder::force_vector): New function.
* config/aarch64/aarch64-sve-builtins-base.cc
(svcmp_impl::fold): Likewise.
(svdup_impl::fold): Handle svdup_z too.

gcc/testsuite/
* gcc.target/aarch64/sve/acle/general/eqne_dup_1.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_f16.c (dup_0_f16_z): Expect
the call to be folded to zero.
* gcc.target/aarch64/sve/acle/asm/dup_f32.c (dup_0_f32_z): Likewise.
* gcc.target/aarch64/sve/acle/asm/dup_f64.c (dup_0_f64_z): Likewise.
* gcc.target/aarch64/sve/acle/asm/dup_s8.c (dup_0_s8_z): Likewise.
* gcc.target/aarch64/sve/acle/asm/dup_s16.c (dup_0_s16_z): Likewise.
* gcc.target/aarch64/sve/acle/asm/dup_s32.c (dup_0_s32_z): Likewise.
* gcc.target/aarch64/sve/acle/asm/dup_s64.c (dup_0_s64_z): Likewise.
* gcc.target/aarch64/sve/acle/asm/dup_u8.c (dup_0_u8_z): Likewise.
* gcc.target/aarch64/sve/acle/asm/dup_u16.c (dup_0_u16_z): Likewise.
* gcc.target/aarch64/sve/acle/asm/dup_u32.c (dup_0_u32_z): Likewise.
* gcc.target/aarch64/sve/acle/asm/dup_u64.c (dup_0_u64_z): Likewise.

From-SVN: r278907

17 files changed:
gcc/ChangeLog
gcc/config/aarch64/aarch64-sve-builtins-base.cc
gcc/config/aarch64/aarch64-sve-builtins.cc
gcc/config/aarch64/aarch64-sve-builtins.h
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s8.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u8.c
gcc/testsuite/gcc.target/aarch64/sve/acle/general/eqne_dup_1.c [new file with mode: 0644]

index 765e01565d7d83b7ce08ce1a8176c327d83eafd4..29fed4f40f4bd71a1dad052b786348432553bf16 100644 (file)
@@ -1,3 +1,13 @@
+2019-12-02  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * config/aarch64/aarch64-sve-builtins.h
+       (gimple_folder::force_vector): Declare.
+       * config/aarch64/aarch64-sve-builtins.cc
+       (gimple_folder::force_vector): New function.
+       * config/aarch64/aarch64-sve-builtins-base.cc
+       (svcmp_impl::fold): Likewise.
+       (svdup_impl::fold): Handle svdup_z too.
+
 2019-12-02  Martin Liska  <mliska@suse.cz>
 
        * ipa-devirt.c (warn_types_mismatch): Use get_odr_name_for_type
index 52166c4d88937317627084076263ee267849a972..38bd3adce1ebbde4c58531ffd26eedd4ae4938b0 100644 (file)
@@ -333,6 +333,28 @@ public:
   CONSTEXPR svcmp_impl (tree_code code, int unspec_for_fp)
     : m_code (code), m_unspec_for_fp (unspec_for_fp) {}
 
+  gimple *
+  fold (gimple_folder &f) const OVERRIDE
+  {
+    tree pg = gimple_call_arg (f.call, 0);
+    tree rhs1 = gimple_call_arg (f.call, 1);
+    tree rhs2 = gimple_call_arg (f.call, 2);
+
+    /* Convert a ptrue-predicated integer comparison into the corresponding
+       gimple-level operation.  */
+    if (integer_all_onesp (pg)
+       && f.type_suffix (0).element_bytes == 1
+       && f.type_suffix (0).integer_p)
+      {
+       gimple_seq stmts = NULL;
+       rhs2 = f.force_vector (stmts, TREE_TYPE (rhs1), rhs2);
+       gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
+       return gimple_build_assign (f.lhs, m_code, rhs1, rhs2);
+      }
+
+    return NULL;
+  }
+
   rtx
   expand (function_expander &e) const OVERRIDE
   {
@@ -700,6 +722,17 @@ public:
          return gimple_build_assign (f.lhs, VEC_DUPLICATE_EXPR, rhs);
       }
 
+    /* svdup_z (pg, x) == VEC_COND_EXPR <pg, VEC_DUPLICATE_EXPR <x>, 0>.  */
+    if (f.pred == PRED_z)
+      {
+       gimple_seq stmts = NULL;
+       tree pred = f.convert_pred (stmts, vec_type, 0);
+       rhs = f.force_vector (stmts, vec_type, rhs);
+       gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
+       return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred, rhs,
+                                   build_zero_cst (vec_type));
+      }
+
     return NULL;
   }
 
index 5dd7ccb74ff41fd97513772f7bc05541a906a17f..3a6b4704604ac04786af14af43cfcbb505829a80 100644 (file)
@@ -2234,6 +2234,17 @@ gimple_folder::gimple_folder (const function_instance &instance, tree fndecl,
 {
 }
 
+/* VALUE might be a vector of type VECTYPE or a single scalar element.
+   Duplicate it into a vector of type VECTYPE in the latter case, adding any
+   new statements to STMTS.  */
+tree
+gimple_folder::force_vector (gimple_seq &stmts, tree vectype, tree value)
+{
+  if (!VECTOR_TYPE_P (TREE_TYPE (value)))
+    value = gimple_build_vector_from_val (&stmts, vectype, value);
+  return value;
+}
+
 /* Convert predicate argument ARGNO so that it has the type appropriate for
    an operation on VECTYPE.  Add any new statements to STMTS.  */
 tree
index 73b07c71e5d68f308521d3d66a58c05d0f99f378..0884b0f6756fb7b8376a032bcfa81962b25893f3 100644 (file)
@@ -488,6 +488,7 @@ public:
   gimple_folder (const function_instance &, tree,
                 gimple_stmt_iterator *, gcall *);
 
+  tree force_vector (gimple_seq &, tree, tree);
   tree convert_pred (gimple_seq &, tree, unsigned int);
   tree fold_contiguous_base (gimple_seq &, tree);
   tree load_store_cookie (tree);
index 64f035f63d47a401af7696c18d77dd92c63c1e48..2e6d171750b8e14e93d1c80135db151c2afdf89c 100644 (file)
@@ -1,3 +1,19 @@
+2019-12-02  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * gcc.target/aarch64/sve/acle/general/eqne_dup_1.c: New test.
+       * gcc.target/aarch64/sve/acle/asm/dup_f16.c (dup_0_f16_z): Expect
+       the call to be folded to zero.
+       * gcc.target/aarch64/sve/acle/asm/dup_f32.c (dup_0_f32_z): Likewise.
+       * gcc.target/aarch64/sve/acle/asm/dup_f64.c (dup_0_f64_z): Likewise.
+       * gcc.target/aarch64/sve/acle/asm/dup_s8.c (dup_0_s8_z): Likewise.
+       * gcc.target/aarch64/sve/acle/asm/dup_s16.c (dup_0_s16_z): Likewise.
+       * gcc.target/aarch64/sve/acle/asm/dup_s32.c (dup_0_s32_z): Likewise.
+       * gcc.target/aarch64/sve/acle/asm/dup_s64.c (dup_0_s64_z): Likewise.
+       * gcc.target/aarch64/sve/acle/asm/dup_u8.c (dup_0_u8_z): Likewise.
+       * gcc.target/aarch64/sve/acle/asm/dup_u16.c (dup_0_u16_z): Likewise.
+       * gcc.target/aarch64/sve/acle/asm/dup_u32.c (dup_0_u32_z): Likewise.
+       * gcc.target/aarch64/sve/acle/asm/dup_u64.c (dup_0_u64_z): Likewise.
+
 2019-12-02  Sudakshina Das  <sudi.das@arm.com>
 
        * g++.dg/ext/arm-fp16/arm-fp16-ops.h: Remove volatile keyword.
index 2d48b9a3dbc62684ee7524258807e288b9bde285..a90c7118448cdf058db264f57c61a6cc51e89e5e 100644 (file)
@@ -120,7 +120,7 @@ TEST_UNIFORM_Z (dup_1_f16_z, svfloat16_t,
 
 /*
 ** dup_0_f16_z:
-**     mov     z0\.h, p0/z, #0
+**     mov     z0\.[bhsd], #0
 **     ret
 */
 TEST_UNIFORM_Z (dup_0_f16_z, svfloat16_t,
index f997b7a7dc877391ddefaeff1df8270ed2c4563b..ba23781429c87509606d168d0d96530dd70cc8f2 100644 (file)
@@ -118,7 +118,7 @@ TEST_UNIFORM_Z (dup_1_f32_z, svfloat32_t,
 
 /*
 ** dup_0_f32_z:
-**     mov     z0\.s, p0/z, #0
+**     mov     z0\.[bhsd], #0
 **     ret
 */
 TEST_UNIFORM_Z (dup_0_f32_z, svfloat32_t,
index e177d91086da346451a251318776b7a46ca31949..b397da885673a565c994415e6d6659f517c71e3a 100644 (file)
@@ -118,7 +118,7 @@ TEST_UNIFORM_Z (dup_1_f64_z, svfloat64_t,
 
 /*
 ** dup_0_f64_z:
-**     mov     z0\.d, p0/z, #0
+**     mov     z0\.[bhsd], #0
 **     ret
 */
 TEST_UNIFORM_Z (dup_0_f64_z, svfloat64_t,
index 876f36db7f588c51c600c16fce871c23672a56fc..21ab6f63e375583a0a031ee58c96244f0633c854 100644 (file)
@@ -869,7 +869,7 @@ TEST_UNIFORM_Z (dup_m8000_s16_z, svint16_t,
 
 /*
 ** dup_0_s16_z:
-**     mov     z0\.h, p0/z, #0
+**     mov     z0\.[bhsd], #0
 **     ret
 */
 TEST_UNIFORM_Z (dup_0_s16_z, svint16_t,
index 0b396dbeb50a00ebcd590575093bce6a2d98a45c..500ec48b34a63971626e879d24206d947bdad5d6 100644 (file)
@@ -849,7 +849,7 @@ TEST_UNIFORM_Z (dup_m8000_s32_z, svint32_t,
 
 /*
 ** dup_0_s32_z:
-**     mov     z0\.s, p0/z, #0
+**     mov     z0\.[bhsd], #0
 **     ret
 */
 TEST_UNIFORM_Z (dup_0_s32_z, svint32_t,
index 6259b7fb55dd99ed3a6748cef087c2334aacefaf..651bb1b43f0232e3d93288996d7418e90ebd76e1 100644 (file)
@@ -849,7 +849,7 @@ TEST_UNIFORM_Z (dup_m8000_s64_z, svint64_t,
 
 /*
 ** dup_0_s64_z:
-**     mov     z0\.d, p0/z, #0
+**     mov     z0\.[bhsd], #0
 **     ret
 */
 TEST_UNIFORM_Z (dup_0_s64_z, svint64_t,
index 96fc5fa6494d0f5ff9a647fd2b55018efce5ed31..f3c9db8ead70187dd9ce0e0570ce0e499399bded 100644 (file)
@@ -275,7 +275,7 @@ TEST_UNIFORM_Z (dup_m128_s8_z, svint8_t,
 
 /*
 ** dup_0_s8_z:
-**     mov     z0\.b, p0/z, #0
+**     mov     z0\.[bhsd], #0
 **     ret
 */
 TEST_UNIFORM_Z (dup_0_s8_z, svint8_t,
index 263eafef0cd1f8509230fbc865bff1aadf459729..dba409d5b3bfc7e6213213718371502c3bd1308a 100644 (file)
@@ -869,7 +869,7 @@ TEST_UNIFORM_Z (dup_m8000_u16_z, svuint16_t,
 
 /*
 ** dup_0_u16_z:
-**     mov     z0\.h, p0/z, #0
+**     mov     z0\.[bhsd], #0
 **     ret
 */
 TEST_UNIFORM_Z (dup_0_u16_z, svuint16_t,
index 667feea64539950cf65355c4549120332f1e23a0..7d5b4626fd43284a7b08448843c14ed8a80a628a 100644 (file)
@@ -849,7 +849,7 @@ TEST_UNIFORM_Z (dup_m8000_u32_z, svuint32_t,
 
 /*
 ** dup_0_u32_z:
-**     mov     z0\.s, p0/z, #0
+**     mov     z0\.[bhsd], #0
 **     ret
 */
 TEST_UNIFORM_Z (dup_0_u32_z, svuint32_t,
index a7cca7af0219b0f9756d0a13414f5e399cba318e..0431e75bc65ca46652c0f6f769c6579c6fbbd8c2 100644 (file)
@@ -849,7 +849,7 @@ TEST_UNIFORM_Z (dup_m8000_u64_z, svuint64_t,
 
 /*
 ** dup_0_u64_z:
-**     mov     z0\.d, p0/z, #0
+**     mov     z0\.[bhsd], #0
 **     ret
 */
 TEST_UNIFORM_Z (dup_0_u64_z, svuint64_t,
index d27f4bba9e3bc6e825269dfc9ea3504e6240293d..1bb4cc1bd794ff9a36d52d44bbfcb723e78c01df 100644 (file)
@@ -275,7 +275,7 @@ TEST_UNIFORM_Z (dup_m128_u8_z, svuint8_t,
 
 /*
 ** dup_0_u8_z:
-**     mov     z0\.b, p0/z, #0
+**     mov     z0\.[bhsd], #0
 **     ret
 */
 TEST_UNIFORM_Z (dup_0_u8_z, svuint8_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/eqne_dup_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/eqne_dup_1.c
new file mode 100644 (file)
index 0000000..651f5ae
--- /dev/null
@@ -0,0 +1,40 @@
+/* { dg-additional-options "-O" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_sve.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** cmp1:
+**     ptrue   (p[0-7])\.b(?:[^\n]*)
+**     cmple   p0\.b, \1/z, z0\.b, z1\.d
+**     ret
+*/
+svbool_t
+cmp1 (svint8_t x, svint64_t y)
+{
+  svbool_t res = svcmple_wide (svptrue_b8 (), x, y);
+  svuint8_t res_u8 = svdup_u8_z (res, 1);
+  return svcmpne (svptrue_b8 (), res_u8, 0);
+}
+
+/*
+** cmp2:
+**     ptrue   (p[0-7])\.b(?:[^\n]*)
+**     cmplt   p0\.b, \1/z, z0\.b, z1\.d
+**     ret
+*/
+svbool_t
+cmp2 (svint8_t x, svint64_t y)
+{
+  svbool_t res = svcmplt_wide (svptrue_b8 (), x, y);
+  svuint8_t res_u8 = svdup_u8_z (res, 42);
+  return svcmpeq (svptrue_b8 (), res_u8, 42);
+}
+
+#ifdef __cplusplus
+}
+#endif