From 0435b10db008e3019be0898270070eec4f3e318a Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Mon, 2 Dec 2019 17:39:06 +0000 Subject: [PATCH] [AArch64] Add a couple of SVE ACLE comparison folds When writing vector-length specific SVE code, it's useful to be able to store an svbool_t predicate in a GNU vector of unsigned chars. This patch makes sure that there is no overhead when converting to that form and then immediately reading it back again. 2019-12-02 Richard Sandiford gcc/ * config/aarch64/aarch64-sve-builtins.h (gimple_folder::force_vector): Declare. * config/aarch64/aarch64-sve-builtins.cc (gimple_folder::force_vector): New function. * config/aarch64/aarch64-sve-builtins-base.cc (svcmp_impl::fold): Likewise. (svdup_impl::fold): Handle svdup_z too. gcc/testsuite/ * gcc.target/aarch64/sve/acle/general/eqne_dup_1.c: New test. * gcc.target/aarch64/sve/acle/asm/dup_f16.c (dup_0_f16_z): Expect the call to be folded to zero. * gcc.target/aarch64/sve/acle/asm/dup_f32.c (dup_0_f32_z): Likewise. * gcc.target/aarch64/sve/acle/asm/dup_f64.c (dup_0_f64_z): Likewise. * gcc.target/aarch64/sve/acle/asm/dup_s8.c (dup_0_s8_z): Likewise. * gcc.target/aarch64/sve/acle/asm/dup_s16.c (dup_0_s16_z): Likewise. * gcc.target/aarch64/sve/acle/asm/dup_s32.c (dup_0_s32_z): Likewise. * gcc.target/aarch64/sve/acle/asm/dup_s64.c (dup_0_s64_z): Likewise. * gcc.target/aarch64/sve/acle/asm/dup_u8.c (dup_0_u8_z): Likewise. * gcc.target/aarch64/sve/acle/asm/dup_u16.c (dup_0_u16_z): Likewise. * gcc.target/aarch64/sve/acle/asm/dup_u32.c (dup_0_u32_z): Likewise. * gcc.target/aarch64/sve/acle/asm/dup_u64.c (dup_0_u64_z): Likewise. From-SVN: r278907 --- gcc/ChangeLog | 10 +++++ .../aarch64/aarch64-sve-builtins-base.cc | 33 +++++++++++++++ gcc/config/aarch64/aarch64-sve-builtins.cc | 11 +++++ gcc/config/aarch64/aarch64-sve-builtins.h | 1 + gcc/testsuite/ChangeLog | 16 ++++++++ .../gcc.target/aarch64/sve/acle/asm/dup_f16.c | 2 +- .../gcc.target/aarch64/sve/acle/asm/dup_f32.c | 2 +- .../gcc.target/aarch64/sve/acle/asm/dup_f64.c | 2 +- .../gcc.target/aarch64/sve/acle/asm/dup_s16.c | 2 +- .../gcc.target/aarch64/sve/acle/asm/dup_s32.c | 2 +- .../gcc.target/aarch64/sve/acle/asm/dup_s64.c | 2 +- .../gcc.target/aarch64/sve/acle/asm/dup_s8.c | 2 +- .../gcc.target/aarch64/sve/acle/asm/dup_u16.c | 2 +- .../gcc.target/aarch64/sve/acle/asm/dup_u32.c | 2 +- .../gcc.target/aarch64/sve/acle/asm/dup_u64.c | 2 +- .../gcc.target/aarch64/sve/acle/asm/dup_u8.c | 2 +- .../aarch64/sve/acle/general/eqne_dup_1.c | 40 +++++++++++++++++++ 17 files changed, 122 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general/eqne_dup_1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 765e01565d7..29fed4f40f4 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2019-12-02 Richard Sandiford + + * config/aarch64/aarch64-sve-builtins.h + (gimple_folder::force_vector): Declare. + * config/aarch64/aarch64-sve-builtins.cc + (gimple_folder::force_vector): New function. + * config/aarch64/aarch64-sve-builtins-base.cc + (svcmp_impl::fold): Likewise. + (svdup_impl::fold): Handle svdup_z too. + 2019-12-02 Martin Liska * ipa-devirt.c (warn_types_mismatch): Use get_odr_name_for_type diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc index 52166c4d889..38bd3adce1e 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -333,6 +333,28 @@ public: CONSTEXPR svcmp_impl (tree_code code, int unspec_for_fp) : m_code (code), m_unspec_for_fp (unspec_for_fp) {} + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree pg = gimple_call_arg (f.call, 0); + tree rhs1 = gimple_call_arg (f.call, 1); + tree rhs2 = gimple_call_arg (f.call, 2); + + /* Convert a ptrue-predicated integer comparison into the corresponding + gimple-level operation. */ + if (integer_all_onesp (pg) + && f.type_suffix (0).element_bytes == 1 + && f.type_suffix (0).integer_p) + { + gimple_seq stmts = NULL; + rhs2 = f.force_vector (stmts, TREE_TYPE (rhs1), rhs2); + gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); + return gimple_build_assign (f.lhs, m_code, rhs1, rhs2); + } + + return NULL; + } + rtx expand (function_expander &e) const OVERRIDE { @@ -700,6 +722,17 @@ public: return gimple_build_assign (f.lhs, VEC_DUPLICATE_EXPR, rhs); } + /* svdup_z (pg, x) == VEC_COND_EXPR , 0>. */ + if (f.pred == PRED_z) + { + gimple_seq stmts = NULL; + tree pred = f.convert_pred (stmts, vec_type, 0); + rhs = f.force_vector (stmts, vec_type, rhs); + gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); + return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred, rhs, + build_zero_cst (vec_type)); + } + return NULL; } diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index 5dd7ccb74ff..3a6b4704604 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -2234,6 +2234,17 @@ gimple_folder::gimple_folder (const function_instance &instance, tree fndecl, { } +/* VALUE might be a vector of type VECTYPE or a single scalar element. + Duplicate it into a vector of type VECTYPE in the latter case, adding any + new statements to STMTS. */ +tree +gimple_folder::force_vector (gimple_seq &stmts, tree vectype, tree value) +{ + if (!VECTOR_TYPE_P (TREE_TYPE (value))) + value = gimple_build_vector_from_val (&stmts, vectype, value); + return value; +} + /* Convert predicate argument ARGNO so that it has the type appropriate for an operation on VECTYPE. Add any new statements to STMTS. */ tree diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h index 73b07c71e5d..0884b0f6756 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.h +++ b/gcc/config/aarch64/aarch64-sve-builtins.h @@ -488,6 +488,7 @@ public: gimple_folder (const function_instance &, tree, gimple_stmt_iterator *, gcall *); + tree force_vector (gimple_seq &, tree, tree); tree convert_pred (gimple_seq &, tree, unsigned int); tree fold_contiguous_base (gimple_seq &, tree); tree load_store_cookie (tree); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 64f035f63d4..2e6d171750b 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,19 @@ +2019-12-02 Richard Sandiford + + * gcc.target/aarch64/sve/acle/general/eqne_dup_1.c: New test. + * gcc.target/aarch64/sve/acle/asm/dup_f16.c (dup_0_f16_z): Expect + the call to be folded to zero. + * gcc.target/aarch64/sve/acle/asm/dup_f32.c (dup_0_f32_z): Likewise. + * gcc.target/aarch64/sve/acle/asm/dup_f64.c (dup_0_f64_z): Likewise. + * gcc.target/aarch64/sve/acle/asm/dup_s8.c (dup_0_s8_z): Likewise. + * gcc.target/aarch64/sve/acle/asm/dup_s16.c (dup_0_s16_z): Likewise. + * gcc.target/aarch64/sve/acle/asm/dup_s32.c (dup_0_s32_z): Likewise. + * gcc.target/aarch64/sve/acle/asm/dup_s64.c (dup_0_s64_z): Likewise. + * gcc.target/aarch64/sve/acle/asm/dup_u8.c (dup_0_u8_z): Likewise. + * gcc.target/aarch64/sve/acle/asm/dup_u16.c (dup_0_u16_z): Likewise. + * gcc.target/aarch64/sve/acle/asm/dup_u32.c (dup_0_u32_z): Likewise. + * gcc.target/aarch64/sve/acle/asm/dup_u64.c (dup_0_u64_z): Likewise. + 2019-12-02 Sudakshina Das * g++.dg/ext/arm-fp16/arm-fp16-ops.h: Remove volatile keyword. diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f16.c index 2d48b9a3dbc..a90c7118448 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f16.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f16.c @@ -120,7 +120,7 @@ TEST_UNIFORM_Z (dup_1_f16_z, svfloat16_t, /* ** dup_0_f16_z: -** mov z0\.h, p0/z, #0 +** mov z0\.[bhsd], #0 ** ret */ TEST_UNIFORM_Z (dup_0_f16_z, svfloat16_t, diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f32.c index f997b7a7dc8..ba23781429c 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f32.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f32.c @@ -118,7 +118,7 @@ TEST_UNIFORM_Z (dup_1_f32_z, svfloat32_t, /* ** dup_0_f32_z: -** mov z0\.s, p0/z, #0 +** mov z0\.[bhsd], #0 ** ret */ TEST_UNIFORM_Z (dup_0_f32_z, svfloat32_t, diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f64.c index e177d91086d..b397da88567 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f64.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f64.c @@ -118,7 +118,7 @@ TEST_UNIFORM_Z (dup_1_f64_z, svfloat64_t, /* ** dup_0_f64_z: -** mov z0\.d, p0/z, #0 +** mov z0\.[bhsd], #0 ** ret */ TEST_UNIFORM_Z (dup_0_f64_z, svfloat64_t, diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s16.c index 876f36db7f5..21ab6f63e37 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s16.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s16.c @@ -869,7 +869,7 @@ TEST_UNIFORM_Z (dup_m8000_s16_z, svint16_t, /* ** dup_0_s16_z: -** mov z0\.h, p0/z, #0 +** mov z0\.[bhsd], #0 ** ret */ TEST_UNIFORM_Z (dup_0_s16_z, svint16_t, diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s32.c index 0b396dbeb50..500ec48b34a 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s32.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s32.c @@ -849,7 +849,7 @@ TEST_UNIFORM_Z (dup_m8000_s32_z, svint32_t, /* ** dup_0_s32_z: -** mov z0\.s, p0/z, #0 +** mov z0\.[bhsd], #0 ** ret */ TEST_UNIFORM_Z (dup_0_s32_z, svint32_t, diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s64.c index 6259b7fb55d..651bb1b43f0 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s64.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s64.c @@ -849,7 +849,7 @@ TEST_UNIFORM_Z (dup_m8000_s64_z, svint64_t, /* ** dup_0_s64_z: -** mov z0\.d, p0/z, #0 +** mov z0\.[bhsd], #0 ** ret */ TEST_UNIFORM_Z (dup_0_s64_z, svint64_t, diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s8.c index 96fc5fa6494..f3c9db8ead7 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s8.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s8.c @@ -275,7 +275,7 @@ TEST_UNIFORM_Z (dup_m128_s8_z, svint8_t, /* ** dup_0_s8_z: -** mov z0\.b, p0/z, #0 +** mov z0\.[bhsd], #0 ** ret */ TEST_UNIFORM_Z (dup_0_s8_z, svint8_t, diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u16.c index 263eafef0cd..dba409d5b3b 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u16.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u16.c @@ -869,7 +869,7 @@ TEST_UNIFORM_Z (dup_m8000_u16_z, svuint16_t, /* ** dup_0_u16_z: -** mov z0\.h, p0/z, #0 +** mov z0\.[bhsd], #0 ** ret */ TEST_UNIFORM_Z (dup_0_u16_z, svuint16_t, diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u32.c index 667feea6453..7d5b4626fd4 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u32.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u32.c @@ -849,7 +849,7 @@ TEST_UNIFORM_Z (dup_m8000_u32_z, svuint32_t, /* ** dup_0_u32_z: -** mov z0\.s, p0/z, #0 +** mov z0\.[bhsd], #0 ** ret */ TEST_UNIFORM_Z (dup_0_u32_z, svuint32_t, diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u64.c index a7cca7af021..0431e75bc65 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u64.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u64.c @@ -849,7 +849,7 @@ TEST_UNIFORM_Z (dup_m8000_u64_z, svuint64_t, /* ** dup_0_u64_z: -** mov z0\.d, p0/z, #0 +** mov z0\.[bhsd], #0 ** ret */ TEST_UNIFORM_Z (dup_0_u64_z, svuint64_t, diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u8.c index d27f4bba9e3..1bb4cc1bd79 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u8.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u8.c @@ -275,7 +275,7 @@ TEST_UNIFORM_Z (dup_m128_u8_z, svuint8_t, /* ** dup_0_u8_z: -** mov z0\.b, p0/z, #0 +** mov z0\.[bhsd], #0 ** ret */ TEST_UNIFORM_Z (dup_0_u8_z, svuint8_t, diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/eqne_dup_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/eqne_dup_1.c new file mode 100644 index 00000000000..651f5ae18f8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/eqne_dup_1.c @@ -0,0 +1,40 @@ +/* { dg-additional-options "-O" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* +** cmp1: +** ptrue (p[0-7])\.b(?:[^\n]*) +** cmple p0\.b, \1/z, z0\.b, z1\.d +** ret +*/ +svbool_t +cmp1 (svint8_t x, svint64_t y) +{ + svbool_t res = svcmple_wide (svptrue_b8 (), x, y); + svuint8_t res_u8 = svdup_u8_z (res, 1); + return svcmpne (svptrue_b8 (), res_u8, 0); +} + +/* +** cmp2: +** ptrue (p[0-7])\.b(?:[^\n]*) +** cmplt p0\.b, \1/z, z0\.b, z1\.d +** ret +*/ +svbool_t +cmp2 (svint8_t x, svint64_t y) +{ + svbool_t res = svcmplt_wide (svptrue_b8 (), x, y); + svuint8_t res_u8 = svdup_u8_z (res, 42); + return svcmpeq (svptrue_b8 (), res_u8, 42); +} + +#ifdef __cplusplus +} +#endif -- 2.30.2