From 2d56600c8de397d09a16dedd33d310a763a832ae Mon Sep 17 00:00:00 2001
From: Richard Sandiford
Date: Sat, 16 Nov 2019 11:14:51 +0000
Subject: [PATCH] [AArch64] Add truncation for partial SVE modes

This patch adds support for "truncating" to a partial SVE vector from
either a full SVE vector or a wider partial vector.  This truncation is
actually a no-op and so should have zero cost in the vector cost model.

2019-11-16  Richard Sandiford

gcc/
	* config/aarch64/aarch64-sve.md
	(trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2): New pattern.
	* config/aarch64/aarch64.c (aarch64_integer_truncation_p): New
	function.
	(aarch64_sve_adjust_stmt_cost): Call it.

gcc/testsuite/
	* gcc.target/aarch64/sve/mask_struct_load_1.c: Add
	--param aarch64-sve-compare-costs=0.
	* gcc.target/aarch64/sve/mask_struct_load_2.c: Likewise.
	* gcc.target/aarch64/sve/mask_struct_load_3.c: Likewise.
	* gcc.target/aarch64/sve/mask_struct_load_4.c: Likewise.
	* gcc.target/aarch64/sve/mask_struct_load_5.c: Likewise.
	* gcc.target/aarch64/sve/pack_1.c: Likewise.
	* gcc.target/aarch64/sve/truncate_1.c: New test.

From-SVN: r278344
---
 gcc/ChangeLog                                 |  8 ++++
 gcc/config/aarch64/aarch64-sve.md             | 24 ++++++++++
 gcc/config/aarch64/aarch64.c                  | 20 +++++++++
 gcc/testsuite/ChangeLog                       | 11 +++++
 .../aarch64/sve/mask_struct_load_1.c          |  2 +-
 .../aarch64/sve/mask_struct_load_2.c          |  2 +-
 .../aarch64/sve/mask_struct_load_3.c          |  2 +-
 .../aarch64/sve/mask_struct_load_4.c          |  2 +-
 .../aarch64/sve/mask_struct_load_5.c          |  2 +-
 gcc/testsuite/gcc.target/aarch64/sve/pack_1.c |  2 +-
 .../gcc.target/aarch64/sve/truncate_1.c       | 44 +++++++++++++++++++
 11 files changed, 113 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/truncate_1.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 83931c56430..7fe9a114cfe 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2019-11-16  Richard Sandiford
+
+	* config/aarch64/aarch64-sve.md
+	(trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2): New pattern.
+	* config/aarch64/aarch64.c (aarch64_integer_truncation_p): New
+	function.
+	(aarch64_sve_adjust_stmt_cost): Call it.
+
 2019-11-16  Richard Sandiford
 
 	* config/aarch64/aarch64-sve.md
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index ce1bd58c0b9..158a1786a97 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -72,6 +72,7 @@
 ;; ---- [INT] General unary arithmetic corresponding to rtx codes
 ;; ---- [INT] General unary arithmetic corresponding to unspecs
 ;; ---- [INT] Sign and zero extension
+;; ---- [INT] Truncation
 ;; ---- [INT] Logical inverse
 ;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
 ;; ---- [FP] General unary arithmetic corresponding to unspecs
@@ -2888,6 +2889,29 @@
   [(set_attr "movprfx" "*,yes,yes")]
 )
 
+;; -------------------------------------------------------------------------
+;; ---- [INT] Truncation
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
+
+;; Truncate to a partial SVE vector from either a full vector or a
+;; wider partial vector.  This is a no-op, because we can just ignore
+;; the unused upper bits of the source.
+(define_insn_and_split "trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2"
+  [(set (match_operand:SVE_PARTIAL_I 0 "register_operand" "=w")
+	(truncate:SVE_PARTIAL_I
+	  (match_operand:SVE_HSDI 1 "register_operand" "w")))]
+  "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (match_dup 1))]
+  {
+    operands[1] = aarch64_replace_reg_mode (operands[1],
+					    <SVE_PARTIAL_I:MODE>mode);
+  }
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [INT] Logical inverse
 ;; -------------------------------------------------------------------------
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 305c6da2316..f710aa2e795 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -12901,6 +12901,21 @@ aarch64_extending_load_p (stmt_vec_info stmt_info)
 	  && DR_IS_READ (STMT_VINFO_DATA_REF (def_stmt_info)));
 }
 
+/* Return true if STMT_INFO is an integer truncation.  */
+static bool
+aarch64_integer_truncation_p (stmt_vec_info stmt_info)
+{
+  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
+  if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign)))
+    return false;
+
+  tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign));
+  tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign));
+  return (INTEGRAL_TYPE_P (lhs_type)
+	  && INTEGRAL_TYPE_P (rhs_type)
+	  && TYPE_PRECISION (lhs_type) < TYPE_PRECISION (rhs_type));
+}
+
 /* STMT_COST is the cost calculated by aarch64_builtin_vectorization_cost
    for STMT_INFO, which has cost kind KIND.  Adjust the cost as necessary
    for SVE targets.  */
@@ -12919,6 +12934,11 @@ aarch64_sve_adjust_stmt_cost (vect_cost_for_stmt kind, stmt_vec_info stmt_info,
   if (kind == vector_stmt && aarch64_extending_load_p (stmt_info))
     stmt_cost = 0;
 
+  /* For similar reasons, vector_stmt integer truncations are a no-op,
+     because we can just ignore the unused upper bits of the source.  */
+  if (kind == vector_stmt && aarch64_integer_truncation_p (stmt_info))
+    stmt_cost = 0;
+
   return stmt_cost;
 }
 
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index e13aa9f6573..28f99e034f1 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,14 @@
+2019-11-16  Richard Sandiford
+
+	* gcc.target/aarch64/sve/mask_struct_load_1.c: Add
+	--param aarch64-sve-compare-costs=0.
+	* gcc.target/aarch64/sve/mask_struct_load_2.c: Likewise.
+	* gcc.target/aarch64/sve/mask_struct_load_3.c: Likewise.
+	* gcc.target/aarch64/sve/mask_struct_load_4.c: Likewise.
+	* gcc.target/aarch64/sve/mask_struct_load_5.c: Likewise.
+	* gcc.target/aarch64/sve/pack_1.c: Likewise.
+	* gcc.target/aarch64/sve/truncate_1.c: New test.
+
 2019-11-16  Richard Sandiford
 
 	* gcc.target/aarch64/sve/load_extend_1.c: New test.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_1.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_1.c
index d4503322d77..03b2b93df07 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_2.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_2.c
index 812b96723cb..87ac3178be0 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_3.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_3.c
index 29702ab55f2..54806f93ad9 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_4.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_4.c
index 436da085760..4c73004f68d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_4.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_5.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_5.c
index 08e58247f00..da367e4fd79 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_5.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pack_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pack_1.c
index d9de9963d09..7c7bfdd030c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pack_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pack_1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-options "-O2 -ftree-vectorize --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/truncate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/truncate_1.c
new file mode 100644
index 00000000000..aeb06455575
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/truncate_1.c
@@ -0,0 +1,44 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(TYPE1, TYPE2, SHIFT)					\
+  void									\
+  f_##TYPE1##_##TYPE2 (TYPE2 *restrict dst, TYPE1 *restrict src1,	\
+		       TYPE1 *restrict src2, int n)			\
+  {									\
+    for (int i = 0; i < n; ++i)						\
+      dst[i] = (TYPE1) (src1[i] + src2[i]) >> SHIFT;			\
+  }
+
+#define TEST_ALL(T)			\
+  T (uint16_t, uint8_t, 2)		\
+  T (uint32_t, uint8_t, 18)		\
+  T (uint64_t, uint8_t, 34)		\
+  T (uint32_t, uint16_t, 3)		\
+  T (uint64_t, uint16_t, 19)		\
+  T (uint64_t, uint32_t, 4)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h,} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s,} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, z[0-9]+\.h, #2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, z[0-9]+\.s, #18\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, z[0-9]+\.d, #34\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, z[0-9]+\.s, #3\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, z[0-9]+\.d, #19\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, z[0-9]+\.d, #4\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.h,} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s,} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.d,} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s,} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.d,} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.d,} 1 } } */
-- 
2.30.2
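
Editor's note, not part of the patch: a minimal, hypothetical sketch of the kind of loop this change is aimed at.  The function name and the exact flags are illustrative assumptions; the idea is that on an SVE target the uint16_t data can live "unpacked" in the low half of each 32-bit lane, so the narrowing conversion needs no instruction and, with this patch, is no longer charged in the vector cost model.

/* Illustrative only -- assumes something like
   -O2 -ftree-vectorize -march=armv8.2-a+sve.  The loop loads and adds
   32-bit elements (ld1w/add on .s lanes) and stores the low 16 bits of
   each lane with a truncating st1h; the uint32_t -> uint16_t truncation
   itself is just a reinterpretation of the same register.  */
#include <stdint.h>

void
narrow_add (uint16_t *restrict dst, const uint32_t *restrict src1,
	    const uint32_t *restrict src2, int n)
{
  for (int i = 0; i < n; ++i)
    dst[i] = (uint16_t) (src1[i] + src2[i]);  /* 32-bit -> 16-bit truncation */
}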