From e25c95ef2af3700f614b0c6745c71d5185d53dac Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 23 Aug 2018 08:57:17 +0000 Subject: [PATCH] [AArch64] Improve SVE handling of single-vector permutes aarch64_vectorize_vec_perm_const was failing to set one_vector_p if the permute had only a single input. This in turn was hiding a problem in the SVE TBL handling: it accepted single-vector variable-length permutes, but sent them through the general two-vector aarch64_expand_sve_vec_perm, which is only set up to handle constant-length permutes. 2018-08-23 Richard Sandiford gcc/ * config/aarch64/aarch64.c (aarch64_evpc_sve_tbl): Fix handling of single-vector TBLs. (aarch64_vectorize_vec_perm_const): Set one_vector_p when only one input is given. gcc/testsuite/ * gcc.dg/vect/no-vfa-vect-depend-2.c: Remove XFAIL. * gcc.dg/vect/no-vfa-vect-depend-3.c: Likewise. * gcc.dg/vect/pr65947-13.c: Update for vect_fold_extract_last. * gcc.dg/vect/pr80631-2.c: Likewise. From-SVN: r263804 --- gcc/ChangeLog | 7 +++++++ gcc/config/aarch64/aarch64.c | 8 ++++++-- gcc/testsuite/ChangeLog | 7 +++++++ gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c | 5 +---- gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-3.c | 5 +---- gcc/testsuite/gcc.dg/vect/pr65947-13.c | 3 ++- gcc/testsuite/gcc.dg/vect/pr80631-2.c | 3 ++- 7 files changed, 26 insertions(+), 12 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c7618c66a4c..64e6d8e7b97 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2018-08-23 Richard Sandiford + + * config/aarch64/aarch64.c (aarch64_evpc_sve_tbl): Fix handling + of single-vector TBLs. + (aarch64_vectorize_vec_perm_const): Set one_vector_p when only + one input is given. + 2018-08-23 Richard Sandiford PR target/85910 diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 13ffcff78f1..5bb30e0c69c 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -15423,7 +15423,10 @@ aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d) machine_mode sel_mode = mode_for_int_vector (d->vmode).require (); rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm); - aarch64_expand_sve_vec_perm (d->target, d->op0, d->op1, sel); + if (d->one_vector_p) + emit_unspec2 (d->target, UNSPEC_TBL, d->op0, force_reg (sel_mode, sel)); + else + aarch64_expand_sve_vec_perm (d->target, d->op0, d->op1, sel); return true; } @@ -15476,7 +15479,8 @@ aarch64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, struct expand_vec_perm_d d; /* Check whether the mask can be applied to a single vector. */ - if (op0 && rtx_equal_p (op0, op1)) + if (sel.ninputs () == 1 + || (op0 && rtx_equal_p (op0, op1))) d.one_vector_p = true; else if (sel.all_from_input_p (0)) { diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index d011aa73e38..95fd2e58fe7 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2018-08-23 Richard Sandiford + + * gcc.dg/vect/no-vfa-vect-depend-2.c: Remove XFAIL. + * gcc.dg/vect/no-vfa-vect-depend-3.c: Likewise. + * gcc.dg/vect/pr65947-13.c: Update for vect_fold_extract_last. + * gcc.dg/vect/pr80631-2.c: Likewise. + 2017-08-23 Paul Thomas PR fortran/86863 diff --git a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c index acad8fc0332..1880d1edb32 100644 --- a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c +++ b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c @@ -51,7 +51,4 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ -/* Requires reverse for variable-length SVE, which is implemented for - by a later patch. Until then we report it twice, once for SVE and - once for 128-bit Advanced SIMD. */ -/* { dg-final { scan-tree-dump-times "dependence distance negative" 1 "vect" { xfail { aarch64_sve && vect_variable_length } } } } */ +/* { dg-final { scan-tree-dump-times "dependence distance negative" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-3.c b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-3.c index 1ccfc1edacc..e5914d970e3 100644 --- a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-3.c +++ b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-3.c @@ -183,7 +183,4 @@ int main () } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" {xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ -/* f4 requires reverse for SVE, which is implemented by a later patch. - Until then we report it twice, once for SVE and once for 128-bit - Advanced SIMD. */ -/* { dg-final { scan-tree-dump-times "dependence distance negative" 4 "vect" { xfail { aarch64_sve && vect_variable_length } } } } */ +/* { dg-final { scan-tree-dump-times "dependence distance negative" 4 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-13.c b/gcc/testsuite/gcc.dg/vect/pr65947-13.c index ce290459c50..e1d3ff52f5c 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-13.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-13.c @@ -41,4 +41,5 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" { xfail vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr80631-2.c b/gcc/testsuite/gcc.dg/vect/pr80631-2.c index 6bf239adac5..b334ca2345b 100644 --- a/gcc/testsuite/gcc.dg/vect/pr80631-2.c +++ b/gcc/testsuite/gcc.dg/vect/pr80631-2.c @@ -72,4 +72,5 @@ main () } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 5 "vect" { target vect_condition } } } */ -/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 10 "vect" { target vect_condition } } } */ +/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 10 "vect" { target vect_condition xfail vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 10 "vect" { target vect_fold_extract_last } } } */ -- 2.30.2