Handle more SLP constant and extern definitions for variable VF

author Richard Sandiford <richard.sandiford@linaro.org>

Sat, 13 Jan 2018 17:58:14 +0000 (17:58 +0000)

committer Richard Sandiford <rsandifo@gcc.gnu.org>

Sat, 13 Jan 2018 17:58:14 +0000 (17:58 +0000)
author Richard Sandiford <richard.sandiford@linaro.org>
Sat, 13 Jan 2018 17:58:14 +0000 (17:58 +0000)
committer Richard Sandiford <rsandifo@gcc.gnu.org>
Sat, 13 Jan 2018 17:58:14 +0000 (17:58 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 242ecb5c145418384dcfaf35d8327d9c2486a4b5..8bb12d3035fe72cab05884300da09ca4dadabe25 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,21 @@
+2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
+           Alan Hayward  <alan.hayward@arm.com>
+           David Sherwood  <david.sherwood@arm.com>
+
+       * tree-vect-slp.c: Include gimple-fold.h and internal-fn.h.
+       (can_duplicate_and_interleave_p): New function.
+       (vect_get_and_check_slp_defs): Take the vector of statements
+       rather than just the current one.  Remove excess parentheses.
+       Restrict rejection of vect_constant_def and vect_external_def
+       for variable-length vectors to boolean types, or types for which
+       can_duplicate_and_interleave_p is false.
+       (vect_build_slp_tree_2): Update call to vect_get_and_check_slp_defs.
+       (duplicate_and_interleave): New function.
+       (vect_get_constant_vectors): Use gimple_build_vector for
+       constant-length vectors and suitable variable-length constant
+       vectors.  Use duplicate_and_interleave for other variable-length
+       vectors.  Don't defer the update when inserting new statements.
+
  2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
             Alan Hayward  <alan.hayward@arm.com>
             David Sherwood  <david.sherwood@arm.com>
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 4b1b97425bb603f4a887b022c585591086ae2461..3f6b5d75c3260f6305e0cbb0da00a705fad6e265 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,53 @@
+2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
+           Alan Hayward  <alan.hayward@arm.com>
+           David Sherwood  <david.sherwood@arm.com>
+
+       * gcc.dg/vect/no-scevccp-slp-30.c: Don't XFAIL for vect_variable_length
+       && vect_load_lanes.
+       * gcc.dg/vect/slp-1.c: Likewise.
+       * gcc.dg/vect/slp-10.c: Likewise.
+       * gcc.dg/vect/slp-12b.c: Likewise.
+       * gcc.dg/vect/slp-12c.c: Likewise.
+       * gcc.dg/vect/slp-17.c: Likewise.
+       * gcc.dg/vect/slp-19b.c: Likewise.
+       * gcc.dg/vect/slp-20.c: Likewise.
+       * gcc.dg/vect/slp-21.c: Likewise.
+       * gcc.dg/vect/slp-22.c: Likewise.
+       * gcc.dg/vect/slp-23.c: Likewise.
+       * gcc.dg/vect/slp-24-big-array.c: Likewise.
+       * gcc.dg/vect/slp-24.c: Likewise.
+       * gcc.dg/vect/slp-28.c: Likewise.
+       * gcc.dg/vect/slp-39.c: Likewise.
+       * gcc.dg/vect/slp-6.c: Likewise.
+       * gcc.dg/vect/slp-7.c: Likewise.
+       * gcc.dg/vect/slp-cond-1.c: Likewise.
+       * gcc.dg/vect/slp-cond-2-big-array.c: Likewise.
+       * gcc.dg/vect/slp-cond-2.c: Likewise.
+       * gcc.dg/vect/slp-multitypes-1.c: Likewise.
+       * gcc.dg/vect/slp-multitypes-8.c: Likewise.
+       * gcc.dg/vect/slp-multitypes-9.c: Likewise.
+       * gcc.dg/vect/slp-multitypes-10.c: Likewise.
+       * gcc.dg/vect/slp-multitypes-12.c: Likewise.
+       * gcc.dg/vect/slp-perm-6.c: Likewise.
+       * gcc.dg/vect/slp-widen-mult-half.c: Likewise.
+       * gcc.dg/vect/vect-live-slp-1.c: Likewise.
+       * gcc.dg/vect/vect-live-slp-2.c: Likewise.
+       * gcc.dg/vect/pr33953.c: Don't XFAIL for vect_variable_length.
+       * gcc.dg/vect/slp-12a.c: Likewise.
+       * gcc.dg/vect/slp-14.c: Likewise.
+       * gcc.dg/vect/slp-15.c: Likewise.
+       * gcc.dg/vect/slp-multitypes-2.c: Likewise.
+       * gcc.dg/vect/slp-multitypes-4.c: Likewise.
+       * gcc.dg/vect/slp-multitypes-5.c: Likewise.
+       * gcc.target/aarch64/sve/slp_1.c: New test.
+       * gcc.target/aarch64/sve/slp_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/slp_2.c: Likewise.
+       * gcc.target/aarch64/sve/slp_2_run.c: Likewise.
+       * gcc.target/aarch64/sve/slp_3.c: Likewise.
+       * gcc.target/aarch64/sve/slp_3_run.c: Likewise.
+       * gcc.target/aarch64/sve/slp_4.c: Likewise.
+       * gcc.target/aarch64/sve/slp_4_run.c: Likewise.
+
  2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
             Alan Hayward  <alan.hayward@arm.com>
             David Sherwood  <david.sherwood@arm.com>
diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-slp-30.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-slp-30.c

index 8f856596ce6a7b3c5dc4e2dc5866c0afc70b1ece..fe9e7e7ab4038acfe02d3e6ea9c4fc37ba207043 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/no-scevccp-slp-30.c
+++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-slp-30.c
@@ -52,5 +52,5 @@ int main (void)
  }
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */
    
diff --git a/gcc/testsuite/gcc.dg/vect/pr33953.c b/gcc/testsuite/gcc.dg/vect/pr33953.c

index deb66828d56279fc0e57c372efad4cec47df5bae..4dd54cd57f3e4b0e7eb724d032b2c85d0bf6f736 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/pr33953.c
+++ b/gcc/testsuite/gcc.dg/vect/pr33953.c
@@ -29,6 +29,6 @@ void blockmove_NtoN_blend_noremap32 (const UINT32 *srcdata, int srcwidth,
  }
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { { vect_no_align && { ! vect_hw_misalign } } || vect_variable_length } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */
  
  
diff --git a/gcc/testsuite/gcc.dg/vect/slp-1.c b/gcc/testsuite/gcc.dg/vect/slp-1.c

index db06995059bc8d4354f80e53922eb414bf5ae307..26b71d654252bcd2e4591f11a78a4c0a3dad5d85 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-1.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-1.c
@@ -118,5 +118,5 @@ int main (void)
  }
  
  /* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect"  } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } } */
    
diff --git a/gcc/testsuite/gcc.dg/vect/slp-10.c b/gcc/testsuite/gcc.dg/vect/slp-10.c

index d5775ef737b17537da8891aa2c6c2dc73b7044a6..da44f26601a9ba8ea52417ec5a160dc4bedfc315 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-10.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-10.c
@@ -107,7 +107,7 @@ int main (void)
  /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect"  {target {vect_uintfloat_cvt && vect_int_mult} } } } */
  /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect"  {target {{! { vect_uintfloat_cvt}} && vect_int_mult} } } } */
  /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect"  {target {{! { vect_uintfloat_cvt}} && { ! {vect_int_mult}}} } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target { vect_uintfloat_cvt && vect_int_mult } xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target { vect_uintfloat_cvt && vect_int_mult }} } } */
  /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect"  {target {{! { vect_uintfloat_cvt}} && vect_int_mult} } } } */
  /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect"  {target {{! { vect_uintfloat_cvt}} && { ! {vect_int_mult}}} } } } */
    
diff --git a/gcc/testsuite/gcc.dg/vect/slp-12a.c b/gcc/testsuite/gcc.dg/vect/slp-12a.c

index 522ab64cf09436558032db658a8a28f2fd1fbe67..08a8f55bab0b3d09e7eae14354c515203146b3d8 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-12a.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-12a.c
@@ -75,5 +75,5 @@ int main (void)
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided8 && vect_int_mult } } } } */
  /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided8 && vect_int_mult } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_strided8 && vect_int_mult } xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_strided8 && vect_int_mult } } } } */
  /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided8 && vect_int_mult } } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-12b.c b/gcc/testsuite/gcc.dg/vect/slp-12b.c

index d1a28ac8a3e6014fe3268cc978a6880d49df34bf..48e78651a6dca24de91a1f36d0cd757e18f7c1b8 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-12b.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-12b.c
@@ -46,6 +46,6 @@ int main (void)
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { vect_strided2 && vect_int_mult } } } } */
  /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect"  { target { ! { vect_strided2 && vect_int_mult } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect"  { target { vect_strided2 && vect_int_mult } xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect"  { target { vect_strided2 && vect_int_mult } } } } */
  /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect"  { target { ! { vect_strided2 && vect_int_mult } } } } } */
    
diff --git a/gcc/testsuite/gcc.dg/vect/slp-12c.c b/gcc/testsuite/gcc.dg/vect/slp-12c.c

index df760327b5d99bbc15ed3a8e216a6d433154b2c5..6650b8bd94ece71dd9ccb9adcc3d17be2f2bc07a 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-12c.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-12c.c
@@ -48,5 +48,5 @@ int main (void)
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { vect_int_mult } } } } */
  /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect"  { target { ! vect_int_mult } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_int_mult xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_int_mult } } } */
  /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_int_mult } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-14.c b/gcc/testsuite/gcc.dg/vect/slp-14.c

index a5916047cef647b95899de58dc56e00f6dd3482c..6af70815dd43c13fc9abfcebd70c562268dea86f 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-14.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-14.c
@@ -111,5 +111,5 @@ int main (void)
  }
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_int_mult } } }  */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_int_mult xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_int_mult } } } */
    
diff --git a/gcc/testsuite/gcc.dg/vect/slp-15.c b/gcc/testsuite/gcc.dg/vect/slp-15.c

index e09e967559cce4263d9678c5e7c7aa0e60cfdf04..dbced88c98d1fc8d289e6ac32a84dc9f4072e49f 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-15.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-15.c
@@ -112,6 +112,6 @@ int main (void)
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  {target vect_int_mult } } } */
  /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect"  {target  { ! { vect_int_mult } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target vect_int_mult xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target vect_int_mult } } } */
  /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target { ! { vect_int_mult } } } } } */
    
diff --git a/gcc/testsuite/gcc.dg/vect/slp-17.c b/gcc/testsuite/gcc.dg/vect/slp-17.c

index 7f26884388a7dfe0a6831b1edd80f3e5ea8611f8..6fa11e4c53ad73735af9ee74f56ddff0b777b99b 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-17.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-17.c
@@ -51,5 +51,5 @@ int main (void)
  }
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
    
diff --git a/gcc/testsuite/gcc.dg/vect/slp-19b.c b/gcc/testsuite/gcc.dg/vect/slp-19b.c

index e268382f5b0dc4b072fa97d22f731bf76144bcfb..237b36dd227186c8f0cb78b703351fdae6fef27c 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-19b.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-19b.c
@@ -53,5 +53,5 @@ int main (void)
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided4 } } } */
  /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_strided4 } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided4 xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided4 } } } */
  /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_strided4 } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-20.c b/gcc/testsuite/gcc.dg/vect/slp-20.c

index fb825ffbce073aa52417bfeb3e2be83c57bd8006..dc5eab669ea9eaf7db83606b4c426921a6a5da15 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-20.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-20.c
@@ -110,5 +110,5 @@ int main (void)
  }
  
  /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect"  } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } } */
    
diff --git a/gcc/testsuite/gcc.dg/vect/slp-21.c b/gcc/testsuite/gcc.dg/vect/slp-21.c

index 25c75d6576d6da07c10de8c305689b4a4532bdf7..1f8c82e8ba8b4630ec47051346713cf67db4196d 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-21.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-21.c
@@ -201,6 +201,6 @@ int main (void)
  
  /* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect"  { target { vect_strided4 || vect_extract_even_odd } } } } */
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target  { ! { vect_strided4 || vect_extract_even_odd } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_strided4 xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_strided4 } } } */
  /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect"  { target { ! { vect_strided4 } } } } } */
    
diff --git a/gcc/testsuite/gcc.dg/vect/slp-22.c b/gcc/testsuite/gcc.dg/vect/slp-22.c

index b7a2015c936cacf052931d71d8747f64ea0042e4..e2a0002ffaf363fc12b76deaaee3067c9a0a186b 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-22.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-22.c
@@ -129,5 +129,5 @@ int main (void)
  }
  
  /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect"  } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 6 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 6 "vect" } } */
    
diff --git a/gcc/testsuite/gcc.dg/vect/slp-23.c b/gcc/testsuite/gcc.dg/vect/slp-23.c

index 88708e645d6bf949947a57dbc02d7aad0b9c17de..3cda497db0cd5331ad0b6156512a4972a64b4fa0 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-23.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-23.c
@@ -109,6 +109,6 @@ int main (void)
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided8 || vect_no_align } } } } } */
  /* We fail to vectorize the second loop with variable-length SVE but
     fall back to 128-bit vectors, which does use SLP.  */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { ! vect_perm } xfail aarch64_sve } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { ! vect_perm } } } } */
  /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_perm } } } */
    
diff --git a/gcc/testsuite/gcc.dg/vect/slp-24-big-array.c b/gcc/testsuite/gcc.dg/vect/slp-24-big-array.c

index 58cedb45dda1a2a163e422dd73028922b12a5bc0..abd3a878f1ac36a7c8cde58743496f79b71f4476 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-24-big-array.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-24-big-array.c
@@ -91,4 +91,4 @@ int main (void)
  }
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align && ilp32 } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { { vect_no_align && ilp32 } || vect_variable_length } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { vect_no_align && ilp32 } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-24.c b/gcc/testsuite/gcc.dg/vect/slp-24.c

index 8d298cb0022d7dfecc1cd7b9887be6020a26b583..a45ce7de71fa6a8595b611dd47507df4e91e3b36 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-24.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-24.c
@@ -77,4 +77,4 @@ int main (void)
  }
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align && ilp32 } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { { vect_no_align && ilp32 } || vect_variable_length } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { vect_no_align && ilp32 } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-28.c b/gcc/testsuite/gcc.dg/vect/slp-28.c

index 95db4187df1d3aa0903176581e0d8b8daa759416..7778bad44653e7b29f4f2486236aab8be2f07919 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-28.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-28.c
@@ -89,5 +89,5 @@ int main (void)
  }
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */
    
diff --git a/gcc/testsuite/gcc.dg/vect/slp-39.c b/gcc/testsuite/gcc.dg/vect/slp-39.c

index 330a626dd0f11b0567fb59b90d6d2e26ed625e95..85d32eaf748a64820a374eec88faad3ad5aac2ae 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-39.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-39.c
@@ -21,4 +21,4 @@ void bar (double w)
      }\r
  }\r
  \r
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail vect_variable_length } } } */\r
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */\r
diff --git a/gcc/testsuite/gcc.dg/vect/slp-6.c b/gcc/testsuite/gcc.dg/vect/slp-6.c

index 8205d542f4dcd222e3307d863b425f0324c62151..ec85eb77236e4b8bf5e0c6a8d07abf44a28e2a5c 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-6.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-6.c
@@ -116,6 +116,6 @@ int main (void)
  
  /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect"  {target vect_int_mult} } } */
  /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect"  {target  { ! { vect_int_mult } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target vect_int_mult xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target vect_int_mult } } } */
  /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target  { ! { vect_int_mult } } } } } */
    
diff --git a/gcc/testsuite/gcc.dg/vect/slp-7.c b/gcc/testsuite/gcc.dg/vect/slp-7.c

index bd7d44b5b5f17fe6bffe735182215ba5a28bac63..e836a1ae9b5b60685e8ec2d15ca5005ff35a895e 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-7.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-7.c
@@ -122,6 +122,6 @@ int main (void)
  
  /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect"  { target vect_short_mult } } }*/
  /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect"  { target { ! { vect_short_mult } } } } }*/
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect"  { target vect_short_mult xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect"  { target vect_short_mult } } } */
  /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect"  { target { ! { vect_short_mult } } } } } */
   
diff --git a/gcc/testsuite/gcc.dg/vect/slp-cond-1.c b/gcc/testsuite/gcc.dg/vect/slp-cond-1.c

index fd9165fec812411a7e5fd8d47d298c72a9cb1975..482fc080a0fc132409509b084fcd67ef95f2aa17 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-cond-1.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-cond-1.c
@@ -122,4 +122,4 @@ main ()
    return 0;
  }
  
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-cond-2-big-array.c b/gcc/testsuite/gcc.dg/vect/slp-cond-2-big-array.c

index d5da5f2a163a08795128d9bd45dd65777e04854d..57cc67ee121108bcc5ccaaee0dca5085264c8818 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-cond-2-big-array.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-cond-2-big-array.c
@@ -125,4 +125,4 @@ main ()
    return 0;
  }
  
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-cond-2.c b/gcc/testsuite/gcc.dg/vect/slp-cond-2.c

index e206aedb55bb5514ad121e8abef98fb8bce6bb62..7350695ece0f53e36de861c4e7724ebf36ff6b76 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-cond-2.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-cond-2.c
@@ -125,4 +125,4 @@ main ()
    return 0;
  }
  
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-1.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-1.c

index 4316d81bd2f37fa311fb22e13929a098d680c8c0..1850f063eb4fc74c26a9b1a1016f9d70a0c28441 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-1.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-1.c
@@ -52,5 +52,5 @@ int main (void)
  }
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
    
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-10.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-10.c

index 68946c214bb770f9a98c6dcf9af1cfdab813e651..62580c070c8e19468812a9c81edc1c5847327ebb 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-10.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-10.c
@@ -46,5 +46,5 @@ int main (void)
  }
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_pack_trunc } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect"  { target vect_pack_trunc xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect"  { target vect_pack_trunc } } } */
    
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c

index d37434593d45526ee3391ecdd7862b48afb4420a..d4c929de2ecbc73c75c08ae498b8b400f67bf636 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c
@@ -62,5 +62,5 @@ int main (void)
  }
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */
    
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-2.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-2.c

index 0eca73af699c60e05aed5dca592c4ec9c51db7c4..28a645c79472578d3775e9e2eb28cb7ee69efad0 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-2.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-2.c
@@ -77,5 +77,5 @@ int main (void)
  }
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect"  } } */
    
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c

index 2ab689ee54c0ce1715b0fc538923fcc0fe7a92fd..faf17d6f0cde5eacb7756996a224e4004b305f7f 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c
@@ -52,5 +52,5 @@ int main (void)
  }
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_unpack } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect"  { target vect_unpack xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect"  { target vect_unpack } } } */
    
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-5.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-5.c

index 1153e7b194dc7aec70a9cbb80a0d354a1b4b4304..fb4f720aa4935da6862951a3c618799bb37f535f 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-5.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-5.c
@@ -52,5 +52,5 @@ int main (void)
  }
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc } } } */
    
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-8.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-8.c

index 43faec933b0b6b5b52c18ddc10b689391e7fd166..d88ebe4d778c4487c00ef055059d2b825542679a 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-8.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-8.c
@@ -40,5 +40,5 @@ int main (void)
  }
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_unpack } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect"  { target vect_unpack xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect"  { target vect_unpack } } } */
    
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-9.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-9.c

index ad9ffb6030d772a64097e72ec058d54e6e767107..872b20cac93c119854b8250eb85dc43767743da4 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-9.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-9.c
@@ -40,5 +40,5 @@ int main (void)
  }
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_pack_trunc } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect"  { target vect_pack_trunc xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect"  { target vect_pack_trunc } } } */
    
diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-6.c b/gcc/testsuite/gcc.dg/vect/slp-perm-6.c

index b7d7657939f07245214a6415033fcc807cbb1952..4eb648ac71b2f45e513afbda873b638b898aa6e3 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-perm-6.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-perm-6.c
@@ -104,7 +104,7 @@ int main (int argc, const char* argv[])
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_perm } } } */
  /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm3_int && { ! vect_load_lanes } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_load_lanes xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_load_lanes } } } */
  /* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm3_int && vect_load_lanes } } } } */
  /* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
  /* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c

index e06267c2f317f17c230aabc2359591fab1fbc827..f5fb63e19f15988b5de4854923169aafa24d99e4 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c
@@ -46,7 +46,7 @@ int main (void)
  }
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_widen_mult_hi_to_si xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_widen_mult_hi_to_si } } } */
  /* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
  /* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
  
diff --git a/gcc/testsuite/gcc.dg/vect/vect-live-slp-1.c b/gcc/testsuite/gcc.dg/vect/vect-live-slp-1.c

index fc6a92478fa22924ccd453cb49dfd33c90884214..aff37c100f046021b7834ef0bfa399744a618dd8 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/vect-live-slp-1.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-live-slp-1.c
@@ -68,5 +68,5 @@ main (void)
  }
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } } */
  /* { dg-final { scan-tree-dump-times "vec_stmt_relevant_p: stmt live but not relevant" 4 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-live-slp-2.c b/gcc/testsuite/gcc.dg/vect/vect-live-slp-2.c

index 6c66d294c6491c29873ae892d04c11a2c62b5034..35689665b548cf6ade0c8e8e2fbd490335ce7779 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/vect-live-slp-2.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-live-slp-2.c
@@ -62,5 +62,5 @@ main (void)
  }
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
  /* { dg-final { scan-tree-dump-times "vec_stmt_relevant_p: stmt live but not relevant" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c

new file mode 100644 (file)

index 0000000..dffc7b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=scalable" } */
+
+#include <stdint.h>
+
+#define VEC_PERM(TYPE)                                         \
+TYPE __attribute__ ((noinline, noclone))                       \
+vec_slp_##TYPE (TYPE *restrict a, TYPE b, TYPE c, int n)       \
+{                                                              \
+  for (int i = 0; i < n; ++i)                                  \
+    {                                                          \
+      a[i * 2] += b;                                           \
+      a[i * 2 + 1] += c;                                       \
+    }                                                          \
+}
+
+#define TEST_ALL(T)                            \
+  T (int8_t)                                   \
+  T (uint8_t)                                  \
+  T (int16_t)                                  \
+  T (uint16_t)                                 \
+  T (int32_t)                                  \
+  T (uint32_t)                                 \
+  T (int64_t)                                  \
+  T (uint64_t)                                 \
+  T (_Float16)                                 \
+  T (float)                                    \
+  T (double)
+
+TEST_ALL (VEC_PERM)
+
+/* We should use one DUP for each of the 8-, 16- and 32-bit types,
+   although we currently use LD1RW for _Float16.  We should use two
+   DUPs for each of the three 64-bit types.  */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, [hw]} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, [sw]} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, [dx]} 9 } } */
+/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-not {\tzip2\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_1_run.c

new file mode 100644 (file)

index 0000000..0ce056a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_1_run.c
@@ -0,0 +1,30 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "slp_1.c"
+
+#define N (103 * 2)
+
+#define HARNESS(TYPE)                                          \
+  {                                                            \
+    TYPE a[N], b[2] = { 3, 11 };                               \
+    for (unsigned int i = 0; i < N; ++i)                       \
+      {                                                                \
+       a[i] = i * 2 + i % 5;                                   \
+       asm volatile ("" ::: "memory");                         \
+      }                                                                \
+    vec_slp_##TYPE (a, b[0], b[1], N / 2);                     \
+    for (unsigned int i = 0; i < N; ++i)                       \
+      {                                                                \
+       TYPE orig = i * 2 + i % 5;                              \
+       TYPE expected = orig + b[i % 2];                        \
+       if (a[i] != expected)                                   \
+         __builtin_abort ();                                   \
+      }                                                                \
+  }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  TEST_ALL (HARNESS)
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_2.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_2.c

new file mode 100644 (file)

index 0000000..0a25887
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_2.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=scalable" } */
+
+#include <stdint.h>
+
+#define VEC_PERM(TYPE)                                         \
+TYPE __attribute__ ((noinline, noclone))                       \
+vec_slp_##TYPE (TYPE *restrict a, int n)                       \
+{                                                              \
+  for (int i = 0; i < n; ++i)                                  \
+    {                                                          \
+      a[i * 2] += 10;                                          \
+      a[i * 2 + 1] += 17;                                      \
+    }                                                          \
+}
+
+#define TEST_ALL(T)                            \
+  T (int8_t)                                   \
+  T (uint8_t)                                  \
+  T (int16_t)                                  \
+  T (uint16_t)                                 \
+  T (int32_t)                                  \
+  T (uint32_t)                                 \
+  T (int64_t)                                  \
+  T (uint64_t)                                 \
+  T (_Float16)                                 \
+  T (float)                                    \
+  T (double)
+
+TEST_ALL (VEC_PERM)
+
+/* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h, } 2 } } */
+/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 3 } } */
+/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 3 } } */
+/* { dg-final { scan-assembler-times {\tld1rqb\tz[0-9]+\.b, } 3 } } */
+/* { dg-final { scan-assembler-not {\tzip1\t} } } */
+/* { dg-final { scan-assembler-not {\tzip2\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_2_run.c

new file mode 100644 (file)

index 0000000..bb5ef66
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_2_run.c
@@ -0,0 +1,30 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "slp_2.c"
+
+#define N (103 * 2)
+
+#define HARNESS(TYPE)                                          \
+  {                                                            \
+    TYPE a[N], b[2] = { 10, 17 };                              \
+    for (unsigned int i = 0; i < N; ++i)                       \
+      {                                                                \
+       a[i] = i * 2 + i % 5;                                   \
+       asm volatile ("" ::: "memory");                         \
+      }                                                                \
+    vec_slp_##TYPE (a, N / 2);                                 \
+    for (unsigned int i = 0; i < N; ++i)                       \
+      {                                                                \
+       TYPE orig = i * 2 + i % 5;                              \
+       TYPE expected = orig + b[i % 2];                        \
+       if (a[i] != expected)                                   \
+         __builtin_abort ();                                   \
+      }                                                                \
+  }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  TEST_ALL (HARNESS)
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_3.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_3.c

new file mode 100644 (file)

index 0000000..534ad44
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_3.c
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=scalable" } */
+
+#include <stdint.h>
+
+#define VEC_PERM(TYPE)                                         \
+TYPE __attribute__ ((noinline, noclone))                       \
+vec_slp_##TYPE (TYPE *restrict a, int n)                       \
+{                                                              \
+  for (int i = 0; i < n; ++i)                                  \
+    {                                                          \
+      a[i * 4] += 41;                                          \
+      a[i * 4 + 1] += 25;                                      \
+      a[i * 4 + 2] += 31;                                      \
+      a[i * 4 + 3] += 62;                                      \
+    }                                                          \
+}
+
+#define TEST_ALL(T)                            \
+  T (int8_t)                                   \
+  T (uint8_t)                                  \
+  T (int16_t)                                  \
+  T (uint16_t)                                 \
+  T (int32_t)                                  \
+  T (uint32_t)                                 \
+  T (int64_t)                                  \
+  T (uint64_t)                                 \
+  T (_Float16)                                 \
+  T (float)                                    \
+  T (double)
+
+TEST_ALL (VEC_PERM)
+
+/* 1 for each 8-bit type.  */
+/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 2 } } */
+/* 1 for each 16-bit type and 4 for double.  */
+/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 7 } } */
+/* 1 for each 32-bit type.  */
+/* { dg-final { scan-assembler-times {\tld1rqb\tz[0-9]+\.b, } 3 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #41\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #25\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #31\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #62\n} 2 } } */
+/* The 64-bit types need:
+
+      ZIP1 ZIP1 (2 ZIP2s optimized away)
+      ZIP1 ZIP2.  */
+/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_3_run.c

new file mode 100644 (file)

index 0000000..0ec1cea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_3_run.c
@@ -0,0 +1,30 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "slp_3.c"
+
+#define N (77 * 4)
+
+#define HARNESS(TYPE)                                          \
+  {                                                            \
+    TYPE a[N], b[4] = { 41, 25, 31, 62 };                      \
+    for (unsigned int i = 0; i < N; ++i)                       \
+      {                                                                \
+       a[i] = i * 2 + i % 5;                                   \
+       asm volatile ("" ::: "memory");                         \
+      }                                                                \
+    vec_slp_##TYPE (a, N / 4);                                 \
+    for (unsigned int i = 0; i < N; ++i)                       \
+      {                                                                \
+       TYPE orig = i * 2 + i % 5;                              \
+       TYPE expected = orig + b[i % 4];                        \
+       if (a[i] != expected)                                   \
+         __builtin_abort ();                                   \
+      }                                                                \
+  }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  TEST_ALL (HARNESS)
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_4.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_4.c

new file mode 100644 (file)

index 0000000..09f9ded
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_4.c
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=scalable" } */
+
+#include <stdint.h>
+
+#define VEC_PERM(TYPE)                                         \
+TYPE __attribute__ ((noinline, noclone))                       \
+vec_slp_##TYPE (TYPE *restrict a, int n)                       \
+{                                                              \
+  for (int i = 0; i < n; ++i)                                  \
+    {                                                          \
+      a[i * 8] += 99;                                          \
+      a[i * 8 + 1] += 11;                                      \
+      a[i * 8 + 2] += 17;                                      \
+      a[i * 8 + 3] += 80;                                      \
+      a[i * 8 + 4] += 63;                                      \
+      a[i * 8 + 5] += 37;                                      \
+      a[i * 8 + 6] += 24;                                      \
+      a[i * 8 + 7] += 81;                                      \
+    }                                                          \
+}
+
+#define TEST_ALL(T)                            \
+  T (int8_t)                                   \
+  T (uint8_t)                                  \
+  T (int16_t)                                  \
+  T (uint16_t)                                 \
+  T (int32_t)                                  \
+  T (uint32_t)                                 \
+  T (int64_t)                                  \
+  T (uint64_t)                                 \
+  T (_Float16)                                 \
+  T (float)                                    \
+  T (double)
+
+TEST_ALL (VEC_PERM)
+
+/* 1 for each 8-bit type, 4 for each 32-bit type and 8 for double.  */
+/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 22 } } */
+/* 1 for each 16-bit type.  */
+/* { dg-final { scan-assembler-times {\tld1rqb\tz[0-9]+\.b, } 3 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #99\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #11\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #17\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #80\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #63\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #37\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #24\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #81\n} 2 } } */
+/* The 32-bit types need:
+
+      ZIP1 ZIP1 (2 ZIP2s optimized away)
+      ZIP1 ZIP2
+
+   and the 64-bit types need:
+
+      ZIP1 ZIP1 ZIP1 ZIP1 (4 ZIP2s optimized away)
+      ZIP1 ZIP2 ZIP1 ZIP2
+      ZIP1 ZIP2 ZIP1 ZIP2.  */
+/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 33 } } */
+/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 15 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_4_run.c

new file mode 100644 (file)

index 0000000..3ca9dbb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_4_run.c
@@ -0,0 +1,30 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "slp_4.c"
+
+#define N (59 * 8)
+
+#define HARNESS(TYPE)                                          \
+  {                                                            \
+    TYPE a[N], b[8] = { 99, 11, 17, 80, 63, 37, 24, 81 };      \
+    for (unsigned int i = 0; i < N; ++i)                       \
+      {                                                                \
+       a[i] = i * 2 + i % 5;                                   \
+       asm volatile ("" ::: "memory");                         \
+      }                                                                \
+    vec_slp_##TYPE (a, N / 8);                                 \
+    for (unsigned int i = 0; i < N; ++i)                       \
+      {                                                                \
+       TYPE orig = i * 2 + i % 5;                              \
+       TYPE expected = orig + b[i % 8];                        \
+       if (a[i] != expected)                                   \
+         __builtin_abort ();                                   \
+      }                                                                \
+  }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  TEST_ALL (HARNESS)
+}
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c

index f52d82765739f570a2e58ba1a29211313a46d8ec..5f6a33a89706c6fd29e55af272b23ddbd84703f4 100644 (file)
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -43,6 +43,8 @@ along with GCC; see the file COPYING3.  If not see
  #include "dbgcnt.h"
  #include "tree-vector-builder.h"
  #include "vec-perm-indices.h"
+#include "gimple-fold.h"
+#include "internal-fn.h"
  
  
  /* Recursively free the memory allocated for the SLP tree rooted at NODE.  */
@@ -208,24 +210,87 @@ vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
    return -1;
  }
  
+/* Check whether it is possible to load COUNT elements of mode ELT_MODE
+   using the method implemented by duplicate_and_interleave.  Return true
+   if so, storing (each only if nonnull) the number of intermediate vectors
+   in *NVECTORS_OUT, the type of each intermediate vector in *VECTOR_TYPE_OUT
+   and the low and high interleave masks in PERMUTES[0] and PERMUTES[1].  */
+
+static bool
+can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
+                               unsigned int *nvectors_out = NULL,
+                               tree *vector_type_out = NULL,
+                               tree *permutes = NULL)
+{
+  poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
+  poly_int64 nelts;
+  unsigned int nvectors = 1;
+  for (;;)
+    {
+      scalar_int_mode int_mode;
+      poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
+      if (multiple_p (current_vector_size, elt_bytes, &nelts)
+         && int_mode_for_size (elt_bits, 0).exists (&int_mode))
+       {
+         tree int_type = build_nonstandard_integer_type
+           (GET_MODE_BITSIZE (int_mode), 1);
+         tree vector_type = build_vector_type (int_type, nelts);
+         if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
+           {
+             vec_perm_builder sel1 (nelts, 2, 3);
+             vec_perm_builder sel2 (nelts, 2, 3);
+             poly_int64 half_nelts = exact_div (nelts, 2);
+             for (unsigned int i = 0; i < 3; ++i)
+               {
+                 sel1.quick_push (i);
+                 sel1.quick_push (i + nelts);
+                 sel2.quick_push (half_nelts + i);
+                 sel2.quick_push (half_nelts + i + nelts);
+               }
+             vec_perm_indices indices1 (sel1, 2, nelts);
+             vec_perm_indices indices2 (sel2, 2, nelts);
+             if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
+                 && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
+               {
+                 if (nvectors_out)
+                   *nvectors_out = nvectors;
+                 if (vector_type_out)
+                   *vector_type_out = vector_type;
+                 if (permutes)
+                   {
+                     permutes[0] = vect_gen_perm_mask_checked (vector_type,
+                                                               indices1);
+                     permutes[1] = vect_gen_perm_mask_checked (vector_type,
+                                                               indices2);
+                   }
+                 return true;
+               }
+           }
+       }
+      if (!multiple_p (elt_bytes, 2, &elt_bytes))
+       return false;
+      nvectors *= 2;
+    }
+}
  
  /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
     they are of a valid type and that they match the defs of the first stmt of
     the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
-   by swapping operands of STMT when possible.  Non-zero *SWAP indicates swap
-   is required for cond_expr stmts.  Specifically, *SWAP is 1 if STMT is cond
-   and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond
-   and code of comparison needs to be inverted.  If there is any operand swap
-   in this function, *SWAP is set to non-zero value.
+   by swapping operands of STMTS[STMT_NUM] when possible.  Non-zero *SWAP
+   indicates swap is required for cond_expr stmts.  Specifically, *SWAP
+   is 1 if STMT is cond and operands of comparison need to be swapped;
+   *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
+   If there is any operand swap in this function, *SWAP is set to non-zero
+   value.
     If there was a fatal error return -1; if the error could be corrected by
     swapping operands of father node of this one, return 1; if everything is
     ok return 0.  */
-
  static int
  vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
-                            gimple *stmt, unsigned stmt_num,
+                            vec<gimple *> stmts, unsigned stmt_num,
                              vec<slp_oprnd_info> *oprnds_info)
  {
+  gimple *stmt = stmts[stmt_num];
    tree oprnd;
    unsigned int i, number_of_oprnds;
    gimple *def_stmt;
@@ -373,15 +438,15 @@ again:
              types for reduction chains: the first stmt must be a
              vect_reduction_def (a phi node), and the rest
              vect_internal_def.  */
-         if (((oprnd_info->first_dt != dt
-                && !(oprnd_info->first_dt == vect_reduction_def
-                     && dt == vect_internal_def)
-               && !((oprnd_info->first_dt == vect_external_def
-                     || oprnd_info->first_dt == vect_constant_def)
-                    && (dt == vect_external_def
-                        || dt == vect_constant_def)))
-               || !types_compatible_p (oprnd_info->first_op_type,
-                                      TREE_TYPE (oprnd))))
+         tree type = TREE_TYPE (oprnd);
+         if ((oprnd_info->first_dt != dt
+              && !(oprnd_info->first_dt == vect_reduction_def
+                   && dt == vect_internal_def)
+              && !((oprnd_info->first_dt == vect_external_def
+                    || oprnd_info->first_dt == vect_constant_def)
+                   && (dt == vect_external_def
+                       || dt == vect_constant_def)))
+             || !types_compatible_p (oprnd_info->first_op_type, type))
             {
               /* Try swapping operands if we got a mismatch.  */
               if (i == 0
@@ -398,16 +463,12 @@ again:
  
               return 1;
             }
-       }
-
-      /* Check the types of the definitions.  */
-      switch (dt)
-       {
-       case vect_constant_def:
-       case vect_external_def:
-         /* We must already have set a vector size by now.  */
-         gcc_checking_assert (maybe_ne (current_vector_size, 0U));
-         if (!current_vector_size.is_constant ())
+         if ((dt == vect_constant_def
+              || dt == vect_external_def)
+             && !current_vector_size.is_constant ()
+             && (TREE_CODE (type) == BOOLEAN_TYPE
+                 || !can_duplicate_and_interleave_p (stmts.length (),
+                                                     TYPE_MODE (type))))
             {
               if (dump_enabled_p ())
                 {
@@ -419,6 +480,13 @@ again:
                 }
               return -1;
             }
+       }
+
+      /* Check the types of the definitions.  */
+      switch (dt)
+       {
+       case vect_constant_def:
+       case vect_external_def:
           break;
  
         case vect_reduction_def:
@@ -1119,7 +1187,7 @@ vect_build_slp_tree_2 (vec_info *vinfo,
    FOR_EACH_VEC_ELT (stmts, i, stmt)
      {
        int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
-                                            stmt, i, &oprnds_info);
+                                            stmts, i, &oprnds_info);
        if (res != 0)
         matches[(res == -1) ? 0 : i] = false;
        if (!matches[0])
@@ -3219,6 +3287,118 @@ vect_mask_constant_operand_p (gimple *stmt, int opnum)
    return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
  }
  
+/* Build a variable-length vector in which the elements in ELTS are repeated
+   to fill NRESULTS vectors of type VECTOR_TYPE.  Store the vectors in
+   RESULTS and add any new instructions to SEQ.
+
+   The approach we use is:
+
+   (1) Find a vector mode VM with integer elements of mode IM.
+
+   (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
+       ELTS' has mode IM.  This involves creating NELTS' VIEW_CONVERT_EXPRs
+       from small vectors to IM.
+
+   (3) Duplicate each ELTS'[I] into a vector of mode VM.
+
+   (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
+       correct byte contents.
+
+   (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
+
+   We try to find the largest IM for which this sequence works, in order
+   to cut down on the number of interleaves.  */
+
+static void
+duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
+                         unsigned int nresults, vec<tree> &results)
+{
+  unsigned int nelts = elts.length ();
+  tree element_type = TREE_TYPE (vector_type);
+
+  /* (1) Find a vector mode VM with integer elements of mode IM.  */
+  unsigned int nvectors = 1;
+  tree new_vector_type;
+  tree permutes[2];
+  if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
+                                      &nvectors, &new_vector_type,
+                                      permutes))
+    gcc_unreachable ();
+
+  /* Get a vector type that holds ELTS[0:NELTS/NELTS'].  */
+  unsigned int partial_nelts = nelts / nvectors;
+  tree partial_vector_type = build_vector_type (element_type, partial_nelts);
+
+  tree_vector_builder partial_elts;
+  auto_vec<tree, 32> pieces (nvectors * 2);
+  pieces.quick_grow (nvectors * 2);
+  for (unsigned int i = 0; i < nvectors; ++i)
+    {
+      /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
+            ELTS' has mode IM.  */
+      partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
+      for (unsigned int j = 0; j < partial_nelts; ++j)
+       partial_elts.quick_push (elts[i * partial_nelts + j]);
+      tree t = gimple_build_vector (seq, &partial_elts);
+      t = gimple_build (seq, VIEW_CONVERT_EXPR,
+                       TREE_TYPE (new_vector_type), t);
+
+      /* (3) Duplicate each ELTS'[I] into a vector of mode VM.  */
+      pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
+    }
+
+  /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
+        correct byte contents.
+
+     We need to repeat the following operation log2(nvectors) times:
+
+       out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
+       out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
+
+     However, if each input repeats every N elements and the VF is
+     a multiple of N * 2, the HI result is the same as the LO.  */
+  unsigned int in_start = 0;
+  unsigned int out_start = nvectors;
+  unsigned int hi_start = nvectors / 2;
+  /* A bound on the number of outputs needed to produce NRESULTS results
+     in the final iteration.  */
+  unsigned int noutputs_bound = nvectors * nresults;
+  for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
+    {
+      noutputs_bound /= 2;
+      unsigned int limit = MIN (noutputs_bound, nvectors);
+      for (unsigned int i = 0; i < limit; ++i)
+       {
+         if ((i & 1) != 0
+             && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
+                            2 * in_repeat))
+           {
+             pieces[out_start + i] = pieces[out_start + i - 1];
+             continue;
+           }
+
+         tree output = make_ssa_name (new_vector_type);
+         tree input1 = pieces[in_start + (i / 2)];
+         tree input2 = pieces[in_start + (i / 2) + hi_start];
+         gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
+                                              input1, input2,
+                                              permutes[i & 1]);
+         gimple_seq_add_stmt (seq, stmt);
+         pieces[out_start + i] = output;
+       }
+      std::swap (in_start, out_start);
+    }
+
+  /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type.  */
+  results.reserve (nresults);
+  for (unsigned int i = 0; i < nresults; ++i)
+    if (i < nvectors)
+      results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
+                                       pieces[in_start + i]));
+    else
+      results.quick_push (results[i - nvectors]);
+}
+
  
  /* For constant and loop invariant defs of SLP_NODE this function returns
     (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
@@ -3235,7 +3415,7 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
    vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
    gimple *stmt = stmts[0];
    stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
-  unsigned nunits;
+  unsigned HOST_WIDE_INT nunits;
    tree vec_cst;
    unsigned j, number_of_places_left_in_vector;
    tree vector_type;
@@ -3249,6 +3429,7 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
    tree neutral_op = NULL;
    enum tree_code code = gimple_expr_code (stmt);
    gimple_seq ctor_seq = NULL;
+  auto_vec<tree, 16> permute_results;
  
    /* Check if vector type is a boolean vector.  */
    if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
@@ -3257,8 +3438,6 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
        = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
    else
      vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
-  /* Enforced by vect_get_and_check_slp_defs.  */
-  nunits = TYPE_VECTOR_SUBPARTS (vector_type).to_constant ();
  
    if (STMT_VINFO_DATA_REF (stmt_vinfo))
      {
@@ -3286,6 +3465,11 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
       (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
       {s5, s6, s7, s8}.  */
  
+  /* When using duplicate_and_interleave, we just need one element for
+     each scalar statement.  */
+  if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
+    nunits = group_size;
+
    number_of_copies = nunits * number_of_vectors / group_size;
  
    number_of_places_left_in_vector = nunits;
@@ -3407,16 +3591,17 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
  
            if (number_of_places_left_in_vector == 0)
              {
-             if (constant_p)
-               vec_cst = elts.build ();
+             if (constant_p
+                 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
+                 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
+               vec_cst = gimple_build_vector (&ctor_seq, &elts);
               else
                 {
-                 vec<constructor_elt, va_gc> *v;
-                 unsigned k;
-                 vec_alloc (v, nunits);
-                 for (k = 0; k < nunits; ++k)
-                   CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
-                 vec_cst = build_constructor (vector_type, v);
+                 if (vec_oprnds->is_empty ())
+                   duplicate_and_interleave (&ctor_seq, vector_type, elts,
+                                             number_of_vectors,
+                                             permute_results);
+                 vec_cst = permute_results[number_of_vectors - j - 1];
                 }
               tree init;
               gimple_stmt_iterator gsi;
@@ -3431,8 +3616,7 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
               if (ctor_seq != NULL)
                 {
                   gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
-                 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
-                                                       GSI_SAME_STMT);
+                 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
                   ctor_seq = NULL;
                 }
               voprnds.quick_push (init);
author	Richard Sandiford <richard.sandiford@linaro.org>
	Sat, 13 Jan 2018 17:58:14 +0000 (17:58 +0000)
committer	Richard Sandiford <rsandifo@gcc.gnu.org>
	Sat, 13 Jan 2018 17:58:14 +0000 (17:58 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/no-scevccp-slp-30.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/pr33953.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-1.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-10.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-12a.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-12b.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-12c.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-14.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-15.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-17.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-19b.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-20.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-21.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-22.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-23.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-24-big-array.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-24.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-28.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-39.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-6.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-7.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-cond-1.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-cond-2-big-array.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-cond-2.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-multitypes-1.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-multitypes-10.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-multitypes-2.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-multitypes-5.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-multitypes-8.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-multitypes-9.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-perm-6.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/vect-live-slp-1.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/vect-live-slp-2.c		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/sve/slp_1.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/sve/slp_1_run.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/sve/slp_2.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/sve/slp_2_run.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/sve/slp_3.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/sve/slp_3_run.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/sve/slp_4.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/sve/slp_4_run.c	[new file with mode: 0644]	patch \| blob
gcc/tree-vect-slp.c		patch \| blob \| history