re PR tree-optimization/65494 (Loop is not vectorized because of operand canonicalization)
author Richard Biener <rguenther@suse.de>
Mon, 23 Mar 2015 12:47:54 +0000 (12:47 +0000)
committer Richard Biener <rguenth@gcc.gnu.org>
Mon, 23 Mar 2015 12:47:54 +0000 (12:47 +0000)
2015-03-23  Richard Biener  <rguenther@suse.de>

PR tree-optimization/65494
* tree-vect-slp.c (vect_build_slp_tree): Do not (re-)allocate
matches here.
(vect_analyze_slp_instance): But do that here, always and once.

* gcc.dg/vect/pr65494.c: New testcase.

From-SVN: r221592

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/vect/pr65494.c [new file with mode: 0644]
gcc/tree-vect-slp.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d269b76c464e1b0b862a9859d6c612afb485a23d..cb657f66fa8577ca01edfb1e59a7823a719e16af 100644 (file)
@@ -1,3 +1,10 @@
+2015-03-23  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/65494
+       * tree-vect-slp.c (vect_build_slp_tree): Do not (re-)allocate
+       matches here.
+       (vect_analyze_slp_instance): But do that here, always and once.
+
 2015-03-23  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
 
        * expmed.c (synth_mult): Fix comment about multiplying by T-1 and
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 9b84418bc81db3c74569ce1b86261f7da1b9951d..cafb23b4b27e58b9d28534ddb5e779c307d4131a 100644 (file)
@@ -1,3 +1,8 @@
+2015-03-23  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/65494
+       * gcc.dg/vect/pr65494.c: New testcase.
+
 2015-03-23  Jakub Jelinek  <jakub@redhat.com>
 
        PR preprocessor/65238
diff --git a/gcc/testsuite/gcc.dg/vect/pr65494.c b/gcc/testsuite/gcc.dg/vect/pr65494.c
new file mode 100644 (file)
index 0000000..753985f
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-do compile } */\r
+/* { dg-additional-options "-Ofast -funroll-loops" } */\r
+\r
+typedef unsigned char uchar;\r
+typedef struct rgb_ {uchar r; uchar g; uchar b;} rgb;\r
+#define N 512\r
+rgb in[N], out[N];\r
+float c[N];\r
+void foo(int n)\r
+{\r
+  int i, j;\r
+  for (i = 0; i < n; i++) //in and out are RGB byte arrays\r
+    {\r
+      float r = 0, g = 0, b = 0;\r
+      for (j = 0; j < 5; j++)\r
+       {\r
+         r += (float)in[i + j].r * c[j];\r
+         g += (float)in[i + j].g * c[j];\r
+         b += (float)in[i + j].b * c[j];\r
+       }\r
+      out[i].r = (uchar)r;\r
+      out[i].g = (uchar)g;\r
+      out[i].b = (uchar)b;\r
+    }\r
+}\r
+\r
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { target { vect_float && vect_intfloat_cvt } } } } */\r
+/* { dg-final { cleanup-tree-dump "vect" } } */\r
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index ce0ca9103021d2133253804760b7c14beebe228a..c57a5caf68b87ca55b03c18bf1227ab5e40c4b51 100644 (file)
@@ -926,14 +926,9 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     unsigned max_tree_size)
 {
-  unsigned nops, i, this_npermutes = 0, this_tree_size = 0;
+  unsigned nops, i, this_tree_size = 0;
   gimple stmt;
 
-  if (!matches)
-    matches = XALLOCAVEC (bool, group_size);
-  if (!npermutes)
-    npermutes = &this_npermutes;
-
   matches[0] = false;
 
   stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
@@ -1012,7 +1007,6 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
          return false;
        }
 
-      bool *matches = XALLOCAVEC (bool, group_size);
       if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                               group_size, max_nunits, loads,
                               vectorization_factor, matches,
@@ -1637,9 +1631,11 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
   loads.create (group_size);
 
   /* Build the tree for the SLP instance.  */
+  bool *matches = XALLOCAVEC (bool, group_size);
+  unsigned npermutes = 0;
   if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &max_nunits, &loads,
-                          vectorization_factor, NULL, NULL, NULL,
+                          vectorization_factor, matches, &npermutes, NULL,
                           max_tree_size))
     {
       /* Calculate the unrolling factor based on the smallest type.  */