Check TYPE_OVERFLOW_WRAPS for parloops reductions
author: Tom de Vries <tom@codesourcery.com>
Thu, 23 Jul 2015 12:17:52 +0000 (12:17 +0000)
committer: Tom de Vries <vries@gcc.gnu.org>
Thu, 23 Jul 2015 12:17:52 +0000 (12:17 +0000)
2015-07-23  Tom de Vries  <tom@codesourcery.com>

* tree-parloops.c (gather_scalar_reductions): Add arg to call to
vect_force_simple_reduction.
* tree-vect-loop.c (vect_analyze_scalar_cycles_1): Same.
(vect_is_simple_reduction_1): Add and handle
need_wrapping_integral_overflow parameter.
(vect_is_simple_reduction, vect_force_simple_reduction): Add and pass
need_wrapping_integral_overflow parameter.
(vectorizable_reduction): Add arg to call to vect_is_simple_reduction.
* tree-vectorizer.h (vect_force_simple_reduction): Add parameter to decl.

* gcc.dg/autopar/outer-4.c: Add xfail.
* gcc.dg/autopar/outer-5.c: Same.
* gcc.dg/autopar/outer-6.c: Same.
* gcc.dg/autopar/reduc-2.c: Same.
* gcc.dg/autopar/reduc-2char.c: Same.
* gcc.dg/autopar/reduc-2short.c: Same.
* gcc.dg/autopar/reduc-8.c: Same.
* gcc.dg/autopar/uns-outer-4.c: New test.
* gcc.dg/autopar/uns-outer-5.c: New test.
* gcc.dg/autopar/uns-outer-6.c: New test.

From-SVN: r226107

15 files changed:
gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/autopar/outer-4.c
gcc/testsuite/gcc.dg/autopar/outer-5.c
gcc/testsuite/gcc.dg/autopar/outer-6.c
gcc/testsuite/gcc.dg/autopar/reduc-2.c
gcc/testsuite/gcc.dg/autopar/reduc-2char.c
gcc/testsuite/gcc.dg/autopar/reduc-2short.c
gcc/testsuite/gcc.dg/autopar/reduc-8.c
gcc/testsuite/gcc.dg/autopar/uns-outer-4.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/autopar/uns-outer-5.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/autopar/uns-outer-6.c [new file with mode: 0644]
gcc/tree-parloops.c
gcc/tree-vect-loop.c
gcc/tree-vectorizer.h

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6e1b53e988cb78673d3838e0e25a837748d49479..81c60beeba6d5dae09fd6bee262d19cb4eb1a337 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2015-07-23  Tom de Vries  <tom@codesourcery.com>
+
+       * tree-parloops.c (gather_scalar_reductions): Add arg to call to
+       vect_force_simple_reduction.
+       * tree-vect-loop.c (vect_analyze_scalar_cycles_1): Same.
+       (vect_is_simple_reduction_1): Add and handle
+       need_wrapping_integral_overflow parameter.
+       (vect_is_simple_reduction, vect_force_simple_reduction): Add and pass
+       need_wrapping_integral_overflow parameter.
+       (vectorizable_reduction): Add arg to call to vect_is_simple_reduction.
+       * tree-vectorizer.h (vect_force_simple_reduction): Add parameter to decl.
+
 2015-07-23  Yuri Rumyantsev  <ysrumyan@gmail.com>
 
        PR tree-optimization/66926,66951
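diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 672be709cc6e1dcede7a349062ebd82d757bca2a..3a6374cfadb68a7ce97301aa818569abb04e77b1 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog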
@@ -1,3 +1,16 @@
+2015-07-23  Tom de Vries  <tom@codesourcery.com>
+
+       * gcc.dg/autopar/outer-4.c: Add xfail.
+       * gcc.dg/autopar/outer-5.c: Same.
+       * gcc.dg/autopar/outer-6.c: Same.
+       * gcc.dg/autopar/reduc-2.c: Same.
+       * gcc.dg/autopar/reduc-2char.c: Same.
+       * gcc.dg/autopar/reduc-2short.c: Same.
+       * gcc.dg/autopar/reduc-8.c: Same.
+       * gcc.dg/autopar/uns-outer-4.c: New test.
+       * gcc.dg/autopar/uns-outer-5.c: New test.
+       * gcc.dg/autopar/uns-outer-6.c: New test.
+
 2015-07-23  Richard Biener  <rguenther@suse.de>
 
        PR tree-optimization/66952
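diff --git a/gcc/testsuite/gcc.dg/autopar/outer-4.c b/gcc/testsuite/gcc.dg/autopar/outer-4.c
index 6fd37c52aa9f3567aa483ff9cfacce070ed227bf..2027499eb288900b17e21e2e18e61b833bc70c82 100644
--- a/gcc/testsuite/gcc.dg/autopar/outer-4.c
+++ b/gcc/testsuite/gcc.dg/autopar/outer-4.c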
@@ -32,4 +32,4 @@ int main(void)
 
 
 /* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */
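diff --git a/gcc/testsuite/gcc.dg/autopar/outer-5.c b/gcc/testsuite/gcc.dg/autopar/outer-5.c
index 6a0ae91da440731496976cb3d54b3e8786c6b38b..d6e0dd32836dab4081d9186d6c98671df3696e77 100644
--- a/gcc/testsuite/gcc.dg/autopar/outer-5.c
+++ b/gcc/testsuite/gcc.dg/autopar/outer-5.c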
@@ -45,4 +45,4 @@ int main(void)
 }
 
 /* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */
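diff --git a/gcc/testsuite/gcc.dg/autopar/outer-6.c b/gcc/testsuite/gcc.dg/autopar/outer-6.c
index 6bef7ccc3e005511b158519c8285be25396ff53f..726794c25ca44b9a93f0ebdc83961ec8ae75aac0 100644
--- a/gcc/testsuite/gcc.dg/autopar/outer-6.c
+++ b/gcc/testsuite/gcc.dg/autopar/outer-6.c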
@@ -44,6 +44,6 @@ int main(void)
 
 
 /* Check that outer loop is parallelized.  */
-/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
 /* { dg-final { scan-tree-dump-times "parallelizing inner loop" 0 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */
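diff --git a/gcc/testsuite/gcc.dg/autopar/reduc-2.c b/gcc/testsuite/gcc.dg/autopar/reduc-2.c
index 3ad16e49d366f465f86a8d1590f6167195f5a7c3..2f4883d08e45529d4ace18b34730f6c22eb945d3 100644
--- a/gcc/testsuite/gcc.dg/autopar/reduc-2.c
+++ b/gcc/testsuite/gcc.dg/autopar/reduc-2.c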
@@ -63,6 +63,6 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 4 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 4 "parloops" { xfail *-*-* } } } */
 
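diff --git a/gcc/testsuite/gcc.dg/autopar/reduc-2char.c b/gcc/testsuite/gcc.dg/autopar/reduc-2char.c
index 072489f4d8541d3d779d3e17ded4c4f352576953..14867f343277cdc9b162806a6eba34217ed486bc 100644
--- a/gcc/testsuite/gcc.dg/autopar/reduc-2char.c
+++ b/gcc/testsuite/gcc.dg/autopar/reduc-2char.c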
@@ -60,7 +60,7 @@ int main (void)
 }
 
 
-/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */
 
 
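diff --git a/gcc/testsuite/gcc.dg/autopar/reduc-2short.c b/gcc/testsuite/gcc.dg/autopar/reduc-2short.c
index 4dbbc8ae45a036dfadb6b1c6e55d30844c3bbc53..7c19cc59fd33b7edc91f424f8f7ce309c4cf6d67 100644
--- a/gcc/testsuite/gcc.dg/autopar/reduc-2short.c
+++ b/gcc/testsuite/gcc.dg/autopar/reduc-2short.c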
@@ -59,6 +59,6 @@ int main (void)
 }
 
 
-/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */
 
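diff --git a/gcc/testsuite/gcc.dg/autopar/reduc-8.c b/gcc/testsuite/gcc.dg/autopar/reduc-8.c
index 16fb954df07f872c5230899ded7c18180a25f8e0..1d05c48274e7b6f45167a86fce04abd82cbb9e9b 100644
--- a/gcc/testsuite/gcc.dg/autopar/reduc-8.c
+++ b/gcc/testsuite/gcc.dg/autopar/reduc-8.c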
@@ -84,5 +84,5 @@ int main (void)
 }
 
 
-/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/autopar/uns-outer-4.c b/gcc/testsuite/gcc.dg/autopar/uns-outer-4.c
new file mode 100644
index 0000000..ef9fc2a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/autopar/uns-outer-4.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */
+
+void abort (void);
+
+unsigned int g_sum=0;
+unsigned int x[500][500];
+
+void __attribute__((noinline))
+parloop (int N)
+{
+  int i, j;
+  unsigned int sum;
+
+  /* Double reduction is currently not supported, outer loop is not
+     parallelized.  Inner reduction is detected, inner loop is
+     parallelized.  */
+  sum = 0;
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      sum += x[i][j];
+
+  g_sum = sum;
+}
+
+int
+main (void)
+{
+  parloop (500);
+
+  return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/autopar/uns-outer-5.c b/gcc/testsuite/gcc.dg/autopar/uns-outer-5.c
new file mode 100644
index 0000000..a929e5d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/autopar/uns-outer-5.c
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */
+
+void abort (void);
+
+unsigned int x[500][500];
+unsigned int y[500];
+unsigned int g_sum=0;
+
+void __attribute__((noinline))
+init (int i, int j)
+{
+  x[i][j]=1;
+}
+
+void __attribute__((noinline))
+parloop (int N)
+{
+  int i, j;
+  unsigned int sum;
+
+  /* Inner cycle is currently not supported, outer loop is not
+     parallelized.  Inner reduction is detected, inner loop is
+     parallelized.  */
+  for (i = 0; i < N; i++)
+    {
+      sum = 0;
+      for (j = 0; j < N; j++)
+       sum += x[i][j];
+      y[i]=sum;
+    }
+  g_sum = sum;
+}
+
+int
+main (void)
+{
+  int i, j;
+  for (i = 0; i < 500; i++)
+    for (j = 0; j < 500; j++)
+      init (i, j);
+
+  parloop (500);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/autopar/uns-outer-6.c b/gcc/testsuite/gcc.dg/autopar/uns-outer-6.c
new file mode 100644
index 0000000..5c745f8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/autopar/uns-outer-6.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */
+
+void abort (void);
+
+unsigned int x[500][500];
+unsigned int y[500];
+unsigned int g_sum=0;
+
+
+void __attribute__((noinline))
+init (int i, int j)
+{
+  x[i][j]=1;
+}
+
+void __attribute__((noinline))
+parloop (int N)
+{
+  int i, j;
+  unsigned int sum;
+
+  /* Outer loop reduction, outerloop is parallelized.  */
+  sum=0;
+  for (i = 0; i < N; i++)
+    {
+      for (j = 0; j < N; j++)
+       y[i]=x[i][j];
+      sum += y[i];
+    }
+  g_sum = sum;
+}
+
+int
+main (void)
+{
+  int i, j;
+  for (i = 0; i < 500; i++)
+    for (j = 0; j < 500; j++)
+      init (i, j);
+
+  parloop (500);
+
+  return 0;
+}
+
+
+/* Check that outer loop is parallelized.  */
+/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "parallelizing inner loop" 0 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
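diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index ec418343c265e6a4c585c838b8abaf61eb63c5c3..88f22e85d44a3ed5352503c06c0c05e8d48d1438 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c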
@@ -2376,9 +2376,9 @@ gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list
       if (!simple_iv (loop, loop, res, &iv, true)
        && simple_loop_info)
        {
-           gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
-                                                           phi, true,
-                                                           &double_reduc);
+          gimple reduc_stmt
+            = vect_force_simple_reduction (simple_loop_info, phi, true,
+                                           &double_reduc, true);
           if (reduc_stmt && !double_reduc)
               build_new_reduction (reduction_list, reduc_stmt, phi);
         }
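diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 9145dbf19e169ab05ee04fa276777a8439c68f2c..c31bfbdbad42d4f77c5207a38809fa7ccc90f805 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c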
@@ -715,7 +715,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
 
       nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
       reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
-                                               &double_reduc);
+                                               &double_reduc, false);
       if (reduc_stmt)
         {
           if (double_reduc)
@@ -2339,7 +2339,7 @@ vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
 static gimple
 vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
                            bool check_reduction, bool *double_reduc,
-                           bool modify)
+                           bool modify, bool need_wrapping_integral_overflow)
 {
   struct loop *loop = (gimple_bb (phi))->loop_father;
   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
@@ -2613,14 +2613,26 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
                        "reduction: unsafe fp math optimization: ");
       return NULL;
     }
-  else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
-          && check_reduction)
+  else if (INTEGRAL_TYPE_P (type) && check_reduction)
     {
-      /* Changing the order of operations changes the semantics.  */
-      if (dump_enabled_p ())
-       report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-                       "reduction: unsafe int math optimization: ");
-      return NULL;
+      if (TYPE_OVERFLOW_TRAPS (type))
+       {
+         /* Changing the order of operations changes the semantics.  */
+         if (dump_enabled_p ())
+           report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+                           "reduction: unsafe int math optimization"
+                           " (overflow traps): ");
+         return NULL;
+       }
+      if (need_wrapping_integral_overflow && !TYPE_OVERFLOW_WRAPS (type))
+       {
+         /* Changing the order of operations changes the semantics.  */
+         if (dump_enabled_p ())
+           report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+                           "reduction: unsafe int math optimization"
+                           " (overflow doesn't wrap): ");
+         return NULL;
+       }
     }
   else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
     {
@@ -2749,10 +2761,12 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
 
 static gimple
 vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
-                          bool check_reduction, bool *double_reduc)
+                         bool check_reduction, bool *double_reduc,
+                         bool need_wrapping_integral_overflow)
 {
   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
-                                    double_reduc, false);
+                                    double_reduc, false,
+                                    need_wrapping_integral_overflow);
 }
 
 /* Wrapper around vect_is_simple_reduction_1, which will modify code
@@ -2761,10 +2775,12 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
 
 gimple
 vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
-                          bool check_reduction, bool *double_reduc)
+                            bool check_reduction, bool *double_reduc,
+                            bool need_wrapping_integral_overflow)
 {
   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
-                                    double_reduc, true);
+                                    double_reduc, true,
+                                    need_wrapping_integral_overflow);
 }
 
 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */
@@ -5074,7 +5090,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
     }
 
   gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
-                                        !nested_cycle, &dummy);
+                                        !nested_cycle, &dummy, false);
   if (orig_stmt)
     gcc_assert (tmp == orig_stmt
                || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
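diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 48c1f8d64b7eac20496b80f1720023ab2ad475d1..dfa879583ac81d78004806e79a3159acc4317605 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h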
@@ -1090,7 +1090,8 @@ extern tree vect_create_addr_base_for_vector_ref (gimple, gimple_seq *,
 /* In tree-vect-loop.c.  */
 /* FORNOW: Used in tree-parloops.c.  */
 extern void destroy_loop_vec_info (loop_vec_info, bool);
-extern gimple vect_force_simple_reduction (loop_vec_info, gimple, bool, bool *);
+extern gimple vect_force_simple_reduction (loop_vec_info, gimple, bool, bool *,
+                                          bool);
 /* Drive for loop analysis stage.  */
 extern loop_vec_info vect_analyze_loop (struct loop *);
 /* Drive for loop transformation stage.  */