From bcdaf3708453cc9f0ad23598f682e17ffd1e5afc Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Thu, 23 Jul 2015 12:17:52 +0000 Subject: [PATCH] Check TYPE_OVERFLOW_WRAPS for parloops reductions 2015-07-23 Tom de Vries * tree-parloops.c (gather_scalar_reductions): Add arg to call to vect_force_simple_reduction. * tree-vect-loop.c (vect_analyze_scalar_cycles_1): Same. (vect_is_simple_reduction_1): Add and handle need_wrapping_integral_overflow parameter. (vect_is_simple_reduction, vect_force_simple_reduction): Add and pass need_wrapping_integral_overflow parameter. (vectorizable_reduction): Add arg to call to vect_is_simple_reduction. * tree-vectorizer.h (vect_force_simple_reduction): Add parameter to decl. * gcc.dg/autopar/outer-4.c: Add xfail. * gcc.dg/autopar/outer-5.c: Same. * gcc.dg/autopar/outer-6.c: Same. * gcc.dg/autopar/reduc-2.c: Same. * gcc.dg/autopar/reduc-2char.c: Same. * gcc.dg/autopar/reduc-2short.c: Same. * gcc.dg/autopar/reduc-8.c: Same. * gcc.dg/autopar/uns-outer-4.c: New test. * gcc.dg/autopar/uns-outer-5.c: New test. * gcc.dg/autopar/uns-outer-6.c: New test. 
From-SVN: r226107 --- gcc/ChangeLog | 12 +++++ gcc/testsuite/ChangeLog | 13 ++++++ gcc/testsuite/gcc.dg/autopar/outer-4.c | 2 +- gcc/testsuite/gcc.dg/autopar/outer-5.c | 2 +- gcc/testsuite/gcc.dg/autopar/outer-6.c | 4 +- gcc/testsuite/gcc.dg/autopar/reduc-2.c | 4 +- gcc/testsuite/gcc.dg/autopar/reduc-2char.c | 4 +- gcc/testsuite/gcc.dg/autopar/reduc-2short.c | 4 +- gcc/testsuite/gcc.dg/autopar/reduc-8.c | 4 +- gcc/testsuite/gcc.dg/autopar/uns-outer-4.c | 36 +++++++++++++++ gcc/testsuite/gcc.dg/autopar/uns-outer-5.c | 49 ++++++++++++++++++++ gcc/testsuite/gcc.dg/autopar/uns-outer-6.c | 51 +++++++++++++++++++++ gcc/tree-parloops.c | 6 +-- gcc/tree-vect-loop.c | 44 ++++++++++++------ gcc/tree-vectorizer.h | 3 +- 15 files changed, 208 insertions(+), 30 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/autopar/uns-outer-4.c create mode 100644 gcc/testsuite/gcc.dg/autopar/uns-outer-5.c create mode 100644 gcc/testsuite/gcc.dg/autopar/uns-outer-6.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6e1b53e988c..81c60beeba6 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2015-07-23 Tom de Vries + + * tree-parloops.c (gather_scalar_reductions): Add arg to call to + vect_force_simple_reduction. + * tree-vect-loop.c (vect_analyze_scalar_cycles_1): Same. + (vect_is_simple_reduction_1): Add and handle + need_wrapping_integral_overflow parameter. + (vect_is_simple_reduction, vect_force_simple_reduction): Add and pass + need_wrapping_integral_overflow parameter. + (vectorizable_reduction): Add arg to call to vect_is_simple_reduction. + * tree-vectorizer.h (vect_force_simple_reduction): Add parameter to decl. + 2015-07-23 Yuri Rumyantsev PR tree-optimization/66926,66951 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 672be709cc6..3a6374cfadb 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,16 @@ +2015-07-23 Tom de Vries + + * gcc.dg/autopar/outer-4.c: Add xfail. + * gcc.dg/autopar/outer-5.c: Same. 
+ * gcc.dg/autopar/outer-6.c: Same. + * gcc.dg/autopar/reduc-2.c: Same. + * gcc.dg/autopar/reduc-2char.c: Same. + * gcc.dg/autopar/reduc-2short.c: Same. + * gcc.dg/autopar/reduc-8.c: Same. + * gcc.dg/autopar/uns-outer-4.c: New test. + * gcc.dg/autopar/uns-outer-5.c: New test. + * gcc.dg/autopar/uns-outer-6.c: New test. + 2015-07-23 Richard Biener PR tree-optimization/66952 diff --git a/gcc/testsuite/gcc.dg/autopar/outer-4.c b/gcc/testsuite/gcc.dg/autopar/outer-4.c index 6fd37c52aa9..2027499eb28 100644 --- a/gcc/testsuite/gcc.dg/autopar/outer-4.c +++ b/gcc/testsuite/gcc.dg/autopar/outer-4.c @@ -32,4 +32,4 @@ int main(void) /* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */ -/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/autopar/outer-5.c b/gcc/testsuite/gcc.dg/autopar/outer-5.c index 6a0ae91da44..d6e0dd32836 100644 --- a/gcc/testsuite/gcc.dg/autopar/outer-5.c +++ b/gcc/testsuite/gcc.dg/autopar/outer-5.c @@ -45,4 +45,4 @@ int main(void) } /* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */ -/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/autopar/outer-6.c b/gcc/testsuite/gcc.dg/autopar/outer-6.c index 6bef7ccc3e0..726794c25ca 100644 --- a/gcc/testsuite/gcc.dg/autopar/outer-6.c +++ b/gcc/testsuite/gcc.dg/autopar/outer-6.c @@ -44,6 +44,6 @@ int main(void) /* Check that outer loop is parallelized. 
*/ -/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */ +/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */ /* { dg-final { scan-tree-dump-times "parallelizing inner loop" 0 "parloops" } } */ -/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/autopar/reduc-2.c b/gcc/testsuite/gcc.dg/autopar/reduc-2.c index 3ad16e49d36..2f4883d08e4 100644 --- a/gcc/testsuite/gcc.dg/autopar/reduc-2.c +++ b/gcc/testsuite/gcc.dg/autopar/reduc-2.c @@ -63,6 +63,6 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" } } */ -/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 4 "parloops" } } */ +/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 4 "parloops" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/autopar/reduc-2char.c b/gcc/testsuite/gcc.dg/autopar/reduc-2char.c index 072489f4d85..14867f34327 100644 --- a/gcc/testsuite/gcc.dg/autopar/reduc-2char.c +++ b/gcc/testsuite/gcc.dg/autopar/reduc-2char.c @@ -60,7 +60,7 @@ int main (void) } -/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */ -/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */ +/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/autopar/reduc-2short.c b/gcc/testsuite/gcc.dg/autopar/reduc-2short.c index 4dbbc8ae45a..7c19cc59fd3 100644 --- a/gcc/testsuite/gcc.dg/autopar/reduc-2short.c +++ b/gcc/testsuite/gcc.dg/autopar/reduc-2short.c @@ -59,6 +59,6 @@ int main (void) } -/* { dg-final { 
scan-tree-dump-times "Detected reduction" 2 "parloops" } } */ -/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */ +/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/autopar/reduc-8.c b/gcc/testsuite/gcc.dg/autopar/reduc-8.c index 16fb954df07..1d05c48274e 100644 --- a/gcc/testsuite/gcc.dg/autopar/reduc-8.c +++ b/gcc/testsuite/gcc.dg/autopar/reduc-8.c @@ -84,5 +84,5 @@ int main (void) } -/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */ -/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */ +/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/autopar/uns-outer-4.c b/gcc/testsuite/gcc.dg/autopar/uns-outer-4.c new file mode 100644 index 00000000000..ef9fc2a9d4f --- /dev/null +++ b/gcc/testsuite/gcc.dg/autopar/uns-outer-4.c @@ -0,0 +1,36 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */ + +void abort (void); + +unsigned int g_sum=0; +unsigned int x[500][500]; + +void __attribute__((noinline)) +parloop (int N) +{ + int i, j; + unsigned int sum; + + /* Double reduction is currently not supported, outer loop is not + parallelized. Inner reduction is detected, inner loop is + parallelized. 
*/ + sum = 0; + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + sum += x[i][j]; + + g_sum = sum; +} + +int +main (void) +{ + parloop (500); + + return 0; +} + + +/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/autopar/uns-outer-5.c b/gcc/testsuite/gcc.dg/autopar/uns-outer-5.c new file mode 100644 index 00000000000..a929e5dc727 --- /dev/null +++ b/gcc/testsuite/gcc.dg/autopar/uns-outer-5.c @@ -0,0 +1,49 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */ + +void abort (void); + +unsigned int x[500][500]; +unsigned int y[500]; +unsigned int g_sum=0; + +void __attribute__((noinline)) +init (int i, int j) +{ + x[i][j]=1; +} + +void __attribute__((noinline)) +parloop (int N) +{ + int i, j; + unsigned int sum; + + /* Inner cycle is currently not supported, outer loop is not + parallelized. Inner reduction is detected, inner loop is + parallelized. 
*/ + for (i = 0; i < N; i++) + { + sum = 0; + for (j = 0; j < N; j++) + sum += x[i][j]; + y[i]=sum; + } + g_sum = sum; +} + +int +main (void) +{ + int i, j; + for (i = 0; i < 500; i++) + for (j = 0; j < 500; j++) + init (i, j); + + parloop (500); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/autopar/uns-outer-6.c b/gcc/testsuite/gcc.dg/autopar/uns-outer-6.c new file mode 100644 index 00000000000..5c745f80e93 --- /dev/null +++ b/gcc/testsuite/gcc.dg/autopar/uns-outer-6.c @@ -0,0 +1,51 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */ + +void abort (void); + +unsigned int x[500][500]; +unsigned int y[500]; +unsigned int g_sum=0; + + +void __attribute__((noinline)) +init (int i, int j) +{ + x[i][j]=1; +} + +void __attribute__((noinline)) +parloop (int N) +{ + int i, j; + unsigned int sum; + + /* Outer loop reduction, outer loop is parallelized. */ + sum=0; + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + y[i]=x[i][j]; + sum += y[i]; + } + g_sum = sum; +} + +int +main (void) +{ + int i, j; + for (i = 0; i < 500; i++) + for (j = 0; j < 500; j++) + init (i, j); + + parloop (500); + + return 0; +} + + +/* Check that outer loop is parallelized. 
*/ +/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */ +/* { dg-final { scan-tree-dump-times "parallelizing inner loop" 0 "parloops" } } */ +/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */ diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c index ec418343c26..88f22e85d44 100644 --- a/gcc/tree-parloops.c +++ b/gcc/tree-parloops.c @@ -2376,9 +2376,9 @@ gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list if (!simple_iv (loop, loop, res, &iv, true) && simple_loop_info) { - gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info, - phi, true, - &double_reduc); + gimple reduc_stmt + = vect_force_simple_reduction (simple_loop_info, phi, true, + &double_reduc, true); if (reduc_stmt && !double_reduc) build_new_reduction (reduction_list, reduc_stmt, phi); } diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 9145dbf19e1..c31bfbdbad4 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -715,7 +715,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop) nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo)); reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle, - &double_reduc); + &double_reduc, false); if (reduc_stmt) { if (double_reduc) @@ -2339,7 +2339,7 @@ vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt) static gimple vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi, bool check_reduction, bool *double_reduc, - bool modify) + bool modify, bool need_wrapping_integral_overflow) { struct loop *loop = (gimple_bb (phi))->loop_father; struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); @@ -2613,14 +2613,26 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi, "reduction: unsafe fp math optimization: "); return NULL; } - else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type) - && check_reduction) + else if (INTEGRAL_TYPE_P (type) && check_reduction) { - /* 
Changing the order of operations changes the semantics. */ - if (dump_enabled_p ()) - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, - "reduction: unsafe int math optimization: "); - return NULL; + if (TYPE_OVERFLOW_TRAPS (type)) + { + /* Changing the order of operations changes the semantics. */ + if (dump_enabled_p ()) + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, + "reduction: unsafe int math optimization" + " (overflow traps): "); + return NULL; + } + if (need_wrapping_integral_overflow && !TYPE_OVERFLOW_WRAPS (type)) + { + /* Changing the order of operations changes the semantics. */ + if (dump_enabled_p ()) + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, + "reduction: unsafe int math optimization" + " (overflow doesn't wrap): "); + return NULL; + } } else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction) { @@ -2749,10 +2761,12 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi, static gimple vect_is_simple_reduction (loop_vec_info loop_info, gimple phi, - bool check_reduction, bool *double_reduc) + bool check_reduction, bool *double_reduc, + bool need_wrapping_integral_overflow) { return vect_is_simple_reduction_1 (loop_info, phi, check_reduction, - double_reduc, false); + double_reduc, false, + need_wrapping_integral_overflow); } /* Wrapper around vect_is_simple_reduction_1, which will modify code @@ -2761,10 +2775,12 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi, gimple vect_force_simple_reduction (loop_vec_info loop_info, gimple phi, - bool check_reduction, bool *double_reduc) + bool check_reduction, bool *double_reduc, + bool need_wrapping_integral_overflow) { return vect_is_simple_reduction_1 (loop_info, phi, check_reduction, - double_reduc, true); + double_reduc, true, + need_wrapping_integral_overflow); } /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. 
*/ @@ -5074,7 +5090,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, } gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt, - !nested_cycle, &dummy); + !nested_cycle, &dummy, false); if (orig_stmt) gcc_assert (tmp == orig_stmt || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt); diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 48c1f8d64b7..dfa879583ac 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1090,7 +1090,8 @@ extern tree vect_create_addr_base_for_vector_ref (gimple, gimple_seq *, /* In tree-vect-loop.c. */ /* FORNOW: Used in tree-parloops.c. */ extern void destroy_loop_vec_info (loop_vec_info, bool); -extern gimple vect_force_simple_reduction (loop_vec_info, gimple, bool, bool *); +extern gimple vect_force_simple_reduction (loop_vec_info, gimple, bool, bool *, + bool); /* Drive for loop analysis stage. */ extern loop_vec_info vect_analyze_loop (struct loop *); /* Drive for loop transformation stage. */ -- 2.30.2