+2015-07-23 Tom de Vries <tom@codesourcery.com>
+
+ * tree-parloops.c (gather_scalar_reductions): Add arg to call to
+ vect_force_simple_reduction.
+ * tree-vect-loop.c (vect_analyze_scalar_cycles_1): Same.
+ (vect_is_simple_reduction_1): Add and handle
+ need_wrapping_integral_overflow parameter.
+ (vect_is_simple_reduction, vect_force_simple_reduction): Add and pass
+ need_wrapping_integral_overflow parameter.
+ (vectorizable_reduction): Add arg to call to vect_is_simple_reduction.
+ * tree-vectorizer.h (vect_force_simple_reduction): Add parameter to decl.
+
2015-07-23 Yuri Rumyantsev <ysrumyan@gmail.com>
PR tree-optimization/66926,66951
+2015-07-23 Tom de Vries <tom@codesourcery.com>
+
+ * gcc.dg/autopar/outer-4.c: Add xfail.
+ * gcc.dg/autopar/outer-5.c: Same.
+ * gcc.dg/autopar/outer-6.c: Same.
+ * gcc.dg/autopar/reduc-2.c: Same.
+ * gcc.dg/autopar/reduc-2char.c: Same.
+ * gcc.dg/autopar/reduc-2short.c: Same.
+ * gcc.dg/autopar/reduc-8.c: Same.
+ * gcc.dg/autopar/uns-outer-4.c: New test.
+ * gcc.dg/autopar/uns-outer-5.c: New test.
+ * gcc.dg/autopar/uns-outer-6.c: New test.
+
2015-07-23 Richard Biener <rguenther@suse.de>
PR tree-optimization/66952
/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */
}
/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */
/* Check that outer loop is parallelized. */
-/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "parallelizing inner loop" 0 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */
return 0;
}
-/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 4 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 4 "parloops" { xfail *-*-* } } } */
}
-/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */
}
-/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */
}
-/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */
+
+void abort (void);
+
+unsigned int g_sum=0;
+unsigned int x[500][500];
+
+void __attribute__((noinline))
+parloop (int N)
+{
+ int i, j;
+ unsigned int sum;
+
+ /* Double reduction is currently not supported, so the outer loop is
+ not parallelized. The inner reduction is detected and the inner loop
+ is parallelized. */
+ sum = 0;
+ for (i = 0; i < N; i++)
+ for (j = 0; j < N; j++)
+ sum += x[i][j];
+
+ g_sum = sum;
+}
+
+int
+main (void)
+{
+ parloop (500);
+
+ return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */
+
+void abort (void);
+
+unsigned int x[500][500];
+unsigned int y[500];
+unsigned int g_sum=0;
+
+void __attribute__((noinline))
+init (int i, int j)
+{
+ x[i][j]=1;
+}
+
+void __attribute__((noinline))
+parloop (int N)
+{
+ int i, j;
+ unsigned int sum;
+
+ /* Inner cycle is currently not supported, so the outer loop is not
+ parallelized. The inner reduction is detected and the inner loop is
+ parallelized. */
+ for (i = 0; i < N; i++)
+ {
+ sum = 0;
+ for (j = 0; j < N; j++)
+ sum += x[i][j];
+ y[i]=sum;
+ }
+ g_sum = sum;
+}
+
+int
+main (void)
+{
+ int i, j;
+ for (i = 0; i < 500; i++)
+ for (j = 0; j < 500; j++)
+ init (i, j);
+
+ parloop (500);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */
+
+void abort (void);
+
+unsigned int x[500][500];
+unsigned int y[500];
+unsigned int g_sum=0;
+
+
+void __attribute__((noinline))
+init (int i, int j)
+{
+ x[i][j]=1;
+}
+
+void __attribute__((noinline))
+parloop (int N)
+{
+ int i, j;
+ unsigned int sum;
+
+ /* Outer loop reduction; the outer loop is parallelized. */
+ sum=0;
+ for (i = 0; i < N; i++)
+ {
+ for (j = 0; j < N; j++)
+ y[i]=x[i][j];
+ sum += y[i];
+ }
+ g_sum = sum;
+}
+
+int
+main (void)
+{
+ int i, j;
+ for (i = 0; i < 500; i++)
+ for (j = 0; j < 500; j++)
+ init (i, j);
+
+ parloop (500);
+
+ return 0;
+}
+
+
+/* Check that outer loop is parallelized. */
+/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "parallelizing inner loop" 0 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
if (!simple_iv (loop, loop, res, &iv, true)
&& simple_loop_info)
{
- gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
- phi, true,
- &double_reduc);
+ gimple reduc_stmt
+ = vect_force_simple_reduction (simple_loop_info, phi, true,
+ &double_reduc, true);
if (reduc_stmt && !double_reduc)
build_new_reduction (reduction_list, reduc_stmt, phi);
}
nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
- &double_reduc);
+ &double_reduc, false);
if (reduc_stmt)
{
if (double_reduc)
static gimple
vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
bool check_reduction, bool *double_reduc,
- bool modify)
+ bool modify, bool need_wrapping_integral_overflow)
{
struct loop *loop = (gimple_bb (phi))->loop_father;
struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
"reduction: unsafe fp math optimization: ");
return NULL;
}
- else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
- && check_reduction)
+ else if (INTEGRAL_TYPE_P (type) && check_reduction)
{
- /* Changing the order of operations changes the semantics. */
- if (dump_enabled_p ())
- report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
- "reduction: unsafe int math optimization: ");
- return NULL;
+ if (TYPE_OVERFLOW_TRAPS (type))
+ {
+ /* Changing the order of operations changes the semantics. */
+ if (dump_enabled_p ())
+ report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+ "reduction: unsafe int math optimization"
+ " (overflow traps): ");
+ return NULL;
+ }
+ if (need_wrapping_integral_overflow && !TYPE_OVERFLOW_WRAPS (type))
+ {
+ /* Reordering operations is unsafe when overflow doesn't wrap. */
+ if (dump_enabled_p ())
+ report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+ "reduction: unsafe int math optimization"
+ " (overflow doesn't wrap): ");
+ return NULL;
+ }
}
else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
{
static gimple
vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
- bool check_reduction, bool *double_reduc)
+ bool check_reduction, bool *double_reduc,
+ bool need_wrapping_integral_overflow)
{
return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
- double_reduc, false);
+ double_reduc, false,
+ need_wrapping_integral_overflow);
}
/* Wrapper around vect_is_simple_reduction_1, which will modify code
gimple
vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
- bool check_reduction, bool *double_reduc)
+ bool check_reduction, bool *double_reduc,
+ bool need_wrapping_integral_overflow)
{
return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
- double_reduc, true);
+ double_reduc, true,
+ need_wrapping_integral_overflow);
}
/* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */
}
gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
- !nested_cycle, &dummy);
+ !nested_cycle, &dummy, false);
if (orig_stmt)
gcc_assert (tmp == orig_stmt
|| GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
/* In tree-vect-loop.c. */
/* FORNOW: Used in tree-parloops.c. */
extern void destroy_loop_vec_info (loop_vec_info, bool);
-extern gimple vect_force_simple_reduction (loop_vec_info, gimple, bool, bool *);
+extern gimple vect_force_simple_reduction (loop_vec_info, gimple, bool, bool *,
+ bool);
/* Drive for loop analysis stage. */
extern loop_vec_info vect_analyze_loop (struct loop *);
/* Drive for loop transformation stage. */