re PR tree-optimization/59058 (wrong code at -O3 on x86_64-linux-gnu (affecting gcc...
author Richard Biener <rguenther@suse.de>
Fri, 6 Dec 2013 09:23:07 +0000 (09:23 +0000)
committer Richard Biener <rguenth@gcc.gnu.org>
Fri, 6 Dec 2013 09:23:07 +0000 (09:23 +0000)
2013-12-06  Richard Biener  <rguenther@suse.de>

PR tree-optimization/59058
* tree-vectorizer.h (struct _loop_vec_info): Add num_itersm1
member.
(LOOP_VINFO_NITERSM1): New macro.
* tree-vect-loop-manip.c (slpeel_tree_peel_loop_to_edge): Express
the vector loop entry test in terms of scalar latch executions.
(vect_do_peeling_for_alignment): Update LOOP_VINFO_NITERSM1.
* tree-vect-loop.c (vect_get_loop_niters): Also return the
number of latch executions.
(new_loop_vec_info): Initialize LOOP_VINFO_NITERSM1.
(vect_analyze_loop_form): Likewise.
(vect_generate_tmps_on_preheader): Compute the number of
vectorized iterations differently.

* gcc.dg/torture/pr59058.c: New testcase.

From-SVN: r205730

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/torture/pr59058.c [new file with mode: 0644]
gcc/tree-vect-loop-manip.c
gcc/tree-vect-loop.c
gcc/tree-vectorizer.h

index 03e03d702bbc3484b94c43a1af96e96022509383..b763531126be0c20bb5cb569eb80e43eb73371aa 100644 (file)
@@ -1,3 +1,19 @@
+2013-12-06  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/59058
+       * tree-vectorizer.h (struct _loop_vec_info): Add num_itersm1
+       member.
+       (LOOP_VINFO_NITERSM1): New macro.
+       * tree-vect-loop-manip.c (slpeel_tree_peel_loop_to_edge): Express
+       the vector loop entry test in terms of scalar latch executions.
+       (vect_do_peeling_for_alignment): Update LOOP_VINFO_NITERSM1.
+       * tree-vect-loop.c (vect_get_loop_niters): Also return the
+       number of latch executions.
+       (new_loop_vec_info): Initialize LOOP_VINFO_NITERSM1.
+       (vect_analyze_loop_form): Likewise.
+       (vect_generate_tmps_on_preheader): Compute the number of
+       vectorized iterations differently.
+
 2013-12-05  Jan-Benedict Glaw  <jbglaw@lug-owl.de>
 
        * config/score/score.c (score_force_temporary): Delete function.
index a97ab31b4bde8200c5b4674c7b42f0cca5abdd2e..b3be3e0436d29627810833eff508c3db35786be6 100644 (file)
@@ -1,3 +1,8 @@
+2013-12-06  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/59058
+       * gcc.dg/torture/pr59058.c: New testcase.
+
 2013-12-05  Paolo Carlini  <paolo.carlini@oracle.com>
 
        * g++.dg/warn/pr15774-1.C: Adjust expected message.
diff --git a/gcc/testsuite/gcc.dg/torture/pr59058.c b/gcc/testsuite/gcc.dg/torture/pr59058.c
new file mode 100644 (file)
index 0000000..b3a5a39
--- /dev/null
@@ -0,0 +1,19 @@
+/* { dg-do run } */
+
+extern void abort (void);
+
+short b = 0;
+
+int
+main ()
+{
+  int c = 0;
+l1:
+  b++;
+  c |= b;
+  if (b)
+    goto l1;
+  if (c != -1)
+    abort ();
+  return 0;
+}
index f2fdc99ed04ca66925f9362bf21460250a675498..380fd2258b494d5a464d381bcabb2a51ee14c458 100644 (file)
@@ -1061,7 +1061,6 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
   gimple_stmt_iterator gsi;
   edge exit_e = single_exit (loop);
   source_location loop_loc;
-  tree cost_pre_condition = NULL_TREE;
   /* There are many aspects to how likely the first loop is going to be executed.
      Without histogram we can't really do good job.  Simply set it to
      2/3, so the first loop is not reordered to the end of function and
@@ -1263,21 +1262,17 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
   /* Epilogue peeling.  */
   if (!update_first_loop_count)
     {
+      loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
+      tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
+      unsigned limit = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
+      if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+       limit = limit + 1;
+      if (check_profitability
+         && th > limit)
+       limit = th;
       pre_condition =
-       fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
-                    build_int_cst (TREE_TYPE (*first_niters), 0));
-      if (check_profitability)
-       {
-         tree scalar_loop_iters
-           = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
-                                       (loop_vec_info_for_loop (loop)));
-         cost_pre_condition =
-           fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
-                        build_int_cst (TREE_TYPE (scalar_loop_iters), th));
-
-         pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
-                                      cost_pre_condition, pre_condition);
-       }
+       fold_build2 (LT_EXPR, boolean_type_node, scalar_loop_iters,
+                    build_int_cst (TREE_TYPE (scalar_loop_iters), limit));
       if (cond_expr)
        {
          pre_condition =
@@ -1922,6 +1917,9 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name,
   /* Update number of times loop executes.  */
   LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
                TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop);
+  LOOP_VINFO_NITERSM1 (loop_vinfo) = fold_build2 (MINUS_EXPR,
+               TREE_TYPE (ni_name),
+               LOOP_VINFO_NITERSM1 (loop_vinfo), niters_of_prolog_loop);
 
   if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
     wide_prolog_niters = niters_of_prolog_loop;
index 25bf334edf807c217b070427cf20bbd6abb4f135..ca8d3a6cde089574cf431201dac9bea6fa60e226 100644 (file)
@@ -791,12 +791,14 @@ vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
 /* Function vect_get_loop_niters.
 
    Determine how many iterations the loop is executed and place it
-   in NUMBER_OF_ITERATIONS.
+   in NUMBER_OF_ITERATIONS.  Place the number of latch iterations
+   in NUMBER_OF_ITERATIONSM1.
 
    Return the loop exit condition.  */
 
 static gimple
-vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
+vect_get_loop_niters (struct loop *loop, tree *number_of_iterations,
+                     tree *number_of_iterationsm1)
 {
   tree niters;
 
@@ -805,12 +807,14 @@ vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
                     "=== get_loop_niters ===\n");
 
   niters = number_of_latch_executions (loop);
+  *number_of_iterationsm1 = niters;
+
   /* We want the number of loop header executions which is the number
      of latch executions plus one.
      ???  For UINT_MAX latch executions this number overflows to zero
      for loops like do { n++; } while (n != 0);  */
   if (niters && !chrec_contains_undetermined (niters))
-    niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), niters,
+    niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), unshare_expr (niters),
                          build_int_cst (TREE_TYPE (niters), 1));
   *number_of_iterations = niters;
 
@@ -916,6 +920,7 @@ new_loop_vec_info (struct loop *loop)
    gcc_assert (nbbs == loop->num_nodes);
 
   LOOP_VINFO_BBS (res) = bbs;
+  LOOP_VINFO_NITERSM1 (res) = NULL;
   LOOP_VINFO_NITERS (res) = NULL;
   LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
   LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
@@ -1071,7 +1076,7 @@ vect_analyze_loop_form (struct loop *loop)
 {
   loop_vec_info loop_vinfo;
   gimple loop_cond;
-  tree number_of_iterations = NULL;
+  tree number_of_iterations = NULL, number_of_iterationsm1 = NULL;
   loop_vec_info inner_loop_vinfo = NULL;
 
   if (dump_enabled_p ())
@@ -1246,7 +1251,8 @@ vect_analyze_loop_form (struct loop *loop)
        }
     }
 
-  loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
+  loop_cond = vect_get_loop_niters (loop, &number_of_iterations,
+                                   &number_of_iterationsm1);
   if (!loop_cond)
     {
       if (dump_enabled_p ())
@@ -1280,6 +1286,7 @@ vect_analyze_loop_form (struct loop *loop)
     }
 
   loop_vinfo = new_loop_vec_info (loop);
+  LOOP_VINFO_NITERSM1 (loop_vinfo) = number_of_iterationsm1;
   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
 
@@ -5637,12 +5644,11 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
   tree var;
   tree ratio_name;
   tree ratio_mult_vf_name;
-  tree ni = LOOP_VINFO_NITERS (loop_vinfo);
   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
   tree log_vf;
 
-  log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
+  log_vf = build_int_cst (TREE_TYPE (ni_name), exact_log2 (vf));
 
   /* If epilogue loop is required because of data accesses with gaps, we
      subtract one iteration from the total number of iterations here for
@@ -5654,7 +5660,7 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
                                       build_one_cst (TREE_TYPE (ni_name)));
       if (!is_gimple_val (ni_minus_gap_name))
        {
-         var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
+         var = create_tmp_var (TREE_TYPE (ni_name), "ni_gap");
           gimple stmts = NULL;
           ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
                                                    true, var);
@@ -5665,12 +5671,22 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
     ni_minus_gap_name = ni_name;
 
   /* Create: ratio = ni >> log2(vf) */
-
-  ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name),
-                           ni_minus_gap_name, log_vf);
+  /* ???  As we have ni == number of latch executions + 1, ni could
+     have overflown to zero.  So avoid computing ratio based on ni
+     but compute it using the fact that we know ratio will be at least
+     one, thus via (ni - vf) >> log2(vf) + 1.  */
+  ratio_name
+    = fold_build2 (PLUS_EXPR, TREE_TYPE (ni_name),
+                  fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name),
+                               fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
+                                            ni_minus_gap_name,
+                                            build_int_cst
+                                              (TREE_TYPE (ni_name), vf)),
+                               log_vf),
+                  build_int_cst (TREE_TYPE (ni_name), 1));
   if (!is_gimple_val (ratio_name))
     {
-      var = create_tmp_var (TREE_TYPE (ni), "bnd");
+      var = create_tmp_var (TREE_TYPE (ni_name), "bnd");
       gimple stmts = NULL;
       ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
       gsi_insert_seq_on_edge_immediate (pe, stmts);
@@ -5685,7 +5701,7 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
                                        ratio_name, log_vf);
       if (!is_gimple_val (ratio_mult_vf_name))
        {
-         var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
+         var = create_tmp_var (TREE_TYPE (ni_name), "ratio_mult_vf");
          gimple stmts = NULL;
          ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
                                                     true, var);
index 4427d6a7b335049441ca889e19149a022f29d5b0..b4daf0aba179c74d31c8c6df437c6bb9ea22a160 100644 (file)
@@ -250,8 +250,11 @@ typedef struct _loop_vec_info {
   /* The loop basic blocks.  */
   basic_block *bbs;
 
+  /* Number of latch executions.  */
+  tree num_itersm1;
   /* Number of iterations.  */
   tree num_iters;
+  /* Number of iterations of the original loop.  */
   tree num_iters_unchanged;
 
   /* Minimum number of iterations below which vectorization is expected to
@@ -349,9 +352,11 @@ typedef struct _loop_vec_info {
 /* Access Functions.  */
 #define LOOP_VINFO_LOOP(L)                 (L)->loop
 #define LOOP_VINFO_BBS(L)                  (L)->bbs
+#define LOOP_VINFO_NITERSM1(L)             (L)->num_itersm1
 #define LOOP_VINFO_NITERS(L)               (L)->num_iters
-/* Since LOOP_VINFO_NITERS can change after prologue peeling
-   retain total unchanged scalar loop iterations for cost model.  */
+/* Since LOOP_VINFO_NITERS and LOOP_VINFO_NITERSM1 can change after
+   prologue peeling retain total unchanged scalar loop iterations for
+   cost model.  */
 #define LOOP_VINFO_NITERS_UNCHANGED(L)     (L)->num_iters_unchanged
 #define LOOP_VINFO_COST_MODEL_MIN_ITERS(L) (L)->min_profitable_iters
 #define LOOP_VINFO_VECTORIZABLE_P(L)       (L)->vectorizable