re PR tree-optimization/77445 (Performance drop after r239219 on coremark test)
authorJan Hubicka <hubicka@ucw.cz>
Tue, 17 Jan 2017 12:49:41 +0000 (13:49 +0100)
committerJan Hubicka <hubicka@gcc.gnu.org>
Tue, 17 Jan 2017 12:49:41 +0000 (12:49 +0000)
PR middle-end/77445
* tree-ssa-threadupdate.c (remove_ctrl_stmt_and_useless_edges):
correctly set frequency of oudgoing edge.
(duplicate_thread_path): Fix profile updating.
* gcc.dg/tree-ssa/pr77445-2.c: New testcase.
* gcc.dg/tree-ssa/pr77445.c: New testcase.

From-SVN: r244528

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/pr77445.c [new file with mode: 0644]
gcc/tree-ssa-threadupdate.c

index bef211cbd3e80bdd26c2c790018ec51ae937943c..5754ee21f8135ebde12fdaac84057ec5bf8e4e97 100644 (file)
@@ -1,3 +1,10 @@
+2017-01-17  Jan Hubicka  <hubicka@ucw.cz>
+
+       PR middle-end/77445
+       * tree-ssa-threadupdate.c (remove_ctrl_stmt_and_useless_edges):
+       correctly set frequency of oudgoing edge.
+       (duplicate_thread_path): Fix profile updating.
+
 2017-01-17  Jakub Jelinek  <jakub@redhat.com>
 
        PR other/79046
index 9f61689d194858bccad919de410544ac7dbe9df8..c34cac3d7ffe3ae63f6ecc83cc9d05eb483bde54 100644 (file)
@@ -1,3 +1,9 @@
+2017-01-17  Jan Hubicka  <hubicka@ucw.cz>
+
+       PR middle-end/77445
+       * gcc.dg/tree-ssa/pr77445-2.c: New testcase.
+       * gcc.dg/tree-ssa/pr77445.c: New testcase.
+
 2017-01-17  Jakub Jelinek  <jakub@redhat.com>
 
        * g++.dg/tree-ssa/ssa-dse-2.C (size_t): Typedef to __SIZE_TYPE__
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c
new file mode 100644 (file)
index 0000000..47402ec
--- /dev/null
@@ -0,0 +1,123 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-thread1-details-blocks-stats" } */
+typedef enum STATES {
+       START=0,
+       INVALID,
+       S1,
+       S2,
+       INT,
+       FLOAT ,
+       EXPONENT,
+       SCIENTIFIC,
+       NUM_STATES
+} state_e ;
+
+typedef unsigned char u8;
+typedef unsigned int  u32;
+
+static u8 is_digit(u8 c) {
+       return (u8)((c>='0') & (c<='9')) ? 1 : 0;
+}
+
+enum STATES FMS( u8 **in , u32 *transitions) {
+       u8 *str = *in;
+       u8 NEXT_SYMBOL;
+       enum STATES state=START;
+       for( ; *str && state != INVALID; str++ ) {
+               NEXT_SYMBOL = *str;
+               if (NEXT_SYMBOL==',') /* end of this input */ {
+                       transitions[state]++;
+                       str++;
+                       break;
+               }
+               switch(state) {
+               case START:
+                       if(is_digit(NEXT_SYMBOL)) {
+                               state = INT;
+                       }
+                       else if( NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-' ) {
+                               state = S1;
+                       }
+                       else if( NEXT_SYMBOL == '.' ) {
+                               state = FLOAT ;
+                       }
+                       else {
+                               state = INVALID;
+                       }
+                       transitions[START]++;
+                       break;
+               case S1:
+                       if(is_digit(NEXT_SYMBOL)) {
+                               state = INT;
+                               transitions[S1]++;
+                       }
+                       else if( NEXT_SYMBOL == '.' ) {
+                               state = FLOAT ;
+                               transitions[S1]++;
+                       }
+                       else {
+                               state = INVALID;
+                               transitions[S1]++;
+                       }
+                       break;
+               case INT:
+                       if( NEXT_SYMBOL == '.' ) {
+                               state = FLOAT ;
+                               transitions[INT]++;
+                       }
+                       else if(!is_digit(NEXT_SYMBOL)) {
+                               state = INVALID;
+                               transitions[INT]++;
+                       }
+                       break;
+               case FLOAT :
+                       if( NEXT_SYMBOL == 'E' || NEXT_SYMBOL == 'e' ) {
+                               state = S2;
+                               transitions[FLOAT ]++;
+                       }
+                       else if(!is_digit(NEXT_SYMBOL)) {
+                               state = INVALID;
+                               transitions[FLOAT ]++;
+                       }
+                       break;
+               case S2:
+                       if( NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-' ) {
+                               state = EXPONENT;
+                               transitions[S2]++;
+                       }
+                       else {
+                               state = INVALID;
+                               transitions[S2]++;
+                       }
+                       break;
+               case EXPONENT:
+                       if(is_digit(NEXT_SYMBOL)) {
+                               state = SCIENTIFIC;
+                               transitions[EXPONENT]++;
+                       }
+                       else {
+                               state = INVALID;
+                               transitions[EXPONENT]++;
+                       }
+                       break;
+               case SCIENTIFIC:
+                       if(!is_digit(NEXT_SYMBOL)) {
+                               state = INVALID;
+                       }
+                       break;
+               default:
+                       break;
+               }
+       }
+       if (state==INVALID)
+               transitions[INVALID]++;
+       
+       *in = str;
+       return state;
+}
+
+/* The profile is not updated perfectly because it is inconsitent from
+   profile estimation stage. But the number of inconsistencies should not
+   increase much.  */
+/* { dg-final { scan-tree-dump "Jumps threaded: 16" "thread1" } } */
+/* { dg-final { scan-tree-dump-times "Invalid sum" 2 "thread1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr77445.c b/gcc/testsuite/gcc.dg/tree-ssa/pr77445.c
new file mode 100644 (file)
index 0000000..98eb0f2
--- /dev/null
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-thread3-details-blocks -fno-early-inlining -fno-tree-vrp -fno-tree-dominator-opts" } */
+
+static int a;
+static int b;
+void test2 ();
+void
+test ()
+{
+  b = 7;
+}
+
+void
+main (int argc)
+{
+  if (argc)
+    {
+      a = 7;
+      test ();
+    }
+  else
+    a = 0;
+  if (a)
+    test2 ();
+  if (b)
+    test2 ();
+}
+/* { dg-final { scan-tree-dump-times "Registering FSM jump thread" 2 "thread3" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "thread3" } } */
index 2da93a803d4762128ba66529ca3b987618b272d2..f85fed314f0f4601883ef044448d22258e67a9fd 100644 (file)
@@ -301,7 +301,11 @@ remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
          remove_edge (e);
        }
       else
-       ei_next (&ei);
+       {
+         e->probability = REG_BR_PROB_BASE;
+         e->count = bb->count;
+         ei_next (&ei);
+       }
     }
 
   /* If the remaining edge is a loop exit, there must have
@@ -2212,8 +2216,8 @@ duplicate_thread_path (edge entry, edge exit,
   struct loop *loop = entry->dest->loop_father;
   edge exit_copy;
   edge redirected;
-  int total_freq = 0, entry_freq = 0;
-  gcov_type total_count = 0, entry_count = 0;
+  int curr_freq;
+  gcov_type curr_count;
 
   if (!can_copy_bbs_p (region, n_region))
     return false;
@@ -2240,27 +2244,6 @@ duplicate_thread_path (edge entry, edge exit,
       free_region_copy = true;
     }
 
-  if (entry->dest->count)
-    {
-      total_count = entry->dest->count;
-      entry_count = entry->count;
-      /* Fix up corner cases, to avoid division by zero or creation of negative
-        frequencies.  */
-      if (entry_count > total_count)
-       entry_count = total_count;
-    }
-  else
-    {
-      total_freq = entry->dest->frequency;
-      entry_freq = EDGE_FREQUENCY (entry);
-      /* Fix up corner cases, to avoid division by zero or creation of negative
-        frequencies.  */
-      if (total_freq == 0)
-       total_freq = 1;
-      else if (entry_freq > total_freq)
-       entry_freq = total_freq;
-    }
-
   copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
            split_edge_bb_loc (entry), false);
 
@@ -2270,17 +2253,61 @@ duplicate_thread_path (edge entry, edge exit,
      invalidating the property that is propagated by executing all the blocks of
      the jump-thread path in order.  */
 
+  curr_count = entry->count;
+  curr_freq = EDGE_FREQUENCY (entry);
+
   for (i = 0; i < n_region; i++)
     {
       edge e;
       edge_iterator ei;
       basic_block bb = region_copy[i];
 
+      /* Watch inconsistent profile.  */
+      if (curr_count > region[i]->count)
+       curr_count = region[i]->count;
+      if (curr_freq > region[i]->frequency)
+       curr_freq = region[i]->frequency;
+      /* Scale current BB.  */
+      if (region[i]->count)
+       {
+         /* In the middle of the path we only scale the frequencies.
+            In last BB we need to update probabilities of outgoing edges
+            because we know which one is taken at the threaded path.  */
+         if (i + 1 != n_region)
+           scale_bbs_frequencies_gcov_type (region + i, 1,
+                                            region[i]->count - curr_count,
+                                            region[i]->count);
+         else
+           update_bb_profile_for_threading (region[i],
+                                            curr_freq, curr_count,
+                                            exit);
+         scale_bbs_frequencies_gcov_type (region_copy + i, 1, curr_count,
+                                          region_copy[i]->count);
+       }
+      else if (region[i]->frequency)
+       {
+         if (i + 1 != n_region)
+           scale_bbs_frequencies_int (region + i, 1,
+                                      region[i]->frequency - curr_freq,
+                                      region[i]->frequency);
+         else
+           update_bb_profile_for_threading (region[i],
+                                            curr_freq, curr_count,
+                                            exit);
+         scale_bbs_frequencies_int (region_copy + i, 1, curr_freq,
+                                    region_copy[i]->frequency);
+       }
+
       if (single_succ_p (bb))
        {
          /* Make sure the successor is the next node in the path.  */
          gcc_assert (i + 1 == n_region
                      || region_copy[i + 1] == single_succ_edge (bb)->dest);
+         if (i + 1 != n_region)
+           {
+             curr_freq = EDGE_FREQUENCY (single_succ_edge (bb));
+             curr_count = single_succ_edge (bb)->count;
+           }
          continue;
        }
 
@@ -2307,22 +2334,13 @@ duplicate_thread_path (edge entry, edge exit,
            if (orig)
              redirect_edge_and_branch_force (e, orig);
          }
+       else
+         {
+           curr_freq = EDGE_FREQUENCY (e);
+           curr_count = e->count;
+         }
     }
 
-  if (total_count)
-    {
-      scale_bbs_frequencies_gcov_type (region, n_region,
-                                      total_count - entry_count,
-                                      total_count);
-      scale_bbs_frequencies_gcov_type (region_copy, n_region, entry_count,
-                                      total_count);
-    }
-  else
-    {
-      scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq,
-                                total_freq);
-      scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq);
-    }
 
   if (flag_checking)
     verify_jump_thread (region_copy, n_region);
@@ -2337,11 +2355,12 @@ duplicate_thread_path (edge entry, edge exit,
 
   edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
 
-  if (e) {
-    rescan_loop_exit (e, true, false);
-    e->probability = REG_BR_PROB_BASE;
-    e->count = region_copy[n_region - 1]->count;
-  }
+  if (e)
+    {
+      rescan_loop_exit (e, true, false);
+      e->probability = REG_BR_PROB_BASE;
+      e->count = region_copy[n_region - 1]->count;
+    }
 
   /* Redirect the entry and add the phi node arguments.  */
   if (entry->dest == loop->header)