poly_int: REG_OFFSET

[gcc.git] / gcc / tree-ssa-threadupdate.c
diff --git a/gcc/tree-ssa-threadupdate.c b/gcc/tree-ssa-threadupdate.c

index 1ff007a495a1c3c9e504ac11b12bcc81013089a6..7b823d130fac7519d6f264d3065bc1f0d269f7b7 100644 (file)
--- a/gcc/tree-ssa-threadupdate.c
+++ b/gcc/tree-ssa-threadupdate.c
@@ -1,5 +1,5 @@
  /* Thread edges through blocks and update the control flow and SSA graphs.
-   Copyright (C) 2004-2016 Free Software Foundation, Inc.
+   Copyright (C) 2004-2017 Free Software Foundation, Inc.
  
  This file is part of GCC.
  
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3.  If not see
  #include "cfgloop.h"
  #include "dbgcnt.h"
  #include "tree-cfg.h"
+#include "tree-vectorizer.h"
  
  /* Given a block B, update the CFG and SSA graph to reflect redirecting
     one or more in-edges to B to instead reach the destination of an
@@ -234,12 +235,12 @@ struct ssa_local_info_t
       and sharing a template for that block is considerably more difficult.  */
    basic_block template_block;
  
-  /* TRUE if we thread one or more jumps, FALSE otherwise.  */
-  bool jumps_threaded;
-
    /* Blocks duplicated for the thread.  */
    bitmap duplicate_blocks;
  
+  /* TRUE if we thread one or more jumps, FALSE otherwise.  */
+  bool jumps_threaded;
+
    /* When we have multiple paths through a joiner which reach different
       final destinations, then we may need to correct for potential
       profile insanities.  */
@@ -300,7 +301,10 @@ remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
           remove_edge (e);
         }
        else
-       ei_next (&ei);
+       {
+         e->probability = profile_probability::always ();
+         ei_next (&ei);
+       }
      }
  
    /* If the remaining edge is a loop exit, there must have
@@ -335,8 +339,7 @@ create_block_for_threading (basic_block bb,
      e->aux = NULL;
  
    /* Zero out the profile, since the block is unreachable for now.  */
-  rd->dup_blocks[count]->frequency = 0;
-  rd->dup_blocks[count]->count = 0;
+  rd->dup_blocks[count]->count = profile_count::uninitialized ();
    if (duplicate_blocks)
      bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
  }
@@ -541,11 +544,9 @@ static void
  create_edge_and_update_destination_phis (struct redirection_data *rd,
                                          basic_block bb, int idx)
  {
-  edge e = make_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
+  edge e = make_single_succ_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
  
    rescan_loop_exit (e, true, false);
-  e->probability = REG_BR_PROB_BASE;
-  e->count = bb->count;
  
    /* We used to copy the thread path here.  That was added in 2007
       and dutifully updated through the representation changes in 2013.
@@ -588,7 +589,7 @@ any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
  }
  
  
-/* Compute the amount of profile count/frequency coming into the jump threading
+/* Compute the amount of profile count coming into the jump threading
     path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
     PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
     duplicated path, returned in PATH_OUT_COUNT_PTR.  LOCAL_INFO is used to
@@ -596,7 +597,7 @@ any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
     edges that need to be ignored in the analysis.  Return true if path contains
     a joiner, false otherwise.
  
-   In the non-joiner case, this is straightforward - all the counts/frequency
+   In the non-joiner case, this is straightforward - all the counts
     flowing into the jump threading path should flow through the duplicated
     block and out of the duplicated path.
  
@@ -689,17 +690,15 @@ any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
  static bool
  compute_path_counts (struct redirection_data *rd,
                      ssa_local_info_t *local_info,
-                    gcov_type *path_in_count_ptr,
-                    gcov_type *path_out_count_ptr,
-                    int *path_in_freq_ptr)
+                    profile_count *path_in_count_ptr,
+                    profile_count *path_out_count_ptr)
  {
    edge e = rd->incoming_edges->e;
    vec<jump_thread_edge *> *path = THREAD_PATH (e);
    edge elast = path->last ()->e;
-  gcov_type nonpath_count = 0;
+  profile_count nonpath_count = profile_count::zero ();
    bool has_joiner = false;
-  gcov_type path_in_count = 0;
-  int path_in_freq = 0;
+  profile_count path_in_count = profile_count::zero ();
  
    /* Start by accumulating incoming edge counts to the path's first bb
       into a couple buckets:
@@ -718,7 +717,7 @@ compute_path_counts (struct redirection_data *rd,
       below to add up the counts of the other edges not included in this jump
       threading path.  */
    struct el *next, *el;
-  bitmap in_edge_srcs = BITMAP_ALLOC (NULL);
+  auto_bitmap in_edge_srcs;
    for (el = rd->incoming_edges; el; el = next)
      {
        next = el->next;
@@ -738,31 +737,24 @@ compute_path_counts (struct redirection_data *rd,
              same last path edge in the case where the last edge has a nocopy
              source block.  */
           gcc_assert (ein_path->last ()->e == elast);
-         path_in_count += ein->count;
-         path_in_freq += EDGE_FREQUENCY (ein);
+         path_in_count += ein->count ();
         }
        else if (!ein_path)
         {
           /* Keep track of the incoming edges that are not on any jump-threading
              path.  These counts will still flow out of original path after all
              jump threading is complete.  */
-           nonpath_count += ein->count;
+           nonpath_count += ein->count ();
         }
      }
  
-  /* This is needed due to insane incoming frequencies.  */
-  if (path_in_freq > BB_FREQ_MAX)
-    path_in_freq = BB_FREQ_MAX;
-
-  BITMAP_FREE (in_edge_srcs);
-
    /* Now compute the fraction of the total count coming into the first
       path bb that is from the current threading path.  */
-  gcov_type total_count = e->dest->count;
+  profile_count total_count = e->dest->count;
    /* Handle incoming profile insanities.  */
    if (total_count < path_in_count)
      path_in_count = total_count;
-  int onpath_scale = GCOV_COMPUTE_SCALE (path_in_count, total_count);
+  profile_probability onpath_scale = path_in_count.probability_in (total_count);
  
    /* Walk the entire path to do some more computation in order to estimate
       how much of the path_in_count will flow out of the duplicated threading
@@ -783,16 +775,16 @@ compute_path_counts (struct redirection_data *rd,
       nonpath_count with any additional counts coming into the path.  Other
       blocks along the path may have additional predecessors from outside
       the path.  */
-  gcov_type path_out_count = path_in_count;
-  gcov_type min_path_count = path_in_count;
+  profile_count path_out_count = path_in_count;
+  profile_count min_path_count = path_in_count;
    for (unsigned int i = 1; i < path->length (); i++)
      {
        edge epath = (*path)[i]->e;
-      gcov_type cur_count = epath->count;
+      profile_count cur_count = epath->count ();
        if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
         {
           has_joiner = true;
-         cur_count = apply_probability (cur_count, onpath_scale);
+         cur_count = cur_count.apply_probability (onpath_scale);
         }
        /* In the joiner case we need to update nonpath_count for any edges
          coming into the path that will contribute to the count flowing
@@ -808,13 +800,13 @@ compute_path_counts (struct redirection_data *rd,
                      they are redirected by an invocation of this routine.  */
                   && !bitmap_bit_p (local_info->duplicate_blocks,
                                     ein->src->index))
-               nonpath_count += ein->count;
+               nonpath_count += ein->count ();
             }
         }
        if (cur_count < path_out_count)
         path_out_count = cur_count;
-      if (epath->count < min_path_count)
-       min_path_count = epath->count;
+      if (epath->count () < min_path_count)
+       min_path_count = epath->count ();
      }
  
    /* We computed path_out_count above assuming that this path targeted
@@ -829,12 +821,12 @@ compute_path_counts (struct redirection_data *rd,
       (since any path through the joiner with a different elast will not
       include a copy of this elast in its duplicated path).
       So ensure that this path's path_out_count is at least the
-     difference between elast->count and nonpath_count.  Otherwise the edge
+     difference between elast->count () and nonpath_count.  Otherwise the edge
       counts after threading will not be sane.  */
    if (local_info->need_profile_correction
-      && has_joiner && path_out_count < elast->count - nonpath_count)
+      && has_joiner && path_out_count < elast->count () - nonpath_count)
      {
-      path_out_count = elast->count - nonpath_count;
+      path_out_count = elast->count () - nonpath_count;
        /* But neither can we go above the minimum count along the path
          we are duplicating.  This can be an issue due to profile
          insanities coming in to this pass.  */
@@ -844,268 +836,101 @@ compute_path_counts (struct redirection_data *rd,
  
    *path_in_count_ptr = path_in_count;
    *path_out_count_ptr = path_out_count;
-  *path_in_freq_ptr = path_in_freq;
    return has_joiner;
  }
  
  
  /* Update the counts and frequencies for both an original path
     edge EPATH and its duplicate EDUP.  The duplicate source block
-   will get a count/frequency of PATH_IN_COUNT and PATH_IN_FREQ,
+   will get a count of PATH_IN_COUNT and PATH_IN_FREQ,
     and the duplicate edge EDUP will have a count of PATH_OUT_COUNT.  */
  static void
-update_profile (edge epath, edge edup, gcov_type path_in_count,
-               gcov_type path_out_count, int path_in_freq)
+update_profile (edge epath, edge edup, profile_count path_in_count,
+               profile_count path_out_count)
  {
  
-  /* First update the duplicated block's count / frequency.  */
+  /* First update the duplicated block's count.  */
    if (edup)
      {
        basic_block dup_block = edup->src;
-      gcc_assert (dup_block->count == 0);
-      gcc_assert (dup_block->frequency == 0);
+
+      /* Edup's count is reduced by path_out_count.  We need to redistribute
+         probabilities to the remaining edges.  */
+
+      edge esucc;
+      edge_iterator ei;
+      profile_probability edup_prob
+        = path_out_count.probability_in (path_in_count);
+
+      /* Either scale up or down the remaining edges.
+        probabilities are always in range <0,1> and thus we can't do
+        both by same loop.  */
+      if (edup->probability > edup_prob)
+       {
+          profile_probability rev_scale
+            = (profile_probability::always () - edup->probability)
+              / (profile_probability::always () - edup_prob);
+          FOR_EACH_EDGE (esucc, ei, dup_block->succs)
+            if (esucc != edup)
+              esucc->probability /= rev_scale;
+       }
+      else if (edup->probability < edup_prob)
+       {
+          profile_probability scale
+            = (profile_probability::always () - edup_prob)
+              / (profile_probability::always () - edup->probability);
+         FOR_EACH_EDGE (esucc, ei, dup_block->succs)
+           if (esucc != edup)
+             esucc->probability *= scale;
+       }
+      if (edup_prob.initialized_p ())
+        edup->probability = edup_prob;
+
+      gcc_assert (!dup_block->count.initialized_p ());
        dup_block->count = path_in_count;
-      dup_block->frequency = path_in_freq;
      }
  
-  /* Now update the original block's count and frequency in the
+  if (path_in_count == profile_count::zero ())
+    return;
+
+  profile_count final_count = epath->count () - path_out_count;
+
+  /* Now update the original block's count in the
       opposite manner - remove the counts/freq that will flow
       into the duplicated block.  Handle underflow due to precision/
       rounding issues.  */
    epath->src->count -= path_in_count;
-  if (epath->src->count < 0)
-    epath->src->count = 0;
-  epath->src->frequency -= path_in_freq;
-  if (epath->src->frequency < 0)
-    epath->src->frequency = 0;
  
    /* Next update this path edge's original and duplicated counts.  We know
       that the duplicated path will have path_out_count flowing
       out of it (in the joiner case this is the count along the duplicated path
       out of the duplicated joiner).  This count can then be removed from the
       original path edge.  */
-  if (edup)
-    edup->count = path_out_count;
-  epath->count -= path_out_count;
-  gcc_assert (epath->count >= 0);
-}
-
-
-/* The duplicate and original joiner blocks may end up with different
-   probabilities (different from both the original and from each other).
-   Recompute the probabilities here once we have updated the edge
-   counts and frequencies.  */
  
-static void
-recompute_probabilities (basic_block bb)
-{
    edge esucc;
    edge_iterator ei;
-  FOR_EACH_EDGE (esucc, ei, bb->succs)
-    {
-      if (!bb->count)
-       continue;
-
-      /* Prevent overflow computation due to insane profiles.  */
-      if (esucc->count < bb->count)
-       esucc->probability = GCOV_COMPUTE_SCALE (esucc->count,
-                                                bb->count);
-      else
-       /* Can happen with missing/guessed probabilities, since we
-          may determine that more is flowing along duplicated
-          path than joiner succ probabilities allowed.
-          Counts and freqs will be insane after jump threading,
-          at least make sure probability is sane or we will
-          get a flow verification error.
-          Not much we can do to make counts/freqs sane without
-          redoing the profile estimation.  */
-       esucc->probability = REG_BR_PROB_BASE;
-    }
-}
-
-
-/* Update the counts of the original and duplicated edges from a joiner
-   that go off path, given that we have already determined that the
-   duplicate joiner DUP_BB has incoming count PATH_IN_COUNT and
-   outgoing count along the path PATH_OUT_COUNT.  The original (on-)path
-   edge from joiner is EPATH.  */
-
-static void
-update_joiner_offpath_counts (edge epath, basic_block dup_bb,
-                             gcov_type path_in_count,
-                             gcov_type path_out_count)
-{
-  /* Compute the count that currently flows off path from the joiner.
-     In other words, the total count of joiner's out edges other than
-     epath.  Compute this by walking the successors instead of
-     subtracting epath's count from the joiner bb count, since there
-     are sometimes slight insanities where the total out edge count is
-     larger than the bb count (possibly due to rounding/truncation
-     errors).  */
-  gcov_type total_orig_off_path_count = 0;
-  edge enonpath;
-  edge_iterator ei;
-  FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
-    {
-      if (enonpath == epath)
-       continue;
-      total_orig_off_path_count += enonpath->count;
-    }
-
-  /* For the path that we are duplicating, the amount that will flow
-     off path from the duplicated joiner is the delta between the
-     path's cumulative in count and the portion of that count we
-     estimated above as flowing from the joiner along the duplicated
-     path.  */
-  gcov_type total_dup_off_path_count = path_in_count - path_out_count;
-
-  /* Now do the actual updates of the off-path edges.  */
-  FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
-    {
-      /* Look for edges going off of the threading path.  */
-      if (enonpath == epath)
-       continue;
-
-      /* Find the corresponding edge out of the duplicated joiner.  */
-      edge enonpathdup = find_edge (dup_bb, enonpath->dest);
-      gcc_assert (enonpathdup);
-
-      /* We can't use the original probability of the joiner's out
-        edges, since the probabilities of the original branch
-        and the duplicated branches may vary after all threading is
-        complete.  But apportion the duplicated joiner's off-path
-        total edge count computed earlier (total_dup_off_path_count)
-        among the duplicated off-path edges based on their original
-        ratio to the full off-path count (total_orig_off_path_count).
-        */
-      int scale = GCOV_COMPUTE_SCALE (enonpath->count,
-                                     total_orig_off_path_count);
-      /* Give the duplicated offpath edge a portion of the duplicated
-        total.  */
-      enonpathdup->count = apply_scale (scale,
-                                       total_dup_off_path_count);
-      /* Now update the original offpath edge count, handling underflow
-        due to rounding errors.  */
-      enonpath->count -= enonpathdup->count;
-      if (enonpath->count < 0)
-       enonpath->count = 0;
-    }
-}
-
-
-/* Check if the paths through RD all have estimated frequencies but zero
-   profile counts.  This is more accurate than checking the entry block
-   for a zero profile count, since profile insanities sometimes creep in.  */
-
-static bool
-estimated_freqs_path (struct redirection_data *rd)
-{
-  edge e = rd->incoming_edges->e;
-  vec<jump_thread_edge *> *path = THREAD_PATH (e);
-  edge ein;
-  edge_iterator ei;
-  bool non_zero_freq = false;
-  FOR_EACH_EDGE (ein, ei, e->dest->preds)
-    {
-      if (ein->count)
-       return false;
-      non_zero_freq |= ein->src->frequency != 0;
-    }
-
-  for (unsigned int i = 1; i < path->length (); i++)
-    {
-      edge epath = (*path)[i]->e;
-      if (epath->src->count)
-       return false;
-      non_zero_freq |= epath->src->frequency != 0;
-      edge esucc;
-      FOR_EACH_EDGE (esucc, ei, epath->src->succs)
-       {
-         if (esucc->count)
-           return false;
-         non_zero_freq |= esucc->src->frequency != 0;
-       }
-    }
-  return non_zero_freq;
-}
-
-
-/* Invoked for routines that have guessed frequencies and no profile
-   counts to record the block and edge frequencies for paths through RD
-   in the profile count fields of those blocks and edges.  This is because
-   ssa_fix_duplicate_block_edges incrementally updates the block and
-   edge counts as edges are redirected, and it is difficult to do that
-   for edge frequencies which are computed on the fly from the source
-   block frequency and probability.  When a block frequency is updated
-   its outgoing edge frequencies are affected and become difficult to
-   adjust.  */
-
-static void
-freqs_to_counts_path (struct redirection_data *rd)
-{
-  edge e = rd->incoming_edges->e;
-  vec<jump_thread_edge *> *path = THREAD_PATH (e);
-  edge ein;
-  edge_iterator ei;
-  FOR_EACH_EDGE (ein, ei, e->dest->preds)
-    {
-      /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
-        errors applying the probability when the frequencies are very
-        small.  */
-      ein->count = apply_probability (ein->src->frequency * REG_BR_PROB_BASE,
-                                     ein->probability);
-    }
+  profile_probability epath_prob = final_count.probability_in (epath->src->count);
  
-  for (unsigned int i = 1; i < path->length (); i++)
+  if (epath->probability > epath_prob)
      {
-      edge epath = (*path)[i]->e;
-      edge esucc;
-      /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
-        errors applying the edge probability when the frequencies are very
-        small.  */
-      epath->src->count = epath->src->frequency * REG_BR_PROB_BASE;
-      FOR_EACH_EDGE (esucc, ei, epath->src->succs)
-       esucc->count = apply_probability (esucc->src->count,
-                                         esucc->probability);
+       profile_probability rev_scale
+        = (profile_probability::always () - epath->probability)
+          / (profile_probability::always () - epath_prob);
+       FOR_EACH_EDGE (esucc, ei, epath->src->succs)
+        if (esucc != epath)
+          esucc->probability /= rev_scale;
      }
-}
-
-
-/* For routines that have guessed frequencies and no profile counts, where we
-   used freqs_to_counts_path to record block and edge frequencies for paths
-   through RD, we clear the counts after completing all updates for RD.
-   The updates in ssa_fix_duplicate_block_edges are based off the count fields,
-   but the block frequencies and edge probabilities were updated as well,
-   so we can simply clear the count fields.  */
-
-static void
-clear_counts_path (struct redirection_data *rd)
-{
-  edge e = rd->incoming_edges->e;
-  vec<jump_thread_edge *> *path = THREAD_PATH (e);
-  edge ein, esucc;
-  edge_iterator ei;
-  FOR_EACH_EDGE (ein, ei, e->dest->preds)
-    ein->count = 0;
-
-  /* First clear counts along original path.  */
-  for (unsigned int i = 1; i < path->length (); i++)
+  else if (epath->probability < epath_prob)
      {
-      edge epath = (*path)[i]->e;
+       profile_probability scale
+        = (profile_probability::always () - epath_prob)
+          / (profile_probability::always () - epath->probability);
        FOR_EACH_EDGE (esucc, ei, epath->src->succs)
-       esucc->count = 0;
-      epath->src->count = 0;
-    }
-  /* Also need to clear the counts along duplicated path.  */
-  for (unsigned int i = 0; i < 2; i++)
-    {
-      basic_block dup = rd->dup_blocks[i];
-      if (!dup)
-       continue;
-      FOR_EACH_EDGE (esucc, ei, dup->succs)
-       esucc->count = 0;
-      dup->count = 0;
+       if (esucc != epath)
+         esucc->probability *= scale;
      }
+  if (epath_prob.initialized_p ())
+    epath->probability = epath_prob;
  }
  
  /* Wire up the outgoing edges from the duplicate blocks and
@@ -1119,23 +944,8 @@ ssa_fix_duplicate_block_edges (struct redirection_data *rd,
    edge e = rd->incoming_edges->e;
    vec<jump_thread_edge *> *path = THREAD_PATH (e);
    edge elast = path->last ()->e;
-  gcov_type path_in_count = 0;
-  gcov_type path_out_count = 0;
-  int path_in_freq = 0;
-
-  /* This routine updates profile counts, frequencies, and probabilities
-     incrementally. Since it is difficult to do the incremental updates
-     using frequencies/probabilities alone, for routines without profile
-     data we first take a snapshot of the existing block and edge frequencies
-     by copying them into the empty profile count fields.  These counts are
-     then used to do the incremental updates, and cleared at the end of this
-     routine.  If the function is marked as having a profile, we still check
-     to see if the paths through RD are using estimated frequencies because
-     the routine had zero profile counts.  */
-  bool do_freqs_to_counts = (profile_status_for_fn (cfun) != PROFILE_READ
-                            || estimated_freqs_path (rd));
-  if (do_freqs_to_counts)
-    freqs_to_counts_path (rd);
+  profile_count path_in_count = profile_count::zero ();
+  profile_count path_out_count = profile_count::zero ();
  
    /* First determine how much profile count to move from original
       path to the duplicate path.  This is tricky in the presence of
@@ -1144,10 +954,8 @@ ssa_fix_duplicate_block_edges (struct redirection_data *rd,
       non-joiner case the path_in_count and path_out_count should be the
       same.  */
    bool has_joiner = compute_path_counts (rd, local_info,
-                                        &path_in_count, &path_out_count,
-                                        &path_in_freq);
+                                        &path_in_count, &path_out_count);
  
-  int cur_path_freq = path_in_freq;
    for (unsigned int count = 0, i = 1; i < path->length (); i++)
      {
        edge epath = (*path)[i]->e;
@@ -1213,30 +1021,14 @@ ssa_fix_duplicate_block_edges (struct redirection_data *rd,
                 }
             }
  
-         /* Update the counts and frequency of both the original block
+         /* Update the counts of both the original block
              and path edge, and the duplicates.  The path duplicate's
-            incoming count and frequency are the totals for all edges
+            incoming count are the totals for all edges
              incoming to this jump threading path computed earlier.
              And we know that the duplicated path will have path_out_count
              flowing out of it (i.e. along the duplicated path out of the
              duplicated joiner).  */
-         update_profile (epath, e2, path_in_count, path_out_count,
-                         path_in_freq);
-
-         /* Next we need to update the counts of the original and duplicated
-            edges from the joiner that go off path.  */
-         update_joiner_offpath_counts (epath, e2->src, path_in_count,
-                                       path_out_count);
-
-         /* Finally, we need to set the probabilities on the duplicated
-            edges out of the duplicated joiner (e2->src).  The probabilities
-            along the original path will all be updated below after we finish
-            processing the whole path.  */
-         recompute_probabilities (e2->src);
-
-         /* Record the frequency flowing to the downstream duplicated
-            path blocks.  */
-         cur_path_freq = EDGE_FREQUENCY (e2);
+         update_profile (epath, e2, path_in_count, path_out_count);
         }
        else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
         {
@@ -1246,7 +1038,7 @@ ssa_fix_duplicate_block_edges (struct redirection_data *rd,
           if (count == 1)
             single_succ_edge (rd->dup_blocks[1])->aux = NULL;
  
-         /* Update the counts and frequency of both the original block
+         /* Update the counts of both the original block
              and path edge, and the duplicates.  Since we are now after
              any joiner that may have existed on the path, the count
              flowing along the duplicated threaded path is path_out_count.
@@ -1256,8 +1048,7 @@ ssa_fix_duplicate_block_edges (struct redirection_data *rd,
              been updated at the end of that handling to the edge frequency
              along the duplicated joiner path edge.  */
           update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
-                         path_out_count, path_out_count,
-                         cur_path_freq);
+                         path_out_count, path_out_count);
         }
        else
         {
@@ -1274,8 +1065,7 @@ ssa_fix_duplicate_block_edges (struct redirection_data *rd,
              thread path (path_in_freq).  If we had a joiner, it would have
              been updated at the end of that handling to the edge frequency
              along the duplicated joiner path edge.  */
-          update_profile (epath, NULL, path_out_count, path_out_count,
-                          cur_path_freq);
+          update_profile (epath, NULL, path_out_count, path_out_count);
         }
  
        /* Increment the index into the duplicated path when we processed
@@ -1286,19 +1076,6 @@ ssa_fix_duplicate_block_edges (struct redirection_data *rd,
           count++;
         }
      }
-
-  /* Now walk orig blocks and update their probabilities, since the
-     counts and freqs should be updated properly by above loop.  */
-  for (unsigned int i = 1; i < path->length (); i++)
-    {
-      edge epath = (*path)[i]->e;
-      recompute_probabilities (epath->src);
-    }
-
-  /* Done with all profile and frequency updates, clear counts if they
-     were copied.  */
-  if (do_freqs_to_counts)
-    clear_counts_path (rd);
  }
  
  /* Hash table traversal callback routine to create duplicate blocks.  */
@@ -1531,10 +1308,8 @@ thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
              threading path that crosses loop boundaries.  We do not try
              and thread this elsewhere, so just cancel the jump threading
              request by clearing the AUX field now.  */
-         if ((bb->loop_father != e2->src->loop_father
-              && !loop_exit_edge_p (e2->src->loop_father, e2))
-             || (e2->src->loop_father != e2->dest->loop_father
-                 && !loop_exit_edge_p (e2->src->loop_father, e2)))
+         if (bb->loop_father != e2->src->loop_father
+             && !loop_exit_edge_p (e2->src->loop_father, e2))
             {
               /* Since this case is not handled by our special code
                  to thread through a loop header, we must explicitly
@@ -1558,6 +1333,31 @@ thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
  
           if (i != path->length ())
             continue;
+
+         /* Loop parallelization can be confused by the result of
+            threading through the loop exit test back into the loop.
+            However, theading those jumps seems to help other codes.
+
+            I have been unable to find anything related to the shape of
+            the CFG, the contents of the affected blocks, etc which would
+            allow a more sensible test than what we're using below which
+            merely avoids the optimization when parallelizing loops.  */
+         if (flag_tree_parallelize_loops > 1)
+           {
+             for (i = 1; i < path->length (); i++)
+               if (bb->loop_father == e2->src->loop_father
+                   && loop_exits_from_bb_p (bb->loop_father,
+                                            (*path)[i]->e->src)
+                   && !loop_exit_edge_p (bb->loop_father, e2))
+                 break;
+
+             if (i != path->length ())
+               {
+                 delete_jump_thread_path (path);
+                 e->aux = NULL;
+                 continue;
+               }
+           }
         }
  
        /* Insert the outgoing edge into the hash table if it is not
@@ -1937,6 +1737,31 @@ phi_args_equal_on_edges (edge e1, edge e2)
    return true;
  }
  
+/* Return the number of non-debug statements and non-virtual PHIs in a
+   block.  */
+
+static unsigned int
+count_stmts_and_phis_in_block (basic_block bb)
+{
+  unsigned int num_stmts = 0;
+
+  gphi_iterator gpi;
+  for (gpi = gsi_start_phis (bb); !gsi_end_p (gpi); gsi_next (&gpi))
+    if (!virtual_operand_p (PHI_RESULT (gpi.phi ())))
+      num_stmts++;
+
+  gimple_stmt_iterator gsi;
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      gimple *stmt = gsi_stmt (gsi);
+      if (!is_gimple_debug (stmt))
+        num_stmts++;
+    }
+
+  return num_stmts;
+}
+
+
  /* Walk through the registered jump threads and convert them into a
     form convenient for this pass.
  
@@ -1955,7 +1780,7 @@ mark_threaded_blocks (bitmap threaded_blocks)
  {
    unsigned int i;
    bitmap_iterator bi;
-  bitmap tmp = BITMAP_ALLOC (NULL);
+  auto_bitmap tmp;
    basic_block bb;
    edge e;
    edge_iterator ei;
@@ -2056,80 +1881,55 @@ mark_threaded_blocks (bitmap threaded_blocks)
         }
      }
  
-  /* If optimizing for size, only thread through block if we don't have
-     to duplicate it or it's an otherwise empty redirection block.  */
-  if (optimize_function_for_size_p (cfun))
-    {
-      EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
-       {
-         bb = BASIC_BLOCK_FOR_FN (cfun, i);
-         if (EDGE_COUNT (bb->preds) > 1
-             && !redirection_block_p (bb))
-           {
-             FOR_EACH_EDGE (e, ei, bb->preds)
-               {
-                 if (e->aux)
-                   {
-                     vec<jump_thread_edge *> *path = THREAD_PATH (e);
-                     delete_jump_thread_path (path);
-                     e->aux = NULL;
-                   }
-               }
-           }
-         else
-           bitmap_set_bit (threaded_blocks, i);
-       }
-    }
-  else
-    bitmap_copy (threaded_blocks, tmp);
+  /* When optimizing for size, prune all thread paths where statement
+     duplication is necessary.
  
-  /* Look for jump threading paths which cross multiple loop headers.
+     We walk the jump thread path looking for copied blocks.  There's
+     two types of copied blocks.
  
-     The code to thread through loop headers will change the CFG in ways
-     that break assumptions made by the loop optimization code.
+       EDGE_COPY_SRC_JOINER_BLOCK is always copied and thus we will
+       cancel the jump threading request when optimizing for size.
  
-     We don't want to blindly cancel the requests.  We can instead do better
-     by trimming off the end of the jump thread path.  */
-  EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
+       EDGE_COPY_SRC_BLOCK which is copied, but some of its statements
+       will be killed by threading.  If threading does not kill all of
+       its statements, then we should cancel the jump threading request
+       when optimizing for size.  */
+  if (optimize_function_for_size_p (cfun))
      {
-      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
-      FOR_EACH_EDGE (e, ei, bb->preds)
+      EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
         {
-         if (e->aux)
-           {
-             vec<jump_thread_edge *> *path = THREAD_PATH (e);
-
-             for (unsigned int i = 0, crossed_headers = 0;
-                  i < path->length ();
-                  i++)
-               {
-                 basic_block dest = (*path)[i]->e->dest;
-                 crossed_headers += (dest == dest->loop_father->header);
-                 if (crossed_headers > 1)
-                   {
-                     /* Trim from entry I onwards.  */
-                     for (unsigned int j = i; j < path->length (); j++)
-                       delete (*path)[j];
-                     path->truncate (i);
-
-                     /* Now that we've truncated the path, make sure
-                        what's left is still valid.   We need at least
-                        two edges on the path and the last edge can not
-                        be a joiner.  This should never happen, but let's
-                        be safe.  */
-                     if (path->length () < 2
-                         || (path->last ()->type
-                             == EDGE_COPY_SRC_JOINER_BLOCK))
-                       {
-                         delete_jump_thread_path (path);
-                         e->aux = NULL;
-                       }
+         FOR_EACH_EDGE (e, ei, BASIC_BLOCK_FOR_FN (cfun, i)->preds)
+           if (e->aux)
+             {
+               vec<jump_thread_edge *> *path = THREAD_PATH (e);
+
+               unsigned int j;
+               for (j = 1; j < path->length (); j++)
+                 {
+                   bb = (*path)[j]->e->src;
+                   if (redirection_block_p (bb))
+                     ;
+                   else if ((*path)[j]->type == EDGE_COPY_SRC_JOINER_BLOCK
+                            || ((*path)[j]->type == EDGE_COPY_SRC_BLOCK
+                                && (count_stmts_and_phis_in_block (bb)
+                                    != estimate_threading_killed_stmts (bb))))
                       break;
-                   }
-               }
-           }
+                 }
+
+               if (j != path->length ())
+                 {
+                   if (dump_file && (dump_flags & TDF_DETAILS))
+                     dump_jump_thread_path (dump_file, *path, 0);
+                   delete_jump_thread_path (path);
+                   e->aux = NULL;
+                 }
+               else
+                 bitmap_set_bit (threaded_blocks, i);
+             }
         }
      }
+  else
+    bitmap_copy (threaded_blocks, tmp);
  
    /* If we have a joiner block (J) which has two successors S1 and S2 and
       we are threading though S1 and the final destination of the thread
@@ -2175,7 +1975,45 @@ mark_threaded_blocks (bitmap threaded_blocks)
         }
      }
  
-  BITMAP_FREE (tmp);
+  /* Look for jump threading paths which cross multiple loop headers.
+
+     The code to thread through loop headers will change the CFG in ways
+     that invalidate the cached loop iteration information.  So we must
+     detect that case and wipe the cached information.  */
+  EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
+    {
+      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
+      FOR_EACH_EDGE (e, ei, bb->preds)
+       {
+         if (e->aux)
+           {
+             vec<jump_thread_edge *> *path = THREAD_PATH (e);
+
+             for (unsigned int i = 0, crossed_headers = 0;
+                  i < path->length ();
+                  i++)
+               {
+                 basic_block dest = (*path)[i]->e->dest;
+                 basic_block src = (*path)[i]->e->src;
+                 /* If we enter a loop.  */
+                 if (flow_loop_nested_p (src->loop_father, dest->loop_father))
+                   ++crossed_headers;
+                 /* If we step from a block outside an irreducible region
+                    to a block inside an irreducible region, then we have
+                    crossed into a loop.  */
+                 else if (! (src->flags & BB_IRREDUCIBLE_LOOP)
+                          && (dest->flags & BB_IRREDUCIBLE_LOOP))
+                     ++crossed_headers;
+                 if (crossed_headers > 1)
+                   {
+                     vect_free_loop_info_assumptions
+                       ((*path)[path->length () - 1]->e->dest->loop_father);
+                     break;
+                   }
+               }
+           }
+       }
+    }
  }
  
  
@@ -2210,23 +2048,17 @@ bb_in_bbs (basic_block bb, basic_block *bbs, int n)
     and create a single fallthru edge pointing to the same destination as the
     EXIT edge.
  
-   The new basic blocks are stored to REGION_COPY in the same order as they had
-   in REGION, provided that REGION_COPY is not NULL.
-
     Returns false if it is unable to copy the region, true otherwise.  */
  
  static bool
-duplicate_thread_path (edge entry, edge exit,
-                      basic_block *region, unsigned n_region,
-                      basic_block *region_copy)
+duplicate_thread_path (edge entry, edge exit, basic_block *region,
+                      unsigned n_region)
  {
    unsigned i;
-  bool free_region_copy = false;
    struct loop *loop = entry->dest->loop_father;
    edge exit_copy;
    edge redirected;
-  int total_freq = 0, entry_freq = 0;
-  gcov_type total_count = 0, entry_count = 0;
+  profile_count curr_count;
  
    if (!can_copy_bbs_p (region, n_region))
      return false;
@@ -2247,33 +2079,7 @@ duplicate_thread_path (edge entry, edge exit,
  
    set_loop_copy (loop, loop);
  
-  if (!region_copy)
-    {
-      region_copy = XNEWVEC (basic_block, n_region);
-      free_region_copy = true;
-    }
-
-  if (entry->dest->count)
-    {
-      total_count = entry->dest->count;
-      entry_count = entry->count;
-      /* Fix up corner cases, to avoid division by zero or creation of negative
-        frequencies.  */
-      if (entry_count > total_count)
-       entry_count = total_count;
-    }
-  else
-    {
-      total_freq = entry->dest->frequency;
-      entry_freq = EDGE_FREQUENCY (entry);
-      /* Fix up corner cases, to avoid division by zero or creation of negative
-        frequencies.  */
-      if (total_freq == 0)
-       total_freq = 1;
-      else if (entry_freq > total_freq)
-       entry_freq = total_freq;
-    }
-
+  basic_block *region_copy = XNEWVEC (basic_block, n_region);
    copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
             split_edge_bb_loc (entry), false);
  
@@ -2283,17 +2089,44 @@ duplicate_thread_path (edge entry, edge exit,
       invalidating the property that is propagated by executing all the blocks of
       the jump-thread path in order.  */
  
+  curr_count = entry->count ();
+
    for (i = 0; i < n_region; i++)
      {
        edge e;
        edge_iterator ei;
        basic_block bb = region_copy[i];
  
+      /* Watch inconsistent profile.  */
+      if (curr_count > region[i]->count)
+       curr_count = region[i]->count;
+      /* Scale current BB.  */
+      if (region[i]->count.nonzero_p () && curr_count.initialized_p ())
+       {
+         /* In the middle of the path we only scale the frequencies.
+            In last BB we need to update probabilities of outgoing edges
+            because we know which one is taken at the threaded path.  */
+         if (i + 1 != n_region)
+           scale_bbs_frequencies_profile_count (region + i, 1,
+                                                region[i]->count - curr_count,
+                                                region[i]->count);
+         else
+           update_bb_profile_for_threading (region[i],
+                                            curr_count,
+                                            exit);
+         scale_bbs_frequencies_profile_count (region_copy + i, 1, curr_count,
+                                              region_copy[i]->count);
+       }
+
        if (single_succ_p (bb))
         {
           /* Make sure the successor is the next node in the path.  */
           gcc_assert (i + 1 == n_region
                       || region_copy[i + 1] == single_succ_edge (bb)->dest);
+         if (i + 1 != n_region)
+           {
+             curr_count = single_succ_edge (bb)->count ();
+           }
           continue;
         }
  
@@ -2320,22 +2153,12 @@ duplicate_thread_path (edge entry, edge exit,
             if (orig)
               redirect_edge_and_branch_force (e, orig);
           }
+       else
+         {
+           curr_count = e->count ();
+         }
      }
  
-  if (total_count)
-    {
-      scale_bbs_frequencies_gcov_type (region, n_region,
-                                      total_count - entry_count,
-                                      total_count);
-      scale_bbs_frequencies_gcov_type (region_copy, n_region, entry_count,
-                                      total_count);
-    }
-  else
-    {
-      scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq,
-                                total_freq);
-      scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq);
-    }
  
    if (flag_checking)
      verify_jump_thread (region_copy, n_region);
@@ -2350,11 +2173,11 @@ duplicate_thread_path (edge entry, edge exit,
  
    edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
  
-  if (e) {
-    rescan_loop_exit (e, true, false);
-    e->probability = REG_BR_PROB_BASE;
-    e->count = region_copy[n_region - 1]->count;
-  }
+  if (e)
+    {
+      rescan_loop_exit (e, true, false);
+      e->probability = profile_probability::always ();
+    }
  
    /* Redirect the entry and add the phi node arguments.  */
    if (entry->dest == loop->header)
@@ -2366,8 +2189,7 @@ duplicate_thread_path (edge entry, edge exit,
    /* Add the other PHI node arguments.  */
    add_phi_args_after_copy (region_copy, n_region, NULL);
  
-  if (free_region_copy)
-    free (region_copy);
+  free (region_copy);
  
    free_original_copy_tables ();
    return true;
@@ -2425,9 +2247,8 @@ thread_through_all_blocks (bool may_peel_loop_headers)
  {
    bool retval = false;
    unsigned int i;
-  bitmap_iterator bi;
-  bitmap threaded_blocks;
    struct loop *loop;
+  auto_bitmap threaded_blocks;
  
    if (!paths.exists ())
      {
@@ -2435,7 +2256,6 @@ thread_through_all_blocks (bool may_peel_loop_headers)
        goto out;
      }
  
-  threaded_blocks = BITMAP_ALLOC (NULL);
    memset (&thread_stats, 0, sizeof (thread_stats));
  
    /* Remove any paths that referenced removed edges.  */
@@ -2493,7 +2313,7 @@ thread_through_all_blocks (bool may_peel_loop_headers)
        for (unsigned int j = 0; j < len - 1; j++)
         region[j] = (*path)[j]->e->dest;
  
-      if (duplicate_thread_path (entry, exit, region, len - 1, NULL))
+      if (duplicate_thread_path (entry, exit, region, len - 1))
         {
           /* We do not update dominance info.  */
           free_dominance_info (CDI_DOMINATORS);
@@ -2530,14 +2350,33 @@ thread_through_all_blocks (bool may_peel_loop_headers)
  
    initialize_original_copy_tables ();
  
-  /* First perform the threading requests that do not affect
-     loop structure.  */
-  EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
-    {
-      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
+  /* The order in which we process jump threads can be important.
+
+     Consider if we have two jump threading paths A and B.  If the
+     target edge of A is the starting edge of B and we thread path A
+     first, then we create an additional incoming edge into B->dest that
+     we can not discover as a jump threading path on this iteration.
+
+     If we instead thread B first, then the edge into B->dest will have
+     already been redirected before we process path A and path A will
+     natually, with no further work, target the redirected path for B.
  
-      if (EDGE_COUNT (bb->preds) > 0)
-       retval |= thread_block (bb, true);
+     An post-order is sufficient here.  Compute the ordering first, then
+     process the blocks.  */
+  if (!bitmap_empty_p (threaded_blocks))
+    {
+      int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
+      unsigned int postorder_num = post_order_compute (postorder, false, false);
+      for (unsigned int i = 0; i < postorder_num; i++)
+       {
+         unsigned int indx = postorder[i];
+         if (bitmap_bit_p (threaded_blocks, indx))
+           {
+             basic_block bb = BASIC_BLOCK_FOR_FN (cfun, indx);
+             retval |= thread_block (bb, true);
+           }
+       }
+      free (postorder);
      }
  
    /* Then perform the threading through loop headers.  We start with the
@@ -2552,55 +2391,15 @@ thread_through_all_blocks (bool may_peel_loop_headers)
        retval |= thread_through_loop_header (loop, may_peel_loop_headers);
      }
  
-  /* Any jump threading paths that are still attached to edges at this
-     point must be one of two cases.
-
-     First, we could have a jump threading path which went from outside
-     a loop to inside a loop that was ignored because a prior jump thread
-     across a backedge was realized (which indirectly causes the loop
-     above to ignore the latter thread).  We can detect these because the
-     loop structures will be different and we do not currently try to
-     optimize this case.
-
-     Second, we could be threading across a backedge to a point within the
-     same loop.  This occurrs for the FSA/FSM optimization and we would
-     like to optimize it.  However, we have to be very careful as this
-     may completely scramble the loop structures, with the result being
-     irreducible loops causing us to throw away our loop structure.
-
-     As a compromise for the latter case, if the thread path ends in
-     a block where the last statement is a multiway branch, then go
-     ahead and thread it, else ignore it.  */
+  /* All jump threading paths should have been resolved at this
+     point.  Verify that is the case.  */
    basic_block bb;
-  edge e;
    FOR_EACH_BB_FN (bb, cfun)
      {
-      /* If we do end up threading here, we can remove elements from
-        BB->preds.  Thus we can not use the FOR_EACH_EDGE iterator.  */
-      for (edge_iterator ei = ei_start (bb->preds);
-          (e = ei_safe_edge (ei));)
-       if (e->aux)
-         {
-           vec<jump_thread_edge *> *path = THREAD_PATH (e);
-
-           /* Case 1, threading from outside to inside the loop
-              after we'd already threaded through the header.  */
-           if ((*path)[0]->e->dest->loop_father
-               != path->last ()->e->src->loop_father)
-             {
-               delete_jump_thread_path (path);
-               e->aux = NULL;
-               ei_next (&ei);
-             }
-           else
-             {
-               delete_jump_thread_path (path);
-               e->aux = NULL;
-               ei_next (&ei);
-             }
-         }
-       else
-         ei_next (&ei);
+      edge_iterator ei;
+      edge e;
+      FOR_EACH_EDGE (e, ei, bb->preds)
+       gcc_assert (e->aux == NULL);
      }
  
    statistics_counter_event (cfun, "Jumps threaded",
@@ -2608,8 +2407,6 @@ thread_through_all_blocks (bool may_peel_loop_headers)
  
    free_original_copy_tables ();
  
-  BITMAP_FREE (threaded_blocks);
-  threaded_blocks = NULL;
    paths.release ();
  
    if (retval)
@@ -2621,7 +2418,7 @@ thread_through_all_blocks (bool may_peel_loop_headers)
    return retval;
  }
  
-/* Delete the jump threading path PATH.  We have to explcitly delete
+/* Delete the jump threading path PATH.  We have to explicitly delete
     each entry in the vector, then the container.  */
  
  void
@@ -2682,3 +2479,139 @@ register_jump_thread (vec<jump_thread_edge *> *path)
  
    paths.safe_push (path);
  }
+
+/* Return how many uses of T there are within BB, as long as there
+   aren't any uses outside BB.  If there are any uses outside BB,
+   return -1 if there's at most one use within BB, or -2 if there is
+   more than one use within BB.  */
+
+static int
+uses_in_bb (tree t, basic_block bb)
+{
+  int uses = 0;
+  bool outside_bb = false;
+
+  imm_use_iterator iter;
+  use_operand_p use_p;
+  FOR_EACH_IMM_USE_FAST (use_p, iter, t)
+    {
+      if (is_gimple_debug (USE_STMT (use_p)))
+       continue;
+
+      if (gimple_bb (USE_STMT (use_p)) != bb)
+       outside_bb = true;
+      else
+       uses++;
+
+      if (outside_bb && uses > 1)
+       return -2;
+    }
+
+  if (outside_bb)
+    return -1;
+
+  return uses;
+}
+
+/* Starting from the final control flow stmt in BB, assuming it will
+   be removed, follow uses in to-be-removed stmts back to their defs
+   and count how many defs are to become dead and be removed as
+   well.  */
+
+unsigned int
+estimate_threading_killed_stmts (basic_block bb)
+{
+  int killed_stmts = 0;
+  hash_map<tree, int> ssa_remaining_uses;
+  auto_vec<gimple *, 4> dead_worklist;
+
+  /* If the block has only two predecessors, threading will turn phi
+     dsts into either src, so count them as dead stmts.  */
+  bool drop_all_phis = EDGE_COUNT (bb->preds) == 2;
+
+  if (drop_all_phis)
+    for (gphi_iterator gsi = gsi_start_phis (bb);
+        !gsi_end_p (gsi); gsi_next (&gsi))
+      {
+       gphi *phi = gsi.phi ();
+       tree dst = gimple_phi_result (phi);
+
+       /* We don't count virtual PHIs as stmts in
+          record_temporary_equivalences_from_phis.  */
+       if (virtual_operand_p (dst))
+         continue;
+
+       killed_stmts++;
+      }
+
+  if (gsi_end_p (gsi_last_bb (bb)))
+    return killed_stmts;
+
+  gimple *stmt = gsi_stmt (gsi_last_bb (bb));
+  if (gimple_code (stmt) != GIMPLE_COND
+      && gimple_code (stmt) != GIMPLE_GOTO
+      && gimple_code (stmt) != GIMPLE_SWITCH)
+    return killed_stmts;
+
+  /* The control statement is always dead.  */
+  killed_stmts++;
+  dead_worklist.quick_push (stmt);
+  while (!dead_worklist.is_empty ())
+    {
+      stmt = dead_worklist.pop ();
+
+      ssa_op_iter iter;
+      use_operand_p use_p;
+      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
+       {
+         tree t = USE_FROM_PTR (use_p);
+         gimple *def = SSA_NAME_DEF_STMT (t);
+
+         if (gimple_bb (def) == bb
+             && (gimple_code (def) != GIMPLE_PHI
+                 || !drop_all_phis)
+             && !gimple_has_side_effects (def))
+           {
+             int *usesp = ssa_remaining_uses.get (t);
+             int uses;
+
+             if (usesp)
+               uses = *usesp;
+             else
+               uses = uses_in_bb (t, bb);
+
+             gcc_assert (uses);
+
+             /* Don't bother recording the expected use count if we
+                won't find any further uses within BB.  */
+             if (!usesp && (uses < -1 || uses > 1))
+               {
+                 usesp = &ssa_remaining_uses.get_or_insert (t);
+                 *usesp = uses;
+               }
+
+             if (uses < 0)
+               continue;
+
+             --uses;
+             if (usesp)
+               *usesp = uses;
+
+             if (!uses)
+               {
+                 killed_stmts++;
+                 if (usesp)
+                   ssa_remaining_uses.remove (t);
+                 if (gimple_code (def) != GIMPLE_PHI)
+                   dead_worklist.safe_push (def);
+               }
+           }
+       }
+    }
+
+  if (dump_file)
+    fprintf (dump_file, "threading bb %i kills %i stmts\n",
+            bb->index, killed_stmts);
+
+  return killed_stmts;
+}