gcc/tree-ssa-threadupdate.c

   1 /* Thread edges through blocks and update the control flow and SSA graphs.
   2    Copyright (C) 2004-2014 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 3, or (at your option)
   9 any later version.
  10
  11 GCC is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "tree.h"
  24 #include "flags.h"
  25 #include "basic-block.h"
  26 #include "function.h"
  27 #include "hash-table.h"
  28 #include "tree-ssa-alias.h"
  29 #include "internal-fn.h"
  30 #include "gimple-expr.h"
  31 #include "is-a.h"
  32 #include "gimple.h"
  33 #include "gimple-iterator.h"
  34 #include "gimple-ssa.h"
  35 #include "tree-phinodes.h"
  36 #include "tree-ssa.h"
  37 #include "tree-ssa-threadupdate.h"
  38 #include "ssa-iterators.h"
  39 #include "dumpfile.h"
  40 #include "cfgloop.h"
  41 #include "dbgcnt.h"
  42 #include "tree-cfg.h"
  43 #include "tree-pass.h"
  44
  45 /* Given a block B, update the CFG and SSA graph to reflect redirecting
  46    one or more in-edges to B to instead reach the destination of an
  47    out-edge from B while preserving any side effects in B.
  48
  49    i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
  50    side effects of executing B.
  51
  52      1. Make a copy of B (including its outgoing edges and statements).  Call
  53         the copy B'.  Note B' has no incoming edges or PHIs at this time.
  54
  55      2. Remove the control statement at the end of B' and all outgoing edges
  56         except B'->C.
  57
  58      3. Add a new argument to each PHI in C with the same value as the existing
  59         argument associated with edge B->C.  Associate the new PHI arguments
  60         with the edge B'->C.
  61
  62      4. For each PHI in B, find or create a PHI in B' with an identical
  63         PHI_RESULT.  Add an argument to the PHI in B' which has the same
  64         value as the PHI in B associated with the edge A->B.  Associate
  65         the new argument in the PHI in B' with the edge A->B.
  66
  67      5. Change the edge A->B to A->B'.
  68
  69         5a. This automatically deletes any PHI arguments associated with the
  70             edge A->B in B.
  71
  72         5b. This automatically associates each new argument added in step 4
  73             with the edge A->B'.
  74
  75      6. Repeat for other incoming edges into B.
  76
  77      7. Put the duplicated resources in B and all the B' blocks into SSA form.
  78
  79    Note that block duplication can be minimized by first collecting the
  80    set of unique destination blocks that the incoming edges should
  81    be threaded to.
  82
  83    We reduce the number of edges and statements we create by not copying all
  84    the outgoing edges and the control statement in step #1.  We instead create
  85    a template block without the outgoing edges and duplicate the template.
  86
  87    Another case this code handles is threading through a "joiner" block.  In
  88    this case, we do not know the destination of the joiner block, but one
  89    of the outgoing edges from the joiner block leads to a threadable path.  This
  90    case largely works as outlined above, except the duplicate of the joiner
  91    block still contains a full set of outgoing edges and its control statement.
  92    We just redirect one of its outgoing edges to our jump threading path.  */
  93
  94
  95 /* Steps #5 and #6 of the above algorithm are best implemented by walking
  96    all the incoming edges which thread to the same destination edge at
  97    the same time.  That avoids lots of table lookups to get information
  98    for the destination edge.
  99
 100    To realize that implementation we create a list of incoming edges
 101    which thread to the same outgoing edge.  Thus to implement steps
 102    #5 and #6 we traverse our hash table of outgoing edge information.
 103    For each entry we walk the list of incoming edges which thread to
 104    the current outgoing edge.  */
 105
 106 struct el
 107 {
 108   edge e;
 109   struct el *next;
 110 };
 111
 112 /* Main data structure recording information regarding B's duplicate
 113    blocks.  */
 114
 115 /* We need to efficiently record the unique thread destinations of this
 116    block and specific information associated with those destinations.  We
 117    may have many incoming edges threaded to the same outgoing edge.  This
 118    can be naturally implemented with a hash table.  */
 119
 120 struct redirection_data : typed_free_remove<redirection_data>
 121 {
 122   /* We support wiring up two block duplicates in a jump threading path.
 123
 124      One is a normal block copy where we remove the control statement
 125      and wire up its single remaining outgoing edge to the thread path.
 126
 127      The other is a joiner block where we leave the control statement
 128      in place, but wire one of the outgoing edges to a thread path.
 129
 130      In theory we could have multiple block duplicates in a jump
 131      threading path, but I haven't tried that.
 132
 133      The duplicate blocks appear in this array in the same order in
 134      which they appear in the jump thread path.  */
 135   basic_block dup_blocks[2];
 136
 137   /* The jump threading path.  */
 138   vec<jump_thread_edge *> *path;
 139
 140   /* A list of incoming edges which we want to thread to the
 141      same path.  */
 142   struct el *incoming_edges;
 143
 144   /* hash_table support.  */
 145   typedef redirection_data value_type;
 146   typedef redirection_data compare_type;
 147   static inline hashval_t hash (const value_type *);
 148   static inline int equal (const value_type *, const compare_type *);
 149 };
 150
 151 /* Dump a jump threading path, including annotations about each
 152    edge in the path.  */
 153
 154 static void
 155 dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path,
 156                        bool registering)
 157 {
 158   fprintf (dump_file,
 159            "  %s jump thread: (%d, %d) incoming edge; ",
 160            (registering ? "Registering" : "Cancelling"),
 161            path[0]->e->src->index, path[0]->e->dest->index);
 162
 163   for (unsigned int i = 1; i < path.length (); i++)
 164     {
 165       /* We can get paths with a NULL edge when the final destination
 166          of a jump thread turns out to be a constant address.  We dump
 167          those paths when debugging, so we have to be prepared for that
 168          possibility here.  */
 169       if (path[i]->e == NULL)
 170         continue;
 171
 172       if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
 173         fprintf (dump_file, " (%d, %d) joiner; ",
 174                  path[i]->e->src->index, path[i]->e->dest->index);
 175       if (path[i]->type == EDGE_COPY_SRC_BLOCK)
 176        fprintf (dump_file, " (%d, %d) normal;",
 177                  path[i]->e->src->index, path[i]->e->dest->index);
 178       if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
 179        fprintf (dump_file, " (%d, %d) nocopy;",
 180                  path[i]->e->src->index, path[i]->e->dest->index);
 181     }
 182   fputc ('\n', dump_file);
 183 }
 184
 185 /* Simple hashing function.  For any given incoming edge E, we're going
 186    to be most concerned with the final destination of its jump thread
 187    path.  So hash on the block index of the final edge in the path.  */
 188
 189 inline hashval_t
 190 redirection_data::hash (const value_type *p)
 191 {
 192   vec<jump_thread_edge *> *path = p->path;
 193   return path->last ()->e->dest->index;
 194 }
 195
 196 /* Given two hash table entries, return true if they have the same
 197    jump threading path.  */
 198 inline int
 199 redirection_data::equal (const value_type *p1, const compare_type *p2)
 200 {
 201   vec<jump_thread_edge *> *path1 = p1->path;
 202   vec<jump_thread_edge *> *path2 = p2->path;
 203
 204   if (path1->length () != path2->length ())
 205     return false;
 206
 207   for (unsigned int i = 1; i < path1->length (); i++)
 208     {
 209       if ((*path1)[i]->type != (*path2)[i]->type
 210           || (*path1)[i]->e != (*path2)[i]->e)
 211         return false;
 212     }
 213
 214   return true;
 215 }
 216
 217 /* Data structure of information to pass to hash table traversal routines.  */
 218 struct ssa_local_info_t
 219 {
 220   /* The current block we are working on.  */
 221   basic_block bb;
 222
 223   /* We only create a template block for the first duplicated block in a
 224      jump threading path as we may need many duplicates of that block.
 225
 226      The second duplicate block in a path is specific to that path.  Creating
 227      and sharing a template for that block is considerably more difficult.  */
 228   basic_block template_block;
 229
 230   /* TRUE if we thread one or more jumps, FALSE otherwise.  */
 231   bool jumps_threaded;
 232 };
 233
/* Passes which use the jump threading code register jump threading
   opportunities as they are discovered.  We keep the registered
   jump threading opportunities in this vector; each element is one
   jump threading path, i.e. a vector of jump_thread_edge pointers.  */
 238 static vec<vec<jump_thread_edge *> *> paths;
 239
 240 /* When we start updating the CFG for threading, data necessary for jump
 241    threading is attached to the AUX field for the incoming edge.  Use these
 242    macros to access the underlying structure attached to the AUX field.  */
 243 #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
 244
 245 /* Jump threading statistics.  */
 246
 247 struct thread_stats_d
 248 {
 249   unsigned long num_threaded_edges;
 250 };
 251
 252 struct thread_stats_d thread_stats;
 253
 254
 255 /* Remove the last statement in block BB if it is a control statement
 256    Also remove all outgoing edges except the edge which reaches DEST_BB.
 257    If DEST_BB is NULL, then remove all outgoing edges.  */
 258
 259 static void
 260 remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
 261 {
 262   gimple_stmt_iterator gsi;
 263   edge e;
 264   edge_iterator ei;
 265
 266   gsi = gsi_last_bb (bb);
 267
 268   /* If the duplicate ends with a control statement, then remove it.
 269
 270      Note that if we are duplicating the template block rather than the
 271      original basic block, then the duplicate might not have any real
 272      statements in it.  */
 273   if (!gsi_end_p (gsi)
 274       && gsi_stmt (gsi)
 275       && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
 276           || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
 277           || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
 278     gsi_remove (&gsi, true);
 279
 280   for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
 281     {
 282       if (e->dest != dest_bb)
 283         remove_edge (e);
 284       else
 285         ei_next (&ei);
 286     }
 287 }
 288
 289 /* Create a duplicate of BB.  Record the duplicate block in an array
 290    indexed by COUNT stored in RD.  */
 291
 292 static void
 293 create_block_for_threading (basic_block bb,
 294                             struct redirection_data *rd,
 295                             unsigned int count)
 296 {
 297   edge_iterator ei;
 298   edge e;
 299
 300   /* We can use the generic block duplication code and simply remove
 301      the stuff we do not need.  */
 302   rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);
 303
 304   FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
 305     e->aux = NULL;
 306
 307   /* Zero out the profile, since the block is unreachable for now.  */
 308   rd->dup_blocks[count]->frequency = 0;
 309   rd->dup_blocks[count]->count = 0;
 310 }
 311
 312 /* Main data structure to hold information for duplicates of BB.  */
 313
 314 static hash_table <redirection_data> redirection_data;
 315
 316 /* Given an outgoing edge E lookup and return its entry in our hash table.
 317
 318    If INSERT is true, then we insert the entry into the hash table if
 319    it is not already present.  INCOMING_EDGE is added to the list of incoming
 320    edges associated with E in the hash table.  */
 321
 322 static struct redirection_data *
 323 lookup_redirection_data (edge e, enum insert_option insert)
 324 {
 325   struct redirection_data **slot;
 326   struct redirection_data *elt;
 327   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 328
 329  /* Build a hash table element so we can see if E is already
 330      in the table.  */
 331   elt = XNEW (struct redirection_data);
 332   elt->path = path;
 333   elt->dup_blocks[0] = NULL;
 334   elt->dup_blocks[1] = NULL;
 335   elt->incoming_edges = NULL;
 336
 337   slot = redirection_data.find_slot (elt, insert);
 338
 339   /* This will only happen if INSERT is false and the entry is not
 340      in the hash table.  */
 341   if (slot == NULL)
 342     {
 343       free (elt);
 344       return NULL;
 345     }
 346
 347   /* This will only happen if E was not in the hash table and
 348      INSERT is true.  */
 349   if (*slot == NULL)
 350     {
 351       *slot = elt;
 352       elt->incoming_edges = XNEW (struct el);
 353       elt->incoming_edges->e = e;
 354       elt->incoming_edges->next = NULL;
 355       return elt;
 356     }
 357   /* E was in the hash table.  */
 358   else
 359     {
 360       /* Free ELT as we do not need it anymore, we will extract the
 361          relevant entry from the hash table itself.  */
 362       free (elt);
 363
 364       /* Get the entry stored in the hash table.  */
 365       elt = *slot;
 366
 367       /* If insertion was requested, then we need to add INCOMING_EDGE
 368          to the list of incoming edges associated with E.  */
 369       if (insert)
 370         {
 371           struct el *el = XNEW (struct el);
 372           el->next = elt->incoming_edges;
 373           el->e = e;
 374           elt->incoming_edges = el;
 375         }
 376
 377       return elt;
 378     }
 379 }
 380
 381 /* Similar to copy_phi_args, except that the PHI arg exists, it just
 382    does not have a value associated with it.  */
 383
 384 static void
 385 copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e)
 386 {
 387   int src_idx = src_e->dest_idx;
 388   int tgt_idx = tgt_e->dest_idx;
 389
 390   /* Iterate over each PHI in e->dest.  */
 391   for (gimple_stmt_iterator gsi = gsi_start_phis (src_e->dest),
 392                             gsi2 = gsi_start_phis (tgt_e->dest);
 393        !gsi_end_p (gsi);
 394        gsi_next (&gsi), gsi_next (&gsi2))
 395     {
 396       gimple src_phi = gsi_stmt (gsi);
 397       gimple dest_phi = gsi_stmt (gsi2);
 398       tree val = gimple_phi_arg_def (src_phi, src_idx);
 399       source_location locus = gimple_phi_arg_location (src_phi, src_idx);
 400
 401       SET_PHI_ARG_DEF (dest_phi, tgt_idx, val);
 402       gimple_phi_arg_set_location (dest_phi, tgt_idx, locus);
 403     }
 404 }
 405
 406 /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.  */
 407
 408 static void
 409 copy_phi_args (basic_block bb, edge src_e, edge tgt_e)
 410 {
 411   gimple_stmt_iterator gsi;
 412   int src_indx = src_e->dest_idx;
 413
 414   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 415     {
 416       gimple phi = gsi_stmt (gsi);
 417       source_location locus = gimple_phi_arg_location (phi, src_indx);
 418       add_phi_arg (phi, gimple_phi_arg_def (phi, src_indx), tgt_e, locus);
 419     }
 420 }
 421
 422 /* We have recently made a copy of ORIG_BB, including its outgoing
 423    edges.  The copy is NEW_BB.  Every PHI node in every direct successor of
 424    ORIG_BB has a new argument associated with edge from NEW_BB to the
 425    successor.  Initialize the PHI argument so that it is equal to the PHI
 426    argument associated with the edge from ORIG_BB to the successor.  */
 427
 428 static void
 429 update_destination_phis (basic_block orig_bb, basic_block new_bb)
 430 {
 431   edge_iterator ei;
 432   edge e;
 433
 434   FOR_EACH_EDGE (e, ei, orig_bb->succs)
 435     {
 436       edge e2 = find_edge (new_bb, e->dest);
 437       copy_phi_args (e->dest, e, e2);
 438     }
 439 }
 440
 441 /* Given a duplicate block and its single destination (both stored
 442    in RD).  Create an edge between the duplicate and its single
 443    destination.
 444
 445    Add an additional argument to any PHI nodes at the single
 446    destination.  */
 447
 448 static void
 449 create_edge_and_update_destination_phis (struct redirection_data *rd,
 450                                          basic_block bb)
 451 {
 452   edge e = make_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
 453
 454   rescan_loop_exit (e, true, false);
 455   e->probability = REG_BR_PROB_BASE;
 456   e->count = bb->count;
 457
 458   /* We used to copy the thread path here.  That was added in 2007
 459      and dutifully updated through the representation changes in 2013.
 460
 461      In 2013 we added code to thread from an interior node through
 462      the backedge to another interior node.  That runs after the code
 463      to thread through loop headers from outside the loop.
 464
 465      The latter may delete edges in the CFG, including those
 466      which appeared in the jump threading path we copied here.  Thus
 467      we'd end up using a dangling pointer.
 468
 469      After reviewing the 2007/2011 code, I can't see how anything
 470      depended on copying the AUX field and clearly copying the jump
 471      threading path is problematical due to embedded edge pointers.
 472      It has been removed.  */
 473   e->aux = NULL;
 474
 475   /* If there are any PHI nodes at the destination of the outgoing edge
 476      from the duplicate block, then we will need to add a new argument
 477      to them.  The argument should have the same value as the argument
 478      associated with the outgoing edge stored in RD.  */
 479   copy_phi_args (e->dest, rd->path->last ()->e, e);
 480 }
 481
 482 /* Look through PATH beginning at START and return TRUE if there are
 483    any additional blocks that need to be duplicated.  Otherwise,
 484    return FALSE.  */
 485 static bool
 486 any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
 487                                  unsigned int start)
 488 {
 489   for (unsigned int i = start + 1; i < path->length (); i++)
 490     {
 491       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
 492           || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
 493         return true;
 494     }
 495   return false;
 496 }
 497
 498 /* Wire up the outgoing edges from the duplicate blocks and
 499    update any PHIs as needed.  */
 500 void
 501 ssa_fix_duplicate_block_edges (struct redirection_data *rd,
 502                                ssa_local_info_t *local_info)
 503 {
 504   edge e = rd->incoming_edges->e;
 505   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 506
 507   for (unsigned int count = 0, i = 1; i < path->length (); i++)
 508     {
 509       /* If we were threading through an joiner block, then we want
 510          to keep its control statement and redirect an outgoing edge.
 511          Else we want to remove the control statement & edges, then create
 512          a new outgoing edge.  In both cases we may need to update PHIs.  */
 513       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
 514         {
 515           edge victim;
 516           edge e2;
 517
 518           /* This updates the PHIs at the destination of the duplicate
 519              block.  */
 520           update_destination_phis (local_info->bb, rd->dup_blocks[count]);
 521
 522           /* Find the edge from the duplicate block to the block we're
 523              threading through.  That's the edge we want to redirect.  */
 524           victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);
 525
 526           /* If there are no remaining blocks on the path to duplicate,
 527              then redirect VICTIM to the final destination of the jump
 528              threading path.  */
 529           if (!any_remaining_duplicated_blocks (path, i))
 530             {
 531               e2 = redirect_edge_and_branch (victim, path->last ()->e->dest);
 532               e2->count = path->last ()->e->count;
 533               /* If we redirected the edge, then we need to copy PHI arguments
 534                  at the target.  If the edge already existed (e2 != victim
 535                  case), then the PHIs in the target already have the correct
 536                  arguments.  */
 537               if (e2 == victim)
 538                 copy_phi_args (e2->dest, path->last ()->e, e2);
 539             }
 540           else
 541             {
 542               /* Redirect VICTIM to the next duplicated block in the path.  */
 543               e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);
 544
 545               /* We need to update the PHIs in the next duplicated block.  We
 546                  want the new PHI args to have the same value as they had
 547                  in the source of the next duplicate block.
 548
 549                  Thus, we need to know which edge we traversed into the
 550                  source of the duplicate.  Furthermore, we may have
 551                  traversed many edges to reach the source of the duplicate.
 552
 553                  Walk through the path starting at element I until we
 554                  hit an edge marked with EDGE_COPY_SRC_BLOCK.  We want
 555                  the edge from the prior element.  */
 556               for (unsigned int j = i + 1; j < path->length (); j++)
 557                 {
 558                   if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
 559                     {
 560                       copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
 561                       break;
 562                     }
 563                 }
 564             }
 565           count++;
 566         }
 567       else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
 568         {
 569           remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
 570           create_edge_and_update_destination_phis (rd, rd->dup_blocks[count]);
 571           if (count == 1)
 572             single_succ_edge (rd->dup_blocks[1])->aux = NULL;
 573           count++;
 574         }
 575     }
 576 }
 577
 578 /* Hash table traversal callback routine to create duplicate blocks.  */
 579
 580 int
 581 ssa_create_duplicates (struct redirection_data **slot,
 582                        ssa_local_info_t *local_info)
 583 {
 584   struct redirection_data *rd = *slot;
 585
 586   /* The second duplicated block in a jump threading path is specific
 587      to the path.  So it gets stored in RD rather than in LOCAL_DATA.
 588
 589      Each time we're called, we have to look through the path and see
 590      if a second block needs to be duplicated.
 591
 592      Note the search starts with the third edge on the path.  The first
 593      edge is the incoming edge, the second edge always has its source
 594      duplicated.  Thus we start our search with the third edge.  */
 595   vec<jump_thread_edge *> *path = rd->path;
 596   for (unsigned int i = 2; i < path->length (); i++)
 597     {
 598       if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
 599           || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
 600         {
 601           create_block_for_threading ((*path)[i]->e->src, rd, 1);
 602           break;
 603         }
 604     }
 605
 606   /* Create a template block if we have not done so already.  Otherwise
 607      use the template to create a new block.  */
 608   if (local_info->template_block == NULL)
 609     {
 610       create_block_for_threading ((*path)[1]->e->src, rd, 0);
 611       local_info->template_block = rd->dup_blocks[0];
 612
 613       /* We do not create any outgoing edges for the template.  We will
 614          take care of that in a later traversal.  That way we do not
 615          create edges that are going to just be deleted.  */
 616     }
 617   else
 618     {
 619       create_block_for_threading (local_info->template_block, rd, 0);
 620
 621       /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
 622          block.   */
 623       ssa_fix_duplicate_block_edges (rd, local_info);
 624     }
 625
 626   /* Keep walking the hash table.  */
 627   return 1;
 628 }
 629
 630 /* We did not create any outgoing edges for the template block during
 631    block creation.  This hash table traversal callback creates the
 632    outgoing edge for the template block.  */
 633
 634 inline int
 635 ssa_fixup_template_block (struct redirection_data **slot,
 636                           ssa_local_info_t *local_info)
 637 {
 638   struct redirection_data *rd = *slot;
 639
 640   /* If this is the template block halt the traversal after updating
 641      it appropriately.
 642
 643      If we were threading through an joiner block, then we want
 644      to keep its control statement and redirect an outgoing edge.
 645      Else we want to remove the control statement & edges, then create
 646      a new outgoing edge.  In both cases we may need to update PHIs.  */
 647   if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
 648     {
 649       ssa_fix_duplicate_block_edges (rd, local_info);
 650       return 0;
 651     }
 652
 653   return 1;
 654 }
 655
 656 /* Hash table traversal callback to redirect each incoming edge
 657    associated with this hash table element to its new destination.  */
 658
 659 int
 660 ssa_redirect_edges (struct redirection_data **slot,
 661                     ssa_local_info_t *local_info)
 662 {
 663   struct redirection_data *rd = *slot;
 664   struct el *next, *el;
 665
 666   /* Walk over all the incoming edges associated associated with this
 667      hash table entry.  */
 668   for (el = rd->incoming_edges; el; el = next)
 669     {
 670       edge e = el->e;
 671       vec<jump_thread_edge *> *path = THREAD_PATH (e);
 672
 673       /* Go ahead and free this element from the list.  Doing this now
 674          avoids the need for another list walk when we destroy the hash
 675          table.  */
 676       next = el->next;
 677       free (el);
 678
 679       thread_stats.num_threaded_edges++;
 680
 681       if (rd->dup_blocks[0])
 682         {
 683           edge e2;
 684
 685           if (dump_file && (dump_flags & TDF_DETAILS))
 686             fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
 687                      e->src->index, e->dest->index, rd->dup_blocks[0]->index);
 688
 689           rd->dup_blocks[0]->count += e->count;
 690
 691           /* Excessive jump threading may make frequencies large enough so
 692              the computation overflows.  */
 693           if (rd->dup_blocks[0]->frequency < BB_FREQ_MAX * 2)
 694             rd->dup_blocks[0]->frequency += EDGE_FREQUENCY (e);
 695
 696           /* In the case of threading through a joiner block, the outgoing
 697              edges from the duplicate block were updated when they were
 698              redirected during ssa_fix_duplicate_block_edges.  */
 699           if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
 700             EDGE_SUCC (rd->dup_blocks[0], 0)->count += e->count;
 701
 702           /* Redirect the incoming edge (possibly to the joiner block) to the
 703              appropriate duplicate block.  */
 704           e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
 705           gcc_assert (e == e2);
 706           flush_pending_stmts (e2);
 707         }
 708
 709       /* Go ahead and clear E->aux.  It's not needed anymore and failure
 710          to clear it will cause all kinds of unpleasant problems later.  */
 711       delete_jump_thread_path (path);
 712       e->aux = NULL;
 713
 714     }
 715
 716   /* Indicate that we actually threaded one or more jumps.  */
 717   if (rd->incoming_edges)
 718     local_info->jumps_threaded = true;
 719
 720   return 1;
 721 }
 722
 723 /* Return true if this block has no executable statements other than
 724    a simple ctrl flow instruction.  When the number of outgoing edges
 725    is one, this is equivalent to a "forwarder" block.  */
 726
 727 static bool
 728 redirection_block_p (basic_block bb)
 729 {
 730   gimple_stmt_iterator gsi;
 731
 732   /* Advance to the first executable statement.  */
 733   gsi = gsi_start_bb (bb);
 734   while (!gsi_end_p (gsi)
 735          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
 736              || is_gimple_debug (gsi_stmt (gsi))
 737              || gimple_nop_p (gsi_stmt (gsi))))
 738     gsi_next (&gsi);
 739
 740   /* Check if this is an empty block.  */
 741   if (gsi_end_p (gsi))
 742     return true;
 743
 744   /* Test that we've reached the terminating control statement.  */
 745   return gsi_stmt (gsi)
 746          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
 747              || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
 748              || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
 749 }
 750
 751 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
 752    is reached via one or more specific incoming edges, we know which
 753    outgoing edge from BB will be traversed.
 754
 755    We want to redirect those incoming edges to the target of the
 756    appropriate outgoing edge.  Doing so avoids a conditional branch
 757    and may expose new optimization opportunities.  Note that we have
 758    to update dominator tree and SSA graph after such changes.
 759
 760    The key to keeping the SSA graph update manageable is to duplicate
 761    the side effects occurring in BB so that those side effects still
 762    occur on the paths which bypass BB after redirecting edges.
 763
 764    We accomplish this by creating duplicates of BB and arranging for
 765    the duplicates to unconditionally pass control to one specific
 766    successor of BB.  We then revector the incoming edges into BB to
 767    the appropriate duplicate of BB.
 768
 769    If NOLOOP_ONLY is true, we only perform the threading as long as it
 770    does not affect the structure of the loops in a nontrivial way.
 771
 772    If JOINERS is true, then thread through joiner blocks as well.  */
 773
 774 static bool
 775 thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
 776 {
 777   /* E is an incoming edge into BB that we may or may not want to
 778      redirect to a duplicate of BB.  */
 779   edge e, e2;
 780   edge_iterator ei;
 781   ssa_local_info_t local_info;
 782   struct loop *loop = bb->loop_father;
 783
 784   /* To avoid scanning a linear array for the element we need we instead
 785      use a hash table.  For normal code there should be no noticeable
 786      difference.  However, if we have a block with a large number of
 787      incoming and outgoing edges such linear searches can get expensive.  */
 788   redirection_data.create (EDGE_COUNT (bb->succs));
 789
 790   /* If we thread the latch of the loop to its exit, the loop ceases to
 791      exist.  Make sure we do not restrict ourselves in order to preserve
 792      this loop.  */
 793   if (loop->header == bb)
 794     {
 795       e = loop_latch_edge (loop);
 796       vec<jump_thread_edge *> *path = THREAD_PATH (e);
 797
 798       if (path
 799           && (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && joiners)
 800               || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && !joiners)))
 801         {
 802           for (unsigned int i = 1; i < path->length (); i++)
 803             {
 804               edge e2 = (*path)[i]->e;
 805
 806               if (loop_exit_edge_p (loop, e2))
 807                 {
 808                   loop->header = NULL;
 809                   loop->latch = NULL;
 810                   loops_state_set (LOOPS_NEED_FIXUP);
 811                 }
 812             }
 813         }
 814     }
 815
 816   /* Record each unique threaded destination into a hash table for
 817      efficient lookups.  */
 818   FOR_EACH_EDGE (e, ei, bb->preds)
 819     {
 820       if (e->aux == NULL)
 821         continue;
 822
 823       vec<jump_thread_edge *> *path = THREAD_PATH (e);
 824
 825       if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
 826           || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
 827         continue;
 828
 829       e2 = path->last ()->e;
 830       if (!e2 || noloop_only)
 831         {
 832           /* If NOLOOP_ONLY is true, we only allow threading through the
 833              header of a loop to exit edges.  */
 834
 835           /* One case occurs when there was loop header buried in a jump
 836              threading path that crosses loop boundaries.  We do not try
 837              and thread this elsewhere, so just cancel the jump threading
 838              request by clearing the AUX field now.  */
 839           if ((bb->loop_father != e2->src->loop_father
 840                && !loop_exit_edge_p (e2->src->loop_father, e2))
 841               || (e2->src->loop_father != e2->dest->loop_father
 842                   && !loop_exit_edge_p (e2->src->loop_father, e2)))
 843             {
 844               /* Since this case is not handled by our special code
 845                  to thread through a loop header, we must explicitly
 846                  cancel the threading request here.  */
 847               delete_jump_thread_path (path);
 848               e->aux = NULL;
 849               continue;
 850             }
 851
 852           /* Another case occurs when trying to thread through our
 853              own loop header, possibly from inside the loop.  We will
 854              thread these later.  */
 855           unsigned int i;
 856           for (i = 1; i < path->length (); i++)
 857             {
 858               if ((*path)[i]->e->src == bb->loop_father->header
 859                   && (!loop_exit_edge_p (bb->loop_father, e2)
 860                       || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
 861                 break;
 862             }
 863
 864           if (i != path->length ())
 865             continue;
 866         }
 867
 868       if (e->dest == e2->src)
 869         update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e),
 870                                          e->count, (*THREAD_PATH (e))[1]->e);
 871
 872       /* Insert the outgoing edge into the hash table if it is not
 873          already in the hash table.  */
 874       lookup_redirection_data (e, INSERT);
 875     }
 876
 877   /* We do not update dominance info.  */
 878   free_dominance_info (CDI_DOMINATORS);
 879
 880   /* We know we only thread through the loop header to loop exits.
 881      Let the basic block duplication hook know we are not creating
 882      a multiple entry loop.  */
 883   if (noloop_only
 884       && bb == bb->loop_father->header)
 885     set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
 886
 887   /* Now create duplicates of BB.
 888
 889      Note that for a block with a high outgoing degree we can waste
 890      a lot of time and memory creating and destroying useless edges.
 891
 892      So we first duplicate BB and remove the control structure at the
 893      tail of the duplicate as well as all outgoing edges from the
 894      duplicate.  We then use that duplicate block as a template for
 895      the rest of the duplicates.  */
 896   local_info.template_block = NULL;
 897   local_info.bb = bb;
 898   local_info.jumps_threaded = false;
 899   redirection_data.traverse <ssa_local_info_t *, ssa_create_duplicates>
 900                             (&local_info);
 901
 902   /* The template does not have an outgoing edge.  Create that outgoing
 903      edge and update PHI nodes as the edge's target as necessary.
 904
 905      We do this after creating all the duplicates to avoid creating
 906      unnecessary edges.  */
 907   redirection_data.traverse <ssa_local_info_t *, ssa_fixup_template_block>
 908                             (&local_info);
 909
 910   /* The hash table traversals above created the duplicate blocks (and the
 911      statements within the duplicate blocks).  This loop creates PHI nodes for
 912      the duplicated blocks and redirects the incoming edges into BB to reach
 913      the duplicates of BB.  */
 914   redirection_data.traverse <ssa_local_info_t *, ssa_redirect_edges>
 915                             (&local_info);
 916
 917   /* Done with this block.  Clear REDIRECTION_DATA.  */
 918   redirection_data.dispose ();
 919
 920   if (noloop_only
 921       && bb == bb->loop_father->header)
 922     set_loop_copy (bb->loop_father, NULL);
 923
 924   /* Indicate to our caller whether or not any jumps were threaded.  */
 925   return local_info.jumps_threaded;
 926 }
 927
 928 /* Wrapper for thread_block_1 so that we can first handle jump
 929    thread paths which do not involve copying joiner blocks, then
 930    handle jump thread paths which have joiner blocks.
 931
 932    By doing things this way we can be as aggressive as possible and
 933    not worry that copying a joiner block will create a jump threading
 934    opportunity.  */
 935
 936 static bool
 937 thread_block (basic_block bb, bool noloop_only)
 938 {
 939   bool retval;
 940   retval = thread_block_1 (bb, noloop_only, false);
 941   retval |= thread_block_1 (bb, noloop_only, true);
 942   return retval;
 943 }
 944
 945
 946 /* Threads edge E through E->dest to the edge THREAD_TARGET (E).  Returns the
 947    copy of E->dest created during threading, or E->dest if it was not necessary
 948    to copy it (E is its single predecessor).  */
 949
 950 static basic_block
 951 thread_single_edge (edge e)
 952 {
 953   basic_block bb = e->dest;
 954   struct redirection_data rd;
 955   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 956   edge eto = (*path)[1]->e;
 957
 958   for (unsigned int i = 0; i < path->length (); i++)
 959     delete (*path)[i];
 960   delete path;
 961   e->aux = NULL;
 962
 963   thread_stats.num_threaded_edges++;
 964
 965   if (single_pred_p (bb))
 966     {
 967       /* If BB has just a single predecessor, we should only remove the
 968          control statements at its end, and successors except for ETO.  */
 969       remove_ctrl_stmt_and_useless_edges (bb, eto->dest);
 970
 971       /* And fixup the flags on the single remaining edge.  */
 972       eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
 973       eto->flags |= EDGE_FALLTHRU;
 974
 975       return bb;
 976     }
 977
 978   /* Otherwise, we need to create a copy.  */
 979   if (e->dest == eto->src)
 980     update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);
 981
 982   vec<jump_thread_edge *> *npath = new vec<jump_thread_edge *> ();
 983   jump_thread_edge *x = new jump_thread_edge (e, EDGE_START_JUMP_THREAD);
 984   npath->safe_push (x);
 985
 986   x = new jump_thread_edge (eto, EDGE_COPY_SRC_BLOCK);
 987   npath->safe_push (x);
 988   rd.path = npath;
 989
 990   create_block_for_threading (bb, &rd, 0);
 991   remove_ctrl_stmt_and_useless_edges (rd.dup_blocks[0], NULL);
 992   create_edge_and_update_destination_phis (&rd, rd.dup_blocks[0]);
 993
 994   if (dump_file && (dump_flags & TDF_DETAILS))
 995     fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
 996              e->src->index, e->dest->index, rd.dup_blocks[0]->index);
 997
 998   rd.dup_blocks[0]->count = e->count;
 999   rd.dup_blocks[0]->frequency = EDGE_FREQUENCY (e);
1000   single_succ_edge (rd.dup_blocks[0])->count = e->count;
1001   redirect_edge_and_branch (e, rd.dup_blocks[0]);
1002   flush_pending_stmts (e);
1003
1004   return rd.dup_blocks[0];
1005 }
1006
1007 /* Callback for dfs_enumerate_from.  Returns true if BB is different
1008    from STOP and DBDS_CE_STOP.  */
1009
1010 static basic_block dbds_ce_stop;
1011 static bool
1012 dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
1013 {
1014   return (bb != (const_basic_block) stop
1015           && bb != dbds_ce_stop);
1016 }
1017
/* Possible results of determine_bb_domination_status, describing the
   dominance relationship between a block BB and the latch of a loop
   LOOP.  */

enum bb_dom_status
{
  /* BB does not dominate the latch of LOOP.  */
  DOMST_NONDOMINATING = 0,
  /* LOOP is broken (there is no path from its header to its latch).  */
  DOMST_LOOP_BROKEN = 1,
  /* BB dominates the latch of LOOP.  */
  DOMST_DOMINATING = 2
};
1030
1031 static enum bb_dom_status
1032 determine_bb_domination_status (struct loop *loop, basic_block bb)
1033 {
1034   basic_block *bblocks;
1035   unsigned nblocks, i;
1036   bool bb_reachable = false;
1037   edge_iterator ei;
1038   edge e;
1039
1040   /* This function assumes BB is a successor of LOOP->header.
1041      If that is not the case return DOMST_NONDOMINATING which
1042      is always safe.  */
1043     {
1044       bool ok = false;
1045
1046       FOR_EACH_EDGE (e, ei, bb->preds)
1047         {
1048           if (e->src == loop->header)
1049             {
1050               ok = true;
1051               break;
1052             }
1053         }
1054
1055       if (!ok)
1056         return DOMST_NONDOMINATING;
1057     }
1058
1059   if (bb == loop->latch)
1060     return DOMST_DOMINATING;
1061
1062   /* Check that BB dominates LOOP->latch, and that it is back-reachable
1063      from it.  */
1064
1065   bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1066   dbds_ce_stop = loop->header;
1067   nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
1068                                 bblocks, loop->num_nodes, bb);
1069   for (i = 0; i < nblocks; i++)
1070     FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
1071       {
1072         if (e->src == loop->header)
1073           {
1074             free (bblocks);
1075             return DOMST_NONDOMINATING;
1076           }
1077         if (e->src == bb)
1078           bb_reachable = true;
1079       }
1080
1081   free (bblocks);
1082   return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
1083 }
1084
1085 /* Return true if BB is part of the new pre-header that is created
1086    when threading the latch to DATA.  */
1087
1088 static bool
1089 def_split_header_continue_p (const_basic_block bb, const void *data)
1090 {
1091   const_basic_block new_header = (const_basic_block) data;
1092   const struct loop *l;
1093
1094   if (bb == new_header
1095       || loop_depth (bb->loop_father) < loop_depth (new_header->loop_father))
1096     return false;
1097   for (l = bb->loop_father; l; l = loop_outer (l))
1098     if (l == new_header->loop_father)
1099       return true;
1100   return false;
1101 }
1102
1103 /* Thread jumps through the header of LOOP.  Returns true if cfg changes.
1104    If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
1105    to the inside of the loop.  */
1106
1107 static bool
1108 thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
1109 {
1110   basic_block header = loop->header;
1111   edge e, tgt_edge, latch = loop_latch_edge (loop);
1112   edge_iterator ei;
1113   basic_block tgt_bb, atgt_bb;
1114   enum bb_dom_status domst;
1115
1116   /* We have already threaded through headers to exits, so all the threading
1117      requests now are to the inside of the loop.  We need to avoid creating
1118      irreducible regions (i.e., loops with more than one entry block), and
1119      also loop with several latch edges, or new subloops of the loop (although
1120      there are cases where it might be appropriate, it is difficult to decide,
1121      and doing it wrongly may confuse other optimizers).
1122
1123      We could handle more general cases here.  However, the intention is to
1124      preserve some information about the loop, which is impossible if its
1125      structure changes significantly, in a way that is not well understood.
1126      Thus we only handle few important special cases, in which also updating
1127      of the loop-carried information should be feasible:
1128
1129      1) Propagation of latch edge to a block that dominates the latch block
1130         of a loop.  This aims to handle the following idiom:
1131
1132         first = 1;
1133         while (1)
1134           {
1135             if (first)
1136               initialize;
1137             first = 0;
1138             body;
1139           }
1140
1141         After threading the latch edge, this becomes
1142
1143         first = 1;
1144         if (first)
1145           initialize;
1146         while (1)
1147           {
1148             first = 0;
1149             body;
1150           }
1151
1152         The original header of the loop is moved out of it, and we may thread
1153         the remaining edges through it without further constraints.
1154
1155      2) All entry edges are propagated to a single basic block that dominates
1156         the latch block of the loop.  This aims to handle the following idiom
1157         (normally created for "for" loops):
1158
1159         i = 0;
1160         while (1)
1161           {
1162             if (i >= 100)
1163               break;
1164             body;
1165             i++;
1166           }
1167
1168         This becomes
1169
1170         i = 0;
1171         while (1)
1172           {
1173             body;
1174             i++;
1175             if (i >= 100)
1176               break;
1177           }
1178      */
1179
1180   /* Threading through the header won't improve the code if the header has just
1181      one successor.  */
1182   if (single_succ_p (header))
1183     goto fail;
1184
1185   /* If we threaded the latch using a joiner block, we cancel the
1186      threading opportunity out of an abundance of caution.  However,
1187      still allow threading from outside to inside the loop.  */
1188   if (latch->aux)
1189     {
1190       vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1191       if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1192         {
1193           delete_jump_thread_path (path);
1194           latch->aux = NULL;
1195         }
1196     }
1197
1198   if (latch->aux)
1199     {
1200       vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1201       tgt_edge = (*path)[1]->e;
1202       tgt_bb = tgt_edge->dest;
1203     }
1204   else if (!may_peel_loop_headers
1205            && !redirection_block_p (loop->header))
1206     goto fail;
1207   else
1208     {
1209       tgt_bb = NULL;
1210       tgt_edge = NULL;
1211       FOR_EACH_EDGE (e, ei, header->preds)
1212         {
1213           if (!e->aux)
1214             {
1215               if (e == latch)
1216                 continue;
1217
1218               /* If latch is not threaded, and there is a header
1219                  edge that is not threaded, we would create loop
1220                  with multiple entries.  */
1221               goto fail;
1222             }
1223
1224           vec<jump_thread_edge *> *path = THREAD_PATH (e);
1225
1226           if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1227             goto fail;
1228           tgt_edge = (*path)[1]->e;
1229           atgt_bb = tgt_edge->dest;
1230           if (!tgt_bb)
1231             tgt_bb = atgt_bb;
1232           /* Two targets of threading would make us create loop
1233              with multiple entries.  */
1234           else if (tgt_bb != atgt_bb)
1235             goto fail;
1236         }
1237
1238       if (!tgt_bb)
1239         {
1240           /* There are no threading requests.  */
1241           return false;
1242         }
1243
1244       /* Redirecting to empty loop latch is useless.  */
1245       if (tgt_bb == loop->latch
1246           && empty_block_p (loop->latch))
1247         goto fail;
1248     }
1249
1250   /* The target block must dominate the loop latch, otherwise we would be
1251      creating a subloop.  */
1252   domst = determine_bb_domination_status (loop, tgt_bb);
1253   if (domst == DOMST_NONDOMINATING)
1254     goto fail;
1255   if (domst == DOMST_LOOP_BROKEN)
1256     {
1257       /* If the loop ceased to exist, mark it as such, and thread through its
1258          original header.  */
1259       loop->header = NULL;
1260       loop->latch = NULL;
1261       loops_state_set (LOOPS_NEED_FIXUP);
1262       return thread_block (header, false);
1263     }
1264
1265   if (tgt_bb->loop_father->header == tgt_bb)
1266     {
1267       /* If the target of the threading is a header of a subloop, we need
1268          to create a preheader for it, so that the headers of the two loops
1269          do not merge.  */
1270       if (EDGE_COUNT (tgt_bb->preds) > 2)
1271         {
1272           tgt_bb = create_preheader (tgt_bb->loop_father, 0);
1273           gcc_assert (tgt_bb != NULL);
1274         }
1275       else
1276         tgt_bb = split_edge (tgt_edge);
1277     }
1278
1279   if (latch->aux)
1280     {
1281       basic_block *bblocks;
1282       unsigned nblocks, i;
1283
1284       /* First handle the case latch edge is redirected.  We are copying
1285          the loop header but not creating a multiple entry loop.  Make the
1286          cfg manipulation code aware of that fact.  */
1287       set_loop_copy (loop, loop);
1288       loop->latch = thread_single_edge (latch);
1289       set_loop_copy (loop, NULL);
1290       gcc_assert (single_succ (loop->latch) == tgt_bb);
1291       loop->header = tgt_bb;
1292
1293       /* Remove the new pre-header blocks from our loop.  */
1294       bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1295       nblocks = dfs_enumerate_from (header, 0, def_split_header_continue_p,
1296                                     bblocks, loop->num_nodes, tgt_bb);
1297       for (i = 0; i < nblocks; i++)
1298         if (bblocks[i]->loop_father == loop)
1299           {
1300             remove_bb_from_loops (bblocks[i]);
1301             add_bb_to_loop (bblocks[i], loop_outer (loop));
1302           }
1303       free (bblocks);
1304
1305       /* If the new header has multiple latches mark it so.  */
1306       FOR_EACH_EDGE (e, ei, loop->header->preds)
1307         if (e->src->loop_father == loop
1308             && e->src != loop->latch)
1309           {
1310             loop->latch = NULL;
1311             loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
1312           }
1313
1314       /* Cancel remaining threading requests that would make the
1315          loop a multiple entry loop.  */
1316       FOR_EACH_EDGE (e, ei, header->preds)
1317         {
1318           edge e2;
1319
1320           if (e->aux == NULL)
1321             continue;
1322
1323           vec<jump_thread_edge *> *path = THREAD_PATH (e);
1324           e2 = path->last ()->e;
1325
1326           if (e->src->loop_father != e2->dest->loop_father
1327               && e2->dest != loop->header)
1328             {
1329               delete_jump_thread_path (path);
1330               e->aux = NULL;
1331             }
1332         }
1333
1334       /* Thread the remaining edges through the former header.  */
1335       thread_block (header, false);
1336     }
1337   else
1338     {
1339       basic_block new_preheader;
1340
1341       /* Now consider the case entry edges are redirected to the new entry
1342          block.  Remember one entry edge, so that we can find the new
1343          preheader (its destination after threading).  */
1344       FOR_EACH_EDGE (e, ei, header->preds)
1345         {
1346           if (e->aux)
1347             break;
1348         }
1349
1350       /* The duplicate of the header is the new preheader of the loop.  Ensure
1351          that it is placed correctly in the loop hierarchy.  */
1352       set_loop_copy (loop, loop_outer (loop));
1353
1354       thread_block (header, false);
1355       set_loop_copy (loop, NULL);
1356       new_preheader = e->dest;
1357
1358       /* Create the new latch block.  This is always necessary, as the latch
1359          must have only a single successor, but the original header had at
1360          least two successors.  */
1361       loop->latch = NULL;
1362       mfb_kj_edge = single_succ_edge (new_preheader);
1363       loop->header = mfb_kj_edge->dest;
1364       latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
1365       loop->header = latch->dest;
1366       loop->latch = latch->src;
1367     }
1368
1369   return true;
1370
1371 fail:
1372   /* We failed to thread anything.  Cancel the requests.  */
1373   FOR_EACH_EDGE (e, ei, header->preds)
1374     {
1375       vec<jump_thread_edge *> *path = THREAD_PATH (e);
1376
1377       if (path)
1378         {
1379           delete_jump_thread_path (path);
1380           e->aux = NULL;
1381         }
1382     }
1383   return false;
1384 }
1385
1386 /* E1 and E2 are edges into the same basic block.  Return TRUE if the
1387    PHI arguments associated with those edges are equal or there are no
1388    PHI arguments, otherwise return FALSE.  */
1389
1390 static bool
1391 phi_args_equal_on_edges (edge e1, edge e2)
1392 {
1393   gimple_stmt_iterator gsi;
1394   int indx1 = e1->dest_idx;
1395   int indx2 = e2->dest_idx;
1396
1397   for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1398     {
1399       gimple phi = gsi_stmt (gsi);
1400
1401       if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
1402                             gimple_phi_arg_def (phi, indx2), 0))
1403         return false;
1404     }
1405   return true;
1406 }
1407
1408 /* Walk through the registered jump threads and convert them into a
1409    form convenient for this pass.
1410
1411    Any block which has incoming edges threaded to outgoing edges
1412    will have its entry in THREADED_BLOCK set.
1413
1414    Any threaded edge will have its new outgoing edge stored in the
1415    original edge's AUX field.
1416
1417    This form avoids the need to walk all the edges in the CFG to
1418    discover blocks which need processing and avoids unnecessary
1419    hash table lookups to map from threaded edge to new target.  */
1420
1421 static void
1422 mark_threaded_blocks (bitmap threaded_blocks)
1423 {
1424   unsigned int i;
1425   bitmap_iterator bi;
1426   bitmap tmp = BITMAP_ALLOC (NULL);
1427   basic_block bb;
1428   edge e;
1429   edge_iterator ei;
1430
1431   /* It is possible to have jump threads in which one is a subpath
1432      of the other.  ie, (A, B), (B, C), (C, D) where B is a joiner
1433      block and (B, C), (C, D) where no joiner block exists.
1434
1435      When this occurs ignore the jump thread request with the joiner
1436      block.  It's totally subsumed by the simpler jump thread request.
1437
1438      This results in less block copying, simpler CFGs.  More importantly,
1439      when we duplicate the joiner block, B, in this case we will create
1440      a new threading opportunity that we wouldn't be able to optimize
1441      until the next jump threading iteration.
1442
1443      So first convert the jump thread requests which do not require a
1444      joiner block.  */
1445   for (i = 0; i < paths.length (); i++)
1446     {
1447       vec<jump_thread_edge *> *path = paths[i];
1448
1449       if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
1450         {
1451           edge e = (*path)[0]->e;
1452           e->aux = (void *)path;
1453           bitmap_set_bit (tmp, e->dest->index);
1454         }
1455     }
1456
1457   /* Now iterate again, converting cases where we want to thread
1458      through a joiner block, but only if no other edge on the path
1459      already has a jump thread attached to it.  */
1460   for (i = 0; i < paths.length (); i++)
1461     {
1462       vec<jump_thread_edge *> *path = paths[i];
1463
1464       if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1465         {
1466           unsigned int j;
1467
1468           for (j = 0; j < path->length (); j++)
1469             if ((*path)[j]->e->aux != NULL)
1470               break;
1471
1472           /* If we iterated through the entire path without exiting the loop,
1473              then we are good to go, attach the path to the starting edge.  */
1474           if (j == path->length ())
1475             {
1476               edge e = (*path)[0]->e;
1477               e->aux = path;
1478               bitmap_set_bit (tmp, e->dest->index);
1479             }
1480           else if (dump_file && (dump_flags & TDF_DETAILS))
1481             {
1482               dump_jump_thread_path (dump_file, *path, false);
1483             }
1484         }
1485     }
1486
1487
1488   /* If optimizing for size, only thread through block if we don't have
1489      to duplicate it or it's an otherwise empty redirection block.  */
1490   if (optimize_function_for_size_p (cfun))
1491     {
1492       EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
1493         {
1494           bb = BASIC_BLOCK_FOR_FN (cfun, i);
1495           if (EDGE_COUNT (bb->preds) > 1
1496               && !redirection_block_p (bb))
1497             {
1498               FOR_EACH_EDGE (e, ei, bb->preds)
1499                 {
1500                   if (e->aux)
1501                     {
1502                       vec<jump_thread_edge *> *path = THREAD_PATH (e);
1503                       delete_jump_thread_path (path);
1504                       e->aux = NULL;
1505                     }
1506                 }
1507             }
1508           else
1509             bitmap_set_bit (threaded_blocks, i);
1510         }
1511     }
1512   else
1513     bitmap_copy (threaded_blocks, tmp);
1514
1515   /* Look for jump threading paths which cross multiple loop headers.
1516
1517      The code to thread through loop headers will change the CFG in ways
1518      that break assumptions made by the loop optimization code.
1519
1520      We don't want to blindly cancel the requests.  We can instead do better
1521      by trimming off the end of the jump thread path.  */
1522   EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
1523     {
1524       basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
1525       FOR_EACH_EDGE (e, ei, bb->preds)
1526         {
1527           if (e->aux)
1528             {
1529               vec<jump_thread_edge *> *path = THREAD_PATH (e);
1530
1531               for (unsigned int i = 0, crossed_headers = 0;
1532                    i < path->length ();
1533                    i++)
1534                 {
1535                   basic_block dest = (*path)[i]->e->dest;
1536                   crossed_headers += (dest == dest->loop_father->header);
1537                   if (crossed_headers > 1)
1538                     {
1539                       /* Trim from entry I onwards.  */
1540                       for (unsigned int j = i; j < path->length (); j++)
1541                         delete (*path)[j];
1542                       path->truncate (i);
1543
1544                       /* Now that we've truncated the path, make sure
1545                          what's left is still valid.   We need at least
1546                          two edges on the path and the last edge can not
1547                          be a joiner.  This should never happen, but let's
1548                          be safe.  */
1549                       if (path->length () < 2
1550                           || (path->last ()->type
1551                               == EDGE_COPY_SRC_JOINER_BLOCK))
1552                         {
1553                           delete_jump_thread_path (path);
1554                           e->aux = NULL;
1555                         }
1556                       break;
1557                     }
1558                 }
1559             }
1560         }
1561     }
1562
1563   /* If we have a joiner block (J) which has two successors S1 and S2 and
1564      we are threading though S1 and the final destination of the thread
1565      is S2, then we must verify that any PHI nodes in S2 have the same
1566      PHI arguments for the edge J->S2 and J->S1->...->S2.
1567
1568      We used to detect this prior to registering the jump thread, but
1569      that prohibits propagation of edge equivalences into non-dominated
1570      PHI nodes as the equivalency test might occur before propagation.
1571
1572      This must also occur after we truncate any jump threading paths
1573      as this scenario may only show up after truncation.
1574
1575      This works for now, but will need improvement as part of the FSA
1576      optimization.
1577
1578      Note since we've moved the thread request data to the edges,
1579      we have to iterate on those rather than the threaded_edges vector.  */
1580   EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
1581     {
1582       bb = BASIC_BLOCK_FOR_FN (cfun, i);
1583       FOR_EACH_EDGE (e, ei, bb->preds)
1584         {
1585           if (e->aux)
1586             {
1587               vec<jump_thread_edge *> *path = THREAD_PATH (e);
1588               bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
1589
1590               if (have_joiner)
1591                 {
1592                   basic_block joiner = e->dest;
1593                   edge final_edge = path->last ()->e;
1594                   basic_block final_dest = final_edge->dest;
1595                   edge e2 = find_edge (joiner, final_dest);
1596
1597                   if (e2 && !phi_args_equal_on_edges (e2, final_edge))
1598                     {
1599                       delete_jump_thread_path (path);
1600                       e->aux = NULL;
1601                     }
1602                 }
1603             }
1604         }
1605     }
1606
1607   BITMAP_FREE (tmp);
1608 }
1609
1610
1611 /* Return TRUE if BB ends with a switch statement or a computed goto.
1612    Otherwise return false.  */
1613 static bool
1614 bb_ends_with_multiway_branch (basic_block bb ATTRIBUTE_UNUSED)
1615 {
1616   gimple stmt = last_stmt (bb);
1617   if (stmt && gimple_code (stmt) == GIMPLE_SWITCH)
1618     return true;
1619   if (stmt && gimple_code (stmt) == GIMPLE_GOTO
1620       && TREE_CODE (gimple_goto_dest (stmt)) == SSA_NAME)
1621     return true;
1622   return false;
1623 }
1624
1625 /* Walk through all blocks and thread incoming edges to the appropriate
1626    outgoing edge for each jump threading path queued in PATHS.  Returns
1627    false at once if PATHS was never created; PATHS is released on exit.
1628    It is the caller's responsibility to fix the dominance information
1629    and rewrite duplicated SSA_NAMEs back into SSA form.
1630
1631    If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
1632    loop headers if it does not simplify the loop.
1633
1634    Returns true if one or more edges were threaded, false otherwise.  */
1635
1636 bool
1637 thread_through_all_blocks (bool may_peel_loop_headers)
1638 {
1639   bool retval = false;
1640   unsigned int i;
1641   bitmap_iterator bi;
1642   bitmap threaded_blocks;
1643   struct loop *loop;
1644
1645   /* We must know about loops in order to preserve them.  */
1646   gcc_assert (current_loops != NULL);
1647
1648   if (!paths.exists ())
1649     return false;
1650
1651   threaded_blocks = BITMAP_ALLOC (NULL);
1652   memset (&thread_stats, 0, sizeof (thread_stats));
1653
1654   mark_threaded_blocks (threaded_blocks);
1655
1656   initialize_original_copy_tables ();
1657
1658   /* First perform the threading requests that do not affect
1659      loop structure.  */
1660   EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
1661     {
1662       basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
1663
1664       if (EDGE_COUNT (bb->preds) > 0)
1665         retval |= thread_block (bb, true);
1666     }
1667
1668   /* Then perform the threading through loop headers.  We start with the
1669      innermost loop, so that the changes in cfg we perform won't affect
1670      further threading.  */
1671   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1672     {
1673       if (!loop->header
1674           || !bitmap_bit_p (threaded_blocks, loop->header->index))
1675         continue;
1676
1677       retval |= thread_through_loop_header (loop, may_peel_loop_headers);
1678     }
1679
1680   /* Any jump threading paths that are still attached to edges at this
1681      point must be one of two cases.
1682
1683      First, we could have a jump threading path which went from outside
1684      a loop to inside a loop that was ignored because a prior jump thread
1685      across a backedge was realized (which indirectly causes the loop
1686      above to ignore the latter thread).  We can detect these because the
1687      loop structures will be different and we do not currently try to
1688      optimize this case.
1689
1690      Second, we could be threading across a backedge to a point within the
1691      same loop.  This occurs for the FSA/FSM optimization and we would
1692      like to optimize it.  However, we have to be very careful as this
1693      may completely scramble the loop structures, with the result being
1694      irreducible loops causing us to throw away our loop structure.
1695
1696      As a compromise for the latter case, if the thread path ends in
1697      a block where the last statement is a multiway branch, then go
1698      ahead and thread it, else ignore it.  */
1699   basic_block bb;
1700   edge e;
1701   FOR_EACH_BB_FN (bb, cfun)
1702     {
1703       /* If we do end up threading here, we can remove elements from
1704          BB->preds.  Thus we can not use the FOR_EACH_EDGE iterator.  */
1705       for (edge_iterator ei = ei_start (bb->preds);
1706            (e = ei_safe_edge (ei));)
1707         if (e->aux)
1708           {
1709             vec<jump_thread_edge *> *path = THREAD_PATH (e);
1710
1711             /* Case 1, threading from outside to inside the loop
1712                after we'd already threaded through the header.  */
1713             if ((*path)[0]->e->dest->loop_father
1714                 != path->last ()->e->src->loop_father)
1715               {
1716                 delete_jump_thread_path (path);
1717                 e->aux = NULL;
1718                 ei_next (&ei);
1719               }
1720             else if (bb_ends_with_multiway_branch (path->last ()->e->src))
1721               {
1722                 /* The code to thread through loop headers may have
1723                    split a block with jump threads attached to it.
1724
1725                    We can identify this with a disjoint jump threading
1726                    path.  If found, just remove it.  */
1727                 for (unsigned int j = 0; j + 1 < path->length (); j++)
1728                   if ((*path)[j]->e->dest != (*path)[j + 1]->e->src)
1729                     {
1730                       delete_jump_thread_path (path);
1731                       e->aux = NULL;
1732                       ei_next (&ei);
1733                       break;
1734                     }
1735
1736                 /* Our path is still valid, thread it.  */
1737                 if (e->aux)
1738                   {
1739                     struct loop *path_loop = (*path)[0]->e->dest->loop_father;
1740                     if (thread_block ((*path)[0]->e->dest, false))
1741                       {
1742                         /* This jump thread likely totally scrambled this loop,
1743                            so arrange for it to be fixed up and tell the caller.  */
1744                         path_loop->header = NULL;
1745                         path_loop->latch = NULL;
1746                         e->aux = NULL;
1747                         retval = true;
1748                       }
1749                     else
1750                       {
1751                         delete_jump_thread_path (path);
1752                         e->aux = NULL;
1753                         ei_next (&ei);
1754                       }
1755                   }
1756               }
1757             else
1758               {
1759                 delete_jump_thread_path (path);
1760                 e->aux = NULL;
1761                 ei_next (&ei);
1762               }
1763           }
1764         else
1765           ei_next (&ei);
1766     }
1767
1768   statistics_counter_event (cfun, "Jumps threaded",
1769                             thread_stats.num_threaded_edges);
1770
1771   free_original_copy_tables ();
1772
1773   BITMAP_FREE (threaded_blocks);
1774   threaded_blocks = NULL;
1775   paths.release ();
1776
1777   if (retval)
1778     loops_state_set (LOOPS_NEED_FIXUP);
1779
1780   return retval;
1781 }
1782
1783 /* Delete the jump threading path PATH: explicitly delete each entry in
1784    the vector, then release the vector's storage.  NOTE(review): the vec
1785    object PATH points to is not itself deleted here; confirm ownership.  */
1786 void
1787 delete_jump_thread_path (vec<jump_thread_edge *> *path)
1788 {
1789   for (unsigned int ix = 0, len = path->length (); ix < len; ++ix)
1790     delete (*path)[ix];
1791   path->release ();
1792 }
1793
1794 /* Register a jump threading opportunity.  We queue up all the jump
1795    threading opportunities discovered by a pass and update the CFG
1796    and SSA form all at once.
1797
1798    PATH is the sequence of jump_thread_edge entries describing the thread.
1799    It is appended to PATHS unless the registered_jump_thread debug counter
1800    rejects it or one of its entries has a NULL edge; then it is deleted.  */
1801
1802 void
1803 register_jump_thread (vec<jump_thread_edge *> *path)
1804 {
1805   if (!dbg_cnt (registered_jump_thread))
1806     {
1807       delete_jump_thread_path (path);
1808       return;
1809     }
1810
1811   /* A NULL outgoing edge can show up on the path when jumping to a
1812      constant address; such a path is cancelled rather than queued.  */
1813   bool has_null_edge = false;
1814   for (unsigned int ix = 0; !has_null_edge && ix < path->length (); ix++)
1815     has_null_edge = ((*path)[ix]->e == NULL);
1816
1817   if (dump_file && (dump_flags & TDF_DETAILS))
1818     {
1819       if (has_null_edge)
1820         fprintf (dump_file,
1821                  "Found NULL edge in jump threading path.  Cancelling jump thread:\n");
1822       dump_jump_thread_path (dump_file, *path, !has_null_edge);
1823     }
1824
1825   if (has_null_edge)
1826     {
1827       delete_jump_thread_path (path);
1828       return;
1829     }
1830
1831   if (!paths.exists ())
1832     paths.create (5);
1833   paths.safe_push (path);
1834 }