gcc/bb-reorder.c
1 /* Basic block reordering routines for the GNU compiler.
2 Copyright (C) 2000, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010, 2011,
3 2012 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
 21 /* This (greedy) algorithm constructs traces in several rounds.
 22    The construction starts from "seeds".  The seed for the first round
 23    is the entry point of the function.  When there is more than one seed,
 24    the one with the lowest key in the heap is selected first (see the
 25    function bb_to_key).  Then the algorithm repeatedly adds the most
 26    probable successor to the end of a trace.  Finally it connects the traces.
27
 28    There are two parameters: Branch Threshold and Exec Threshold.
 29    If the probability of an edge to a successor of the current basic block is
 30    lower than Branch Threshold, or the frequency of the successor is lower
 31    than Exec Threshold, the successor will be a seed in one of the next rounds.
 32    Each round has these parameters lower than the previous one.
 33    The last round has to have these parameters set to zero
 34    so that the remaining blocks are picked up.
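
   For example, with the threshold tables used below, the first round does not
   extend a trace across an edge whose probability is below 40% (400 per mille
   of REG_BR_PROB_BASE) or into a block whose frequency is below half of the
   hottest entry block's frequency (500 per mille); such successors become
   seeds for this or one of the later rounds instead.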
35
 36    The algorithm selects the most probable successor from all unvisited
 37    successors and successors that have been added to this trace.
 38    The other successors (that have not been "sent" to the next round) will be
 39    other seeds for this round and the secondary traces will start in them.
 40    If the successor has not been visited in this trace it is added to the trace
 41    (however, there is some heuristic for simple branches).
 42    If the successor has been visited in this trace a loop has been found.
 43    If the loop has many iterations the loop is rotated so that the
 44    source block of the most probable edge going out of the loop
 45    is the last block of the trace.
 46    If the loop has few iterations and there is no edge from the last block of
 47    the loop going out of the loop, the loop header is duplicated.
48 Finally, the construction of the trace is terminated.
49
50 When connecting traces it first checks whether there is an edge from the
51 last block of one trace to the first block of another trace.
52 When there are still some unconnected traces it checks whether there exists
53 a basic block BB such that BB is a successor of the last bb of one trace
54 and BB is a predecessor of the first block of another trace. In this case,
55 BB is duplicated and the traces are connected through this duplicate.
 56    The remaining traces are simply connected, so there will be a jump to the
 57    beginning of each remaining trace.
58
59
60 References:
61
62 "Software Trace Cache"
63 A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
64 http://citeseer.nj.nec.com/15361.html
65
66 */
67
68 #include "config.h"
69 #include "system.h"
70 #include "coretypes.h"
71 #include "tm.h"
72 #include "rtl.h"
73 #include "regs.h"
74 #include "flags.h"
75 #include "output.h"
76 #include "fibheap.h"
77 #include "target.h"
78 #include "function.h"
79 #include "tm_p.h"
80 #include "obstack.h"
81 #include "expr.h"
82 #include "params.h"
83 #include "diagnostic-core.h"
84 #include "toplev.h" /* user_defined_section_attribute */
85 #include "tree-pass.h"
86 #include "df.h"
87 #include "bb-reorder.h"
88 #include "except.h"
89
90 /* The number of rounds. In most cases there will only be 4 rounds, but
91 when partitioning hot and cold basic blocks into separate sections of
 92    the .o file there will be an extra round.  */
93 #define N_ROUNDS 5
94
95 /* Stubs in case we don't have a return insn.
 96    We have to check at runtime too, not only at compile time.  */
97
98 #ifndef HAVE_return
99 #define HAVE_return 0
100 #define gen_return() NULL_RTX
101 #endif
102
103
104 struct target_bb_reorder default_target_bb_reorder;
105 #if SWITCHABLE_TARGET
106 struct target_bb_reorder *this_target_bb_reorder = &default_target_bb_reorder;
107 #endif
108
109 #define uncond_jump_length \
110 (this_target_bb_reorder->x_uncond_jump_length)
111
112 /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE. */
113 static int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};
114
115 /* Exec thresholds in thousandths (per mille) of the frequency of bb 0. */
116 static int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
117
118 /* If an edge's frequency is lower than DUPLICATION_THRESHOLD per mille of the
119    entry block's frequency, its destination is not duplicated while connecting traces.  */
120 #define DUPLICATION_THRESHOLD 100
121
122 /* Structure to hold needed information for each basic block. */
123 typedef struct bbro_basic_block_data_def
124 {
125 /* Which trace is the bb start of (-1 means it is not a start of a trace). */
126 int start_of_trace;
127
128 /* Which trace is the bb end of (-1 means it is not an end of a trace). */
129 int end_of_trace;
130
131 /* Which trace is the bb in? */
132 int in_trace;
133
134 /* Which trace was this bb visited in? */
135 int visited;
136
137 /* Which heap is BB in (if any)? */
138 fibheap_t heap;
139
140 /* Which heap node is BB in (if any)? */
141 fibnode_t node;
142 } bbro_basic_block_data;
143
144 /* The current size of the following dynamic array. */
145 static int array_size;
146
147 /* The array which holds needed information for basic blocks. */
148 static bbro_basic_block_data *bbd;
149
150 /* To avoid frequent reallocation the size of the array is greater than needed;
151    the number of elements is (not less than) 1.25 * size_wanted.  */
152 #define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
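/* For example, GET_ARRAY_SIZE (100) == ((100 / 4) + 1) * 5 == 130, i.e. about
   1.25 times the wanted size, plus a little slack for very small sizes.  */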
153
154 /* Free the memory and set the pointer to NULL. */
155 #define FREE(P) (gcc_assert (P), free (P), P = 0)
156
157 /* Structure for holding information about a trace. */
158 struct trace
159 {
160 /* First and last basic block of the trace. */
161 basic_block first, last;
162
163 /* The round of the STC creation which this trace was found in. */
164 int round;
165
166 /* The length (i.e. the number of basic blocks) of the trace. */
167 int length;
168 };
169
170 /* Maximum frequency and count of one of the entry blocks. */
171 static int max_entry_frequency;
172 static gcov_type max_entry_count;
173
174 /* Local function prototypes. */
175 static void find_traces (int *, struct trace *);
176 static basic_block rotate_loop (edge, struct trace *, int);
177 static void mark_bb_visited (basic_block, int);
178 static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
179 int, fibheap_t *, int);
180 static basic_block copy_bb (basic_block, edge, basic_block, int);
181 static fibheapkey_t bb_to_key (basic_block);
182 static bool better_edge_p (const_basic_block, const_edge, int, int, int, int, const_edge);
183 static void connect_traces (int, struct trace *);
184 static bool copy_bb_p (const_basic_block, int);
185 static bool push_to_next_round_p (const_basic_block, int, int, int, gcov_type);
186 \f
187 /* Return the trace number in which BB was visited. */
188
189 static int
190 bb_visited_trace (const_basic_block bb)
191 {
192 gcc_assert (bb->index < array_size);
193 return bbd[bb->index].visited;
194 }
195
196 /* Mark BB as visited in trace number TRACE.  */
197
198 static void
199 mark_bb_visited (basic_block bb, int trace)
200 {
201 bbd[bb->index].visited = trace;
202 if (bbd[bb->index].heap)
203 {
204 fibheap_delete_node (bbd[bb->index].heap, bbd[bb->index].node);
205 bbd[bb->index].heap = NULL;
206 bbd[bb->index].node = NULL;
207 }
208 }
209
210 /* Check to see if bb should be pushed into the next round of trace
211    collections or not.  Reasons for pushing the block forward are:
212    1) the block is cold, we are doing partitioning, and there will be
213    another round (cold partition blocks are not supposed to be
214    collected into traces until the very last round); or 2) there will
215    be another round, and the basic block is not "hot enough" for the
216    current round of trace collection.  */
217
218 static bool
219 push_to_next_round_p (const_basic_block bb, int round, int number_of_rounds,
220 int exec_th, gcov_type count_th)
221 {
222 bool there_exists_another_round;
223 bool block_not_hot_enough;
224
225 there_exists_another_round = round < number_of_rounds - 1;
226
227 block_not_hot_enough = (bb->frequency < exec_th
228 || bb->count < count_th
229 || probably_never_executed_bb_p (bb));
230
231 if (there_exists_another_round
232 && block_not_hot_enough)
233 return true;
234 else
235 return false;
236 }
237
238 /* Find the traces for Software Trace Cache. Chain each trace through
239    RBI()->next.  Store the number of traces in N_TRACES and descriptions of
240    the traces in TRACES.  */
241
242 static void
243 find_traces (int *n_traces, struct trace *traces)
244 {
245 int i;
246 int number_of_rounds;
247 edge e;
248 edge_iterator ei;
249 fibheap_t heap;
250
251 /* Add one extra round of trace collection when partitioning hot/cold
252 basic blocks into separate sections. The last round is for all the
253 cold blocks (and ONLY the cold blocks). */
254
255 number_of_rounds = N_ROUNDS - 1;
256
257 /* Insert entry points of function into heap. */
258 heap = fibheap_new ();
259 max_entry_frequency = 0;
260 max_entry_count = 0;
261 FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
262 {
263 bbd[e->dest->index].heap = heap;
264 bbd[e->dest->index].node = fibheap_insert (heap, bb_to_key (e->dest),
265 e->dest);
266 if (e->dest->frequency > max_entry_frequency)
267 max_entry_frequency = e->dest->frequency;
268 if (e->dest->count > max_entry_count)
269 max_entry_count = e->dest->count;
270 }
271
272 /* Find the traces. */
273 for (i = 0; i < number_of_rounds; i++)
274 {
275 gcov_type count_threshold;
276
277 if (dump_file)
278 fprintf (dump_file, "STC - round %d\n", i + 1);
279
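      /* Scale the maximal entry count by exec_threshold[i] per mille.  When
	 the count is large, divide before multiplying so that the
	 multiplication cannot overflow, at a small cost in precision.  */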
280 if (max_entry_count < INT_MAX / 1000)
281 count_threshold = max_entry_count * exec_threshold[i] / 1000;
282 else
283 count_threshold = max_entry_count / 1000 * exec_threshold[i];
284
285 find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
286 max_entry_frequency * exec_threshold[i] / 1000,
287 count_threshold, traces, n_traces, i, &heap,
288 number_of_rounds);
289 }
290 fibheap_delete (heap);
291
292 if (dump_file)
293 {
294 for (i = 0; i < *n_traces; i++)
295 {
296 basic_block bb;
297 fprintf (dump_file, "Trace %d (round %d): ", i + 1,
298 traces[i].round + 1);
299 for (bb = traces[i].first; bb != traces[i].last; bb = (basic_block) bb->aux)
300 fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency);
301 fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency);
302 }
303 fflush (dump_file);
304 }
305 }
306
307 /* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
308 (with sequential number TRACE_N). */
309
310 static basic_block
311 rotate_loop (edge back_edge, struct trace *trace, int trace_n)
312 {
313 basic_block bb;
314
315 /* Information about the best end (end after rotation) of the loop. */
316 basic_block best_bb = NULL;
317 edge best_edge = NULL;
318 int best_freq = -1;
319 gcov_type best_count = -1;
320 /* The best edge is preferred when its destination is not visited yet
321 or is a start block of some trace. */
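  /* Once such a preferred candidate has been found, edges whose destination
     was already visited in this trace and is not a trace start are no longer
     considered; among the remaining candidates the one with a higher
     frequency or a higher count wins.  */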
322 bool is_preferred = false;
323
324   /* Find the most frequent edge that goes out from the current trace.  */
325 bb = back_edge->dest;
326 do
327 {
328 edge e;
329 edge_iterator ei;
330
331 FOR_EACH_EDGE (e, ei, bb->succs)
332 if (e->dest != EXIT_BLOCK_PTR
333 && bb_visited_trace (e->dest) != trace_n
334 && (e->flags & EDGE_CAN_FALLTHRU)
335 && !(e->flags & EDGE_COMPLEX))
336 {
337 if (is_preferred)
338 {
339 /* The best edge is preferred. */
340 if (!bb_visited_trace (e->dest)
341 || bbd[e->dest->index].start_of_trace >= 0)
342 {
343 /* The current edge E is also preferred. */
344 int freq = EDGE_FREQUENCY (e);
345 if (freq > best_freq || e->count > best_count)
346 {
347 best_freq = freq;
348 best_count = e->count;
349 best_edge = e;
350 best_bb = bb;
351 }
352 }
353 }
354 else
355 {
356 if (!bb_visited_trace (e->dest)
357 || bbd[e->dest->index].start_of_trace >= 0)
358 {
359 /* The current edge E is preferred. */
360 is_preferred = true;
361 best_freq = EDGE_FREQUENCY (e);
362 best_count = e->count;
363 best_edge = e;
364 best_bb = bb;
365 }
366 else
367 {
368 int freq = EDGE_FREQUENCY (e);
369 if (!best_edge || freq > best_freq || e->count > best_count)
370 {
371 best_freq = freq;
372 best_count = e->count;
373 best_edge = e;
374 best_bb = bb;
375 }
376 }
377 }
378 }
379 bb = (basic_block) bb->aux;
380 }
381 while (bb != back_edge->dest);
382
383 if (best_bb)
384 {
385 /* Rotate the loop so that the BEST_EDGE goes out from the last block of
386 the trace. */
387 if (back_edge->dest == trace->first)
388 {
389 trace->first = (basic_block) best_bb->aux;
390 }
391 else
392 {
393 basic_block prev_bb;
394
395 for (prev_bb = trace->first;
396 prev_bb->aux != back_edge->dest;
397 prev_bb = (basic_block) prev_bb->aux)
398 ;
399 prev_bb->aux = best_bb->aux;
400
401 /* Try to get rid of uncond jump to cond jump. */
402 if (single_succ_p (prev_bb))
403 {
404 basic_block header = single_succ (prev_bb);
405
406 	      /* Duplicate HEADER if it is a small block containing a cond jump
407 		 at the end.  */
408 if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
409 && !find_reg_note (BB_END (header), REG_CROSSING_JUMP,
410 NULL_RTX))
411 copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
412 }
413 }
414 }
415 else
416 {
417       /* We have not found a suitable loop tail, so do no rotation.  */
418 best_bb = back_edge->src;
419 }
420 best_bb->aux = NULL;
421 return best_bb;
422 }
423
424 /* One round of finding traces.  Find traces for BRANCH_TH and EXEC_TH, i.e. do
425    not include basic blocks whose probability is lower than BRANCH_TH or whose
426    frequency is lower than EXEC_TH (or whose count is lower than COUNT_TH) in
427    traces.  Store the new traces in TRACES and modify the number of traces
428    *N_TRACES.  Set the round (which the trace belongs to) to ROUND.  It
429 expects that starting basic blocks are in *HEAP and at the end it deletes
430 *HEAP and stores starting points for the next round into new *HEAP. */
431
432 static void
433 find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
434 struct trace *traces, int *n_traces, int round,
435 fibheap_t *heap, int number_of_rounds)
436 {
437 /* Heap for discarded basic blocks which are possible starting points for
438 the next round. */
439 fibheap_t new_heap = fibheap_new ();
440
441 while (!fibheap_empty (*heap))
442 {
443 basic_block bb;
444 struct trace *trace;
445 edge best_edge, e;
446 fibheapkey_t key;
447 edge_iterator ei;
448
449 bb = (basic_block) fibheap_extract_min (*heap);
450 bbd[bb->index].heap = NULL;
451 bbd[bb->index].node = NULL;
452
453 if (dump_file)
454 fprintf (dump_file, "Getting bb %d\n", bb->index);
455
456 /* If the BB's frequency is too low send BB to the next round. When
457 partitioning hot/cold blocks into separate sections, make sure all
458 the cold blocks (and ONLY the cold blocks) go into the (extra) final
459 round. */
460
461 if (push_to_next_round_p (bb, round, number_of_rounds, exec_th,
462 count_th))
463 {
464 int key = bb_to_key (bb);
465 bbd[bb->index].heap = new_heap;
466 bbd[bb->index].node = fibheap_insert (new_heap, key, bb);
467
468 if (dump_file)
469 fprintf (dump_file,
470 " Possible start point of next round: %d (key: %d)\n",
471 bb->index, key);
472 continue;
473 }
474
475 trace = traces + *n_traces;
476 trace->first = bb;
477 trace->round = round;
478 trace->length = 0;
479 bbd[bb->index].in_trace = *n_traces;
480 (*n_traces)++;
481
482 do
483 {
484 int prob, freq;
485 bool ends_in_call;
486
487 /* The probability and frequency of the best edge. */
488 int best_prob = INT_MIN / 2;
489 int best_freq = INT_MIN / 2;
490
491 best_edge = NULL;
492 mark_bb_visited (bb, *n_traces);
493 trace->length++;
494
495 if (dump_file)
496 fprintf (dump_file, "Basic block %d was visited in trace %d\n",
497 bb->index, *n_traces - 1);
498
499 ends_in_call = block_ends_with_call_p (bb);
500
501 /* Select the successor that will be placed after BB. */
502 FOR_EACH_EDGE (e, ei, bb->succs)
503 {
504 gcc_assert (!(e->flags & EDGE_FAKE));
505
506 if (e->dest == EXIT_BLOCK_PTR)
507 continue;
508
509 if (bb_visited_trace (e->dest)
510 && bb_visited_trace (e->dest) != *n_traces)
511 continue;
512
513 if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
514 continue;
515
516 prob = e->probability;
517 freq = e->dest->frequency;
518
519 /* The only sensible preference for a call instruction is the
520 fallthru edge. Don't bother selecting anything else. */
521 if (ends_in_call)
522 {
523 if (e->flags & EDGE_CAN_FALLTHRU)
524 {
525 best_edge = e;
526 best_prob = prob;
527 best_freq = freq;
528 }
529 continue;
530 }
531
532 	      /* An edge that cannot be a fallthru edge, or an improbable or
533 		 infrequent successor (i.e. an unsuitable successor).  */
534 if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
535 || prob < branch_th || EDGE_FREQUENCY (e) < exec_th
536 || e->count < count_th)
537 continue;
538
539 /* If partitioning hot/cold basic blocks, don't consider edges
540 that cross section boundaries. */
541
542 if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
543 best_edge))
544 {
545 best_edge = e;
546 best_prob = prob;
547 best_freq = freq;
548 }
549 }
550
551 /* If the best destination has multiple predecessors, and can be
552 duplicated cheaper than a jump, don't allow it to be added
553 to a trace. We'll duplicate it when connecting traces. */
554 if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
555 && copy_bb_p (best_edge->dest, 0))
556 best_edge = NULL;
557
558 /* Add all non-selected successors to the heaps. */
559 FOR_EACH_EDGE (e, ei, bb->succs)
560 {
561 if (e == best_edge
562 || e->dest == EXIT_BLOCK_PTR
563 || bb_visited_trace (e->dest))
564 continue;
565
566 key = bb_to_key (e->dest);
567
568 if (bbd[e->dest->index].heap)
569 {
570 /* E->DEST is already in some heap. */
571 if (key != bbd[e->dest->index].node->key)
572 {
573 if (dump_file)
574 {
575 fprintf (dump_file,
576 "Changing key for bb %d from %ld to %ld.\n",
577 e->dest->index,
578 (long) bbd[e->dest->index].node->key,
579 key);
580 }
581 fibheap_replace_key (bbd[e->dest->index].heap,
582 bbd[e->dest->index].node, key);
583 }
584 }
585 else
586 {
587 fibheap_t which_heap = *heap;
588
589 prob = e->probability;
590 freq = EDGE_FREQUENCY (e);
591
592 if (!(e->flags & EDGE_CAN_FALLTHRU)
593 || (e->flags & EDGE_COMPLEX)
594 || prob < branch_th || freq < exec_th
595 || e->count < count_th)
596 {
597 /* When partitioning hot/cold basic blocks, make sure
598 the cold blocks (and only the cold blocks) all get
599 pushed to the last round of trace collection. */
600
601 if (push_to_next_round_p (e->dest, round,
602 number_of_rounds,
603 exec_th, count_th))
604 which_heap = new_heap;
605 }
606
607 bbd[e->dest->index].heap = which_heap;
608 bbd[e->dest->index].node = fibheap_insert (which_heap,
609 key, e->dest);
610
611 if (dump_file)
612 {
613 fprintf (dump_file,
614 " Possible start of %s round: %d (key: %ld)\n",
615 (which_heap == new_heap) ? "next" : "this",
616 e->dest->index, (long) key);
617 }
618
619 }
620 }
621
622 if (best_edge) /* Suitable successor was found. */
623 {
624 if (bb_visited_trace (best_edge->dest) == *n_traces)
625 {
626 	      /* We do nothing with one-basic-block loops.  */
627 if (best_edge->dest != bb)
628 {
629 if (EDGE_FREQUENCY (best_edge)
630 > 4 * best_edge->dest->frequency / 5)
631 {
632 /* The loop has at least 4 iterations. If the loop
633 header is not the first block of the function
634 we can rotate the loop. */
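			    /* (If the back edge carries more than 4/5 of the
			       header's frequency, the edges entering the loop
			       from outside carry less than 1/5 of it, so the
			       back edge is taken more than four times per
			       entry into the loop.)  */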
635
636 if (best_edge->dest != ENTRY_BLOCK_PTR->next_bb)
637 {
638 if (dump_file)
639 {
640 fprintf (dump_file,
641 "Rotating loop %d - %d\n",
642 best_edge->dest->index, bb->index);
643 }
644 bb->aux = best_edge->dest;
645 bbd[best_edge->dest->index].in_trace =
646 (*n_traces) - 1;
647 bb = rotate_loop (best_edge, trace, *n_traces);
648 }
649 }
650 else
651 {
652 /* The loop has less than 4 iterations. */
653
654 if (single_succ_p (bb)
655 && copy_bb_p (best_edge->dest,
656 optimize_edge_for_speed_p (best_edge)))
657 {
658 bb = copy_bb (best_edge->dest, best_edge, bb,
659 *n_traces);
660 trace->length++;
661 }
662 }
663 }
664
665 /* Terminate the trace. */
666 break;
667 }
668 else
669 {
670 /* Check for a situation
671
672 A
673 /|
674 B |
675 \|
676 C
677
678 where
679 EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
680 >= EDGE_FREQUENCY (AC).
681 (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
682 Best ordering is then A B C.
683
684 This situation is created for example by:
685
686 if (A) B;
687 C;
688
689 */
690
691 FOR_EACH_EDGE (e, ei, bb->succs)
692 if (e != best_edge
693 && (e->flags & EDGE_CAN_FALLTHRU)
694 && !(e->flags & EDGE_COMPLEX)
695 && !bb_visited_trace (e->dest)
696 && single_pred_p (e->dest)
697 && !(e->flags & EDGE_CROSSING)
698 && single_succ_p (e->dest)
699 && (single_succ_edge (e->dest)->flags
700 & EDGE_CAN_FALLTHRU)
701 && !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
702 && single_succ (e->dest) == best_edge->dest
703 && 2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge))
704 {
705 best_edge = e;
706 if (dump_file)
707 fprintf (dump_file, "Selecting BB %d\n",
708 best_edge->dest->index);
709 break;
710 }
711
712 bb->aux = best_edge->dest;
713 bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
714 bb = best_edge->dest;
715 }
716 }
717 }
718 while (best_edge);
719 trace->last = bb;
720 bbd[trace->first->index].start_of_trace = *n_traces - 1;
721 bbd[trace->last->index].end_of_trace = *n_traces - 1;
722
723       /* The trace is terminated so we have to recount the keys in the heap
724 	 (some blocks can have lower keys because now one of their predecessors
725 	 is an end of the trace).  */
726 FOR_EACH_EDGE (e, ei, bb->succs)
727 {
728 if (e->dest == EXIT_BLOCK_PTR
729 || bb_visited_trace (e->dest))
730 continue;
731
732 if (bbd[e->dest->index].heap)
733 {
734 key = bb_to_key (e->dest);
735 if (key != bbd[e->dest->index].node->key)
736 {
737 if (dump_file)
738 {
739 fprintf (dump_file,
740 "Changing key for bb %d from %ld to %ld.\n",
741 e->dest->index,
742 (long) bbd[e->dest->index].node->key, key);
743 }
744 fibheap_replace_key (bbd[e->dest->index].heap,
745 bbd[e->dest->index].node,
746 key);
747 }
748 }
749 }
750 }
751
752 fibheap_delete (*heap);
753
754 /* "Return" the new heap. */
755 *heap = new_heap;
756 }
757
758 /* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
759    it to the trace after BB, mark the duplicate visited and update the pass's
760    data structures (TRACE is the number of the trace OLD_BB is duplicated to).  */
761
762 static basic_block
763 copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
764 {
765 basic_block new_bb;
766
767 new_bb = duplicate_block (old_bb, e, bb);
768 BB_COPY_PARTITION (new_bb, old_bb);
769
770 gcc_assert (e->dest == new_bb);
771
772 if (dump_file)
773 fprintf (dump_file,
774 "Duplicated bb %d (created bb %d)\n",
775 old_bb->index, new_bb->index);
776
777 if (new_bb->index >= array_size || last_basic_block > array_size)
778 {
779 int i;
780 int new_size;
781
782 new_size = MAX (last_basic_block, new_bb->index + 1);
783 new_size = GET_ARRAY_SIZE (new_size);
784 bbd = XRESIZEVEC (bbro_basic_block_data, bbd, new_size);
785 for (i = array_size; i < new_size; i++)
786 {
787 bbd[i].start_of_trace = -1;
788 bbd[i].end_of_trace = -1;
789 bbd[i].in_trace = -1;
790 bbd[i].visited = 0;
791 bbd[i].heap = NULL;
792 bbd[i].node = NULL;
793 }
794 array_size = new_size;
795
796 if (dump_file)
797 {
798 fprintf (dump_file,
799 "Growing the dynamic array to %d elements.\n",
800 array_size);
801 }
802 }
803
804 gcc_assert (!bb_visited_trace (e->dest));
805 mark_bb_visited (new_bb, trace);
806 new_bb->aux = bb->aux;
807 bb->aux = new_bb;
808
809 bbd[new_bb->index].in_trace = trace;
810
811 return new_bb;
812 }
813
814 /* Compute and return the key (for the heap) of the basic block BB. */
815
816 static fibheapkey_t
817 bb_to_key (basic_block bb)
818 {
819 edge e;
820 edge_iterator ei;
821 int priority = 0;
822
823 /* Do not start in probably never executed blocks. */
824
825 if (BB_PARTITION (bb) == BB_COLD_PARTITION
826 || probably_never_executed_bb_p (bb))
827 return BB_FREQ_MAX;
828
829 /* Prefer blocks whose predecessor is an end of some trace
830 or whose predecessor edge is EDGE_DFS_BACK. */
831 FOR_EACH_EDGE (e, ei, bb->preds)
832 {
833 if ((e->src != ENTRY_BLOCK_PTR && bbd[e->src->index].end_of_trace >= 0)
834 || (e->flags & EDGE_DFS_BACK))
835 {
836 int edge_freq = EDGE_FREQUENCY (e);
837
838 if (edge_freq > priority)
839 priority = edge_freq;
840 }
841 }
842
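  /* Keys are extracted from a min-heap (fibheap_extract_min), so the most
     attractive starting blocks must get the most negative keys: frequencies
     are negated, and blocks with a priority are pushed well below any plain
     -frequency value.  */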
843 if (priority)
844 /* The block with priority should have significantly lower key. */
845 return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);
846 return -bb->frequency;
847 }
848
849 /* Return true when the edge E from basic block BB is better than the temporary
850    best edge (details are in the function).  The probability of edge E is PROB.
851    The frequency of the successor is FREQ.  The current best probability is
852    BEST_PROB, the best frequency is BEST_FREQ.
853    Two edges are considered equivalent when PROB does not differ much from
854    BEST_PROB; similarly for frequency.  */
855
856 static bool
857 better_edge_p (const_basic_block bb, const_edge e, int prob, int freq, int best_prob,
858 int best_freq, const_edge cur_best_edge)
859 {
860 bool is_better_edge;
861
862 /* The BEST_* values do not have to be best, but can be a bit smaller than
863 maximum values. */
864 int diff_prob = best_prob / 10;
865 int diff_freq = best_freq / 10;
866
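  /* For example, with best_prob == 6000 the band is +-600: an edge with
     probability above 6600 wins outright, one below 5400 loses outright, and
     anything in between is treated as equivalent, so that the successor
     frequencies (with the analogous +-diff_freq band) decide.  */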
867 if (prob > best_prob + diff_prob)
868 /* The edge has higher probability than the temporary best edge. */
869 is_better_edge = true;
870 else if (prob < best_prob - diff_prob)
871 /* The edge has lower probability than the temporary best edge. */
872 is_better_edge = false;
873 else if (freq < best_freq - diff_freq)
874 /* The edge and the temporary best edge have almost equivalent
875 probabilities. The higher frequency of a successor now means
876 that there is another edge going into that successor.
877 This successor has lower frequency so it is better. */
878 is_better_edge = true;
879 else if (freq > best_freq + diff_freq)
880 /* This successor has higher frequency so it is worse. */
881 is_better_edge = false;
882 else if (e->dest->prev_bb == bb)
883 /* The edges have equivalent probabilities and the successors
884 have equivalent frequencies. Select the previous successor. */
885 is_better_edge = true;
886 else
887 is_better_edge = false;
888
889 /* If we are doing hot/cold partitioning, make sure that we always favor
890 non-crossing edges over crossing edges. */
891
892 if (!is_better_edge
893 && flag_reorder_blocks_and_partition
894 && cur_best_edge
895 && (cur_best_edge->flags & EDGE_CROSSING)
896 && !(e->flags & EDGE_CROSSING))
897 is_better_edge = true;
898
899 return is_better_edge;
900 }
901
902 /* Connect traces in array TRACES, N_TRACES is the count of traces. */
903
904 static void
905 connect_traces (int n_traces, struct trace *traces)
906 {
907 int i;
908 bool *connected;
909 bool two_passes;
910 int last_trace;
911 int current_pass;
912 int current_partition;
913 int freq_threshold;
914 gcov_type count_threshold;
915
916 freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
917 if (max_entry_count < INT_MAX / 1000)
918 count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
919 else
920 count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;
921
922 connected = XCNEWVEC (bool, n_traces);
923 last_trace = -1;
924 current_pass = 1;
925 current_partition = BB_PARTITION (traces[0].first);
926 two_passes = false;
927
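  /* Determine whether the function has traces in both partitions; if so, the
     connecting below is done in two passes, first for the traces in the
     partition of the first trace and then for the traces in the other
     partition.  */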
928 if (flag_reorder_blocks_and_partition)
929 for (i = 0; i < n_traces && !two_passes; i++)
930 if (BB_PARTITION (traces[0].first)
931 != BB_PARTITION (traces[i].first))
932 two_passes = true;
933
934 for (i = 0; i < n_traces || (two_passes && current_pass == 1) ; i++)
935 {
936 int t = i;
937 int t2;
938 edge e, best;
939 int best_len;
940
941 if (i >= n_traces)
942 {
943 gcc_assert (two_passes && current_pass == 1);
944 i = 0;
945 t = i;
946 current_pass = 2;
947 if (current_partition == BB_HOT_PARTITION)
948 current_partition = BB_COLD_PARTITION;
949 else
950 current_partition = BB_HOT_PARTITION;
951 }
952
953 if (connected[t])
954 continue;
955
956 if (two_passes
957 && BB_PARTITION (traces[t].first) != current_partition)
958 continue;
959
960 connected[t] = true;
961
962 /* Find the predecessor traces. */
963 for (t2 = t; t2 > 0;)
964 {
965 edge_iterator ei;
966 best = NULL;
967 best_len = 0;
968 FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
969 {
970 int si = e->src->index;
971
972 if (e->src != ENTRY_BLOCK_PTR
973 && (e->flags & EDGE_CAN_FALLTHRU)
974 && !(e->flags & EDGE_COMPLEX)
975 && bbd[si].end_of_trace >= 0
976 && !connected[bbd[si].end_of_trace]
977 && (BB_PARTITION (e->src) == current_partition)
978 && (!best
979 || e->probability > best->probability
980 || (e->probability == best->probability
981 && traces[bbd[si].end_of_trace].length > best_len)))
982 {
983 best = e;
984 best_len = traces[bbd[si].end_of_trace].length;
985 }
986 }
987 if (best)
988 {
989 best->src->aux = best->dest;
990 t2 = bbd[best->src->index].end_of_trace;
991 connected[t2] = true;
992
993 if (dump_file)
994 {
995 fprintf (dump_file, "Connection: %d %d\n",
996 best->src->index, best->dest->index);
997 }
998 }
999 else
1000 break;
1001 }
1002
1003 if (last_trace >= 0)
1004 traces[last_trace].last->aux = traces[t2].first;
1005 last_trace = t;
1006
1007 /* Find the successor traces. */
1008 while (1)
1009 {
1010 /* Find the continuation of the chain. */
1011 edge_iterator ei;
1012 best = NULL;
1013 best_len = 0;
1014 FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1015 {
1016 int di = e->dest->index;
1017
1018 if (e->dest != EXIT_BLOCK_PTR
1019 && (e->flags & EDGE_CAN_FALLTHRU)
1020 && !(e->flags & EDGE_COMPLEX)
1021 && bbd[di].start_of_trace >= 0
1022 && !connected[bbd[di].start_of_trace]
1023 && (BB_PARTITION (e->dest) == current_partition)
1024 && (!best
1025 || e->probability > best->probability
1026 || (e->probability == best->probability
1027 && traces[bbd[di].start_of_trace].length > best_len)))
1028 {
1029 best = e;
1030 best_len = traces[bbd[di].start_of_trace].length;
1031 }
1032 }
1033
1034 if (best)
1035 {
1036 if (dump_file)
1037 {
1038 fprintf (dump_file, "Connection: %d %d\n",
1039 best->src->index, best->dest->index);
1040 }
1041 t = bbd[best->dest->index].start_of_trace;
1042 traces[last_trace].last->aux = traces[t].first;
1043 connected[t] = true;
1044 last_trace = t;
1045 }
1046 else
1047 {
1048 /* Try to connect the traces by duplication of 1 block. */
1049 edge e2;
1050 basic_block next_bb = NULL;
1051 bool try_copy = false;
1052
1053 FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1054 if (e->dest != EXIT_BLOCK_PTR
1055 && (e->flags & EDGE_CAN_FALLTHRU)
1056 && !(e->flags & EDGE_COMPLEX)
1057 && (!best || e->probability > best->probability))
1058 {
1059 edge_iterator ei;
1060 edge best2 = NULL;
1061 int best2_len = 0;
1062
1063 /* If the destination is a start of a trace which is only
1064 one block long, then no need to search the successor
1065 blocks of the trace. Accept it. */
1066 if (bbd[e->dest->index].start_of_trace >= 0
1067 && traces[bbd[e->dest->index].start_of_trace].length
1068 == 1)
1069 {
1070 best = e;
1071 try_copy = true;
1072 continue;
1073 }
1074
1075 FOR_EACH_EDGE (e2, ei, e->dest->succs)
1076 {
1077 int di = e2->dest->index;
1078
1079 if (e2->dest == EXIT_BLOCK_PTR
1080 || ((e2->flags & EDGE_CAN_FALLTHRU)
1081 && !(e2->flags & EDGE_COMPLEX)
1082 && bbd[di].start_of_trace >= 0
1083 && !connected[bbd[di].start_of_trace]
1084 && (BB_PARTITION (e2->dest) == current_partition)
1085 && (EDGE_FREQUENCY (e2) >= freq_threshold)
1086 && (e2->count >= count_threshold)
1087 && (!best2
1088 || e2->probability > best2->probability
1089 || (e2->probability == best2->probability
1090 && traces[bbd[di].start_of_trace].length
1091 > best2_len))))
1092 {
1093 best = e;
1094 best2 = e2;
1095 if (e2->dest != EXIT_BLOCK_PTR)
1096 best2_len = traces[bbd[di].start_of_trace].length;
1097 else
1098 best2_len = INT_MAX;
1099 next_bb = e2->dest;
1100 try_copy = true;
1101 }
1102 }
1103 }
1104
1105 if (flag_reorder_blocks_and_partition)
1106 try_copy = false;
1107
1108 /* Copy tiny blocks always; copy larger blocks only when the
1109 edge is traversed frequently enough. */
1110 if (try_copy
1111 && copy_bb_p (best->dest,
1112 optimize_edge_for_speed_p (best)
1113 && EDGE_FREQUENCY (best) >= freq_threshold
1114 && best->count >= count_threshold))
1115 {
1116 basic_block new_bb;
1117
1118 if (dump_file)
1119 {
1120 fprintf (dump_file, "Connection: %d %d ",
1121 traces[t].last->index, best->dest->index);
1122 if (!next_bb)
1123 fputc ('\n', dump_file);
1124 else if (next_bb == EXIT_BLOCK_PTR)
1125 fprintf (dump_file, "exit\n");
1126 else
1127 fprintf (dump_file, "%d\n", next_bb->index);
1128 }
1129
1130 new_bb = copy_bb (best->dest, best, traces[t].last, t);
1131 traces[t].last = new_bb;
1132 if (next_bb && next_bb != EXIT_BLOCK_PTR)
1133 {
1134 t = bbd[next_bb->index].start_of_trace;
1135 traces[last_trace].last->aux = traces[t].first;
1136 connected[t] = true;
1137 last_trace = t;
1138 }
1139 else
1140 break; /* Stop finding the successor traces. */
1141 }
1142 else
1143 break; /* Stop finding the successor traces. */
1144 }
1145 }
1146 }
1147
1148 if (dump_file)
1149 {
1150 basic_block bb;
1151
1152 fprintf (dump_file, "Final order:\n");
1153 for (bb = traces[0].first; bb; bb = (basic_block) bb->aux)
1154 fprintf (dump_file, "%d ", bb->index);
1155 fprintf (dump_file, "\n");
1156 fflush (dump_file);
1157 }
1158
1159 FREE (connected);
1160 }
1161
1162 /* Return true when BB can and should be copied. CODE_MAY_GROW is true
1163 when code size is allowed to grow by duplication. */
1164
1165 static bool
1166 copy_bb_p (const_basic_block bb, int code_may_grow)
1167 {
1168 int size = 0;
1169 int max_size = uncond_jump_length;
1170 rtx insn;
1171
1172 if (!bb->frequency)
1173 return false;
1174 if (EDGE_COUNT (bb->preds) < 2)
1175 return false;
1176 if (!can_duplicate_block_p (bb))
1177 return false;
1178
1179 /* Avoid duplicating blocks which have many successors (PR/13430). */
1180 if (EDGE_COUNT (bb->succs) > 8)
1181 return false;
1182
1183 if (code_may_grow && optimize_bb_for_speed_p (bb))
1184 max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
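  /* I.e. a block is normally copied only if it is no larger than one
     unconditional jump, roughly the jump that copying it saves; when code may
     grow and the block is optimized for speed, the limit is scaled up by
     PARAM_MAX_GROW_COPY_BB_INSNS.  */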
1185
1186 FOR_BB_INSNS (bb, insn)
1187 {
1188 if (INSN_P (insn))
1189 size += get_attr_min_length (insn);
1190 }
1191
1192 if (size <= max_size)
1193 return true;
1194
1195 if (dump_file)
1196 {
1197 fprintf (dump_file,
1198 "Block %d can't be copied because its size = %d.\n",
1199 bb->index, size);
1200 }
1201
1202 return false;
1203 }
1204
1205 /* Return the length of an unconditional jump instruction.  */
1206
1207 int
1208 get_uncond_jump_length (void)
1209 {
1210 rtx label, jump;
1211 int length;
1212
1213 label = emit_label_before (gen_label_rtx (), get_insns ());
1214 jump = emit_jump_insn (gen_jump (label));
1215
1216 length = get_attr_min_length (jump);
1217
1218 delete_insn (jump);
1219 delete_insn (label);
1220 return length;
1221 }
1222
1223 /* Emit a barrier into the footer of BB. */
1224
1225 static void
1226 emit_barrier_after_bb (basic_block bb)
1227 {
1228 rtx barrier = emit_barrier_after (BB_END (bb));
1229 BB_FOOTER (bb) = unlink_insn_chain (barrier, barrier);
1230 }
1231
1232 /* The landing pad OLD_LP, in block OLD_BB, has edges from both partitions.
1233 Duplicate the landing pad and split the edges so that no EH edge
1234 crosses partitions. */
1235
1236 static void
1237 fix_up_crossing_landing_pad (eh_landing_pad old_lp, basic_block old_bb)
1238 {
1239 eh_landing_pad new_lp;
1240 basic_block new_bb, last_bb, post_bb;
1241 rtx new_label, jump, post_label;
1242 unsigned new_partition;
1243 edge_iterator ei;
1244 edge e;
1245
1246 /* Generate the new landing-pad structure. */
1247 new_lp = gen_eh_landing_pad (old_lp->region);
1248 new_lp->post_landing_pad = old_lp->post_landing_pad;
1249 new_lp->landing_pad = gen_label_rtx ();
1250 LABEL_PRESERVE_P (new_lp->landing_pad) = 1;
1251
1252 /* Put appropriate instructions in new bb. */
1253 new_label = emit_label (new_lp->landing_pad);
1254
1255 expand_dw2_landing_pad_for_region (old_lp->region);
1256
1257 post_bb = BLOCK_FOR_INSN (old_lp->landing_pad);
1258 post_bb = single_succ (post_bb);
1259 post_label = block_label (post_bb);
1260 jump = emit_jump_insn (gen_jump (post_label));
1261 JUMP_LABEL (jump) = post_label;
1262
1263 /* Create new basic block to be dest for lp. */
1264 last_bb = EXIT_BLOCK_PTR->prev_bb;
1265 new_bb = create_basic_block (new_label, jump, last_bb);
1266 new_bb->aux = last_bb->aux;
1267 last_bb->aux = new_bb;
1268
1269 emit_barrier_after_bb (new_bb);
1270
1271 make_edge (new_bb, post_bb, 0);
1272
1273 /* Make sure new bb is in the other partition. */
1274 new_partition = BB_PARTITION (old_bb);
1275 new_partition ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1276 BB_SET_PARTITION (new_bb, new_partition);
1277
1278 /* Fix up the edges. */
1279 for (ei = ei_start (old_bb->preds); (e = ei_safe_edge (ei)) != NULL; )
1280 if (BB_PARTITION (e->src) == new_partition)
1281 {
1282 rtx insn = BB_END (e->src);
1283 rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
1284
1285 gcc_assert (note != NULL);
1286 gcc_checking_assert (INTVAL (XEXP (note, 0)) == old_lp->index);
1287 XEXP (note, 0) = GEN_INT (new_lp->index);
1288
1289 /* Adjust the edge to the new destination. */
1290 redirect_edge_succ (e, new_bb);
1291 }
1292 else
1293 ei_next (&ei);
1294 }
1295
1296 /* Find the basic blocks that are rarely executed and need to be moved to
1297 a separate section of the .o file (to cut down on paging and improve
1298    cache locality).  Return a vector of all edges that cross between the sections.  */
1299
1300 static VEC(edge, heap) *
1301 find_rarely_executed_basic_blocks_and_crossing_edges (void)
1302 {
1303 VEC(edge, heap) *crossing_edges = NULL;
1304 basic_block bb;
1305 edge e;
1306 edge_iterator ei;
1307
1308 /* Mark which partition (hot/cold) each basic block belongs in. */
1309 FOR_EACH_BB (bb)
1310 {
1311 if (probably_never_executed_bb_p (bb))
1312 BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1313 else
1314 BB_SET_PARTITION (bb, BB_HOT_PARTITION);
1315 }
1316
1317 /* The format of .gcc_except_table does not allow landing pads to
1318      be in a different partition from the throw.  Fix this by either
1319 moving or duplicating the landing pads. */
1320 if (cfun->eh->lp_array)
1321 {
1322 unsigned i;
1323 eh_landing_pad lp;
1324
1325 FOR_EACH_VEC_ELT (eh_landing_pad, cfun->eh->lp_array, i, lp)
1326 {
1327 bool all_same, all_diff;
1328
1329 if (lp == NULL
1330 || lp->landing_pad == NULL_RTX
1331 || !LABEL_P (lp->landing_pad))
1332 continue;
1333
1334 all_same = all_diff = true;
1335 bb = BLOCK_FOR_INSN (lp->landing_pad);
1336 FOR_EACH_EDGE (e, ei, bb->preds)
1337 {
1338 gcc_assert (e->flags & EDGE_EH);
1339 if (BB_PARTITION (bb) == BB_PARTITION (e->src))
1340 all_diff = false;
1341 else
1342 all_same = false;
1343 }
1344
1345 if (all_same)
1346 ;
1347 else if (all_diff)
1348 {
1349 int which = BB_PARTITION (bb);
1350 which ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1351 BB_SET_PARTITION (bb, which);
1352 }
1353 else
1354 fix_up_crossing_landing_pad (lp, bb);
1355 }
1356 }
1357
1358 /* Mark every edge that crosses between sections. */
1359
1360 FOR_EACH_BB (bb)
1361 FOR_EACH_EDGE (e, ei, bb->succs)
1362 {
1363 unsigned int flags = e->flags;
1364
1365 /* We should never have EDGE_CROSSING set yet. */
1366 gcc_checking_assert ((flags & EDGE_CROSSING) == 0);
1367
1368 if (e->src != ENTRY_BLOCK_PTR
1369 && e->dest != EXIT_BLOCK_PTR
1370 && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
1371 {
1372 VEC_safe_push (edge, heap, crossing_edges, e);
1373 flags |= EDGE_CROSSING;
1374 }
1375
1376 /* Now that we've split eh edges as appropriate, allow landing pads
1377 to be merged with the post-landing pads. */
1378 flags &= ~EDGE_PRESERVE;
1379
1380 e->flags = flags;
1381 }
1382
1383 return crossing_edges;
1384 }
1385
1386 /* Set the flag EDGE_CAN_FALLTHRU for edges that can be fallthru. */
1387
1388 static void
1389 set_edge_can_fallthru_flag (void)
1390 {
1391 basic_block bb;
1392
1393 FOR_EACH_BB (bb)
1394 {
1395 edge e;
1396 edge_iterator ei;
1397
1398 FOR_EACH_EDGE (e, ei, bb->succs)
1399 {
1400 e->flags &= ~EDGE_CAN_FALLTHRU;
1401
1402 /* The FALLTHRU edge is also CAN_FALLTHRU edge. */
1403 if (e->flags & EDGE_FALLTHRU)
1404 e->flags |= EDGE_CAN_FALLTHRU;
1405 }
1406
1407 /* If the BB ends with an invertible condjump all (2) edges are
1408 CAN_FALLTHRU edges. */
1409 if (EDGE_COUNT (bb->succs) != 2)
1410 continue;
1411 if (!any_condjump_p (BB_END (bb)))
1412 continue;
1413 if (!invert_jump (BB_END (bb), JUMP_LABEL (BB_END (bb)), 0))
1414 continue;
1415 invert_jump (BB_END (bb), JUMP_LABEL (BB_END (bb)), 0);
1416 EDGE_SUCC (bb, 0)->flags |= EDGE_CAN_FALLTHRU;
1417 EDGE_SUCC (bb, 1)->flags |= EDGE_CAN_FALLTHRU;
1418 }
1419 }
1420
1421 /* If any destination of a crossing edge does not have a label, add a label;
1422    convert any easy fall-through crossing edges to unconditional jumps.  */
1423
1424 static void
1425 add_labels_and_missing_jumps (VEC(edge, heap) *crossing_edges)
1426 {
1427 size_t i;
1428 edge e;
1429
1430 FOR_EACH_VEC_ELT (edge, crossing_edges, i, e)
1431 {
1432 basic_block src = e->src;
1433 basic_block dest = e->dest;
1434 rtx label, new_jump;
1435
1436 if (dest == EXIT_BLOCK_PTR)
1437 continue;
1438
1439 /* Make sure dest has a label. */
1440 label = block_label (dest);
1441
1442 /* Nothing to do for non-fallthru edges. */
1443 if (src == ENTRY_BLOCK_PTR)
1444 continue;
1445 if ((e->flags & EDGE_FALLTHRU) == 0)
1446 continue;
1447
1448 /* If the block does not end with a control flow insn, then we
1449 can trivially add a jump to the end to fixup the crossing.
1450 Otherwise the jump will have to go in a new bb, which will
1451 be handled by fix_up_fall_thru_edges function. */
1452 if (control_flow_insn_p (BB_END (src)))
1453 continue;
1454
1455 /* Make sure there's only one successor. */
1456 gcc_assert (single_succ_p (src));
1457
1458 new_jump = emit_jump_insn_after (gen_jump (label), BB_END (src));
1459 BB_END (src) = new_jump;
1460 JUMP_LABEL (new_jump) = label;
1461 LABEL_NUSES (label) += 1;
1462
1463 emit_barrier_after_bb (src);
1464
1465 /* Mark edge as non-fallthru. */
1466 e->flags &= ~EDGE_FALLTHRU;
1467 }
1468 }
1469
1470 /* Find any bb's where the fall-through edge is a crossing edge (note that
1471 these bb's must also contain a conditional jump or end with a call
1472 instruction; we've already dealt with fall-through edges for blocks
1473 that didn't have a conditional jump or didn't end with call instruction
1474 in the call to add_labels_and_missing_jumps). Convert the fall-through
1475 edge to non-crossing edge by inserting a new bb to fall-through into.
1476 The new bb will contain an unconditional jump (crossing edge) to the
1477 original fall through destination. */
1478
1479 static void
1480 fix_up_fall_thru_edges (void)
1481 {
1482 basic_block cur_bb;
1483 basic_block new_bb;
1484 edge succ1;
1485 edge succ2;
1486 edge fall_thru;
1487 edge cond_jump = NULL;
1488 edge e;
1489 bool cond_jump_crosses;
1490 int invert_worked;
1491 rtx old_jump;
1492 rtx fall_thru_label;
1493
1494 FOR_EACH_BB (cur_bb)
1495 {
1496 fall_thru = NULL;
1497 if (EDGE_COUNT (cur_bb->succs) > 0)
1498 succ1 = EDGE_SUCC (cur_bb, 0);
1499 else
1500 succ1 = NULL;
1501
1502 if (EDGE_COUNT (cur_bb->succs) > 1)
1503 succ2 = EDGE_SUCC (cur_bb, 1);
1504 else
1505 succ2 = NULL;
1506
1507 /* Find the fall-through edge. */
1508
1509 if (succ1
1510 && (succ1->flags & EDGE_FALLTHRU))
1511 {
1512 fall_thru = succ1;
1513 cond_jump = succ2;
1514 }
1515 else if (succ2
1516 && (succ2->flags & EDGE_FALLTHRU))
1517 {
1518 fall_thru = succ2;
1519 cond_jump = succ1;
1520 }
1521 else if (succ1
1522 && (block_ends_with_call_p (cur_bb)
1523 || can_throw_internal (BB_END (cur_bb))))
1524 {
1525 edge e;
1526 edge_iterator ei;
1527
1528 /* Find EDGE_CAN_FALLTHRU edge. */
1529 FOR_EACH_EDGE (e, ei, cur_bb->succs)
1530 if (e->flags & EDGE_CAN_FALLTHRU)
1531 {
1532 fall_thru = e;
1533 break;
1534 }
1535 }
1536
1537 if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR))
1538 {
1539 /* Check to see if the fall-thru edge is a crossing edge. */
1540
1541 if (fall_thru->flags & EDGE_CROSSING)
1542 {
1543 /* The fall_thru edge crosses; now check the cond jump edge, if
1544 it exists. */
1545
1546 cond_jump_crosses = true;
1547 invert_worked = 0;
1548 old_jump = BB_END (cur_bb);
1549
1550 /* Find the jump instruction, if there is one. */
1551
1552 if (cond_jump)
1553 {
1554 if (!(cond_jump->flags & EDGE_CROSSING))
1555 cond_jump_crosses = false;
1556
1557 /* We know the fall-thru edge crosses; if the cond
1558 jump edge does NOT cross, and its destination is the
1559 next block in the bb order, invert the jump
1560 (i.e. fix it so the fall through does not cross and
1561 the cond jump does). */
1562
1563 if (!cond_jump_crosses
1564 && cur_bb->aux == cond_jump->dest)
1565 {
1566 /* Find label in fall_thru block. We've already added
1567 any missing labels, so there must be one. */
1568
1569 fall_thru_label = block_label (fall_thru->dest);
1570
1571 if (old_jump && JUMP_P (old_jump) && fall_thru_label)
1572 invert_worked = invert_jump (old_jump,
1573 fall_thru_label,0);
1574 if (invert_worked)
1575 {
1576 fall_thru->flags &= ~EDGE_FALLTHRU;
1577 cond_jump->flags |= EDGE_FALLTHRU;
1578 update_br_prob_note (cur_bb);
1579 e = fall_thru;
1580 fall_thru = cond_jump;
1581 cond_jump = e;
1582 cond_jump->flags |= EDGE_CROSSING;
1583 fall_thru->flags &= ~EDGE_CROSSING;
1584 }
1585 }
1586 }
1587
1588 if (cond_jump_crosses || !invert_worked)
1589 {
1590 /* This is the case where both edges out of the basic
1591 block are crossing edges. Here we will fix up the
1592 fall through edge. The jump edge will be taken care
1593 of later. The EDGE_CROSSING flag of fall_thru edge
1594 is unset before the call to force_nonfallthru
1595 function because if a new basic-block is created
1596 this edge remains in the current section boundary
1597 while the edge between new_bb and the fall_thru->dest
1598 becomes EDGE_CROSSING. */
1599
1600 fall_thru->flags &= ~EDGE_CROSSING;
1601 new_bb = force_nonfallthru (fall_thru);
1602
1603 if (new_bb)
1604 {
1605 new_bb->aux = cur_bb->aux;
1606 cur_bb->aux = new_bb;
1607
1608 /* Make sure new fall-through bb is in same
1609 partition as bb it's falling through from. */
1610
1611 BB_COPY_PARTITION (new_bb, cur_bb);
1612 single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
1613 }
1614 else
1615 {
1616 /* If a new basic-block was not created; restore
1617 the EDGE_CROSSING flag. */
1618 fall_thru->flags |= EDGE_CROSSING;
1619 }
1620
1621 /* Add barrier after new jump */
1622 emit_barrier_after_bb (new_bb ? new_bb : cur_bb);
1623 }
1624 }
1625 }
1626 }
1627 }
1628
1629 /* This function checks the destination block of a "crossing jump" to
1630 see if it has any crossing predecessors that begin with a code label
1631 and end with an unconditional jump. If so, it returns that predecessor
1632 block. (This is to avoid creating lots of new basic blocks that all
1633 contain unconditional jumps to the same destination). */
1634
1635 static basic_block
1636 find_jump_block (basic_block jump_dest)
1637 {
1638 basic_block source_bb = NULL;
1639 edge e;
1640 rtx insn;
1641 edge_iterator ei;
1642
1643 FOR_EACH_EDGE (e, ei, jump_dest->preds)
1644 if (e->flags & EDGE_CROSSING)
1645 {
1646 basic_block src = e->src;
1647
1648 /* Check each predecessor to see if it has a label, and contains
1649 only one executable instruction, which is an unconditional jump.
1650 If so, we can use it. */
1651
1652 if (LABEL_P (BB_HEAD (src)))
1653 for (insn = BB_HEAD (src);
1654 !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
1655 insn = NEXT_INSN (insn))
1656 {
1657 if (INSN_P (insn)
1658 && insn == BB_END (src)
1659 && JUMP_P (insn)
1660 && !any_condjump_p (insn))
1661 {
1662 source_bb = src;
1663 break;
1664 }
1665 }
1666
1667 if (source_bb)
1668 break;
1669 }
1670
1671 return source_bb;
1672 }
1673
1674 /* Find all BB's with conditional jumps that are crossing edges;
1675 insert a new bb and make the conditional jump branch to the new
1676    bb instead (make the new bb the same color so the conditional branch won't
1677 be a 'crossing' edge). Insert an unconditional jump from the
1678 new bb to the original destination of the conditional jump. */
1679
1680 static void
1681 fix_crossing_conditional_branches (void)
1682 {
1683 basic_block cur_bb;
1684 basic_block new_bb;
1685 basic_block dest;
1686 edge succ1;
1687 edge succ2;
1688 edge crossing_edge;
1689 edge new_edge;
1690 rtx old_jump;
1691 rtx set_src;
1692 rtx old_label = NULL_RTX;
1693 rtx new_label;
1694
1695 FOR_EACH_BB (cur_bb)
1696 {
1697 crossing_edge = NULL;
1698 if (EDGE_COUNT (cur_bb->succs) > 0)
1699 succ1 = EDGE_SUCC (cur_bb, 0);
1700 else
1701 succ1 = NULL;
1702
1703 if (EDGE_COUNT (cur_bb->succs) > 1)
1704 succ2 = EDGE_SUCC (cur_bb, 1);
1705 else
1706 succ2 = NULL;
1707
1708 /* We already took care of fall-through edges, so only one successor
1709 can be a crossing edge. */
1710
1711 if (succ1 && (succ1->flags & EDGE_CROSSING))
1712 crossing_edge = succ1;
1713 else if (succ2 && (succ2->flags & EDGE_CROSSING))
1714 crossing_edge = succ2;
1715
1716 if (crossing_edge)
1717 {
1718 old_jump = BB_END (cur_bb);
1719
1720 /* Check to make sure the jump instruction is a
1721 conditional jump. */
1722
1723 set_src = NULL_RTX;
1724
1725 if (any_condjump_p (old_jump))
1726 {
1727 if (GET_CODE (PATTERN (old_jump)) == SET)
1728 set_src = SET_SRC (PATTERN (old_jump));
1729 else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
1730 {
1731 set_src = XVECEXP (PATTERN (old_jump), 0,0);
1732 if (GET_CODE (set_src) == SET)
1733 set_src = SET_SRC (set_src);
1734 else
1735 set_src = NULL_RTX;
1736 }
1737 }
1738
1739 if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
1740 {
1741 if (GET_CODE (XEXP (set_src, 1)) == PC)
1742 old_label = XEXP (set_src, 2);
1743 else if (GET_CODE (XEXP (set_src, 2)) == PC)
1744 old_label = XEXP (set_src, 1);
1745
1746 /* Check to see if new bb for jumping to that dest has
1747 already been created; if so, use it; if not, create
1748 a new one. */
1749
1750 new_bb = find_jump_block (crossing_edge->dest);
1751
1752 if (new_bb)
1753 new_label = block_label (new_bb);
1754 else
1755 {
1756 basic_block last_bb;
1757 rtx new_jump;
1758
1759 /* Create new basic block to be dest for
1760 conditional jump. */
1761
1762 /* Put appropriate instructions in new bb. */
1763
1764 new_label = gen_label_rtx ();
1765 emit_label (new_label);
1766
1767 gcc_assert (GET_CODE (old_label) == LABEL_REF);
1768 old_label = JUMP_LABEL (old_jump);
1769 new_jump = emit_jump_insn (gen_jump (old_label));
1770 JUMP_LABEL (new_jump) = old_label;
1771
1772 last_bb = EXIT_BLOCK_PTR->prev_bb;
1773 new_bb = create_basic_block (new_label, new_jump, last_bb);
1774 new_bb->aux = last_bb->aux;
1775 last_bb->aux = new_bb;
1776
1777 emit_barrier_after_bb (new_bb);
1778
1779 /* Make sure new bb is in same partition as source
1780 of conditional branch. */
1781 BB_COPY_PARTITION (new_bb, cur_bb);
1782 }
1783
1784 /* Make old jump branch to new bb. */
1785
1786 redirect_jump (old_jump, new_label, 0);
1787
1788 /* Remove crossing_edge as predecessor of 'dest'. */
1789
1790 dest = crossing_edge->dest;
1791
1792 redirect_edge_succ (crossing_edge, new_bb);
1793
1794 /* Make a new edge from new_bb to old dest; new edge
1795 will be a successor for new_bb and a predecessor
1796 for 'dest'. */
1797
1798 if (EDGE_COUNT (new_bb->succs) == 0)
1799 new_edge = make_edge (new_bb, dest, 0);
1800 else
1801 new_edge = EDGE_SUCC (new_bb, 0);
1802
1803 crossing_edge->flags &= ~EDGE_CROSSING;
1804 new_edge->flags |= EDGE_CROSSING;
1805 }
1806 }
1807 }
1808 }
1809
1810 /* Find any unconditional branches that cross between hot and cold
1811 sections. Convert them into indirect jumps instead. */
1812
1813 static void
1814 fix_crossing_unconditional_branches (void)
1815 {
1816 basic_block cur_bb;
1817 rtx last_insn;
1818 rtx label;
1819 rtx label_addr;
1820 rtx indirect_jump_sequence;
1821 rtx jump_insn = NULL_RTX;
1822 rtx new_reg;
1823 rtx cur_insn;
1824 edge succ;
1825
1826 FOR_EACH_BB (cur_bb)
1827 {
1828 last_insn = BB_END (cur_bb);
1829
1830 if (EDGE_COUNT (cur_bb->succs) < 1)
1831 continue;
1832
1833 succ = EDGE_SUCC (cur_bb, 0);
1834
1835 /* Check to see if bb ends in a crossing (unconditional) jump. At
1836 this point, no crossing jumps should be conditional. */
1837
1838 if (JUMP_P (last_insn)
1839 && (succ->flags & EDGE_CROSSING))
1840 {
1841 rtx label2, table;
1842
1843 gcc_assert (!any_condjump_p (last_insn));
1844
1845 /* Make sure the jump is not already an indirect or table jump. */
1846
1847 if (!computed_jump_p (last_insn)
1848 && !tablejump_p (last_insn, &label2, &table))
1849 {
1850 /* We have found a "crossing" unconditional branch. Now
1851 we must convert it to an indirect jump. First create
1852 reference of label, as target for jump. */
1853
1854 label = JUMP_LABEL (last_insn);
1855 label_addr = gen_rtx_LABEL_REF (Pmode, label);
1856 LABEL_NUSES (label) += 1;
1857
1858 /* Get a register to use for the indirect jump. */
1859
1860 new_reg = gen_reg_rtx (Pmode);
1861
1862 	      /* Generate the indirect jump sequence.  */
1863
1864 start_sequence ();
1865 emit_move_insn (new_reg, label_addr);
1866 emit_indirect_jump (new_reg);
1867 indirect_jump_sequence = get_insns ();
1868 end_sequence ();
1869
1870 /* Make sure every instruction in the new jump sequence has
1871 its basic block set to be cur_bb. */
1872
1873 for (cur_insn = indirect_jump_sequence; cur_insn;
1874 cur_insn = NEXT_INSN (cur_insn))
1875 {
1876 if (!BARRIER_P (cur_insn))
1877 BLOCK_FOR_INSN (cur_insn) = cur_bb;
1878 if (JUMP_P (cur_insn))
1879 jump_insn = cur_insn;
1880 }
1881
1882 /* Insert the new (indirect) jump sequence immediately before
1883 the unconditional jump, then delete the unconditional jump. */
1884
1885 emit_insn_before (indirect_jump_sequence, last_insn);
1886 delete_insn (last_insn);
1887
1888 /* Make BB_END for cur_bb be the jump instruction (NOT the
1889 barrier instruction at the end of the sequence...). */
1890
1891 BB_END (cur_bb) = jump_insn;
1892 }
1893 }
1894 }
1895 }
1896
1897 /* Add REG_CROSSING_JUMP note to all crossing jump insns. */
1898
1899 static void
1900 add_reg_crossing_jump_notes (void)
1901 {
1902 basic_block bb;
1903 edge e;
1904 edge_iterator ei;
1905
1906 FOR_EACH_BB (bb)
1907 FOR_EACH_EDGE (e, ei, bb->succs)
1908 if ((e->flags & EDGE_CROSSING)
1909 && JUMP_P (BB_END (e->src)))
1910 add_reg_note (BB_END (e->src), REG_CROSSING_JUMP, NULL_RTX);
1911 }
1912
1913 /* Verify, in the basic block chain, that there is at most one switch
1914 between hot/cold partitions. This is modelled on
1915 rtl_verify_flow_info_1, but it cannot go inside that function
1916 because this condition will not be true until after
1917 reorder_basic_blocks is called. */
1918
1919 static void
1920 verify_hot_cold_block_grouping (void)
1921 {
1922 basic_block bb;
1923 int err = 0;
1924 bool switched_sections = false;
1925 int current_partition = 0;
1926
1927 FOR_EACH_BB (bb)
1928 {
1929 if (!current_partition)
1930 current_partition = BB_PARTITION (bb);
1931 if (BB_PARTITION (bb) != current_partition)
1932 {
1933 if (switched_sections)
1934 {
1935 error ("multiple hot/cold transitions found (bb %i)",
1936 bb->index);
1937 err = 1;
1938 }
1939 else
1940 {
1941 switched_sections = true;
1942 current_partition = BB_PARTITION (bb);
1943 }
1944 }
1945 }
1946
1947 gcc_assert(!err);
1948 }
1949
1950 /* Reorder basic blocks.  The main entry point to this file.  */
1952
1953 static void
1954 reorder_basic_blocks (void)
1955 {
1956 int n_traces;
1957 int i;
1958 struct trace *traces;
1959
1960 gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);
1961
1962 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
1963 return;
1964
1965 set_edge_can_fallthru_flag ();
1966 mark_dfs_back_edges ();
1967
1968 /* We are estimating the length of uncond jump insn only once since the code
1969 for getting the insn length always returns the minimal length now. */
1970 if (uncond_jump_length == 0)
1971 uncond_jump_length = get_uncond_jump_length ();
1972
1973 /* We need to know some information for each basic block. */
1974 array_size = GET_ARRAY_SIZE (last_basic_block);
1975 bbd = XNEWVEC (bbro_basic_block_data, array_size);
1976 for (i = 0; i < array_size; i++)
1977 {
1978 bbd[i].start_of_trace = -1;
1979 bbd[i].end_of_trace = -1;
1980 bbd[i].in_trace = -1;
1981 bbd[i].visited = 0;
1982 bbd[i].heap = NULL;
1983 bbd[i].node = NULL;
1984 }
1985
1986 traces = XNEWVEC (struct trace, n_basic_blocks);
1987 n_traces = 0;
1988 find_traces (&n_traces, traces);
1989 connect_traces (n_traces, traces);
1990 FREE (traces);
1991 FREE (bbd);
1992
1993 relink_block_chain (/*stay_in_cfglayout_mode=*/true);
1994
1995 if (dump_file)
1996 {
1997 if (dump_flags & TDF_DETAILS)
1998 dump_reg_info (dump_file);
1999 dump_flow_info (dump_file, dump_flags);
2000 }
2001
2002 if (flag_reorder_blocks_and_partition)
2003 verify_hot_cold_block_grouping ();
2004 }
2005
2006 /* Determine which partition the first basic block in the function
2007 belongs to, then find the first basic block in the current function
2008 that belongs to a different section, and insert a
2009 NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
2010 instruction stream. When writing out the assembly code,
2011 encountering this note will make the compiler switch between the
2012 hot and cold text sections. */
2013
2014 static void
2015 insert_section_boundary_note (void)
2016 {
2017 basic_block bb;
2018 rtx new_note;
2019 int first_partition = 0;
2020
2021 if (!flag_reorder_blocks_and_partition)
2022 return;
2023
2024 FOR_EACH_BB (bb)
2025 {
2026 if (!first_partition)
2027 first_partition = BB_PARTITION (bb);
2028 if (BB_PARTITION (bb) != first_partition)
2029 {
2030 new_note = emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS,
2031 BB_HEAD (bb));
2032 /* ??? This kind of note always lives between basic blocks,
2033 but add_insn_before will set BLOCK_FOR_INSN anyway. */
2034 BLOCK_FOR_INSN (new_note) = NULL;
2035 break;
2036 }
2037 }
2038 }
2039
2040 /* Duplicate the blocks containing computed gotos. This basically unfactors
2041 computed gotos that were factored early on in the compilation process to
2042 speed up edge-based data flow. We used not to unfactor them again,
2043 which can seriously pessimize code with many computed jumps in the source
2044 code, such as interpreters. See e.g. PR15242. */
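
/* For illustration only: a hypothetical direct-threaded interpreter of
   the kind this pass helps (the names below are made up, not taken from
   any real code). In the source, every opcode body ends in its own
   computed goto, but earlier passes factor them all into one shared
   dispatch block; duplicating that block back into each predecessor is
   what "unfactoring" means here. A rough sketch, using the GNU C
   labels-as-values extension:

	void
	interp (const unsigned char *pc)
	{
	  static void *dispatch[] = { &&do_inc, &&do_halt };

	  goto *dispatch[*pc];
	do_inc:
	  pc++;
	  goto *dispatch[*pc];
	do_halt:
	  return;
	}

   After factoring, the two computed gotos above share a single block and
   every opcode body merely jumps to it; this pass copies that block back
   so each opcode body again ends in its own indirect jump. */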
2045
2046 static bool
2047 gate_duplicate_computed_gotos (void)
2048 {
2049 if (targetm.cannot_modify_jumps_p ())
2050 return false;
2051 return (optimize > 0
2052 && flag_expensive_optimizations
2053 && ! optimize_function_for_size_p (cfun));
2054 }
2055
2056
2057 static unsigned int
2058 duplicate_computed_gotos (void)
2059 {
2060 basic_block bb, new_bb;
2061 bitmap candidates;
2062 int max_size;
2063
2064 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
2065 return 0;
2066
2067 clear_bb_flags ();
2068 cfg_layout_initialize (0);
2069
2070 /* We estimate the length of an uncond jump insn only once,
2071 since the code for getting the insn length always returns
2072 the minimal length now. */
2073 if (uncond_jump_length == 0)
2074 uncond_jump_length = get_uncond_jump_length ();
2075
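  /* Never duplicate a block whose estimated length exceeds the
     equivalent of PARAM_MAX_GOTO_DUPLICATION_INSNS unconditional
     jumps.  */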
2076 max_size = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
2077 candidates = BITMAP_ALLOC (NULL);
2078
2079 /* Look for blocks that end in a computed jump, and see if such blocks
2080 are suitable for unfactoring. If a block is a candidate for unfactoring,
2081 mark it in the candidates. */
2082 FOR_EACH_BB (bb)
2083 {
2084 rtx insn;
2085 edge e;
2086 edge_iterator ei;
2087 int size, all_flags;
2088
2089 /* Build the reorder chain for the original order of blocks. */
2090 if (bb->next_bb != EXIT_BLOCK_PTR)
2091 bb->aux = bb->next_bb;
2092
2093 /* Obviously the block has to end in a computed jump. */
2094 if (!computed_jump_p (BB_END (bb)))
2095 continue;
2096
2097 /* Only consider blocks that can be duplicated. */
2098 if (find_reg_note (BB_END (bb), REG_CROSSING_JUMP, NULL_RTX)
2099 || !can_duplicate_block_p (bb))
2100 continue;
2101
2102 /* Make sure that the block is small enough. */
2103 size = 0;
2104 FOR_BB_INSNS (bb, insn)
2105 if (INSN_P (insn))
2106 {
2107 size += get_attr_min_length (insn);
2108 if (size > max_size)
2109 break;
2110 }
2111 if (size > max_size)
2112 continue;
2113
2114 /* Final check: there must not be any incoming abnormal edges. */
2115 all_flags = 0;
2116 FOR_EACH_EDGE (e, ei, bb->preds)
2117 all_flags |= e->flags;
2118 if (all_flags & EDGE_COMPLEX)
2119 continue;
2120
2121 bitmap_set_bit (candidates, bb->index);
2122 }
2123
2124 /* Nothing to do if there is no computed jump here. */
2125 if (bitmap_empty_p (candidates))
2126 goto done;
2127
2128 /* Duplicate computed gotos. */
2129 FOR_EACH_BB (bb)
2130 {
2131 if (bb->flags & BB_VISITED)
2132 continue;
2133
2134 bb->flags |= BB_VISITED;
2135
2136 /* BB must have one outgoing edge. That edge must not lead to
2137 the exit block or the next block.
2138 The destination must have more than one predecessor. */
2139 if (!single_succ_p (bb)
2140 || single_succ (bb) == EXIT_BLOCK_PTR
2141 || single_succ (bb) == bb->next_bb
2142 || single_pred_p (single_succ (bb)))
2143 continue;
2144
2145 /* The successor block has to be a duplication candidate. */
2146 if (!bitmap_bit_p (candidates, single_succ (bb)->index))
2147 continue;
2148
2149 new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb), bb);
2150 new_bb->aux = bb->aux;
2151 bb->aux = new_bb;
2152 new_bb->flags |= BB_VISITED;
2153 }
2154
2155 done:
2156 cfg_layout_finalize ();
2157
2158 BITMAP_FREE (candidates);
2159 return 0;
2160 }
2161
2162 struct rtl_opt_pass pass_duplicate_computed_gotos =
2163 {
2164 {
2165 RTL_PASS,
2166 "compgotos", /* name */
2167 gate_duplicate_computed_gotos, /* gate */
2168 duplicate_computed_gotos, /* execute */
2169 NULL, /* sub */
2170 NULL, /* next */
2171 0, /* static_pass_number */
2172 TV_REORDER_BLOCKS, /* tv_id */
2173 0, /* properties_required */
2174 0, /* properties_provided */
2175 0, /* properties_destroyed */
2176 0, /* todo_flags_start */
2177 TODO_verify_rtl_sharing,/* todo_flags_finish */
2178 }
2179 };
2180
2181
2182 /* This function is the main entry point for the optimization that
2183 partitions hot and cold basic blocks into separate sections of the
2184 .o file (to improve performance and cache locality). Ideally it
2185 would be called after all optimizations that rearrange the CFG have
2186 been run. However, part of this optimization may introduce new
2187 register usage, so it must be called before register allocation has
2188 occurred. This means that this optimization is actually called
2189 well before the optimization that reorders basic blocks (see
2190 function above).
2191
2192 This optimization checks the feedback information to determine
2193 which basic blocks are hot/cold, and updates flags on the basic
2194 blocks to indicate which section they belong in. This information is
2195 later used for writing out sections in the .o file. Because hot
2196 and cold sections can be arbitrarily large (within the bounds of
2197 memory), far beyond the size of a single function, it is necessary
2198 to fix up all edges that cross section boundaries, to make sure the
2199 instructions used can actually span the required distance. The
2200 fixes are described below.
2201
2202 Fall-through edges must be changed into jumps; it is not safe or
2203 legal to fall through across a section boundary. Whenever a
2204 fall-through edge crossing a section boundary is encountered, a new
2205 basic block is inserted (in the same section as the fall-through
2206 source), and the fall through edge is redirected to the new basic
2207 block. The new basic block contains an unconditional jump to the
2208 original fall-through target. (If the unconditional jump is
2209 insufficient to cross section boundaries, that is dealt with a
2210 little later, see below).
2211
2212 In order to deal with architectures that have short conditional
2213 branches (which cannot span all of memory) we take any conditional
2214 jump that attempts to cross a section boundary and add a level of
2215 indirection: it becomes a conditional jump to a new basic block, in
2216 the same section. The new basic block contains an unconditional
2217 jump to the original target, in the other section.
2218
2219 For those architectures whose unconditional branch is also
2220 incapable of reaching all of memory, those unconditional jumps are
2221 converted into indirect jumps, through a register.
2222
2223 IMPORTANT NOTE: This optimization causes some messy interactions
2224 with the cfg cleanup optimizations; those optimizations want to
2225 merge blocks wherever possible, and to collapse indirect jump
2226 sequences (change "A jumps to B jumps to C" directly into "A jumps
2227 to C"). Those optimizations can undo the jump fixes that
2228 partitioning is required to make (see above), in order to ensure
2229 that jumps attempting to cross section boundaries are really able
2230 to cover whatever distance the jump requires (on many architectures
2231 conditional or unconditional jumps are not able to reach all of
2232 memory). Therefore tests have to be inserted into each such
2233 optimization to make sure that it does not undo the fixes needed to
2234 cross partition boundaries. This would be much less of a problem
2235 if we could perform this optimization later in the compilation, but
2236 unfortunately the fact that we may need to create indirect jumps
2237 (through registers) requires that this optimization be performed
2238 before register allocation.
2239
2240 Hot and cold basic blocks are partitioned and put in separate
2241 sections of the .o file, to reduce paging and improve cache
2242 performance (hopefully). This can result in bits of code from the
2243 same function being widely separated in the .o file. However, this
2244 is not reflected in the current bb structure. Therefore we must take
2245 care to ensure that: 1). There are no fall_thru edges that cross
2246 between sections; 2). For those architectures which have "short"
2247 conditional branches, all conditional branches that attempt to
2248 cross between sections are converted to unconditional branches;
2249 and, 3). For those architectures which have "short" unconditional
2250 branches, all unconditional branches that attempt to cross between
2251 sections are converted to indirect jumps.
2252
2253 The code for fixing up fall_thru edges that cross between hot and
2254 cold basic blocks does so by creating new basic blocks containing
2255 unconditional branches to the appropriate label in the "other"
2256 section. The new basic block is then put in the same (hot or cold)
2257 section as the original conditional branch, and the fall_thru edge
2258 is modified to fall into the new basic block instead. By adding
2259 this level of indirection we end up with only unconditional branches
2260 crossing between hot and cold sections.
2261
2262 Conditional branches are dealt with by adding a level of indirection.
2263 A new basic block is added in the same (hot/cold) section as the
2264 conditional branch, and the conditional branch is retargeted to the
2265 new basic block. The new basic block contains an unconditional branch
2266 to the original target of the conditional branch (in the other section).
2267
2268 Unconditional branches are dealt with by converting them into
2269 indirect jumps. */
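
/* As an illustration of the fixes described above (the labels below are
   hypothetical, not something this file emits literally): a conditional
   branch from the hot section to a label L_cold in the cold section,

	if (x)
	  goto L_cold;

   is retargeted at a new block in the same (hot) section that does
   nothing but jump to the old target,

	if (x)
	  goto L_hot_stub;
	...
      L_hot_stub:
	goto L_cold;

   and, on targets whose unconditional branches also cannot reach all of
   memory, the remaining crossing "goto L_cold" is converted into an
   indirect jump through a register loaded with the address of L_cold. */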
2270
2271 static unsigned
2272 partition_hot_cold_basic_blocks (void)
2273 {
2274 VEC(edge, heap) *crossing_edges;
2275
2276 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
2277 return 0;
2278
2279 df_set_flags (DF_DEFER_INSN_RESCAN);
2280
2281 crossing_edges = find_rarely_executed_basic_blocks_and_crossing_edges ();
2282 if (crossing_edges == NULL)
2283 return 0;
2284
2285 /* Make sure the source of any crossing edge ends in a jump and the
2286 destination of any crossing edge has a label. */
2287 add_labels_and_missing_jumps (crossing_edges);
2288
2289 /* Convert all crossing fall_thru edges into non-crossing fall-thrus
2290 to new basic blocks containing unconditional jumps to the original
2291 fall-through destinations. */
2292 fix_up_fall_thru_edges ();
2293
2294 /* If the architecture does not have conditional branches that can
2295 span all of memory, convert crossing conditional branches into
2296 crossing unconditional branches. */
2297 if (!HAS_LONG_COND_BRANCH)
2298 fix_crossing_conditional_branches ();
2299
2300 /* If the architecture does not have unconditional branches that
2301 can span all of memory, convert crossing unconditional branches
2302 into indirect jumps. Since adding an indirect jump also adds
2303 a new register usage, update the register usage information as
2304 well. */
2305 if (!HAS_LONG_UNCOND_BRANCH)
2306 fix_crossing_unconditional_branches ();
2307
2308 add_reg_crossing_jump_notes ();
2309
2310 /* Clear bb->aux fields that the above routines were using. */
2311 clear_aux_for_blocks ();
2312
2313 VEC_free (edge, heap, crossing_edges);
2314
2315 /* ??? FIXME: DF generates the bb info for a block immediately.
2316 And by immediately, I mean *during* creation of the block.
2317
2318 #0 df_bb_refs_collect
2319 #1 in df_bb_refs_record
2320 #2 in create_basic_block_structure
2321
2322 Which means that the bb_has_eh_pred test in df_bb_refs_collect
2323 will *always* fail, because no edges can have been added to the
2324 block yet. Which of course means we don't add the right
2325 artificial refs, which means we fail df_verify (much) later.
2326
2327 The cleanest solution would seem to be making DF_DEFER_INSN_RESCAN
2328 imply that we also shouldn't grab data from the new blocks that the
2329 new insns are in. In this way one can create the block, link
2330 it up properly, and have everything Just Work later, when deferred
2331 insns are processed.
2332
2333 In the meantime, we have no other option but to throw away all
2334 of the DF data and recompute it all. */
2335 if (cfun->eh->lp_array)
2336 {
2337 df_finish_pass (true);
2338 df_scan_alloc (NULL);
2339 df_scan_blocks ();
2340 /* Not all post-landing pads use all of the EH_RETURN_DATA_REGNO
2341 data. We blindly generated all of them when creating the new
2342 landing pad. Delete those assignments we don't use. */
2343 df_set_flags (DF_LR_RUN_DCE);
2344 df_analyze ();
2345 }
2346
2347 return TODO_verify_flow | TODO_verify_rtl_sharing;
2348 }
2349 \f
2350 static bool
2351 gate_handle_reorder_blocks (void)
2352 {
2353 if (targetm.cannot_modify_jumps_p ())
2354 return false;
2355 /* Don't reorder blocks when optimizing for size because extra jump insns may
2356 be created; also barriers may create extra padding.
2357
2358 More correctly we should have a block reordering mode that tried to
2359 minimize the combined size of all the jumps. This would more or less
2360 automatically remove extra jumps, but would also try to use more short
2361 jumps instead of long jumps. */
2362 if (!optimize_function_for_speed_p (cfun))
2363 return false;
2364 return (optimize > 0
2365 && (flag_reorder_blocks || flag_reorder_blocks_and_partition));
2366 }
2367
2368
2369 /* Reorder basic blocks. */
2370 static unsigned int
2371 rest_of_handle_reorder_blocks (void)
2372 {
2373 basic_block bb;
2374
2375 /* Last attempt to optimize CFG, as scheduling, peepholing and insn
2376 splitting possibly introduced more crossjumping opportunities. */
2377 cfg_layout_initialize (CLEANUP_EXPENSIVE);
2378
2379 reorder_basic_blocks ();
2380 cleanup_cfg (CLEANUP_EXPENSIVE);
2381
2382 FOR_EACH_BB (bb)
2383 if (bb->next_bb != EXIT_BLOCK_PTR)
2384 bb->aux = bb->next_bb;
2385 cfg_layout_finalize ();
2386
2387 /* Add NOTE_INSN_SWITCH_TEXT_SECTIONS notes. */
2388 insert_section_boundary_note ();
2389 return 0;
2390 }
2391
2392 struct rtl_opt_pass pass_reorder_blocks =
2393 {
2394 {
2395 RTL_PASS,
2396 "bbro", /* name */
2397 gate_handle_reorder_blocks, /* gate */
2398 rest_of_handle_reorder_blocks, /* execute */
2399 NULL, /* sub */
2400 NULL, /* next */
2401 0, /* static_pass_number */
2402 TV_REORDER_BLOCKS, /* tv_id */
2403 0, /* properties_required */
2404 0, /* properties_provided */
2405 0, /* properties_destroyed */
2406 0, /* todo_flags_start */
2407 TODO_verify_rtl_sharing, /* todo_flags_finish */
2408 }
2409 };
2410
2411 static bool
2412 gate_handle_partition_blocks (void)
2413 {
2414 /* The optimization to partition hot/cold basic blocks into separate
2415 sections of the .o file does not work well with linkonce or with
2416 user defined section attributes. Don't call it if either case
2417 arises. */
2418 return (flag_reorder_blocks_and_partition
2419 && optimize
2420 /* See gate_handle_reorder_blocks. We should not partition if
2421 we are going to omit the reordering. */
2422 && optimize_function_for_speed_p (cfun)
2423 && !DECL_ONE_ONLY (current_function_decl)
2424 && !user_defined_section_attribute);
2425 }
2426
2427 struct rtl_opt_pass pass_partition_blocks =
2428 {
2429 {
2430 RTL_PASS,
2431 "bbpart", /* name */
2432 gate_handle_partition_blocks, /* gate */
2433 partition_hot_cold_basic_blocks, /* execute */
2434 NULL, /* sub */
2435 NULL, /* next */
2436 0, /* static_pass_number */
2437 TV_REORDER_BLOCKS, /* tv_id */
2438 PROP_cfglayout, /* properties_required */
2439 0, /* properties_provided */
2440 0, /* properties_destroyed */
2441 0, /* todo_flags_start */
2442 0 /* todo_flags_finish */
2443 }
2444 };