gcc/bb-reorder.c

   1 /* Basic block reordering routines for the GNU compiler.
   2    Copyright (C) 2000-2015 Free Software Foundation, Inc.
   3
   4    This file is part of GCC.
   5
   6    GCC is free software; you can redistribute it and/or modify it
   7    under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3, or (at your option)
   9    any later version.
  10
  11    GCC is distributed in the hope that it will be useful, but WITHOUT
  12    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  13    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  14    License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with GCC; see the file COPYING3.  If not see
  18    <http://www.gnu.org/licenses/>.  */
  19
  20 /* This (greedy) algorithm constructs traces in several rounds.
  21    The construction starts from "seeds".  The seed for the first round
  22    is the entry point of the function.  When there is more than one seed,
  23    the one with the lowest key in the heap is selected first (see bb_to_key).
  24    Then the algorithm repeatedly adds the most probable successor to the end
  25    of a trace.  Finally it connects the traces.
  26
  27    There are two parameters: Branch Threshold and Exec Threshold.
  28    If the probability of an edge to a successor of the current basic block is
  29    lower than Branch Threshold or its frequency is lower than Exec Threshold,
  30    then the successor will be the seed in one of the next rounds.
  31    Each round has these parameters lower than the previous one.
  32    The last round has to have these parameters set to zero so that the
  33    remaining blocks are picked up.
  34
  35    The algorithm selects the most probable successor from all unvisited
  36    successors and successors that have been added to this trace.
  37    The other successors (that have not been "sent" to the next round) will be
  38    other seeds for this round and the secondary traces will start from them.
  39    If the successor has not been visited in this trace, it is added to the
  40    trace (however, there is some heuristic for simple branches).
  41    If the successor has been visited in this trace, a loop has been found.
  42    If the loop has many iterations, the loop is rotated so that the source
  43    block of the most probable edge going out of the loop is the last block
  44    of the trace.
  45    If the loop has few iterations and there is no edge from the last block of
  46    the loop going out of the loop, the loop header is duplicated.
  47
  48    When connecting traces, the algorithm first checks whether there is an edge
  49    from the last block of a trace to the first block of another trace.
  50    When there are still some unconnected traces it checks whether there exists
  51    a basic block BB such that BB is a successor of the last block of a trace
  52    and BB is a predecessor of the first block of another trace.  In this case,
  53    BB is duplicated, added at the end of the first trace and the traces are
  54    connected through it.
  55    The rest of traces are simply connected so there will be a jump to the
  56    beginning of the rest of traces.
  57
  58    The above description is for the full algorithm, which is used when the
  59    function is optimized for speed.  When the function is optimized for size,
  60    in order to reduce long jumps and connect more fallthru edges, the
  61    algorithm is modified as follows:
  62    (1) Break long traces to short ones.  A trace is broken at a block that has
  63    multiple predecessors/successors during trace discovery.  When connecting
  64    traces, only connect Trace n with Trace n + 1.  This change reduces most
  65    long jumps compared with the above algorithm.
  66    (2) Ignore the edge probability and frequency for fallthru edges.
  67    (3) Keep the original order of blocks when there is no chance to fall
  68    through.  We rely on the results of cfg_cleanup.
  69
  70    To implement the change for code size optimization, block's index is
  71    selected as the key and all traces are found in one round.
  72
  73    References:
  74
  75    "Software Trace Cache"
  76    A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
  77    http://citeseer.nj.nec.com/15361.html
  78
  79 */
  80
  81 #include "config.h"
  82 #include "system.h"
  83 #include "coretypes.h"
  84 #include "tm.h"
  85 #include "hash-set.h"
  86 #include "machmode.h"
  87 #include "vec.h"
  88 #include "double-int.h"
  89 #include "input.h"
  90 #include "alias.h"
  91 #include "symtab.h"
  92 #include "wide-int.h"
  93 #include "inchash.h"
  94 #include "tree.h"
  95 #include "rtl.h"
  96 #include "regs.h"
  97 #include "flags.h"
  98 #include "output.h"
  99 #include "target.h"
 100 #include "hashtab.h"
 101 #include "hard-reg-set.h"
 102 #include "function.h"
 103 #include "tm_p.h"
 104 #include "obstack.h"
 105 #include "statistics.h"
 106 #include "real.h"
 107 #include "fixed-value.h"
 108 #include "insn-config.h"
 109 #include "expmed.h"
 110 #include "dojump.h"
 111 #include "explow.h"
 112 #include "calls.h"
 113 #include "emit-rtl.h"
 114 #include "varasm.h"
 115 #include "stmt.h"
 116 #include "expr.h"
 117 #include "optabs.h"
 118 #include "params.h"
 119 #include "diagnostic-core.h"
 120 #include "toplev.h" /* user_defined_section_attribute */
 121 #include "tree-pass.h"
 122 #include "dominance.h"
 123 #include "cfg.h"
 124 #include "cfgrtl.h"
 125 #include "cfganal.h"
 126 #include "cfgbuild.h"
 127 #include "cfgcleanup.h"
 128 #include "predict.h"
 129 #include "basic-block.h"
 130 #include "df.h"
 131 #include "bb-reorder.h"
 132 #include "hash-map.h"
 133 #include "is-a.h"
 134 #include "plugin-api.h"
 135 #include "ipa-ref.h"
 136 #include "cgraph.h"
 137 #include "except.h"
 138 #include "fibonacci_heap.h"
 139
  140 /* The number of rounds.  In most cases there will only be 4 rounds, but
  141    when partitioning hot and cold basic blocks into separate sections of
  142    the object file there will be an extra round.  This is also the number
         of entries in the per-round threshold tables below.  */
  143 #define N_ROUNDS 5
  144
  145 /* Stubs in case we don't have a return insn.
  146    We have to check at run time too, not only compile time.  */
  147
  148 #ifndef HAVE_return
  149 #define HAVE_return 0
  150 #define gen_return() NULL_RTX
  151 #endif
  152
  153
      /* Per-target data of this pass.  When SWITCHABLE_TARGET is set, the
         instance in use is reached through THIS_TARGET_BB_REORDER.  */
  154 struct target_bb_reorder default_target_bb_reorder;
  155 #if SWITCHABLE_TARGET
  156 struct target_bb_reorder *this_target_bb_reorder = &default_target_bb_reorder;
  157 #endif
  158
      /* Shorthand for the X_UNCOND_JUMP_LENGTH field of the per-target data.
         NOTE(review): presumably the length of an unconditional jump insn;
         confirm against the code that sets it.  */
  159 #define uncond_jump_length \
  160   (this_target_bb_reorder->x_uncond_jump_length)
  161
  162 /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE,
         one entry per round.  */
  163 static const int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};
  164
  165 /* Exec thresholds in thousandths (per mille), one entry per round.
         find_traces scales both the maximum frequency and the maximum count
         of the function's entry blocks by these values.  */
  166 static const int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
  167
  168 /* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
  169    block the edge destination is not duplicated while connecting traces.  */
  170 #define DUPLICATION_THRESHOLD 100
  171
      /* Heap of basic blocks ordered by a long key (see bb_to_key), and the
         type of its nodes.  */
  172 typedef fibonacci_heap <long, basic_block_def> bb_heap_t;
  173 typedef fibonacci_node <long, basic_block_def> bb_heap_node_t;
 174
  175 /* Structure to hold needed information for each basic block.  The trace
         fields START_OF_TRACE, END_OF_TRACE and IN_TRACE hold zero-based
         indices into the array of traces, whereas VISITED holds such an index
         plus one.  */
  176 typedef struct bbro_basic_block_data_def
  177 {
  178   /* Which trace is the bb start of (-1 means it is not a start of any).  */
  179   int start_of_trace;
  180
  181   /* Which trace is the bb end of (-1 means it is not an end of any).  */
  182   int end_of_trace;
  183
  184   /* Which trace is the bb in?  */
  185   int in_trace;
  186
  187   /* Which trace was this bb visited in?  find_traces_1_round stores the
           trace's index plus one here (through mark_bb_visited); readers treat
           zero as "not visited yet".  */
  188   int visited;
  189
  190   /* Which heap is BB in (if any)?  NULL while BB is in no heap.  */
  191   bb_heap_t *heap;
  192
  193   /* Which heap node is BB in (if any)?  Set and cleared together with
           HEAP.  */
  194   bb_heap_node_t *node;
  195 } bbro_basic_block_data;
 196
  197 /* The current size of the following dynamic array.  bb_visited_trace
         asserts that a block's index is below this value.  */
  198 static int array_size;
  199
  200 /* The array which holds needed information for basic blocks.  It is
         indexed by the index of the basic block.  */
  201 static bbro_basic_block_data *bbd;
  202
  203 /* To avoid frequent reallocation the size of arrays is greater than needed,
  204    the number of elements is (not less than) 1.25 * size_wanted.  */
  205 #define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
  206
  207 /* Free the memory and set the pointer to NULL.  Asserts first that P is
         not null.  P is evaluated three times and assigned to, so it has to
         be an lvalue expression without side effects.  */
  208 #define FREE(P) (gcc_assert (P), free (P), P = 0)
 209
  210 /* Structure for holding information about a trace.  */
  211 struct trace
  212 {
  213   /* First and last basic block of the trace.  The blocks in between are
           reached by following the AUX field from FIRST until LAST.  */
  214   basic_block first, last;
  215
  216   /* The round of the STC creation which this trace was found in.  This is
           zero-based; the dump output prints it plus one.  */
  217   int round;
  218
  219   /* The length (i.e. the number of basic blocks) of the trace.  */
  220   int length;
  221 };
 222
  223 /* Maximum frequency and count of one of the entry blocks, i.e. of the
         successors of the function's entry block.  Both are recomputed by
         find_traces and used there to scale the per-round exec thresholds.  */
  224 static int max_entry_frequency;
  225 static gcov_type max_entry_count;
  226
  227 /* Local function prototypes.  */
  228 static void find_traces (int *, struct trace *);
  229 static basic_block rotate_loop (edge, struct trace *, int);
  230 static void mark_bb_visited (basic_block, int);
  231 static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
  232                                  int, bb_heap_t **, int);
  233 static basic_block copy_bb (basic_block, edge, basic_block, int);
  234 static long bb_to_key (basic_block);
  235 static bool better_edge_p (const_basic_block, const_edge, int, int, int, int,
  236                            const_edge);
  237 static bool connect_better_edge_p (const_edge, bool, int, const_edge,
  238                                    struct trace *);
  239 static void connect_traces (int, struct trace *);
  240 static bool copy_bb_p (const_basic_block, int);
  241 static bool push_to_next_round_p (const_basic_block, int, int, int, gcov_type);
 242 \f
 243 /* Return the trace number in which BB was visited.  */
 244
 245 static int
 246 bb_visited_trace (const_basic_block bb)
 247 {
 248   gcc_assert (bb->index < array_size);
 249   return bbd[bb->index].visited;
 250 }
 251
 252 /* This function marks BB that it was visited in trace number TRACE.  */
 253
 254 static void
 255 mark_bb_visited (basic_block bb, int trace)
 256 {
 257   bbd[bb->index].visited = trace;
 258   if (bbd[bb->index].heap)
 259     {
 260       bbd[bb->index].heap->delete_node (bbd[bb->index].node);
 261       bbd[bb->index].heap = NULL;
 262       bbd[bb->index].node = NULL;
 263     }
 264 }
 265
 266 /* Check to see if bb should be pushed into the next round of trace
 267    collections or not.  Reasons for pushing the block forward are 1).
 268    If the block is cold, we are doing partitioning, and there will be
 269    another round (cold partition blocks are not supposed to be
 270    collected into traces until the very last round); or 2). There will
 271    be another round, and the basic block is not "hot enough" for the
 272    current round of trace collection.  */
 273
 274 static bool
 275 push_to_next_round_p (const_basic_block bb, int round, int number_of_rounds,
 276                       int exec_th, gcov_type count_th)
 277 {
 278   bool there_exists_another_round;
 279   bool block_not_hot_enough;
 280
 281   there_exists_another_round = round < number_of_rounds - 1;
 282
 283   block_not_hot_enough = (bb->frequency < exec_th
 284                           || bb->count < count_th
 285                           || probably_never_executed_bb_p (cfun, bb));
 286
 287   if (there_exists_another_round
 288       && block_not_hot_enough)
 289     return true;
 290   else
 291     return false;
 292 }
 293
  294 /* Find the traces for Software Trace Cache.  The blocks of each trace are
  295    chained through their AUX fields.  Store the number of traces to
  296    *N_TRACES and the description of the traces to TRACES.

         The successors of the function's entry block seed the heap for the
         first round.  Each round then calls find_traces_1_round with that
         round's thresholds; that function leaves the seeds for the following
         round in HEAP.  If a dump file is open, every trace found is listed
         in it.

         *N_TRACES is not reset here, so the caller is presumably expected to
         initialize it.  */
  297
  298 static void
  299 find_traces (int *n_traces, struct trace *traces)
  300 {
  301   int i;
  302   int number_of_rounds;
  303   edge e;
  304   edge_iterator ei;
  305   bb_heap_t *heap = new bb_heap_t (LONG_MIN);
  306
  307   /* Run one round fewer than N_ROUNDS.  NOTE(review): N_ROUNDS reserves an
  308      extra last round for the cold blocks when partitioning hot/cold basic
  309      blocks, but nothing here enables it; confirm this is intended.  */
  310
  311   number_of_rounds = N_ROUNDS - 1;
  312
  313   /* Insert entry points of function into heap.  Also record the largest
           frequency and count seen among them.  */
  314   max_entry_frequency = 0;
  315   max_entry_count = 0;
  316   FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
  317     {
  318       bbd[e->dest->index].heap = heap;
  319       bbd[e->dest->index].node = heap->insert (bb_to_key (e->dest), e->dest);
  320       if (e->dest->frequency > max_entry_frequency)
  321         max_entry_frequency = e->dest->frequency;
  322       if (e->dest->count > max_entry_count)
  323         max_entry_count = e->dest->count;
  324     }
  325
  326   /* Find the traces.  */
  327   for (i = 0; i < number_of_rounds; i++)
  328     {
  329       gcov_type count_threshold;
  330
  331       if (dump_file)
  332         fprintf (dump_file, "STC - round %d\n", i + 1);
  333
            /* Scale MAX_ENTRY_COUNT by this round's per-mille threshold.  Small
               counts are multiplied first; large counts are divided first,
               presumably so that the intermediate product cannot overflow.  */
  334       if (max_entry_count < INT_MAX / 1000)
  335         count_threshold = max_entry_count * exec_threshold[i] / 1000;
  336       else
  337         count_threshold = max_entry_count / 1000 * exec_threshold[i];
  338
            /* The branch threshold is taken relative to REG_BR_PROB_BASE and
               the frequency threshold relative to MAX_ENTRY_FREQUENCY.  */
  339       find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
  340                            max_entry_frequency * exec_threshold[i] / 1000,
  341                            count_threshold, traces, n_traces, i, &heap,
  342                            number_of_rounds);
  343     }
        /* HEAP now points to the heap left behind by the last round.  */
  344   delete heap;
  345
  346   if (dump_file)
  347     {
  348       for (i = 0; i < *n_traces; i++)
  349         {
  350           basic_block bb;
  351           fprintf (dump_file, "Trace %d (round %d):  ", i + 1,
  352                    traces[i].round + 1);
                /* Walk the AUX chain; the loop stops at the last block, which
                   is printed separately together with the newline.  */
  353           for (bb = traces[i].first;
  354                bb != traces[i].last;
  355                bb = (basic_block) bb->aux)
  356             fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency);
  357           fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency);
  358         }
  359       fflush (dump_file);
  360     }
  361 }
 362
  363 /* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
  364    (with sequential number TRACE_N, i.e. the value bb_visited_trace
         returns for the blocks of TRACE).

         The blocks of the loop are walked along their AUX chain, starting at
         BACK_EDGE->dest, to find the best edge leaving the trace.  An edge
         whose destination is not visited yet or starts some trace is
         "preferred" and beats any non-preferred edge; among edges of the same
         class a higher frequency or a higher count wins.  If such an edge is
         found, the chain is relinked so that its source block becomes the
         last block of the loop; otherwise nothing is rotated and
         BACK_EDGE->src stays last.

         Return the block that now ends the trace; its AUX is cleared.  */
  365
  366 static basic_block
  367 rotate_loop (edge back_edge, struct trace *trace, int trace_n)
  368 {
  369   basic_block bb;
  370
  371   /* Information about the best end (end after rotation) of the loop.  */
  372   basic_block best_bb = NULL;
  373   edge best_edge = NULL;
  374   int best_freq = -1;
  375   gcov_type best_count = -1;
  376   /* The best edge is preferred when its destination is not visited yet
  377      or is a start block of some trace.  */
  378   bool is_preferred = false;
  379
  380   /* Find the most frequent edge that goes out from current trace.  */
  381   bb = back_edge->dest;
  382   do
  383     {
  384       edge e;
  385       edge_iterator ei;
  386
            /* Only edges that leave this trace, can fall through, are not
               complex and do not lead to the exit block are candidates.  */
  387       FOR_EACH_EDGE (e, ei, bb->succs)
  388         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
  389             && bb_visited_trace (e->dest) != trace_n
  390             && (e->flags & EDGE_CAN_FALLTHRU)
  391             && !(e->flags & EDGE_COMPLEX))
  392         {
  393           if (is_preferred)
  394             {
  395               /* The best edge is preferred.  */
  396               if (!bb_visited_trace (e->dest)
  397                   || bbd[e->dest->index].start_of_trace >= 0)
  398                 {
  399                   /* The current edge E is also preferred.  */
  400                   int freq = EDGE_FREQUENCY (e);
  401                   if (freq > best_freq || e->count > best_count)
  402                     {
  403                       best_freq = freq;
  404                       best_count = e->count;
  405                       best_edge = e;
  406                       best_bb = bb;
  407                     }
  408                 }
  409             }
  410           else
  411             {
  412               if (!bb_visited_trace (e->dest)
  413                   || bbd[e->dest->index].start_of_trace >= 0)
  414                 {
  415                   /* The current edge E is preferred.  It replaces whatever
                         non-preferred edge was recorded so far.  */
  416                   is_preferred = true;
  417                   best_freq = EDGE_FREQUENCY (e);
  418                   best_count = e->count;
  419                   best_edge = e;
  420                   best_bb = bb;
  421                 }
  422               else
  423                 {
                      /* Neither E nor the best edge so far is preferred.  */
  424                   int freq = EDGE_FREQUENCY (e);
  425                   if (!best_edge || freq > best_freq || e->count > best_count)
  426                     {
  427                       best_freq = freq;
  428                       best_count = e->count;
  429                       best_edge = e;
  430                       best_bb = bb;
  431                     }
  432                 }
  433             }
  434         }
            /* Advance along the trace.  The visible caller links
               BACK_EDGE->src back to BACK_EDGE->dest before calling, so the
               walk comes back to the loop header and stops there.  */
  435       bb = (basic_block) bb->aux;
  436     }
  437   while (bb != back_edge->dest);
  438
  439   if (best_bb)
  440     {
  441       /* Rotate the loop so that the BEST_EDGE goes out from the last block of
  442          the trace.  */
  443       if (back_edge->dest == trace->first)
  444         {
                /* The loop header starts the trace, so the block following
                   BEST_BB becomes the new first block of the trace.  */
  445           trace->first = (basic_block) best_bb->aux;
  446         }
  447       else
  448         {
  449           basic_block prev_bb;
  450
                /* Find the block that precedes the loop header in the trace
                   and link it to the block following BEST_BB instead.  */
  451           for (prev_bb = trace->first;
  452                prev_bb->aux != back_edge->dest;
  453                prev_bb = (basic_block) prev_bb->aux)
  454             ;
  455           prev_bb->aux = best_bb->aux;
  456
  457           /* Try to get rid of uncond jump to cond jump.  */
  458           if (single_succ_p (prev_bb))
  459             {
  460               basic_block header = single_succ (prev_bb);
  461
  462               /* Duplicate HEADER if it is a small block containing cond jump
  463                  in the end.  */
  464               if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
  465                   && !CROSSING_JUMP_P (BB_END (header)))
  466                 copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
  467             }
  468         }
  469     }
  470   else
  471     {
  472       /* We have not found suitable loop tail so do no rotation.  */
  473       best_bb = back_edge->src;
  474     }
        /* BEST_BB is the end of the trace now, so the chain stops there.  */
  475   best_bb->aux = NULL;
  476   return best_bb;
  477 }
 478
 479 /* One round of finding traces.  Find traces for BRANCH_TH and EXEC_TH i.e. do
 480    not include basic blocks whose probability is lower than BRANCH_TH or whose
 481    frequency is lower than EXEC_TH into traces (or whose count is lower than
 482    COUNT_TH).  Store the new traces into TRACES and modify the number of
 483    traces *N_TRACES.  Set the round (which the trace belongs to) to ROUND.
 484    The function expects starting basic blocks to be in *HEAP and will delete
 485    *HEAP and store starting points for the next round into new *HEAP.  */
 486
 487 static void
 488 find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
 489                      struct trace *traces, int *n_traces, int round,
 490                      bb_heap_t **heap, int number_of_rounds)
 491 {
 492   /* Heap for discarded basic blocks which are possible starting points for
 493      the next round.  */
 494   bb_heap_t *new_heap = new bb_heap_t (LONG_MIN);
 495   bool for_size = optimize_function_for_size_p (cfun);
 496
 497   while (!(*heap)->empty ())
 498     {
 499       basic_block bb;
 500       struct trace *trace;
 501       edge best_edge, e;
 502       long key;
 503       edge_iterator ei;
 504
 505       bb = (*heap)->extract_min ();
 506       bbd[bb->index].heap = NULL;
 507       bbd[bb->index].node = NULL;
 508
 509       if (dump_file)
 510         fprintf (dump_file, "Getting bb %d\n", bb->index);
 511
 512       /* If the BB's frequency is too low, send BB to the next round.  When
 513          partitioning hot/cold blocks into separate sections, make sure all
 514          the cold blocks (and ONLY the cold blocks) go into the (extra) final
 515          round.  When optimizing for size, do not push to next round.  */
 516
 517       if (!for_size
 518           && push_to_next_round_p (bb, round, number_of_rounds, exec_th,
 519                                    count_th))
 520         {
 521           int key = bb_to_key (bb);
 522           bbd[bb->index].heap = new_heap;
 523           bbd[bb->index].node = new_heap->insert (key, bb);
 524
 525           if (dump_file)
 526             fprintf (dump_file,
 527                      "  Possible start point of next round: %d (key: %d)\n",
 528                      bb->index, key);
 529           continue;
 530         }
 531
 532       trace = traces + *n_traces;
 533       trace->first = bb;
 534       trace->round = round;
 535       trace->length = 0;
 536       bbd[bb->index].in_trace = *n_traces;
 537       (*n_traces)++;
 538
 539       do
 540         {
 541           int prob, freq;
 542           bool ends_in_call;
 543
 544           /* The probability and frequency of the best edge.  */
 545           int best_prob = INT_MIN / 2;
 546           int best_freq = INT_MIN / 2;
 547
 548           best_edge = NULL;
 549           mark_bb_visited (bb, *n_traces);
 550           trace->length++;
 551
 552           if (dump_file)
 553             fprintf (dump_file, "Basic block %d was visited in trace %d\n",
 554                      bb->index, *n_traces - 1);
 555
 556           ends_in_call = block_ends_with_call_p (bb);
 557
 558           /* Select the successor that will be placed after BB.  */
 559           FOR_EACH_EDGE (e, ei, bb->succs)
 560             {
 561               gcc_assert (!(e->flags & EDGE_FAKE));
 562
 563               if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
 564                 continue;
 565
 566               if (bb_visited_trace (e->dest)
 567                   && bb_visited_trace (e->dest) != *n_traces)
 568                 continue;
 569
 570               if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
 571                 continue;
 572
 573               prob = e->probability;
 574               freq = e->dest->frequency;
 575
 576               /* The only sensible preference for a call instruction is the
 577                  fallthru edge.  Don't bother selecting anything else.  */
 578               if (ends_in_call)
 579                 {
 580                   if (e->flags & EDGE_CAN_FALLTHRU)
 581                     {
 582                       best_edge = e;
 583                       best_prob = prob;
 584                       best_freq = freq;
 585                     }
 586                   continue;
 587                 }
 588
 589               /* Edge that cannot be fallthru or improbable or infrequent
 590                  successor (i.e. it is unsuitable successor).  When optimizing
 591                  for size, ignore the probability and frequency.  */
 592               if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
 593                   || ((prob < branch_th || EDGE_FREQUENCY (e) < exec_th
 594                       || e->count < count_th) && (!for_size)))
 595                 continue;
 596
 597               /* If partitioning hot/cold basic blocks, don't consider edges
 598                  that cross section boundaries.  */
 599
 600               if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
 601                                  best_edge))
 602                 {
 603                   best_edge = e;
 604                   best_prob = prob;
 605                   best_freq = freq;
 606                 }
 607             }
 608
 609           /* If the best destination has multiple predecessors, and can be
 610              duplicated cheaper than a jump, don't allow it to be added
 611              to a trace.  We'll duplicate it when connecting traces.  */
 612           if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
 613               && copy_bb_p (best_edge->dest, 0))
 614             best_edge = NULL;
 615
 616           /* If the best destination has multiple successors or predecessors,
 617              don't allow it to be added when optimizing for size.  This makes
 618              sure predecessors with smaller index are handled before the best
  619              destination.  It breaks long trace and reduces long jumps.
 620
 621              Take if-then-else as an example.
 622                 A
 623                / \
 624               B   C
 625                \ /
 626                 D
 627              If we do not remove the best edge B->D/C->D, the final order might
 628              be A B D ... C.  C is at the end of the program.  If D's successors
 629              and D are complicated, might need long jumps for A->C and C->D.
 630              Similar issue for order: A C D ... B.
 631
 632              After removing the best edge, the final result will be ABCD/ ACBD.
 633              It does not add jump compared with the previous order.  But it
 634              reduces the possibility of long jumps.  */
 635           if (best_edge && for_size
 636               && (EDGE_COUNT (best_edge->dest->succs) > 1
 637                  || EDGE_COUNT (best_edge->dest->preds) > 1))
 638             best_edge = NULL;
 639
 640           /* Add all non-selected successors to the heaps.  */
 641           FOR_EACH_EDGE (e, ei, bb->succs)
 642             {
 643               if (e == best_edge
 644                   || e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
 645                   || bb_visited_trace (e->dest))
 646                 continue;
 647
 648               key = bb_to_key (e->dest);
 649
 650               if (bbd[e->dest->index].heap)
 651                 {
 652                   /* E->DEST is already in some heap.  */
 653                   if (key != bbd[e->dest->index].node->get_key ())
 654                     {
 655                       if (dump_file)
 656                         {
 657                           fprintf (dump_file,
 658                                    "Changing key for bb %d from %ld to %ld.\n",
 659                                    e->dest->index,
 660                                    (long) bbd[e->dest->index].node->get_key (),
 661                                    key);
 662                         }
 663                       bbd[e->dest->index].heap->replace_key
 664                         (bbd[e->dest->index].node, key);
 665                     }
 666                 }
 667               else
 668                 {
 669                   bb_heap_t *which_heap = *heap;
 670
 671                   prob = e->probability;
 672                   freq = EDGE_FREQUENCY (e);
 673
 674                   if (!(e->flags & EDGE_CAN_FALLTHRU)
 675                       || (e->flags & EDGE_COMPLEX)
 676                       || prob < branch_th || freq < exec_th
 677                       || e->count < count_th)
 678                     {
 679                       /* When partitioning hot/cold basic blocks, make sure
 680                          the cold blocks (and only the cold blocks) all get
 681                          pushed to the last round of trace collection.  When
 682                          optimizing for size, do not push to next round.  */
 683
 684                       if (!for_size && push_to_next_round_p (e->dest, round,
 685                                                              number_of_rounds,
 686                                                              exec_th, count_th))
 687                         which_heap = new_heap;
 688                     }
 689
 690                   bbd[e->dest->index].heap = which_heap;
 691                   bbd[e->dest->index].node = which_heap->insert (key, e->dest);
 692
 693                   if (dump_file)
 694                     {
 695                       fprintf (dump_file,
 696                                "  Possible start of %s round: %d (key: %ld)\n",
 697                                (which_heap == new_heap) ? "next" : "this",
 698                                e->dest->index, (long) key);
 699                     }
 700
 701                 }
 702             }
 703
 704           if (best_edge) /* Suitable successor was found.  */
 705             {
 706               if (bb_visited_trace (best_edge->dest) == *n_traces)
 707                 {
 708                   /* We do nothing with one basic block loops.  */
 709                   if (best_edge->dest != bb)
 710                     {
 711                       if (EDGE_FREQUENCY (best_edge)
 712                           > 4 * best_edge->dest->frequency / 5)
 713                         {
 714                           /* The loop has at least 4 iterations.  If the loop
 715                              header is not the first block of the function
 716                              we can rotate the loop.  */
 717
 718                           if (best_edge->dest
 719                               != ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb)
 720                             {
 721                               if (dump_file)
 722                                 {
 723                                   fprintf (dump_file,
 724                                            "Rotating loop %d - %d\n",
 725                                            best_edge->dest->index, bb->index);
 726                                 }
 727                               bb->aux = best_edge->dest;
 728                               bbd[best_edge->dest->index].in_trace =
 729                                                              (*n_traces) - 1;
 730                               bb = rotate_loop (best_edge, trace, *n_traces);
 731                             }
 732                         }
 733                       else
 734                         {
 735                           /* The loop has less than 4 iterations.  */
 736
 737                           if (single_succ_p (bb)
 738                               && copy_bb_p (best_edge->dest,
 739                                             optimize_edge_for_speed_p
 740                                             (best_edge)))
 741                             {
 742                               bb = copy_bb (best_edge->dest, best_edge, bb,
 743                                             *n_traces);
 744                               trace->length++;
 745                             }
 746                         }
 747                     }
 748
 749                   /* Terminate the trace.  */
 750                   break;
 751                 }
 752               else
 753                 {
 754                   /* Check for a situation
 755
 756                     A
 757                    /|
 758                   B |
 759                    \|
 760                     C
 761
 762                   where
 763                   EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
 764                     >= EDGE_FREQUENCY (AC).
 765                   (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
 766                   Best ordering is then A B C.
 767
 768                   When optimizing for size, A B C is always the best order.
 769
 770                   This situation is created for example by:
 771
 772                   if (A) B;
 773                   C;
 774
 775                   */
 776
 777                   FOR_EACH_EDGE (e, ei, bb->succs)
 778                     if (e != best_edge
 779                         && (e->flags & EDGE_CAN_FALLTHRU)
 780                         && !(e->flags & EDGE_COMPLEX)
 781                         && !bb_visited_trace (e->dest)
 782                         && single_pred_p (e->dest)
 783                         && !(e->flags & EDGE_CROSSING)
 784                         && single_succ_p (e->dest)
 785                         && (single_succ_edge (e->dest)->flags
 786                             & EDGE_CAN_FALLTHRU)
 787                         && !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
 788                         && single_succ (e->dest) == best_edge->dest
 789                         && (2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge)
 790                             || for_size))
 791                       {
 792                         best_edge = e;
 793                         if (dump_file)
 794                           fprintf (dump_file, "Selecting BB %d\n",
 795                                    best_edge->dest->index);
 796                         break;
 797                       }
 798
 799                   bb->aux = best_edge->dest;
 800                   bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
 801                   bb = best_edge->dest;
 802                 }
 803             }
 804         }
 805       while (best_edge);
 806       trace->last = bb;
 807       bbd[trace->first->index].start_of_trace = *n_traces - 1;
 808       bbd[trace->last->index].end_of_trace = *n_traces - 1;
 809
 810       /* The trace is terminated so we have to recount the keys in heap
 811          (some block can have a lower key because now one of its predecessors
 812          is an end of the trace).  */
 813       FOR_EACH_EDGE (e, ei, bb->succs)
 814         {
 815           if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
 816               || bb_visited_trace (e->dest))
 817             continue;
 818
 819           if (bbd[e->dest->index].heap)
 820             {
 821               key = bb_to_key (e->dest);
 822               if (key != bbd[e->dest->index].node->get_key ())
 823                 {
 824                   if (dump_file)
 825                     {
 826                       fprintf (dump_file,
 827                                "Changing key for bb %d from %ld to %ld.\n",
 828                                e->dest->index,
 829                                (long) bbd[e->dest->index].node->get_key (), key);
 830                     }
 831                   bbd[e->dest->index].heap->replace_key
 832                     (bbd[e->dest->index].node, key);
 833                 }
 834             }
 835         }
 836     }
 837
 838   delete (*heap);
 839
 840   /* "Return" the new heap.  */
 841   *heap = new_heap;
 842 }
 843
 844 /* Duplicate basic block OLD_BB, redirect edge E to the duplicate and link
 845    the duplicate into the aux chain right after BB.  TRACE is the number of
 846    the trace that receives the duplicate.  Returns the new block.  */
 847
 848 static basic_block
 849 copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
 850 {
 851   basic_block copy = duplicate_block (old_bb, e, bb);
 852
 853   BB_COPY_PARTITION (copy, old_bb);
 854
 855   /* E must have been redirected to the duplicate.  */
 856   gcc_assert (e->dest == copy);
 857
 858   if (dump_file)
 859     fprintf (dump_file, "Duplicated bb %d (created bb %d)\n",
 860              old_bb->index, copy->index);
 861
 862   /* Grow BBD when the block numbering has outrun it.  */
 863   if (copy->index >= array_size
 864       || last_basic_block_for_fn (cfun) > array_size)
 865     {
 866       int grown = MAX (last_basic_block_for_fn (cfun), copy->index + 1);
 867       int slot;
 868
 869       grown = GET_ARRAY_SIZE (grown);
 870       bbd = XRESIZEVEC (bbro_basic_block_data, bbd, grown);
 871
 872       /* Only the freshly added tail needs initializing.  */
 873       for (slot = array_size; slot < grown; slot++)
 874         {
 875           bbro_basic_block_data *fresh = &bbd[slot];
 876
 877           fresh->start_of_trace = -1;
 878           fresh->end_of_trace = -1;
 879           fresh->in_trace = -1;
 880           fresh->visited = 0;
 881           fresh->heap = NULL;
 882           fresh->node = NULL;
 883         }
 884       array_size = grown;
 885
 886       if (dump_file)
 887         fprintf (dump_file, "Growing the dynamic array to %d elements.\n",
 888                  array_size);
 889     }
 890
 891   gcc_assert (!bb_visited_trace (e->dest));
 892   mark_bb_visited (copy, trace);
 893
 894   /* Splice the duplicate in between BB and its former follower.  */
 895   copy->aux = bb->aux;
 896   bb->aux = copy;
 897   bbd[copy->index].in_trace = trace;
 898   return copy;
 899 }
 900
 901 /* Compute the heap key of basic block BB and return it.  */
 902
 903 static long
 904 bb_to_key (basic_block bb)
 905 {
 906   edge pred;
 907   edge_iterator it;
 908   int best_freq = 0;
 909
 910   /* For size, the key is the block index so the original order is kept.  */
 911   if (optimize_function_for_size_p (cfun))
 912     return bb->index;
 913
 914   /* Do not start in cold or probably never executed blocks.  */
 915   if (BB_PARTITION (bb) == BB_COLD_PARTITION
 916       || probably_never_executed_bb_p (cfun, bb))
 917     return BB_FREQ_MAX;
 918
 919   /* Find the most frequent incoming edge that is either a DFS back edge or
 920      comes from a block that ends some trace.  */
 921   FOR_EACH_EDGE (pred, it, bb->preds)
 922     {
 923       bool ends_trace = (pred->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
 924                          && bbd[pred->src->index].end_of_trace >= 0);
 925
 926       if (!ends_trace && !(pred->flags & EDGE_DFS_BACK))
 927         continue;
 928
 929       int freq = EDGE_FREQUENCY (pred);
 930       if (best_freq < freq)
 931         best_freq = freq;
 932     }
 933
 934   /* Without such an edge the key is just the negated block frequency.  */
 935   if (!best_freq)
 936     return -bb->frequency;
 937
 938   /* A block with such an edge should get a significantly lower key.  */
 939   return -(100 * BB_FREQ_MAX + 100 * best_freq + bb->frequency);
 940 }
 941
 942 /* Tell whether edge E, leaving basic block BB, should replace the temporary
 943    best edge CUR_BEST_EDGE.  PROB is the probability of E and FREQ the
 944    frequency of its destination; BEST_PROB and BEST_FREQ are the values
 945    recorded for the temporary best edge.  Values lying within a tenth of the
 946    best ones are treated as equivalent: probability is compared first, then
 947    frequency, and finally whether the destination directly follows BB.
 948    Returns true when E is the better edge.  */
 949
 950 static bool
 951 better_edge_p (const_basic_block bb, const_edge e, int prob, int freq,
 952                int best_prob, int best_freq, const_edge cur_best_edge)
 953 {
 954   /* Tolerances inside which two values count as equivalent.  */
 955   const int prob_slack = best_prob / 10;
 956   const int freq_slack = best_freq / 10;
 957
 958   /* Outcome of the probability/frequency comparison below.  */
 959   bool wins;
 960
 961   /* For size, prefer the smaller destination index to keep the original
 962      order.  */
 963   if (optimize_function_for_size_p (cfun))
 964     return (!cur_best_edge
 965             || e->dest->index < cur_best_edge->dest->index);
 966
 967   if (prob > best_prob + prob_slack)
 968     /* E is clearly more probable.  */
 969     wins = true;
 970   else if (prob < best_prob - prob_slack)
 971     /* E is clearly less probable.  */
 972     wins = false;
 973   else if (freq < best_freq - freq_slack)
 974     /* The probabilities are about equal.  A higher successor frequency
 975        means that some other edge also enters that successor, so the
 976        successor with the lower frequency is the better choice.  */
 977     wins = true;
 978   else if (freq > best_freq + freq_slack)
 979     /* The successor of E is the more frequent one, hence worse.  */
 980     wins = false;
 981   else
 982     /* Probabilities and frequencies are both equivalent; take E only when
 983        its destination immediately follows BB.  */
 984     wins = (e->dest->prev_bb == bb);
 985
 986   if (wins)
 987     return true;
 988
 989   /* E lost the comparison, but when doing hot/cold partitioning a
 990      non-crossing edge is always favored over a crossing one.  */
 991   if (!flag_reorder_blocks_and_partition || !cur_best_edge)
 992     return false;
 993
 994   const bool best_crosses = (cur_best_edge->flags & EDGE_CROSSING) != 0;
 995   const bool e_crosses = (e->flags & EDGE_CROSSING) != 0;
 996
 997   return best_crosses && !e_crosses;
 998 }
 999
1000 /* Return true when edge E is a better connection candidate than the temporary
1001    best edge CUR_BEST_EDGE (which may be NULL, in which case E always wins).
1002    SRC_INDEX_P selects which end of the edges is compared: the source blocks
1003    when true, the destination blocks otherwise.  BEST_LEN is the length of the
1004    trace attached to that end of CUR_BEST_EDGE and TRACES is the trace array.
1005    When optimizing for size the edge whose block has the smaller index wins.
1006    Otherwise the more probable edge wins, and on equal probabilities the edge
1007    attached to the longer trace.  */
1008
1009 static bool
1010 connect_better_edge_p (const_edge e, bool src_index_p, int best_len,
1011                        const_edge cur_best_edge, struct trace *traces)
1012 {
1013   const_basic_block e_bb;
1014   const_basic_block best_bb;
1015   int trace_no;
1016
1017   /* Anything beats having no candidate at all.  */
1018   if (!cur_best_edge)
1019     return true;
1020
1021   /* Pick the end of each edge that takes part in the comparison.  */
1022   if (src_index_p)
1023     {
1024       e_bb = e->src;
1025       best_bb = cur_best_edge->src;
1026     }
1027   else
1028     {
1029       e_bb = e->dest;
1030       best_bb = cur_best_edge->dest;
1031     }
1032
1033   /* The smaller index is better to keep the original order.  */
1034   if (optimize_function_for_size_p (cfun))
1035     return e_bb->index < best_bb->index;
1036
1037   /* A difference in probability settles the matter.  */
1038   if (e->probability != cur_best_edge->probability)
1039     return e->probability > cur_best_edge->probability;
1040
1041   /* The probabilities are equal, so the edge attached to the longer trace
1042      wins.  The trace of E is the one ending at its source, or the one
1043      starting at its destination.  */
1044   if (src_index_p)
1045     trace_no = bbd[e_bb->index].end_of_trace;
1046   else
1047     trace_no = bbd[e_bb->index].start_of_trace;
1048
1049   return traces[trace_no].length > best_len;
1050 }
1066
1067 /* Connect traces in array TRACES, N_TRACES is the count of traces.  */
1068 /* The order is recorded by chaining blocks through their aux fields.  */
1069 static void
1070 connect_traces (int n_traces, struct trace *traces)
1071 {
1072   int i;
1073   bool *connected;
1074   bool two_passes;
1075   int last_trace;
1076   int current_pass;
1077   int current_partition;
1078   int freq_threshold;
1079   gcov_type count_threshold;
1080   bool for_size = optimize_function_for_size_p (cfun);
1081   /* Thresholds used below when deciding whether to duplicate a block.  */
1082   freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
1083   if (max_entry_count < INT_MAX / 1000)
1084     count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
1085   else
1086     count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;
1087   /* CONNECTED[T] records whether trace T has already been chained.  */
1088   connected = XCNEWVEC (bool, n_traces);
1089   last_trace = -1;
1090   current_pass = 1;
1091   current_partition = BB_PARTITION (traces[0].first);
1092   two_passes = false;
1093   /* A second pass is needed when traces start in different partitions.  */
1094   if (crtl->has_bb_partition)
1095     for (i = 0; i < n_traces && !two_passes; i++)
1096       if (BB_PARTITION (traces[0].first)
1097           != BB_PARTITION (traces[i].first))
1098         two_passes = true;
1099   /* With two passes, the traces are walked once per partition.  */
1100   for (i = 0; i < n_traces || (two_passes && current_pass == 1) ; i++)
1101     {
1102       int t = i;
1103       int t2;
1104       edge e, best;
1105       int best_len;
1106       /* End of the first pass: restart at trace 0 in the other partition.  */
1107       if (i >= n_traces)
1108         {
1109           gcc_assert (two_passes && current_pass == 1);
1110           i = 0;
1111           t = i;
1112           current_pass = 2;
1113           if (current_partition == BB_HOT_PARTITION)
1114             current_partition = BB_COLD_PARTITION;
1115           else
1116             current_partition = BB_HOT_PARTITION;
1117         }
1118
1119       if (connected[t])
1120         continue;
1121       /* Leave traces of the other partition to the other pass.  */
1122       if (two_passes
1123           && BB_PARTITION (traces[t].first) != current_partition)
1124         continue;
1125
1126       connected[t] = true;
1127
1128       /* Find the predecessor traces.  */
1129       for (t2 = t; t2 > 0;)
1130         {
1131           edge_iterator ei;
1132           best = NULL;
1133           best_len = 0;
1134           FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
1135             {
1136               int si = e->src->index;
1137
1138               if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1139                   && (e->flags & EDGE_CAN_FALLTHRU)
1140                   && !(e->flags & EDGE_COMPLEX)
1141                   && bbd[si].end_of_trace >= 0
1142                   && !connected[bbd[si].end_of_trace]
1143                   && (BB_PARTITION (e->src) == current_partition)
1144                   && connect_better_edge_p (e, true, best_len, best, traces))
1145                 {
1146                   best = e;
1147                   best_len = traces[bbd[si].end_of_trace].length;
1148                 }
1149             }
1150           if (best)
1151             {
1152               best->src->aux = best->dest;
1153               t2 = bbd[best->src->index].end_of_trace;
1154               connected[t2] = true;
1155
1156               if (dump_file)
1157                 {
1158                   fprintf (dump_file, "Connection: %d %d\n",
1159                            best->src->index, best->dest->index);
1160                 }
1161             }
1162           else
1163             break;
1164         }
1165       /* Append the chain found above after the trace laid out last.  */
1166       if (last_trace >= 0)
1167         traces[last_trace].last->aux = traces[t2].first;
1168       last_trace = t;
1169
1170       /* Find the successor traces.  */
1171       while (1)
1172         {
1173           /* Find the continuation of the chain.  */
1174           edge_iterator ei;
1175           best = NULL;
1176           best_len = 0;
1177           FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1178             {
1179               int di = e->dest->index;
1180
1181               if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1182                   && (e->flags & EDGE_CAN_FALLTHRU)
1183                   && !(e->flags & EDGE_COMPLEX)
1184                   && bbd[di].start_of_trace >= 0
1185                   && !connected[bbd[di].start_of_trace]
1186                   && (BB_PARTITION (e->dest) == current_partition)
1187                   && connect_better_edge_p (e, false, best_len, best, traces))
1188                 {
1189                   best = e;
1190                   best_len = traces[bbd[di].start_of_trace].length;
1191                 }
1192             }
1193           /* For size, BEST must pass the extra checks below.  */
1194           if (for_size)
1195             {
1196               if (!best)
1197                 /* Stop finding the successor traces.  */
1198                 break;
1199
1200               /* It is OK to connect block n with block n + 1 or a block
1201                  before n.  For others, only connect to the loop header.  */
1202               if (best->dest->index > (traces[t].last->index + 1))
1203                 {
1204                   int count = EDGE_COUNT (best->dest->preds);
1205
1206                   FOR_EACH_EDGE (e, ei, best->dest->preds)
1207                     if (e->flags & EDGE_DFS_BACK)
1208                       count--;
1209
1210                   /* If dest has multiple predecessors, skip it.  We expect
1211                      that one predecessor with smaller index connects with it
1212                      later.  */
1213                   if (count != 1)
1214                     break;
1215                 }
1216
1217               /* Only connect Trace n with Trace n + 1.  It is conservative
1218                  to keep the order as close as possible to the original order.
1219                  It also helps to reduce long jumps.  */
1220               if (last_trace != bbd[best->dest->index].start_of_trace - 1)
1221                 break;
1222
1223               if (dump_file)
1224                 fprintf (dump_file, "Connection: %d %d\n",
1225                          best->src->index, best->dest->index);
1226
1227               t = bbd[best->dest->index].start_of_trace;
1228               traces[last_trace].last->aux = traces[t].first;
1229               connected[t] = true;
1230               last_trace = t;
1231             }
1232           else if (best)
1233             {
1234               if (dump_file)
1235                 {
1236                   fprintf (dump_file, "Connection: %d %d\n",
1237                            best->src->index, best->dest->index);
1238                 }
1239               t = bbd[best->dest->index].start_of_trace;
1240               traces[last_trace].last->aux = traces[t].first;
1241               connected[t] = true;
1242               last_trace = t;
1243             }
1244           else
1245             {
1246               /* Try to connect the traces by duplication of 1 block.  */
1247               edge e2;
1248               basic_block next_bb = NULL;
1249               bool try_copy = false;
1250
1251               FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1252                 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1253                     && (e->flags & EDGE_CAN_FALLTHRU)
1254                     && !(e->flags & EDGE_COMPLEX)
1255                     && (!best || e->probability > best->probability))
1256                   {
1257                     edge_iterator ei;
1258                     edge best2 = NULL;
1259                     int best2_len = 0;
1260
1261                     /* If the destination is a start of a trace which is only
1262                        one block long, then no need to search the successor
1263                        blocks of the trace.  Accept it.  */
1264                     if (bbd[e->dest->index].start_of_trace >= 0
1265                         && traces[bbd[e->dest->index].start_of_trace].length
1266                            == 1)
1267                       {
1268                         best = e;
1269                         try_copy = true;
1270                         continue;
1271                       }
1272
1273                     FOR_EACH_EDGE (e2, ei, e->dest->succs)
1274                       {
1275                         int di = e2->dest->index;
1276
1277                         if (e2->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
1278                             || ((e2->flags & EDGE_CAN_FALLTHRU)
1279                                 && !(e2->flags & EDGE_COMPLEX)
1280                                 && bbd[di].start_of_trace >= 0
1281                                 && !connected[bbd[di].start_of_trace]
1282                                 && BB_PARTITION (e2->dest) == current_partition
1283                                 && EDGE_FREQUENCY (e2) >= freq_threshold
1284                                 && e2->count >= count_threshold
1285                                 && (!best2
1286                                     || e2->probability > best2->probability
1287                                     || (e2->probability == best2->probability
1288                                         && traces[bbd[di].start_of_trace].length
1289                                            > best2_len))))
1290                           {
1291                             best = e;
1292                             best2 = e2;
1293                             if (e2->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1294                               best2_len = traces[bbd[di].start_of_trace].length;
1295                             else
1296                               best2_len = INT_MAX;
1297                             next_bb = e2->dest;
1298                             try_copy = true;
1299                           }
1300                       }
1301                   }
1302               /* No duplication when the function has a bb partition.  */
1303               if (crtl->has_bb_partition)
1304                 try_copy = false;
1305
1306               /* Copy tiny blocks always; copy larger blocks only when the
1307                  edge is traversed frequently enough.  */
1308               if (try_copy
1309                   && copy_bb_p (best->dest,
1310                                 optimize_edge_for_speed_p (best)
1311                                 && EDGE_FREQUENCY (best) >= freq_threshold
1312                                 && best->count >= count_threshold))
1313                 {
1314                   basic_block new_bb;
1315
1316                   if (dump_file)
1317                     {
1318                       fprintf (dump_file, "Connection: %d %d ",
1319                                traces[t].last->index, best->dest->index);
1320                       if (!next_bb)
1321                         fputc ('\n', dump_file);
1322                       else if (next_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
1323                         fprintf (dump_file, "exit\n");
1324                       else
1325                         fprintf (dump_file, "%d\n", next_bb->index);
1326                     }
1327
1328                   new_bb = copy_bb (best->dest, best, traces[t].last, t);
1329                   traces[t].last = new_bb;
1330                   if (next_bb && next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
1331                     {
1332                       t = bbd[next_bb->index].start_of_trace;
1333                       traces[last_trace].last->aux = traces[t].first;
1334                       connected[t] = true;
1335                       last_trace = t;
1336                     }
1337                   else
1338                     break;      /* Stop finding the successor traces.  */
1339                 }
1340               else
1341                 break;  /* Stop finding the successor traces.  */
1342             }
1343         }
1344     }
1345   /* Dump the order by following the aux chain from the first trace.  */
1346   if (dump_file)
1347     {
1348       basic_block bb;
1349
1350       fprintf (dump_file, "Final order:\n");
1351       for (bb = traces[0].first; bb; bb = (basic_block) bb->aux)
1352         fprintf (dump_file, "%d ", bb->index);
1353       fprintf (dump_file, "\n");
1354       fflush (dump_file);
1355     }
1356   /* Done with the CONNECTED array.  */
1357   FREE (connected);
1358 }
1359
1360 /* Decide whether basic block BB can and should be duplicated.  A nonzero
1361    CODE_MAY_GROW says that duplication is allowed to increase code size; it
1362    raises the size limit for blocks that are optimized for speed.  */
1363
1364 static bool
1365 copy_bb_p (const_basic_block bb, int code_may_grow)
1366 {
1367   rtx_insn *cur;
1368   int limit = uncond_jump_length;
1369   int total = 0;
1370
1371   /* Reject blocks with zero frequency, blocks with fewer than two
1372      predecessors and blocks that cannot be duplicated at all.  */
1373   if (!bb->frequency
1374       || EDGE_COUNT (bb->preds) < 2
1375       || !can_duplicate_block_p (bb))
1376     return false;
1377
1378   /* Avoid duplicating blocks which have many successors (PR/13430).  */
1379   if (EDGE_COUNT (bb->succs) > 8)
1380     return false;
1381
1382   if (code_may_grow && optimize_bb_for_speed_p (bb))
1383     limit *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
1384
1385   /* Sum the minimum lengths of all insns in BB that satisfy INSN_P.  */
1386   FOR_BB_INSNS (bb, cur)
1387     if (INSN_P (cur))
1388       total += get_attr_min_length (cur);
1389
1390   if (total > limit)
1391     {
1392       if (dump_file)
1393         fprintf (dump_file,
1394                  "Block %d can't be copied because its size = %d.\n",
1395                  bb->index, total);
1396
1397       return false;
1398     }
1399
1400   return true;
1401 }
1402
1403 /* Measure an unconditional jump: between start_sequence and end_sequence,
1404    emit a label and a jump to it, and return the jump's minimum length.  */
1405
1406 int
1407 get_uncond_jump_length (void)
1408 {
1409   int min_len;
1410
1411   start_sequence ();
1412   rtx_insn *target = emit_label (gen_label_rtx ());
1413   rtx_insn *branch = emit_jump_insn (gen_jump (target));
1414   min_len = get_attr_min_length (branch);
1415   end_sequence ();
1416
1417   return min_len;
1418 }
1419
1420 /* The landing pad OLD_LP, located in block OLD_BB, is reached from both
1421    partitions.  Give the predecessors that live in the other partition a
1422    landing pad of their own, so that no EH edge crosses partitions.  */
1423
1424 static void
1425 fix_up_crossing_landing_pad (eh_landing_pad old_lp, basic_block old_bb)
1426 {
1427   /* Build the replacement landing pad in the same EH region, sharing the
1428      post-landing-pad of OLD_LP.  */
1429   eh_landing_pad dup_lp = gen_eh_landing_pad (old_lp->region);
1430   dup_lp->post_landing_pad = old_lp->post_landing_pad;
1431   dup_lp->landing_pad = gen_label_rtx ();
1432   LABEL_PRESERVE_P (dup_lp->landing_pad) = 1;
1433
1434   /* Emit the contents of the new block: its label, the landing pad
1435      expansion for the region, and a jump to the single successor of the
1436      block that holds the label of OLD_LP.  */
1437   rtx_insn *head = emit_label (dup_lp->landing_pad);
1438
1439   expand_dw2_landing_pad_for_region (old_lp->region);
1440
1441   basic_block post_bb = single_succ (BLOCK_FOR_INSN (old_lp->landing_pad));
1442   rtx post_label = block_label (post_bb);
1443   rtx_insn *tail = emit_jump_insn (gen_jump (post_label));
1444   JUMP_LABEL (tail) = post_label;
1445
1446   /* Create the new basic block from those insns, after the block that
1447      precedes the exit block, and chain it behind that block.  */
1448   basic_block last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
1449   basic_block dup_bb = create_basic_block (head, tail, last_bb);
1450   dup_bb->aux = last_bb->aux;
1451   last_bb->aux = dup_bb;
1452
1453   emit_barrier_after_bb (dup_bb);
1454   make_edge (dup_bb, post_bb, 0);
1455
1456   /* The new block belongs to the partition opposite to that of OLD_BB.  */
1457   unsigned other_partition
1458     = BB_PARTITION (old_bb) ^ (BB_HOT_PARTITION | BB_COLD_PARTITION);
1459   BB_SET_PARTITION (dup_bb, other_partition);
1460
1461   /* Move every predecessor edge coming from that partition over to the new
1462      block.  The iterator is advanced only for edges that stay, presumably
1463      because redirecting an edge takes it out of the vector being walked.  */
1464   edge_iterator ei = ei_start (old_bb->preds);
1465   edge pred;
1466   while ((pred = ei_safe_edge (ei)) != NULL)
1467     {
1468       if (BB_PARTITION (pred->src) != other_partition)
1469         {
1470           ei_next (&ei);
1471           continue;
1472         }
1473
1474       /* Point the EH region note on the source's last insn at the new pad.  */
1475       rtx note = find_reg_note (BB_END (pred->src), REG_EH_REGION, NULL_RTX);
1476       gcc_assert (note != NULL);
1477       gcc_checking_assert (INTVAL (XEXP (note, 0)) == old_lp->index);
1478       XEXP (note, 0) = GEN_INT (dup_lp->index);
1479
1480       /* Adjust the edge to the new destination.  */
1481       redirect_edge_succ (pred, dup_bb);
1482     }
1483 }
1484
1485
1486 /* Ensure that all hot bbs are included in a hot path through the
1487    procedure.  This is done by calling this function twice, once
1488    with WALK_UP true (to look for paths from the entry to hot bbs) and
1489    once with WALK_UP false (to look for paths from hot bbs to the exit).
1490    Returns the updated value of COLD_BB_COUNT and adds newly-hot bbs
1491    to BBS_IN_HOT_PARTITION.  COLD_BB_COUNT must be nonzero on entry.  */
1492
1493 static unsigned int
1494 sanitize_hot_paths (bool walk_up, unsigned int cold_bb_count,
1495                     vec<basic_block> *bbs_in_hot_partition)
1496 {
1497   /* Callers check this.  */
1498   gcc_checking_assert (cold_bb_count);
1499
1500   /* Keep examining hot bbs while we still have some left to check
1501      and there are remaining cold bbs.  */
1502   vec<basic_block> hot_bbs_to_check = bbs_in_hot_partition->copy ();
1503   while (!hot_bbs_to_check.is_empty () && cold_bb_count)
1504     {
1505       basic_block bb = hot_bbs_to_check.pop ();
1506       vec<edge, va_gc> *edges = walk_up ? bb->preds : bb->succs;
1507       edge e;
1508       edge_iterator ei;
1509       int highest_probability = 0;
1510       int highest_freq = 0;
1511       gcov_type highest_count = 0;
1512       bool found = false;
1513
1514       /* Walk the preds/succs and check if there is at least one already
1515          marked hot.  Keep track of the most frequent pred/succ so that we
1516          can mark it hot if we don't find one.  */
1517       FOR_EACH_EDGE (e, ei, edges)
1518         {
1519           basic_block reach_bb = walk_up ? e->src : e->dest;
1520
1521           if (e->flags & EDGE_DFS_BACK)
1522             continue;
1523
1524           if (BB_PARTITION (reach_bb) != BB_COLD_PARTITION)
1525             {
1526               found = true;
1527               break;
1528             }
1529           /* Record the highest edge count, frequency and probability seen;
1530              the loop further down selects the hottest edge with them.  */
1531           if (e->count > highest_count)
1532             highest_count = e->count;
1533           int edge_freq = EDGE_FREQUENCY (e);
1534           if (edge_freq > highest_freq)
1535             highest_freq = edge_freq;
1536           if (e->probability > highest_probability)
1537             highest_probability = e->probability;
1538         }
1539
1540       /* If bb is reached by (or reaches, in the case of !WALK_UP) another hot
1541          block (or unpartitioned, e.g. the entry block) then it is ok.  If not,
1542          then the most frequent pred (or succ) needs to be adjusted.  In the
1543          case where multiple preds/succs have the same frequency (e.g. a
1544          50-50 branch), then both will be adjusted.  */
1545       if (found)
1546         continue;
1547
1548       FOR_EACH_EDGE (e, ei, edges)
1549         {
1550           if (e->flags & EDGE_DFS_BACK)
1551             continue;
1552           /* Select the hottest edge using the edge count, if it is non-zero,
1553              then fallback to the edge frequency and finally the edge
1554              probability.  */
1555           if (highest_count)
1556             {
1557               if (e->count < highest_count)
1558                 continue;
1559             }
1560           else if (highest_freq)
1561             {
1562               if (EDGE_FREQUENCY (e) < highest_freq)
1563                 continue;
1564             }
1565           else if (e->probability < highest_probability)
1566             continue;
1567
1568           basic_block reach_bb = walk_up ? e->src : e->dest;
1569
1570           /* We have a hot bb with an immediate dominator that is cold.
1571              The dominator needs to be re-marked hot.  */
1572           BB_SET_PARTITION (reach_bb, BB_HOT_PARTITION);
1573           cold_bb_count--;
1574
1575           /* Now we need to examine newly-hot reach_bb to see if it is also
1576              dominated by a cold bb.  */
1577           bbs_in_hot_partition->safe_push (reach_bb);
1578           hot_bbs_to_check.safe_push (reach_bb);
1579         }
1580     }
1581
1582   /* The worklist copy owns its storage and is not freed automatically.  */
1583   hot_bbs_to_check.release ();
1584   return cold_bb_count;
1585 }
1586
1587
1588 /* Find the basic blocks that are rarely executed and need to be moved to
1589    a separate section of the .o file (to cut down on paging and improve
1590    cache locality).  Return a vector of all edges that cross.  */
1591
1592 static vec<edge>
1593 find_rarely_executed_basic_blocks_and_crossing_edges (void)
1594 {
1595   vec<edge> crossing_edges = vNULL;
1596   basic_block bb;
1597   edge e;
1598   edge_iterator ei;
1599   unsigned int cold_bb_count = 0;
1600   auto_vec<basic_block> bbs_in_hot_partition;
1601
1602   /* Mark which partition (hot/cold) each basic block belongs in.  */
1603   FOR_EACH_BB_FN (bb, cfun)
1604     {
1605       bool cold_bb = false;
1606
1607       if (probably_never_executed_bb_p (cfun, bb))
1608         {
1609           /* Handle profile insanities created by upstream optimizations
1610              by also checking the incoming edge weights. If there is a non-cold
1611              incoming edge, conservatively prevent this block from being split
1612              into the cold section.  */
1613           cold_bb = true;
1614           FOR_EACH_EDGE (e, ei, bb->preds)
1615             if (!probably_never_executed_edge_p (cfun, e))
1616               {
1617                 cold_bb = false;
1618                 break;
1619               }
1620         }
1621       if (cold_bb)
1622         {
1623           BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1624           cold_bb_count++;
1625         }
1626       else
1627         {
1628           BB_SET_PARTITION (bb, BB_HOT_PARTITION);
1629           bbs_in_hot_partition.safe_push (bb);
1630         }
1631     }
1632
1633   /* Ensure that hot bbs are included along a hot path from the entry to exit.
1634      Several different possibilities may include cold bbs along all paths
1635      to/from a hot bb. One is that there are edge weight insanities
1636      due to optimization phases that do not properly update basic block profile
1637      counts. The second is that the entry of the function may not be hot, because
1638      it is entered fewer times than the number of profile training runs, but there
1639      is a loop inside the function that causes blocks within the function to be
1640      above the threshold for hotness. This is fixed by walking up from hot bbs
1641      to the entry block, and then down from hot bbs to the exit, performing
1642      partitioning fixups as necessary.  */
1643   if (cold_bb_count)
1644     {
1645       mark_dfs_back_edges ();
1646       cold_bb_count = sanitize_hot_paths (true, cold_bb_count,
1647                                           &bbs_in_hot_partition);
1648       if (cold_bb_count)
1649         sanitize_hot_paths (false, cold_bb_count, &bbs_in_hot_partition);
1650     }
1651
1652   /* The format of .gcc_except_table does not allow landing pads to
1653      be in a different partition as the throw.  Fix this by either
1654      moving or duplicating the landing pads.  */
1655   if (cfun->eh->lp_array)
1656     {
1657       unsigned i;
1658       eh_landing_pad lp;
1659
1660       FOR_EACH_VEC_ELT (*cfun->eh->lp_array, i, lp)
1661         {
1662           bool all_same, all_diff;
1663
1664           if (lp == NULL
1665               || lp->landing_pad == NULL_RTX
1666               || !LABEL_P (lp->landing_pad))
1667             continue;
1668
1669           all_same = all_diff = true;
1670           bb = BLOCK_FOR_INSN (lp->landing_pad);
1671           FOR_EACH_EDGE (e, ei, bb->preds)
1672             {
1673               gcc_assert (e->flags & EDGE_EH);
1674               if (BB_PARTITION (bb) == BB_PARTITION (e->src))
1675                 all_diff = false;
1676               else
1677                 all_same = false;
1678             }
1679
1680           if (all_same)
1681             ;
1682           else if (all_diff)
1683             {
1684               int which = BB_PARTITION (bb);
1685               which ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1686               BB_SET_PARTITION (bb, which);
1687             }
1688           else
1689             fix_up_crossing_landing_pad (lp, bb);
1690         }
1691     }
1692
1693   /* Mark every edge that crosses between sections.  */
1694
1695   FOR_EACH_BB_FN (bb, cfun)
1696     FOR_EACH_EDGE (e, ei, bb->succs)
1697       {
1698         unsigned int flags = e->flags;
1699
1700         /* We should never have EDGE_CROSSING set yet.  */
1701         gcc_checking_assert ((flags & EDGE_CROSSING) == 0);
1702
1703         if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1704             && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1705             && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
1706           {
1707             crossing_edges.safe_push (e);
1708             flags |= EDGE_CROSSING;
1709           }
1710
1711         /* Now that we've split eh edges as appropriate, allow landing pads
1712            to be merged with the post-landing pads.  */
1713         flags &= ~EDGE_PRESERVE;
1714
1715         e->flags = flags;
1716       }
1717
1718   return crossing_edges;
1719 }
1720
1721 /* Set the flag EDGE_CAN_FALLTHRU for edges that can be fallthru.  */
1722
1723 static void
1724 set_edge_can_fallthru_flag (void)
1725 {
1726   basic_block bb;
1727
1728   FOR_EACH_BB_FN (bb, cfun)
1729     {
1730       edge e;
1731       edge_iterator ei;
1732
1733       FOR_EACH_EDGE (e, ei, bb->succs)
1734         {
1735           e->flags &= ~EDGE_CAN_FALLTHRU;
1736
1737           /* The FALLTHRU edge is also CAN_FALLTHRU edge.  */
1738           if (e->flags & EDGE_FALLTHRU)
1739             e->flags |= EDGE_CAN_FALLTHRU;
1740         }
1741
1742       /* If the BB ends with an invertible condjump all (2) edges are
1743          CAN_FALLTHRU edges.  */
1744       if (EDGE_COUNT (bb->succs) != 2)
1745         continue;
1746       if (!any_condjump_p (BB_END (bb)))
1747         continue;
1748       if (!invert_jump (BB_END (bb), JUMP_LABEL (BB_END (bb)), 0))
1749         continue;
1750       invert_jump (BB_END (bb), JUMP_LABEL (BB_END (bb)), 0);
1751       EDGE_SUCC (bb, 0)->flags |= EDGE_CAN_FALLTHRU;
1752       EDGE_SUCC (bb, 1)->flags |= EDGE_CAN_FALLTHRU;
1753     }
1754 }
1755
1756 /* If any destination of a crossing edge does not have a label, add label;
1757    Convert any easy fall-through crossing edges to unconditional jumps.  */
1758
1759 static void
1760 add_labels_and_missing_jumps (vec<edge> crossing_edges)
1761 {
1762   size_t i;
1763   edge e;
1764
1765   FOR_EACH_VEC_ELT (crossing_edges, i, e)
1766     {
1767       basic_block src = e->src;
1768       basic_block dest = e->dest;
1769       rtx label;
1770       rtx_insn *new_jump;
1771
1772       if (dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
1773         continue;
1774
1775       /* Make sure dest has a label.  */
1776       label = block_label (dest);
1777
1778       /* Nothing to do for non-fallthru edges.  */
1779       if (src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1780         continue;
1781       if ((e->flags & EDGE_FALLTHRU) == 0)
1782         continue;
1783
1784       /* If the block does not end with a control flow insn, then we
1785          can trivially add a jump to the end to fixup the crossing.
1786          Otherwise the jump will have to go in a new bb, which will
1787          be handled by fix_up_fall_thru_edges function.  */
1788       if (control_flow_insn_p (BB_END (src)))
1789         continue;
1790
1791       /* Make sure there's only one successor.  */
1792       gcc_assert (single_succ_p (src));
1793
1794       new_jump = emit_jump_insn_after (gen_jump (label), BB_END (src));
1795       BB_END (src) = new_jump;
1796       JUMP_LABEL (new_jump) = label;
1797       LABEL_NUSES (label) += 1;
1798
1799       emit_barrier_after_bb (src);
1800
1801       /* Mark edge as non-fallthru.  */
1802       e->flags &= ~EDGE_FALLTHRU;
1803     }
1804 }
1805
1806 /* Find any bb's where the fall-through edge is a crossing edge (note that
1807    these bb's must also contain a conditional jump or end with a call
1808    instruction; we've already dealt with fall-through edges for blocks
1809    that didn't have a conditional jump or didn't end with call instruction
1810    in the call to add_labels_and_missing_jumps).  Convert the fall-through
1811    edge to non-crossing edge by inserting a new bb to fall-through into.
1812    The new bb will contain an unconditional jump (crossing edge) to the
1813    original fall through destination.  */
1814
1815 static void
1816 fix_up_fall_thru_edges (void)
1817 {
1818   basic_block cur_bb;
1819   basic_block new_bb;
1820   edge succ1;
1821   edge succ2;
1822   edge fall_thru;
1823   edge cond_jump = NULL;
1824   edge e;
1825   bool cond_jump_crosses;
1826   int invert_worked;
1827   rtx_insn *old_jump;
1828   rtx fall_thru_label;
1829
1830   FOR_EACH_BB_FN (cur_bb, cfun)
1831     {
1832       fall_thru = NULL;
1833       if (EDGE_COUNT (cur_bb->succs) > 0)
1834         succ1 = EDGE_SUCC (cur_bb, 0);
1835       else
1836         succ1 = NULL;
1837
1838       if (EDGE_COUNT (cur_bb->succs) > 1)
1839         succ2 = EDGE_SUCC (cur_bb, 1);
1840       else
1841         succ2 = NULL;
1842
1843       /* Find the fall-through edge.  */
1844
1845       if (succ1
1846           && (succ1->flags & EDGE_FALLTHRU))
1847         {
1848           fall_thru = succ1;
1849           cond_jump = succ2;
1850         }
1851       else if (succ2
1852                && (succ2->flags & EDGE_FALLTHRU))
1853         {
1854           fall_thru = succ2;
1855           cond_jump = succ1;
1856         }
1857       else if (succ1
1858                && (block_ends_with_call_p (cur_bb)
1859                    || can_throw_internal (BB_END (cur_bb))))
1860         {
1861           edge e;
1862           edge_iterator ei;
1863
1864           FOR_EACH_EDGE (e, ei, cur_bb->succs)
1865             if (e->flags & EDGE_FALLTHRU)
1866               {
1867                 fall_thru = e;
1868                 break;
1869               }
1870         }
1871
1872       if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)))
1873         {
1874           /* Check to see if the fall-thru edge is a crossing edge.  */
1875
1876           if (fall_thru->flags & EDGE_CROSSING)
1877             {
1878               /* The fall_thru edge crosses; now check the cond jump edge, if
1879                  it exists.  */
1880
1881               cond_jump_crosses = true;
1882               invert_worked  = 0;
1883               old_jump = BB_END (cur_bb);
1884
1885               /* Find the jump instruction, if there is one.  */
1886
1887               if (cond_jump)
1888                 {
1889                   if (!(cond_jump->flags & EDGE_CROSSING))
1890                     cond_jump_crosses = false;
1891
1892                   /* We know the fall-thru edge crosses; if the cond
1893                      jump edge does NOT cross, and its destination is the
1894                      next block in the bb order, invert the jump
1895                      (i.e. fix it so the fall through does not cross and
1896                      the cond jump does).  */
1897
1898                   if (!cond_jump_crosses)
1899                     {
1900                       /* Find label in fall_thru block. We've already added
1901                          any missing labels, so there must be one.  */
1902
1903                       fall_thru_label = block_label (fall_thru->dest);
1904
1905                       if (old_jump && JUMP_P (old_jump) && fall_thru_label)
1906                         invert_worked = invert_jump (old_jump,
1907                                                      fall_thru_label,0);
1908                       if (invert_worked)
1909                         {
1910                           fall_thru->flags &= ~EDGE_FALLTHRU;
1911                           cond_jump->flags |= EDGE_FALLTHRU;
1912                           update_br_prob_note (cur_bb);
1913                           e = fall_thru;
1914                           fall_thru = cond_jump;
1915                           cond_jump = e;
1916                           cond_jump->flags |= EDGE_CROSSING;
1917                           fall_thru->flags &= ~EDGE_CROSSING;
1918                         }
1919                     }
1920                 }
1921
1922               if (cond_jump_crosses || !invert_worked)
1923                 {
1924                   /* This is the case where both edges out of the basic
1925                      block are crossing edges. Here we will fix up the
1926                      fall through edge. The jump edge will be taken care
1927                      of later.  The EDGE_CROSSING flag of fall_thru edge
1928                      is unset before the call to force_nonfallthru
1929                      function because if a new basic-block is created
1930                      this edge remains in the current section boundary
1931                      while the edge between new_bb and the fall_thru->dest
1932                      becomes EDGE_CROSSING.  */
1933
1934                   fall_thru->flags &= ~EDGE_CROSSING;
1935                   new_bb = force_nonfallthru (fall_thru);
1936
1937                   if (new_bb)
1938                     {
1939                       new_bb->aux = cur_bb->aux;
1940                       cur_bb->aux = new_bb;
1941
1942                       /* This is done by force_nonfallthru_and_redirect.  */
1943                       gcc_assert (BB_PARTITION (new_bb)
1944                                   == BB_PARTITION (cur_bb));
1945
1946                       single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
1947                     }
1948                   else
1949                     {
1950                       /* If a new basic-block was not created; restore
1951                          the EDGE_CROSSING flag.  */
1952                       fall_thru->flags |= EDGE_CROSSING;
1953                     }
1954
1955                   /* Add barrier after new jump */
1956                   emit_barrier_after_bb (new_bb ? new_bb : cur_bb);
1957                 }
1958             }
1959         }
1960     }
1961 }
1962
1963 /* This function checks the destination block of a "crossing jump" to
1964    see if it has any crossing predecessors that begin with a code label
1965    and end with an unconditional jump.  If so, it returns that predecessor
1966    block.  (This is to avoid creating lots of new basic blocks that all
1967    contain unconditional jumps to the same destination).  */
1968
1969 static basic_block
1970 find_jump_block (basic_block jump_dest)
1971 {
1972   basic_block source_bb = NULL;
1973   edge e;
1974   rtx_insn *insn;
1975   edge_iterator ei;
1976
1977   FOR_EACH_EDGE (e, ei, jump_dest->preds)
1978     if (e->flags & EDGE_CROSSING)
1979       {
1980         basic_block src = e->src;
1981
1982         /* Check each predecessor to see if it has a label, and contains
1983            only one executable instruction, which is an unconditional jump.
1984            If so, we can use it.  */
1985
1986         if (LABEL_P (BB_HEAD (src)))
1987           for (insn = BB_HEAD (src);
1988                !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
1989                insn = NEXT_INSN (insn))
1990             {
1991               if (INSN_P (insn)
1992                   && insn == BB_END (src)
1993                   && JUMP_P (insn)
1994                   && !any_condjump_p (insn))
1995                 {
1996                   source_bb = src;
1997                   break;
1998                 }
1999             }
2000
2001         if (source_bb)
2002           break;
2003       }
2004
2005   return source_bb;
2006 }
2007
2008 /* Find all BB's with conditional jumps that are crossing edges;
2009    insert a new bb and make the conditional jump branch to the new
2010    bb instead (make the new bb same color so conditional branch won't
2011    be a 'crossing' edge).  Insert an unconditional jump from the
2012    new bb to the original destination of the conditional jump.  */
2013
2014 static void
2015 fix_crossing_conditional_branches (void)
2016 {
2017   basic_block cur_bb;
2018   basic_block new_bb;
2019   basic_block dest;
2020   edge succ1;
2021   edge succ2;
2022   edge crossing_edge;
2023   edge new_edge;
2024   rtx_insn *old_jump;
2025   rtx set_src;
2026   rtx old_label = NULL_RTX;
2027   rtx new_label;
2028
2029   FOR_EACH_BB_FN (cur_bb, cfun)
2030     {
2031       crossing_edge = NULL;
2032       if (EDGE_COUNT (cur_bb->succs) > 0)
2033         succ1 = EDGE_SUCC (cur_bb, 0);
2034       else
2035         succ1 = NULL;
2036
2037       if (EDGE_COUNT (cur_bb->succs) > 1)
2038         succ2 = EDGE_SUCC (cur_bb, 1);
2039       else
2040         succ2 = NULL;
2041
2042       /* We already took care of fall-through edges, so only one successor
2043          can be a crossing edge.  */
2044
2045       if (succ1 && (succ1->flags & EDGE_CROSSING))
2046         crossing_edge = succ1;
2047       else if (succ2 && (succ2->flags & EDGE_CROSSING))
2048         crossing_edge = succ2;
2049
2050       if (crossing_edge)
2051         {
2052           old_jump = BB_END (cur_bb);
2053
2054           /* Check to make sure the jump instruction is a
2055              conditional jump.  */
2056
2057           set_src = NULL_RTX;
2058
2059           if (any_condjump_p (old_jump))
2060             {
2061               if (GET_CODE (PATTERN (old_jump)) == SET)
2062                 set_src = SET_SRC (PATTERN (old_jump));
2063               else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
2064                 {
2065                   set_src = XVECEXP (PATTERN (old_jump), 0,0);
2066                   if (GET_CODE (set_src) == SET)
2067                     set_src = SET_SRC (set_src);
2068                   else
2069                     set_src = NULL_RTX;
2070                 }
2071             }
2072
2073           if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
2074             {
2075               if (GET_CODE (XEXP (set_src, 1)) == PC)
2076                 old_label = XEXP (set_src, 2);
2077               else if (GET_CODE (XEXP (set_src, 2)) == PC)
2078                 old_label = XEXP (set_src, 1);
2079
2080               /* Check to see if new bb for jumping to that dest has
2081                  already been created; if so, use it; if not, create
2082                  a new one.  */
2083
2084               new_bb = find_jump_block (crossing_edge->dest);
2085
2086               if (new_bb)
2087                 new_label = block_label (new_bb);
2088               else
2089                 {
2090                   basic_block last_bb;
2091                   rtx_insn *new_jump;
2092
2093                   /* Create new basic block to be dest for
2094                      conditional jump.  */
2095
2096                   /* Put appropriate instructions in new bb.  */
2097
2098                   new_label = gen_label_rtx ();
2099                   emit_label (new_label);
2100
2101                   gcc_assert (GET_CODE (old_label) == LABEL_REF);
2102                   old_label = JUMP_LABEL (old_jump);
2103                   new_jump = emit_jump_insn (gen_jump (old_label));
2104                   JUMP_LABEL (new_jump) = old_label;
2105
2106                   last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
2107                   new_bb = create_basic_block (new_label, new_jump, last_bb);
2108                   new_bb->aux = last_bb->aux;
2109                   last_bb->aux = new_bb;
2110
2111                   emit_barrier_after_bb (new_bb);
2112
2113                   /* Make sure new bb is in same partition as source
2114                      of conditional branch.  */
2115                   BB_COPY_PARTITION (new_bb, cur_bb);
2116                 }
2117
2118               /* Make old jump branch to new bb.  */
2119
2120               redirect_jump (old_jump, new_label, 0);
2121
2122               /* Remove crossing_edge as predecessor of 'dest'.  */
2123
2124               dest = crossing_edge->dest;
2125
2126               redirect_edge_succ (crossing_edge, new_bb);
2127
2128               /* Make a new edge from new_bb to old dest; new edge
2129                  will be a successor for new_bb and a predecessor
2130                  for 'dest'.  */
2131
2132               if (EDGE_COUNT (new_bb->succs) == 0)
2133                 new_edge = make_edge (new_bb, dest, 0);
2134               else
2135                 new_edge = EDGE_SUCC (new_bb, 0);
2136
2137               crossing_edge->flags &= ~EDGE_CROSSING;
2138               new_edge->flags |= EDGE_CROSSING;
2139             }
2140         }
2141     }
2142 }
2143
2144 /* Find any unconditional branches that cross between hot and cold
2145    sections.  Convert them into indirect jumps instead.  */
2146
2147 static void
2148 fix_crossing_unconditional_branches (void)
2149 {
2150   basic_block cur_bb;
2151   rtx_insn *last_insn;
2152   rtx label;
2153   rtx label_addr;
2154   rtx_insn *indirect_jump_sequence;
2155   rtx_insn *jump_insn = NULL;
2156   rtx new_reg;
2157   rtx_insn *cur_insn;
2158   edge succ;
2159
2160   FOR_EACH_BB_FN (cur_bb, cfun)
2161     {
2162       last_insn = BB_END (cur_bb);
2163
2164       if (EDGE_COUNT (cur_bb->succs) < 1)
2165         continue;
2166
2167       succ = EDGE_SUCC (cur_bb, 0);
2168
2169       /* Check to see if bb ends in a crossing (unconditional) jump.  At
2170          this point, no crossing jumps should be conditional.  */
2171
2172       if (JUMP_P (last_insn)
2173           && (succ->flags & EDGE_CROSSING))
2174         {
2175           gcc_assert (!any_condjump_p (last_insn));
2176
2177           /* Make sure the jump is not already an indirect or table jump.  */
2178
2179           if (!computed_jump_p (last_insn)
2180               && !tablejump_p (last_insn, NULL, NULL))
2181             {
2182               /* We have found a "crossing" unconditional branch.  Now
2183                  we must convert it to an indirect jump.  First create
2184                  reference of label, as target for jump.  */
2185
2186               label = JUMP_LABEL (last_insn);
2187               label_addr = gen_rtx_LABEL_REF (Pmode, label);
2188               LABEL_NUSES (label) += 1;
2189
2190               /* Get a register to use for the indirect jump.  */
2191
2192               new_reg = gen_reg_rtx (Pmode);
2193
2194               /* Generate indirect the jump sequence.  */
2195
2196               start_sequence ();
2197               emit_move_insn (new_reg, label_addr);
2198               emit_indirect_jump (new_reg);
2199               indirect_jump_sequence = get_insns ();
2200               end_sequence ();
2201
2202               /* Make sure every instruction in the new jump sequence has
2203                  its basic block set to be cur_bb.  */
2204
2205               for (cur_insn = indirect_jump_sequence; cur_insn;
2206                    cur_insn = NEXT_INSN (cur_insn))
2207                 {
2208                   if (!BARRIER_P (cur_insn))
2209                     BLOCK_FOR_INSN (cur_insn) = cur_bb;
2210                   if (JUMP_P (cur_insn))
2211                     jump_insn = cur_insn;
2212                 }
2213
2214               /* Insert the new (indirect) jump sequence immediately before
2215                  the unconditional jump, then delete the unconditional jump.  */
2216
2217               emit_insn_before (indirect_jump_sequence, last_insn);
2218               delete_insn (last_insn);
2219
2220               JUMP_LABEL (jump_insn) = label;
2221               LABEL_NUSES (label)++;
2222
2223               /* Make BB_END for cur_bb be the jump instruction (NOT the
2224                  barrier instruction at the end of the sequence...).  */
2225
2226               BB_END (cur_bb) = jump_insn;
2227             }
2228         }
2229     }
2230 }
2231
2232 /* Update CROSSING_JUMP_P flags on all jump insns.  */
2233
2234 static void
2235 update_crossing_jump_flags (void)
2236 {
2237   basic_block bb;
2238   edge e;
2239   edge_iterator ei;
2240
2241   FOR_EACH_BB_FN (bb, cfun)
2242     FOR_EACH_EDGE (e, ei, bb->succs)
2243       if (e->flags & EDGE_CROSSING)
2244         {
2245           if (JUMP_P (BB_END (bb))
2246               /* Some flags were added during fix_up_fall_thru_edges, via
2247                  force_nonfallthru_and_redirect.  */
2248               && !CROSSING_JUMP_P (BB_END (bb)))
2249             CROSSING_JUMP_P (BB_END (bb)) = 1;
2250           break;
2251         }
2252 }
2253
2254 /* Reorder basic blocks.  The main entry point to this file.  FLAGS is
2255    the set of flags to pass to cfg_layout_initialize().  */
2256
2257 static void
2258 reorder_basic_blocks (void)
2259 {
2260   int n_traces;
2261   int i;
2262   struct trace *traces;
2263
2264   gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);
2265
2266   if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1)
2267     return;
2268
2269   set_edge_can_fallthru_flag ();
2270   mark_dfs_back_edges ();
2271
2272   /* We are estimating the length of uncond jump insn only once since the code
2273      for getting the insn length always returns the minimal length now.  */
2274   if (uncond_jump_length == 0)
2275     uncond_jump_length = get_uncond_jump_length ();
2276
2277   /* We need to know some information for each basic block.  */
2278   array_size = GET_ARRAY_SIZE (last_basic_block_for_fn (cfun));
2279   bbd = XNEWVEC (bbro_basic_block_data, array_size);
2280   for (i = 0; i < array_size; i++)
2281     {
2282       bbd[i].start_of_trace = -1;
2283       bbd[i].end_of_trace = -1;
2284       bbd[i].in_trace = -1;
2285       bbd[i].visited = 0;
2286       bbd[i].heap = NULL;
2287       bbd[i].node = NULL;
2288     }
2289
2290   traces = XNEWVEC (struct trace, n_basic_blocks_for_fn (cfun));
2291   n_traces = 0;
2292   find_traces (&n_traces, traces);
2293   connect_traces (n_traces, traces);
2294   FREE (traces);
2295   FREE (bbd);
2296
2297   relink_block_chain (/*stay_in_cfglayout_mode=*/true);
2298
2299   if (dump_file)
2300     {
2301       if (dump_flags & TDF_DETAILS)
2302         dump_reg_info (dump_file);
2303       dump_flow_info (dump_file, dump_flags);
2304     }
2305
2306   /* Signal that rtl_verify_flow_info_1 can now verify that there
2307      is at most one switch between hot/cold sections.  */
2308   crtl->bb_reorder_complete = true;
2309 }
2310
2311 /* Determine which partition the first basic block in the function
2312    belongs to, then find the first basic block in the current function
2313    that belongs to a different section, and insert a
2314    NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
2315    instruction stream.  When writing out the assembly code,
2316    encountering this note will make the compiler switch between the
2317    hot and cold text sections.  */
2318
2319 void
2320 insert_section_boundary_note (void)
2321 {
2322   basic_block bb;
2323   bool switched_sections = false;
2324   int current_partition = 0;
2325
2326   if (!crtl->has_bb_partition)
2327     return;
2328
2329   FOR_EACH_BB_FN (bb, cfun)
2330     {
2331       if (!current_partition)
2332         current_partition = BB_PARTITION (bb);
2333       if (BB_PARTITION (bb) != current_partition)
2334         {
2335           gcc_assert (!switched_sections);
2336           switched_sections = true;
2337           emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS, BB_HEAD (bb));
2338           current_partition = BB_PARTITION (bb);
2339         }
2340     }
2341 }
2342
2343 namespace {
2344
2345 const pass_data pass_data_reorder_blocks =
2346 {
2347   RTL_PASS, /* type */
2348   "bbro", /* name */
2349   OPTGROUP_NONE, /* optinfo_flags */
2350   TV_REORDER_BLOCKS, /* tv_id */
2351   0, /* properties_required */
2352   0, /* properties_provided */
2353   0, /* properties_destroyed */
2354   0, /* todo_flags_start */
2355   0, /* todo_flags_finish */
2356 };
2357
2358 class pass_reorder_blocks : public rtl_opt_pass
2359 {
2360 public:
2361   pass_reorder_blocks (gcc::context *ctxt)
2362     : rtl_opt_pass (pass_data_reorder_blocks, ctxt)
2363   {}
2364
2365   /* opt_pass methods: */
2366   virtual bool gate (function *)
2367     {
2368       if (targetm.cannot_modify_jumps_p ())
2369         return false;
2370       return (optimize > 0
2371               && (flag_reorder_blocks || flag_reorder_blocks_and_partition));
2372     }
2373
2374   virtual unsigned int execute (function *);
2375
2376 }; // class pass_reorder_blocks
2377
2378 unsigned int
2379 pass_reorder_blocks::execute (function *fun)
2380 {
2381   basic_block bb;
2382
2383   /* Last attempt to optimize CFG, as scheduling, peepholing and insn
2384      splitting possibly introduced more crossjumping opportunities.  */
2385   cfg_layout_initialize (CLEANUP_EXPENSIVE);
2386
2387   reorder_basic_blocks ();
2388   cleanup_cfg (CLEANUP_EXPENSIVE);
2389
2390   FOR_EACH_BB_FN (bb, fun)
2391     if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2392       bb->aux = bb->next_bb;
2393   cfg_layout_finalize ();
2394
2395   return 0;
2396 }
2397
2398 } // anon namespace
2399
2400 rtl_opt_pass *
2401 make_pass_reorder_blocks (gcc::context *ctxt)
2402 {
2403   return new pass_reorder_blocks (ctxt);
2404 }
2405
/* Duplicate the blocks containing computed gotos.  This basically unfactors
   computed gotos that were factored early on in the compilation process to
   speed up edge based data flow.  We used to not unfactor them again,
   which can seriously pessimize code with many computed jumps in the source
   code, such as interpreters.  See e.g. PR15242.  */
2411
2412 namespace {
2413
2414 const pass_data pass_data_duplicate_computed_gotos =
2415 {
2416   RTL_PASS, /* type */
2417   "compgotos", /* name */
2418   OPTGROUP_NONE, /* optinfo_flags */
2419   TV_REORDER_BLOCKS, /* tv_id */
2420   0, /* properties_required */
2421   0, /* properties_provided */
2422   0, /* properties_destroyed */
2423   0, /* todo_flags_start */
2424   0, /* todo_flags_finish */
2425 };
2426
2427 class pass_duplicate_computed_gotos : public rtl_opt_pass
2428 {
2429 public:
2430   pass_duplicate_computed_gotos (gcc::context *ctxt)
2431     : rtl_opt_pass (pass_data_duplicate_computed_gotos, ctxt)
2432   {}
2433
2434   /* opt_pass methods: */
2435   virtual bool gate (function *);
2436   virtual unsigned int execute (function *);
2437
2438 }; // class pass_duplicate_computed_gotos
2439
2440 bool
2441 pass_duplicate_computed_gotos::gate (function *fun)
2442 {
2443   if (targetm.cannot_modify_jumps_p ())
2444     return false;
2445   return (optimize > 0
2446           && flag_expensive_optimizations
2447           && ! optimize_function_for_size_p (fun));
2448 }
2449
2450 unsigned int
2451 pass_duplicate_computed_gotos::execute (function *fun)
2452 {
2453   basic_block bb, new_bb;
2454   bitmap candidates;
2455   int max_size;
2456   bool changed = false;
2457
2458   if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
2459     return 0;
2460
2461   clear_bb_flags ();
2462   cfg_layout_initialize (0);
2463
2464   /* We are estimating the length of uncond jump insn only once
2465      since the code for getting the insn length always returns
2466      the minimal length now.  */
2467   if (uncond_jump_length == 0)
2468     uncond_jump_length = get_uncond_jump_length ();
2469
2470   max_size
2471     = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
2472   candidates = BITMAP_ALLOC (NULL);
2473
2474   /* Look for blocks that end in a computed jump, and see if such blocks
2475      are suitable for unfactoring.  If a block is a candidate for unfactoring,
2476      mark it in the candidates.  */
2477   FOR_EACH_BB_FN (bb, fun)
2478     {
2479       rtx_insn *insn;
2480       edge e;
2481       edge_iterator ei;
2482       int size, all_flags;
2483
2484       /* Build the reorder chain for the original order of blocks.  */
2485       if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2486         bb->aux = bb->next_bb;
2487
2488       /* Obviously the block has to end in a computed jump.  */
2489       if (!computed_jump_p (BB_END (bb)))
2490         continue;
2491
2492       /* Only consider blocks that can be duplicated.  */
2493       if (CROSSING_JUMP_P (BB_END (bb))
2494           || !can_duplicate_block_p (bb))
2495         continue;
2496
2497       /* Make sure that the block is small enough.  */
2498       size = 0;
2499       FOR_BB_INSNS (bb, insn)
2500         if (INSN_P (insn))
2501           {
2502             size += get_attr_min_length (insn);
2503             if (size > max_size)
2504                break;
2505           }
2506       if (size > max_size)
2507         continue;
2508
2509       /* Final check: there must not be any incoming abnormal edges.  */
2510       all_flags = 0;
2511       FOR_EACH_EDGE (e, ei, bb->preds)
2512         all_flags |= e->flags;
2513       if (all_flags & EDGE_COMPLEX)
2514         continue;
2515
2516       bitmap_set_bit (candidates, bb->index);
2517     }
2518
2519   /* Nothing to do if there is no computed jump here.  */
2520   if (bitmap_empty_p (candidates))
2521     goto done;
2522
2523   /* Duplicate computed gotos.  */
2524   FOR_EACH_BB_FN (bb, fun)
2525     {
2526       if (bb->flags & BB_VISITED)
2527         continue;
2528
2529       bb->flags |= BB_VISITED;
2530
2531       /* BB must have one outgoing edge.  That edge must not lead to
2532          the exit block or the next block.
2533          The destination must have more than one predecessor.  */
2534       if (!single_succ_p (bb)
2535           || single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (fun)
2536           || single_succ (bb) == bb->next_bb
2537           || single_pred_p (single_succ (bb)))
2538         continue;
2539
2540       /* The successor block has to be a duplication candidate.  */
2541       if (!bitmap_bit_p (candidates, single_succ (bb)->index))
2542         continue;
2543
2544       /* Don't duplicate a partition crossing edge, which requires difficult
2545          fixup.  */
2546       if (JUMP_P (BB_END (bb)) && CROSSING_JUMP_P (BB_END (bb)))
2547         continue;
2548
2549       new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb), bb);
2550       new_bb->aux = bb->aux;
2551       bb->aux = new_bb;
2552       new_bb->flags |= BB_VISITED;
2553       changed = true;
2554     }
2555
2556  done:
2557   if (changed)
2558     {
2559       /* Duplicating blocks above will redirect edges and may cause hot
2560          blocks previously reached by both hot and cold blocks to become
2561          dominated only by cold blocks.  */
2562       fixup_partitions ();
2563
2564       /* Merge the duplicated blocks into predecessors, when possible.  */
2565       cfg_layout_finalize ();
2566       cleanup_cfg (0);
2567     }
2568   else
2569     cfg_layout_finalize ();
2570
2571   BITMAP_FREE (candidates);
2572   return 0;
2573 }
2574
2575 } // anon namespace
2576
2577 rtl_opt_pass *
2578 make_pass_duplicate_computed_gotos (gcc::context *ctxt)
2579 {
2580   return new pass_duplicate_computed_gotos (ctxt);
2581 }
2582
2583 /* This function is the main 'entrance' for the optimization that
2584    partitions hot and cold basic blocks into separate sections of the
2585    .o file (to improve performance and cache locality).  Ideally it
2586    would be called after all optimizations that rearrange the CFG have
2587    been called.  However part of this optimization may introduce new
2588    register usage, so it must be called before register allocation has
2589    occurred.  This means that this optimization is actually called
2590    well before the optimization that reorders basic blocks (see
2591    function above).
2592
2593    This optimization checks the feedback information to determine
2594    which basic blocks are hot/cold, updates flags on the basic blocks
2595    to indicate which section they belong in.  This information is
2596    later used for writing out sections in the .o file.  Because hot
2597    and cold sections can be arbitrarily large (within the bounds of
2598    memory), far beyond the size of a single function, it is necessary
2599    to fix up all edges that cross section boundaries, to make sure the
2600    instructions used can actually span the required distance.  The
2601    fixes are described below.
2602
2603    Fall-through edges must be changed into jumps; it is not safe or
2604    legal to fall through across a section boundary.  Whenever a
2605    fall-through edge crossing a section boundary is encountered, a new
2606    basic block is inserted (in the same section as the fall-through
2607    source), and the fall through edge is redirected to the new basic
2608    block.  The new basic block contains an unconditional jump to the
2609    original fall-through target.  (If the unconditional jump is
2610    insufficient to cross section boundaries, that is dealt with a
2611    little later, see below).
2612
2613    In order to deal with architectures that have short conditional
2614    branches (which cannot span all of memory) we take any conditional
2615    jump that attempts to cross a section boundary and add a level of
2616    indirection: it becomes a conditional jump to a new basic block, in
2617    the same section.  The new basic block contains an unconditional
2618    jump to the original target, in the other section.
2619
2620    For those architectures whose unconditional branch is also
2621    incapable of reaching all of memory, those unconditional jumps are
2622    converted into indirect jumps, through a register.
2623
2624    IMPORTANT NOTE: This optimization causes some messy interactions
2625    with the cfg cleanup optimizations; those optimizations want to
2626    merge blocks wherever possible, and to collapse indirect jump
2627    sequences (change "A jumps to B jumps to C" directly into "A jumps
2628    to C").  Those optimizations can undo the jump fixes that
2629    partitioning is required to make (see above), in order to ensure
2630    that jumps attempting to cross section boundaries are really able
2631    to cover whatever distance the jump requires (on many architectures
2632    conditional or unconditional jumps are not able to reach all of
2633    memory).  Therefore tests have to be inserted into each such
2634    optimization to make sure that it does not undo stuff necessary to
2635    cross partition boundaries.  This would be much less of a problem
2636    if we could perform this optimization later in the compilation, but
2637    unfortunately the fact that we may need to create indirect jumps
2638    (through registers) requires that this optimization be performed
2639    before register allocation.
2640
2641    Hot and cold basic blocks are partitioned and put in separate
2642    sections of the .o file, to reduce paging and improve cache
2643    performance (hopefully).  This can result in bits of code from the
2644    same function being widely separated in the .o file.  However this
2645    is not obvious to the current bb structure.  Therefore we must take
2646    care to ensure that: 1). There are no fall_thru edges that cross
2647    between sections; 2). For those architectures which have "short"
2648    conditional branches, all conditional branches that attempt to
2649    cross between sections are converted to unconditional branches;
2650    and, 3). For those architectures which have "short" unconditional
2651    branches, all unconditional branches that attempt to cross between
2652    sections are converted to indirect jumps.
2653
2654    The code for fixing up fall_thru edges that cross between hot and
2655    cold basic blocks does so by creating new basic blocks containing
2656    unconditional branches to the appropriate label in the "other"
2657    section.  The new basic block is then put in the same (hot or cold)
2658    section as the original conditional branch, and the fall_thru edge
2659    is modified to fall into the new basic block instead.  By adding
2660    this level of indirection we end up with only unconditional branches
2661    crossing between hot and cold sections.
2662
2663    Conditional branches are dealt with by adding a level of indirection.
2664    A new basic block is added in the same (hot/cold) section as the
2665    conditional branch, and the conditional branch is retargeted to the
2666    new basic block.  The new basic block contains an unconditional branch
2667    to the original target of the conditional branch (in the other section).
2668
2669    Unconditional branches are dealt with by converting them into
2670    indirect jumps.  */
2671
2672 namespace {
2673
2674 const pass_data pass_data_partition_blocks =
2675 {
2676   RTL_PASS, /* type */
2677   "bbpart", /* name */
2678   OPTGROUP_NONE, /* optinfo_flags */
2679   TV_REORDER_BLOCKS, /* tv_id */
2680   PROP_cfglayout, /* properties_required */
2681   0, /* properties_provided */
2682   0, /* properties_destroyed */
2683   0, /* todo_flags_start */
2684   0, /* todo_flags_finish */
2685 };
2686
2687 class pass_partition_blocks : public rtl_opt_pass
2688 {
2689 public:
2690   pass_partition_blocks (gcc::context *ctxt)
2691     : rtl_opt_pass (pass_data_partition_blocks, ctxt)
2692   {}
2693
2694   /* opt_pass methods: */
2695   virtual bool gate (function *);
2696   virtual unsigned int execute (function *);
2697
2698 }; // class pass_partition_blocks
2699
2700 bool
2701 pass_partition_blocks::gate (function *fun)
2702 {
2703   /* The optimization to partition hot/cold basic blocks into separate
2704      sections of the .o file does not work well with linkonce or with
2705      user defined section attributes.  Don't call it if either case
2706      arises.  */
2707   return (flag_reorder_blocks_and_partition
2708           && optimize
2709           /* See gate_handle_reorder_blocks.  We should not partition if
2710              we are going to omit the reordering.  */
2711           && optimize_function_for_speed_p (fun)
2712           && !DECL_COMDAT_GROUP (current_function_decl)
2713           && !user_defined_section_attribute);
2714 }
2715
2716 unsigned
2717 pass_partition_blocks::execute (function *fun)
2718 {
2719   vec<edge> crossing_edges;
2720
2721   if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
2722     return 0;
2723
2724   df_set_flags (DF_DEFER_INSN_RESCAN);
2725
2726   crossing_edges = find_rarely_executed_basic_blocks_and_crossing_edges ();
2727   if (!crossing_edges.exists ())
2728     return 0;
2729
2730   crtl->has_bb_partition = true;
2731
2732   /* Make sure the source of any crossing edge ends in a jump and the
2733      destination of any crossing edge has a label.  */
2734   add_labels_and_missing_jumps (crossing_edges);
2735
2736   /* Convert all crossing fall_thru edges to non-crossing fall
2737      thrus to unconditional jumps (that jump to the original fall
2738      through dest).  */
2739   fix_up_fall_thru_edges ();
2740
2741   /* If the architecture does not have conditional branches that can
2742      span all of memory, convert crossing conditional branches into
2743      crossing unconditional branches.  */
2744   if (!HAS_LONG_COND_BRANCH)
2745     fix_crossing_conditional_branches ();
2746
2747   /* If the architecture does not have unconditional branches that
2748      can span all of memory, convert crossing unconditional branches
2749      into indirect jumps.  Since adding an indirect jump also adds
2750      a new register usage, update the register usage information as
2751      well.  */
2752   if (!HAS_LONG_UNCOND_BRANCH)
2753     fix_crossing_unconditional_branches ();
2754
2755   update_crossing_jump_flags ();
2756
2757   /* Clear bb->aux fields that the above routines were using.  */
2758   clear_aux_for_blocks ();
2759
2760   crossing_edges.release ();
2761
2762   /* ??? FIXME: DF generates the bb info for a block immediately.
2763      And by immediately, I mean *during* creation of the block.
2764
2765         #0  df_bb_refs_collect
2766         #1  in df_bb_refs_record
2767         #2  in create_basic_block_structure
2768
2769      Which means that the bb_has_eh_pred test in df_bb_refs_collect
2770      will *always* fail, because no edges can have been added to the
2771      block yet.  Which of course means we don't add the right
2772      artificial refs, which means we fail df_verify (much) later.
2773
2774      Cleanest solution would seem to make DF_DEFER_INSN_RESCAN imply
2775      that we also shouldn't grab data from the new blocks those new
2776      insns are in either.  In this way one can create the block, link
2777      it up properly, and have everything Just Work later, when deferred
2778      insns are processed.
2779
2780      In the meantime, we have no other option but to throw away all
2781      of the DF data and recompute it all.  */
2782   if (fun->eh->lp_array)
2783     {
2784       df_finish_pass (true);
2785       df_scan_alloc (NULL);
2786       df_scan_blocks ();
2787       /* Not all post-landing pads use all of the EH_RETURN_DATA_REGNO
2788          data.  We blindly generated all of them when creating the new
2789          landing pad.  Delete those assignments we don't use.  */
2790       df_set_flags (DF_LR_RUN_DCE);
2791       df_analyze ();
2792     }
2793
2794   return 0;
2795 }
2796
2797 } // anon namespace
2798
2799 rtl_opt_pass *
2800 make_pass_partition_blocks (gcc::context *ctxt)
2801 {
2802   return new pass_partition_blocks (ctxt);
2803 }