gcc/bb-reorder.c

   1 /* Basic block reordering routines for the GNU compiler.
   2    Copyright (C) 2000-2017 Free Software Foundation, Inc.
   3
   4    This file is part of GCC.
   5
   6    GCC is free software; you can redistribute it and/or modify it
   7    under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3, or (at your option)
   9    any later version.
  10
  11    GCC is distributed in the hope that it will be useful, but WITHOUT
  12    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  13    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  14    License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with GCC; see the file COPYING3.  If not see
  18    <http://www.gnu.org/licenses/>.  */
  19
  20 /* This file contains the "reorder blocks" pass, which changes the control
  21    flow of a function to encounter fewer branches; the "partition blocks"
  22    pass, which divides the basic blocks into "hot" and "cold" partitions,
  23    which are kept separate; and the "duplicate computed gotos" pass, which
  24    duplicates blocks ending in an indirect jump.
  25
  26    There are two algorithms for "reorder blocks": the "simple" algorithm,
  27    which just rearranges blocks, trying to minimize the number of executed
  28    unconditional branches; and the "software trace cache" algorithm, which
  29    also copies code, and in general tries a lot harder to have long linear
  30    pieces of machine code executed.  This algorithm is described next.  */
  31
  32 /* This (greedy) algorithm constructs traces in several rounds.
  33    The construction starts from "seeds".  The seed for the first round
   34    is the entry point of the function.  When there is more than one seed,
  35    the one with the lowest key in the heap is selected first (see bb_to_key).
  36    Then the algorithm repeatedly adds the most probable successor to the end
  37    of a trace.  Finally it connects the traces.
  38
  39    There are two parameters: Branch Threshold and Exec Threshold.
  40    If the probability of an edge to a successor of the current basic block is
  41    lower than Branch Threshold or its frequency is lower than Exec Threshold,
  42    then the successor will be the seed in one of the next rounds.
  43    Each round has these parameters lower than the previous one.
  44    The last round has to have these parameters set to zero so that the
  45    remaining blocks are picked up.
  46
  47    The algorithm selects the most probable successor from all unvisited
  48    successors and successors that have been added to this trace.
   49    The other successors (those that have not been "sent" to the next round) will be
  50    other seeds for this round and the secondary traces will start from them.
  51    If the successor has not been visited in this trace, it is added to the
  52    trace (however, there is some heuristic for simple branches).
  53    If the successor has been visited in this trace, a loop has been found.
  54    If the loop has many iterations, the loop is rotated so that the source
  55    block of the most probable edge going out of the loop is the last block
  56    of the trace.
  57    If the loop has few iterations and there is no edge from the last block of
  58    the loop going out of the loop, the loop header is duplicated.
  59
  60    When connecting traces, the algorithm first checks whether there is an edge
  61    from the last block of a trace to the first block of another trace.
  62    When there are still some unconnected traces it checks whether there exists
  63    a basic block BB such that BB is a successor of the last block of a trace
  64    and BB is a predecessor of the first block of another trace.  In this case,
  65    BB is duplicated, added at the end of the first trace and the traces are
  66    connected through it.
  67    The rest of traces are simply connected so there will be a jump to the
  68    beginning of the rest of traces.
  69
  70    The above description is for the full algorithm, which is used when the
  71    function is optimized for speed.  When the function is optimized for size,
  72    in order to reduce long jumps and connect more fallthru edges, the
  73    algorithm is modified as follows:
  74    (1) Break long traces to short ones.  A trace is broken at a block that has
   75    multiple predecessors/successors during trace discovery.  When connecting
  76    traces, only connect Trace n with Trace n + 1.  This change reduces most
  77    long jumps compared with the above algorithm.
  78    (2) Ignore the edge probability and frequency for fallthru edges.
  79    (3) Keep the original order of blocks when there is no chance to fall
  80    through.  We rely on the results of cfg_cleanup.
  81
  82    To implement the change for code size optimization, block's index is
  83    selected as the key and all traces are found in one round.
  84
  85    References:
  86
  87    "Software Trace Cache"
  88    A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
  89    http://citeseer.nj.nec.com/15361.html
  90
  91 */
  92
  93 #include "config.h"
  94 #define INCLUDE_ALGORITHM /* stable_sort */
  95 #include "system.h"
  96 #include "coretypes.h"
  97 #include "backend.h"
  98 #include "target.h"
  99 #include "rtl.h"
 100 #include "tree.h"
 101 #include "cfghooks.h"
 102 #include "df.h"
 103 #include "memmodel.h"
 104 #include "optabs.h"
 105 #include "regs.h"
 106 #include "emit-rtl.h"
 107 #include "output.h"
 108 #include "expr.h"
 109 #include "params.h"
 110 #include "tree-pass.h"
 111 #include "cfgrtl.h"
 112 #include "cfganal.h"
 113 #include "cfgbuild.h"
 114 #include "cfgcleanup.h"
 115 #include "bb-reorder.h"
 116 #include "except.h"
 117 #include "fibonacci_heap.h"
 118 #include "stringpool.h"
 119 #include "attribs.h"
 120
 121 /* The number of rounds.  In most cases there will only be 4 rounds, but
 122    when partitioning hot and cold basic blocks into separate sections of
 123    the object file there will be an extra round.  */
 124 #define N_ROUNDS 5
 125
 126 struct target_bb_reorder default_target_bb_reorder;
 127 #if SWITCHABLE_TARGET
 128 struct target_bb_reorder *this_target_bb_reorder = &default_target_bb_reorder;
 129 #endif
 130
 131 #define uncond_jump_length \
 132   (this_target_bb_reorder->x_uncond_jump_length)
 133
 134 /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE.  */
 135 static const int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};
 136
 137 /* Exec thresholds in thousandths (per mille) of the frequency of bb 0.  */
 138 static const int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
 139
 140 /* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
 141    block the edge destination is not duplicated while connecting traces.  */
 142 #define DUPLICATION_THRESHOLD 100
 143
 144 typedef fibonacci_heap <long, basic_block_def> bb_heap_t;
 145 typedef fibonacci_node <long, basic_block_def> bb_heap_node_t;
 146
 147 /* Structure to hold needed information for each basic block.  */
 148 struct bbro_basic_block_data
 149 {
 150   /* Which trace is the bb start of (-1 means it is not a start of any).  */
 151   int start_of_trace;
 152
 153   /* Which trace is the bb end of (-1 means it is not an end of any).  */
 154   int end_of_trace;
 155
 156   /* Which trace is the bb in?  */
 157   int in_trace;
 158
 159   /* Which trace was this bb visited in?  */
 160   int visited;
 161
 162   /* Cached maximum frequency of interesting incoming edges.
 163      Minus one means not yet computed.  */
 164   int priority;
 165
 166   /* Which heap is BB in (if any)?  */
 167   bb_heap_t *heap;
 168
 169   /* Which heap node is BB in (if any)?  */
 170   bb_heap_node_t *node;
 171 };
 172
 173 /* The current size of the following dynamic array.  */
 174 static int array_size;
 175
 176 /* The array which holds needed information for basic blocks.  */
 177 static bbro_basic_block_data *bbd;
 178
 179 /* To avoid frequent reallocation the size of arrays is greater than needed,
 180    the number of elements is (not less than) 1.25 * size_wanted.  */
 181 #define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
 182
 183 /* Free the memory and set the pointer to NULL.  */
 184 #define FREE(P) (gcc_assert (P), free (P), P = 0)
 185
 186 /* Structure for holding information about a trace.  */
 187 struct trace
 188 {
 189   /* First and last basic block of the trace.  */
 190   basic_block first, last;
 191
 192   /* The round of the STC creation which this trace was found in.  */
 193   int round;
 194
 195   /* The length (i.e. the number of basic blocks) of the trace.  */
 196   int length;
 197 };
 198
 199 /* Maximum frequency and count of one of the entry blocks.  */
 200 static int max_entry_frequency;
 201 static profile_count max_entry_count;
 202
 203 /* Local function prototypes.  */
 204 static void find_traces (int *, struct trace *);
 205 static basic_block rotate_loop (edge, struct trace *, int);
 206 static void mark_bb_visited (basic_block, int);
 207 static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
 208                                  int, bb_heap_t **, int);
 209 static basic_block copy_bb (basic_block, edge, basic_block, int);
 210 static long bb_to_key (basic_block);
 211 static bool better_edge_p (const_basic_block, const_edge, profile_probability,
 212                            int, profile_probability, int, const_edge);
 213 static bool connect_better_edge_p (const_edge, bool, int, const_edge,
 214                                    struct trace *);
 215 static void connect_traces (int, struct trace *);
 216 static bool copy_bb_p (const_basic_block, int);
 217 static bool push_to_next_round_p (const_basic_block, int, int, int, gcov_type);
 218 \f
 219 /* Return the trace number in which BB was visited.  */
 220
 221 static int
 222 bb_visited_trace (const_basic_block bb)
 223 {
 224   gcc_assert (bb->index < array_size);
 225   return bbd[bb->index].visited;
 226 }
 227
 228 /* This function marks BB that it was visited in trace number TRACE.  */
 229
 230 static void
 231 mark_bb_visited (basic_block bb, int trace)
 232 {
 233   bbd[bb->index].visited = trace;
 234   if (bbd[bb->index].heap)
 235     {
 236       bbd[bb->index].heap->delete_node (bbd[bb->index].node);
 237       bbd[bb->index].heap = NULL;
 238       bbd[bb->index].node = NULL;
 239     }
 240 }
 241
 242 /* Check to see if bb should be pushed into the next round of trace
 243    collections or not.  Reasons for pushing the block forward are 1).
 244    If the block is cold, we are doing partitioning, and there will be
 245    another round (cold partition blocks are not supposed to be
 246    collected into traces until the very last round); or 2). There will
 247    be another round, and the basic block is not "hot enough" for the
 248    current round of trace collection.  */
 249
 250 static bool
 251 push_to_next_round_p (const_basic_block bb, int round, int number_of_rounds,
 252                       int exec_th, gcov_type count_th)
 253 {
 254   bool there_exists_another_round;
 255   bool block_not_hot_enough;
 256
 257   there_exists_another_round = round < number_of_rounds - 1;
 258
 259   block_not_hot_enough = (bb->frequency < exec_th
 260                           || bb->count < count_th
 261                           || probably_never_executed_bb_p (cfun, bb));
 262
 263   if (there_exists_another_round
 264       && block_not_hot_enough)
 265     return true;
 266   else
 267     return false;
 268 }
 269
 270 /* Find the traces for Software Trace Cache.  Chain each trace through
 271    RBI()->next.  Store the number of traces to N_TRACES and description of
 272    traces to TRACES.  */
 273
 274 static void
 275 find_traces (int *n_traces, struct trace *traces)
 276 {
 277   int i;
 278   int number_of_rounds;
 279   edge e;
 280   edge_iterator ei;
 281   bb_heap_t *heap = new bb_heap_t (LONG_MIN);
 282
 283   /* Add one extra round of trace collection when partitioning hot/cold
 284      basic blocks into separate sections.  The last round is for all the
 285      cold blocks (and ONLY the cold blocks).  */
 286
 287   number_of_rounds = N_ROUNDS - 1;
 288
 289   /* Insert entry points of function into heap.  */
 290   max_entry_frequency = 0;
 291   max_entry_count = profile_count::zero ();
 292   FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
 293     {
 294       bbd[e->dest->index].heap = heap;
 295       bbd[e->dest->index].node = heap->insert (bb_to_key (e->dest), e->dest);
 296       if (e->dest->frequency > max_entry_frequency)
 297         max_entry_frequency = e->dest->frequency;
 298       if (e->dest->count.initialized_p () && e->dest->count > max_entry_count)
 299         max_entry_count = e->dest->count;
 300     }
 301
 302   /* Find the traces.  */
 303   for (i = 0; i < number_of_rounds; i++)
 304     {
 305       gcov_type count_threshold;
 306
 307       if (dump_file)
 308         fprintf (dump_file, "STC - round %d\n", i + 1);
 309
 310       if (max_entry_count < INT_MAX / 1000)
 311         count_threshold = max_entry_count.to_gcov_type () * exec_threshold[i] / 1000;
 312       else
 313         count_threshold = max_entry_count.to_gcov_type () / 1000 * exec_threshold[i];
 314
 315       find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
 316                            max_entry_frequency * exec_threshold[i] / 1000,
 317                            count_threshold, traces, n_traces, i, &heap,
 318                            number_of_rounds);
 319     }
 320   delete heap;
 321
 322   if (dump_file)
 323     {
 324       for (i = 0; i < *n_traces; i++)
 325         {
 326           basic_block bb;
 327           fprintf (dump_file, "Trace %d (round %d):  ", i + 1,
 328                    traces[i].round + 1);
 329           for (bb = traces[i].first;
 330                bb != traces[i].last;
 331                bb = (basic_block) bb->aux)
 332             fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency);
 333           fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency);
 334         }
 335       fflush (dump_file);
 336     }
 337 }
 338
 339 /* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
 340    (with sequential number TRACE_N).  */
 341
 342 static basic_block
 343 rotate_loop (edge back_edge, struct trace *trace, int trace_n)
 344 {
 345   basic_block bb;
 346
 347   /* Information about the best end (end after rotation) of the loop.  */
 348   basic_block best_bb = NULL;
 349   edge best_edge = NULL;
 350   int best_freq = -1;
 351   profile_count best_count = profile_count::uninitialized ();
 352   /* The best edge is preferred when its destination is not visited yet
 353      or is a start block of some trace.  */
 354   bool is_preferred = false;
 355
 356   /* Find the most frequent edge that goes out from current trace.  */
 357   bb = back_edge->dest;
 358   do
 359     {
 360       edge e;
 361       edge_iterator ei;
 362
 363       FOR_EACH_EDGE (e, ei, bb->succs)
 364         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
 365             && bb_visited_trace (e->dest) != trace_n
 366             && (e->flags & EDGE_CAN_FALLTHRU)
 367             && !(e->flags & EDGE_COMPLEX))
 368         {
 369           if (is_preferred)
 370             {
 371               /* The best edge is preferred.  */
 372               if (!bb_visited_trace (e->dest)
 373                   || bbd[e->dest->index].start_of_trace >= 0)
 374                 {
 375                   /* The current edge E is also preferred.  */
 376                   int freq = EDGE_FREQUENCY (e);
 377                   if (freq > best_freq || e->count > best_count)
 378                     {
 379                       best_freq = freq;
 380                       if (e->count.initialized_p ())
 381                         best_count = e->count;
 382                       best_edge = e;
 383                       best_bb = bb;
 384                     }
 385                 }
 386             }
 387           else
 388             {
 389               if (!bb_visited_trace (e->dest)
 390                   || bbd[e->dest->index].start_of_trace >= 0)
 391                 {
 392                   /* The current edge E is preferred.  */
 393                   is_preferred = true;
 394                   best_freq = EDGE_FREQUENCY (e);
 395                   best_count = e->count;
 396                   best_edge = e;
 397                   best_bb = bb;
 398                 }
 399               else
 400                 {
 401                   int freq = EDGE_FREQUENCY (e);
 402                   if (!best_edge || freq > best_freq || e->count > best_count)
 403                     {
 404                       best_freq = freq;
 405                       best_count = e->count;
 406                       best_edge = e;
 407                       best_bb = bb;
 408                     }
 409                 }
 410             }
 411         }
 412       bb = (basic_block) bb->aux;
 413     }
 414   while (bb != back_edge->dest);
 415
 416   if (best_bb)
 417     {
 418       /* Rotate the loop so that the BEST_EDGE goes out from the last block of
 419          the trace.  */
 420       if (back_edge->dest == trace->first)
 421         {
 422           trace->first = (basic_block) best_bb->aux;
 423         }
 424       else
 425         {
 426           basic_block prev_bb;
 427
 428           for (prev_bb = trace->first;
 429                prev_bb->aux != back_edge->dest;
 430                prev_bb = (basic_block) prev_bb->aux)
 431             ;
 432           prev_bb->aux = best_bb->aux;
 433
 434           /* Try to get rid of uncond jump to cond jump.  */
 435           if (single_succ_p (prev_bb))
 436             {
 437               basic_block header = single_succ (prev_bb);
 438
 439               /* Duplicate HEADER if it is a small block containing cond jump
 440                  in the end.  */
 441               if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
 442                   && !CROSSING_JUMP_P (BB_END (header)))
 443                 copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
 444             }
 445         }
 446     }
 447   else
 448     {
 449       /* We have not found suitable loop tail so do no rotation.  */
 450       best_bb = back_edge->src;
 451     }
 452   best_bb->aux = NULL;
 453   return best_bb;
 454 }
 455
 456 /* One round of finding traces.  Find traces for BRANCH_TH and EXEC_TH i.e. do
 457    not include basic blocks whose probability is lower than BRANCH_TH or whose
 458    frequency is lower than EXEC_TH into traces (or whose count is lower than
 459    COUNT_TH).  Store the new traces into TRACES and modify the number of
 460    traces *N_TRACES.  Set the round (which the trace belongs to) to ROUND.
 461    The function expects starting basic blocks to be in *HEAP and will delete
 462    *HEAP and store starting points for the next round into new *HEAP.  */
 463
 464 static void
 465 find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
 466                      struct trace *traces, int *n_traces, int round,
 467                      bb_heap_t **heap, int number_of_rounds)
 468 {
 469   /* Heap for discarded basic blocks which are possible starting points for
 470      the next round.  */
 471   bb_heap_t *new_heap = new bb_heap_t (LONG_MIN);
 472   bool for_size = optimize_function_for_size_p (cfun);
 473
 474   while (!(*heap)->empty ())
 475     {
 476       basic_block bb;
 477       struct trace *trace;
 478       edge best_edge, e;
 479       long key;
 480       edge_iterator ei;
 481
 482       bb = (*heap)->extract_min ();
 483       bbd[bb->index].heap = NULL;
 484       bbd[bb->index].node = NULL;
 485
 486       if (dump_file)
 487         fprintf (dump_file, "Getting bb %d\n", bb->index);
 488
 489       /* If the BB's frequency is too low, send BB to the next round.  When
 490          partitioning hot/cold blocks into separate sections, make sure all
 491          the cold blocks (and ONLY the cold blocks) go into the (extra) final
 492          round.  When optimizing for size, do not push to next round.  */
 493
 494       if (!for_size
 495           && push_to_next_round_p (bb, round, number_of_rounds, exec_th,
 496                                    count_th))
 497         {
 498           int key = bb_to_key (bb);
 499           bbd[bb->index].heap = new_heap;
 500           bbd[bb->index].node = new_heap->insert (key, bb);
 501
 502           if (dump_file)
 503             fprintf (dump_file,
 504                      "  Possible start point of next round: %d (key: %d)\n",
 505                      bb->index, key);
 506           continue;
 507         }
 508
 509       trace = traces + *n_traces;
 510       trace->first = bb;
 511       trace->round = round;
 512       trace->length = 0;
 513       bbd[bb->index].in_trace = *n_traces;
 514       (*n_traces)++;
 515
 516       do
 517         {
 518           profile_probability prob;
 519           int freq;
 520           bool ends_in_call;
 521
 522           /* The probability and frequency of the best edge.  */
 523           profile_probability best_prob = profile_probability::uninitialized ();
 524           int best_freq = INT_MIN / 2;
 525
 526           best_edge = NULL;
 527           mark_bb_visited (bb, *n_traces);
 528           trace->length++;
 529
 530           if (dump_file)
 531             fprintf (dump_file, "Basic block %d was visited in trace %d\n",
 532                      bb->index, *n_traces - 1);
 533
 534           ends_in_call = block_ends_with_call_p (bb);
 535
 536           /* Select the successor that will be placed after BB.  */
 537           FOR_EACH_EDGE (e, ei, bb->succs)
 538             {
 539               gcc_assert (!(e->flags & EDGE_FAKE));
 540
 541               if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
 542                 continue;
 543
 544               if (bb_visited_trace (e->dest)
 545                   && bb_visited_trace (e->dest) != *n_traces)
 546                 continue;
 547
 548               if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
 549                 continue;
 550
 551               prob = e->probability;
 552               freq = e->dest->frequency;
 553
 554               /* The only sensible preference for a call instruction is the
 555                  fallthru edge.  Don't bother selecting anything else.  */
 556               if (ends_in_call)
 557                 {
 558                   if (e->flags & EDGE_CAN_FALLTHRU)
 559                     {
 560                       best_edge = e;
 561                       best_prob = prob;
 562                       best_freq = freq;
 563                     }
 564                   continue;
 565                 }
 566
 567               /* Edge that cannot be fallthru or improbable or infrequent
 568                  successor (i.e. it is unsuitable successor).  When optimizing
 569                  for size, ignore the probability and frequency.  */
 570               if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
 571                   || !prob.initialized_p ()
 572                   || ((prob.to_reg_br_prob_base () < branch_th
 573                        || EDGE_FREQUENCY (e) < exec_th
 574                       || e->count < count_th) && (!for_size)))
 575                 continue;
 576
 577               /* If partitioning hot/cold basic blocks, don't consider edges
 578                  that cross section boundaries.  */
 579
 580               if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
 581                                  best_edge))
 582                 {
 583                   best_edge = e;
 584                   best_prob = prob;
 585                   best_freq = freq;
 586                 }
 587             }
 588
 589           /* If the best destination has multiple predecessors, and can be
 590              duplicated cheaper than a jump, don't allow it to be added
 591              to a trace.  We'll duplicate it when connecting traces.  */
 592           if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
 593               && copy_bb_p (best_edge->dest, 0))
 594             best_edge = NULL;
 595
 596           /* If the best destination has multiple successors or predecessors,
 597              don't allow it to be added when optimizing for size.  This makes
 598              sure predecessors with smaller index are handled before the best
  599              destination.  It breaks long traces and reduces long jumps.
 600
 601              Take if-then-else as an example.
 602                 A
 603                / \
 604               B   C
 605                \ /
 606                 D
 607              If we do not remove the best edge B->D/C->D, the final order might
 608              be A B D ... C.  C is at the end of the program.  If D's successors
 609              and D are complicated, might need long jumps for A->C and C->D.
 610              Similar issue for order: A C D ... B.
 611
 612              After removing the best edge, the final result will be ABCD/ ACBD.
 613              It does not add jump compared with the previous order.  But it
 614              reduces the possibility of long jumps.  */
 615           if (best_edge && for_size
 616               && (EDGE_COUNT (best_edge->dest->succs) > 1
 617                  || EDGE_COUNT (best_edge->dest->preds) > 1))
 618             best_edge = NULL;
 619
 620           /* Add all non-selected successors to the heaps.  */
 621           FOR_EACH_EDGE (e, ei, bb->succs)
 622             {
 623               if (e == best_edge
 624                   || e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
 625                   || bb_visited_trace (e->dest))
 626                 continue;
 627
 628               key = bb_to_key (e->dest);
 629
 630               if (bbd[e->dest->index].heap)
 631                 {
 632                   /* E->DEST is already in some heap.  */
 633                   if (key != bbd[e->dest->index].node->get_key ())
 634                     {
 635                       if (dump_file)
 636                         {
 637                           fprintf (dump_file,
 638                                    "Changing key for bb %d from %ld to %ld.\n",
 639                                    e->dest->index,
 640                                    (long) bbd[e->dest->index].node->get_key (),
 641                                    key);
 642                         }
 643                       bbd[e->dest->index].heap->replace_key
 644                         (bbd[e->dest->index].node, key);
 645                     }
 646                 }
 647               else
 648                 {
 649                   bb_heap_t *which_heap = *heap;
 650
 651                   prob = e->probability;
 652                   freq = EDGE_FREQUENCY (e);
 653
 654                   if (!(e->flags & EDGE_CAN_FALLTHRU)
 655                       || (e->flags & EDGE_COMPLEX)
 656                       || !prob.initialized_p ()
 657                       || prob.to_reg_br_prob_base () < branch_th
 658                       || freq < exec_th
 659                       || e->count < count_th)
 660                     {
 661                       /* When partitioning hot/cold basic blocks, make sure
 662                          the cold blocks (and only the cold blocks) all get
 663                          pushed to the last round of trace collection.  When
 664                          optimizing for size, do not push to next round.  */
 665
 666                       if (!for_size && push_to_next_round_p (e->dest, round,
 667                                                              number_of_rounds,
 668                                                              exec_th, count_th))
 669                         which_heap = new_heap;
 670                     }
 671
 672                   bbd[e->dest->index].heap = which_heap;
 673                   bbd[e->dest->index].node = which_heap->insert (key, e->dest);
 674
 675                   if (dump_file)
 676                     {
 677                       fprintf (dump_file,
 678                                "  Possible start of %s round: %d (key: %ld)\n",
 679                                (which_heap == new_heap) ? "next" : "this",
 680                                e->dest->index, (long) key);
 681                     }
 682
 683                 }
 684             }
 685
 686           if (best_edge) /* Suitable successor was found.  */
 687             {
 688               if (bb_visited_trace (best_edge->dest) == *n_traces)
 689                 {
 690                   /* We do nothing with one basic block loops.  */
 691                   if (best_edge->dest != bb)
 692                     {
 693                       if (EDGE_FREQUENCY (best_edge)
 694                           > 4 * best_edge->dest->frequency / 5)
 695                         {
 696                           /* The loop has at least 4 iterations.  If the loop
 697                              header is not the first block of the function
 698                              we can rotate the loop.  */
 699
 700                           if (best_edge->dest
 701                               != ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb)
 702                             {
 703                               if (dump_file)
 704                                 {
 705                                   fprintf (dump_file,
 706                                            "Rotating loop %d - %d\n",
 707                                            best_edge->dest->index, bb->index);
 708                                 }
 709                               bb->aux = best_edge->dest;
 710                               bbd[best_edge->dest->index].in_trace =
 711                                                              (*n_traces) - 1;
 712                               bb = rotate_loop (best_edge, trace, *n_traces);
 713                             }
 714                         }
 715                       else
 716                         {
 717                           /* The loop has less than 4 iterations.  */
 718
 719                           if (single_succ_p (bb)
 720                               && copy_bb_p (best_edge->dest,
 721                                             optimize_edge_for_speed_p
 722                                             (best_edge)))
 723                             {
 724                               bb = copy_bb (best_edge->dest, best_edge, bb,
 725                                             *n_traces);
 726                               trace->length++;
 727                             }
 728                         }
 729                     }
 730
 731                   /* Terminate the trace.  */
 732                   break;
 733                 }
 734               else
 735                 {
 736                   /* Check for a situation
 737
 738                     A
 739                    /|
 740                   B |
 741                    \|
 742                     C
 743
 744                   where
 745                   EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
 746                     >= EDGE_FREQUENCY (AC).
 747                   (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
 748                   Best ordering is then A B C.
 749
 750                   When optimizing for size, A B C is always the best order.
 751
 752                   This situation is created for example by:
 753
 754                   if (A) B;
 755                   C;
 756
 757                   */
 758
 759                   FOR_EACH_EDGE (e, ei, bb->succs)
 760                     if (e != best_edge
 761                         && (e->flags & EDGE_CAN_FALLTHRU)
 762                         && !(e->flags & EDGE_COMPLEX)
 763                         && !bb_visited_trace (e->dest)
 764                         && single_pred_p (e->dest)
 765                         && !(e->flags & EDGE_CROSSING)
 766                         && single_succ_p (e->dest)
 767                         && (single_succ_edge (e->dest)->flags
 768                             & EDGE_CAN_FALLTHRU)
 769                         && !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
 770                         && single_succ (e->dest) == best_edge->dest
 771                         && (2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge)
 772                             || for_size))
 773                       {
 774                         best_edge = e;
 775                         if (dump_file)
 776                           fprintf (dump_file, "Selecting BB %d\n",
 777                                    best_edge->dest->index);
 778                         break;
 779                       }
 780
 781                   bb->aux = best_edge->dest;
 782                   bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
 783                   bb = best_edge->dest;
 784                 }
 785             }
 786         }
 787       while (best_edge);
 788       trace->last = bb;
 789       bbd[trace->first->index].start_of_trace = *n_traces - 1;
 790       if (bbd[trace->last->index].end_of_trace != *n_traces - 1)
 791         {
 792           bbd[trace->last->index].end_of_trace = *n_traces - 1;
 793           /* Update the cached maximum frequency for interesting predecessor
 794              edges for successors of the new trace end.  */
 795           FOR_EACH_EDGE (e, ei, trace->last->succs)
 796             if (EDGE_FREQUENCY (e) > bbd[e->dest->index].priority)
 797               bbd[e->dest->index].priority = EDGE_FREQUENCY (e);
 798         }
 799
 800       /* The trace is terminated so we have to recount the keys in heap
 801          (some block can have a lower key because now one of its predecessors
 802          is an end of the trace).  */
 803       FOR_EACH_EDGE (e, ei, bb->succs)
 804         {
 805           if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
 806               || bb_visited_trace (e->dest))
 807             continue;
 808
 809           if (bbd[e->dest->index].heap)
 810             {
 811               key = bb_to_key (e->dest);
 812               if (key != bbd[e->dest->index].node->get_key ())
 813                 {
 814                   if (dump_file)
 815                     {
 816                       fprintf (dump_file,
 817                                "Changing key for bb %d from %ld to %ld.\n",
 818                                e->dest->index,
 819                                (long) bbd[e->dest->index].node->get_key (), key);
 820                     }
 821                   bbd[e->dest->index].heap->replace_key
 822                     (bbd[e->dest->index].node, key);
 823                 }
 824             }
 825         }
 826     }
 827
 828   delete (*heap);
 829
 830   /* "Return" the new heap.  */
 831   *heap = new_heap;
 832 }
 833
 834 /* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
 835    it to trace after BB, mark OLD_BB visited and update pass' data structures
 836    (TRACE is a number of trace which OLD_BB is duplicated to).  */
 837
 838 static basic_block
 839 copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
 840 {
 841   basic_block new_bb;
 842
 843   new_bb = duplicate_block (old_bb, e, bb);
 844   BB_COPY_PARTITION (new_bb, old_bb);
 845
 846   gcc_assert (e->dest == new_bb);
 847
 848   if (dump_file)
 849     fprintf (dump_file,
 850              "Duplicated bb %d (created bb %d)\n",
 851              old_bb->index, new_bb->index);
 852
 853   if (new_bb->index >= array_size
 854       || last_basic_block_for_fn (cfun) > array_size)
 855     {
 856       int i;
 857       int new_size;
 858
 859       new_size = MAX (last_basic_block_for_fn (cfun), new_bb->index + 1);
 860       new_size = GET_ARRAY_SIZE (new_size);
 861       bbd = XRESIZEVEC (bbro_basic_block_data, bbd, new_size);
 862       for (i = array_size; i < new_size; i++)
 863         {
 864           bbd[i].start_of_trace = -1;
 865           bbd[i].end_of_trace = -1;
 866           bbd[i].in_trace = -1;
 867           bbd[i].visited = 0;
 868           bbd[i].priority = -1;
 869           bbd[i].heap = NULL;
 870           bbd[i].node = NULL;
 871         }
 872       array_size = new_size;
 873
 874       if (dump_file)
 875         {
 876           fprintf (dump_file,
 877                    "Growing the dynamic array to %d elements.\n",
 878                    array_size);
 879         }
 880     }
 881
 882   gcc_assert (!bb_visited_trace (e->dest));
 883   mark_bb_visited (new_bb, trace);
 884   new_bb->aux = bb->aux;
 885   bb->aux = new_bb;
 886
 887   bbd[new_bb->index].in_trace = trace;
 888
 889   return new_bb;
 890 }
 891
 892 /* Compute and return the key (for the heap) of the basic block BB.  */
 893
 894 static long
 895 bb_to_key (basic_block bb)
 896 {
 897   edge e;
 898   edge_iterator ei;
 899
 900   /* Use index as key to align with its original order.  */
 901   if (optimize_function_for_size_p (cfun))
 902     return bb->index;
 903
 904   /* Do not start in probably never executed blocks.  */
 905
 906   if (BB_PARTITION (bb) == BB_COLD_PARTITION
 907       || probably_never_executed_bb_p (cfun, bb))
 908     return BB_FREQ_MAX;
 909
 910   /* Prefer blocks whose predecessor is an end of some trace
 911      or whose predecessor edge is EDGE_DFS_BACK.  */
 912   int priority = bbd[bb->index].priority;
 913   if (priority == -1)
 914     {
 915       priority = 0;
 916       FOR_EACH_EDGE (e, ei, bb->preds)
 917         {
 918           if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
 919                && bbd[e->src->index].end_of_trace >= 0)
 920               || (e->flags & EDGE_DFS_BACK))
 921             {
 922               int edge_freq = EDGE_FREQUENCY (e);
 923
 924               if (edge_freq > priority)
 925                 priority = edge_freq;
 926             }
 927         }
 928       bbd[bb->index].priority = priority;
 929     }
 930
 931   if (priority)
 932     /* The block with priority should have significantly lower key.  */
 933     return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);
 934
 935   return -bb->frequency;
 936 }
 937
 938 /* Return true when the edge E from basic block BB is better than the temporary
 939    best edge (details are in function).  The probability of edge E is PROB. The
 940    frequency of the successor is FREQ.  The current best probability is
 941    BEST_PROB, the best frequency is BEST_FREQ.
 942    The edge is considered to be equivalent when PROB does not differ much from
 943    BEST_PROB; similarly for frequency.  */
 944
 945 static bool
 946 better_edge_p (const_basic_block bb, const_edge e, profile_probability prob,
 947                int freq, profile_probability best_prob, int best_freq,
 948                const_edge cur_best_edge)
 949 {
 950   bool is_better_edge;
 951
 952   /* The BEST_* values do not have to be best, but can be a bit smaller than
 953      maximum values.  */
 954   profile_probability diff_prob = best_prob.apply_scale (1, 10);
 955   int diff_freq = best_freq / 10;
 956
 957   /* The smaller one is better to keep the original order.  */
 958   if (optimize_function_for_size_p (cfun))
 959     return !cur_best_edge
 960            || cur_best_edge->dest->index > e->dest->index;
 961
 962   /* Those edges are so expensive that continuing a trace is not useful
 963      performance wise.  */
 964   if (e->flags & (EDGE_ABNORMAL | EDGE_EH))
 965     return false;
 966
 967   if (prob > best_prob + diff_prob
 968       || (!best_prob.initialized_p ()
 969           && prob > profile_probability::guessed_never ()))
 970     /* The edge has higher probability than the temporary best edge.  */
 971     is_better_edge = true;
 972   else if (prob < best_prob - diff_prob)
 973     /* The edge has lower probability than the temporary best edge.  */
 974     is_better_edge = false;
 975   else if (freq < best_freq - diff_freq)
 976     /* The edge and the temporary best edge  have almost equivalent
 977        probabilities.  The higher frequency of a successor now means
 978        that there is another edge going into that successor.
 979        This successor has lower frequency so it is better.  */
 980     is_better_edge = true;
 981   else if (freq > best_freq + diff_freq)
 982     /* This successor has higher frequency so it is worse.  */
 983     is_better_edge = false;
 984   else if (e->dest->prev_bb == bb)
 985     /* The edges have equivalent probabilities and the successors
 986        have equivalent frequencies.  Select the previous successor.  */
 987     is_better_edge = true;
 988   else
 989     is_better_edge = false;
 990
 991   /* If we are doing hot/cold partitioning, make sure that we always favor
 992      non-crossing edges over crossing edges.  */
 993
 994   if (!is_better_edge
 995       && flag_reorder_blocks_and_partition
 996       && cur_best_edge
 997       && (cur_best_edge->flags & EDGE_CROSSING)
 998       && !(e->flags & EDGE_CROSSING))
 999     is_better_edge = true;
1000
1001   return is_better_edge;
1002 }
1003
1004 /* Return true when the edge E is better than the temporary best edge
1005    CUR_BEST_EDGE.  If SRC_INDEX_P is true, the function compares the src bb of
1006    E and CUR_BEST_EDGE; otherwise it will compare the dest bb.
1007    BEST_LEN is the trace length of src (or dest) bb in CUR_BEST_EDGE.
1008    TRACES record the information about traces.
1009    When optimizing for size, the edge with smaller index is better.
1010    When optimizing for speed, the edge with bigger probability or longer trace
1011    is better.  */
1012
1013 static bool
1014 connect_better_edge_p (const_edge e, bool src_index_p, int best_len,
1015                        const_edge cur_best_edge, struct trace *traces)
1016 {
1017   int e_index;
1018   int b_index;
1019   bool is_better_edge;
1020
1021   if (!cur_best_edge)
1022     return true;
1023
1024   if (optimize_function_for_size_p (cfun))
1025     {
1026       e_index = src_index_p ? e->src->index : e->dest->index;
1027       b_index = src_index_p ? cur_best_edge->src->index
1028                               : cur_best_edge->dest->index;
1029       /* The smaller one is better to keep the original order.  */
1030       return b_index > e_index;
1031     }
1032
1033   if (src_index_p)
1034     {
1035       e_index = e->src->index;
1036
1037       if (e->probability > cur_best_edge->probability)
1038         /* The edge has higher probability than the temporary best edge.  */
1039         is_better_edge = true;
1040       else if (e->probability < cur_best_edge->probability)
1041         /* The edge has lower probability than the temporary best edge.  */
1042         is_better_edge = false;
1043       else if (traces[bbd[e_index].end_of_trace].length > best_len)
1044         /* The edge and the temporary best edge have equivalent probabilities.
1045            The edge with longer trace is better.  */
1046         is_better_edge = true;
1047       else
1048         is_better_edge = false;
1049     }
1050   else
1051     {
1052       e_index = e->dest->index;
1053
1054       if (e->probability > cur_best_edge->probability)
1055         /* The edge has higher probability than the temporary best edge.  */
1056         is_better_edge = true;
1057       else if (e->probability < cur_best_edge->probability)
1058         /* The edge has lower probability than the temporary best edge.  */
1059         is_better_edge = false;
1060       else if (traces[bbd[e_index].start_of_trace].length > best_len)
1061         /* The edge and the temporary best edge have equivalent probabilities.
1062            The edge with longer trace is better.  */
1063         is_better_edge = true;
1064       else
1065         is_better_edge = false;
1066     }
1067
1068   return is_better_edge;
1069 }
1070
1071 /* Connect traces in array TRACES, N_TRACES is the count of traces.  */
1072
1073 static void
1074 connect_traces (int n_traces, struct trace *traces)
1075 {
1076   int i;
1077   bool *connected;
1078   bool two_passes;
1079   int last_trace;
1080   int current_pass;
1081   int current_partition;
1082   int freq_threshold;
1083   gcov_type count_threshold;
1084   bool for_size = optimize_function_for_size_p (cfun);
1085
1086   freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
1087   if (max_entry_count.to_gcov_type () < INT_MAX / 1000)
1088     count_threshold = max_entry_count.to_gcov_type () * DUPLICATION_THRESHOLD / 1000;
1089   else
1090     count_threshold = max_entry_count.to_gcov_type () / 1000 * DUPLICATION_THRESHOLD;
1091
1092   connected = XCNEWVEC (bool, n_traces);
1093   last_trace = -1;
1094   current_pass = 1;
1095   current_partition = BB_PARTITION (traces[0].first);
1096   two_passes = false;
1097
1098   if (crtl->has_bb_partition)
1099     for (i = 0; i < n_traces && !two_passes; i++)
1100       if (BB_PARTITION (traces[0].first)
1101           != BB_PARTITION (traces[i].first))
1102         two_passes = true;
1103
1104   for (i = 0; i < n_traces || (two_passes && current_pass == 1) ; i++)
1105     {
1106       int t = i;
1107       int t2;
1108       edge e, best;
1109       int best_len;
1110
1111       if (i >= n_traces)
1112         {
1113           gcc_assert (two_passes && current_pass == 1);
1114           i = 0;
1115           t = i;
1116           current_pass = 2;
1117           if (current_partition == BB_HOT_PARTITION)
1118             current_partition = BB_COLD_PARTITION;
1119           else
1120             current_partition = BB_HOT_PARTITION;
1121         }
1122
1123       if (connected[t])
1124         continue;
1125
1126       if (two_passes
1127           && BB_PARTITION (traces[t].first) != current_partition)
1128         continue;
1129
1130       connected[t] = true;
1131
1132       /* Find the predecessor traces.  */
1133       for (t2 = t; t2 > 0;)
1134         {
1135           edge_iterator ei;
1136           best = NULL;
1137           best_len = 0;
1138           FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
1139             {
1140               int si = e->src->index;
1141
1142               if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1143                   && (e->flags & EDGE_CAN_FALLTHRU)
1144                   && !(e->flags & EDGE_COMPLEX)
1145                   && bbd[si].end_of_trace >= 0
1146                   && !connected[bbd[si].end_of_trace]
1147                   && (BB_PARTITION (e->src) == current_partition)
1148                   && connect_better_edge_p (e, true, best_len, best, traces))
1149                 {
1150                   best = e;
1151                   best_len = traces[bbd[si].end_of_trace].length;
1152                 }
1153             }
1154           if (best)
1155             {
1156               best->src->aux = best->dest;
1157               t2 = bbd[best->src->index].end_of_trace;
1158               connected[t2] = true;
1159
1160               if (dump_file)
1161                 {
1162                   fprintf (dump_file, "Connection: %d %d\n",
1163                            best->src->index, best->dest->index);
1164                 }
1165             }
1166           else
1167             break;
1168         }
1169
1170       if (last_trace >= 0)
1171         traces[last_trace].last->aux = traces[t2].first;
1172       last_trace = t;
1173
1174       /* Find the successor traces.  */
1175       while (1)
1176         {
1177           /* Find the continuation of the chain.  */
1178           edge_iterator ei;
1179           best = NULL;
1180           best_len = 0;
1181           FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1182             {
1183               int di = e->dest->index;
1184
1185               if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1186                   && (e->flags & EDGE_CAN_FALLTHRU)
1187                   && !(e->flags & EDGE_COMPLEX)
1188                   && bbd[di].start_of_trace >= 0
1189                   && !connected[bbd[di].start_of_trace]
1190                   && (BB_PARTITION (e->dest) == current_partition)
1191                   && connect_better_edge_p (e, false, best_len, best, traces))
1192                 {
1193                   best = e;
1194                   best_len = traces[bbd[di].start_of_trace].length;
1195                 }
1196             }
1197
1198           if (for_size)
1199             {
1200               if (!best)
1201                 /* Stop finding the successor traces.  */
1202                 break;
1203
1204               /* It is OK to connect block n with block n + 1 or a block
1205                  before n.  For others, only connect to the loop header.  */
1206               if (best->dest->index > (traces[t].last->index + 1))
1207                 {
1208                   int count = EDGE_COUNT (best->dest->preds);
1209
1210                   FOR_EACH_EDGE (e, ei, best->dest->preds)
1211                     if (e->flags & EDGE_DFS_BACK)
1212                       count--;
1213
1214                   /* If dest has multiple predecessors, skip it.  We expect
1215                      that one predecessor with smaller index connects with it
1216                      later.  */
1217                   if (count != 1)
1218                     break;
1219                 }
1220
1221               /* Only connect Trace n with Trace n + 1.  It is conservative
1222                  to keep the order as close as possible to the original order.
1223                  It also helps to reduce long jumps.  */
1224               if (last_trace != bbd[best->dest->index].start_of_trace - 1)
1225                 break;
1226
1227               if (dump_file)
1228                 fprintf (dump_file, "Connection: %d %d\n",
1229                          best->src->index, best->dest->index);
1230
1231               t = bbd[best->dest->index].start_of_trace;
1232               traces[last_trace].last->aux = traces[t].first;
1233               connected[t] = true;
1234               last_trace = t;
1235             }
1236           else if (best)
1237             {
1238               if (dump_file)
1239                 {
1240                   fprintf (dump_file, "Connection: %d %d\n",
1241                            best->src->index, best->dest->index);
1242                 }
1243               t = bbd[best->dest->index].start_of_trace;
1244               traces[last_trace].last->aux = traces[t].first;
1245               connected[t] = true;
1246               last_trace = t;
1247             }
1248           else
1249             {
1250               /* Try to connect the traces by duplication of 1 block.  */
1251               edge e2;
1252               basic_block next_bb = NULL;
1253               bool try_copy = false;
1254
1255               FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1256                 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1257                     && (e->flags & EDGE_CAN_FALLTHRU)
1258                     && !(e->flags & EDGE_COMPLEX)
1259                     && (!best || e->probability > best->probability))
1260                   {
1261                     edge_iterator ei;
1262                     edge best2 = NULL;
1263                     int best2_len = 0;
1264
1265                     /* If the destination is a start of a trace which is only
1266                        one block long, then no need to search the successor
1267                        blocks of the trace.  Accept it.  */
1268                     if (bbd[e->dest->index].start_of_trace >= 0
1269                         && traces[bbd[e->dest->index].start_of_trace].length
1270                            == 1)
1271                       {
1272                         best = e;
1273                         try_copy = true;
1274                         continue;
1275                       }
1276
1277                     FOR_EACH_EDGE (e2, ei, e->dest->succs)
1278                       {
1279                         int di = e2->dest->index;
1280
1281                         if (e2->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
1282                             || ((e2->flags & EDGE_CAN_FALLTHRU)
1283                                 && !(e2->flags & EDGE_COMPLEX)
1284                                 && bbd[di].start_of_trace >= 0
1285                                 && !connected[bbd[di].start_of_trace]
1286                                 && BB_PARTITION (e2->dest) == current_partition
1287                                 && EDGE_FREQUENCY (e2) >= freq_threshold
1288                                 && e2->count >= count_threshold
1289                                 && (!best2
1290                                     || e2->probability > best2->probability
1291                                     || (e2->probability == best2->probability
1292                                         && traces[bbd[di].start_of_trace].length
1293                                            > best2_len))))
1294                           {
1295                             best = e;
1296                             best2 = e2;
1297                             if (e2->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1298                               best2_len = traces[bbd[di].start_of_trace].length;
1299                             else
1300                               best2_len = INT_MAX;
1301                             next_bb = e2->dest;
1302                             try_copy = true;
1303                           }
1304                       }
1305                   }
1306
1307               /* Copy tiny blocks always; copy larger blocks only when the
1308                  edge is traversed frequently enough.  */
1309               if (try_copy
1310                   && BB_PARTITION (best->src) == BB_PARTITION (best->dest)
1311                   && copy_bb_p (best->dest,
1312                                 optimize_edge_for_speed_p (best)
1313                                 && EDGE_FREQUENCY (best) >= freq_threshold
1314                                 && (!best->count.initialized_p ()
1315                                     || best->count >= count_threshold)))
1316                 {
1317                   basic_block new_bb;
1318
1319                   if (dump_file)
1320                     {
1321                       fprintf (dump_file, "Connection: %d %d ",
1322                                traces[t].last->index, best->dest->index);
1323                       if (!next_bb)
1324                         fputc ('\n', dump_file);
1325                       else if (next_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
1326                         fprintf (dump_file, "exit\n");
1327                       else
1328                         fprintf (dump_file, "%d\n", next_bb->index);
1329                     }
1330
1331                   new_bb = copy_bb (best->dest, best, traces[t].last, t);
1332                   traces[t].last = new_bb;
1333                   if (next_bb && next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
1334                     {
1335                       t = bbd[next_bb->index].start_of_trace;
1336                       traces[last_trace].last->aux = traces[t].first;
1337                       connected[t] = true;
1338                       last_trace = t;
1339                     }
1340                   else
1341                     break;      /* Stop finding the successor traces.  */
1342                 }
1343               else
1344                 break;  /* Stop finding the successor traces.  */
1345             }
1346         }
1347     }
1348
1349   if (dump_file)
1350     {
1351       basic_block bb;
1352
1353       fprintf (dump_file, "Final order:\n");
1354       for (bb = traces[0].first; bb; bb = (basic_block) bb->aux)
1355         fprintf (dump_file, "%d ", bb->index);
1356       fprintf (dump_file, "\n");
1357       fflush (dump_file);
1358     }
1359
1360   FREE (connected);
1361 }
1362
1363 /* Return true when BB can and should be copied. CODE_MAY_GROW is true
1364    when code size is allowed to grow by duplication.  */
1365
1366 static bool
1367 copy_bb_p (const_basic_block bb, int code_may_grow)
1368 {
1369   int size = 0;
1370   int max_size = uncond_jump_length;
1371   rtx_insn *insn;
1372
1373   if (!bb->frequency)
1374     return false;
1375   if (EDGE_COUNT (bb->preds) < 2)
1376     return false;
1377   if (!can_duplicate_block_p (bb))
1378     return false;
1379
1380   /* Avoid duplicating blocks which have many successors (PR/13430).  */
1381   if (EDGE_COUNT (bb->succs) > 8)
1382     return false;
1383
1384   if (code_may_grow && optimize_bb_for_speed_p (bb))
1385     max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
1386
1387   FOR_BB_INSNS (bb, insn)
1388     {
1389       if (INSN_P (insn))
1390         size += get_attr_min_length (insn);
1391     }
1392
1393   if (size <= max_size)
1394     return true;
1395
1396   if (dump_file)
1397     {
1398       fprintf (dump_file,
1399                "Block %d can't be copied because its size = %d.\n",
1400                bb->index, size);
1401     }
1402
1403   return false;
1404 }
1405
1406 /* Return the length of unconditional jump instruction.  */
1407
1408 int
1409 get_uncond_jump_length (void)
1410 {
1411   int length;
1412
1413   start_sequence ();
1414   rtx_code_label *label = emit_label (gen_label_rtx ());
1415   rtx_insn *jump = emit_jump_insn (targetm.gen_jump (label));
1416   length = get_attr_min_length (jump);
1417   end_sequence ();
1418
1419   return length;
1420 }
1421
1422 /* The landing pad OLD_LP, in block OLD_BB, has edges from both partitions.
1423    Duplicate the landing pad and split the edges so that no EH edge
1424    crosses partitions.  */
1425
1426 static void
1427 fix_up_crossing_landing_pad (eh_landing_pad old_lp, basic_block old_bb)
1428 {
1429   eh_landing_pad new_lp;
1430   basic_block new_bb, last_bb, post_bb;
1431   rtx_insn *jump;
1432   unsigned new_partition;
1433   edge_iterator ei;
1434   edge e;
1435
1436   /* Generate the new landing-pad structure.  */
1437   new_lp = gen_eh_landing_pad (old_lp->region);
1438   new_lp->post_landing_pad = old_lp->post_landing_pad;
1439   new_lp->landing_pad = gen_label_rtx ();
1440   LABEL_PRESERVE_P (new_lp->landing_pad) = 1;
1441
1442   /* Put appropriate instructions in new bb.  */
1443   rtx_code_label *new_label = emit_label (new_lp->landing_pad);
1444
1445   expand_dw2_landing_pad_for_region (old_lp->region);
1446
1447   post_bb = BLOCK_FOR_INSN (old_lp->landing_pad);
1448   post_bb = single_succ (post_bb);
1449   rtx_code_label *post_label = block_label (post_bb);
1450   jump = emit_jump_insn (targetm.gen_jump (post_label));
1451   JUMP_LABEL (jump) = post_label;
1452
1453   /* Create new basic block to be dest for lp.  */
1454   last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
1455   new_bb = create_basic_block (new_label, jump, last_bb);
1456   new_bb->aux = last_bb->aux;
1457   new_bb->frequency = post_bb->frequency;
1458   new_bb->count = post_bb->count;
1459   last_bb->aux = new_bb;
1460
1461   emit_barrier_after_bb (new_bb);
1462
1463   make_single_succ_edge (new_bb, post_bb, 0);
1464
1465   /* Make sure new bb is in the other partition.  */
1466   new_partition = BB_PARTITION (old_bb);
1467   new_partition ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1468   BB_SET_PARTITION (new_bb, new_partition);
1469
1470   /* Fix up the edges.  */
1471   for (ei = ei_start (old_bb->preds); (e = ei_safe_edge (ei)) != NULL; )
1472     if (BB_PARTITION (e->src) == new_partition)
1473       {
1474         rtx_insn *insn = BB_END (e->src);
1475         rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
1476
1477         gcc_assert (note != NULL);
1478         gcc_checking_assert (INTVAL (XEXP (note, 0)) == old_lp->index);
1479         XEXP (note, 0) = GEN_INT (new_lp->index);
1480
1481         /* Adjust the edge to the new destination.  */
1482         redirect_edge_succ (e, new_bb);
1483       }
1484     else
1485       ei_next (&ei);
1486 }
1487
1488
1489 /* Ensure that all hot bbs are included in a hot path through the
1490    procedure. This is done by calling this function twice, once
1491    with WALK_UP true (to look for paths from the entry to hot bbs) and
1492    once with WALK_UP false (to look for paths from hot bbs to the exit).
1493    Returns the updated value of COLD_BB_COUNT and adds newly-hot bbs
1494    to BBS_IN_HOT_PARTITION.  */
1495
1496 static unsigned int
1497 sanitize_hot_paths (bool walk_up, unsigned int cold_bb_count,
1498                     vec<basic_block> *bbs_in_hot_partition)
1499 {
1500   /* Callers check this.  */
1501   gcc_checking_assert (cold_bb_count);
1502
1503   /* Keep examining hot bbs while we still have some left to check
1504      and there are remaining cold bbs.  */
1505   vec<basic_block> hot_bbs_to_check = bbs_in_hot_partition->copy ();
1506   while (! hot_bbs_to_check.is_empty ()
1507          && cold_bb_count)
1508     {
1509       basic_block bb = hot_bbs_to_check.pop ();
1510       vec<edge, va_gc> *edges = walk_up ? bb->preds : bb->succs;
1511       edge e;
1512       edge_iterator ei;
1513       profile_probability highest_probability
1514                                  = profile_probability::uninitialized ();
1515       int highest_freq = 0;
1516       profile_count highest_count = profile_count::uninitialized ();
1517       bool found = false;
1518
1519       /* Walk the preds/succs and check if there is at least one already
1520          marked hot. Keep track of the most frequent pred/succ so that we
1521          can mark it hot if we don't find one.  */
1522       FOR_EACH_EDGE (e, ei, edges)
1523         {
1524           basic_block reach_bb = walk_up ? e->src : e->dest;
1525
1526           if (e->flags & EDGE_DFS_BACK)
1527             continue;
1528
1529           /* Do not expect profile insanities when profile was not adjusted.  */
1530           if (e->probability == profile_probability::never ()
1531               || e->count == profile_count::zero ())
1532             continue;
1533
1534           if (BB_PARTITION (reach_bb) != BB_COLD_PARTITION)
1535           {
1536             found = true;
1537             break;
1538           }
1539           /* The following loop will look for the hottest edge via
1540              the edge count, if it is non-zero, then fallback to the edge
1541              frequency and finally the edge probability.  */
1542           if (!highest_count.initialized_p () || e->count > highest_count)
1543             highest_count = e->count;
1544           int edge_freq = EDGE_FREQUENCY (e);
1545           if (edge_freq > highest_freq)
1546             highest_freq = edge_freq;
1547           if (!highest_probability.initialized_p ()
1548               || e->probability > highest_probability)
1549             highest_probability = e->probability;
1550         }
1551
1552       /* If bb is reached by (or reaches, in the case of !WALK_UP) another hot
1553          block (or unpartitioned, e.g. the entry block) then it is ok. If not,
1554          then the most frequent pred (or succ) needs to be adjusted.  In the
1555          case where multiple preds/succs have the same frequency (e.g. a
1556          50-50 branch), then both will be adjusted.  */
1557       if (found)
1558         continue;
1559
1560       FOR_EACH_EDGE (e, ei, edges)
1561         {
1562           if (e->flags & EDGE_DFS_BACK)
1563             continue;
1564           /* Do not expect profile insanities when profile was not adjusted.  */
1565           if (e->probability == profile_probability::never ()
1566               || e->count == profile_count::zero ())
1567             continue;
1568           /* Select the hottest edge using the edge count, if it is non-zero,
1569              then fallback to the edge frequency and finally the edge
1570              probability.  */
1571           if (highest_count > 0)
1572             {
1573               if (e->count < highest_count)
1574                 continue;
1575             }
1576           else if (highest_freq)
1577             {
1578               if (EDGE_FREQUENCY (e) < highest_freq)
1579                 continue;
1580             }
1581           else if (e->probability < highest_probability)
1582             continue;
1583
1584           basic_block reach_bb = walk_up ? e->src : e->dest;
1585
1586           /* We have a hot bb with an immediate dominator that is cold.
1587              The dominator needs to be re-marked hot.  */
1588           BB_SET_PARTITION (reach_bb, BB_HOT_PARTITION);
1589           if (dump_file)
1590             fprintf (dump_file, "Promoting bb %i to hot partition to sanitize "
1591                      "profile of bb %i in %s walk\n", reach_bb->index,
1592                      bb->index, walk_up ? "backward" : "forward");
1593           cold_bb_count--;
1594
1595           /* Now we need to examine newly-hot reach_bb to see if it is also
1596              dominated by a cold bb.  */
1597           bbs_in_hot_partition->safe_push (reach_bb);
1598           hot_bbs_to_check.safe_push (reach_bb);
1599         }
1600     }
1601
1602   return cold_bb_count;
1603 }
1604
1605
1606 /* Find the basic blocks that are rarely executed and need to be moved to
1607    a separate section of the .o file (to cut down on paging and improve
1608    cache locality).  Return a vector of all edges that cross.  */
1609
1610 static vec<edge>
1611 find_rarely_executed_basic_blocks_and_crossing_edges (void)
1612 {
1613   vec<edge> crossing_edges = vNULL;
1614   basic_block bb;
1615   edge e;
1616   edge_iterator ei;
1617   unsigned int cold_bb_count = 0;
1618   auto_vec<basic_block> bbs_in_hot_partition;
1619
1620   propagate_unlikely_bbs_forward ();
1621
1622   /* Mark which partition (hot/cold) each basic block belongs in.  */
1623   FOR_EACH_BB_FN (bb, cfun)
1624     {
1625       bool cold_bb = false;
1626
1627       if (probably_never_executed_bb_p (cfun, bb))
1628         {
1629           /* Handle profile insanities created by upstream optimizations
1630              by also checking the incoming edge weights. If there is a non-cold
1631              incoming edge, conservatively prevent this block from being split
1632              into the cold section.  */
1633           cold_bb = true;
1634           FOR_EACH_EDGE (e, ei, bb->preds)
1635             if (!probably_never_executed_edge_p (cfun, e))
1636               {
1637                 cold_bb = false;
1638                 break;
1639               }
1640         }
1641       if (cold_bb)
1642         {
1643           BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1644           cold_bb_count++;
1645         }
1646       else
1647         {
1648           BB_SET_PARTITION (bb, BB_HOT_PARTITION);
1649           bbs_in_hot_partition.safe_push (bb);
1650         }
1651     }
1652
1653   /* Ensure that hot bbs are included along a hot path from the entry to exit.
1654      Several different possibilities may include cold bbs along all paths
1655      to/from a hot bb. One is that there are edge weight insanities
1656      due to optimization phases that do not properly update basic block profile
1657      counts. The second is that the entry of the function may not be hot, because
1658      it is entered fewer times than the number of profile training runs, but there
1659      is a loop inside the function that causes blocks within the function to be
1660      above the threshold for hotness. This is fixed by walking up from hot bbs
1661      to the entry block, and then down from hot bbs to the exit, performing
1662      partitioning fixups as necessary.  */
1663   if (cold_bb_count)
1664     {
1665       mark_dfs_back_edges ();
1666       cold_bb_count = sanitize_hot_paths (true, cold_bb_count,
1667                                           &bbs_in_hot_partition);
1668       if (cold_bb_count)
1669         sanitize_hot_paths (false, cold_bb_count, &bbs_in_hot_partition);
1670
1671       hash_set <basic_block> set;
1672       find_bbs_reachable_by_hot_paths (&set);
1673       FOR_EACH_BB_FN (bb, cfun)
1674         if (!set.contains (bb))
1675           BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1676     }
1677
1678   /* The format of .gcc_except_table does not allow landing pads to
1679      be in a different partition as the throw.  Fix this by either
1680      moving or duplicating the landing pads.  */
1681   if (cfun->eh->lp_array)
1682     {
1683       unsigned i;
1684       eh_landing_pad lp;
1685
1686       FOR_EACH_VEC_ELT (*cfun->eh->lp_array, i, lp)
1687         {
1688           bool all_same, all_diff;
1689
1690           if (lp == NULL
1691               || lp->landing_pad == NULL_RTX
1692               || !LABEL_P (lp->landing_pad))
1693             continue;
1694
1695           all_same = all_diff = true;
1696           bb = BLOCK_FOR_INSN (lp->landing_pad);
1697           FOR_EACH_EDGE (e, ei, bb->preds)
1698             {
1699               gcc_assert (e->flags & EDGE_EH);
1700               if (BB_PARTITION (bb) == BB_PARTITION (e->src))
1701                 all_diff = false;
1702               else
1703                 all_same = false;
1704             }
1705
1706           if (all_same)
1707             ;
1708           else if (all_diff)
1709             {
1710               int which = BB_PARTITION (bb);
1711               which ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1712               BB_SET_PARTITION (bb, which);
1713             }
1714           else
1715             fix_up_crossing_landing_pad (lp, bb);
1716         }
1717     }
1718
1719   /* Mark every edge that crosses between sections.  */
1720
1721   FOR_EACH_BB_FN (bb, cfun)
1722     FOR_EACH_EDGE (e, ei, bb->succs)
1723       {
1724         unsigned int flags = e->flags;
1725
1726         /* We should never have EDGE_CROSSING set yet.  */
1727         gcc_checking_assert ((flags & EDGE_CROSSING) == 0);
1728
1729         if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1730             && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1731             && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
1732           {
1733             crossing_edges.safe_push (e);
1734             flags |= EDGE_CROSSING;
1735           }
1736
1737         /* Now that we've split eh edges as appropriate, allow landing pads
1738            to be merged with the post-landing pads.  */
1739         flags &= ~EDGE_PRESERVE;
1740
1741         e->flags = flags;
1742       }
1743
1744   return crossing_edges;
1745 }
1746
1747 /* Set the flag EDGE_CAN_FALLTHRU for edges that can be fallthru.  */
1748
1749 static void
1750 set_edge_can_fallthru_flag (void)
1751 {
1752   basic_block bb;
1753
1754   FOR_EACH_BB_FN (bb, cfun)
1755     {
1756       edge e;
1757       edge_iterator ei;
1758
1759       FOR_EACH_EDGE (e, ei, bb->succs)
1760         {
1761           e->flags &= ~EDGE_CAN_FALLTHRU;
1762
1763           /* The FALLTHRU edge is also CAN_FALLTHRU edge.  */
1764           if (e->flags & EDGE_FALLTHRU)
1765             e->flags |= EDGE_CAN_FALLTHRU;
1766         }
1767
1768       /* If the BB ends with an invertible condjump all (2) edges are
1769          CAN_FALLTHRU edges.  */
1770       if (EDGE_COUNT (bb->succs) != 2)
1771         continue;
1772       if (!any_condjump_p (BB_END (bb)))
1773         continue;
1774
1775       rtx_jump_insn *bb_end_jump = as_a <rtx_jump_insn *> (BB_END (bb));
1776       if (!invert_jump (bb_end_jump, JUMP_LABEL (bb_end_jump), 0))
1777         continue;
1778       invert_jump (bb_end_jump, JUMP_LABEL (bb_end_jump), 0);
1779       EDGE_SUCC (bb, 0)->flags |= EDGE_CAN_FALLTHRU;
1780       EDGE_SUCC (bb, 1)->flags |= EDGE_CAN_FALLTHRU;
1781     }
1782 }
1783
1784 /* If any destination of a crossing edge does not have a label, add label;
1785    Convert any easy fall-through crossing edges to unconditional jumps.  */
1786
1787 static void
1788 add_labels_and_missing_jumps (vec<edge> crossing_edges)
1789 {
1790   size_t i;
1791   edge e;
1792
1793   FOR_EACH_VEC_ELT (crossing_edges, i, e)
1794     {
1795       basic_block src = e->src;
1796       basic_block dest = e->dest;
1797       rtx_jump_insn *new_jump;
1798
1799       if (dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
1800         continue;
1801
1802       /* Make sure dest has a label.  */
1803       rtx_code_label *label = block_label (dest);
1804
1805       /* Nothing to do for non-fallthru edges.  */
1806       if (src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1807         continue;
1808       if ((e->flags & EDGE_FALLTHRU) == 0)
1809         continue;
1810
1811       /* If the block does not end with a control flow insn, then we
1812          can trivially add a jump to the end to fixup the crossing.
1813          Otherwise the jump will have to go in a new bb, which will
1814          be handled by fix_up_fall_thru_edges function.  */
1815       if (control_flow_insn_p (BB_END (src)))
1816         continue;
1817
1818       /* Make sure there's only one successor.  */
1819       gcc_assert (single_succ_p (src));
1820
1821       new_jump = emit_jump_insn_after (targetm.gen_jump (label), BB_END (src));
1822       BB_END (src) = new_jump;
1823       JUMP_LABEL (new_jump) = label;
1824       LABEL_NUSES (label) += 1;
1825
1826       emit_barrier_after_bb (src);
1827
1828       /* Mark edge as non-fallthru.  */
1829       e->flags &= ~EDGE_FALLTHRU;
1830     }
1831 }
1832
1833 /* Find any bb's where the fall-through edge is a crossing edge (note that
1834    these bb's must also contain a conditional jump or end with a call
1835    instruction; we've already dealt with fall-through edges for blocks
1836    that didn't have a conditional jump or didn't end with call instruction
1837    in the call to add_labels_and_missing_jumps).  Convert the fall-through
1838    edge to non-crossing edge by inserting a new bb to fall-through into.
1839    The new bb will contain an unconditional jump (crossing edge) to the
1840    original fall through destination.  */
1841
1842 static void
1843 fix_up_fall_thru_edges (void)
1844 {
1845   basic_block cur_bb;
1846
1847   FOR_EACH_BB_FN (cur_bb, cfun)
1848     {
1849       edge succ1;
1850       edge succ2;
1851       edge fall_thru = NULL;
1852       edge cond_jump = NULL;
1853
1854       fall_thru = NULL;
1855       if (EDGE_COUNT (cur_bb->succs) > 0)
1856         succ1 = EDGE_SUCC (cur_bb, 0);
1857       else
1858         succ1 = NULL;
1859
1860       if (EDGE_COUNT (cur_bb->succs) > 1)
1861         succ2 = EDGE_SUCC (cur_bb, 1);
1862       else
1863         succ2 = NULL;
1864
1865       /* Find the fall-through edge.  */
1866
1867       if (succ1
1868           && (succ1->flags & EDGE_FALLTHRU))
1869         {
1870           fall_thru = succ1;
1871           cond_jump = succ2;
1872         }
1873       else if (succ2
1874                && (succ2->flags & EDGE_FALLTHRU))
1875         {
1876           fall_thru = succ2;
1877           cond_jump = succ1;
1878         }
1879       else if (succ2 && EDGE_COUNT (cur_bb->succs) > 2)
1880         fall_thru = find_fallthru_edge (cur_bb->succs);
1881
1882       if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)))
1883         {
1884           /* Check to see if the fall-thru edge is a crossing edge.  */
1885
1886           if (fall_thru->flags & EDGE_CROSSING)
1887             {
1888               /* The fall_thru edge crosses; now check the cond jump edge, if
1889                  it exists.  */
1890
1891               bool cond_jump_crosses = true;
1892               int invert_worked = 0;
1893               rtx_insn *old_jump = BB_END (cur_bb);
1894
1895               /* Find the jump instruction, if there is one.  */
1896
1897               if (cond_jump)
1898                 {
1899                   if (!(cond_jump->flags & EDGE_CROSSING))
1900                     cond_jump_crosses = false;
1901
1902                   /* We know the fall-thru edge crosses; if the cond
1903                      jump edge does NOT cross, and its destination is the
1904                      next block in the bb order, invert the jump
1905                      (i.e. fix it so the fall through does not cross and
1906                      the cond jump does).  */
1907
1908                   if (!cond_jump_crosses)
1909                     {
1910                       /* Find label in fall_thru block. We've already added
1911                          any missing labels, so there must be one.  */
1912
1913                       rtx_code_label *fall_thru_label
1914                         = block_label (fall_thru->dest);
1915
1916                       if (old_jump && fall_thru_label)
1917                         {
1918                           rtx_jump_insn *old_jump_insn
1919                             = dyn_cast <rtx_jump_insn *> (old_jump);
1920                           if (old_jump_insn)
1921                             invert_worked = invert_jump (old_jump_insn,
1922                                                          fall_thru_label, 0);
1923                         }
1924
1925                       if (invert_worked)
1926                         {
1927                           fall_thru->flags &= ~EDGE_FALLTHRU;
1928                           cond_jump->flags |= EDGE_FALLTHRU;
1929                           update_br_prob_note (cur_bb);
1930                           std::swap (fall_thru, cond_jump);
1931                           cond_jump->flags |= EDGE_CROSSING;
1932                           fall_thru->flags &= ~EDGE_CROSSING;
1933                         }
1934                     }
1935                 }
1936
1937               if (cond_jump_crosses || !invert_worked)
1938                 {
1939                   /* This is the case where both edges out of the basic
1940                      block are crossing edges. Here we will fix up the
1941                      fall through edge. The jump edge will be taken care
1942                      of later.  The EDGE_CROSSING flag of fall_thru edge
1943                      is unset before the call to force_nonfallthru
1944                      function because if a new basic-block is created
1945                      this edge remains in the current section boundary
1946                      while the edge between new_bb and the fall_thru->dest
1947                      becomes EDGE_CROSSING.  */
1948
1949                   fall_thru->flags &= ~EDGE_CROSSING;
1950                   basic_block new_bb = force_nonfallthru (fall_thru);
1951
1952                   if (new_bb)
1953                     {
1954                       new_bb->aux = cur_bb->aux;
1955                       cur_bb->aux = new_bb;
1956
1957                       /* This is done by force_nonfallthru_and_redirect.  */
1958                       gcc_assert (BB_PARTITION (new_bb)
1959                                   == BB_PARTITION (cur_bb));
1960
1961                       single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
1962                     }
1963                   else
1964                     {
1965                       /* If a new basic-block was not created; restore
1966                          the EDGE_CROSSING flag.  */
1967                       fall_thru->flags |= EDGE_CROSSING;
1968                     }
1969
1970                   /* Add barrier after new jump */
1971                   emit_barrier_after_bb (new_bb ? new_bb : cur_bb);
1972                 }
1973             }
1974         }
1975     }
1976 }
1977
1978 /* This function checks the destination block of a "crossing jump" to
1979    see if it has any crossing predecessors that begin with a code label
1980    and end with an unconditional jump.  If so, it returns that predecessor
1981    block.  (This is to avoid creating lots of new basic blocks that all
1982    contain unconditional jumps to the same destination).  */
1983
1984 static basic_block
1985 find_jump_block (basic_block jump_dest)
1986 {
1987   basic_block source_bb = NULL;
1988   edge e;
1989   rtx_insn *insn;
1990   edge_iterator ei;
1991
1992   FOR_EACH_EDGE (e, ei, jump_dest->preds)
1993     if (e->flags & EDGE_CROSSING)
1994       {
1995         basic_block src = e->src;
1996
1997         /* Check each predecessor to see if it has a label, and contains
1998            only one executable instruction, which is an unconditional jump.
1999            If so, we can use it.  */
2000
2001         if (LABEL_P (BB_HEAD (src)))
2002           for (insn = BB_HEAD (src);
2003                !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
2004                insn = NEXT_INSN (insn))
2005             {
2006               if (INSN_P (insn)
2007                   && insn == BB_END (src)
2008                   && JUMP_P (insn)
2009                   && !any_condjump_p (insn))
2010                 {
2011                   source_bb = src;
2012                   break;
2013                 }
2014             }
2015
2016         if (source_bb)
2017           break;
2018       }
2019
2020   return source_bb;
2021 }
2022
2023 /* Find all BB's with conditional jumps that are crossing edges;
2024    insert a new bb and make the conditional jump branch to the new
2025    bb instead (make the new bb same color so conditional branch won't
2026    be a 'crossing' edge).  Insert an unconditional jump from the
2027    new bb to the original destination of the conditional jump.  */
2028
2029 static void
2030 fix_crossing_conditional_branches (void)
2031 {
2032   basic_block cur_bb;
2033   basic_block new_bb;
2034   basic_block dest;
2035   edge succ1;
2036   edge succ2;
2037   edge crossing_edge;
2038   edge new_edge;
2039   rtx set_src;
2040   rtx old_label = NULL_RTX;
2041   rtx_code_label *new_label;
2042
2043   FOR_EACH_BB_FN (cur_bb, cfun)
2044     {
2045       crossing_edge = NULL;
2046       if (EDGE_COUNT (cur_bb->succs) > 0)
2047         succ1 = EDGE_SUCC (cur_bb, 0);
2048       else
2049         succ1 = NULL;
2050
2051       if (EDGE_COUNT (cur_bb->succs) > 1)
2052         succ2 = EDGE_SUCC (cur_bb, 1);
2053       else
2054         succ2 = NULL;
2055
2056       /* We already took care of fall-through edges, so only one successor
2057          can be a crossing edge.  */
2058
2059       if (succ1 && (succ1->flags & EDGE_CROSSING))
2060         crossing_edge = succ1;
2061       else if (succ2 && (succ2->flags & EDGE_CROSSING))
2062         crossing_edge = succ2;
2063
2064       if (crossing_edge)
2065         {
2066           rtx_insn *old_jump = BB_END (cur_bb);
2067
2068           /* Check to make sure the jump instruction is a
2069              conditional jump.  */
2070
2071           set_src = NULL_RTX;
2072
2073           if (any_condjump_p (old_jump))
2074             {
2075               if (GET_CODE (PATTERN (old_jump)) == SET)
2076                 set_src = SET_SRC (PATTERN (old_jump));
2077               else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
2078                 {
2079                   set_src = XVECEXP (PATTERN (old_jump), 0,0);
2080                   if (GET_CODE (set_src) == SET)
2081                     set_src = SET_SRC (set_src);
2082                   else
2083                     set_src = NULL_RTX;
2084                 }
2085             }
2086
2087           if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
2088             {
2089               rtx_jump_insn *old_jump_insn =
2090                         as_a <rtx_jump_insn *> (old_jump);
2091
2092               if (GET_CODE (XEXP (set_src, 1)) == PC)
2093                 old_label = XEXP (set_src, 2);
2094               else if (GET_CODE (XEXP (set_src, 2)) == PC)
2095                 old_label = XEXP (set_src, 1);
2096
2097               /* Check to see if new bb for jumping to that dest has
2098                  already been created; if so, use it; if not, create
2099                  a new one.  */
2100
2101               new_bb = find_jump_block (crossing_edge->dest);
2102
2103               if (new_bb)
2104                 new_label = block_label (new_bb);
2105               else
2106                 {
2107                   basic_block last_bb;
2108                   rtx_code_label *old_jump_target;
2109                   rtx_jump_insn *new_jump;
2110
2111                   /* Create new basic block to be dest for
2112                      conditional jump.  */
2113
2114                   /* Put appropriate instructions in new bb.  */
2115
2116                   new_label = gen_label_rtx ();
2117                   emit_label (new_label);
2118
2119                   gcc_assert (GET_CODE (old_label) == LABEL_REF);
2120                   old_jump_target = old_jump_insn->jump_target ();
2121                   new_jump = as_a <rtx_jump_insn *>
2122                     (emit_jump_insn (targetm.gen_jump (old_jump_target)));
2123                   new_jump->set_jump_target (old_jump_target);
2124
2125                   last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
2126                   new_bb = create_basic_block (new_label, new_jump, last_bb);
2127                   new_bb->aux = last_bb->aux;
2128                   last_bb->aux = new_bb;
2129
2130                   emit_barrier_after_bb (new_bb);
2131
2132                   /* Make sure new bb is in same partition as source
2133                      of conditional branch.  */
2134                   BB_COPY_PARTITION (new_bb, cur_bb);
2135                 }
2136
2137               /* Make old jump branch to new bb.  */
2138
2139               redirect_jump (old_jump_insn, new_label, 0);
2140
2141               /* Remove crossing_edge as predecessor of 'dest'.  */
2142
2143               dest = crossing_edge->dest;
2144
2145               redirect_edge_succ (crossing_edge, new_bb);
2146
2147               /* Make a new edge from new_bb to old dest; new edge
2148                  will be a successor for new_bb and a predecessor
2149                  for 'dest'.  */
2150
2151               if (EDGE_COUNT (new_bb->succs) == 0)
2152                 new_edge = make_single_succ_edge (new_bb, dest, 0);
2153               else
2154                 new_edge = EDGE_SUCC (new_bb, 0);
2155
2156               crossing_edge->flags &= ~EDGE_CROSSING;
2157               new_edge->flags |= EDGE_CROSSING;
2158             }
2159         }
2160     }
2161 }
2162
2163 /* Find any unconditional branches that cross between hot and cold
2164    sections.  Convert them into indirect jumps instead.  */
2165
2166 static void
2167 fix_crossing_unconditional_branches (void)
2168 {
2169   basic_block cur_bb;
2170   rtx_insn *last_insn;
2171   rtx label;
2172   rtx label_addr;
2173   rtx_insn *indirect_jump_sequence;
2174   rtx_insn *jump_insn = NULL;
2175   rtx new_reg;
2176   rtx_insn *cur_insn;
2177   edge succ;
2178
2179   FOR_EACH_BB_FN (cur_bb, cfun)
2180     {
2181       last_insn = BB_END (cur_bb);
2182
2183       if (EDGE_COUNT (cur_bb->succs) < 1)
2184         continue;
2185
2186       succ = EDGE_SUCC (cur_bb, 0);
2187
2188       /* Check to see if bb ends in a crossing (unconditional) jump.  At
2189          this point, no crossing jumps should be conditional.  */
2190
2191       if (JUMP_P (last_insn)
2192           && (succ->flags & EDGE_CROSSING))
2193         {
2194           gcc_assert (!any_condjump_p (last_insn));
2195
2196           /* Make sure the jump is not already an indirect or table jump.  */
2197
2198           if (!computed_jump_p (last_insn)
2199               && !tablejump_p (last_insn, NULL, NULL))
2200             {
2201               /* We have found a "crossing" unconditional branch.  Now
2202                  we must convert it to an indirect jump.  First create
2203                  reference of label, as target for jump.  */
2204
2205               label = JUMP_LABEL (last_insn);
2206               label_addr = gen_rtx_LABEL_REF (Pmode, label);
2207               LABEL_NUSES (label) += 1;
2208
2209               /* Get a register to use for the indirect jump.  */
2210
2211               new_reg = gen_reg_rtx (Pmode);
2212
2213               /* Generate indirect the jump sequence.  */
2214
2215               start_sequence ();
2216               emit_move_insn (new_reg, label_addr);
2217               emit_indirect_jump (new_reg);
2218               indirect_jump_sequence = get_insns ();
2219               end_sequence ();
2220
2221               /* Make sure every instruction in the new jump sequence has
2222                  its basic block set to be cur_bb.  */
2223
2224               for (cur_insn = indirect_jump_sequence; cur_insn;
2225                    cur_insn = NEXT_INSN (cur_insn))
2226                 {
2227                   if (!BARRIER_P (cur_insn))
2228                     BLOCK_FOR_INSN (cur_insn) = cur_bb;
2229                   if (JUMP_P (cur_insn))
2230                     jump_insn = cur_insn;
2231                 }
2232
2233               /* Insert the new (indirect) jump sequence immediately before
2234                  the unconditional jump, then delete the unconditional jump.  */
2235
2236               emit_insn_before (indirect_jump_sequence, last_insn);
2237               delete_insn (last_insn);
2238
2239               JUMP_LABEL (jump_insn) = label;
2240               LABEL_NUSES (label)++;
2241
2242               /* Make BB_END for cur_bb be the jump instruction (NOT the
2243                  barrier instruction at the end of the sequence...).  */
2244
2245               BB_END (cur_bb) = jump_insn;
2246             }
2247         }
2248     }
2249 }
2250
2251 /* Update CROSSING_JUMP_P flags on all jump insns.  */
2252
2253 static void
2254 update_crossing_jump_flags (void)
2255 {
2256   basic_block bb;
2257   edge e;
2258   edge_iterator ei;
2259
2260   FOR_EACH_BB_FN (bb, cfun)
2261     FOR_EACH_EDGE (e, ei, bb->succs)
2262       if (e->flags & EDGE_CROSSING)
2263         {
2264           if (JUMP_P (BB_END (bb))
2265               /* Some flags were added during fix_up_fall_thru_edges, via
2266                  force_nonfallthru_and_redirect.  */
2267               && !CROSSING_JUMP_P (BB_END (bb)))
2268             CROSSING_JUMP_P (BB_END (bb)) = 1;
2269           break;
2270         }
2271 }
2272
2273 /* Reorder basic blocks using the software trace cache (STC) algorithm.  */
2274
2275 static void
2276 reorder_basic_blocks_software_trace_cache (void)
2277 {
2278   if (dump_file)
2279     fprintf (dump_file, "\nReordering with the STC algorithm.\n\n");
2280
2281   int n_traces;
2282   int i;
2283   struct trace *traces;
2284
2285   /* We are estimating the length of uncond jump insn only once since the code
2286      for getting the insn length always returns the minimal length now.  */
2287   if (uncond_jump_length == 0)
2288     uncond_jump_length = get_uncond_jump_length ();
2289
2290   /* We need to know some information for each basic block.  */
2291   array_size = GET_ARRAY_SIZE (last_basic_block_for_fn (cfun));
2292   bbd = XNEWVEC (bbro_basic_block_data, array_size);
2293   for (i = 0; i < array_size; i++)
2294     {
2295       bbd[i].start_of_trace = -1;
2296       bbd[i].end_of_trace = -1;
2297       bbd[i].in_trace = -1;
2298       bbd[i].visited = 0;
2299       bbd[i].priority = -1;
2300       bbd[i].heap = NULL;
2301       bbd[i].node = NULL;
2302     }
2303
2304   traces = XNEWVEC (struct trace, n_basic_blocks_for_fn (cfun));
2305   n_traces = 0;
2306   find_traces (&n_traces, traces);
2307   connect_traces (n_traces, traces);
2308   FREE (traces);
2309   FREE (bbd);
2310 }
2311
2312 /* Return true if edge E1 is more desirable as a fallthrough edge than
2313    edge E2 is.  */
2314
2315 static bool
2316 edge_order (edge e1, edge e2)
2317 {
2318   return EDGE_FREQUENCY (e1) > EDGE_FREQUENCY (e2);
2319 }
2320
2321 /* Reorder basic blocks using the "simple" algorithm.  This tries to
2322    maximize the dynamic number of branches that are fallthrough, without
2323    copying instructions.  The algorithm is greedy, looking at the most
2324    frequently executed branch first.  */
2325
2326 static void
2327 reorder_basic_blocks_simple (void)
2328 {
2329   if (dump_file)
2330     fprintf (dump_file, "\nReordering with the \"simple\" algorithm.\n\n");
2331
2332   edge *edges = new edge[2 * n_basic_blocks_for_fn (cfun)];
2333
2334   /* First, collect all edges that can be optimized by reordering blocks:
2335      simple jumps and conditional jumps, as well as the function entry edge.  */
2336
2337   int n = 0;
2338   edges[n++] = EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun), 0);
2339
2340   basic_block bb;
2341   FOR_EACH_BB_FN (bb, cfun)
2342     {
2343       rtx_insn *end = BB_END (bb);
2344
2345       if (computed_jump_p (end) || tablejump_p (end, NULL, NULL))
2346         continue;
2347
2348       /* We cannot optimize asm goto.  */
2349       if (JUMP_P (end) && extract_asm_operands (end))
2350         continue;
2351
2352       if (single_succ_p (bb))
2353         edges[n++] = EDGE_SUCC (bb, 0);
2354       else if (any_condjump_p (end))
2355         {
2356           edge e0 = EDGE_SUCC (bb, 0);
2357           edge e1 = EDGE_SUCC (bb, 1);
2358           /* When optimizing for size it is best to keep the original
2359              fallthrough edges.  */
2360           if (e1->flags & EDGE_FALLTHRU)
2361             std::swap (e0, e1);
2362           edges[n++] = e0;
2363           edges[n++] = e1;
2364         }
2365     }
2366
2367   /* Sort the edges, the most desirable first.  When optimizing for size
2368      all edges are equally desirable.  */
2369
2370   if (optimize_function_for_speed_p (cfun))
2371     std::stable_sort (edges, edges + n, edge_order);
2372
2373   /* Now decide which of those edges to make fallthrough edges.  We set
2374      BB_VISITED if a block already has a fallthrough successor assigned
2375      to it.  We make ->AUX of an endpoint point to the opposite endpoint
2376      of a sequence of blocks that fall through, and ->AUX will be NULL
2377      for a block that is in such a sequence but not an endpoint anymore.
2378
2379      To start with, everything points to itself, nothing is assigned yet.  */
2380
2381   FOR_ALL_BB_FN (bb, cfun)
2382     {
2383       bb->aux = bb;
2384       bb->flags &= ~BB_VISITED;
2385     }
2386
2387   EXIT_BLOCK_PTR_FOR_FN (cfun)->aux = 0;
2388
2389   /* Now for all edges, the most desirable first, see if that edge can
2390      connect two sequences.  If it can, update AUX and BB_VISITED; if it
2391      cannot, zero out the edge in the table.  */
2392
2393   for (int j = 0; j < n; j++)
2394     {
2395       edge e = edges[j];
2396
2397       basic_block tail_a = e->src;
2398       basic_block head_b = e->dest;
2399       basic_block head_a = (basic_block) tail_a->aux;
2400       basic_block tail_b = (basic_block) head_b->aux;
2401
2402       /* An edge cannot connect two sequences if:
2403          - it crosses partitions;
2404          - its src is not a current endpoint;
2405          - its dest is not a current endpoint;
2406          - or, it would create a loop.  */
2407
2408       if (e->flags & EDGE_CROSSING
2409           || tail_a->flags & BB_VISITED
2410           || !tail_b
2411           || (!(head_b->flags & BB_VISITED) && head_b != tail_b)
2412           || tail_a == tail_b)
2413         {
2414           edges[j] = 0;
2415           continue;
2416         }
2417
2418       tail_a->aux = 0;
2419       head_b->aux = 0;
2420       head_a->aux = tail_b;
2421       tail_b->aux = head_a;
2422       tail_a->flags |= BB_VISITED;
2423     }
2424
2425   /* Put the pieces together, in the same order that the start blocks of
2426      the sequences already had.  The hot/cold partitioning gives a little
2427      complication: as a first pass only do this for blocks in the same
2428      partition as the start block, and (if there is anything left to do)
2429      in a second pass handle the other partition.  */
2430
2431   basic_block last_tail = (basic_block) ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux;
2432
2433   int current_partition = BB_PARTITION (last_tail);
2434   bool need_another_pass = true;
2435
2436   for (int pass = 0; pass < 2 && need_another_pass; pass++)
2437     {
2438       need_another_pass = false;
2439
2440       FOR_EACH_BB_FN (bb, cfun)
2441         if ((bb->flags & BB_VISITED && bb->aux) || bb->aux == bb)
2442           {
2443             if (BB_PARTITION (bb) != current_partition)
2444               {
2445                 need_another_pass = true;
2446                 continue;
2447               }
2448
2449             last_tail->aux = bb;
2450             last_tail = (basic_block) bb->aux;
2451           }
2452
2453       current_partition ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
2454     }
2455
2456   last_tail->aux = 0;
2457
2458   /* Finally, link all the chosen fallthrough edges.  */
2459
2460   for (int j = 0; j < n; j++)
2461     if (edges[j])
2462       edges[j]->src->aux = edges[j]->dest;
2463
2464   delete[] edges;
2465
2466   /* If the entry edge no longer falls through we have to make a new
2467      block so it can do so again.  */
2468
2469   edge e = EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun), 0);
2470   if (e->dest != ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux)
2471     {
2472       force_nonfallthru (e);
2473       e->src->aux = ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux;
2474       BB_COPY_PARTITION (e->src, e->dest);
2475     }
2476 }
2477
2478 /* Reorder basic blocks.  The main entry point to this file.  */
2479
2480 static void
2481 reorder_basic_blocks (void)
2482 {
2483   gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);
2484
2485   if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1)
2486     return;
2487
2488   set_edge_can_fallthru_flag ();
2489   mark_dfs_back_edges ();
2490
2491   switch (flag_reorder_blocks_algorithm)
2492     {
2493     case REORDER_BLOCKS_ALGORITHM_SIMPLE:
2494       reorder_basic_blocks_simple ();
2495       break;
2496
2497     case REORDER_BLOCKS_ALGORITHM_STC:
2498       reorder_basic_blocks_software_trace_cache ();
2499       break;
2500
2501     default:
2502       gcc_unreachable ();
2503     }
2504
2505   relink_block_chain (/*stay_in_cfglayout_mode=*/true);
2506
2507   if (dump_file)
2508     {
2509       if (dump_flags & TDF_DETAILS)
2510         dump_reg_info (dump_file);
2511       dump_flow_info (dump_file, dump_flags);
2512     }
2513
2514   /* Signal that rtl_verify_flow_info_1 can now verify that there
2515      is at most one switch between hot/cold sections.  */
2516   crtl->bb_reorder_complete = true;
2517 }
2518
2519 /* Determine which partition the first basic block in the function
2520    belongs to, then find the first basic block in the current function
2521    that belongs to a different section, and insert a
2522    NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
2523    instruction stream.  When writing out the assembly code,
2524    encountering this note will make the compiler switch between the
2525    hot and cold text sections.  */
2526
2527 void
2528 insert_section_boundary_note (void)
2529 {
2530   basic_block bb;
2531   bool switched_sections = false;
2532   int current_partition = 0;
2533
2534   if (!crtl->has_bb_partition)
2535     return;
2536
2537   FOR_EACH_BB_FN (bb, cfun)
2538     {
2539       if (!current_partition)
2540         current_partition = BB_PARTITION (bb);
2541       if (BB_PARTITION (bb) != current_partition)
2542         {
2543           gcc_assert (!switched_sections);
2544           switched_sections = true;
2545           emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS, BB_HEAD (bb));
2546           current_partition = BB_PARTITION (bb);
2547         }
2548     }
2549 }
2550
2551 namespace {
2552
2553 const pass_data pass_data_reorder_blocks =
2554 {
2555   RTL_PASS, /* type */
2556   "bbro", /* name */
2557   OPTGROUP_NONE, /* optinfo_flags */
2558   TV_REORDER_BLOCKS, /* tv_id */
2559   0, /* properties_required */
2560   0, /* properties_provided */
2561   0, /* properties_destroyed */
2562   0, /* todo_flags_start */
2563   0, /* todo_flags_finish */
2564 };
2565
2566 class pass_reorder_blocks : public rtl_opt_pass
2567 {
2568 public:
2569   pass_reorder_blocks (gcc::context *ctxt)
2570     : rtl_opt_pass (pass_data_reorder_blocks, ctxt)
2571   {}
2572
2573   /* opt_pass methods: */
2574   virtual bool gate (function *)
2575     {
2576       if (targetm.cannot_modify_jumps_p ())
2577         return false;
2578       return (optimize > 0
2579               && (flag_reorder_blocks || flag_reorder_blocks_and_partition));
2580     }
2581
2582   virtual unsigned int execute (function *);
2583
2584 }; // class pass_reorder_blocks
2585
2586 unsigned int
2587 pass_reorder_blocks::execute (function *fun)
2588 {
2589   basic_block bb;
2590
2591   /* Last attempt to optimize CFG, as scheduling, peepholing and insn
2592      splitting possibly introduced more crossjumping opportunities.  */
2593   cfg_layout_initialize (CLEANUP_EXPENSIVE);
2594
2595   reorder_basic_blocks ();
2596   cleanup_cfg (CLEANUP_EXPENSIVE);
2597
2598   FOR_EACH_BB_FN (bb, fun)
2599     if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2600       bb->aux = bb->next_bb;
2601   cfg_layout_finalize ();
2602
2603   return 0;
2604 }
2605
2606 } // anon namespace
2607
2608 rtl_opt_pass *
2609 make_pass_reorder_blocks (gcc::context *ctxt)
2610 {
2611   return new pass_reorder_blocks (ctxt);
2612 }
2613
2614 /* Duplicate a block (that we already know ends in a computed jump) into its
2615    predecessors, where possible.  Return whether anything is changed.  */
2616 static bool
2617 maybe_duplicate_computed_goto (basic_block bb, int max_size)
2618 {
2619   if (single_pred_p (bb))
2620     return false;
2621
2622   /* Make sure that the block is small enough.  */
2623   rtx_insn *insn;
2624   FOR_BB_INSNS (bb, insn)
2625     if (INSN_P (insn))
2626       {
2627         max_size -= get_attr_min_length (insn);
2628         if (max_size < 0)
2629            return false;
2630       }
2631
2632   bool changed = false;
2633   edge e;
2634   edge_iterator ei;
2635   for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei)); )
2636     {
2637       basic_block pred = e->src;
2638
2639       /* Do not duplicate BB into PRED if that is the last predecessor, or if
2640          we cannot merge a copy of BB with PRED.  */
2641       if (single_pred_p (bb)
2642           || !single_succ_p (pred)
2643           || e->flags & EDGE_COMPLEX
2644           || pred->index < NUM_FIXED_BLOCKS
2645           || (JUMP_P (BB_END (pred)) && !simplejump_p (BB_END (pred)))
2646           || (JUMP_P (BB_END (pred)) && CROSSING_JUMP_P (BB_END (pred))))
2647         {
2648           ei_next (&ei);
2649           continue;
2650         }
2651
2652       if (dump_file)
2653         fprintf (dump_file, "Duplicating computed goto bb %d into bb %d\n",
2654                  bb->index, e->src->index);
2655
2656       /* Remember if PRED can be duplicated; if so, the copy of BB merged
2657          with PRED can be duplicated as well.  */
2658       bool can_dup_more = can_duplicate_block_p (pred);
2659
2660       /* Make a copy of BB, merge it into PRED.  */
2661       basic_block copy = duplicate_block (bb, e, NULL);
2662       emit_barrier_after_bb (copy);
2663       reorder_insns_nobb (BB_HEAD (copy), BB_END (copy), BB_END (pred));
2664       merge_blocks (pred, copy);
2665
2666       changed = true;
2667
2668       /* Try to merge the resulting merged PRED into further predecessors.  */
2669       if (can_dup_more)
2670         maybe_duplicate_computed_goto (pred, max_size);
2671     }
2672
2673   return changed;
2674 }
2675
2676 /* Duplicate the blocks containing computed gotos.  This basically unfactors
2677    computed gotos that were factored early on in the compilation process to
2678    speed up edge based data flow.  We used to not unfactor them again, which
2679    can seriously pessimize code with many computed jumps in the source code,
2680    such as interpreters.  See e.g. PR15242.  */
2681 static void
2682 duplicate_computed_gotos (function *fun)
2683 {
2684   /* We are estimating the length of uncond jump insn only once
2685      since the code for getting the insn length always returns
2686      the minimal length now.  */
2687   if (uncond_jump_length == 0)
2688     uncond_jump_length = get_uncond_jump_length ();
2689
2690   /* Never copy a block larger than this.  */
2691   int max_size
2692     = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
2693
2694   bool changed = false;
2695
2696   /* Try to duplicate all blocks that end in a computed jump and that
2697      can be duplicated at all.  */
2698   basic_block bb;
2699   FOR_EACH_BB_FN (bb, fun)
2700     if (computed_jump_p (BB_END (bb)) && can_duplicate_block_p (bb))
2701       changed |= maybe_duplicate_computed_goto (bb, max_size);
2702
2703   /* Duplicating blocks will redirect edges and may cause hot blocks
2704     previously reached by both hot and cold blocks to become dominated
2705     only by cold blocks.  */
2706   if (changed)
2707     fixup_partitions ();
2708 }
2709
2710 namespace {
2711
2712 const pass_data pass_data_duplicate_computed_gotos =
2713 {
2714   RTL_PASS, /* type */
2715   "compgotos", /* name */
2716   OPTGROUP_NONE, /* optinfo_flags */
2717   TV_REORDER_BLOCKS, /* tv_id */
2718   0, /* properties_required */
2719   0, /* properties_provided */
2720   0, /* properties_destroyed */
2721   0, /* todo_flags_start */
2722   0, /* todo_flags_finish */
2723 };
2724
2725 class pass_duplicate_computed_gotos : public rtl_opt_pass
2726 {
2727 public:
2728   pass_duplicate_computed_gotos (gcc::context *ctxt)
2729     : rtl_opt_pass (pass_data_duplicate_computed_gotos, ctxt)
2730   {}
2731
2732   /* opt_pass methods: */
2733   virtual bool gate (function *);
2734   virtual unsigned int execute (function *);
2735
2736 }; // class pass_duplicate_computed_gotos
2737
2738 bool
2739 pass_duplicate_computed_gotos::gate (function *fun)
2740 {
2741   if (targetm.cannot_modify_jumps_p ())
2742     return false;
2743   return (optimize > 0
2744           && flag_expensive_optimizations
2745           && ! optimize_function_for_size_p (fun));
2746 }
2747
2748 unsigned int
2749 pass_duplicate_computed_gotos::execute (function *fun)
2750 {
2751   duplicate_computed_gotos (fun);
2752
2753   return 0;
2754 }
2755
2756 } // anon namespace
2757
2758 rtl_opt_pass *
2759 make_pass_duplicate_computed_gotos (gcc::context *ctxt)
2760 {
2761   return new pass_duplicate_computed_gotos (ctxt);
2762 }
2763
2764 /* This function is the main 'entrance' for the optimization that
2765    partitions hot and cold basic blocks into separate sections of the
2766    .o file (to improve performance and cache locality).  Ideally it
2767    would be called after all optimizations that rearrange the CFG have
2768    been called.  However part of this optimization may introduce new
2769    register usage, so it must be called before register allocation has
2770    occurred.  This means that this optimization is actually called
   well before the optimization that reorders basic blocks (see
   reorder_basic_blocks above).
2773
2774    This optimization checks the feedback information to determine
2775    which basic blocks are hot/cold, updates flags on the basic blocks
2776    to indicate which section they belong in.  This information is
2777    later used for writing out sections in the .o file.  Because hot
2778    and cold sections can be arbitrarily large (within the bounds of
2779    memory), far beyond the size of a single function, it is necessary
2780    to fix up all edges that cross section boundaries, to make sure the
2781    instructions used can actually span the required distance.  The
2782    fixes are described below.
2783
2784    Fall-through edges must be changed into jumps; it is not safe or
2785    legal to fall through across a section boundary.  Whenever a
2786    fall-through edge crossing a section boundary is encountered, a new
2787    basic block is inserted (in the same section as the fall-through
2788    source), and the fall through edge is redirected to the new basic
2789    block.  The new basic block contains an unconditional jump to the
2790    original fall-through target.  (If the unconditional jump is
2791    insufficient to cross section boundaries, that is dealt with a
2792    little later, see below).
2793
2794    In order to deal with architectures that have short conditional
2795    branches (which cannot span all of memory) we take any conditional
2796    jump that attempts to cross a section boundary and add a level of
2797    indirection: it becomes a conditional jump to a new basic block, in
2798    the same section.  The new basic block contains an unconditional
2799    jump to the original target, in the other section.
2800
2801    For those architectures whose unconditional branch is also
2802    incapable of reaching all of memory, those unconditional jumps are
2803    converted into indirect jumps, through a register.
2804
2805    IMPORTANT NOTE: This optimization causes some messy interactions
2806    with the cfg cleanup optimizations; those optimizations want to
2807    merge blocks wherever possible, and to collapse indirect jump
2808    sequences (change "A jumps to B jumps to C" directly into "A jumps
2809    to C").  Those optimizations can undo the jump fixes that
2810    partitioning is required to make (see above), in order to ensure
2811    that jumps attempting to cross section boundaries are really able
2812    to cover whatever distance the jump requires (on many architectures
2813    conditional or unconditional jumps are not able to reach all of
2814    memory).  Therefore tests have to be inserted into each such
2815    optimization to make sure that it does not undo stuff necessary to
2816    cross partition boundaries.  This would be much less of a problem
2817    if we could perform this optimization later in the compilation, but
2818    unfortunately the fact that we may need to create indirect jumps
2819    (through registers) requires that this optimization be performed
2820    before register allocation.
2821
2822    Hot and cold basic blocks are partitioned and put in separate
2823    sections of the .o file, to reduce paging and improve cache
2824    performance (hopefully).  This can result in bits of code from the
2825    same function being widely separated in the .o file.  However this
2826    is not obvious to the current bb structure.  Therefore we must take
2827    care to ensure that: 1). There are no fall_thru edges that cross
2828    between sections; 2). For those architectures which have "short"
2829    conditional branches, all conditional branches that attempt to
2830    cross between sections are converted to unconditional branches;
2831    and, 3). For those architectures which have "short" unconditional
2832    branches, all unconditional branches that attempt to cross between
2833    sections are converted to indirect jumps.
2834
2835    The code for fixing up fall_thru edges that cross between hot and
2836    cold basic blocks does so by creating new basic blocks containing
2837    unconditional branches to the appropriate label in the "other"
2838    section.  The new basic block is then put in the same (hot or cold)
2839    section as the original conditional branch, and the fall_thru edge
2840    is modified to fall into the new basic block instead.  By adding
2841    this level of indirection we end up with only unconditional branches
2842    crossing between hot and cold sections.
2843
2844    Conditional branches are dealt with by adding a level of indirection.
2845    A new basic block is added in the same (hot/cold) section as the
2846    conditional branch, and the conditional branch is retargeted to the
2847    new basic block.  The new basic block contains an unconditional branch
2848    to the original target of the conditional branch (in the other section).
2849
2850    Unconditional branches are dealt with by converting them into
2851    indirect jumps.  */
2852
2853 namespace {
2854
2855 const pass_data pass_data_partition_blocks =
2856 {
2857   RTL_PASS, /* type */
2858   "bbpart", /* name */
2859   OPTGROUP_NONE, /* optinfo_flags */
2860   TV_REORDER_BLOCKS, /* tv_id */
2861   PROP_cfglayout, /* properties_required */
2862   0, /* properties_provided */
2863   0, /* properties_destroyed */
2864   0, /* todo_flags_start */
2865   0, /* todo_flags_finish */
2866 };
2867
2868 class pass_partition_blocks : public rtl_opt_pass
2869 {
2870 public:
2871   pass_partition_blocks (gcc::context *ctxt)
2872     : rtl_opt_pass (pass_data_partition_blocks, ctxt)
2873   {}
2874
2875   /* opt_pass methods: */
2876   virtual bool gate (function *);
2877   virtual unsigned int execute (function *);
2878
2879 }; // class pass_partition_blocks
2880
2881 bool
2882 pass_partition_blocks::gate (function *fun)
2883 {
2884   /* The optimization to partition hot/cold basic blocks into separate
2885      sections of the .o file does not work well with linkonce or with
2886      user defined section attributes.  Don't call it if either case
2887      arises.  */
2888   return (flag_reorder_blocks_and_partition
2889           && optimize
2890           /* See pass_reorder_blocks::gate.  We should not partition if
2891              we are going to omit the reordering.  */
2892           && optimize_function_for_speed_p (fun)
2893           && !DECL_COMDAT_GROUP (current_function_decl)
2894           && !lookup_attribute ("section", DECL_ATTRIBUTES (fun->decl)));
2895 }
2896
2897 unsigned
2898 pass_partition_blocks::execute (function *fun)
2899 {
2900   vec<edge> crossing_edges;
2901
2902   if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
2903     return 0;
2904
2905   df_set_flags (DF_DEFER_INSN_RESCAN);
2906
2907   crossing_edges = find_rarely_executed_basic_blocks_and_crossing_edges ();
2908   if (!crossing_edges.exists ())
2909     /* Make sure to process deferred rescans and clear changeable df flags.  */
2910     return TODO_df_finish;
2911
2912   crtl->has_bb_partition = true;
2913
2914   /* Make sure the source of any crossing edge ends in a jump and the
2915      destination of any crossing edge has a label.  */
2916   add_labels_and_missing_jumps (crossing_edges);
2917
2918   /* Convert all crossing fall_thru edges to non-crossing fall
2919      thrus to unconditional jumps (that jump to the original fall
2920      through dest).  */
2921   fix_up_fall_thru_edges ();
2922
2923   /* If the architecture does not have conditional branches that can
2924      span all of memory, convert crossing conditional branches into
2925      crossing unconditional branches.  */
2926   if (!HAS_LONG_COND_BRANCH)
2927     fix_crossing_conditional_branches ();
2928
2929   /* If the architecture does not have unconditional branches that
2930      can span all of memory, convert crossing unconditional branches
2931      into indirect jumps.  Since adding an indirect jump also adds
2932      a new register usage, update the register usage information as
2933      well.  */
2934   if (!HAS_LONG_UNCOND_BRANCH)
2935     fix_crossing_unconditional_branches ();
2936
2937   update_crossing_jump_flags ();
2938
2939   /* Clear bb->aux fields that the above routines were using.  */
2940   clear_aux_for_blocks ();
2941
2942   crossing_edges.release ();
2943
2944   /* ??? FIXME: DF generates the bb info for a block immediately.
2945      And by immediately, I mean *during* creation of the block.
2946
2947         #0  df_bb_refs_collect
2948         #1  in df_bb_refs_record
2949         #2  in create_basic_block_structure
2950
2951      Which means that the bb_has_eh_pred test in df_bb_refs_collect
2952      will *always* fail, because no edges can have been added to the
2953      block yet.  Which of course means we don't add the right
2954      artificial refs, which means we fail df_verify (much) later.
2955
2956      Cleanest solution would seem to make DF_DEFER_INSN_RESCAN imply
2957      that we also shouldn't grab data from the new blocks those new
2958      insns are in either.  In this way one can create the block, link
2959      it up properly, and have everything Just Work later, when deferred
2960      insns are processed.
2961
2962      In the meantime, we have no other option but to throw away all
2963      of the DF data and recompute it all.  */
2964   if (fun->eh->lp_array)
2965     {
2966       df_finish_pass (true);
2967       df_scan_alloc (NULL);
2968       df_scan_blocks ();
2969       /* Not all post-landing pads use all of the EH_RETURN_DATA_REGNO
2970          data.  We blindly generated all of them when creating the new
2971          landing pad.  Delete those assignments we don't use.  */
2972       df_set_flags (DF_LR_RUN_DCE);
2973       df_analyze ();
2974     }
2975
2976   /* Make sure to process deferred rescans and clear changeable df flags.  */
2977   return TODO_df_finish;
2978 }
2979
2980 } // anon namespace
2981
2982 rtl_opt_pass *
2983 make_pass_partition_blocks (gcc::context *ctxt)
2984 {
2985   return new pass_partition_blocks (ctxt);
2986 }