gcc/cfgloopanal.c

   1 /* Natural loop analysis code for GNU compiler.
   2    Copyright (C) 2002-2015 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "tm.h"
  24 #include "rtl.h"
  25 #include "hard-reg-set.h"
  26 #include "obstack.h"
  27 #include "predict.h"
  28 #include "function.h"
  29 #include "dominance.h"
  30 #include "cfg.h"
  31 #include "basic-block.h"
  32 #include "cfgloop.h"
  33 #include "symtab.h"
  34 #include "flags.h"
  35 #include "alias.h"
  36 #include "tree.h"
  37 #include "insn-config.h"
  38 #include "expmed.h"
  39 #include "dojump.h"
  40 #include "explow.h"
  41 #include "calls.h"
  42 #include "emit-rtl.h"
  43 #include "varasm.h"
  44 #include "stmt.h"
  45 #include "expr.h"
  46 #include "graphds.h"
  47 #include "params.h"
  48
  49 struct target_cfgloop default_target_cfgloop;
  50 #if SWITCHABLE_TARGET
  51 struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop;
  52 #endif
  53
  54 /* Checks whether BB is executed exactly once in each LOOP iteration.  */
  55
  56 bool
  57 just_once_each_iteration_p (const struct loop *loop, const_basic_block bb)
  58 {
  59   /* It must be executed at least once each iteration.  */
  60   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
  61     return false;
  62
  63   /* And just once.  */
  64   if (bb->loop_father != loop)
  65     return false;
  66
  67   /* But this was not enough.  We might have some irreducible loop here.  */
  68   if (bb->flags & BB_IRREDUCIBLE_LOOP)
  69     return false;
  70
  71   return true;
  72 }
  73
  74 /* Marks blocks and edges that are part of non-recognized loops; i.e. we
  75    throw away all latch edges and mark blocks inside any remaining cycle.
  76    Everything is a bit complicated due to fact we do not want to do this
  77    for parts of cycles that only "pass" through some loop -- i.e. for
  78    each cycle, we want to mark blocks that belong directly to innermost
  79    loop containing the whole cycle.
  80
  81    LOOPS is the loop tree.  */
  82
  83 #define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block_for_fn (cfun))
  84 #define BB_REPR(BB) ((BB)->index + 1)
  85
  86 bool
  87 mark_irreducible_loops (void)
  88 {
  89   basic_block act;
  90   struct graph_edge *ge;
  91   edge e;
  92   edge_iterator ei;
  93   int src, dest;
  94   unsigned depth;
  95   struct graph *g;
  96   int num = number_of_loops (cfun);
  97   struct loop *cloop;
  98   bool irred_loop_found = false;
  99   int i;
 100
 101   gcc_assert (current_loops != NULL);
 102
 103   /* Reset the flags.  */
 104   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
 105                   EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
 106     {
 107       act->flags &= ~BB_IRREDUCIBLE_LOOP;
 108       FOR_EACH_EDGE (e, ei, act->succs)
 109         e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
 110     }
 111
 112   /* Create the edge lists.  */
 113   g = new_graph (last_basic_block_for_fn (cfun) + num);
 114
 115   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
 116                   EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
 117     FOR_EACH_EDGE (e, ei, act->succs)
 118       {
 119         /* Ignore edges to exit.  */
 120         if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
 121           continue;
 122
 123         src = BB_REPR (act);
 124         dest = BB_REPR (e->dest);
 125
 126         /* Ignore latch edges.  */
 127         if (e->dest->loop_father->header == e->dest
 128             && e->dest->loop_father->latch == act)
 129           continue;
 130
 131         /* Edges inside a single loop should be left where they are.  Edges
 132            to subloop headers should lead to representative of the subloop,
 133            but from the same place.
 134
 135            Edges exiting loops should lead from representative
 136            of the son of nearest common ancestor of the loops in that
 137            act lays.  */
 138
 139         if (e->dest->loop_father->header == e->dest)
 140           dest = LOOP_REPR (e->dest->loop_father);
 141
 142         if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
 143           {
 144             depth = 1 + loop_depth (find_common_loop (act->loop_father,
 145                                                       e->dest->loop_father));
 146             if (depth == loop_depth (act->loop_father))
 147               cloop = act->loop_father;
 148             else
 149               cloop = (*act->loop_father->superloops)[depth];
 150
 151             src = LOOP_REPR (cloop);
 152           }
 153
 154         add_edge (g, src, dest)->data = e;
 155       }
 156
 157   /* Find the strongly connected components.  */
 158   graphds_scc (g, NULL);
 159
 160   /* Mark the irreducible loops.  */
 161   for (i = 0; i < g->n_vertices; i++)
 162     for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
 163       {
 164         edge real = (edge) ge->data;
 165         /* edge E in graph G is irreducible if it connects two vertices in the
 166            same scc.  */
 167
 168         /* All edges should lead from a component with higher number to the
 169            one with lower one.  */
 170         gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component);
 171
 172         if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
 173           continue;
 174
 175         real->flags |= EDGE_IRREDUCIBLE_LOOP;
 176         irred_loop_found = true;
 177         if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
 178           real->src->flags |= BB_IRREDUCIBLE_LOOP;
 179       }
 180
 181   free_graph (g);
 182
 183   loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
 184   return irred_loop_found;
 185 }
 186
 187 /* Counts number of insns inside LOOP.  */
 188 int
 189 num_loop_insns (const struct loop *loop)
 190 {
 191   basic_block *bbs, bb;
 192   unsigned i, ninsns = 0;
 193   rtx_insn *insn;
 194
 195   bbs = get_loop_body (loop);
 196   for (i = 0; i < loop->num_nodes; i++)
 197     {
 198       bb = bbs[i];
 199       FOR_BB_INSNS (bb, insn)
 200         if (NONDEBUG_INSN_P (insn))
 201           ninsns++;
 202     }
 203   free (bbs);
 204
 205   if (!ninsns)
 206     ninsns = 1; /* To avoid division by zero.  */
 207
 208   return ninsns;
 209 }
 210
 211 /* Counts number of insns executed on average per iteration LOOP.  */
 212 int
 213 average_num_loop_insns (const struct loop *loop)
 214 {
 215   basic_block *bbs, bb;
 216   unsigned i, binsns, ninsns, ratio;
 217   rtx_insn *insn;
 218
 219   ninsns = 0;
 220   bbs = get_loop_body (loop);
 221   for (i = 0; i < loop->num_nodes; i++)
 222     {
 223       bb = bbs[i];
 224
 225       binsns = 0;
 226       FOR_BB_INSNS (bb, insn)
 227         if (NONDEBUG_INSN_P (insn))
 228           binsns++;
 229
 230       ratio = loop->header->frequency == 0
 231               ? BB_FREQ_MAX
 232               : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency;
 233       ninsns += binsns * ratio;
 234     }
 235   free (bbs);
 236
 237   ninsns /= BB_FREQ_MAX;
 238   if (!ninsns)
 239     ninsns = 1; /* To avoid division by zero.  */
 240
 241   return ninsns;
 242 }
 243
 244 /* Returns expected number of iterations of LOOP, according to
 245    measured or guessed profile.  No bounding is done on the
 246    value.  */
 247
 248 gcov_type
 249 expected_loop_iterations_unbounded (const struct loop *loop)
 250 {
 251   edge e;
 252   edge_iterator ei;
 253
 254   if (loop->latch->count || loop->header->count)
 255     {
 256       gcov_type count_in, count_latch, expected;
 257
 258       count_in = 0;
 259       count_latch = 0;
 260
 261       FOR_EACH_EDGE (e, ei, loop->header->preds)
 262         if (e->src == loop->latch)
 263           count_latch = e->count;
 264         else
 265           count_in += e->count;
 266
 267       if (count_in == 0)
 268         expected = count_latch * 2;
 269       else
 270         expected = (count_latch + count_in - 1) / count_in;
 271
 272       return expected;
 273     }
 274   else
 275     {
 276       int freq_in, freq_latch;
 277
 278       freq_in = 0;
 279       freq_latch = 0;
 280
 281       FOR_EACH_EDGE (e, ei, loop->header->preds)
 282         if (e->src == loop->latch)
 283           freq_latch = EDGE_FREQUENCY (e);
 284         else
 285           freq_in += EDGE_FREQUENCY (e);
 286
 287       if (freq_in == 0)
 288         return freq_latch * 2;
 289
 290       return (freq_latch + freq_in - 1) / freq_in;
 291     }
 292 }
 293
 294 /* Returns expected number of LOOP iterations.  The returned value is bounded
 295    by REG_BR_PROB_BASE.  */
 296
 297 unsigned
 298 expected_loop_iterations (const struct loop *loop)
 299 {
 300   gcov_type expected = expected_loop_iterations_unbounded (loop);
 301   return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
 302 }
 303
 304 /* Returns the maximum level of nesting of subloops of LOOP.  */
 305
 306 unsigned
 307 get_loop_level (const struct loop *loop)
 308 {
 309   const struct loop *ploop;
 310   unsigned mx = 0, l;
 311
 312   for (ploop = loop->inner; ploop; ploop = ploop->next)
 313     {
 314       l = get_loop_level (ploop);
 315       if (l >= mx)
 316         mx = l + 1;
 317     }
 318   return mx;
 319 }
 320
 321 /* Initialize the constants for computing set costs.  */
 322
 323 void
 324 init_set_costs (void)
 325 {
 326   int speed;
 327   rtx_insn *seq;
 328   rtx reg1 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 1);
 329   rtx reg2 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 2);
 330   rtx addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 3);
 331   rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
 332   unsigned i;
 333
 334   target_avail_regs = 0;
 335   target_clobbered_regs = 0;
 336   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
 337     if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
 338         && !fixed_regs[i])
 339       {
 340         target_avail_regs++;
 341         if (call_used_regs[i])
 342           target_clobbered_regs++;
 343       }
 344
 345   target_res_regs = 3;
 346
 347   for (speed = 0; speed < 2; speed++)
 348      {
 349       crtl->maybe_hot_insn_p = speed;
 350       /* Set up the costs for using extra registers:
 351
 352          1) If not many free registers remain, we should prefer having an
 353             additional move to decreasing the number of available registers.
 354             (TARGET_REG_COST).
 355          2) If no registers are available, we need to spill, which may require
 356             storing the old value to memory and loading it back
 357             (TARGET_SPILL_COST).  */
 358
 359       start_sequence ();
 360       emit_move_insn (reg1, reg2);
 361       seq = get_insns ();
 362       end_sequence ();
 363       target_reg_cost [speed] = seq_cost (seq, speed);
 364
 365       start_sequence ();
 366       emit_move_insn (mem, reg1);
 367       emit_move_insn (reg2, mem);
 368       seq = get_insns ();
 369       end_sequence ();
 370       target_spill_cost [speed] = seq_cost (seq, speed);
 371     }
 372   default_rtl_profile ();
 373 }
 374
 375 /* Estimates cost of increased register pressure caused by making N_NEW new
 376    registers live around the loop.  N_OLD is the number of registers live
 377    around the loop.  If CALL_P is true, also take into account that
 378    call-used registers may be clobbered in the loop body, reducing the
 379    number of available registers before we spill.  */
 380
 381 unsigned
 382 estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
 383                             bool call_p)
 384 {
 385   unsigned cost;
 386   unsigned regs_needed = n_new + n_old;
 387   unsigned available_regs = target_avail_regs;
 388
 389   /* If there is a call in the loop body, the call-clobbered registers
 390      are not available for loop invariants.  */
 391   if (call_p)
 392     available_regs = available_regs - target_clobbered_regs;
 393
 394   /* If we have enough registers, we should use them and not restrict
 395      the transformations unnecessarily.  */
 396   if (regs_needed + target_res_regs <= available_regs)
 397     return 0;
 398
 399   if (regs_needed <= available_regs)
 400     /* If we are close to running out of registers, try to preserve
 401        them.  */
 402     cost = target_reg_cost [speed] * n_new;
 403   else
 404     /* If we run out of registers, it is very expensive to add another
 405        one.  */
 406     cost = target_spill_cost [speed] * n_new;
 407
 408   if (optimize && (flag_ira_region == IRA_REGION_ALL
 409                    || flag_ira_region == IRA_REGION_MIXED)
 410       && number_of_loops (cfun) <= (unsigned) IRA_MAX_LOOPS_NUM)
 411     /* IRA regional allocation deals with high register pressure
 412        better.  So decrease the cost (to do more accurate the cost
 413        calculation for IRA, we need to know how many registers lives
 414        through the loop transparently).  */
 415     cost /= 2;
 416
 417   return cost;
 418 }
 419
 420 /* Sets EDGE_LOOP_EXIT flag for all loop exits.  */
 421
 422 void
 423 mark_loop_exit_edges (void)
 424 {
 425   basic_block bb;
 426   edge e;
 427
 428   if (number_of_loops (cfun) <= 1)
 429     return;
 430
 431   FOR_EACH_BB_FN (bb, cfun)
 432     {
 433       edge_iterator ei;
 434
 435       FOR_EACH_EDGE (e, ei, bb->succs)
 436         {
 437           if (loop_outer (bb->loop_father)
 438               && loop_exit_edge_p (bb->loop_father, e))
 439             e->flags |= EDGE_LOOP_EXIT;
 440           else
 441             e->flags &= ~EDGE_LOOP_EXIT;
 442         }
 443     }
 444 }
 445
 446 /* Return exit edge if loop has only one exit that is likely
 447    to be executed on runtime (i.e. it is not EH or leading
 448    to noreturn call.  */
 449
 450 edge
 451 single_likely_exit (struct loop *loop)
 452 {
 453   edge found = single_exit (loop);
 454   vec<edge> exits;
 455   unsigned i;
 456   edge ex;
 457
 458   if (found)
 459     return found;
 460   exits = get_loop_exit_edges (loop);
 461   FOR_EACH_VEC_ELT (exits, i, ex)
 462     {
 463       if (ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL))
 464         continue;
 465       /* The constant of 5 is set in a way so noreturn calls are
 466          ruled out by this test.  The static branch prediction algorithm
 467          will not assign such a low probability to conditionals for usual
 468          reasons.  */
 469       if (profile_status_for_fn (cfun) != PROFILE_ABSENT
 470           && ex->probability < 5 && !ex->count)
 471         continue;
 472       if (!found)
 473         found = ex;
 474       else
 475         {
 476           exits.release ();
 477           return NULL;
 478         }
 479     }
 480   exits.release ();
 481   return found;
 482 }
 483
 484
 485 /* Gets basic blocks of a LOOP.  Header is the 0-th block, rest is in dfs
 486    order against direction of edges from latch.  Specially, if
 487    header != latch, latch is the 1-st block.  */
 488
 489 vec<basic_block>
 490 get_loop_hot_path (const struct loop *loop)
 491 {
 492   basic_block bb = loop->header;
 493   vec<basic_block> path = vNULL;
 494   bitmap visited = BITMAP_ALLOC (NULL);
 495
 496   while (true)
 497     {
 498       edge_iterator ei;
 499       edge e;
 500       edge best = NULL;
 501
 502       path.safe_push (bb);
 503       bitmap_set_bit (visited, bb->index);
 504       FOR_EACH_EDGE (e, ei, bb->succs)
 505         if ((!best || e->probability > best->probability)
 506             && !loop_exit_edge_p (loop, e)
 507             && !bitmap_bit_p (visited, e->dest->index))
 508           best = e;
 509       if (!best || best->dest == loop->header)
 510         break;
 511       bb = best->dest;
 512     }
 513   BITMAP_FREE (visited);
 514   return path;
 515 }