gcc/cfgloopanal.c

   1 /* Natural loop analysis code for GNU compiler.
   2    Copyright (C) 2002-2013 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "tm.h"
  24 #include "rtl.h"
  25 #include "hard-reg-set.h"
  26 #include "obstack.h"
  27 #include "basic-block.h"
  28 #include "cfgloop.h"
  29 #include "expr.h"
  30 #include "graphds.h"
  31 #include "params.h"
  32
  33 struct target_cfgloop default_target_cfgloop;
  34 #if SWITCHABLE_TARGET
  35 struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop;
  36 #endif
  37
  38 /* Checks whether BB is executed exactly once in each LOOP iteration.  */
  39
  40 bool
  41 just_once_each_iteration_p (const struct loop *loop, const_basic_block bb)
  42 {
  43   /* It must be executed at least once each iteration.  */
  44   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
  45     return false;
  46
  47   /* And just once.  */
  48   if (bb->loop_father != loop)
  49     return false;
  50
  51   /* But this was not enough.  We might have some irreducible loop here.  */
  52   if (bb->flags & BB_IRREDUCIBLE_LOOP)
  53     return false;
  54
  55   return true;
  56 }
  57
  58 /* Marks blocks and edges that are part of non-recognized loops; i.e. we
  59    throw away all latch edges and mark blocks inside any remaining cycle.
  60    Everything is a bit complicated due to fact we do not want to do this
  61    for parts of cycles that only "pass" through some loop -- i.e. for
  62    each cycle, we want to mark blocks that belong directly to innermost
  63    loop containing the whole cycle.
  64
  65    LOOPS is the loop tree.  */
  66
  67 #define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block)
  68 #define BB_REPR(BB) ((BB)->index + 1)
  69
  70 bool
  71 mark_irreducible_loops (void)
  72 {
  73   basic_block act;
  74   struct graph_edge *ge;
  75   edge e;
  76   edge_iterator ei;
  77   int src, dest;
  78   unsigned depth;
  79   struct graph *g;
  80   int num = number_of_loops (cfun);
  81   struct loop *cloop;
  82   bool irred_loop_found = false;
  83   int i;
  84
  85   gcc_assert (current_loops != NULL);
  86
  87   /* Reset the flags.  */
  88   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
  89     {
  90       act->flags &= ~BB_IRREDUCIBLE_LOOP;
  91       FOR_EACH_EDGE (e, ei, act->succs)
  92         e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
  93     }
  94
  95   /* Create the edge lists.  */
  96   g = new_graph (last_basic_block + num);
  97
  98   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
  99     FOR_EACH_EDGE (e, ei, act->succs)
 100       {
 101         /* Ignore edges to exit.  */
 102         if (e->dest == EXIT_BLOCK_PTR)
 103           continue;
 104
 105         src = BB_REPR (act);
 106         dest = BB_REPR (e->dest);
 107
 108         /* Ignore latch edges.  */
 109         if (e->dest->loop_father->header == e->dest
 110             && e->dest->loop_father->latch == act)
 111           continue;
 112
 113         /* Edges inside a single loop should be left where they are.  Edges
 114            to subloop headers should lead to representative of the subloop,
 115            but from the same place.
 116
 117            Edges exiting loops should lead from representative
 118            of the son of nearest common ancestor of the loops in that
 119            act lays.  */
 120
 121         if (e->dest->loop_father->header == e->dest)
 122           dest = LOOP_REPR (e->dest->loop_father);
 123
 124         if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
 125           {
 126             depth = 1 + loop_depth (find_common_loop (act->loop_father,
 127                                                       e->dest->loop_father));
 128             if (depth == loop_depth (act->loop_father))
 129               cloop = act->loop_father;
 130             else
 131               cloop = (*act->loop_father->superloops)[depth];
 132
 133             src = LOOP_REPR (cloop);
 134           }
 135
 136         add_edge (g, src, dest)->data = e;
 137       }
 138
 139   /* Find the strongly connected components.  */
 140   graphds_scc (g, NULL);
 141
 142   /* Mark the irreducible loops.  */
 143   for (i = 0; i < g->n_vertices; i++)
 144     for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
 145       {
 146         edge real = (edge) ge->data;
 147         /* edge E in graph G is irreducible if it connects two vertices in the
 148            same scc.  */
 149
 150         /* All edges should lead from a component with higher number to the
 151            one with lower one.  */
 152         gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component);
 153
 154         if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
 155           continue;
 156
 157         real->flags |= EDGE_IRREDUCIBLE_LOOP;
 158         irred_loop_found = true;
 159         if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
 160           real->src->flags |= BB_IRREDUCIBLE_LOOP;
 161       }
 162
 163   free_graph (g);
 164
 165   loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
 166   return irred_loop_found;
 167 }
 168
 169 /* Counts number of insns inside LOOP.  */
 170 int
 171 num_loop_insns (const struct loop *loop)
 172 {
 173   basic_block *bbs, bb;
 174   unsigned i, ninsns = 0;
 175   rtx insn;
 176
 177   bbs = get_loop_body (loop);
 178   for (i = 0; i < loop->num_nodes; i++)
 179     {
 180       bb = bbs[i];
 181       FOR_BB_INSNS (bb, insn)
 182         if (NONDEBUG_INSN_P (insn))
 183           ninsns++;
 184     }
 185   free (bbs);
 186
 187   if (!ninsns)
 188     ninsns = 1; /* To avoid division by zero.  */
 189
 190   return ninsns;
 191 }
 192
 193 /* Counts number of insns executed on average per iteration LOOP.  */
 194 int
 195 average_num_loop_insns (const struct loop *loop)
 196 {
 197   basic_block *bbs, bb;
 198   unsigned i, binsns, ninsns, ratio;
 199   rtx insn;
 200
 201   ninsns = 0;
 202   bbs = get_loop_body (loop);
 203   for (i = 0; i < loop->num_nodes; i++)
 204     {
 205       bb = bbs[i];
 206
 207       binsns = 0;
 208       FOR_BB_INSNS (bb, insn)
 209         if (NONDEBUG_INSN_P (insn))
 210           binsns++;
 211
 212       ratio = loop->header->frequency == 0
 213               ? BB_FREQ_MAX
 214               : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency;
 215       ninsns += binsns * ratio;
 216     }
 217   free (bbs);
 218
 219   ninsns /= BB_FREQ_MAX;
 220   if (!ninsns)
 221     ninsns = 1; /* To avoid division by zero.  */
 222
 223   return ninsns;
 224 }
 225
 226 /* Returns expected number of iterations of LOOP, according to
 227    measured or guessed profile.  No bounding is done on the
 228    value.  */
 229
 230 gcov_type
 231 expected_loop_iterations_unbounded (const struct loop *loop)
 232 {
 233   edge e;
 234   edge_iterator ei;
 235
 236   if (loop->latch->count || loop->header->count)
 237     {
 238       gcov_type count_in, count_latch, expected;
 239
 240       count_in = 0;
 241       count_latch = 0;
 242
 243       FOR_EACH_EDGE (e, ei, loop->header->preds)
 244         if (e->src == loop->latch)
 245           count_latch = e->count;
 246         else
 247           count_in += e->count;
 248
 249       if (count_in == 0)
 250         expected = count_latch * 2;
 251       else
 252         expected = (count_latch + count_in - 1) / count_in;
 253
 254       return expected;
 255     }
 256   else
 257     {
 258       int freq_in, freq_latch;
 259
 260       freq_in = 0;
 261       freq_latch = 0;
 262
 263       FOR_EACH_EDGE (e, ei, loop->header->preds)
 264         if (e->src == loop->latch)
 265           freq_latch = EDGE_FREQUENCY (e);
 266         else
 267           freq_in += EDGE_FREQUENCY (e);
 268
 269       if (freq_in == 0)
 270         return freq_latch * 2;
 271
 272       return (freq_latch + freq_in - 1) / freq_in;
 273     }
 274 }
 275
 276 /* Returns expected number of LOOP iterations.  The returned value is bounded
 277    by REG_BR_PROB_BASE.  */
 278
 279 unsigned
 280 expected_loop_iterations (const struct loop *loop)
 281 {
 282   gcov_type expected = expected_loop_iterations_unbounded (loop);
 283   return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
 284 }
 285
 286 /* Returns the maximum level of nesting of subloops of LOOP.  */
 287
 288 unsigned
 289 get_loop_level (const struct loop *loop)
 290 {
 291   const struct loop *ploop;
 292   unsigned mx = 0, l;
 293
 294   for (ploop = loop->inner; ploop; ploop = ploop->next)
 295     {
 296       l = get_loop_level (ploop);
 297       if (l >= mx)
 298         mx = l + 1;
 299     }
 300   return mx;
 301 }
 302
 303 /* Returns estimate on cost of computing SEQ.  */
 304
 305 static unsigned
 306 seq_cost (const_rtx seq, bool speed)
 307 {
 308   unsigned cost = 0;
 309   rtx set;
 310
 311   for (; seq; seq = NEXT_INSN (seq))
 312     {
 313       set = single_set (seq);
 314       if (set)
 315         cost += set_rtx_cost (set, speed);
 316       else
 317         cost++;
 318     }
 319
 320   return cost;
 321 }
 322
 323 /* Initialize the constants for computing set costs.  */
 324
 325 void
 326 init_set_costs (void)
 327 {
 328   int speed;
 329   rtx seq;
 330   rtx reg1 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER);
 331   rtx reg2 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER + 1);
 332   rtx addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 2);
 333   rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
 334   unsigned i;
 335
 336   target_avail_regs = 0;
 337   target_clobbered_regs = 0;
 338   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
 339     if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
 340         && !fixed_regs[i])
 341       {
 342         target_avail_regs++;
 343         if (call_used_regs[i])
 344           target_clobbered_regs++;
 345       }
 346
 347   target_res_regs = 3;
 348
 349   for (speed = 0; speed < 2; speed++)
 350      {
 351       crtl->maybe_hot_insn_p = speed;
 352       /* Set up the costs for using extra registers:
 353
 354          1) If not many free registers remain, we should prefer having an
 355             additional move to decreasing the number of available registers.
 356             (TARGET_REG_COST).
 357          2) If no registers are available, we need to spill, which may require
 358             storing the old value to memory and loading it back
 359             (TARGET_SPILL_COST).  */
 360
 361       start_sequence ();
 362       emit_move_insn (reg1, reg2);
 363       seq = get_insns ();
 364       end_sequence ();
 365       target_reg_cost [speed] = seq_cost (seq, speed);
 366
 367       start_sequence ();
 368       emit_move_insn (mem, reg1);
 369       emit_move_insn (reg2, mem);
 370       seq = get_insns ();
 371       end_sequence ();
 372       target_spill_cost [speed] = seq_cost (seq, speed);
 373     }
 374   default_rtl_profile ();
 375 }
 376
 377 /* Estimates cost of increased register pressure caused by making N_NEW new
 378    registers live around the loop.  N_OLD is the number of registers live
 379    around the loop.  If CALL_P is true, also take into account that
 380    call-used registers may be clobbered in the loop body, reducing the
 381    number of available registers before we spill.  */
 382
 383 unsigned
 384 estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
 385                             bool call_p)
 386 {
 387   unsigned cost;
 388   unsigned regs_needed = n_new + n_old;
 389   unsigned available_regs = target_avail_regs;
 390
 391   /* If there is a call in the loop body, the call-clobbered registers
 392      are not available for loop invariants.  */
 393   if (call_p)
 394     available_regs = available_regs - target_clobbered_regs;
 395
 396   /* If we have enough registers, we should use them and not restrict
 397      the transformations unnecessarily.  */
 398   if (regs_needed + target_res_regs <= available_regs)
 399     return 0;
 400
 401   if (regs_needed <= available_regs)
 402     /* If we are close to running out of registers, try to preserve
 403        them.  */
 404     cost = target_reg_cost [speed] * n_new;
 405   else
 406     /* If we run out of registers, it is very expensive to add another
 407        one.  */
 408     cost = target_spill_cost [speed] * n_new;
 409
 410   if (optimize && (flag_ira_region == IRA_REGION_ALL
 411                    || flag_ira_region == IRA_REGION_MIXED)
 412       && number_of_loops (cfun) <= (unsigned) IRA_MAX_LOOPS_NUM)
 413     /* IRA regional allocation deals with high register pressure
 414        better.  So decrease the cost (to do more accurate the cost
 415        calculation for IRA, we need to know how many registers lives
 416        through the loop transparently).  */
 417     cost /= 2;
 418
 419   return cost;
 420 }
 421
 422 /* Sets EDGE_LOOP_EXIT flag for all loop exits.  */
 423
 424 void
 425 mark_loop_exit_edges (void)
 426 {
 427   basic_block bb;
 428   edge e;
 429
 430   if (number_of_loops (cfun) <= 1)
 431     return;
 432
 433   FOR_EACH_BB (bb)
 434     {
 435       edge_iterator ei;
 436
 437       FOR_EACH_EDGE (e, ei, bb->succs)
 438         {
 439           if (loop_outer (bb->loop_father)
 440               && loop_exit_edge_p (bb->loop_father, e))
 441             e->flags |= EDGE_LOOP_EXIT;
 442           else
 443             e->flags &= ~EDGE_LOOP_EXIT;
 444         }
 445     }
 446 }
 447
 448 /* Return exit edge if loop has only one exit that is likely
 449    to be executed on runtime (i.e. it is not EH or leading
 450    to noreturn call.  */
 451
 452 edge
 453 single_likely_exit (struct loop *loop)
 454 {
 455   edge found = single_exit (loop);
 456   vec<edge> exits;
 457   unsigned i;
 458   edge ex;
 459
 460   if (found)
 461     return found;
 462   exits = get_loop_exit_edges (loop);
 463   FOR_EACH_VEC_ELT (exits, i, ex)
 464     {
 465       if (ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL))
 466         continue;
 467       /* The constant of 5 is set in a way so noreturn calls are
 468          ruled out by this test.  The static branch prediction algorithm
 469          will not assign such a low probability to conditionals for usual
 470          reasons.  */
 471       if (profile_status != PROFILE_ABSENT
 472           && ex->probability < 5 && !ex->count)
 473         continue;
 474       if (!found)
 475         found = ex;
 476       else
 477         {
 478           exits.release ();
 479           return NULL;
 480         }
 481     }
 482   exits.release ();
 483   return found;
 484 }
 485
 486
 487 /* Gets basic blocks of a LOOP.  Header is the 0-th block, rest is in dfs
 488    order against direction of edges from latch.  Specially, if
 489    header != latch, latch is the 1-st block.  */
 490
 491 vec<basic_block>
 492 get_loop_hot_path (const struct loop *loop)
 493 {
 494   basic_block bb = loop->header;
 495   vec<basic_block> path = vNULL;
 496   bitmap visited = BITMAP_ALLOC (NULL);
 497
 498   while (true)
 499     {
 500       edge_iterator ei;
 501       edge e;
 502       edge best = NULL;
 503
 504       path.safe_push (bb);
 505       bitmap_set_bit (visited, bb->index);
 506       FOR_EACH_EDGE (e, ei, bb->succs)
 507         if ((!best || e->probability > best->probability)
 508             && !loop_exit_edge_p (loop, e)
 509             && !bitmap_bit_p (visited, e->dest->index))
 510           best = e;
 511       if (!best || best->dest == loop->header)
 512         break;
 513       bb = best->dest;
 514     }
 515   BITMAP_FREE (visited);
 516   return path;
 517 }