gcc/cfgloopanal.c

   1 /* Natural loop analysis code for GNU compiler.
   2    Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "rtl.h"
  26 #include "hard-reg-set.h"
  27 #include "obstack.h"
  28 #include "basic-block.h"
  29 #include "cfgloop.h"
  30 #include "expr.h"
  31 #include "graphds.h"
  32 #include "params.h"
  33
/* Statically allocated instance of struct target_cfgloop.
   NOTE(review): presumably this holds the target_* values filled in by
   init_set_costs -- confirm against the struct definition in cfgloop.h.  */
struct target_cfgloop default_target_cfgloop;
#if SWITCHABLE_TARGET
/* Only defined when SWITCHABLE_TARGET is nonzero.  Starts out pointing at
   DEFAULT_TARGET_CFGLOOP.
   NOTE(review): presumably redirected when the active target changes --
   confirm against the target-switching code.  */
struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop;
#endif
  38
  39 /* Checks whether BB is executed exactly once in each LOOP iteration.  */
  40
  41 bool
  42 just_once_each_iteration_p (const struct loop *loop, const_basic_block bb)
  43 {
  44   /* It must be executed at least once each iteration.  */
  45   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
  46     return false;
  47
  48   /* And just once.  */
  49   if (bb->loop_father != loop)
  50     return false;
  51
  52   /* But this was not enough.  We might have some irreducible loop here.  */
  53   if (bb->flags & BB_IRREDUCIBLE_LOOP)
  54     return false;
  55
  56   return true;
  57 }
  58
  59 /* Marks blocks and edges that are part of non-recognized loops; i.e. we
  60    throw away all latch edges and mark blocks inside any remaining cycle.
  61    Everything is a bit complicated due to fact we do not want to do this
  62    for parts of cycles that only "pass" through some loop -- i.e. for
  63    each cycle, we want to mark blocks that belong directly to innermost
  64    loop containing the whole cycle.
  65
  66    LOOPS is the loop tree.  */
  67
  68 #define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block)
  69 #define BB_REPR(BB) ((BB)->index + 1)
  70
  71 bool
  72 mark_irreducible_loops (void)
  73 {
  74   basic_block act;
  75   struct graph_edge *ge;
  76   edge e;
  77   edge_iterator ei;
  78   int src, dest;
  79   unsigned depth;
  80   struct graph *g;
  81   int num = number_of_loops ();
  82   struct loop *cloop;
  83   bool irred_loop_found = false;
  84   int i;
  85
  86   gcc_assert (current_loops != NULL);
  87
  88   /* Reset the flags.  */
  89   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
  90     {
  91       act->flags &= ~BB_IRREDUCIBLE_LOOP;
  92       FOR_EACH_EDGE (e, ei, act->succs)
  93         e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
  94     }
  95
  96   /* Create the edge lists.  */
  97   g = new_graph (last_basic_block + num);
  98
  99   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
 100     FOR_EACH_EDGE (e, ei, act->succs)
 101       {
 102         /* Ignore edges to exit.  */
 103         if (e->dest == EXIT_BLOCK_PTR)
 104           continue;
 105
 106         src = BB_REPR (act);
 107         dest = BB_REPR (e->dest);
 108
 109         /* Ignore latch edges.  */
 110         if (e->dest->loop_father->header == e->dest
 111             && e->dest->loop_father->latch == act)
 112           continue;
 113
 114         /* Edges inside a single loop should be left where they are.  Edges
 115            to subloop headers should lead to representative of the subloop,
 116            but from the same place.
 117
 118            Edges exiting loops should lead from representative
 119            of the son of nearest common ancestor of the loops in that
 120            act lays.  */
 121
 122         if (e->dest->loop_father->header == e->dest)
 123           dest = LOOP_REPR (e->dest->loop_father);
 124
 125         if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
 126           {
 127             depth = 1 + loop_depth (find_common_loop (act->loop_father,
 128                                                       e->dest->loop_father));
 129             if (depth == loop_depth (act->loop_father))
 130               cloop = act->loop_father;
 131             else
 132               cloop = (*act->loop_father->superloops)[depth];
 133
 134             src = LOOP_REPR (cloop);
 135           }
 136
 137         add_edge (g, src, dest)->data = e;
 138       }
 139
 140   /* Find the strongly connected components.  */
 141   graphds_scc (g, NULL);
 142
 143   /* Mark the irreducible loops.  */
 144   for (i = 0; i < g->n_vertices; i++)
 145     for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
 146       {
 147         edge real = (edge) ge->data;
 148         /* edge E in graph G is irreducible if it connects two vertices in the
 149            same scc.  */
 150
 151         /* All edges should lead from a component with higher number to the
 152            one with lower one.  */
 153         gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component);
 154
 155         if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
 156           continue;
 157
 158         real->flags |= EDGE_IRREDUCIBLE_LOOP;
 159         irred_loop_found = true;
 160         if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
 161           real->src->flags |= BB_IRREDUCIBLE_LOOP;
 162       }
 163
 164   free_graph (g);
 165
 166   loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
 167   return irred_loop_found;
 168 }
 169
 170 /* Counts number of insns inside LOOP.  */
 171 int
 172 num_loop_insns (const struct loop *loop)
 173 {
 174   basic_block *bbs, bb;
 175   unsigned i, ninsns = 0;
 176   rtx insn;
 177
 178   bbs = get_loop_body (loop);
 179   for (i = 0; i < loop->num_nodes; i++)
 180     {
 181       bb = bbs[i];
 182       FOR_BB_INSNS (bb, insn)
 183         if (NONDEBUG_INSN_P (insn))
 184           ninsns++;
 185     }
 186   free (bbs);
 187
 188   if (!ninsns)
 189     ninsns = 1; /* To avoid division by zero.  */
 190
 191   return ninsns;
 192 }
 193
 194 /* Counts number of insns executed on average per iteration LOOP.  */
 195 int
 196 average_num_loop_insns (const struct loop *loop)
 197 {
 198   basic_block *bbs, bb;
 199   unsigned i, binsns, ninsns, ratio;
 200   rtx insn;
 201
 202   ninsns = 0;
 203   bbs = get_loop_body (loop);
 204   for (i = 0; i < loop->num_nodes; i++)
 205     {
 206       bb = bbs[i];
 207
 208       binsns = 0;
 209       FOR_BB_INSNS (bb, insn)
 210         if (NONDEBUG_INSN_P (insn))
 211           binsns++;
 212
 213       ratio = loop->header->frequency == 0
 214               ? BB_FREQ_MAX
 215               : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency;
 216       ninsns += binsns * ratio;
 217     }
 218   free (bbs);
 219
 220   ninsns /= BB_FREQ_MAX;
 221   if (!ninsns)
 222     ninsns = 1; /* To avoid division by zero.  */
 223
 224   return ninsns;
 225 }
 226
 227 /* Returns expected number of iterations of LOOP, according to
 228    measured or guessed profile.  No bounding is done on the
 229    value.  */
 230
 231 gcov_type
 232 expected_loop_iterations_unbounded (const struct loop *loop)
 233 {
 234   edge e;
 235   edge_iterator ei;
 236
 237   if (loop->latch->count || loop->header->count)
 238     {
 239       gcov_type count_in, count_latch, expected;
 240
 241       count_in = 0;
 242       count_latch = 0;
 243
 244       FOR_EACH_EDGE (e, ei, loop->header->preds)
 245         if (e->src == loop->latch)
 246           count_latch = e->count;
 247         else
 248           count_in += e->count;
 249
 250       if (count_in == 0)
 251         expected = count_latch * 2;
 252       else
 253         expected = (count_latch + count_in - 1) / count_in;
 254
 255       return expected;
 256     }
 257   else
 258     {
 259       int freq_in, freq_latch;
 260
 261       freq_in = 0;
 262       freq_latch = 0;
 263
 264       FOR_EACH_EDGE (e, ei, loop->header->preds)
 265         if (e->src == loop->latch)
 266           freq_latch = EDGE_FREQUENCY (e);
 267         else
 268           freq_in += EDGE_FREQUENCY (e);
 269
 270       if (freq_in == 0)
 271         return freq_latch * 2;
 272
 273       return (freq_latch + freq_in - 1) / freq_in;
 274     }
 275 }
 276
 277 /* Returns expected number of LOOP iterations.  The returned value is bounded
 278    by REG_BR_PROB_BASE.  */
 279
 280 unsigned
 281 expected_loop_iterations (const struct loop *loop)
 282 {
 283   gcov_type expected = expected_loop_iterations_unbounded (loop);
 284   return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
 285 }
 286
 287 /* Returns the maximum level of nesting of subloops of LOOP.  */
 288
 289 unsigned
 290 get_loop_level (const struct loop *loop)
 291 {
 292   const struct loop *ploop;
 293   unsigned mx = 0, l;
 294
 295   for (ploop = loop->inner; ploop; ploop = ploop->next)
 296     {
 297       l = get_loop_level (ploop);
 298       if (l >= mx)
 299         mx = l + 1;
 300     }
 301   return mx;
 302 }
 303
 304 /* Returns estimate on cost of computing SEQ.  */
 305
 306 static unsigned
 307 seq_cost (const_rtx seq, bool speed)
 308 {
 309   unsigned cost = 0;
 310   rtx set;
 311
 312   for (; seq; seq = NEXT_INSN (seq))
 313     {
 314       set = single_set (seq);
 315       if (set)
 316         cost += set_rtx_cost (set, speed);
 317       else
 318         cost++;
 319     }
 320
 321   return cost;
 322 }
 323
 324 /* Initialize the constants for computing set costs.  */
 325
 326 void
 327 init_set_costs (void)
 328 {
 329   int speed;
 330   rtx seq;
 331   rtx reg1 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER);
 332   rtx reg2 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER + 1);
 333   rtx addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 2);
 334   rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
 335   unsigned i;
 336
 337   target_avail_regs = 0;
 338   target_clobbered_regs = 0;
 339   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
 340     if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
 341         && !fixed_regs[i])
 342       {
 343         target_avail_regs++;
 344         if (call_used_regs[i])
 345           target_clobbered_regs++;
 346       }
 347
 348   target_res_regs = 3;
 349
 350   for (speed = 0; speed < 2; speed++)
 351      {
 352       crtl->maybe_hot_insn_p = speed;
 353       /* Set up the costs for using extra registers:
 354
 355          1) If not many free registers remain, we should prefer having an
 356             additional move to decreasing the number of available registers.
 357             (TARGET_REG_COST).
 358          2) If no registers are available, we need to spill, which may require
 359             storing the old value to memory and loading it back
 360             (TARGET_SPILL_COST).  */
 361
 362       start_sequence ();
 363       emit_move_insn (reg1, reg2);
 364       seq = get_insns ();
 365       end_sequence ();
 366       target_reg_cost [speed] = seq_cost (seq, speed);
 367
 368       start_sequence ();
 369       emit_move_insn (mem, reg1);
 370       emit_move_insn (reg2, mem);
 371       seq = get_insns ();
 372       end_sequence ();
 373       target_spill_cost [speed] = seq_cost (seq, speed);
 374     }
 375   default_rtl_profile ();
 376 }
 377
 378 /* Estimates cost of increased register pressure caused by making N_NEW new
 379    registers live around the loop.  N_OLD is the number of registers live
 380    around the loop.  If CALL_P is true, also take into account that
 381    call-used registers may be clobbered in the loop body, reducing the
 382    number of available registers before we spill.  */
 383
 384 unsigned
 385 estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
 386                             bool call_p)
 387 {
 388   unsigned cost;
 389   unsigned regs_needed = n_new + n_old;
 390   unsigned available_regs = target_avail_regs;
 391
 392   /* If there is a call in the loop body, the call-clobbered registers
 393      are not available for loop invariants.  */
 394   if (call_p)
 395     available_regs = available_regs - target_clobbered_regs;
 396
 397   /* If we have enough registers, we should use them and not restrict
 398      the transformations unnecessarily.  */
 399   if (regs_needed + target_res_regs <= available_regs)
 400     return 0;
 401
 402   if (regs_needed <= available_regs)
 403     /* If we are close to running out of registers, try to preserve
 404        them.  */
 405     cost = target_reg_cost [speed] * n_new;
 406   else
 407     /* If we run out of registers, it is very expensive to add another
 408        one.  */
 409     cost = target_spill_cost [speed] * n_new;
 410
 411   if (optimize && (flag_ira_region == IRA_REGION_ALL
 412                    || flag_ira_region == IRA_REGION_MIXED)
 413       && number_of_loops () <= (unsigned) IRA_MAX_LOOPS_NUM)
 414     /* IRA regional allocation deals with high register pressure
 415        better.  So decrease the cost (to do more accurate the cost
 416        calculation for IRA, we need to know how many registers lives
 417        through the loop transparently).  */
 418     cost /= 2;
 419
 420   return cost;
 421 }
 422
 423 /* Sets EDGE_LOOP_EXIT flag for all loop exits.  */
 424
 425 void
 426 mark_loop_exit_edges (void)
 427 {
 428   basic_block bb;
 429   edge e;
 430
 431   if (number_of_loops () <= 1)
 432     return;
 433
 434   FOR_EACH_BB (bb)
 435     {
 436       edge_iterator ei;
 437
 438       FOR_EACH_EDGE (e, ei, bb->succs)
 439         {
 440           if (loop_outer (bb->loop_father)
 441               && loop_exit_edge_p (bb->loop_father, e))
 442             e->flags |= EDGE_LOOP_EXIT;
 443           else
 444             e->flags &= ~EDGE_LOOP_EXIT;
 445         }
 446     }
 447 }
 448
 449 /* Return exit edge if loop has only one exit that is likely
 450    to be executed on runtime (i.e. it is not EH or leading
 451    to noreturn call.  */
 452
 453 edge
 454 single_likely_exit (struct loop *loop)
 455 {
 456   edge found = single_exit (loop);
 457   vec<edge> exits;
 458   unsigned i;
 459   edge ex;
 460
 461   if (found)
 462     return found;
 463   exits = get_loop_exit_edges (loop);
 464   FOR_EACH_VEC_ELT (exits, i, ex)
 465     {
 466       if (ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL))
 467         continue;
 468       /* The constant of 5 is set in a way so noreturn calls are
 469          ruled out by this test.  The static branch prediction algorithm
 470          will not assign such a low probability to conditionals for usual
 471          reasons.  */
 472       if (profile_status != PROFILE_ABSENT
 473           && ex->probability < 5 && !ex->count)
 474         continue;
 475       if (!found)
 476         found = ex;
 477       else
 478         {
 479           exits.release ();
 480           return NULL;
 481         }
 482     }
 483   exits.release ();
 484   return found;
 485 }
 486
 487
 488 /* Gets basic blocks of a LOOP.  Header is the 0-th block, rest is in dfs
 489    order against direction of edges from latch.  Specially, if
 490    header != latch, latch is the 1-st block.  */
 491
 492 vec<basic_block>
 493 get_loop_hot_path (const struct loop *loop)
 494 {
 495   basic_block bb = loop->header;
 496   vec<basic_block> path = vec<basic_block>();
 497   bitmap visited = BITMAP_ALLOC (NULL);
 498
 499   while (true)
 500     {
 501       edge_iterator ei;
 502       edge e;
 503       edge best = NULL;
 504
 505       path.safe_push (bb);
 506       bitmap_set_bit (visited, bb->index);
 507       FOR_EACH_EDGE (e, ei, bb->succs)
 508         if ((!best || e->probability > best->probability)
 509             && !loop_exit_edge_p (loop, e)
 510             && !bitmap_bit_p (visited, e->dest->index))
 511           best = e;
 512       if (!best || best->dest == loop->header)
 513         break;
 514       bb = best->dest;
 515     }
 516   BITMAP_FREE (visited);
 517   return path;
 518 }