gcc/cfgloopanal.c

   1 /* Natural loop analysis code for GNU compiler.
   2    Copyright (C) 2002-2015 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "tm.h"
  24 #include "rtl.h"
  25 #include "hard-reg-set.h"
  26 #include "obstack.h"
  27 #include "predict.h"
  28 #include "vec.h"
  29 #include "hashtab.h"
  30 #include "hash-set.h"
  31 #include "machmode.h"
  32 #include "input.h"
  33 #include "function.h"
  34 #include "dominance.h"
  35 #include "cfg.h"
  36 #include "basic-block.h"
  37 #include "cfgloop.h"
  38 #include "symtab.h"
  39 #include "flags.h"
  40 #include "statistics.h"
  41 #include "double-int.h"
  42 #include "real.h"
  43 #include "fixed-value.h"
  44 #include "alias.h"
  45 #include "wide-int.h"
  46 #include "inchash.h"
  47 #include "tree.h"
  48 #include "insn-config.h"
  49 #include "expmed.h"
  50 #include "dojump.h"
  51 #include "explow.h"
  52 #include "calls.h"
  53 #include "emit-rtl.h"
  54 #include "varasm.h"
  55 #include "stmt.h"
  56 #include "expr.h"
  57 #include "graphds.h"
  58 #include "params.h"
  59
  60 struct target_cfgloop default_target_cfgloop;
  61 #if SWITCHABLE_TARGET
  62 struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop;
  63 #endif
  64
  65 /* Checks whether BB is executed exactly once in each LOOP iteration.  */
  66
  67 bool
  68 just_once_each_iteration_p (const struct loop *loop, const_basic_block bb)
  69 {
  70   /* It must be executed at least once each iteration.  */
  71   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
  72     return false;
  73
  74   /* And just once.  */
  75   if (bb->loop_father != loop)
  76     return false;
  77
  78   /* But this was not enough.  We might have some irreducible loop here.  */
  79   if (bb->flags & BB_IRREDUCIBLE_LOOP)
  80     return false;
  81
  82   return true;
  83 }
  84
  85 /* Marks blocks and edges that are part of non-recognized loops; i.e. we
  86    throw away all latch edges and mark blocks inside any remaining cycle.
  87    Everything is a bit complicated due to fact we do not want to do this
  88    for parts of cycles that only "pass" through some loop -- i.e. for
  89    each cycle, we want to mark blocks that belong directly to innermost
  90    loop containing the whole cycle.
  91
  92    LOOPS is the loop tree.  */
  93
  94 #define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block_for_fn (cfun))
  95 #define BB_REPR(BB) ((BB)->index + 1)
  96
  97 bool
  98 mark_irreducible_loops (void)
  99 {
 100   basic_block act;
 101   struct graph_edge *ge;
 102   edge e;
 103   edge_iterator ei;
 104   int src, dest;
 105   unsigned depth;
 106   struct graph *g;
 107   int num = number_of_loops (cfun);
 108   struct loop *cloop;
 109   bool irred_loop_found = false;
 110   int i;
 111
 112   gcc_assert (current_loops != NULL);
 113
 114   /* Reset the flags.  */
 115   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
 116                   EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
 117     {
 118       act->flags &= ~BB_IRREDUCIBLE_LOOP;
 119       FOR_EACH_EDGE (e, ei, act->succs)
 120         e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
 121     }
 122
 123   /* Create the edge lists.  */
 124   g = new_graph (last_basic_block_for_fn (cfun) + num);
 125
 126   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
 127                   EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
 128     FOR_EACH_EDGE (e, ei, act->succs)
 129       {
 130         /* Ignore edges to exit.  */
 131         if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
 132           continue;
 133
 134         src = BB_REPR (act);
 135         dest = BB_REPR (e->dest);
 136
 137         /* Ignore latch edges.  */
 138         if (e->dest->loop_father->header == e->dest
 139             && e->dest->loop_father->latch == act)
 140           continue;
 141
 142         /* Edges inside a single loop should be left where they are.  Edges
 143            to subloop headers should lead to representative of the subloop,
 144            but from the same place.
 145
 146            Edges exiting loops should lead from representative
 147            of the son of nearest common ancestor of the loops in that
 148            act lays.  */
 149
 150         if (e->dest->loop_father->header == e->dest)
 151           dest = LOOP_REPR (e->dest->loop_father);
 152
 153         if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
 154           {
 155             depth = 1 + loop_depth (find_common_loop (act->loop_father,
 156                                                       e->dest->loop_father));
 157             if (depth == loop_depth (act->loop_father))
 158               cloop = act->loop_father;
 159             else
 160               cloop = (*act->loop_father->superloops)[depth];
 161
 162             src = LOOP_REPR (cloop);
 163           }
 164
 165         add_edge (g, src, dest)->data = e;
 166       }
 167
 168   /* Find the strongly connected components.  */
 169   graphds_scc (g, NULL);
 170
 171   /* Mark the irreducible loops.  */
 172   for (i = 0; i < g->n_vertices; i++)
 173     for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
 174       {
 175         edge real = (edge) ge->data;
 176         /* edge E in graph G is irreducible if it connects two vertices in the
 177            same scc.  */
 178
 179         /* All edges should lead from a component with higher number to the
 180            one with lower one.  */
 181         gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component);
 182
 183         if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
 184           continue;
 185
 186         real->flags |= EDGE_IRREDUCIBLE_LOOP;
 187         irred_loop_found = true;
 188         if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
 189           real->src->flags |= BB_IRREDUCIBLE_LOOP;
 190       }
 191
 192   free_graph (g);
 193
 194   loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
 195   return irred_loop_found;
 196 }
 197
 198 /* Counts number of insns inside LOOP.  */
 199 int
 200 num_loop_insns (const struct loop *loop)
 201 {
 202   basic_block *bbs, bb;
 203   unsigned i, ninsns = 0;
 204   rtx_insn *insn;
 205
 206   bbs = get_loop_body (loop);
 207   for (i = 0; i < loop->num_nodes; i++)
 208     {
 209       bb = bbs[i];
 210       FOR_BB_INSNS (bb, insn)
 211         if (NONDEBUG_INSN_P (insn))
 212           ninsns++;
 213     }
 214   free (bbs);
 215
 216   if (!ninsns)
 217     ninsns = 1; /* To avoid division by zero.  */
 218
 219   return ninsns;
 220 }
 221
 222 /* Counts number of insns executed on average per iteration LOOP.  */
 223 int
 224 average_num_loop_insns (const struct loop *loop)
 225 {
 226   basic_block *bbs, bb;
 227   unsigned i, binsns, ninsns, ratio;
 228   rtx_insn *insn;
 229
 230   ninsns = 0;
 231   bbs = get_loop_body (loop);
 232   for (i = 0; i < loop->num_nodes; i++)
 233     {
 234       bb = bbs[i];
 235
 236       binsns = 0;
 237       FOR_BB_INSNS (bb, insn)
 238         if (NONDEBUG_INSN_P (insn))
 239           binsns++;
 240
 241       ratio = loop->header->frequency == 0
 242               ? BB_FREQ_MAX
 243               : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency;
 244       ninsns += binsns * ratio;
 245     }
 246   free (bbs);
 247
 248   ninsns /= BB_FREQ_MAX;
 249   if (!ninsns)
 250     ninsns = 1; /* To avoid division by zero.  */
 251
 252   return ninsns;
 253 }
 254
 255 /* Returns expected number of iterations of LOOP, according to
 256    measured or guessed profile.  No bounding is done on the
 257    value.  */
 258
 259 gcov_type
 260 expected_loop_iterations_unbounded (const struct loop *loop)
 261 {
 262   edge e;
 263   edge_iterator ei;
 264
 265   if (loop->latch->count || loop->header->count)
 266     {
 267       gcov_type count_in, count_latch, expected;
 268
 269       count_in = 0;
 270       count_latch = 0;
 271
 272       FOR_EACH_EDGE (e, ei, loop->header->preds)
 273         if (e->src == loop->latch)
 274           count_latch = e->count;
 275         else
 276           count_in += e->count;
 277
 278       if (count_in == 0)
 279         expected = count_latch * 2;
 280       else
 281         expected = (count_latch + count_in - 1) / count_in;
 282
 283       return expected;
 284     }
 285   else
 286     {
 287       int freq_in, freq_latch;
 288
 289       freq_in = 0;
 290       freq_latch = 0;
 291
 292       FOR_EACH_EDGE (e, ei, loop->header->preds)
 293         if (e->src == loop->latch)
 294           freq_latch = EDGE_FREQUENCY (e);
 295         else
 296           freq_in += EDGE_FREQUENCY (e);
 297
 298       if (freq_in == 0)
 299         return freq_latch * 2;
 300
 301       return (freq_latch + freq_in - 1) / freq_in;
 302     }
 303 }
 304
 305 /* Returns expected number of LOOP iterations.  The returned value is bounded
 306    by REG_BR_PROB_BASE.  */
 307
 308 unsigned
 309 expected_loop_iterations (const struct loop *loop)
 310 {
 311   gcov_type expected = expected_loop_iterations_unbounded (loop);
 312   return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
 313 }
 314
 315 /* Returns the maximum level of nesting of subloops of LOOP.  */
 316
 317 unsigned
 318 get_loop_level (const struct loop *loop)
 319 {
 320   const struct loop *ploop;
 321   unsigned mx = 0, l;
 322
 323   for (ploop = loop->inner; ploop; ploop = ploop->next)
 324     {
 325       l = get_loop_level (ploop);
 326       if (l >= mx)
 327         mx = l + 1;
 328     }
 329   return mx;
 330 }
 331
 332 /* Initialize the constants for computing set costs.  */
 333
 334 void
 335 init_set_costs (void)
 336 {
 337   int speed;
 338   rtx_insn *seq;
 339   rtx reg1 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER);
 340   rtx reg2 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER + 1);
 341   rtx addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 2);
 342   rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
 343   unsigned i;
 344
 345   target_avail_regs = 0;
 346   target_clobbered_regs = 0;
 347   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
 348     if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
 349         && !fixed_regs[i])
 350       {
 351         target_avail_regs++;
 352         if (call_used_regs[i])
 353           target_clobbered_regs++;
 354       }
 355
 356   target_res_regs = 3;
 357
 358   for (speed = 0; speed < 2; speed++)
 359      {
 360       crtl->maybe_hot_insn_p = speed;
 361       /* Set up the costs for using extra registers:
 362
 363          1) If not many free registers remain, we should prefer having an
 364             additional move to decreasing the number of available registers.
 365             (TARGET_REG_COST).
 366          2) If no registers are available, we need to spill, which may require
 367             storing the old value to memory and loading it back
 368             (TARGET_SPILL_COST).  */
 369
 370       start_sequence ();
 371       emit_move_insn (reg1, reg2);
 372       seq = get_insns ();
 373       end_sequence ();
 374       target_reg_cost [speed] = seq_cost (seq, speed);
 375
 376       start_sequence ();
 377       emit_move_insn (mem, reg1);
 378       emit_move_insn (reg2, mem);
 379       seq = get_insns ();
 380       end_sequence ();
 381       target_spill_cost [speed] = seq_cost (seq, speed);
 382     }
 383   default_rtl_profile ();
 384 }
 385
 386 /* Estimates cost of increased register pressure caused by making N_NEW new
 387    registers live around the loop.  N_OLD is the number of registers live
 388    around the loop.  If CALL_P is true, also take into account that
 389    call-used registers may be clobbered in the loop body, reducing the
 390    number of available registers before we spill.  */
 391
 392 unsigned
 393 estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
 394                             bool call_p)
 395 {
 396   unsigned cost;
 397   unsigned regs_needed = n_new + n_old;
 398   unsigned available_regs = target_avail_regs;
 399
 400   /* If there is a call in the loop body, the call-clobbered registers
 401      are not available for loop invariants.  */
 402   if (call_p)
 403     available_regs = available_regs - target_clobbered_regs;
 404
 405   /* If we have enough registers, we should use them and not restrict
 406      the transformations unnecessarily.  */
 407   if (regs_needed + target_res_regs <= available_regs)
 408     return 0;
 409
 410   if (regs_needed <= available_regs)
 411     /* If we are close to running out of registers, try to preserve
 412        them.  */
 413     cost = target_reg_cost [speed] * n_new;
 414   else
 415     /* If we run out of registers, it is very expensive to add another
 416        one.  */
 417     cost = target_spill_cost [speed] * n_new;
 418
 419   if (optimize && (flag_ira_region == IRA_REGION_ALL
 420                    || flag_ira_region == IRA_REGION_MIXED)
 421       && number_of_loops (cfun) <= (unsigned) IRA_MAX_LOOPS_NUM)
 422     /* IRA regional allocation deals with high register pressure
 423        better.  So decrease the cost (to do more accurate the cost
 424        calculation for IRA, we need to know how many registers lives
 425        through the loop transparently).  */
 426     cost /= 2;
 427
 428   return cost;
 429 }
 430
 431 /* Sets EDGE_LOOP_EXIT flag for all loop exits.  */
 432
 433 void
 434 mark_loop_exit_edges (void)
 435 {
 436   basic_block bb;
 437   edge e;
 438
 439   if (number_of_loops (cfun) <= 1)
 440     return;
 441
 442   FOR_EACH_BB_FN (bb, cfun)
 443     {
 444       edge_iterator ei;
 445
 446       FOR_EACH_EDGE (e, ei, bb->succs)
 447         {
 448           if (loop_outer (bb->loop_father)
 449               && loop_exit_edge_p (bb->loop_father, e))
 450             e->flags |= EDGE_LOOP_EXIT;
 451           else
 452             e->flags &= ~EDGE_LOOP_EXIT;
 453         }
 454     }
 455 }
 456
 457 /* Return exit edge if loop has only one exit that is likely
 458    to be executed on runtime (i.e. it is not EH or leading
 459    to noreturn call.  */
 460
 461 edge
 462 single_likely_exit (struct loop *loop)
 463 {
 464   edge found = single_exit (loop);
 465   vec<edge> exits;
 466   unsigned i;
 467   edge ex;
 468
 469   if (found)
 470     return found;
 471   exits = get_loop_exit_edges (loop);
 472   FOR_EACH_VEC_ELT (exits, i, ex)
 473     {
 474       if (ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL))
 475         continue;
 476       /* The constant of 5 is set in a way so noreturn calls are
 477          ruled out by this test.  The static branch prediction algorithm
 478          will not assign such a low probability to conditionals for usual
 479          reasons.  */
 480       if (profile_status_for_fn (cfun) != PROFILE_ABSENT
 481           && ex->probability < 5 && !ex->count)
 482         continue;
 483       if (!found)
 484         found = ex;
 485       else
 486         {
 487           exits.release ();
 488           return NULL;
 489         }
 490     }
 491   exits.release ();
 492   return found;
 493 }
 494
 495
 496 /* Gets basic blocks of a LOOP.  Header is the 0-th block, rest is in dfs
 497    order against direction of edges from latch.  Specially, if
 498    header != latch, latch is the 1-st block.  */
 499
 500 vec<basic_block>
 501 get_loop_hot_path (const struct loop *loop)
 502 {
 503   basic_block bb = loop->header;
 504   vec<basic_block> path = vNULL;
 505   bitmap visited = BITMAP_ALLOC (NULL);
 506
 507   while (true)
 508     {
 509       edge_iterator ei;
 510       edge e;
 511       edge best = NULL;
 512
 513       path.safe_push (bb);
 514       bitmap_set_bit (visited, bb->index);
 515       FOR_EACH_EDGE (e, ei, bb->succs)
 516         if ((!best || e->probability > best->probability)
 517             && !loop_exit_edge_p (loop, e)
 518             && !bitmap_bit_p (visited, e->dest->index))
 519           best = e;
 520       if (!best || best->dest == loop->header)
 521         break;
 522       bb = best->dest;
 523     }
 524   BITMAP_FREE (visited);
 525   return path;
 526 }