gcc/cfgloopanal.c

   1 /* Natural loop analysis code for GNU compiler.
   2    Copyright (C) 2002-2017 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "backend.h"
  24 #include "rtl.h"
  25 #include "tree.h"
  26 #include "predict.h"
  27 #include "memmodel.h"
  28 #include "emit-rtl.h"
  29 #include "cfgloop.h"
  30 #include "explow.h"
  31 #include "expr.h"
  32 #include "graphds.h"
  33 #include "params.h"
  34
  35 struct target_cfgloop default_target_cfgloop;
  36 #if SWITCHABLE_TARGET
  37 struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop;
  38 #endif
  39
  40 /* Checks whether BB is executed exactly once in each LOOP iteration.  */
  41
  42 bool
  43 just_once_each_iteration_p (const struct loop *loop, const_basic_block bb)
  44 {
  45   /* It must be executed at least once each iteration.  */
  46   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
  47     return false;
  48
  49   /* And just once.  */
  50   if (bb->loop_father != loop)
  51     return false;
  52
  53   /* But this was not enough.  We might have some irreducible loop here.  */
  54   if (bb->flags & BB_IRREDUCIBLE_LOOP)
  55     return false;
  56
  57   return true;
  58 }
  59
  60 /* Marks blocks and edges that are part of non-recognized loops; i.e. we
  61    throw away all latch edges and mark blocks inside any remaining cycle.
  62    Everything is a bit complicated due to fact we do not want to do this
  63    for parts of cycles that only "pass" through some loop -- i.e. for
  64    each cycle, we want to mark blocks that belong directly to innermost
  65    loop containing the whole cycle.
  66
  67    LOOPS is the loop tree.  */
  68
  69 #define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block_for_fn (cfun))
  70 #define BB_REPR(BB) ((BB)->index + 1)
  71
  72 bool
  73 mark_irreducible_loops (void)
  74 {
  75   basic_block act;
  76   struct graph_edge *ge;
  77   edge e;
  78   edge_iterator ei;
  79   int src, dest;
  80   unsigned depth;
  81   struct graph *g;
  82   int num = number_of_loops (cfun);
  83   struct loop *cloop;
  84   bool irred_loop_found = false;
  85   int i;
  86
  87   gcc_assert (current_loops != NULL);
  88
  89   /* Reset the flags.  */
  90   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
  91                   EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
  92     {
  93       act->flags &= ~BB_IRREDUCIBLE_LOOP;
  94       FOR_EACH_EDGE (e, ei, act->succs)
  95         e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
  96     }
  97
  98   /* Create the edge lists.  */
  99   g = new_graph (last_basic_block_for_fn (cfun) + num);
 100
 101   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
 102                   EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
 103     FOR_EACH_EDGE (e, ei, act->succs)
 104       {
 105         /* Ignore edges to exit.  */
 106         if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
 107           continue;
 108
 109         src = BB_REPR (act);
 110         dest = BB_REPR (e->dest);
 111
 112         /* Ignore latch edges.  */
 113         if (e->dest->loop_father->header == e->dest
 114             && e->dest->loop_father->latch == act)
 115           continue;
 116
 117         /* Edges inside a single loop should be left where they are.  Edges
 118            to subloop headers should lead to representative of the subloop,
 119            but from the same place.
 120
 121            Edges exiting loops should lead from representative
 122            of the son of nearest common ancestor of the loops in that
 123            act lays.  */
 124
 125         if (e->dest->loop_father->header == e->dest)
 126           dest = LOOP_REPR (e->dest->loop_father);
 127
 128         if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
 129           {
 130             depth = 1 + loop_depth (find_common_loop (act->loop_father,
 131                                                       e->dest->loop_father));
 132             if (depth == loop_depth (act->loop_father))
 133               cloop = act->loop_father;
 134             else
 135               cloop = (*act->loop_father->superloops)[depth];
 136
 137             src = LOOP_REPR (cloop);
 138           }
 139
 140         add_edge (g, src, dest)->data = e;
 141       }
 142
 143   /* Find the strongly connected components.  */
 144   graphds_scc (g, NULL);
 145
 146   /* Mark the irreducible loops.  */
 147   for (i = 0; i < g->n_vertices; i++)
 148     for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
 149       {
 150         edge real = (edge) ge->data;
 151         /* edge E in graph G is irreducible if it connects two vertices in the
 152            same scc.  */
 153
 154         /* All edges should lead from a component with higher number to the
 155            one with lower one.  */
 156         gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component);
 157
 158         if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
 159           continue;
 160
 161         real->flags |= EDGE_IRREDUCIBLE_LOOP;
 162         irred_loop_found = true;
 163         if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
 164           real->src->flags |= BB_IRREDUCIBLE_LOOP;
 165       }
 166
 167   free_graph (g);
 168
 169   loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
 170   return irred_loop_found;
 171 }
 172
 173 /* Counts number of insns inside LOOP.  */
 174 int
 175 num_loop_insns (const struct loop *loop)
 176 {
 177   basic_block *bbs, bb;
 178   unsigned i, ninsns = 0;
 179   rtx_insn *insn;
 180
 181   bbs = get_loop_body (loop);
 182   for (i = 0; i < loop->num_nodes; i++)
 183     {
 184       bb = bbs[i];
 185       FOR_BB_INSNS (bb, insn)
 186         if (NONDEBUG_INSN_P (insn))
 187           ninsns++;
 188     }
 189   free (bbs);
 190
 191   if (!ninsns)
 192     ninsns = 1; /* To avoid division by zero.  */
 193
 194   return ninsns;
 195 }
 196
 197 /* Counts number of insns executed on average per iteration LOOP.  */
 198 int
 199 average_num_loop_insns (const struct loop *loop)
 200 {
 201   basic_block *bbs, bb;
 202   unsigned i, binsns, ninsns, ratio;
 203   rtx_insn *insn;
 204
 205   ninsns = 0;
 206   bbs = get_loop_body (loop);
 207   for (i = 0; i < loop->num_nodes; i++)
 208     {
 209       bb = bbs[i];
 210
 211       binsns = 0;
 212       FOR_BB_INSNS (bb, insn)
 213         if (NONDEBUG_INSN_P (insn))
 214           binsns++;
 215
 216       ratio = loop->header->frequency == 0
 217               ? BB_FREQ_MAX
 218               : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency;
 219       ninsns += binsns * ratio;
 220     }
 221   free (bbs);
 222
 223   ninsns /= BB_FREQ_MAX;
 224   if (!ninsns)
 225     ninsns = 1; /* To avoid division by zero.  */
 226
 227   return ninsns;
 228 }
 229
 230 /* Returns expected number of iterations of LOOP, according to
 231    measured or guessed profile.  No bounding is done on the
 232    value.  */
 233
 234 gcov_type
 235 expected_loop_iterations_unbounded (const struct loop *loop,
 236                                     bool *read_profile_p)
 237 {
 238   edge e;
 239   edge_iterator ei;
 240   gcov_type expected = -1;
 241
 242   if (read_profile_p)
 243     *read_profile_p = false;
 244
 245   /* If we have no profile at all, use AVG_LOOP_NITER.  */
 246   if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
 247     expected = PARAM_VALUE (PARAM_AVG_LOOP_NITER);
 248   else if (loop->latch && (loop->latch->count.reliable_p ()
 249                            || loop->header->count.reliable_p ()))
 250     {
 251       profile_count count_in = profile_count::zero (),
 252                     count_latch = profile_count::zero ();
 253
 254       FOR_EACH_EDGE (e, ei, loop->header->preds)
 255         if (e->src == loop->latch)
 256           count_latch = e->count;
 257         else
 258           count_in += e->count;
 259
 260       if (!count_latch.initialized_p ())
 261         ;
 262       else if (!(count_in > profile_count::zero ()))
 263         expected = count_latch.to_gcov_type () * 2;
 264       else
 265         {
 266           expected = (count_latch.to_gcov_type () + count_in.to_gcov_type ()
 267                       - 1) / count_in.to_gcov_type ();
 268           if (read_profile_p)
 269             *read_profile_p = true;
 270         }
 271     }
 272   if (expected == -1)
 273     {
 274       int freq_in, freq_latch;
 275
 276       freq_in = 0;
 277       freq_latch = 0;
 278
 279       FOR_EACH_EDGE (e, ei, loop->header->preds)
 280         if (flow_bb_inside_loop_p (loop, e->src))
 281           freq_latch += EDGE_FREQUENCY (e);
 282         else
 283           freq_in += EDGE_FREQUENCY (e);
 284
 285       if (freq_in == 0)
 286         {
 287           /* If we have no profile at all, use AVG_LOOP_NITER iterations.  */
 288           if (!freq_latch)
 289             expected = PARAM_VALUE (PARAM_AVG_LOOP_NITER);
 290           else
 291             expected = freq_latch * 2;
 292         }
 293       else
 294         expected = (freq_latch + freq_in - 1) / freq_in;
 295     }
 296
 297   HOST_WIDE_INT max = get_max_loop_iterations_int (loop);
 298   if (max != -1 && max < expected)
 299     return max;
 300   return expected;
 301 }
 302
 303 /* Returns expected number of LOOP iterations.  The returned value is bounded
 304    by REG_BR_PROB_BASE.  */
 305
 306 unsigned
 307 expected_loop_iterations (struct loop *loop)
 308 {
 309   gcov_type expected = expected_loop_iterations_unbounded (loop);
 310   return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
 311 }
 312
 313 /* Returns the maximum level of nesting of subloops of LOOP.  */
 314
 315 unsigned
 316 get_loop_level (const struct loop *loop)
 317 {
 318   const struct loop *ploop;
 319   unsigned mx = 0, l;
 320
 321   for (ploop = loop->inner; ploop; ploop = ploop->next)
 322     {
 323       l = get_loop_level (ploop);
 324       if (l >= mx)
 325         mx = l + 1;
 326     }
 327   return mx;
 328 }
 329
 330 /* Initialize the constants for computing set costs.  */
 331
 332 void
 333 init_set_costs (void)
 334 {
 335   int speed;
 336   rtx_insn *seq;
 337   rtx reg1 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 1);
 338   rtx reg2 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 2);
 339   rtx addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 3);
 340   rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
 341   unsigned i;
 342
 343   target_avail_regs = 0;
 344   target_clobbered_regs = 0;
 345   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
 346     if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
 347         && !fixed_regs[i])
 348       {
 349         target_avail_regs++;
 350         if (call_used_regs[i])
 351           target_clobbered_regs++;
 352       }
 353
 354   target_res_regs = 3;
 355
 356   for (speed = 0; speed < 2; speed++)
 357      {
 358       crtl->maybe_hot_insn_p = speed;
 359       /* Set up the costs for using extra registers:
 360
 361          1) If not many free registers remain, we should prefer having an
 362             additional move to decreasing the number of available registers.
 363             (TARGET_REG_COST).
 364          2) If no registers are available, we need to spill, which may require
 365             storing the old value to memory and loading it back
 366             (TARGET_SPILL_COST).  */
 367
 368       start_sequence ();
 369       emit_move_insn (reg1, reg2);
 370       seq = get_insns ();
 371       end_sequence ();
 372       target_reg_cost [speed] = seq_cost (seq, speed);
 373
 374       start_sequence ();
 375       emit_move_insn (mem, reg1);
 376       emit_move_insn (reg2, mem);
 377       seq = get_insns ();
 378       end_sequence ();
 379       target_spill_cost [speed] = seq_cost (seq, speed);
 380     }
 381   default_rtl_profile ();
 382 }
 383
 384 /* Estimates cost of increased register pressure caused by making N_NEW new
 385    registers live around the loop.  N_OLD is the number of registers live
 386    around the loop.  If CALL_P is true, also take into account that
 387    call-used registers may be clobbered in the loop body, reducing the
 388    number of available registers before we spill.  */
 389
 390 unsigned
 391 estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
 392                             bool call_p)
 393 {
 394   unsigned cost;
 395   unsigned regs_needed = n_new + n_old;
 396   unsigned available_regs = target_avail_regs;
 397
 398   /* If there is a call in the loop body, the call-clobbered registers
 399      are not available for loop invariants.  */
 400   if (call_p)
 401     available_regs = available_regs - target_clobbered_regs;
 402
 403   /* If we have enough registers, we should use them and not restrict
 404      the transformations unnecessarily.  */
 405   if (regs_needed + target_res_regs <= available_regs)
 406     return 0;
 407
 408   if (regs_needed <= available_regs)
 409     /* If we are close to running out of registers, try to preserve
 410        them.  */
 411     cost = target_reg_cost [speed] * n_new;
 412   else
 413     /* If we run out of registers, it is very expensive to add another
 414        one.  */
 415     cost = target_spill_cost [speed] * n_new;
 416
 417   if (optimize && (flag_ira_region == IRA_REGION_ALL
 418                    || flag_ira_region == IRA_REGION_MIXED)
 419       && number_of_loops (cfun) <= (unsigned) IRA_MAX_LOOPS_NUM)
 420     /* IRA regional allocation deals with high register pressure
 421        better.  So decrease the cost (to do more accurate the cost
 422        calculation for IRA, we need to know how many registers lives
 423        through the loop transparently).  */
 424     cost /= 2;
 425
 426   return cost;
 427 }
 428
 429 /* Sets EDGE_LOOP_EXIT flag for all loop exits.  */
 430
 431 void
 432 mark_loop_exit_edges (void)
 433 {
 434   basic_block bb;
 435   edge e;
 436
 437   if (number_of_loops (cfun) <= 1)
 438     return;
 439
 440   FOR_EACH_BB_FN (bb, cfun)
 441     {
 442       edge_iterator ei;
 443
 444       FOR_EACH_EDGE (e, ei, bb->succs)
 445         {
 446           if (loop_outer (bb->loop_father)
 447               && loop_exit_edge_p (bb->loop_father, e))
 448             e->flags |= EDGE_LOOP_EXIT;
 449           else
 450             e->flags &= ~EDGE_LOOP_EXIT;
 451         }
 452     }
 453 }
 454
 455 /* Return exit edge if loop has only one exit that is likely
 456    to be executed on runtime (i.e. it is not EH or leading
 457    to noreturn call.  */
 458
 459 edge
 460 single_likely_exit (struct loop *loop)
 461 {
 462   edge found = single_exit (loop);
 463   vec<edge> exits;
 464   unsigned i;
 465   edge ex;
 466
 467   if (found)
 468     return found;
 469   exits = get_loop_exit_edges (loop);
 470   FOR_EACH_VEC_ELT (exits, i, ex)
 471     {
 472       if (ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL))
 473         continue;
 474       /* The constant of 5 is set in a way so noreturn calls are
 475          ruled out by this test.  The static branch prediction algorithm
 476          will not assign such a low probability to conditionals for usual
 477          reasons.
 478          FIXME: Turn to likely_never_executed  */
 479       if ((profile_status_for_fn (cfun) != PROFILE_ABSENT
 480            && ex->probability < 5)
 481           || ex->count == profile_count::zero ())
 482         continue;
 483       if (!found)
 484         found = ex;
 485       else
 486         {
 487           exits.release ();
 488           return NULL;
 489         }
 490     }
 491   exits.release ();
 492   return found;
 493 }
 494
 495
 496 /* Gets basic blocks of a LOOP.  Header is the 0-th block, rest is in dfs
 497    order against direction of edges from latch.  Specially, if
 498    header != latch, latch is the 1-st block.  */
 499
 500 vec<basic_block>
 501 get_loop_hot_path (const struct loop *loop)
 502 {
 503   basic_block bb = loop->header;
 504   vec<basic_block> path = vNULL;
 505   bitmap visited = BITMAP_ALLOC (NULL);
 506
 507   while (true)
 508     {
 509       edge_iterator ei;
 510       edge e;
 511       edge best = NULL;
 512
 513       path.safe_push (bb);
 514       bitmap_set_bit (visited, bb->index);
 515       FOR_EACH_EDGE (e, ei, bb->succs)
 516         if ((!best || e->probability > best->probability)
 517             && !loop_exit_edge_p (loop, e)
 518             && !bitmap_bit_p (visited, e->dest->index))
 519           best = e;
 520       if (!best || best->dest == loop->header)
 521         break;
 522       bb = best->dest;
 523     }
 524   BITMAP_FREE (visited);
 525   return path;
 526 }