gcc/tree-vect-loop-manip.c

   1 /* Vectorizer Specific Loop Manipulations
   2    Copyright (C) 2003-2014 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4    and Ira Rosen <irar@il.ibm.com>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "dumpfile.h"
  26 #include "tm.h"
  27 #include "tree.h"
  28 #include "predict.h"
  29 #include "vec.h"
  30 #include "hashtab.h"
  31 #include "hash-set.h"
  32 #include "machmode.h"
  33 #include "hard-reg-set.h"
  34 #include "input.h"
  35 #include "function.h"
  36 #include "dominance.h"
  37 #include "cfg.h"
  38 #include "cfganal.h"
  39 #include "basic-block.h"
  40 #include "gimple-pretty-print.h"
  41 #include "tree-ssa-alias.h"
  42 #include "internal-fn.h"
  43 #include "gimple-expr.h"
  44 #include "is-a.h"
  45 #include "gimple.h"
  46 #include "gimplify.h"
  47 #include "gimple-iterator.h"
  48 #include "gimplify-me.h"
  49 #include "gimple-ssa.h"
  50 #include "tree-cfg.h"
  51 #include "tree-phinodes.h"
  52 #include "ssa-iterators.h"
  53 #include "stringpool.h"
  54 #include "tree-ssanames.h"
  55 #include "tree-ssa-loop-manip.h"
  56 #include "tree-into-ssa.h"
  57 #include "tree-ssa.h"
  58 #include "tree-pass.h"
  59 #include "cfgloop.h"
  60 #include "diagnostic-core.h"
  61 #include "tree-scalar-evolution.h"
  62 #include "tree-vectorizer.h"
  63 #include "langhooks.h"
  64
  65 /*************************************************************************
  66   Simple Loop Peeling Utilities
  67
  68   Utilities to support loop peeling for vectorization purposes.
  69  *************************************************************************/
  70
  71
  72 /* Renames the use *OP_P.  */
  73
  74 static void
  75 rename_use_op (use_operand_p op_p)
  76 {
  77   tree new_name;
  78
  79   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
  80     return;
  81
  82   new_name = get_current_def (USE_FROM_PTR (op_p));
  83
  84   /* Something defined outside of the loop.  */
  85   if (!new_name)
  86     return;
  87
  88   /* An ordinary ssa name defined in the loop.  */
  89
  90   SET_USE (op_p, new_name);
  91 }
  92
  93
  94 /* Renames the variables in basic block BB.  */
  95
  96 static void
  97 rename_variables_in_bb (basic_block bb)
  98 {
  99   gimple_stmt_iterator gsi;
 100   gimple stmt;
 101   use_operand_p use_p;
 102   ssa_op_iter iter;
 103   edge e;
 104   edge_iterator ei;
 105   struct loop *loop = bb->loop_father;
 106
 107   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 108     {
 109       stmt = gsi_stmt (gsi);
 110       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
 111         rename_use_op (use_p);
 112     }
 113
 114   FOR_EACH_EDGE (e, ei, bb->preds)
 115     {
 116       if (!flow_bb_inside_loop_p (loop, e->src))
 117         continue;
 118       for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 119         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
 120     }
 121 }
 122
 123
 124 typedef struct
 125 {
 126   tree from, to;
 127   basic_block bb;
 128 } adjust_info;
 129
 130 /* A stack of values to be adjusted in debug stmts.  We have to
 131    process them LIFO, so that the closest substitution applies.  If we
 132    processed them FIFO, without the stack, we might substitute uses
 133    with a PHI DEF that would soon become non-dominant, and when we got
 134    to the suitable one, it wouldn't have anything to substitute any
 135    more.  */
 136 static vec<adjust_info, va_heap> adjust_vec;
 137
 138 /* Adjust any debug stmts that referenced AI->from values to use the
 139    loop-closed AI->to, if the references are dominated by AI->bb and
 140    not by the definition of AI->from.  */
 141
 142 static void
 143 adjust_debug_stmts_now (adjust_info *ai)
 144 {
 145   basic_block bbphi = ai->bb;
 146   tree orig_def = ai->from;
 147   tree new_def = ai->to;
 148   imm_use_iterator imm_iter;
 149   gimple stmt;
 150   basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
 151
 152   gcc_assert (dom_info_available_p (CDI_DOMINATORS));
 153
 154   /* Adjust any debug stmts that held onto non-loop-closed
 155      references.  */
 156   FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
 157     {
 158       use_operand_p use_p;
 159       basic_block bbuse;
 160
 161       if (!is_gimple_debug (stmt))
 162         continue;
 163
 164       gcc_assert (gimple_debug_bind_p (stmt));
 165
 166       bbuse = gimple_bb (stmt);
 167
 168       if ((bbuse == bbphi
 169            || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
 170           && !(bbuse == bbdef
 171                || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
 172         {
 173           if (new_def)
 174             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
 175               SET_USE (use_p, new_def);
 176           else
 177             {
 178               gimple_debug_bind_reset_value (stmt);
 179               update_stmt (stmt);
 180             }
 181         }
 182     }
 183 }
 184
 185 /* Adjust debug stmts as scheduled before.  */
 186
 187 static void
 188 adjust_vec_debug_stmts (void)
 189 {
 190   if (!MAY_HAVE_DEBUG_STMTS)
 191     return;
 192
 193   gcc_assert (adjust_vec.exists ());
 194
 195   while (!adjust_vec.is_empty ())
 196     {
 197       adjust_debug_stmts_now (&adjust_vec.last ());
 198       adjust_vec.pop ();
 199     }
 200
 201   adjust_vec.release ();
 202 }
 203
 204 /* Adjust any debug stmts that referenced FROM values to use the
 205    loop-closed TO, if the references are dominated by BB and not by
 206    the definition of FROM.  If adjust_vec is non-NULL, adjustments
 207    will be postponed until adjust_vec_debug_stmts is called.  */
 208
 209 static void
 210 adjust_debug_stmts (tree from, tree to, basic_block bb)
 211 {
 212   adjust_info ai;
 213
 214   if (MAY_HAVE_DEBUG_STMTS
 215       && TREE_CODE (from) == SSA_NAME
 216       && ! SSA_NAME_IS_DEFAULT_DEF (from)
 217       && ! virtual_operand_p (from))
 218     {
 219       ai.from = from;
 220       ai.to = to;
 221       ai.bb = bb;
 222
 223       if (adjust_vec.exists ())
 224         adjust_vec.safe_push (ai);
 225       else
 226         adjust_debug_stmts_now (&ai);
 227     }
 228 }
 229
 230 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
 231    to adjust any debug stmts that referenced the old phi arg,
 232    presumably non-loop-closed references left over from other
 233    transformations.  */
 234
 235 static void
 236 adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
 237 {
 238   tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
 239
 240   SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
 241
 242   if (MAY_HAVE_DEBUG_STMTS)
 243     adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
 244                         gimple_bb (update_phi));
 245 }
 246
 247
 248 /* Update PHI nodes for a guard of the LOOP.
 249
 250    Input:
 251    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 252         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 253         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 254         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 255         We denote this bb NEW_MERGE_BB because before the guard code was added
 256         it had a single predecessor (the LOOP header), and now it became a merge
 257         point of two paths - the path that ends with the LOOP exit-edge, and
 258         the path that ends with GUARD_EDGE.
 259    - NEW_EXIT_BB: New basic block that is added by this function between LOOP
 260         and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
 261
 262    ===> The CFG before the guard-code was added:
 263         LOOP_header_bb:
 264           loop_body
 265           if (exit_loop) goto update_bb
 266           else           goto LOOP_header_bb
 267         update_bb:
 268
 269    ==> The CFG after the guard-code was added:
 270         guard_bb:
 271           if (LOOP_guard_condition) goto new_merge_bb
 272           else                      goto LOOP_header_bb
 273         LOOP_header_bb:
 274           loop_body
 275           if (exit_loop_condition) goto new_merge_bb
 276           else                     goto LOOP_header_bb
 277         new_merge_bb:
 278           goto update_bb
 279         update_bb:
 280
 281    ==> The CFG after this function:
 282         guard_bb:
 283           if (LOOP_guard_condition) goto new_merge_bb
 284           else                      goto LOOP_header_bb
 285         LOOP_header_bb:
 286           loop_body
 287           if (exit_loop_condition) goto new_exit_bb
 288           else                     goto LOOP_header_bb
 289         new_exit_bb:
 290         new_merge_bb:
 291           goto update_bb
 292         update_bb:
 293
 294    This function:
 295    1. creates and updates the relevant phi nodes to account for the new
 296       incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
 297       1.1. Create phi nodes at NEW_MERGE_BB.
 298       1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 299            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
 300    2. preserves loop-closed-ssa-form by creating the required phi nodes
 301       at the exit of LOOP (i.e, in NEW_EXIT_BB).
 302
 303    There are two flavors to this function:
 304
 305    slpeel_update_phi_nodes_for_guard1:
 306      Here the guard controls whether we enter or skip LOOP, where LOOP is a
 307      prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
 308      for variables that have phis in the loop header.
 309
 310    slpeel_update_phi_nodes_for_guard2:
 311      Here the guard controls whether we enter or skip LOOP, where LOOP is an
 312      epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
 313      for variables that have phis in the loop exit.
 314
 315    I.E., the overall structure is:
 316
 317         loop1_preheader_bb:
 318                 guard1 (goto loop1/merge1_bb)
 319         loop1
 320         loop1_exit_bb:
 321                 guard2 (goto merge1_bb/merge2_bb)
 322         merge1_bb
 323         loop2
 324         loop2_exit_bb
 325         merge2_bb
 326         next_bb
 327
 328    slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
 329    loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
 330    that have phis in loop1->header).
 331
 332    slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
 333    loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
 334    that have phis in next_bb). It also adds some of these phis to
 335    loop1_exit_bb.
 336
 337    slpeel_update_phi_nodes_for_guard1 is always called before
 338    slpeel_update_phi_nodes_for_guard2. They are both needed in order
 339    to create correct data-flow and loop-closed-ssa-form.
 340
 341    Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
 342    that change between iterations of a loop (and therefore have a phi-node
 343    at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
 344    phis for variables that are used out of the loop (and therefore have
 345    loop-closed exit phis). Some variables may be both updated between
 346    iterations and used after the loop. This is why in loop1_exit_bb we
 347    may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
 348    and exit phis (created by slpeel_update_phi_nodes_for_guard2).
 349
 350    - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
 351      an original loop. i.e., we have:
 352
 353            orig_loop
 354            guard_bb (goto LOOP/new_merge)
 355            new_loop <-- LOOP
 356            new_exit
 357            new_merge
 358            next_bb
 359
 360      If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
 361      have:
 362
 363            new_loop
 364            guard_bb (goto LOOP/new_merge)
 365            orig_loop <-- LOOP
 366            new_exit
 367            new_merge
 368            next_bb
 369
 370      The SSA names defined in the original loop have a current
 371      reaching definition that that records the corresponding new
 372      ssa-name used in the new duplicated loop copy.
 373   */
 374
 375 /* Function slpeel_update_phi_nodes_for_guard1
 376
 377    Input:
 378    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 379    - DEFS - a bitmap of ssa names to mark new names for which we recorded
 380             information.
 381
 382    In the context of the overall structure, we have:
 383
 384         loop1_preheader_bb:
 385                 guard1 (goto loop1/merge1_bb)
 386 LOOP->  loop1
 387         loop1_exit_bb:
 388                 guard2 (goto merge1_bb/merge2_bb)
 389         merge1_bb
 390         loop2
 391         loop2_exit_bb
 392         merge2_bb
 393         next_bb
 394
 395    For each name updated between loop iterations (i.e - for each name that has
 396    an entry (loop-header) phi in LOOP) we create a new phi in:
 397    1. merge1_bb (to account for the edge from guard1)
 398    2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 399 */
 400
 401 static void
 402 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
 403                                     bool is_new_loop, basic_block *new_exit_bb)
 404 {
 405   gimple orig_phi, new_phi;
 406   gimple update_phi, update_phi2;
 407   tree guard_arg, loop_arg;
 408   basic_block new_merge_bb = guard_edge->dest;
 409   edge e = EDGE_SUCC (new_merge_bb, 0);
 410   basic_block update_bb = e->dest;
 411   basic_block orig_bb = loop->header;
 412   edge new_exit_e;
 413   tree current_new_name;
 414   gimple_stmt_iterator gsi_orig, gsi_update;
 415
 416   /* Create new bb between loop and new_merge_bb.  */
 417   *new_exit_bb = split_edge (single_exit (loop));
 418
 419   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 420
 421   for (gsi_orig = gsi_start_phis (orig_bb),
 422        gsi_update = gsi_start_phis (update_bb);
 423        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
 424        gsi_next (&gsi_orig), gsi_next (&gsi_update))
 425     {
 426       source_location loop_locus, guard_locus;
 427       tree new_res;
 428       orig_phi = gsi_stmt (gsi_orig);
 429       update_phi = gsi_stmt (gsi_update);
 430
 431       /** 1. Handle new-merge-point phis  **/
 432
 433       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 434       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
 435       new_phi = create_phi_node (new_res, new_merge_bb);
 436
 437       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 438             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 439       loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
 440       loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
 441                                                       EDGE_SUCC (loop->latch,
 442                                                                  0));
 443       guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
 444       guard_locus
 445         = gimple_phi_arg_location_from_edge (orig_phi,
 446                                              loop_preheader_edge (loop));
 447
 448       add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
 449       add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
 450
 451       /* 1.3. Update phi in successor block.  */
 452       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 453                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 454       adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
 455       update_phi2 = new_phi;
 456
 457
 458       /** 2. Handle loop-closed-ssa-form phis  **/
 459
 460       if (virtual_operand_p (PHI_RESULT (orig_phi)))
 461         continue;
 462
 463       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 464       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
 465       new_phi = create_phi_node (new_res, *new_exit_bb);
 466
 467       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 468       add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
 469
 470       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 471       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 472       adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
 473                                   PHI_RESULT (new_phi));
 474
 475       /* 2.4. Record the newly created name with set_current_def.
 476          We want to find a name such that
 477                 name = get_current_def (orig_loop_name)
 478          and to set its current definition as follows:
 479                 set_current_def (name, new_phi_name)
 480
 481          If LOOP is a new loop then loop_arg is already the name we're
 482          looking for. If LOOP is the original loop, then loop_arg is
 483          the orig_loop_name and the relevant name is recorded in its
 484          current reaching definition.  */
 485       if (is_new_loop)
 486         current_new_name = loop_arg;
 487       else
 488         {
 489           current_new_name = get_current_def (loop_arg);
 490           /* current_def is not available only if the variable does not
 491              change inside the loop, in which case we also don't care
 492              about recording a current_def for it because we won't be
 493              trying to create loop-exit-phis for it.  */
 494           if (!current_new_name)
 495             continue;
 496         }
 497       tree new_name = get_current_def (current_new_name);
 498       /* Because of peeled_chrec optimization it is possible that we have
 499          set this earlier.  Verify the PHI has the same value.  */
 500       if (new_name)
 501         {
 502           gimple phi = SSA_NAME_DEF_STMT (new_name);
 503           gcc_assert (gimple_code (phi) == GIMPLE_PHI
 504                       && gimple_bb (phi) == *new_exit_bb
 505                       && (PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop))
 506                           == loop_arg));
 507           continue;
 508         }
 509
 510       set_current_def (current_new_name, PHI_RESULT (new_phi));
 511     }
 512 }
 513
 514
 515 /* Function slpeel_update_phi_nodes_for_guard2
 516
 517    Input:
 518    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 519
 520    In the context of the overall structure, we have:
 521
 522         loop1_preheader_bb:
 523                 guard1 (goto loop1/merge1_bb)
 524         loop1
 525         loop1_exit_bb:
 526                 guard2 (goto merge1_bb/merge2_bb)
 527         merge1_bb
 528 LOOP->  loop2
 529         loop2_exit_bb
 530         merge2_bb
 531         next_bb
 532
 533    For each name used out side the loop (i.e - for each name that has an exit
 534    phi in next_bb) we create a new phi in:
 535    1. merge2_bb (to account for the edge from guard_bb)
 536    2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 537    3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
 538       if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
 539 */
 540
 541 static void
 542 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
 543                                     bool is_new_loop, basic_block *new_exit_bb)
 544 {
 545   gimple orig_phi, new_phi;
 546   gimple update_phi, update_phi2;
 547   tree guard_arg, loop_arg;
 548   basic_block new_merge_bb = guard_edge->dest;
 549   edge e = EDGE_SUCC (new_merge_bb, 0);
 550   basic_block update_bb = e->dest;
 551   edge new_exit_e;
 552   tree orig_def, orig_def_new_name;
 553   tree new_name, new_name2;
 554   tree arg;
 555   gimple_stmt_iterator gsi;
 556
 557   /* Create new bb between loop and new_merge_bb.  */
 558   *new_exit_bb = split_edge (single_exit (loop));
 559
 560   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 561
 562   for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
 563     {
 564       tree new_res;
 565       update_phi = gsi_stmt (gsi);
 566       orig_phi = update_phi;
 567       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 568       /* This loop-closed-phi actually doesn't represent a use
 569          out of the loop - the phi arg is a constant.  */
 570       if (TREE_CODE (orig_def) != SSA_NAME)
 571         continue;
 572       orig_def_new_name = get_current_def (orig_def);
 573       arg = NULL_TREE;
 574
 575       /** 1. Handle new-merge-point phis  **/
 576
 577       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 578       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
 579       new_phi = create_phi_node (new_res, new_merge_bb);
 580
 581       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 582             of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
 583       new_name = orig_def;
 584       new_name2 = NULL_TREE;
 585       if (orig_def_new_name)
 586         {
 587           new_name = orig_def_new_name;
 588           /* Some variables have both loop-entry-phis and loop-exit-phis.
 589              Such variables were given yet newer names by phis placed in
 590              guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
 591              new_name2 = get_current_def (get_current_def (orig_name)).  */
 592           new_name2 = get_current_def (new_name);
 593         }
 594
 595       if (is_new_loop)
 596         {
 597           guard_arg = orig_def;
 598           loop_arg = new_name;
 599         }
 600       else
 601         {
 602           guard_arg = new_name;
 603           loop_arg = orig_def;
 604         }
 605       if (new_name2)
 606         guard_arg = new_name2;
 607
 608       add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
 609       add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
 610
 611       /* 1.3. Update phi in successor block.  */
 612       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
 613       adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
 614       update_phi2 = new_phi;
 615
 616
 617       /** 2. Handle loop-closed-ssa-form phis  **/
 618
 619       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 620       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
 621       new_phi = create_phi_node (new_res, *new_exit_bb);
 622
 623       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 624       add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
 625
 626       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 627       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 628       adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
 629                                   PHI_RESULT (new_phi));
 630
 631
 632       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
 633
 634       /* 3.1. Find the relevant names that need an exit-phi in
 635          GUARD_BB, i.e. names for which
 636          slpeel_update_phi_nodes_for_guard1 had not already created a
 637          phi node. This is the case for names that are used outside
 638          the loop (and therefore need an exit phi) but are not updated
 639          across loop iterations (and therefore don't have a
 640          loop-header-phi).
 641
 642          slpeel_update_phi_nodes_for_guard1 is responsible for
 643          creating loop-exit phis in GUARD_BB for names that have a
 644          loop-header-phi.  When such a phi is created we also record
 645          the new name in its current definition.  If this new name
 646          exists, then guard_arg was set to this new name (see 1.2
 647          above).  Therefore, if guard_arg is not this new name, this
 648          is an indication that an exit-phi in GUARD_BB was not yet
 649          created, so we take care of it here.  */
 650       if (guard_arg == new_name2)
 651         continue;
 652       arg = guard_arg;
 653
 654       /* 3.2. Generate new phi node in GUARD_BB:  */
 655       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
 656       new_phi = create_phi_node (new_res, guard_edge->src);
 657
 658       /* 3.3. GUARD_BB has one incoming edge:  */
 659       gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
 660       add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
 661                    UNKNOWN_LOCATION);
 662
 663       /* 3.4. Update phi in successor of GUARD_BB:  */
 664       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
 665                                                                 == guard_arg);
 666       adjust_phi_and_debug_stmts (update_phi2, guard_edge,
 667                                   PHI_RESULT (new_phi));
 668     }
 669 }
 670
 671
 672 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 673    that starts at zero, increases by one and its limit is NITERS.
 674
 675    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 676
 677 void
 678 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 679 {
 680   tree indx_before_incr, indx_after_incr;
 681   gimple cond_stmt;
 682   gimple orig_cond;
 683   edge exit_edge = single_exit (loop);
 684   gimple_stmt_iterator loop_cond_gsi;
 685   gimple_stmt_iterator incr_gsi;
 686   bool insert_after;
 687   tree init = build_int_cst (TREE_TYPE (niters), 0);
 688   tree step = build_int_cst (TREE_TYPE (niters), 1);
 689   source_location loop_loc;
 690   enum tree_code code;
 691
 692   orig_cond = get_loop_exit_condition (loop);
 693   gcc_assert (orig_cond);
 694   loop_cond_gsi = gsi_for_stmt (orig_cond);
 695
 696   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 697   create_iv (init, step, NULL_TREE, loop,
 698              &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
 699
 700   indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
 701                                               true, NULL_TREE, true,
 702                                               GSI_SAME_STMT);
 703   niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
 704                                      true, GSI_SAME_STMT);
 705
 706   code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
 707   cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
 708                                  NULL_TREE);
 709
 710   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
 711
 712   /* Remove old loop exit test:  */
 713   gsi_remove (&loop_cond_gsi, true);
 714   free_stmt_vec_info (orig_cond);
 715
 716   loop_loc = find_loop_location (loop);
 717   if (dump_enabled_p ())
 718     {
 719       if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
 720         dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
 721                      LOCATION_LINE (loop_loc));
 722       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
 723       dump_printf (MSG_NOTE, "\n");
 724     }
 725   loop->nb_iterations = niters;
 726 }
 727
 728 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
 729    For all PHI arguments in FROM->dest and TO->dest from those
 730    edges ensure that TO->dest PHI arguments have current_def
 731    to that in from.  */
 732
 733 static void
 734 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
 735 {
 736   gimple_stmt_iterator gsi_from, gsi_to;
 737
 738   for (gsi_from = gsi_start_phis (from->dest),
 739        gsi_to = gsi_start_phis (to->dest);
 740        !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
 741        gsi_next (&gsi_from), gsi_next (&gsi_to))
 742     {
 743       gimple from_phi = gsi_stmt (gsi_from);
 744       gimple to_phi = gsi_stmt (gsi_to);
 745       tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
 746       tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
 747       if (TREE_CODE (from_arg) == SSA_NAME
 748           && TREE_CODE (to_arg) == SSA_NAME
 749           && get_current_def (to_arg) == NULL_TREE)
 750         set_current_def (to_arg, get_current_def (from_arg));
 751     }
 752 }
 753
 754
 755 /* Given LOOP this function generates a new copy of it and puts it
 756    on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
 757    non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
 758    basic blocks from SCALAR_LOOP instead of LOOP, but to either the
 759    entry or exit of LOOP.  */
 760
 761 struct loop *
 762 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
 763                                         struct loop *scalar_loop, edge e)
 764 {
 765   struct loop *new_loop;
 766   basic_block *new_bbs, *bbs;
 767   bool at_exit;
 768   bool was_imm_dom;
 769   basic_block exit_dest;
 770   edge exit, new_exit;
 771
 772   exit = single_exit (loop);
 773   at_exit = (e == exit);
 774   if (!at_exit && e != loop_preheader_edge (loop))
 775     return NULL;
 776
 777   if (scalar_loop == NULL)
 778     scalar_loop = loop;
 779
 780   bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
 781   get_loop_body_with_size (scalar_loop, bbs, scalar_loop->num_nodes);
 782
 783   /* Check whether duplication is possible.  */
 784   if (!can_copy_bbs_p (bbs, scalar_loop->num_nodes))
 785     {
 786       free (bbs);
 787       return NULL;
 788     }
 789
 790   /* Generate new loop structure.  */
 791   new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
 792   duplicate_subloops (scalar_loop, new_loop);
 793
 794   exit_dest = exit->dest;
 795   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 796                                           exit_dest) == loop->header ?
 797                  true : false);
 798
 799   /* Also copy the pre-header, this avoids jumping through hoops to
 800      duplicate the loop entry PHI arguments.  Create an empty
 801      pre-header unconditionally for this.  */
 802   basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
 803   edge entry_e = single_pred_edge (preheader);
 804   bbs[scalar_loop->num_nodes] = preheader;
 805   new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
 806
 807   exit = single_exit (scalar_loop);
 808   copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
 809             &exit, 1, &new_exit, NULL,
 810             e->src, true);
 811   exit = single_exit (loop);
 812   basic_block new_preheader = new_bbs[scalar_loop->num_nodes];
 813
 814   add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
 815
 816   if (scalar_loop != loop)
 817     {
 818       /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
 819          SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
 820          but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
 821          the LOOP SSA_NAMEs (on the exit edge and edge from latch to
 822          header) to have current_def set, so copy them over.  */
 823       slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
 824                                                 exit);
 825       slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
 826                                                            0),
 827                                                 EDGE_SUCC (loop->latch, 0));
 828     }
 829
 830   if (at_exit) /* Add the loop copy at exit.  */
 831     {
 832       if (scalar_loop != loop)
 833         {
 834           gimple_stmt_iterator gsi;
 835           new_exit = redirect_edge_and_branch (new_exit, exit_dest);
 836
 837           for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
 838                gsi_next (&gsi))
 839             {
 840               gimple phi = gsi_stmt (gsi);
 841               tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
 842               location_t orig_locus
 843                 = gimple_phi_arg_location_from_edge (phi, e);
 844
 845               add_phi_arg (phi, orig_arg, new_exit, orig_locus);
 846             }
 847         }
 848       redirect_edge_and_branch_force (e, new_preheader);
 849       flush_pending_stmts (e);
 850       set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
 851       if (was_imm_dom)
 852         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
 853
 854       /* And remove the non-necessary forwarder again.  Keep the other
 855          one so we have a proper pre-header for the loop at the exit edge.  */
 856       redirect_edge_pred (single_succ_edge (preheader),
 857                           single_pred (preheader));
 858       delete_basic_block (preheader);
 859       set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
 860                                loop_preheader_edge (scalar_loop)->src);
 861     }
 862   else /* Add the copy at entry.  */
 863     {
 864       if (scalar_loop != loop)
 865         {
 866           /* Remove the non-necessary forwarder of scalar_loop again.  */
 867           redirect_edge_pred (single_succ_edge (preheader),
 868                               single_pred (preheader));
 869           delete_basic_block (preheader);
 870           set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
 871                                    loop_preheader_edge (scalar_loop)->src);
 872           preheader = split_edge (loop_preheader_edge (loop));
 873           entry_e = single_pred_edge (preheader);
 874         }
 875
 876       redirect_edge_and_branch_force (entry_e, new_preheader);
 877       flush_pending_stmts (entry_e);
 878       set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
 879
 880       redirect_edge_and_branch_force (new_exit, preheader);
 881       flush_pending_stmts (new_exit);
 882       set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
 883
 884       /* And remove the non-necessary forwarder again.  Keep the other
 885          one so we have a proper pre-header for the loop at the exit edge.  */
 886       redirect_edge_pred (single_succ_edge (new_preheader),
 887                           single_pred (new_preheader));
 888       delete_basic_block (new_preheader);
 889       set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
 890                                loop_preheader_edge (new_loop)->src);
 891     }
 892
 893   for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
 894     rename_variables_in_bb (new_bbs[i]);
 895
 896   if (scalar_loop != loop)
 897     {
 898       /* Update new_loop->header PHIs, so that on the preheader
 899          edge they are the ones from loop rather than scalar_loop.  */
 900       gimple_stmt_iterator gsi_orig, gsi_new;
 901       edge orig_e = loop_preheader_edge (loop);
 902       edge new_e = loop_preheader_edge (new_loop);
 903
 904       for (gsi_orig = gsi_start_phis (loop->header),
 905            gsi_new = gsi_start_phis (new_loop->header);
 906            !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
 907            gsi_next (&gsi_orig), gsi_next (&gsi_new))
 908         {
 909           gimple orig_phi = gsi_stmt (gsi_orig);
 910           gimple new_phi = gsi_stmt (gsi_new);
 911           tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
 912           location_t orig_locus
 913             = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
 914
 915           add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
 916         }
 917     }
 918
 919   free (new_bbs);
 920   free (bbs);
 921
 922 #ifdef ENABLE_CHECKING
 923   verify_dominators (CDI_DOMINATORS);
 924 #endif
 925
 926   return new_loop;
 927 }
 928
 929
 930 /* Given the condition statement COND, put it as the last statement
 931    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 932    Assumes that this is the single exit of the guarded loop.
 933    Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST.  */
 934
 935 static edge
 936 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
 937                        gimple_seq cond_expr_stmt_list,
 938                        basic_block exit_bb, basic_block dom_bb,
 939                        int probability)
 940 {
 941   gimple_stmt_iterator gsi;
 942   edge new_e, enter_e;
 943   gimple cond_stmt;
 944   gimple_seq gimplify_stmt_list = NULL;
 945
 946   enter_e = EDGE_SUCC (guard_bb, 0);
 947   enter_e->flags &= ~EDGE_FALLTHRU;
 948   enter_e->flags |= EDGE_FALSE_VALUE;
 949   gsi = gsi_last_bb (guard_bb);
 950
 951   cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
 952                                  NULL_TREE);
 953   if (gimplify_stmt_list)
 954     gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
 955   cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
 956   if (cond_expr_stmt_list)
 957     gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
 958
 959   gsi = gsi_last_bb (guard_bb);
 960   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
 961
 962   /* Add new edge to connect guard block to the merge/loop-exit block.  */
 963   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 964
 965   new_e->count = guard_bb->count;
 966   new_e->probability = probability;
 967   new_e->count = apply_probability (enter_e->count, probability);
 968   enter_e->count -= new_e->count;
 969   enter_e->probability = inverse_probability (probability);
 970   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 971   return new_e;
 972 }
 973
 974
 975 /* This function verifies that the following restrictions apply to LOOP:
 976    (1) it is innermost
 977    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 978    (3) it is single entry, single exit
 979    (4) its exit condition is the last stmt in the header
 980    (5) E is the entry/exit edge of LOOP.
 981  */
 982
 983 bool
 984 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
 985 {
 986   edge exit_e = single_exit (loop);
 987   edge entry_e = loop_preheader_edge (loop);
 988   gimple orig_cond = get_loop_exit_condition (loop);
 989   gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
 990
 991   if (loop->inner
 992       /* All loops have an outer scope; the only case loop->outer is NULL is for
 993          the function itself.  */
 994       || !loop_outer (loop)
 995       || loop->num_nodes != 2
 996       || !empty_block_p (loop->latch)
 997       || !single_exit (loop)
 998       /* Verify that new loop exit condition can be trivially modified.  */
 999       || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
1000       || (e != exit_e && e != entry_e))
1001     return false;
1002
1003   return true;
1004 }
1005
1006 #ifdef ENABLE_CHECKING
1007 static void
1008 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
1009                                  struct loop *second_loop)
1010 {
1011   basic_block loop1_exit_bb = single_exit (first_loop)->dest;
1012   basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1013   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1014
1015   /* A guard that controls whether the second_loop is to be executed or skipped
1016      is placed in first_loop->exit.  first_loop->exit therefore has two
1017      successors - one is the preheader of second_loop, and the other is a bb
1018      after second_loop.
1019    */
1020   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
1021
1022   /* 1. Verify that one of the successors of first_loop->exit is the preheader
1023         of second_loop.  */
1024
1025   /* The preheader of new_loop is expected to have two predecessors:
1026      first_loop->exit and the block that precedes first_loop.  */
1027
1028   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1029               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1030                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1031                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
1032                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1033
1034   /* Verify that the other successor of first_loop->exit is after the
1035      second_loop.  */
1036   /* TODO */
1037 }
1038 #endif
1039
1040 /* If the run time cost model check determines that vectorization is
1041    not profitable and hence scalar loop should be generated then set
1042    FIRST_NITERS to prologue peeled iterations. This will allow all the
1043    iterations to be executed in the prologue peeled scalar loop.  */
1044
1045 static void
1046 set_prologue_iterations (basic_block bb_before_first_loop,
1047                          tree *first_niters,
1048                          struct loop *loop,
1049                          unsigned int th,
1050                          int probability)
1051 {
1052   edge e;
1053   basic_block cond_bb, then_bb;
1054   tree var, prologue_after_cost_adjust_name;
1055   gimple_stmt_iterator gsi;
1056   gimple newphi;
1057   edge e_true, e_false, e_fallthru;
1058   gimple cond_stmt;
1059   gimple_seq stmts = NULL;
1060   tree cost_pre_condition = NULL_TREE;
1061   tree scalar_loop_iters =
1062     unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
1063
1064   e = single_pred_edge (bb_before_first_loop);
1065   cond_bb = split_edge (e);
1066
1067   e = single_pred_edge (bb_before_first_loop);
1068   then_bb = split_edge (e);
1069   set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
1070
1071   e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
1072                                    EDGE_FALSE_VALUE);
1073   set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
1074
1075   e_true = EDGE_PRED (then_bb, 0);
1076   e_true->flags &= ~EDGE_FALLTHRU;
1077   e_true->flags |= EDGE_TRUE_VALUE;
1078
1079   e_true->probability = probability;
1080   e_false->probability = inverse_probability (probability);
1081   e_true->count = apply_probability (cond_bb->count, probability);
1082   e_false->count = cond_bb->count - e_true->count;
1083   then_bb->frequency = EDGE_FREQUENCY (e_true);
1084   then_bb->count = e_true->count;
1085
1086   e_fallthru = EDGE_SUCC (then_bb, 0);
1087   e_fallthru->count = then_bb->count;
1088
1089   gsi = gsi_last_bb (cond_bb);
1090   cost_pre_condition =
1091     fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1092                  build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1093   cost_pre_condition =
1094     force_gimple_operand_gsi_1 (&gsi, cost_pre_condition, is_gimple_condexpr,
1095                                 NULL_TREE, false, GSI_CONTINUE_LINKING);
1096   cond_stmt = gimple_build_cond_from_tree (cost_pre_condition,
1097                                            NULL_TREE, NULL_TREE);
1098   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
1099
1100   var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
1101                         "prologue_after_cost_adjust");
1102   prologue_after_cost_adjust_name =
1103     force_gimple_operand (scalar_loop_iters, &stmts, false, var);
1104
1105   gsi = gsi_last_bb (then_bb);
1106   if (stmts)
1107     gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
1108
1109   newphi = create_phi_node (var, bb_before_first_loop);
1110   add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
1111                UNKNOWN_LOCATION);
1112   add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);
1113
1114   *first_niters = PHI_RESULT (newphi);
1115 }
1116
1117 /* Function slpeel_tree_peel_loop_to_edge.
1118
1119    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1120    that is placed on the entry (exit) edge E of LOOP. After this transformation
1121    we have two loops one after the other - first-loop iterates FIRST_NITERS
1122    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1123    If the cost model indicates that it is profitable to emit a scalar
1124    loop instead of the vector one, then the prolog (epilog) loop will iterate
1125    for the entire unchanged scalar iterations of the loop.
1126
1127    Input:
1128    - LOOP: the loop to be peeled.
1129    - SCALAR_LOOP: if non-NULL, the alternate loop from which basic blocks
1130         should be copied.
1131    - E: the exit or entry edge of LOOP.
1132         If it is the entry edge, we peel the first iterations of LOOP. In this
1133         case first-loop is LOOP, and second-loop is the newly created loop.
1134         If it is the exit edge, we peel the last iterations of LOOP. In this
1135         case, first-loop is the newly created loop, and second-loop is LOOP.
1136    - NITERS: the number of iterations that LOOP iterates.
1137    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1138    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
1139         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1140         is false, the caller of this function may want to take care of this
1141         (this can be useful if we don't want new stmts added to first-loop).
1142    - TH: cost model profitability threshold of iterations for vectorization.
1143    - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1144                           during versioning and hence needs to occur during
1145                           prologue generation or whether cost model check
1146                           has not occurred during prologue generation and hence
1147                           needs to occur during epilogue generation.
1148    - BOUND1 is the upper bound on number of iterations of the first loop (if known)
1149    - BOUND2 is the upper bound on number of iterations of the second loop (if known)
1150
1151
1152    Output:
1153    The function returns a pointer to the new loop-copy, or NULL if it failed
1154    to perform the transformation.
1155
1156    The function generates two if-then-else guards: one before the first loop,
1157    and the other before the second loop:
1158    The first guard is:
1159      if (FIRST_NITERS == 0) then skip the first loop,
1160      and go directly to the second loop.
1161    The second guard is:
1162      if (FIRST_NITERS == NITERS) then skip the second loop.
1163
1164    If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1165    then the generated condition is combined with COND_EXPR and the
1166    statements in COND_EXPR_STMT_LIST are emitted together with it.
1167
1168    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1169    FORNOW the resulting code will not be in loop-closed-ssa form.
1170 */
1171
1172 static struct loop *
1173 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loop *scalar_loop,
1174                                edge e, tree *first_niters,
1175                                tree niters, bool update_first_loop_count,
1176                                unsigned int th, bool check_profitability,
1177                                tree cond_expr, gimple_seq cond_expr_stmt_list,
1178                                int bound1, int bound2)
1179 {
1180   struct loop *new_loop = NULL, *first_loop, *second_loop;
1181   edge skip_e;
1182   tree pre_condition = NULL_TREE;
1183   basic_block bb_before_second_loop, bb_after_second_loop;
1184   basic_block bb_before_first_loop;
1185   basic_block bb_between_loops;
1186   basic_block new_exit_bb;
1187   gimple_stmt_iterator gsi;
1188   edge exit_e = single_exit (loop);
1189   source_location loop_loc;
1190   /* There are many aspects to how likely the first loop is going to be executed.
1191      Without histogram we can't really do good job.  Simply set it to
1192      2/3, so the first loop is not reordered to the end of function and
1193      the hot path through stays short.  */
1194   int first_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1195   int second_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1196   int probability_of_second_loop;
1197
1198   if (!slpeel_can_duplicate_loop_p (loop, e))
1199     return NULL;
1200
1201   /* We might have a queued need to update virtual SSA form.  As we
1202      delete the update SSA machinery below after doing a regular
1203      incremental SSA update during loop copying make sure we don't
1204      lose that fact.
1205      ???  Needing to update virtual SSA form by renaming is unfortunate
1206      but not all of the vectorizer code inserting new loads / stores
1207      properly assigns virtual operands to those statements.  */
1208   update_ssa (TODO_update_ssa_only_virtuals);
1209
1210   /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
1211      in the exit bb and rename all the uses after the loop.  This simplifies
1212      the *guard[12] routines, which assume loop closed SSA form for all PHIs
1213      (but normally loop closed SSA form doesn't require virtual PHIs to be
1214      in the same form).  Doing this early simplifies the checking what
1215      uses should be renamed.  */
1216   for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1217     if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1218       {
1219         gimple phi = gsi_stmt (gsi);
1220         for (gsi = gsi_start_phis (exit_e->dest);
1221              !gsi_end_p (gsi); gsi_next (&gsi))
1222           if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1223             break;
1224         if (gsi_end_p (gsi))
1225           {
1226             tree new_vop = copy_ssa_name (PHI_RESULT (phi), NULL);
1227             gimple new_phi = create_phi_node (new_vop, exit_e->dest);
1228             tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
1229             imm_use_iterator imm_iter;
1230             gimple stmt;
1231             use_operand_p use_p;
1232
1233             add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
1234             gimple_phi_set_result (new_phi, new_vop);
1235             FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
1236               if (stmt != new_phi && gimple_bb (stmt) != loop->header)
1237                 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1238                   SET_USE (use_p, new_vop);
1239           }
1240         break;
1241       }
1242
1243   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1244         Resulting CFG would be:
1245
1246         first_loop:
1247         do {
1248         } while ...
1249
1250         second_loop:
1251         do {
1252         } while ...
1253
1254         orig_exit_bb:
1255    */
1256
1257   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop,
1258                                                            e)))
1259     {
1260       loop_loc = find_loop_location (loop);
1261       dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1262                        "tree_duplicate_loop_to_edge_cfg failed.\n");
1263       return NULL;
1264     }
1265
1266   if (MAY_HAVE_DEBUG_STMTS)
1267     {
1268       gcc_assert (!adjust_vec.exists ());
1269       adjust_vec.create (32);
1270     }
1271
1272   if (e == exit_e)
1273     {
1274       /* NEW_LOOP was placed after LOOP.  */
1275       first_loop = loop;
1276       second_loop = new_loop;
1277     }
1278   else
1279     {
1280       /* NEW_LOOP was placed before LOOP.  */
1281       first_loop = new_loop;
1282       second_loop = loop;
1283     }
1284
1285   /* 2.  Add the guard code in one of the following ways:
1286
1287      2.a Add the guard that controls whether the first loop is executed.
1288          This occurs when this function is invoked for prologue or epilogue
1289          generation and when the cost model check can be done at compile time.
1290
1291          Resulting CFG would be:
1292
1293          bb_before_first_loop:
1294          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1295                                 GOTO first-loop
1296
1297          first_loop:
1298          do {
1299          } while ...
1300
1301          bb_before_second_loop:
1302
1303          second_loop:
1304          do {
1305          } while ...
1306
1307          orig_exit_bb:
1308
1309      2.b Add the cost model check that allows the prologue
1310          to iterate for the entire unchanged scalar
1311          iterations of the loop in the event that the cost
1312          model indicates that the scalar loop is more
1313          profitable than the vector one. This occurs when
1314          this function is invoked for prologue generation
1315          and the cost model check needs to be done at run
1316          time.
1317
1318          Resulting CFG after prologue peeling would be:
1319
1320          if (scalar_loop_iterations <= th)
1321            FIRST_NITERS = scalar_loop_iterations
1322
1323          bb_before_first_loop:
1324          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1325                                 GOTO first-loop
1326
1327          first_loop:
1328          do {
1329          } while ...
1330
1331          bb_before_second_loop:
1332
1333          second_loop:
1334          do {
1335          } while ...
1336
1337          orig_exit_bb:
1338
1339      2.c Add the cost model check that allows the epilogue
1340          to iterate for the entire unchanged scalar
1341          iterations of the loop in the event that the cost
1342          model indicates that the scalar loop is more
1343          profitable than the vector one. This occurs when
1344          this function is invoked for epilogue generation
1345          and the cost model check needs to be done at run
1346          time.  This check is combined with any pre-existing
1347          check in COND_EXPR to avoid versioning.
1348
1349          Resulting CFG after prologue peeling would be:
1350
1351          bb_before_first_loop:
1352          if ((scalar_loop_iterations <= th)
1353              ||
1354              FIRST_NITERS == 0) GOTO bb_before_second_loop
1355                                 GOTO first-loop
1356
1357          first_loop:
1358          do {
1359          } while ...
1360
1361          bb_before_second_loop:
1362
1363          second_loop:
1364          do {
1365          } while ...
1366
1367          orig_exit_bb:
1368   */
1369
1370   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1371   /* Loop copying insterted a forwarder block for us here.  */
1372   bb_before_second_loop = single_exit (first_loop)->dest;
1373
1374   probability_of_second_loop = (inverse_probability (first_guard_probability)
1375                                 + combine_probabilities (second_guard_probability,
1376                                                          first_guard_probability));
1377   /* Theoretically preheader edge of first loop and exit edge should have
1378      same frequencies.  Loop exit probablities are however easy to get wrong.
1379      It is safer to copy value from original loop entry.  */
1380   bb_before_second_loop->frequency
1381      = combine_probabilities (bb_before_first_loop->frequency,
1382                               probability_of_second_loop);
1383   bb_before_second_loop->count
1384      = apply_probability (bb_before_first_loop->count,
1385                           probability_of_second_loop);
1386   single_succ_edge (bb_before_second_loop)->count
1387      = bb_before_second_loop->count;
1388
1389   /* Epilogue peeling.  */
1390   if (!update_first_loop_count)
1391     {
1392       loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
1393       tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
1394       unsigned limit = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
1395       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1396         limit = limit + 1;
1397       if (check_profitability
1398           && th > limit)
1399         limit = th;
1400       pre_condition =
1401         fold_build2 (LT_EXPR, boolean_type_node, scalar_loop_iters,
1402                      build_int_cst (TREE_TYPE (scalar_loop_iters), limit));
1403       if (cond_expr)
1404         {
1405           pre_condition =
1406             fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1407                          pre_condition,
1408                          fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1409                                       cond_expr));
1410         }
1411     }
1412
1413   /* Prologue peeling.  */
1414   else
1415     {
1416       if (check_profitability)
1417         set_prologue_iterations (bb_before_first_loop, first_niters,
1418                                  loop, th, first_guard_probability);
1419
1420       pre_condition =
1421         fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1422                      build_int_cst (TREE_TYPE (*first_niters), 0));
1423     }
1424
1425   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1426                                   cond_expr_stmt_list,
1427                                   bb_before_second_loop, bb_before_first_loop,
1428                                   inverse_probability (first_guard_probability));
1429   scale_loop_profile (first_loop, first_guard_probability,
1430                       check_profitability && (int)th > bound1 ? th : bound1);
1431   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1432                                       first_loop == new_loop,
1433                                       &new_exit_bb);
1434
1435
1436   /* 3. Add the guard that controls whether the second loop is executed.
1437         Resulting CFG would be:
1438
1439         bb_before_first_loop:
1440         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1441                                GOTO first-loop
1442
1443         first_loop:
1444         do {
1445         } while ...
1446
1447         bb_between_loops:
1448         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1449                                     GOTO bb_before_second_loop
1450
1451         bb_before_second_loop:
1452
1453         second_loop:
1454         do {
1455         } while ...
1456
1457         bb_after_second_loop:
1458
1459         orig_exit_bb:
1460    */
1461
1462   bb_between_loops = new_exit_bb;
1463   bb_after_second_loop = split_edge (single_exit (second_loop));
1464
1465   pre_condition =
1466         fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
1467   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
1468                                   bb_after_second_loop, bb_before_first_loop,
1469                                   inverse_probability (second_guard_probability));
1470   scale_loop_profile (second_loop, probability_of_second_loop, bound2);
1471   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1472                                      second_loop == new_loop, &new_exit_bb);
1473
1474   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1475    */
1476   if (update_first_loop_count)
1477     slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);
1478
1479   delete_update_ssa ();
1480
1481   adjust_vec_debug_stmts ();
1482
1483   return new_loop;
1484 }
1485
1486 /* Function vect_get_loop_location.
1487
1488    Extract the location of the loop in the source code.
1489    If the loop is not well formed for vectorization, an estimated
1490    location is calculated.
1491    Return the loop location if succeed and NULL if not.  */
1492
1493 source_location
1494 find_loop_location (struct loop *loop)
1495 {
1496   gimple stmt = NULL;
1497   basic_block bb;
1498   gimple_stmt_iterator si;
1499
1500   if (!loop)
1501     return UNKNOWN_LOCATION;
1502
1503   stmt = get_loop_exit_condition (loop);
1504
1505   if (stmt
1506       && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1507     return gimple_location (stmt);
1508
1509   /* If we got here the loop is probably not "well formed",
1510      try to estimate the loop location */
1511
1512   if (!loop->header)
1513     return UNKNOWN_LOCATION;
1514
1515   bb = loop->header;
1516
1517   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1518     {
1519       stmt = gsi_stmt (si);
1520       if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1521         return gimple_location (stmt);
1522     }
1523
1524   return UNKNOWN_LOCATION;
1525 }
1526
1527
1528 /* Function vect_can_advance_ivs_p
1529
1530    In case the number of iterations that LOOP iterates is unknown at compile
1531    time, an epilog loop will be generated, and the loop induction variables
1532    (IVs) will be "advanced" to the value they are supposed to take just before
1533    the epilog loop.  Here we check that the access function of the loop IVs
1534    and the expression that represents the loop bound are simple enough.
1535    These restrictions will be relaxed in the future.  */
1536
1537 bool
1538 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1539 {
1540   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1541   basic_block bb = loop->header;
1542   gimple phi;
1543   gimple_stmt_iterator gsi;
1544
1545   /* Analyze phi functions of the loop header.  */
1546
1547   if (dump_enabled_p ())
1548     dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
1549   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1550     {
1551       tree evolution_part;
1552
1553       phi = gsi_stmt (gsi);
1554       if (dump_enabled_p ())
1555         {
1556           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
1557           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1558           dump_printf (MSG_NOTE, "\n");
1559         }
1560
1561       /* Skip virtual phi's. The data dependences that are associated with
1562          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
1563
1564       if (virtual_operand_p (PHI_RESULT (phi)))
1565         {
1566           if (dump_enabled_p ())
1567             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1568                              "virtual phi. skip.\n");
1569           continue;
1570         }
1571
1572       /* Skip reduction phis.  */
1573
1574       if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1575         {
1576           if (dump_enabled_p ())
1577             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1578                              "reduc phi. skip.\n");
1579           continue;
1580         }
1581
1582       /* Analyze the evolution function.  */
1583
1584       evolution_part
1585         = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
1586       if (evolution_part == NULL_TREE)
1587         {
1588           if (dump_enabled_p ())
1589             dump_printf (MSG_MISSED_OPTIMIZATION,
1590                          "No access function or evolution.\n");
1591           return false;
1592         }
1593
1594       /* FORNOW: We do not transform initial conditions of IVs
1595          which evolution functions are a polynomial of degree >= 2.  */
1596
1597       if (tree_is_chrec (evolution_part))
1598         return false;
1599     }
1600
1601   return true;
1602 }
1603
1604
1605 /*   Function vect_update_ivs_after_vectorizer.
1606
1607      "Advance" the induction variables of LOOP to the value they should take
1608      after the execution of LOOP.  This is currently necessary because the
1609      vectorizer does not handle induction variables that are used after the
1610      loop.  Such a situation occurs when the last iterations of LOOP are
1611      peeled, because:
1612      1. We introduced new uses after LOOP for IVs that were not originally used
1613         after LOOP: the IVs of LOOP are now used by an epilog loop.
1614      2. LOOP is going to be vectorized; this means that it will iterate N/VF
1615         times, whereas the loop IVs should be bumped N times.
1616
1617      Input:
1618      - LOOP - a loop that is going to be vectorized. The last few iterations
1619               of LOOP were peeled.
1620      - NITERS - the number of iterations that LOOP executes (before it is
1621                 vectorized). i.e, the number of times the ivs should be bumped.
1622      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1623                   coming out from LOOP on which there are uses of the LOOP ivs
1624                   (this is the path from LOOP->exit to epilog_loop->preheader).
1625
1626                   The new definitions of the ivs are placed in LOOP->exit.
1627                   The phi args associated with the edge UPDATE_E in the bb
1628                   UPDATE_E->dest are updated accordingly.
1629
1630      Assumption 1: Like the rest of the vectorizer, this function assumes
1631      a single loop exit that has a single predecessor.
1632
1633      Assumption 2: The phi nodes in the LOOP header and in update_bb are
1634      organized in the same order.
1635
1636      Assumption 3: The access function of the ivs is simple enough (see
1637      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
1638
1639      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1640      coming out of LOOP on which the ivs of LOOP are used (this is the path
1641      that leads to the epilog loop; other paths skip the epilog loop).  This
1642      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1643      needs to have its phis updated.
1644  */
1645
1646 static void
1647 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1648                                   edge update_e)
1649 {
1650   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1651   basic_block exit_bb = single_exit (loop)->dest;
1652   gimple phi, phi1;
1653   gimple_stmt_iterator gsi, gsi1;
1654   basic_block update_bb = update_e->dest;
1655
1656   gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1657
1658   /* Make sure there exists a single-predecessor exit bb:  */
1659   gcc_assert (single_pred_p (exit_bb));
1660
1661   for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1662        !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1663        gsi_next (&gsi), gsi_next (&gsi1))
1664     {
1665       tree init_expr;
1666       tree step_expr, off;
1667       tree type;
1668       tree var, ni, ni_name;
1669       gimple_stmt_iterator last_gsi;
1670       stmt_vec_info stmt_info;
1671
1672       phi = gsi_stmt (gsi);
1673       phi1 = gsi_stmt (gsi1);
1674       if (dump_enabled_p ())
1675         {
1676           dump_printf_loc (MSG_NOTE, vect_location,
1677                            "vect_update_ivs_after_vectorizer: phi: ");
1678           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1679           dump_printf (MSG_NOTE, "\n");
1680         }
1681
1682       /* Skip virtual phi's.  */
1683       if (virtual_operand_p (PHI_RESULT (phi)))
1684         {
1685           if (dump_enabled_p ())
1686             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1687                              "virtual phi. skip.\n");
1688           continue;
1689         }
1690
1691       /* Skip reduction phis.  */
1692       stmt_info = vinfo_for_stmt (phi);
1693       if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
1694         {
1695           if (dump_enabled_p ())
1696             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1697                              "reduc phi. skip.\n");
1698           continue;
1699         }
1700
1701       type = TREE_TYPE (gimple_phi_result (phi));
1702       step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
1703       step_expr = unshare_expr (step_expr);
1704
1705       /* FORNOW: We do not support IVs whose evolution function is a polynomial
1706          of degree >= 2 or exponential.  */
1707       gcc_assert (!tree_is_chrec (step_expr));
1708
1709       init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1710
1711       off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1712                          fold_convert (TREE_TYPE (step_expr), niters),
1713                          step_expr);
1714       if (POINTER_TYPE_P (type))
1715         ni = fold_build_pointer_plus (init_expr, off);
1716       else
1717         ni = fold_build2 (PLUS_EXPR, type,
1718                           init_expr, fold_convert (type, off));
1719
1720       var = create_tmp_var (type, "tmp");
1721
1722       last_gsi = gsi_last_bb (exit_bb);
1723       ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1724                                           true, GSI_SAME_STMT);
1725
1726       /* Fix phi expressions in the successor bb.  */
1727       adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
1728     }
1729 }
1730
1731 /* Function vect_do_peeling_for_loop_bound
1732
1733    Peel the last iterations of the loop represented by LOOP_VINFO.
1734    The peeled iterations form a new epilog loop.  Given that the loop now
1735    iterates NITERS times, the new epilog loop iterates
1736    NITERS % VECTORIZATION_FACTOR times.
1737
1738    The original loop will later be made to iterate
1739    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1740
1741    COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1742    test.  */
1743
1744 void
1745 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo,
1746                                 tree ni_name, tree ratio_mult_vf_name,
1747                                 unsigned int th, bool check_profitability)
1748 {
1749   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1750   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1751   struct loop *new_loop;
1752   edge update_e;
1753   basic_block preheader;
1754   int loop_num;
1755   int max_iter;
1756   tree cond_expr = NULL_TREE;
1757   gimple_seq cond_expr_stmt_list = NULL;
1758
1759   if (dump_enabled_p ())
1760     dump_printf_loc (MSG_NOTE, vect_location,
1761                      "=== vect_do_peeling_for_loop_bound ===\n");
1762
1763   initialize_original_copy_tables ();
1764
1765   loop_num  = loop->num;
1766
1767   new_loop
1768     = slpeel_tree_peel_loop_to_edge (loop, scalar_loop, single_exit (loop),
1769                                      &ratio_mult_vf_name, ni_name, false,
1770                                      th, check_profitability,
1771                                      cond_expr, cond_expr_stmt_list,
1772                                      0, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1773   gcc_assert (new_loop);
1774   gcc_assert (loop_num == loop->num);
1775 #ifdef ENABLE_CHECKING
1776   slpeel_verify_cfg_after_peeling (loop, new_loop);
1777 #endif
1778
1779   /* A guard that controls whether the new_loop is to be executed or skipped
1780      is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
1781      is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
1782      is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
1783      is on the path where the LOOP IVs are used and need to be updated.  */
1784
1785   preheader = loop_preheader_edge (new_loop)->src;
1786   if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1787     update_e = EDGE_PRED (preheader, 0);
1788   else
1789     update_e = EDGE_PRED (preheader, 1);
1790
1791   /* Update IVs of original loop as if they were advanced
1792      by ratio_mult_vf_name steps.  */
1793   vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1794
1795   /* For vectorization factor N, we need to copy last N-1 values in epilogue
1796      and this means N-2 loopback edge executions.
1797
1798      PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue
1799      will execute at least LOOP_VINFO_VECT_FACTOR times.  */
1800   max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1801               ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
1802               : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
1803   if (check_profitability)
1804     max_iter = MAX (max_iter, (int) th - 1);
1805   record_niter_bound (new_loop, max_iter, false, true);
1806   dump_printf (MSG_NOTE,
1807                "Setting upper bound of nb iterations for epilogue "
1808                "loop to %d\n", max_iter);
1809
1810   /* After peeling we have to reset scalar evolution analyzer.  */
1811   scev_reset ();
1812
1813   free_original_copy_tables ();
1814 }
1815
1816
1817 /* Function vect_gen_niters_for_prolog_loop
1818
1819    Set the number of iterations for the loop represented by LOOP_VINFO
1820    to the minimum between LOOP_NITERS (the original iteration count of the loop)
1821    and the misalignment of DR - the data reference recorded in
1822    LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
1823    this loop, the data reference DR will refer to an aligned location.
1824
1825    The following computation is generated:
1826
1827    If the misalignment of DR is known at compile time:
1828      addr_mis = int mis = DR_MISALIGNMENT (dr);
1829    Else, compute address misalignment in bytes:
1830      addr_mis = addr & (vectype_align - 1)
1831
1832    prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1833
1834    (elem_size = element type size; an element is the scalar element whose type
1835    is the inner type of the vectype)
1836
1837    When the step of the data-ref in the loop is not 1 (as in interleaved data
1838    and SLP), the number of iterations of the prolog must be divided by the step
1839    (which is equal to the size of interleaved group).
1840
1841    The above formulas assume that VF == number of elements in the vector. This
1842    may not hold when there are multiple-types in the loop.
1843    In this case, for some data-references in the loop the VF does not represent
1844    the number of elements that fit in the vector.  Therefore, instead of VF we
1845    use TYPE_VECTOR_SUBPARTS.  */
1846
1847 static tree
1848 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound)
1849 {
1850   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1851   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1852   tree var;
1853   gimple_seq stmts;
1854   tree iters, iters_name;
1855   edge pe;
1856   basic_block new_bb;
1857   gimple dr_stmt = DR_STMT (dr);
1858   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1859   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1860   int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1861   tree niters_type = TREE_TYPE (loop_niters);
1862   int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1863
1864   pe = loop_preheader_edge (loop);
1865
1866   if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1867     {
1868       int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1869
1870       if (dump_enabled_p ())
1871         dump_printf_loc (MSG_NOTE, vect_location,
1872                          "known peeling = %d.\n", npeel);
1873
1874       iters = build_int_cst (niters_type, npeel);
1875       *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1876     }
1877   else
1878     {
1879       gimple_seq new_stmts = NULL;
1880       bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1881       tree offset = negative
1882           ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
1883       tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
1884                                                 &new_stmts, offset, loop);
1885       tree type = unsigned_type_for (TREE_TYPE (start_addr));
1886       tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
1887       HOST_WIDE_INT elem_size =
1888                 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1889       tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
1890       tree nelements_minus_1 = build_int_cst (type, nelements - 1);
1891       tree nelements_tree = build_int_cst (type, nelements);
1892       tree byte_misalign;
1893       tree elem_misalign;
1894
1895       new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
1896       gcc_assert (!new_bb);
1897
1898       /* Create:  byte_misalign = addr & (vectype_align - 1)  */
1899       byte_misalign =
1900         fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
1901                      vectype_align_minus_1);
1902
1903       /* Create:  elem_misalign = byte_misalign / element_size  */
1904       elem_misalign =
1905         fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
1906
1907       /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
1908       if (negative)
1909         iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
1910       else
1911         iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
1912       iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1913       iters = fold_convert (niters_type, iters);
1914       *bound = nelements;
1915     }
1916
1917   /* Create:  prolog_loop_niters = min (iters, loop_niters) */
1918   /* If the loop bound is known at compile time we already verified that it is
1919      greater than vf; since the misalignment ('iters') is at most vf, there's
1920      no need to generate the MIN_EXPR in this case.  */
1921   if (TREE_CODE (loop_niters) != INTEGER_CST)
1922     iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
1923
1924   if (dump_enabled_p ())
1925     {
1926       dump_printf_loc (MSG_NOTE, vect_location,
1927                        "niters for prolog loop: ");
1928       dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
1929       dump_printf (MSG_NOTE, "\n");
1930     }
1931
1932   var = create_tmp_var (niters_type, "prolog_loop_niters");
1933   stmts = NULL;
1934   iters_name = force_gimple_operand (iters, &stmts, false, var);
1935
1936   /* Insert stmt on loop preheader edge.  */
1937   if (stmts)
1938     {
1939       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1940       gcc_assert (!new_bb);
1941     }
1942
1943   return iters_name;
1944 }
1945
1946
1947 /* Function vect_update_init_of_dr
1948
1949    NITERS iterations were peeled from LOOP.  DR represents a data reference
1950    in LOOP.  This function updates the information recorded in DR to
1951    account for the fact that the first NITERS iterations had already been
1952    executed.  Specifically, it updates the OFFSET field of DR.  */
1953
1954 static void
1955 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1956 {
1957   tree offset = DR_OFFSET (dr);
1958
1959   niters = fold_build2 (MULT_EXPR, sizetype,
1960                         fold_convert (sizetype, niters),
1961                         fold_convert (sizetype, DR_STEP (dr)));
1962   offset = fold_build2 (PLUS_EXPR, sizetype,
1963                         fold_convert (sizetype, offset), niters);
1964   DR_OFFSET (dr) = offset;
1965 }
1966
1967
1968 /* Function vect_update_inits_of_drs
1969
1970    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1971    This function updates the information recorded for the data references in
1972    the loop to account for the fact that the first NITERS iterations had
1973    already been executed.  Specifically, it updates the initial_condition of
1974    the access_function of all the data_references in the loop.  */
1975
1976 static void
1977 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1978 {
1979   unsigned int i;
1980   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1981   struct data_reference *dr;
1982
1983  if (dump_enabled_p ())
1984     dump_printf_loc (MSG_NOTE, vect_location,
1985                      "=== vect_update_inits_of_dr ===\n");
1986
1987   FOR_EACH_VEC_ELT (datarefs, i, dr)
1988     vect_update_init_of_dr (dr, niters);
1989 }
1990
1991
1992 /* Function vect_do_peeling_for_alignment
1993
1994    Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
1995    'niters' is set to the misalignment of one of the data references in the
1996    loop, thereby forcing it to refer to an aligned location at the beginning
1997    of the execution of this loop.  The data reference for which we are
1998    peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
1999
2000 void
2001 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name,
2002                                unsigned int th, bool check_profitability)
2003 {
2004   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2005   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2006   tree niters_of_prolog_loop;
2007   tree wide_prolog_niters;
2008   struct loop *new_loop;
2009   int max_iter;
2010   int bound = 0;
2011
2012   if (dump_enabled_p ())
2013     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2014                      "loop peeled for vectorization to enhance"
2015                      " alignment\n");
2016
2017   initialize_original_copy_tables ();
2018
2019   gimple_seq stmts = NULL;
2020   gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2021   niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
2022                                                            ni_name,
2023                                                            &bound);
2024
2025   /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
2026   new_loop =
2027     slpeel_tree_peel_loop_to_edge (loop, scalar_loop,
2028                                    loop_preheader_edge (loop),
2029                                    &niters_of_prolog_loop, ni_name, true,
2030                                    th, check_profitability, NULL_TREE, NULL,
2031                                    bound, 0);
2032
2033   gcc_assert (new_loop);
2034 #ifdef ENABLE_CHECKING
2035   slpeel_verify_cfg_after_peeling (new_loop, loop);
2036 #endif
2037   /* For vectorization factor N, we need to copy at most N-1 values
2038      for alignment and this means N-2 loopback edge executions.  */
2039   max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
2040   if (check_profitability)
2041     max_iter = MAX (max_iter, (int) th - 1);
2042   record_niter_bound (new_loop, max_iter, false, true);
2043   dump_printf (MSG_NOTE,
2044                "Setting upper bound of nb iterations for prologue "
2045                "loop to %d\n", max_iter);
2046
2047   /* Update number of times loop executes.  */
2048   LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2049                 TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop);
2050   LOOP_VINFO_NITERSM1 (loop_vinfo) = fold_build2 (MINUS_EXPR,
2051                 TREE_TYPE (ni_name),
2052                 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_of_prolog_loop);
2053
2054   if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
2055     wide_prolog_niters = niters_of_prolog_loop;
2056   else
2057     {
2058       gimple_seq seq = NULL;
2059       edge pe = loop_preheader_edge (loop);
2060       tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
2061       tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
2062       wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
2063                                                  var);
2064       if (seq)
2065         {
2066           /* Insert stmt on loop preheader edge.  */
2067           basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2068           gcc_assert (!new_bb);
2069         }
2070     }
2071
2072   /* Update the init conditions of the access functions of all data refs.  */
2073   vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
2074
2075   /* After peeling we have to reset scalar evolution analyzer.  */
2076   scev_reset ();
2077
2078   free_original_copy_tables ();
2079 }
2080
2081
2082 /* Function vect_create_cond_for_align_checks.
2083
2084    Create a conditional expression that represents the alignment checks for
2085    all of data references (array element references) whose alignment must be
2086    checked at runtime.
2087
2088    Input:
2089    COND_EXPR  - input conditional expression.  New conditions will be chained
2090                 with logical AND operation.
2091    LOOP_VINFO - two fields of the loop information are used.
2092                 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2093                 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2094
2095    Output:
2096    COND_EXPR_STMT_LIST - statements needed to construct the conditional
2097                          expression.
2098    The returned value is the conditional expression to be used in the if
2099    statement that controls which version of the loop gets executed at runtime.
2100
2101    The algorithm makes two assumptions:
2102      1) The number of bytes "n" in a vector is a power of 2.
2103      2) An address "a" is aligned if a%n is zero and that this
2104         test can be done as a&(n-1) == 0.  For example, for 16
2105         byte vectors the test is a&0xf == 0.  */
2106
2107 static void
2108 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2109                                    tree *cond_expr,
2110                                    gimple_seq *cond_expr_stmt_list)
2111 {
2112   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2113   vec<gimple> may_misalign_stmts
2114     = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2115   gimple ref_stmt;
2116   int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2117   tree mask_cst;
2118   unsigned int i;
2119   tree int_ptrsize_type;
2120   char tmp_name[20];
2121   tree or_tmp_name = NULL_TREE;
2122   tree and_tmp_name;
2123   gimple and_stmt;
2124   tree ptrsize_zero;
2125   tree part_cond_expr;
2126
2127   /* Check that mask is one less than a power of 2, i.e., mask is
2128      all zeros followed by all ones.  */
2129   gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2130
2131   int_ptrsize_type = signed_type_for (ptr_type_node);
2132
2133   /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2134      of the first vector of the i'th data reference. */
2135
2136   FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
2137     {
2138       gimple_seq new_stmt_list = NULL;
2139       tree addr_base;
2140       tree addr_tmp_name;
2141       tree new_or_tmp_name;
2142       gimple addr_stmt, or_stmt;
2143       stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2144       tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2145       bool negative = tree_int_cst_compare
2146         (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2147       tree offset = negative
2148         ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
2149
2150       /* create: addr_tmp = (int)(address_of_first_vector) */
2151       addr_base =
2152         vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2153                                               offset, loop);
2154       if (new_stmt_list != NULL)
2155         gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2156
2157       sprintf (tmp_name, "addr2int%d", i);
2158       addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2159       addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
2160                                                 addr_base, NULL_TREE);
2161       gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2162
2163       /* The addresses are OR together.  */
2164
2165       if (or_tmp_name != NULL_TREE)
2166         {
2167           /* create: or_tmp = or_tmp | addr_tmp */
2168           sprintf (tmp_name, "orptrs%d", i);
2169           new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2170           or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2171                                                   new_or_tmp_name,
2172                                                   or_tmp_name, addr_tmp_name);
2173           gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2174           or_tmp_name = new_or_tmp_name;
2175         }
2176       else
2177         or_tmp_name = addr_tmp_name;
2178
2179     } /* end for i */
2180
2181   mask_cst = build_int_cst (int_ptrsize_type, mask);
2182
2183   /* create: and_tmp = or_tmp & mask  */
2184   and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2185
2186   and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
2187                                            or_tmp_name, mask_cst);
2188   gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2189
2190   /* Make and_tmp the left operand of the conditional test against zero.
2191      if and_tmp has a nonzero bit then some address is unaligned.  */
2192   ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2193   part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2194                                 and_tmp_name, ptrsize_zero);
2195   if (*cond_expr)
2196     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2197                               *cond_expr, part_cond_expr);
2198   else
2199     *cond_expr = part_cond_expr;
2200 }
2201
2202 /* Function vect_create_cond_for_alias_checks.
2203
2204    Create a conditional expression that represents the run-time checks for
2205    overlapping of address ranges represented by a list of data references
2206    relations passed as input.
2207
2208    Input:
2209    COND_EXPR  - input conditional expression.  New conditions will be chained
2210                 with logical AND operation.  If it is NULL, then the function
2211                 is used to return the number of alias checks.
2212    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2213                 to be checked.
2214
2215    Output:
2216    COND_EXPR - conditional expression.
2217
2218    The returned COND_EXPR is the conditional expression to be used in the if
2219    statement that controls which version of the loop gets executed at runtime.
2220 */
2221
2222 void
2223 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2224 {
2225   vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2226     LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2227   tree part_cond_expr;
2228
2229   /* Create expression
2230      ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2231      || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
2232      &&
2233      ...
2234      &&
2235      ((store_ptr_n + store_segment_length_n) <= load_ptr_n)
2236      || (load_ptr_n + load_segment_length_n) <= store_ptr_n))  */
2237
2238   if (comp_alias_ddrs.is_empty ())
2239     return;
2240
2241   for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
2242     {
2243       const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2244       const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
2245       tree segment_length_a = dr_a.seg_len;
2246       tree segment_length_b = dr_b.seg_len;
2247
2248       tree addr_base_a
2249         = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr), dr_a.offset);
2250       tree addr_base_b
2251         = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr), dr_b.offset);
2252
2253       if (dump_enabled_p ())
2254         {
2255           dump_printf_loc (MSG_NOTE, vect_location,
2256                            "create runtime check for data references ");
2257           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
2258           dump_printf (MSG_NOTE, " and ");
2259           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2260           dump_printf (MSG_NOTE, "\n");
2261         }
2262
2263       tree seg_a_min = addr_base_a;
2264       tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2265       /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
2266          bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
2267          [a, a+12) */
2268       if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
2269         {
2270           tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2271           seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2272           seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2273         }
2274
2275       tree seg_b_min = addr_base_b;
2276       tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2277       if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
2278         {
2279           tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2280           seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2281           seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2282         }
2283
2284       part_cond_expr =
2285         fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2286           fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2287           fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2288
2289       if (*cond_expr)
2290         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2291                                   *cond_expr, part_cond_expr);
2292       else
2293         *cond_expr = part_cond_expr;
2294     }
2295
2296   if (dump_enabled_p ())
2297     dump_printf_loc (MSG_NOTE, vect_location,
2298                      "created %u versioning for alias checks.\n",
2299                      comp_alias_ddrs.length ());
2300
2301   comp_alias_ddrs.release ();
2302 }
2303
2304
2305 /* Function vect_loop_versioning.
2306
2307    If the loop has data references that may or may not be aligned or/and
2308    has data reference relations whose independence was not proven then
2309    two versions of the loop need to be generated, one which is vectorized
2310    and one which isn't.  A test is then generated to control which of the
2311    loops is executed.  The test checks for the alignment of all of the
2312    data references that may or may not be aligned.  An additional
2313    sequence of runtime tests is generated for each pairs of DDRs whose
2314    independence was not proven.  The vectorized version of loop is
2315    executed only if both alias and alignment tests are passed.
2316
2317    The test generated to check which version of loop is executed
2318    is modified to also check for profitability as indicated by the
2319    cost model initially.
2320
2321    The versioning precondition(s) are placed in *COND_EXPR and
2322    *COND_EXPR_STMT_LIST.  */
2323
2324 void
2325 vect_loop_versioning (loop_vec_info loop_vinfo,
2326                       unsigned int th, bool check_profitability)
2327 {
2328   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2329   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2330   basic_block condition_bb;
2331   gimple_stmt_iterator gsi, cond_exp_gsi;
2332   basic_block merge_bb;
2333   basic_block new_exit_bb;
2334   edge new_exit_e, e;
2335   gimple orig_phi, new_phi;
2336   tree cond_expr = NULL_TREE;
2337   gimple_seq cond_expr_stmt_list = NULL;
2338   tree arg;
2339   unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2340   gimple_seq gimplify_stmt_list = NULL;
2341   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2342   bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2343   bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2344
2345   if (check_profitability)
2346     {
2347       cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2348                                build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2349       cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2350                                           is_gimple_condexpr, NULL_TREE);
2351     }
2352
2353   if (version_align)
2354     vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2355                                        &cond_expr_stmt_list);
2356
2357   if (version_alias)
2358     vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2359
2360   cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2361                                       is_gimple_condexpr, NULL_TREE);
2362   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2363
2364   initialize_original_copy_tables ();
2365   if (scalar_loop)
2366     {
2367       edge scalar_e;
2368       basic_block preheader, scalar_preheader;
2369
2370       /* We don't want to scale SCALAR_LOOP's frequencies, we need to
2371          scale LOOP's frequencies instead.  */
2372       loop_version (scalar_loop, cond_expr, &condition_bb,
2373                     prob, REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
2374       scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2375       /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2376          while we need to move it above LOOP's preheader.  */
2377       e = loop_preheader_edge (loop);
2378       scalar_e = loop_preheader_edge (scalar_loop);
2379       gcc_assert (empty_block_p (e->src)
2380                   && single_pred_p (e->src));
2381       gcc_assert (empty_block_p (scalar_e->src)
2382                   && single_pred_p (scalar_e->src));
2383       gcc_assert (single_pred_p (condition_bb));
2384       preheader = e->src;
2385       scalar_preheader = scalar_e->src;
2386       scalar_e = find_edge (condition_bb, scalar_preheader);
2387       e = single_pred_edge (preheader);
2388       redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2389                                       scalar_preheader);
2390       redirect_edge_and_branch_force (scalar_e, preheader);
2391       redirect_edge_and_branch_force (e, condition_bb);
2392       set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2393                                single_pred (condition_bb));
2394       set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2395                                single_pred (scalar_preheader));
2396       set_immediate_dominator (CDI_DOMINATORS, preheader,
2397                                condition_bb);
2398     }
2399   else
2400     loop_version (loop, cond_expr, &condition_bb,
2401                   prob, prob, REG_BR_PROB_BASE - prob, true);
2402
2403   if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2404       && dump_enabled_p ())
2405     {
2406       if (version_alias)
2407         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2408                          "loop versioned for vectorization because of "
2409                          "possible aliasing\n");
2410       if (version_align)
2411         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2412                          "loop versioned for vectorization to enhance "
2413                          "alignment\n");
2414
2415     }
2416   free_original_copy_tables ();
2417
2418   /* Loop versioning violates an assumption we try to maintain during
2419      vectorization - that the loop exit block has a single predecessor.
2420      After versioning, the exit block of both loop versions is the same
2421      basic block (i.e. it has two predecessors). Just in order to simplify
2422      following transformations in the vectorizer, we fix this situation
2423      here by adding a new (empty) block on the exit-edge of the loop,
2424      with the proper loop-exit phis to maintain loop-closed-form.
2425      If loop versioning wasn't done from loop, but scalar_loop instead,
2426      merge_bb will have already just a single successor.  */
2427
2428   merge_bb = single_exit (loop)->dest;
2429   if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2430     {
2431       gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2432       new_exit_bb = split_edge (single_exit (loop));
2433       new_exit_e = single_exit (loop);
2434       e = EDGE_SUCC (new_exit_bb, 0);
2435
2436       for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2437         {
2438           tree new_res;
2439           orig_phi = gsi_stmt (gsi);
2440           new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
2441           new_phi = create_phi_node (new_res, new_exit_bb);
2442           arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2443           add_phi_arg (new_phi, arg, new_exit_e,
2444                        gimple_phi_arg_location_from_edge (orig_phi, e));
2445           adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2446         }
2447     }
2448
2449   /* End loop-exit-fixes after versioning.  */
2450
2451   if (cond_expr_stmt_list)
2452     {
2453       cond_exp_gsi = gsi_last_bb (condition_bb);
2454       gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2455                              GSI_SAME_STMT);
2456     }
2457   update_ssa (TODO_update_ssa);
2458 }