gcc/tree-vect-loop.c
1 /* Loop Vectorization
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com> and
4 Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "stor-layout.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "gimple.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
36 #include "gimple-ssa.h"
37 #include "tree-phinodes.h"
38 #include "ssa-iterators.h"
39 #include "stringpool.h"
40 #include "tree-ssanames.h"
41 #include "tree-ssa-loop-ivopts.h"
42 #include "tree-ssa-loop-manip.h"
43 #include "tree-ssa-loop-niter.h"
44 #include "tree-pass.h"
45 #include "cfgloop.h"
46 #include "expr.h"
47 #include "recog.h"
48 #include "optabs.h"
49 #include "params.h"
50 #include "diagnostic-core.h"
51 #include "tree-chrec.h"
52 #include "tree-scalar-evolution.h"
53 #include "tree-vectorizer.h"
54 #include "target.h"
55
56 /* Loop Vectorization Pass.
57
58 This pass tries to vectorize loops.
59
60 For example, the vectorizer transforms the following simple loop:
61
62 short a[N]; short b[N]; short c[N]; int i;
63
64 for (i=0; i<N; i++){
65 a[i] = b[i] + c[i];
66 }
67
 68         as if it were manually vectorized by rewriting the source code into:
69
70 typedef int __attribute__((mode(V8HI))) v8hi;
71 short a[N]; short b[N]; short c[N]; int i;
72 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
73 v8hi va, vb, vc;
74
75 for (i=0; i<N/8; i++){
76 vb = pb[i];
77 vc = pc[i];
78 va = vb + vc;
79 pa[i] = va;
80 }
81
82 The main entry to this pass is vectorize_loops(), in which
83 the vectorizer applies a set of analyses on a given set of loops,
84 followed by the actual vectorization transformation for the loops that
 85      have successfully passed the analysis phase.
86 Throughout this pass we make a distinction between two types of
87 data: scalars (which are represented by SSA_NAMES), and memory references
88 ("data-refs"). These two types of data require different handling both
89 during analysis and transformation. The types of data-refs that the
 90      vectorizer currently supports are ARRAY_REFS whose base is an array DECL
91 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
92 accesses are required to have a simple (consecutive) access pattern.
93
94 Analysis phase:
95 ===============
96 The driver for the analysis phase is vect_analyze_loop().
97 It applies a set of analyses, some of which rely on the scalar evolution
98 analyzer (scev) developed by Sebastian Pop.
99
100 During the analysis phase the vectorizer records some information
101 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
102 loop, as well as general information about the loop as a whole, which is
103 recorded in a "loop_vec_info" struct attached to each loop.
104
105 Transformation phase:
106 =====================
107 The loop transformation phase scans all the stmts in the loop, and
108 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
109 the loop that needs to be vectorized. It inserts the vector code sequence
110 just before the scalar stmt S, and records a pointer to the vector code
111 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
112 attached to S). This pointer will be used for the vectorization of following
113 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
114 otherwise, we rely on dead code elimination for removing it.
115
116 For example, say stmt S1 was vectorized into stmt VS1:
117
118 VS1: vb = px[i];
119 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
120 S2: a = b;
121
122 To vectorize stmt S2, the vectorizer first finds the stmt that defines
123 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
124 vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
125 resulting sequence would be:
126
127 VS1: vb = px[i];
128 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
129 VS2: va = vb;
130 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
131
 132      Operands that are not SSA_NAMEs are data-refs that appear in
133 load/store operations (like 'x[i]' in S1), and are handled differently.
134
135 Target modeling:
136 =================
 137      Currently the only target-specific information that is used is the
 138      size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
 139      Targets that can support different sizes of vectors will, for now, need
140 to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD". More
141 flexibility will be added in the future.
142
 143      Since we only vectorize operations whose vector form can be
144 expressed using existing tree codes, to verify that an operation is
145 supported, the vectorizer checks the relevant optab at the relevant
146 machine_mode (e.g, optab_handler (add_optab, V8HImode)). If
147 the value found is CODE_FOR_nothing, then there's no target support, and
148 we can't vectorize the stmt.
149
150 For additional information on this project see:
151 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
152 */
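/* As a concrete illustration of the transformation sketched above, the
   vectorized loop can also be written by hand using GCC's generic vector
   extensions.  This is only an explanatory sketch (the function name, and
   the assumption that n is a multiple of 8, are ours; alignment and
   aliasing concerns are ignored), not code used by this pass:

     typedef short v8hi __attribute__ ((vector_size (16)));

     void
     add_shorts (short *a, short *b, short *c, int n)
     {
       int i;
       for (i = 0; i < n; i += 8)
         {
           v8hi vb = *(v8hi *) (b + i);
           v8hi vc = *(v8hi *) (c + i);
           v8hi va = vb + vc;
           *(v8hi *) (a + i) = va;
         }
     }
*/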
153
154 static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
155
156 /* Function vect_determine_vectorization_factor
157
158 Determine the vectorization factor (VF). VF is the number of data elements
159 that are operated upon in parallel in a single iteration of the vectorized
 160      loop. For example, when vectorizing a loop that operates on 4-byte elements,
 161      on a target with a vector size (VS) of 16 bytes, the VF is set to 4, since 4
162 elements can fit in a single vector register.
163
164 We currently support vectorization of loops in which all types operated upon
165 are of the same size. Therefore this function currently sets VF according to
166 the size of the types operated upon, and fails if there are multiple sizes
167 in the loop.
168
169 VF is also the factor by which the loop iterations are strip-mined, e.g.:
170 original loop:
171 for (i=0; i<N; i++){
172 a[i] = b[i] + c[i];
173 }
174
175 vectorized loop:
176 for (i=0; i<N; i+=VF){
177 a[i:VF] = b[i:VF] + c[i:VF];
178 }
179 */
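/* For instance, with a 16-byte vector size, a loop whose computations are
   all on 2-byte shorts gets VF = 8, while one operating only on 4-byte
   ints gets VF = 4 (illustrative numbers; the actual vector size comes
   from the target).  */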
180
181 static bool
182 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
183 {
184 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
185 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
186 int nbbs = loop->num_nodes;
187 gimple_stmt_iterator si;
188 unsigned int vectorization_factor = 0;
189 tree scalar_type;
190 gimple phi;
191 tree vectype;
192 unsigned int nunits;
193 stmt_vec_info stmt_info;
194 int i;
195 HOST_WIDE_INT dummy;
196 gimple stmt, pattern_stmt = NULL;
197 gimple_seq pattern_def_seq = NULL;
198 gimple_stmt_iterator pattern_def_si = gsi_none ();
199 bool analyze_pattern_stmt = false;
200
201 if (dump_enabled_p ())
202 dump_printf_loc (MSG_NOTE, vect_location,
203 "=== vect_determine_vectorization_factor ===\n");
204
205 for (i = 0; i < nbbs; i++)
206 {
207 basic_block bb = bbs[i];
208
209 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
210 {
211 phi = gsi_stmt (si);
212 stmt_info = vinfo_for_stmt (phi);
213 if (dump_enabled_p ())
214 {
215 dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: ");
216 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
217 dump_printf (MSG_NOTE, "\n");
218 }
219
220 gcc_assert (stmt_info);
221
222 if (STMT_VINFO_RELEVANT_P (stmt_info))
223 {
224 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
225 scalar_type = TREE_TYPE (PHI_RESULT (phi));
226
227 if (dump_enabled_p ())
228 {
229 dump_printf_loc (MSG_NOTE, vect_location,
230 "get vectype for scalar type: ");
231 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
232 dump_printf (MSG_NOTE, "\n");
233 }
234
235 vectype = get_vectype_for_scalar_type (scalar_type);
236 if (!vectype)
237 {
238 if (dump_enabled_p ())
239 {
240 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
241 "not vectorized: unsupported "
242 "data-type ");
243 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
244 scalar_type);
245 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
246 }
247 return false;
248 }
249 STMT_VINFO_VECTYPE (stmt_info) = vectype;
250
251 if (dump_enabled_p ())
252 {
253 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
254 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
255 dump_printf (MSG_NOTE, "\n");
256 }
257
258 nunits = TYPE_VECTOR_SUBPARTS (vectype);
259 if (dump_enabled_p ())
260 dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n",
261 nunits);
262
263 if (!vectorization_factor
264 || (nunits > vectorization_factor))
265 vectorization_factor = nunits;
266 }
267 }
268
269 for (si = gsi_start_bb (bb); !gsi_end_p (si) || analyze_pattern_stmt;)
270 {
271 tree vf_vectype;
272
273 if (analyze_pattern_stmt)
274 stmt = pattern_stmt;
275 else
276 stmt = gsi_stmt (si);
277
278 stmt_info = vinfo_for_stmt (stmt);
279
280 if (dump_enabled_p ())
281 {
282 dump_printf_loc (MSG_NOTE, vect_location,
283 "==> examining statement: ");
284 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
285 dump_printf (MSG_NOTE, "\n");
286 }
287
288 gcc_assert (stmt_info);
289
290 /* Skip stmts which do not need to be vectorized. */
291 if ((!STMT_VINFO_RELEVANT_P (stmt_info)
292 && !STMT_VINFO_LIVE_P (stmt_info))
293 || gimple_clobber_p (stmt))
294 {
295 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
296 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
297 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
298 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
299 {
300 stmt = pattern_stmt;
301 stmt_info = vinfo_for_stmt (pattern_stmt);
302 if (dump_enabled_p ())
303 {
304 dump_printf_loc (MSG_NOTE, vect_location,
305 "==> examining pattern statement: ");
306 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
307 dump_printf (MSG_NOTE, "\n");
308 }
309 }
310 else
311 {
312 if (dump_enabled_p ())
313 dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
314 gsi_next (&si);
315 continue;
316 }
317 }
318 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
319 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
320 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
321 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
322 analyze_pattern_stmt = true;
323
324 /* If a pattern statement has def stmts, analyze them too. */
325 if (is_pattern_stmt_p (stmt_info))
326 {
327 if (pattern_def_seq == NULL)
328 {
329 pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
330 pattern_def_si = gsi_start (pattern_def_seq);
331 }
332 else if (!gsi_end_p (pattern_def_si))
333 gsi_next (&pattern_def_si);
334 if (pattern_def_seq != NULL)
335 {
336 gimple pattern_def_stmt = NULL;
337 stmt_vec_info pattern_def_stmt_info = NULL;
338
339 while (!gsi_end_p (pattern_def_si))
340 {
341 pattern_def_stmt = gsi_stmt (pattern_def_si);
342 pattern_def_stmt_info
343 = vinfo_for_stmt (pattern_def_stmt);
344 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
345 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
346 break;
347 gsi_next (&pattern_def_si);
348 }
349
350 if (!gsi_end_p (pattern_def_si))
351 {
352 if (dump_enabled_p ())
353 {
354 dump_printf_loc (MSG_NOTE, vect_location,
355 "==> examining pattern def stmt: ");
356 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
357 pattern_def_stmt, 0);
358 dump_printf (MSG_NOTE, "\n");
359 }
360
361 stmt = pattern_def_stmt;
362 stmt_info = pattern_def_stmt_info;
363 }
364 else
365 {
366 pattern_def_si = gsi_none ();
367 analyze_pattern_stmt = false;
368 }
369 }
370 else
371 analyze_pattern_stmt = false;
372 }
373
374 if (gimple_get_lhs (stmt) == NULL_TREE)
375 {
376 if (dump_enabled_p ())
377 {
378 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
379 "not vectorized: irregular stmt.");
380 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
381 0);
382 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
383 }
384 return false;
385 }
386
387 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
388 {
389 if (dump_enabled_p ())
390 {
391 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
392 "not vectorized: vector stmt in loop:");
393 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
394 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
395 }
396 return false;
397 }
398
399 if (STMT_VINFO_VECTYPE (stmt_info))
400 {
 401           /* The only case when a vectype has already been set is for stmts
402 that contain a dataref, or for "pattern-stmts" (stmts
403 generated by the vectorizer to represent/replace a certain
404 idiom). */
405 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
406 || is_pattern_stmt_p (stmt_info)
407 || !gsi_end_p (pattern_def_si));
408 vectype = STMT_VINFO_VECTYPE (stmt_info);
409 }
410 else
411 {
412 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
413 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
414 if (dump_enabled_p ())
415 {
416 dump_printf_loc (MSG_NOTE, vect_location,
417 "get vectype for scalar type: ");
418 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
419 dump_printf (MSG_NOTE, "\n");
420 }
421 vectype = get_vectype_for_scalar_type (scalar_type);
422 if (!vectype)
423 {
424 if (dump_enabled_p ())
425 {
426 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
427 "not vectorized: unsupported "
428 "data-type ");
429 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
430 scalar_type);
431 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
432 }
433 return false;
434 }
435
436 STMT_VINFO_VECTYPE (stmt_info) = vectype;
437
438 if (dump_enabled_p ())
439 {
440 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
441 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
442 dump_printf (MSG_NOTE, "\n");
443 }
444 }
445
 446          /* The vectorization factor is determined by the smallest
447 scalar type (or the largest vector size, but we only
448 support one vector size per loop). */
449 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
450 &dummy);
451 if (dump_enabled_p ())
452 {
453 dump_printf_loc (MSG_NOTE, vect_location,
454 "get vectype for scalar type: ");
455 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
456 dump_printf (MSG_NOTE, "\n");
457 }
458 vf_vectype = get_vectype_for_scalar_type (scalar_type);
459 if (!vf_vectype)
460 {
461 if (dump_enabled_p ())
462 {
463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
464 "not vectorized: unsupported data-type ");
465 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
466 scalar_type);
467 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
468 }
469 return false;
470 }
471
472 if ((GET_MODE_SIZE (TYPE_MODE (vectype))
473 != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
474 {
475 if (dump_enabled_p ())
476 {
477 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
478 "not vectorized: different sized vector "
479 "types in statement, ");
480 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
481 vectype);
482 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
483 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
484 vf_vectype);
485 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
486 }
487 return false;
488 }
489
490 if (dump_enabled_p ())
491 {
492 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
493 dump_generic_expr (MSG_NOTE, TDF_SLIM, vf_vectype);
494 dump_printf (MSG_NOTE, "\n");
495 }
496
497 nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
498 if (dump_enabled_p ())
499 dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n", nunits);
500 if (!vectorization_factor
501 || (nunits > vectorization_factor))
502 vectorization_factor = nunits;
503
504 if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
505 {
506 pattern_def_seq = NULL;
507 gsi_next (&si);
508 }
509 }
510 }
511
 512   /* TODO: Analyze cost. Decide if worthwhile to vectorize. */
513 if (dump_enabled_p ())
514 dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = %d\n",
515 vectorization_factor);
516 if (vectorization_factor <= 1)
517 {
518 if (dump_enabled_p ())
519 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
520 "not vectorized: unsupported data-type\n");
521 return false;
522 }
523 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
524
525 return true;
526 }
527
528
529 /* Function vect_is_simple_iv_evolution.
530
 531      FORNOW: A simple evolution of an induction variable in the loop is
532 considered a polynomial evolution. */
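/* For example, for the counter i in

     for (i = 0; i < n; i++)
       ...

   the scalar evolution of i is the chrec {0, +, 1}_loop, so INIT is 0 and
   STEP is 1.  (Illustrative sketch; the printed chrec notation is only
   approximate.)  */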
533
534 static bool
535 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
536 tree * step)
537 {
538 tree init_expr;
539 tree step_expr;
540 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
541 basic_block bb;
542
543 /* When there is no evolution in this loop, the evolution function
544 is not "simple". */
545 if (evolution_part == NULL_TREE)
546 return false;
547
548 /* When the evolution is a polynomial of degree >= 2
549 the evolution function is not "simple". */
550 if (tree_is_chrec (evolution_part))
551 return false;
552
553 step_expr = evolution_part;
554 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
555
556 if (dump_enabled_p ())
557 {
558 dump_printf_loc (MSG_NOTE, vect_location, "step: ");
559 dump_generic_expr (MSG_NOTE, TDF_SLIM, step_expr);
560 dump_printf (MSG_NOTE, ", init: ");
561 dump_generic_expr (MSG_NOTE, TDF_SLIM, init_expr);
562 dump_printf (MSG_NOTE, "\n");
563 }
564
565 *init = init_expr;
566 *step = step_expr;
567
568 if (TREE_CODE (step_expr) != INTEGER_CST
569 && (TREE_CODE (step_expr) != SSA_NAME
570 || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
571 && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))
572 || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
573 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))
574 || !flag_associative_math)))
575 && (TREE_CODE (step_expr) != REAL_CST
576 || !flag_associative_math))
577 {
578 if (dump_enabled_p ())
579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
580 "step unknown.\n");
581 return false;
582 }
583
584 return true;
585 }
586
587 /* Function vect_analyze_scalar_cycles_1.
588
589 Examine the cross iteration def-use cycles of scalar variables
590 in LOOP. LOOP_VINFO represents the loop that is now being
591 considered for vectorization (can be LOOP, or an outer-loop
592 enclosing LOOP). */
593
594 static void
595 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
596 {
597 basic_block bb = loop->header;
598 tree init, step;
599 stack_vec<gimple, 64> worklist;
600 gimple_stmt_iterator gsi;
601 bool double_reduc;
602
603 if (dump_enabled_p ())
604 dump_printf_loc (MSG_NOTE, vect_location,
605 "=== vect_analyze_scalar_cycles ===\n");
606
607 /* First - identify all inductions. Reduction detection assumes that all the
 608      inductions have been identified; therefore, this order must not be
609 changed. */
610 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
611 {
612 gimple phi = gsi_stmt (gsi);
613 tree access_fn = NULL;
614 tree def = PHI_RESULT (phi);
615 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
616
617 if (dump_enabled_p ())
618 {
619 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
620 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
621 dump_printf (MSG_NOTE, "\n");
622 }
623
624 /* Skip virtual phi's. The data dependences that are associated with
625 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
626 if (virtual_operand_p (def))
627 continue;
628
629 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
630
631 /* Analyze the evolution function. */
632 access_fn = analyze_scalar_evolution (loop, def);
633 if (access_fn)
634 {
635 STRIP_NOPS (access_fn);
636 if (dump_enabled_p ())
637 {
638 dump_printf_loc (MSG_NOTE, vect_location,
639 "Access function of PHI: ");
640 dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
641 dump_printf (MSG_NOTE, "\n");
642 }
643 STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
644 = evolution_part_in_loop_num (access_fn, loop->num);
645 }
646
647 if (!access_fn
648 || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
649 || (LOOP_VINFO_LOOP (loop_vinfo) != loop
650 && TREE_CODE (step) != INTEGER_CST))
651 {
652 worklist.safe_push (phi);
653 continue;
654 }
655
656 gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
657
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n");
660 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
661 }
662
663
664 /* Second - identify all reductions and nested cycles. */
665 while (worklist.length () > 0)
666 {
667 gimple phi = worklist.pop ();
668 tree def = PHI_RESULT (phi);
669 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
670 gimple reduc_stmt;
671 bool nested_cycle;
672
673 if (dump_enabled_p ())
674 {
675 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
676 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
677 dump_printf (MSG_NOTE, "\n");
678 }
679
680 gcc_assert (!virtual_operand_p (def)
681 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
682
683 nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
684 reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
685 &double_reduc);
686 if (reduc_stmt)
687 {
688 if (double_reduc)
689 {
690 if (dump_enabled_p ())
691 dump_printf_loc (MSG_NOTE, vect_location,
692 "Detected double reduction.\n");
693
694 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
695 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
696 vect_double_reduction_def;
697 }
698 else
699 {
700 if (nested_cycle)
701 {
702 if (dump_enabled_p ())
703 dump_printf_loc (MSG_NOTE, vect_location,
704 "Detected vectorizable nested cycle.\n");
705
706 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
707 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
708 vect_nested_cycle;
709 }
710 else
711 {
712 if (dump_enabled_p ())
713 dump_printf_loc (MSG_NOTE, vect_location,
714 "Detected reduction.\n");
715
716 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
717 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
718 vect_reduction_def;
719 /* Store the reduction cycles for possible vectorization in
720 loop-aware SLP. */
721 LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt);
722 }
723 }
724 }
725 else
726 if (dump_enabled_p ())
727 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
728 "Unknown def-use cycle pattern.\n");
729 }
730 }
731
732
733 /* Function vect_analyze_scalar_cycles.
734
735 Examine the cross iteration def-use cycles of scalar variables, by
736 analyzing the loop-header PHIs of scalar variables. Classify each
737 cycle as one of the following: invariant, induction, reduction, unknown.
 738      We do that for the loop represented by LOOP_VINFO, and also for its
 739      inner-loop, if it exists.
740 Examples for scalar cycles:
741
742 Example1: reduction:
743
744 loop1:
745 for (i=0; i<N; i++)
746 sum += a[i];
747
748 Example2: induction:
749
750 loop2:
751 for (i=0; i<N; i++)
752 a[i] = i; */
753
754 static void
755 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
756 {
757 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
758
759 vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
760
761 /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
 762      Reductions in such an inner-loop therefore have different properties than
763 the reductions in the nest that gets vectorized:
764 1. When vectorized, they are executed in the same order as in the original
765 scalar loop, so we can't change the order of computation when
766 vectorizing them.
767 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
768 current checks are too strict. */
769
770 if (loop->inner)
771 vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
772 }
773
774 /* Function vect_get_loop_niters.
775
 776    Determine how many iterations the loop executes.
777 If an expression that represents the number of iterations
778 can be constructed, place it in NUMBER_OF_ITERATIONS.
779 Return the loop exit condition. */
780
781 static gimple
782 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
783 {
784 tree niters;
785
786 if (dump_enabled_p ())
787 dump_printf_loc (MSG_NOTE, vect_location,
788 "=== get_loop_niters ===\n");
789 niters = number_of_exit_cond_executions (loop);
790
791 if (niters != NULL_TREE
792 && niters != chrec_dont_know)
793 {
794 *number_of_iterations = niters;
795
796 if (dump_enabled_p ())
797 {
798 dump_printf_loc (MSG_NOTE, vect_location, "==> get_loop_niters:");
799 dump_generic_expr (MSG_NOTE, TDF_SLIM, *number_of_iterations);
800 dump_printf (MSG_NOTE, "\n");
801 }
802 }
803
804 return get_loop_exit_condition (loop);
805 }
806
807
808 /* Function bb_in_loop_p
809
810 Used as predicate for dfs order traversal of the loop bbs. */
811
812 static bool
813 bb_in_loop_p (const_basic_block bb, const void *data)
814 {
815 const struct loop *const loop = (const struct loop *)data;
816 if (flow_bb_inside_loop_p (loop, bb))
817 return true;
818 return false;
819 }
820
821
822 /* Function new_loop_vec_info.
823
824 Create and initialize a new loop_vec_info struct for LOOP, as well as
825 stmt_vec_info structs for all the stmts in LOOP. */
826
827 static loop_vec_info
828 new_loop_vec_info (struct loop *loop)
829 {
830 loop_vec_info res;
831 basic_block *bbs;
832 gimple_stmt_iterator si;
833 unsigned int i, nbbs;
834
835 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
836 LOOP_VINFO_LOOP (res) = loop;
837
838 bbs = get_loop_body (loop);
839
840 /* Create/Update stmt_info for all stmts in the loop. */
841 for (i = 0; i < loop->num_nodes; i++)
842 {
843 basic_block bb = bbs[i];
844
845 /* BBs in a nested inner-loop will have been already processed (because
846 we will have called vect_analyze_loop_form for any nested inner-loop).
847 Therefore, for stmts in an inner-loop we just want to update the
848 STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
849 loop_info of the outer-loop we are currently considering to vectorize
850 (instead of the loop_info of the inner-loop).
851 For stmts in other BBs we need to create a stmt_info from scratch. */
852 if (bb->loop_father != loop)
853 {
854 /* Inner-loop bb. */
855 gcc_assert (loop->inner && bb->loop_father == loop->inner);
856 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
857 {
858 gimple phi = gsi_stmt (si);
859 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
860 loop_vec_info inner_loop_vinfo =
861 STMT_VINFO_LOOP_VINFO (stmt_info);
862 gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
863 STMT_VINFO_LOOP_VINFO (stmt_info) = res;
864 }
865 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
866 {
867 gimple stmt = gsi_stmt (si);
868 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
869 loop_vec_info inner_loop_vinfo =
870 STMT_VINFO_LOOP_VINFO (stmt_info);
871 gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
872 STMT_VINFO_LOOP_VINFO (stmt_info) = res;
873 }
874 }
875 else
876 {
877 /* bb in current nest. */
878 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
879 {
880 gimple phi = gsi_stmt (si);
881 gimple_set_uid (phi, 0);
882 set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res, NULL));
883 }
884
885 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
886 {
887 gimple stmt = gsi_stmt (si);
888 gimple_set_uid (stmt, 0);
889 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res, NULL));
890 }
891 }
892 }
893
894 /* CHECKME: We want to visit all BBs before their successors (except for
895 latch blocks, for which this assertion wouldn't hold). In the simple
 896      case of the loop forms we allow, a dfs order of the BBs would be the same
897 as reversed postorder traversal, so we are safe. */
898
899 free (bbs);
900 bbs = XCNEWVEC (basic_block, loop->num_nodes);
901 nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
902 bbs, loop->num_nodes, loop);
903 gcc_assert (nbbs == loop->num_nodes);
904
905 LOOP_VINFO_BBS (res) = bbs;
906 LOOP_VINFO_NITERS (res) = NULL;
907 LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
908 LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
909 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
910 LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
911 LOOP_VINFO_VECT_FACTOR (res) = 0;
912 LOOP_VINFO_LOOP_NEST (res).create (3);
913 LOOP_VINFO_DATAREFS (res).create (10);
914 LOOP_VINFO_DDRS (res).create (10 * 10);
915 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
916 LOOP_VINFO_MAY_MISALIGN_STMTS (res).create (
917 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
918 LOOP_VINFO_MAY_ALIAS_DDRS (res).create (
919 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
920 LOOP_VINFO_GROUPED_STORES (res).create (10);
921 LOOP_VINFO_REDUCTIONS (res).create (10);
922 LOOP_VINFO_REDUCTION_CHAINS (res).create (10);
923 LOOP_VINFO_SLP_INSTANCES (res).create (10);
924 LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
925 LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
926 LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
927 LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
928
929 return res;
930 }
931
932
933 /* Function destroy_loop_vec_info.
934
935 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
936 stmts in the loop. */
937
938 void
939 destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
940 {
941 struct loop *loop;
942 basic_block *bbs;
943 int nbbs;
944 gimple_stmt_iterator si;
945 int j;
946 vec<slp_instance> slp_instances;
947 slp_instance instance;
948 bool swapped;
949
950 if (!loop_vinfo)
951 return;
952
953 loop = LOOP_VINFO_LOOP (loop_vinfo);
954
955 bbs = LOOP_VINFO_BBS (loop_vinfo);
956 nbbs = clean_stmts ? loop->num_nodes : 0;
957 swapped = LOOP_VINFO_OPERANDS_SWAPPED (loop_vinfo);
958
959 for (j = 0; j < nbbs; j++)
960 {
961 basic_block bb = bbs[j];
962 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
963 free_stmt_vec_info (gsi_stmt (si));
964
965 for (si = gsi_start_bb (bb); !gsi_end_p (si); )
966 {
967 gimple stmt = gsi_stmt (si);
968
969 /* We may have broken canonical form by moving a constant
970 into RHS1 of a commutative op. Fix such occurrences. */
971 if (swapped && is_gimple_assign (stmt))
972 {
973 enum tree_code code = gimple_assign_rhs_code (stmt);
974
975 if ((code == PLUS_EXPR
976 || code == POINTER_PLUS_EXPR
977 || code == MULT_EXPR)
978 && CONSTANT_CLASS_P (gimple_assign_rhs1 (stmt)))
979 swap_ssa_operands (stmt,
980 gimple_assign_rhs1_ptr (stmt),
981 gimple_assign_rhs2_ptr (stmt));
982 }
983
984 /* Free stmt_vec_info. */
985 free_stmt_vec_info (stmt);
986 gsi_next (&si);
987 }
988 }
989
990 free (LOOP_VINFO_BBS (loop_vinfo));
991 vect_destroy_datarefs (loop_vinfo, NULL);
992 free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
993 LOOP_VINFO_LOOP_NEST (loop_vinfo).release ();
994 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).release ();
995 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).release ();
996 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
997 FOR_EACH_VEC_ELT (slp_instances, j, instance)
998 vect_free_slp_instance (instance);
999
1000 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
1001 LOOP_VINFO_GROUPED_STORES (loop_vinfo).release ();
1002 LOOP_VINFO_REDUCTIONS (loop_vinfo).release ();
1003 LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).release ();
1004
1005 if (LOOP_VINFO_PEELING_HTAB (loop_vinfo).is_created ())
1006 LOOP_VINFO_PEELING_HTAB (loop_vinfo).dispose ();
1007
1008 destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
1009
1010 free (loop_vinfo);
1011 loop->aux = NULL;
1012 }
1013
1014
1015 /* Function vect_analyze_loop_1.
1016
1017 Apply a set of analyses on LOOP, and create a loop_vec_info struct
1018 for it. The different analyses will record information in the
1019 loop_vec_info struct. This is a subset of the analyses applied in
1020 vect_analyze_loop, to be applied on an inner-loop nested in the loop
1021 that is now considered for (outer-loop) vectorization. */
1022
1023 static loop_vec_info
1024 vect_analyze_loop_1 (struct loop *loop)
1025 {
1026 loop_vec_info loop_vinfo;
1027
1028 if (dump_enabled_p ())
1029 dump_printf_loc (MSG_NOTE, vect_location,
1030 "===== analyze_loop_nest_1 =====\n");
1031
 1032   /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
1033
1034 loop_vinfo = vect_analyze_loop_form (loop);
1035 if (!loop_vinfo)
1036 {
1037 if (dump_enabled_p ())
1038 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1039 "bad inner-loop form.\n");
1040 return NULL;
1041 }
1042
1043 return loop_vinfo;
1044 }
1045
1046
1047 /* Function vect_analyze_loop_form.
1048
1049 Verify that certain CFG restrictions hold, including:
1050 - the loop has a pre-header
1051 - the loop has a single entry and exit
1052 - the loop exit condition is simple enough, and the number of iterations
1053 can be analyzed (a countable loop). */
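/* For instance, a search loop such as

     for (i = 0; i < n; i++)
       if (a[i] == key)
         break;

   is rejected here because it has more than one exit, whereas the
   single-exit, countable loops shown at the top of this file satisfy
   these restrictions.  (Illustrative example; 'a' and 'key' are
   assumed.)  */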
1054
1055 loop_vec_info
1056 vect_analyze_loop_form (struct loop *loop)
1057 {
1058 loop_vec_info loop_vinfo;
1059 gimple loop_cond;
1060 tree number_of_iterations = NULL;
1061 loop_vec_info inner_loop_vinfo = NULL;
1062
1063 if (dump_enabled_p ())
1064 dump_printf_loc (MSG_NOTE, vect_location,
1065 "=== vect_analyze_loop_form ===\n");
1066
1067 /* Different restrictions apply when we are considering an inner-most loop,
1068 vs. an outer (nested) loop.
1069 (FORNOW. May want to relax some of these restrictions in the future). */
1070
1071 if (!loop->inner)
1072 {
1073 /* Inner-most loop. We currently require that the number of BBs is
1074 exactly 2 (the header and latch). Vectorizable inner-most loops
1075 look like this:
1076
1077 (pre-header)
1078 |
1079 header <--------+
1080 | | |
1081 | +--> latch --+
1082 |
1083 (exit-bb) */
1084
1085 if (loop->num_nodes != 2)
1086 {
1087 if (dump_enabled_p ())
1088 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1089 "not vectorized: control flow in loop.\n");
1090 return NULL;
1091 }
1092
1093 if (empty_block_p (loop->header))
1094 {
1095 if (dump_enabled_p ())
1096 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1097 "not vectorized: empty loop.\n");
1098 return NULL;
1099 }
1100 }
1101 else
1102 {
1103 struct loop *innerloop = loop->inner;
1104 edge entryedge;
1105
1106 /* Nested loop. We currently require that the loop is doubly-nested,
1107 contains a single inner loop, and the number of BBs is exactly 5.
1108 Vectorizable outer-loops look like this:
1109
1110 (pre-header)
1111 |
1112 header <---+
1113 | |
1114 inner-loop |
1115 | |
1116 tail ------+
1117 |
1118 (exit-bb)
1119
1120 The inner-loop has the properties expected of inner-most loops
1121 as described above. */
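      /* At the source level, a candidate for outer-loop vectorization
         typically looks like (an illustrative sketch; the arrays and
         bounds are assumed):

           for (i = 0; i < n; i++)
             {
               s = 0;
               for (j = 0; j < m; j++)
                 s += a[i][j];
               b[i] = s;
             }

         where the i-loop is vectorized and the j-loop keeps executing
         sequentially.  */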
1122
1123 if ((loop->inner)->inner || (loop->inner)->next)
1124 {
1125 if (dump_enabled_p ())
1126 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1127 "not vectorized: multiple nested loops.\n");
1128 return NULL;
1129 }
1130
1131 /* Analyze the inner-loop. */
1132 inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
1133 if (!inner_loop_vinfo)
1134 {
1135 if (dump_enabled_p ())
1136 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1137 "not vectorized: Bad inner loop.\n");
1138 return NULL;
1139 }
1140
1141 if (!expr_invariant_in_loop_p (loop,
1142 LOOP_VINFO_NITERS (inner_loop_vinfo)))
1143 {
1144 if (dump_enabled_p ())
1145 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1146 "not vectorized: inner-loop count not"
1147 " invariant.\n");
1148 destroy_loop_vec_info (inner_loop_vinfo, true);
1149 return NULL;
1150 }
1151
1152 if (loop->num_nodes != 5)
1153 {
1154 if (dump_enabled_p ())
1155 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1156 "not vectorized: control flow in loop.\n");
1157 destroy_loop_vec_info (inner_loop_vinfo, true);
1158 return NULL;
1159 }
1160
1161 gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
1162 entryedge = EDGE_PRED (innerloop->header, 0);
1163 if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
1164 entryedge = EDGE_PRED (innerloop->header, 1);
1165
1166 if (entryedge->src != loop->header
1167 || !single_exit (innerloop)
1168 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
1169 {
1170 if (dump_enabled_p ())
1171 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1172 "not vectorized: unsupported outerloop form.\n");
1173 destroy_loop_vec_info (inner_loop_vinfo, true);
1174 return NULL;
1175 }
1176
1177 if (dump_enabled_p ())
1178 dump_printf_loc (MSG_NOTE, vect_location,
1179 "Considering outer-loop vectorization.\n");
1180 }
1181
1182 if (!single_exit (loop)
1183 || EDGE_COUNT (loop->header->preds) != 2)
1184 {
1185 if (dump_enabled_p ())
1186 {
1187 if (!single_exit (loop))
1188 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1189 "not vectorized: multiple exits.\n");
1190 else if (EDGE_COUNT (loop->header->preds) != 2)
1191 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1192 "not vectorized: too many incoming edges.\n");
1193 }
1194 if (inner_loop_vinfo)
1195 destroy_loop_vec_info (inner_loop_vinfo, true);
1196 return NULL;
1197 }
1198
 1199   /* We assume that the loop exit condition is at the end of the loop, i.e.,
1200 that the loop is represented as a do-while (with a proper if-guard
1201 before the loop if needed), where the loop header contains all the
1202 executable statements, and the latch is empty. */
1203 if (!empty_block_p (loop->latch)
1204 || !gimple_seq_empty_p (phi_nodes (loop->latch)))
1205 {
1206 if (dump_enabled_p ())
1207 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1208 "not vectorized: latch block not empty.\n");
1209 if (inner_loop_vinfo)
1210 destroy_loop_vec_info (inner_loop_vinfo, true);
1211 return NULL;
1212 }
1213
1214 /* Make sure there exists a single-predecessor exit bb: */
1215 if (!single_pred_p (single_exit (loop)->dest))
1216 {
1217 edge e = single_exit (loop);
1218 if (!(e->flags & EDGE_ABNORMAL))
1219 {
1220 split_loop_exit_edge (e);
1221 if (dump_enabled_p ())
1222 dump_printf (MSG_NOTE, "split exit edge.\n");
1223 }
1224 else
1225 {
1226 if (dump_enabled_p ())
1227 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1228 "not vectorized: abnormal loop exit edge.\n");
1229 if (inner_loop_vinfo)
1230 destroy_loop_vec_info (inner_loop_vinfo, true);
1231 return NULL;
1232 }
1233 }
1234
1235 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
1236 if (!loop_cond)
1237 {
1238 if (dump_enabled_p ())
1239 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1240 "not vectorized: complicated exit condition.\n");
1241 if (inner_loop_vinfo)
1242 destroy_loop_vec_info (inner_loop_vinfo, true);
1243 return NULL;
1244 }
1245
1246 if (!number_of_iterations)
1247 {
1248 if (dump_enabled_p ())
1249 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1250 "not vectorized: number of iterations cannot be "
1251 "computed.\n");
1252 if (inner_loop_vinfo)
1253 destroy_loop_vec_info (inner_loop_vinfo, true);
1254 return NULL;
1255 }
1256
1257 if (chrec_contains_undetermined (number_of_iterations))
1258 {
1259 if (dump_enabled_p ())
1260 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1261 "Infinite number of iterations.\n");
1262 if (inner_loop_vinfo)
1263 destroy_loop_vec_info (inner_loop_vinfo, true);
1264 return NULL;
1265 }
1266
1267 if (!NITERS_KNOWN_P (number_of_iterations))
1268 {
1269 if (dump_enabled_p ())
1270 {
1271 dump_printf_loc (MSG_NOTE, vect_location,
1272 "Symbolic number of iterations is ");
1273 dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
1274 dump_printf (MSG_NOTE, "\n");
1275 }
1276 }
1277 else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
1278 {
1279 if (dump_enabled_p ())
1280 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1281 "not vectorized: number of iterations = 0.\n");
1282 if (inner_loop_vinfo)
1283 destroy_loop_vec_info (inner_loop_vinfo, true);
1284 return NULL;
1285 }
1286
1287 loop_vinfo = new_loop_vec_info (loop);
1288 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1289 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
1290
1291 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
1292
 1293   /* CHECKME: May want to keep it around in the future. */
1294 if (inner_loop_vinfo)
1295 destroy_loop_vec_info (inner_loop_vinfo, false);
1296
1297 gcc_assert (!loop->aux);
1298 loop->aux = loop_vinfo;
1299 return loop_vinfo;
1300 }
1301
1302
1303 /* Function vect_analyze_loop_operations.
1304
1305 Scan the loop stmts and make sure they are all vectorizable. */
1306
1307 static bool
1308 vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
1309 {
1310 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1311 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1312 int nbbs = loop->num_nodes;
1313 gimple_stmt_iterator si;
1314 unsigned int vectorization_factor = 0;
1315 int i;
1316 gimple phi;
1317 stmt_vec_info stmt_info;
1318 bool need_to_vectorize = false;
1319 int min_profitable_iters;
1320 int min_scalar_loop_bound;
1321 unsigned int th;
1322 bool only_slp_in_loop = true, ok;
1323 HOST_WIDE_INT max_niter;
1324 HOST_WIDE_INT estimated_niter;
1325 int min_profitable_estimate;
1326
1327 if (dump_enabled_p ())
1328 dump_printf_loc (MSG_NOTE, vect_location,
1329 "=== vect_analyze_loop_operations ===\n");
1330
1331 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1332 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1333 if (slp)
1334 {
1335 /* If all the stmts in the loop can be SLPed, we perform only SLP, and
 1336          the vectorization factor of the loop is the unrolling factor required by
 1337          the SLP instances. If that unrolling factor is 1, we say that we
 1338          perform pure SLP on the loop - cross-iteration parallelism is not
1339 exploited. */
1340 for (i = 0; i < nbbs; i++)
1341 {
1342 basic_block bb = bbs[i];
1343 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1344 {
1345 gimple stmt = gsi_stmt (si);
1346 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1347 gcc_assert (stmt_info);
1348 if ((STMT_VINFO_RELEVANT_P (stmt_info)
1349 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1350 && !PURE_SLP_STMT (stmt_info))
1351 /* STMT needs both SLP and loop-based vectorization. */
1352 only_slp_in_loop = false;
1353 }
1354 }
1355
1356 if (only_slp_in_loop)
1357 vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
1358 else
1359 vectorization_factor = least_common_multiple (vectorization_factor,
1360 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
1361
1362 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1363 if (dump_enabled_p ())
1364 dump_printf_loc (MSG_NOTE, vect_location,
1365 "Updating vectorization factor to %d\n",
1366 vectorization_factor);
1367 }
1368
1369 for (i = 0; i < nbbs; i++)
1370 {
1371 basic_block bb = bbs[i];
1372
1373 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1374 {
1375 phi = gsi_stmt (si);
1376 ok = true;
1377
1378 stmt_info = vinfo_for_stmt (phi);
1379 if (dump_enabled_p ())
1380 {
1381 dump_printf_loc (MSG_NOTE, vect_location, "examining phi: ");
1382 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1383 dump_printf (MSG_NOTE, "\n");
1384 }
1385
1386 /* Inner-loop loop-closed exit phi in outer-loop vectorization
1387 (i.e., a phi in the tail of the outer-loop). */
1388 if (! is_loop_header_bb_p (bb))
1389 {
1390 /* FORNOW: we currently don't support the case that these phis
 1391              are not used in the outer-loop (unless it is a double reduction,
 1392              i.e., this phi is vect_reduction_def), because this case
 1393              requires us to actually do something here.  */
1394 if ((!STMT_VINFO_RELEVANT_P (stmt_info)
1395 || STMT_VINFO_LIVE_P (stmt_info))
1396 && STMT_VINFO_DEF_TYPE (stmt_info)
1397 != vect_double_reduction_def)
1398 {
1399 if (dump_enabled_p ())
1400 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1401 "Unsupported loop-closed phi in "
1402 "outer-loop.\n");
1403 return false;
1404 }
1405
1406 /* If PHI is used in the outer loop, we check that its operand
1407 is defined in the inner loop. */
1408 if (STMT_VINFO_RELEVANT_P (stmt_info))
1409 {
1410 tree phi_op;
1411 gimple op_def_stmt;
1412
1413 if (gimple_phi_num_args (phi) != 1)
1414 return false;
1415
1416 phi_op = PHI_ARG_DEF (phi, 0);
1417 if (TREE_CODE (phi_op) != SSA_NAME)
1418 return false;
1419
1420 op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
1421 if (gimple_nop_p (op_def_stmt)
1422 || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
1423 || !vinfo_for_stmt (op_def_stmt))
1424 return false;
1425
1426 if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1427 != vect_used_in_outer
1428 && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1429 != vect_used_in_outer_by_reduction)
1430 return false;
1431 }
1432
1433 continue;
1434 }
1435
1436 gcc_assert (stmt_info);
1437
1438 if (STMT_VINFO_LIVE_P (stmt_info))
1439 {
1440 /* FORNOW: not yet supported. */
1441 if (dump_enabled_p ())
1442 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1443 "not vectorized: value used after loop.\n");
1444 return false;
1445 }
1446
1447 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1448 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1449 {
1450 /* A scalar-dependence cycle that we don't support. */
1451 if (dump_enabled_p ())
1452 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1453 "not vectorized: scalar dependence cycle.\n");
1454 return false;
1455 }
1456
1457 if (STMT_VINFO_RELEVANT_P (stmt_info))
1458 {
1459 need_to_vectorize = true;
1460 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
1461 ok = vectorizable_induction (phi, NULL, NULL);
1462 }
1463
1464 if (!ok)
1465 {
1466 if (dump_enabled_p ())
1467 {
1468 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1469 "not vectorized: relevant phi not "
1470 "supported: ");
1471 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, phi, 0);
1472 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1473 }
1474 return false;
1475 }
1476 }
1477
1478 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1479 {
1480 gimple stmt = gsi_stmt (si);
1481 if (!gimple_clobber_p (stmt)
1482 && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
1483 return false;
1484 }
1485 } /* bbs */
1486
1487 /* All operations in the loop are either irrelevant (deal with loop
1488 control, or dead), or only used outside the loop and can be moved
1489 out of the loop (e.g. invariants, inductions). The loop can be
1490 optimized away by scalar optimizations. We're better off not
1491 touching this loop. */
1492 if (!need_to_vectorize)
1493 {
1494 if (dump_enabled_p ())
1495 dump_printf_loc (MSG_NOTE, vect_location,
1496 "All the computation can be taken out of the loop.\n");
1497 if (dump_enabled_p ())
1498 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1499 "not vectorized: redundant loop. no profit to "
1500 "vectorize.\n");
1501 return false;
1502 }
1503
1504 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
1505 dump_printf_loc (MSG_NOTE, vect_location,
1506 "vectorization_factor = %d, niters = "
1507 HOST_WIDE_INT_PRINT_DEC "\n", vectorization_factor,
1508 LOOP_VINFO_INT_NITERS (loop_vinfo));
1509
1510 if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1511 && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
1512 || ((max_niter = max_stmt_executions_int (loop)) != -1
1513 && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor))
1514 {
1515 if (dump_enabled_p ())
1516 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1517 "not vectorized: iteration count too small.\n");
1518 if (dump_enabled_p ())
1519 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1520 "not vectorized: iteration count smaller than "
1521 "vectorization factor.\n");
1522 return false;
1523 }
1524
 1525   /* Analyze cost. Decide if worthwhile to vectorize. */
1526
1527 /* Once VF is set, SLP costs should be updated since the number of created
1528 vector stmts depends on VF. */
1529 vect_update_slp_costs_according_to_vf (loop_vinfo);
1530
1531 vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
1532 &min_profitable_estimate);
1533 LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
1534
1535 if (min_profitable_iters < 0)
1536 {
1537 if (dump_enabled_p ())
1538 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1539 "not vectorized: vectorization not profitable.\n");
1540 if (dump_enabled_p ())
1541 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1542 "not vectorized: vector version will never be "
1543 "profitable.\n");
1544 return false;
1545 }
1546
1547 min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1548 * vectorization_factor) - 1);
1549
1550
 1551   /* Use the cost model only if it is more conservative than the user-specified
1552 threshold. */
1553
1554 th = (unsigned) min_scalar_loop_bound;
1555 if (min_profitable_iters
1556 && (!min_scalar_loop_bound
1557 || min_profitable_iters > min_scalar_loop_bound))
1558 th = (unsigned) min_profitable_iters;
1559
1560 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1561 && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
1562 {
1563 if (dump_enabled_p ())
1564 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1565 "not vectorized: vectorization not profitable.\n");
1566 if (dump_enabled_p ())
1567 dump_printf_loc (MSG_NOTE, vect_location,
1568 "not vectorized: iteration count smaller than user "
1569 "specified loop bound parameter or minimum profitable "
1570 "iterations (whichever is more conservative).\n");
1571 return false;
1572 }
1573
1574 if ((estimated_niter = estimated_stmt_executions_int (loop)) != -1
1575 && ((unsigned HOST_WIDE_INT) estimated_niter
1576 <= MAX (th, (unsigned)min_profitable_estimate)))
1577 {
1578 if (dump_enabled_p ())
1579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1580 "not vectorized: estimated iteration count too "
1581 "small.\n");
1582 if (dump_enabled_p ())
1583 dump_printf_loc (MSG_NOTE, vect_location,
1584 "not vectorized: estimated iteration count smaller "
1585 "than specified loop bound parameter or minimum "
1586 "profitable iterations (whichever is more "
1587 "conservative).\n");
1588 return false;
1589 }
1590
1591 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)
1592 || ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
1593 < exact_log2 (vectorization_factor)))
1594 {
1595 if (dump_enabled_p ())
1596 dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required.\n");
1597 if (!vect_can_advance_ivs_p (loop_vinfo))
1598 {
1599 if (dump_enabled_p ())
1600 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1601 "not vectorized: can't create epilog loop 1.\n");
1602 return false;
1603 }
1604 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1605 {
1606 if (dump_enabled_p ())
1607 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1608 "not vectorized: can't create epilog loop 2.\n");
1609 return false;
1610 }
1611 }
1612
1613 return true;
1614 }
1615
1616
1617 /* Function vect_analyze_loop_2.
1618
1619 Apply a set of analyses on LOOP, and create a loop_vec_info struct
1620 for it. The different analyses will record information in the
1621 loop_vec_info struct. */
1622 static bool
1623 vect_analyze_loop_2 (loop_vec_info loop_vinfo)
1624 {
1625 bool ok, slp = false;
1626 int max_vf = MAX_VECTORIZATION_FACTOR;
1627 int min_vf = 2;
1628
1629 /* Find all data references in the loop (which correspond to vdefs/vuses)
1630 and analyze their evolution in the loop. Also adjust the minimal
1631 vectorization factor according to the loads and stores.
1632
 1633      FORNOW: Handle only simple array references whose
 1634      alignment can be forced, and aligned pointer-references. */
1635
1636 ok = vect_analyze_data_refs (loop_vinfo, NULL, &min_vf);
1637 if (!ok)
1638 {
1639 if (dump_enabled_p ())
1640 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1641 "bad data references.\n");
1642 return false;
1643 }
1644
1645 /* Analyze the access patterns of the data-refs in the loop (consecutive,
1646 complex, etc.). FORNOW: Only handle consecutive access pattern. */
1647
1648 ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
1649 if (!ok)
1650 {
1651 if (dump_enabled_p ())
1652 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1653 "bad data access.\n");
1654 return false;
1655 }
1656
1657 /* Classify all cross-iteration scalar data-flow cycles.
1658 Cross-iteration cycles caused by virtual phis are analyzed separately. */
1659
1660 vect_analyze_scalar_cycles (loop_vinfo);
1661
1662 vect_pattern_recog (loop_vinfo, NULL);
1663
1664 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
1665
1666 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1667 if (!ok)
1668 {
1669 if (dump_enabled_p ())
1670 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1671 "unexpected pattern.\n");
1672 return false;
1673 }
1674
1675 /* Analyze data dependences between the data-refs in the loop
1676 and adjust the maximum vectorization factor according to
1677 the dependences.
1678 FORNOW: fail at the first data dependence that we encounter. */
1679
1680 ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf);
1681 if (!ok
1682 || max_vf < min_vf)
1683 {
1684 if (dump_enabled_p ())
1685 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1686 "bad data dependence.\n");
1687 return false;
1688 }
1689
1690 ok = vect_determine_vectorization_factor (loop_vinfo);
1691 if (!ok)
1692 {
1693 if (dump_enabled_p ())
1694 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1695 "can't determine vectorization factor.\n");
1696 return false;
1697 }
1698 if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1699 {
1700 if (dump_enabled_p ())
1701 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1702 "bad data dependence.\n");
1703 return false;
1704 }
1705
1706 /* Analyze the alignment of the data-refs in the loop.
1707 Fail if a data reference is found that cannot be vectorized. */
1708
1709 ok = vect_analyze_data_refs_alignment (loop_vinfo, NULL);
1710 if (!ok)
1711 {
1712 if (dump_enabled_p ())
1713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1714 "bad data alignment.\n");
1715 return false;
1716 }
1717
1718 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
1719 It is important to call pruning after vect_analyze_data_ref_accesses,
1720 since we use grouping information gathered by interleaving analysis. */
1721 ok = vect_prune_runtime_alias_test_list (loop_vinfo);
1722 if (!ok)
1723 {
1724 if (dump_enabled_p ())
1725 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1726 "too long list of versioning for alias "
1727 "run-time tests.\n");
1728 return false;
1729 }
1730
1731 /* This pass will decide on using loop versioning and/or loop peeling in
1732 order to enhance the alignment of data references in the loop. */
1733
1734 ok = vect_enhance_data_refs_alignment (loop_vinfo);
1735 if (!ok)
1736 {
1737 if (dump_enabled_p ())
1738 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1739 "bad data alignment.\n");
1740 return false;
1741 }
1742
1743 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
1744 ok = vect_analyze_slp (loop_vinfo, NULL);
1745 if (ok)
1746 {
1747 /* Decide which possible SLP instances to SLP. */
1748 slp = vect_make_slp_decision (loop_vinfo);
1749
1750 /* Find stmts that need to be both vectorized and SLPed. */
1751 vect_detect_hybrid_slp (loop_vinfo);
1752 }
1753 else
1754 return false;
1755
1756 /* Scan all the operations in the loop and make sure they are
1757 vectorizable. */
1758
1759 ok = vect_analyze_loop_operations (loop_vinfo, slp);
1760 if (!ok)
1761 {
1762 if (dump_enabled_p ())
1763 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1764 "bad operation or unsupported loop bound.\n");
1765 return false;
1766 }
1767
1768 return true;
1769 }
1770
1771 /* Function vect_analyze_loop.
1772
1773 Apply a set of analyses on LOOP, and create a loop_vec_info struct
1774 for it. The different analyses will record information in the
1775 loop_vec_info struct. */
1776 loop_vec_info
1777 vect_analyze_loop (struct loop *loop)
1778 {
1779 loop_vec_info loop_vinfo;
1780 unsigned int vector_sizes;
1781
1782 /* Autodetect first vector size we try. */
1783 current_vector_size = 0;
1784 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
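  /* VECTOR_SIZES is a bitmask of the vector sizes, in bytes, that the
     target supports for autovectorization - for instance 32 | 16 on a
     target providing both 32-byte and 16-byte vectors (illustrative
     values).  If the analysis below fails for the autodetected size,
     the loop at the end of this function retries with the largest
     remaining size in the mask.  */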
1785
1786 if (dump_enabled_p ())
1787 dump_printf_loc (MSG_NOTE, vect_location,
1788 "===== analyze_loop_nest =====\n");
1789
1790 if (loop_outer (loop)
1791 && loop_vec_info_for_loop (loop_outer (loop))
1792 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
1793 {
1794 if (dump_enabled_p ())
1795 dump_printf_loc (MSG_NOTE, vect_location,
1796 "outer-loop already vectorized.\n");
1797 return NULL;
1798 }
1799
1800 while (1)
1801 {
1802 /* Check the CFG characteristics of the loop (nesting, entry/exit). */
1803 loop_vinfo = vect_analyze_loop_form (loop);
1804 if (!loop_vinfo)
1805 {
1806 if (dump_enabled_p ())
1807 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1808 "bad loop form.\n");
1809 return NULL;
1810 }
1811
1812 if (vect_analyze_loop_2 (loop_vinfo))
1813 {
1814 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
1815
1816 return loop_vinfo;
1817 }
1818
1819 destroy_loop_vec_info (loop_vinfo, true);
1820
1821 vector_sizes &= ~current_vector_size;
1822 if (vector_sizes == 0
1823 || current_vector_size == 0)
1824 return NULL;
1825
1826 /* Try the next biggest vector size. */
1827 current_vector_size = 1 << floor_log2 (vector_sizes);
1828 if (dump_enabled_p ())
1829 dump_printf_loc (MSG_NOTE, vect_location,
1830 "***** Re-trying analysis with "
1831 "vector size %d\n", current_vector_size);
1832 }
1833 }
1834
1835
1836 /* Function reduction_code_for_scalar_code
1837
1838 Input:
1839    CODE - tree_code of a reduction operation.
1840
1841 Output:
1842 REDUC_CODE - the corresponding tree-code to be used to reduce the
1843 vector of partial results into a single scalar result (which
1844 will also reside in a vector) or ERROR_MARK if the operation is
1845 a supported reduction operation, but does not have such tree-code.
1846
1847    Return FALSE if CODE currently cannot be vectorized as a reduction.  */
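/* For example, for a scalar max-reduction such as

       s = s > a[i] ? s : a[i];

   CODE is MAX_EXPR and *REDUC_CODE becomes REDUC_MAX_EXPR, so the vector
   of partial maxima built inside the loop can be collapsed into a single
   scalar in the epilogue.  For MULT_EXPR and the bitwise codes handled
   below, *REDUC_CODE is ERROR_MARK and the epilogue instead uses
   whole-vector shifts or element extracts (see vect_model_reduction_cost).  */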
1848
1849 static bool
1850 reduction_code_for_scalar_code (enum tree_code code,
1851 enum tree_code *reduc_code)
1852 {
1853 switch (code)
1854 {
1855 case MAX_EXPR:
1856 *reduc_code = REDUC_MAX_EXPR;
1857 return true;
1858
1859 case MIN_EXPR:
1860 *reduc_code = REDUC_MIN_EXPR;
1861 return true;
1862
1863 case PLUS_EXPR:
1864 *reduc_code = REDUC_PLUS_EXPR;
1865 return true;
1866
1867 case MULT_EXPR:
1868 case MINUS_EXPR:
1869 case BIT_IOR_EXPR:
1870 case BIT_XOR_EXPR:
1871 case BIT_AND_EXPR:
1872 *reduc_code = ERROR_MARK;
1873 return true;
1874
1875 default:
1876 return false;
1877 }
1878 }
1879
1880
1881 /* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement
1882 STMT is printed with a message MSG. */
1883
1884 static void
1885 report_vect_op (int msg_type, gimple stmt, const char *msg)
1886 {
1887 dump_printf_loc (msg_type, vect_location, "%s", msg);
1888 dump_gimple_stmt (msg_type, TDF_SLIM, stmt, 0);
1889 dump_printf (msg_type, "\n");
1890 }
1891
1892
1893 /* Detect SLP reduction of the form:
1894
1895 #a1 = phi <a5, a0>
1896 a2 = operation (a1)
1897 a3 = operation (a2)
1898 a4 = operation (a3)
1899 a5 = operation (a4)
1900
1901 #a = phi <a5>
1902
1903 PHI is the reduction phi node (#a1 = phi <a5, a0> above)
1904 FIRST_STMT is the first reduction stmt in the chain
1905 (a2 = operation (a1)).
1906
1907 Return TRUE if a reduction chain was detected. */
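/* For example, such a chain typically arises from a manually unrolled
   reduction:

       int sum = 0;
       for (i = 0; i < n; i++)
         {
           sum = sum + a[2*i];
           sum = sum + a[2*i+1];
         }

   The loop-header phi for 'sum' plays the role of #a1 above, each
   addition is one link in the chain, and the result of the last one
   (a5) feeds back into the phi and into the loop-exit phi.  */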
1908
1909 static bool
1910 vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
1911 {
1912 struct loop *loop = (gimple_bb (phi))->loop_father;
1913 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
1914 enum tree_code code;
1915 gimple current_stmt = NULL, loop_use_stmt = NULL, first, next_stmt;
1916 stmt_vec_info use_stmt_info, current_stmt_info;
1917 tree lhs;
1918 imm_use_iterator imm_iter;
1919 use_operand_p use_p;
1920 int nloop_uses, size = 0, n_out_of_loop_uses;
1921 bool found = false;
1922
1923 if (loop != vect_loop)
1924 return false;
1925
1926 lhs = PHI_RESULT (phi);
1927 code = gimple_assign_rhs_code (first_stmt);
1928 while (1)
1929 {
1930 nloop_uses = 0;
1931 n_out_of_loop_uses = 0;
1932 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1933 {
1934 gimple use_stmt = USE_STMT (use_p);
1935 if (is_gimple_debug (use_stmt))
1936 continue;
1937
1940 /* Check if we got back to the reduction phi. */
1941 if (use_stmt == phi)
1942 {
1943 loop_use_stmt = use_stmt;
1944 found = true;
1945 break;
1946 }
1947
1948 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1949 {
1950 if (vinfo_for_stmt (use_stmt)
1951 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1952 {
1953 loop_use_stmt = use_stmt;
1954 nloop_uses++;
1955 }
1956 }
1957 else
1958 n_out_of_loop_uses++;
1959
1960           /* There can be either a single use in the loop or two uses in
1961              phi nodes.  */
1962 if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
1963 return false;
1964 }
1965
1966 if (found)
1967 break;
1968
1969 /* We reached a statement with no loop uses. */
1970 if (nloop_uses == 0)
1971 return false;
1972
1973 /* This is a loop exit phi, and we haven't reached the reduction phi. */
1974 if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
1975 return false;
1976
1977 if (!is_gimple_assign (loop_use_stmt)
1978 || code != gimple_assign_rhs_code (loop_use_stmt)
1979 || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
1980 return false;
1981
1982 /* Insert USE_STMT into reduction chain. */
1983 use_stmt_info = vinfo_for_stmt (loop_use_stmt);
1984 if (current_stmt)
1985 {
1986 current_stmt_info = vinfo_for_stmt (current_stmt);
1987 GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
1988 GROUP_FIRST_ELEMENT (use_stmt_info)
1989 = GROUP_FIRST_ELEMENT (current_stmt_info);
1990 }
1991 else
1992 GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;
1993
1994 lhs = gimple_assign_lhs (loop_use_stmt);
1995 current_stmt = loop_use_stmt;
1996 size++;
1997 }
1998
1999 if (!found || loop_use_stmt != phi || size < 2)
2000 return false;
2001
2002 /* Swap the operands, if needed, to make the reduction operand be the second
2003 operand. */
2004 lhs = PHI_RESULT (phi);
2005 next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2006 while (next_stmt)
2007 {
2008 if (gimple_assign_rhs2 (next_stmt) == lhs)
2009 {
2010 tree op = gimple_assign_rhs1 (next_stmt);
2011 gimple def_stmt = NULL;
2012
2013 if (TREE_CODE (op) == SSA_NAME)
2014 def_stmt = SSA_NAME_DEF_STMT (op);
2015
2016 /* Check that the other def is either defined in the loop
2017 ("vect_internal_def"), or it's an induction (defined by a
2018 loop-header phi-node). */
2019 if (def_stmt
2020 && gimple_bb (def_stmt)
2021 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2022 && (is_gimple_assign (def_stmt)
2023 || is_gimple_call (def_stmt)
2024 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2025 == vect_induction_def
2026 || (gimple_code (def_stmt) == GIMPLE_PHI
2027 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2028 == vect_internal_def
2029 && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2030 {
2031 lhs = gimple_assign_lhs (next_stmt);
2032 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2033 continue;
2034 }
2035
2036 return false;
2037 }
2038 else
2039 {
2040 tree op = gimple_assign_rhs2 (next_stmt);
2041 gimple def_stmt = NULL;
2042
2043 if (TREE_CODE (op) == SSA_NAME)
2044 def_stmt = SSA_NAME_DEF_STMT (op);
2045
2046 /* Check that the other def is either defined in the loop
2047 ("vect_internal_def"), or it's an induction (defined by a
2048 loop-header phi-node). */
2049 if (def_stmt
2050 && gimple_bb (def_stmt)
2051 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2052 && (is_gimple_assign (def_stmt)
2053 || is_gimple_call (def_stmt)
2054 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2055 == vect_induction_def
2056 || (gimple_code (def_stmt) == GIMPLE_PHI
2057 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2058 == vect_internal_def
2059 && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2060 {
2061 if (dump_enabled_p ())
2062 {
2063 dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: ");
2064 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, next_stmt, 0);
2065 dump_printf (MSG_NOTE, "\n");
2066 }
2067
2068 swap_ssa_operands (next_stmt,
2069 gimple_assign_rhs1_ptr (next_stmt),
2070 gimple_assign_rhs2_ptr (next_stmt));
2071 update_stmt (next_stmt);
2072
2073 if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
2074 LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2075 }
2076 else
2077 return false;
2078 }
2079
2080 lhs = gimple_assign_lhs (next_stmt);
2081 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2082 }
2083
2084 /* Save the chain for further analysis in SLP detection. */
2085 first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2086 LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (first);
2087 GROUP_SIZE (vinfo_for_stmt (first)) = size;
2088
2089 return true;
2090 }
2091
2092
2093 /* Function vect_is_simple_reduction_1
2094
2095 (1) Detect a cross-iteration def-use cycle that represents a simple
2096 reduction computation. We look for the following pattern:
2097
2098 loop_header:
2099 a1 = phi < a0, a2 >
2100 a3 = ...
2101 a2 = operation (a3, a1)
2102
2103 or
2104
2105 a3 = ...
2106 loop_header:
2107 a1 = phi < a0, a2 >
2108 a2 = operation (a3, a1)
2109
2110 such that:
2111 1. operation is commutative and associative and it is safe to
2112 change the order of the computation (if CHECK_REDUCTION is true)
2113 2. no uses for a2 in the loop (a2 is used out of the loop)
2114 3. no uses of a1 in the loop besides the reduction operation
2115 4. no uses of a1 outside the loop.
2116
2117 Conditions 1,4 are tested here.
2118 Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
2119
2120 (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
2121 nested cycles, if CHECK_REDUCTION is false.
2122
2123 (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
2124 reductions:
2125
2126 a1 = phi < a0, a2 >
2127 inner loop (def of a3)
2128 a2 = phi < a3 >
2129
2130    If MODIFY is true it also tries to rework the code in-place to enable
2131    detection of more reduction patterns.  For the time being we rewrite
2132    "res -= RHS" into "res += -RHS" when it seems worthwhile.
2133 */
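/* For example, the source loop

       s = init;
       for (i = 0; i < n; i++)
         s += a[i];

   matches the first pattern above: a1 = phi <init, a2> is the
   loop-header phi for 's', a3 is the value loaded from a[i], and
   a2 = a3 + a1 is the reduction statement whose result flows back
   into the phi along the latch edge and is the only value of 's'
   used after the loop.  */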
2134
2135 static gimple
2136 vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
2137 bool check_reduction, bool *double_reduc,
2138 bool modify)
2139 {
2140 struct loop *loop = (gimple_bb (phi))->loop_father;
2141 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2142 edge latch_e = loop_latch_edge (loop);
2143 tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
2144 gimple def_stmt, def1 = NULL, def2 = NULL;
2145 enum tree_code orig_code, code;
2146 tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
2147 tree type;
2148 int nloop_uses;
2149 tree name;
2150 imm_use_iterator imm_iter;
2151 use_operand_p use_p;
2152 bool phi_def;
2153
2154 *double_reduc = false;
2155
2156 /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
2157 otherwise, we assume outer loop vectorization. */
2158 gcc_assert ((check_reduction && loop == vect_loop)
2159 || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
2160
2161 name = PHI_RESULT (phi);
2162 nloop_uses = 0;
2163 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2164 {
2165 gimple use_stmt = USE_STMT (use_p);
2166 if (is_gimple_debug (use_stmt))
2167 continue;
2168
2169 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2170 {
2171 if (dump_enabled_p ())
2172 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2173 "intermediate value used outside loop.\n");
2174
2175 return NULL;
2176 }
2177
2178 if (vinfo_for_stmt (use_stmt)
2179 && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2180 nloop_uses++;
2181 if (nloop_uses > 1)
2182 {
2183 if (dump_enabled_p ())
2184 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2185 "reduction used in loop.\n");
2186 return NULL;
2187 }
2188 }
2189
2190 if (TREE_CODE (loop_arg) != SSA_NAME)
2191 {
2192 if (dump_enabled_p ())
2193 {
2194 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2195 "reduction: not ssa_name: ");
2196 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, loop_arg);
2197 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2198 }
2199 return NULL;
2200 }
2201
2202 def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2203 if (!def_stmt)
2204 {
2205 if (dump_enabled_p ())
2206 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2207 "reduction: no def_stmt.\n");
2208 return NULL;
2209 }
2210
2211 if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
2212 {
2213 if (dump_enabled_p ())
2214 {
2215 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2216 dump_printf (MSG_NOTE, "\n");
2217 }
2218 return NULL;
2219 }
2220
2221 if (is_gimple_assign (def_stmt))
2222 {
2223 name = gimple_assign_lhs (def_stmt);
2224 phi_def = false;
2225 }
2226 else
2227 {
2228 name = PHI_RESULT (def_stmt);
2229 phi_def = true;
2230 }
2231
2232 nloop_uses = 0;
2233 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2234 {
2235 gimple use_stmt = USE_STMT (use_p);
2236 if (is_gimple_debug (use_stmt))
2237 continue;
2238 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
2239 && vinfo_for_stmt (use_stmt)
2240 && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2241 nloop_uses++;
2242 if (nloop_uses > 1)
2243 {
2244 if (dump_enabled_p ())
2245 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2246 "reduction used in loop.\n");
2247 return NULL;
2248 }
2249 }
2250
2251 /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2252 defined in the inner loop. */
2253 if (phi_def)
2254 {
2255 op1 = PHI_ARG_DEF (def_stmt, 0);
2256
2257 if (gimple_phi_num_args (def_stmt) != 1
2258 || TREE_CODE (op1) != SSA_NAME)
2259 {
2260 if (dump_enabled_p ())
2261 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2262 "unsupported phi node definition.\n");
2263
2264 return NULL;
2265 }
2266
2267 def1 = SSA_NAME_DEF_STMT (op1);
2268 if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2269 && loop->inner
2270 && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
2271 && is_gimple_assign (def1))
2272 {
2273 if (dump_enabled_p ())
2274 report_vect_op (MSG_NOTE, def_stmt,
2275 "detected double reduction: ");
2276
2277 *double_reduc = true;
2278 return def_stmt;
2279 }
2280
2281 return NULL;
2282 }
2283
2284 code = orig_code = gimple_assign_rhs_code (def_stmt);
2285
2286   /* We can handle "res -= x[i]", which is non-associative, by
2287      simply rewriting it into "res += -x[i]".  Avoid changing the
2288      gimple instruction for the first simple tests and only do this
2289      if we're allowed to change the code at all.  */
2290 if (code == MINUS_EXPR
2291 && modify
2292 && (op1 = gimple_assign_rhs1 (def_stmt))
2293 && TREE_CODE (op1) == SSA_NAME
2294 && SSA_NAME_DEF_STMT (op1) == phi)
2295 code = PLUS_EXPR;
2296
2297 if (check_reduction
2298 && (!commutative_tree_code (code) || !associative_tree_code (code)))
2299 {
2300 if (dump_enabled_p ())
2301 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2302 "reduction: not commutative/associative: ");
2303 return NULL;
2304 }
2305
2306 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
2307 {
2308 if (code != COND_EXPR)
2309 {
2310 if (dump_enabled_p ())
2311 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2312 "reduction: not binary operation: ");
2313
2314 return NULL;
2315 }
2316
2317 op3 = gimple_assign_rhs1 (def_stmt);
2318 if (COMPARISON_CLASS_P (op3))
2319 {
2320 op4 = TREE_OPERAND (op3, 1);
2321 op3 = TREE_OPERAND (op3, 0);
2322 }
2323
2324 op1 = gimple_assign_rhs2 (def_stmt);
2325 op2 = gimple_assign_rhs3 (def_stmt);
2326
2327 if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2328 {
2329 if (dump_enabled_p ())
2330 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2331 "reduction: uses not ssa_names: ");
2332
2333 return NULL;
2334 }
2335 }
2336 else
2337 {
2338 op1 = gimple_assign_rhs1 (def_stmt);
2339 op2 = gimple_assign_rhs2 (def_stmt);
2340
2341 if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2342 {
2343 if (dump_enabled_p ())
2344 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2345 "reduction: uses not ssa_names: ");
2346
2347 return NULL;
2348 }
2349 }
2350
2351 type = TREE_TYPE (gimple_assign_lhs (def_stmt));
2352 if ((TREE_CODE (op1) == SSA_NAME
2353        && !types_compatible_p (type, TREE_TYPE (op1)))
2354 || (TREE_CODE (op2) == SSA_NAME
2355 && !types_compatible_p (type, TREE_TYPE (op2)))
2356 || (op3 && TREE_CODE (op3) == SSA_NAME
2357 && !types_compatible_p (type, TREE_TYPE (op3)))
2358 || (op4 && TREE_CODE (op4) == SSA_NAME
2359 && !types_compatible_p (type, TREE_TYPE (op4))))
2360 {
2361 if (dump_enabled_p ())
2362 {
2363 dump_printf_loc (MSG_NOTE, vect_location,
2364 "reduction: multiple types: operation type: ");
2365 dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
2366 dump_printf (MSG_NOTE, ", operands types: ");
2367 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2368 TREE_TYPE (op1));
2369 dump_printf (MSG_NOTE, ",");
2370 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2371 TREE_TYPE (op2));
2372 if (op3)
2373 {
2374 dump_printf (MSG_NOTE, ",");
2375 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2376 TREE_TYPE (op3));
2377 }
2378
2379 if (op4)
2380 {
2381 dump_printf (MSG_NOTE, ",");
2382 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2383 TREE_TYPE (op4));
2384 }
2385 dump_printf (MSG_NOTE, "\n");
2386 }
2387
2388 return NULL;
2389 }
2390
2391 /* Check that it's ok to change the order of the computation.
2392 Generally, when vectorizing a reduction we change the order of the
2393 computation. This may change the behavior of the program in some
2394 cases, so we need to check that this is ok. One exception is when
2395 vectorizing an outer-loop: the inner-loop is executed sequentially,
2396 and therefore vectorizing reductions in the inner-loop during
2397 outer-loop vectorization is safe. */
2398
2399 /* CHECKME: check for !flag_finite_math_only too? */
2400 if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
2401 && check_reduction)
2402 {
2403 /* Changing the order of operations changes the semantics. */
2404 if (dump_enabled_p ())
2405 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2406 "reduction: unsafe fp math optimization: ");
2407 return NULL;
2408 }
2409 else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
2410 && check_reduction)
2411 {
2412 /* Changing the order of operations changes the semantics. */
2413 if (dump_enabled_p ())
2414 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2415 "reduction: unsafe int math optimization: ");
2416 return NULL;
2417 }
2418 else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
2419 {
2420 /* Changing the order of operations changes the semantics. */
2421 if (dump_enabled_p ())
2422 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2423 "reduction: unsafe fixed-point math optimization: ");
2424 return NULL;
2425 }
2426
2427   /* If we detected "res -= x[i]" earlier, rewrite it into
2428      "res += -x[i]" now.  If this turns out to be useless, reassoc
2429      will clean it up again.  */
2430 if (orig_code == MINUS_EXPR)
2431 {
2432 tree rhs = gimple_assign_rhs2 (def_stmt);
2433 tree negrhs = make_ssa_name (TREE_TYPE (rhs), NULL);
2434 gimple negate_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, negrhs,
2435 rhs, NULL);
2436 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
2437 set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt,
2438 loop_info, NULL));
2439 gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
2440 gimple_assign_set_rhs2 (def_stmt, negrhs);
2441 gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
2442 update_stmt (def_stmt);
2443 }
2444
2445 /* Reduction is safe. We're dealing with one of the following:
2446 1) integer arithmetic and no trapv
2447 2) floating point arithmetic, and special flags permit this optimization
2448 3) nested cycle (i.e., outer loop vectorization). */
2449 if (TREE_CODE (op1) == SSA_NAME)
2450 def1 = SSA_NAME_DEF_STMT (op1);
2451
2452 if (TREE_CODE (op2) == SSA_NAME)
2453 def2 = SSA_NAME_DEF_STMT (op2);
2454
2455 if (code != COND_EXPR
2456 && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
2457 {
2458 if (dump_enabled_p ())
2459 report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: ");
2460 return NULL;
2461 }
2462
2463 /* Check that one def is the reduction def, defined by PHI,
2464 the other def is either defined in the loop ("vect_internal_def"),
2465 or it's an induction (defined by a loop-header phi-node). */
2466
2467 if (def2 && def2 == phi
2468 && (code == COND_EXPR
2469 || !def1 || gimple_nop_p (def1)
2470 || !flow_bb_inside_loop_p (loop, gimple_bb (def1))
2471 || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
2472 && (is_gimple_assign (def1)
2473 || is_gimple_call (def1)
2474 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2475 == vect_induction_def
2476 || (gimple_code (def1) == GIMPLE_PHI
2477 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2478 == vect_internal_def
2479 && !is_loop_header_bb_p (gimple_bb (def1)))))))
2480 {
2481 if (dump_enabled_p ())
2482 report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2483 return def_stmt;
2484 }
2485
2486 if (def1 && def1 == phi
2487 && (code == COND_EXPR
2488 || !def2 || gimple_nop_p (def2)
2489 || !flow_bb_inside_loop_p (loop, gimple_bb (def2))
2490 || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
2491 && (is_gimple_assign (def2)
2492 || is_gimple_call (def2)
2493 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2494 == vect_induction_def
2495 || (gimple_code (def2) == GIMPLE_PHI
2496 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2497 == vect_internal_def
2498 && !is_loop_header_bb_p (gimple_bb (def2)))))))
2499 {
2500 if (check_reduction)
2501 {
2502 /* Swap operands (just for simplicity - so that the rest of the code
2503 can assume that the reduction variable is always the last (second)
2504 argument). */
2505 if (dump_enabled_p ())
2506 report_vect_op (MSG_NOTE, def_stmt,
2507 "detected reduction: need to swap operands: ");
2508
2509 swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
2510 gimple_assign_rhs2_ptr (def_stmt));
2511
2512 if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
2513 LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2514 }
2515 else
2516 {
2517 if (dump_enabled_p ())
2518 report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2519 }
2520
2521 return def_stmt;
2522 }
2523
2524 /* Try to find SLP reduction chain. */
2525 if (check_reduction && vect_is_slp_reduction (loop_info, phi, def_stmt))
2526 {
2527 if (dump_enabled_p ())
2528 report_vect_op (MSG_NOTE, def_stmt,
2529 "reduction: detected reduction chain: ");
2530
2531 return def_stmt;
2532 }
2533
2534 if (dump_enabled_p ())
2535 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2536 "reduction: unknown pattern: ");
2537
2538 return NULL;
2539 }
2540
2541 /* Wrapper around vect_is_simple_reduction_1, that won't modify code
2542 in-place. Arguments as there. */
2543
2544 static gimple
2545 vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
2546 bool check_reduction, bool *double_reduc)
2547 {
2548 return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2549 double_reduc, false);
2550 }
2551
2552 /* Wrapper around vect_is_simple_reduction_1, which will modify code
2553 in-place if it enables detection of more reductions. Arguments
2554 as there. */
2555
2556 gimple
2557 vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
2558 bool check_reduction, bool *double_reduc)
2559 {
2560 return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2561 double_reduc, true);
2562 }
2563
2564 /* Calculate the cost of one scalar iteration of the loop. */
2565 int
2566 vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
2567 {
2568 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2569 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2570 int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
2571 int innerloop_iters, i, stmt_cost;
2572
2573 /* Count statements in scalar loop. Using this as scalar cost for a single
2574 iteration for now.
2575
2576 TODO: Add outer loop support.
2577
2578 TODO: Consider assigning different costs to different scalar
2579 statements. */
2580
2581 /* FORNOW. */
2582 innerloop_iters = 1;
2583 if (loop->inner)
2584 innerloop_iters = 50; /* FIXME */
2585
2586 for (i = 0; i < nbbs; i++)
2587 {
2588 gimple_stmt_iterator si;
2589 basic_block bb = bbs[i];
2590
2591 if (bb->loop_father == loop->inner)
2592 factor = innerloop_iters;
2593 else
2594 factor = 1;
2595
2596 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2597 {
2598 gimple stmt = gsi_stmt (si);
2599 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2600
2601 if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
2602 continue;
2603
2604 /* Skip stmts that are not vectorized inside the loop. */
2605 if (stmt_info
2606 && !STMT_VINFO_RELEVANT_P (stmt_info)
2607 && (!STMT_VINFO_LIVE_P (stmt_info)
2608 || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
2609 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
2610 continue;
2611
2612 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
2613 {
2614 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
2615 stmt_cost = vect_get_stmt_cost (scalar_load);
2616 else
2617 stmt_cost = vect_get_stmt_cost (scalar_store);
2618 }
2619 else
2620 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2621
2622 scalar_single_iter_cost += stmt_cost * factor;
2623 }
2624 }
2625 return scalar_single_iter_cost;
2626 }
2627
2628 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */
2629 int
2630 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
2631 int *peel_iters_epilogue,
2632 int scalar_single_iter_cost,
2633 stmt_vector_for_cost *prologue_cost_vec,
2634 stmt_vector_for_cost *epilogue_cost_vec)
2635 {
2636 int retval = 0;
2637 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2638
2639 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2640 {
2641 *peel_iters_epilogue = vf/2;
2642 if (dump_enabled_p ())
2643 dump_printf_loc (MSG_NOTE, vect_location,
2644 "cost model: epilogue peel iters set to vf/2 "
2645 "because loop iterations are unknown .\n");
2646
2647       /* If peeled iterations are known but the number of scalar loop
2648          iterations is unknown, count a taken branch per peeled loop.  */
2649 retval = record_stmt_cost (prologue_cost_vec, 2, cond_branch_taken,
2650 NULL, 0, vect_prologue);
2651 }
2652 else
2653 {
2654 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
2655 peel_iters_prologue = niters < peel_iters_prologue ?
2656 niters : peel_iters_prologue;
2657 *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
2658       /* If we need to peel for gaps, but no epilogue peeling would otherwise
2659          be required, we have to peel VF iterations.  */
2660 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
2661 *peel_iters_epilogue = vf;
2662 }
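  /* Worked example with illustrative numbers: for NITERS = 102, VF = 4
     and PEEL_ITERS_PROLOGUE = 3, the epilogue gets (102 - 3) % 4 = 3
     iterations, so 3 prologue and 3 epilogue scalar iterations are
     costed below.  (When the remainder is 0 but peeling for gaps is
     needed, the epilogue is forced to VF iterations instead.)  */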
2663
2664 if (peel_iters_prologue)
2665 retval += record_stmt_cost (prologue_cost_vec,
2666 peel_iters_prologue * scalar_single_iter_cost,
2667 scalar_stmt, NULL, 0, vect_prologue);
2668 if (*peel_iters_epilogue)
2669 retval += record_stmt_cost (epilogue_cost_vec,
2670 *peel_iters_epilogue * scalar_single_iter_cost,
2671 scalar_stmt, NULL, 0, vect_epilogue);
2672 return retval;
2673 }
2674
2675 /* Function vect_estimate_min_profitable_iters
2676
2677 Return the number of iterations required for the vector version of the
2678 loop to be profitable relative to the cost of the scalar version of the
2679 loop. */
2680
2681 static void
2682 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
2683 int *ret_min_profitable_niters,
2684 int *ret_min_profitable_estimate)
2685 {
2686 int min_profitable_iters;
2687 int min_profitable_estimate;
2688 int peel_iters_prologue;
2689 int peel_iters_epilogue;
2690 unsigned vec_inside_cost = 0;
2691 int vec_outside_cost = 0;
2692 unsigned vec_prologue_cost = 0;
2693 unsigned vec_epilogue_cost = 0;
2694 int scalar_single_iter_cost = 0;
2695 int scalar_outside_cost = 0;
2696 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2697 int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2698 void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2699
2700 /* Cost model disabled. */
2701 if (unlimited_cost_model ())
2702 {
2703 dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
2704 *ret_min_profitable_niters = 0;
2705 *ret_min_profitable_estimate = 0;
2706 return;
2707 }
2708
2709 /* Requires loop versioning tests to handle misalignment. */
2710 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2711 {
2712 /* FIXME: Make cost depend on complexity of individual check. */
2713 unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
2714 (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
2715 vect_prologue);
2716 dump_printf (MSG_NOTE,
2717 "cost model: Adding cost of checks for loop "
2718 "versioning to treat misalignment.\n");
2719 }
2720
2721 /* Requires loop versioning with alias checks. */
2722 if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2723 {
2724 /* FIXME: Make cost depend on complexity of individual check. */
2725 unsigned len = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).length ();
2726 (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
2727 vect_prologue);
2728 dump_printf (MSG_NOTE,
2729 "cost model: Adding cost of checks for loop "
2730 "versioning aliasing.\n");
2731 }
2732
2733 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2734 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2735 (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, NULL, 0,
2736 vect_prologue);
2737
2738 /* Count statements in scalar loop. Using this as scalar cost for a single
2739 iteration for now.
2740
2741 TODO: Add outer loop support.
2742
2743 TODO: Consider assigning different costs to different scalar
2744 statements. */
2745
2746 scalar_single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
2747
2748 /* Add additional cost for the peeled instructions in prologue and epilogue
2749 loop.
2750
2751      FORNOW: If we don't know the value of peel_iters for prologue or epilogue
2752      at compile time, we assume it's vf/2 (the worst case would be vf-1).
2753
2754 TODO: Build an expression that represents peel_iters for prologue and
2755 epilogue to be used in a run-time test. */
2756
2757 if (npeel < 0)
2758 {
2759 peel_iters_prologue = vf/2;
2760 dump_printf (MSG_NOTE, "cost model: "
2761 "prologue peel iters set to vf/2.\n");
2762
2763       /* If peeling for alignment is unknown, the loop bound of the main loop
2764          becomes unknown.  */
2765 peel_iters_epilogue = vf/2;
2766 dump_printf (MSG_NOTE, "cost model: "
2767 "epilogue peel iters set to vf/2 because "
2768 "peeling for alignment is unknown.\n");
2769
2770 /* If peeled iterations are unknown, count a taken branch and a not taken
2771 branch per peeled loop. Even if scalar loop iterations are known,
2772 vector iterations are not known since peeled prologue iterations are
2773 not known. Hence guards remain the same. */
2774 (void) add_stmt_cost (target_cost_data, 2, cond_branch_taken,
2775 NULL, 0, vect_prologue);
2776 (void) add_stmt_cost (target_cost_data, 2, cond_branch_not_taken,
2777 NULL, 0, vect_prologue);
2778 /* FORNOW: Don't attempt to pass individual scalar instructions to
2779 the model; just assume linear cost for scalar iterations. */
2780 (void) add_stmt_cost (target_cost_data,
2781 peel_iters_prologue * scalar_single_iter_cost,
2782 scalar_stmt, NULL, 0, vect_prologue);
2783 (void) add_stmt_cost (target_cost_data,
2784 peel_iters_epilogue * scalar_single_iter_cost,
2785 scalar_stmt, NULL, 0, vect_epilogue);
2786 }
2787 else
2788 {
2789 stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
2790 stmt_info_for_cost *si;
2791 int j;
2792 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2793
2794 prologue_cost_vec.create (2);
2795 epilogue_cost_vec.create (2);
2796 peel_iters_prologue = npeel;
2797
2798 (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue,
2799 &peel_iters_epilogue,
2800 scalar_single_iter_cost,
2801 &prologue_cost_vec,
2802 &epilogue_cost_vec);
2803
2804 FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
2805 {
2806 struct _stmt_vec_info *stmt_info
2807 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
2808 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
2809 si->misalign, vect_prologue);
2810 }
2811
2812 FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
2813 {
2814 struct _stmt_vec_info *stmt_info
2815 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
2816 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
2817 si->misalign, vect_epilogue);
2818 }
2819
2820 prologue_cost_vec.release ();
2821 epilogue_cost_vec.release ();
2822 }
2823
2824 /* FORNOW: The scalar outside cost is incremented in one of the
2825 following ways:
2826
2827 1. The vectorizer checks for alignment and aliasing and generates
2828 a condition that allows dynamic vectorization. A cost model
2829 check is ANDED with the versioning condition. Hence scalar code
2830 path now has the added cost of the versioning check.
2831
2832 if (cost > th & versioning_check)
2833 jmp to vector code
2834
2835 Hence run-time scalar is incremented by not-taken branch cost.
2836
2837 2. The vectorizer then checks if a prologue is required. If the
2838 cost model check was not done before during versioning, it has to
2839 be done before the prologue check.
2840
2841 if (cost <= th)
2842 prologue = scalar_iters
2843 if (prologue == 0)
2844 jmp to vector code
2845 else
2846 execute prologue
2847 if (prologue == num_iters)
2848 go to exit
2849
2850 Hence the run-time scalar cost is incremented by a taken branch,
2851 plus a not-taken branch, plus a taken branch cost.
2852
2853 3. The vectorizer then checks if an epilogue is required. If the
2854 cost model check was not done before during prologue check, it
2855 has to be done with the epilogue check.
2856
2857 if (prologue == 0)
2858 jmp to vector code
2859 else
2860 execute prologue
2861 if (prologue == num_iters)
2862 go to exit
2863 vector code:
2864 if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
2865 jmp to epilogue
2866
2867 Hence the run-time scalar cost should be incremented by 2 taken
2868 branches.
2869
2870 TODO: The back end may reorder the BBS's differently and reverse
2871 conditions/branch directions. Change the estimates below to
2872 something more reasonable. */
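  /* Worked example with illustrative target costs: if cond_branch_taken
     costs 3 and cond_branch_not_taken costs 1, case 1 adds 1 to the
     scalar outside cost, case 2 adds 2 * 3 + 1 = 7, and case 3 adds
     2 * 3 = 6, matching the increments performed just below.  */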
2873
2874   /* If the number of iterations is known and we do not do versioning, we can
2875      decide whether to vectorize at compile time.  Hence the scalar version
2876      does not carry cost model guard costs.  */
2877 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2878 || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2879 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2880 {
2881 /* Cost model check occurs at versioning. */
2882 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2883 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2884 scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken);
2885 else
2886 {
2887 /* Cost model check occurs at prologue generation. */
2888 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
2889 scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
2890 + vect_get_stmt_cost (cond_branch_not_taken);
2891 /* Cost model check occurs at epilogue generation. */
2892 else
2893 scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken);
2894 }
2895 }
2896
2897 /* Complete the target-specific cost calculations. */
2898 finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
2899 &vec_inside_cost, &vec_epilogue_cost);
2900
2901 vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
2902
2903 /* Calculate number of iterations required to make the vector version
2904 profitable, relative to the loop bodies only. The following condition
2905 must hold true:
2906 SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
2907 where
2908 SIC = scalar iteration cost, VIC = vector iteration cost,
2909 VOC = vector outside cost, VF = vectorization factor,
2910 PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
2911 SOC = scalar outside cost for run time cost model check. */
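  /* Worked example with illustrative costs: for SIC = 4, VIC = 6, VF = 4,
     VOC = 16, SOC = 0 and 2 prologue plus 2 epilogue peel iterations, the
     division below gives ((16 - 0) * 4 - 6 * 2 - 6 * 2) / (4 * 4 - 6)
     = 40 / 10 = 4; the check that follows bumps this by one because at
     4 iterations the vector version is not yet strictly cheaper, and
     later the result is clamped to at least VF.  */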
2912
2913 if ((scalar_single_iter_cost * vf) > (int) vec_inside_cost)
2914 {
2915 if (vec_outside_cost <= 0)
2916 min_profitable_iters = 1;
2917 else
2918 {
2919 min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
2920 - vec_inside_cost * peel_iters_prologue
2921 - vec_inside_cost * peel_iters_epilogue)
2922 / ((scalar_single_iter_cost * vf)
2923 - vec_inside_cost);
2924
2925 if ((scalar_single_iter_cost * vf * min_profitable_iters)
2926 <= (((int) vec_inside_cost * min_profitable_iters)
2927 + (((int) vec_outside_cost - scalar_outside_cost) * vf)))
2928 min_profitable_iters++;
2929 }
2930 }
2931 /* vector version will never be profitable. */
2932 else
2933 {
2934 if (dump_enabled_p ())
2935 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2936 "cost model: the vector iteration cost = %d "
2937 "divided by the scalar iteration cost = %d "
2938 "is greater or equal to the vectorization factor = %d"
2939 ".\n",
2940 vec_inside_cost, scalar_single_iter_cost, vf);
2941 *ret_min_profitable_niters = -1;
2942 *ret_min_profitable_estimate = -1;
2943 return;
2944 }
2945
2946 if (dump_enabled_p ())
2947 {
2948 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2949 dump_printf (MSG_NOTE, " Vector inside of loop cost: %d\n",
2950 vec_inside_cost);
2951 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n",
2952 vec_prologue_cost);
2953 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n",
2954 vec_epilogue_cost);
2955 dump_printf (MSG_NOTE, " Scalar iteration cost: %d\n",
2956 scalar_single_iter_cost);
2957 dump_printf (MSG_NOTE, " Scalar outside cost: %d\n",
2958 scalar_outside_cost);
2959 dump_printf (MSG_NOTE, " Vector outside cost: %d\n",
2960 vec_outside_cost);
2961 dump_printf (MSG_NOTE, " prologue iterations: %d\n",
2962 peel_iters_prologue);
2963 dump_printf (MSG_NOTE, " epilogue iterations: %d\n",
2964 peel_iters_epilogue);
2965 dump_printf (MSG_NOTE,
2966 " Calculated minimum iters for profitability: %d\n",
2967 min_profitable_iters);
2968 dump_printf (MSG_NOTE, "\n");
2969 }
2970
2971 min_profitable_iters =
2972 min_profitable_iters < vf ? vf : min_profitable_iters;
2973
2974 /* Because the condition we create is:
2975 if (niters <= min_profitable_iters)
2976 then skip the vectorized loop. */
2977 min_profitable_iters--;
2978
2979 if (dump_enabled_p ())
2980 dump_printf_loc (MSG_NOTE, vect_location,
2981 " Runtime profitability threshold = %d\n",
2982 min_profitable_iters);
2983
2984 *ret_min_profitable_niters = min_profitable_iters;
2985
2986 /* Calculate number of iterations required to make the vector version
2987 profitable, relative to the loop bodies only.
2988
2989      The non-vectorized variant is SIC * niters and it must win over the vector
2990      variant on the expected loop trip count.  The following condition must hold true:
2991 SIC * niters > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC + SOC */
2992
2993 if (vec_outside_cost <= 0)
2994 min_profitable_estimate = 1;
2995 else
2996 {
2997 min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) * vf
2998 - vec_inside_cost * peel_iters_prologue
2999 - vec_inside_cost * peel_iters_epilogue)
3000 / ((scalar_single_iter_cost * vf)
3001 - vec_inside_cost);
3002 }
3003 min_profitable_estimate --;
3004 min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters);
3005 if (dump_enabled_p ())
3006 dump_printf_loc (MSG_NOTE, vect_location,
3007 " Static estimate profitability threshold = %d\n",
3008                      min_profitable_estimate);
3009
3010 *ret_min_profitable_estimate = min_profitable_estimate;
3011 }
3012
3013
3014 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
3015 functions. Design better to avoid maintenance issues. */
3016
3017 /* Function vect_model_reduction_cost.
3018
3019 Models cost for a reduction operation, including the vector ops
3020 generated within the strip-mine loop, the initial definition before
3021 the loop, and the epilogue code that must be generated. */
3022
3023 static bool
3024 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
3025 int ncopies)
3026 {
3027 int prologue_cost = 0, epilogue_cost = 0;
3028 enum tree_code code;
3029 optab optab;
3030 tree vectype;
3031 gimple stmt, orig_stmt;
3032 tree reduction_op;
3033 enum machine_mode mode;
3034 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3035 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3036 void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3037
3038 /* Cost of reduction op inside loop. */
3039 unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3040 stmt_info, 0, vect_body);
3041 stmt = STMT_VINFO_STMT (stmt_info);
3042
3043 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3044 {
3045 case GIMPLE_SINGLE_RHS:
3046 gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)) == ternary_op);
3047 reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), 2);
3048 break;
3049 case GIMPLE_UNARY_RHS:
3050 reduction_op = gimple_assign_rhs1 (stmt);
3051 break;
3052 case GIMPLE_BINARY_RHS:
3053 reduction_op = gimple_assign_rhs2 (stmt);
3054 break;
3055 case GIMPLE_TERNARY_RHS:
3056 reduction_op = gimple_assign_rhs3 (stmt);
3057 break;
3058 default:
3059 gcc_unreachable ();
3060 }
3061
3062 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3063 if (!vectype)
3064 {
3065 if (dump_enabled_p ())
3066 {
3067 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3068 "unsupported data-type ");
3069 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3070 TREE_TYPE (reduction_op));
3071 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3072 }
3073 return false;
3074 }
3075
3076 mode = TYPE_MODE (vectype);
3077 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3078
3079 if (!orig_stmt)
3080 orig_stmt = STMT_VINFO_STMT (stmt_info);
3081
3082 code = gimple_assign_rhs_code (orig_stmt);
3083
3084 /* Add in cost for initial definition. */
3085 prologue_cost += add_stmt_cost (target_cost_data, 1, scalar_to_vec,
3086 stmt_info, 0, vect_prologue);
3087
3088 /* Determine cost of epilogue code.
3089
3090 We have a reduction operator that will reduce the vector in one statement.
3091 Also requires scalar extract. */
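  /* Worked example for a 4-element vector: when REDUC_CODE is not
     ERROR_MARK, the epilogue below is costed as one vector statement
     plus one vec_to_scalar extract.  Otherwise, with a whole-vector
     shift available it costs exact_log2 (4) * 2 = 4 vector statements
     plus one extract, and with neither it costs 4 + 3 = 7 statements
     (N extracts and N-1 reduction ops).  */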
3092
3093 if (!nested_in_vect_loop_p (loop, orig_stmt))
3094 {
3095 if (reduc_code != ERROR_MARK)
3096 {
3097 epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
3098 stmt_info, 0, vect_epilogue);
3099 epilogue_cost += add_stmt_cost (target_cost_data, 1, vec_to_scalar,
3100 stmt_info, 0, vect_epilogue);
3101 }
3102 else
3103 {
3104 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
3105 tree bitsize =
3106 TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
3107 int element_bitsize = tree_to_uhwi (bitsize);
3108 int nelements = vec_size_in_bits / element_bitsize;
3109
3110 optab = optab_for_tree_code (code, vectype, optab_default);
3111
3112 /* We have a whole vector shift available. */
3113 if (VECTOR_MODE_P (mode)
3114 && optab_handler (optab, mode) != CODE_FOR_nothing
3115 && optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
3116 {
3117 /* Final reduction via vector shifts and the reduction operator.
3118 Also requires scalar extract. */
3119 epilogue_cost += add_stmt_cost (target_cost_data,
3120 exact_log2 (nelements) * 2,
3121 vector_stmt, stmt_info, 0,
3122 vect_epilogue);
3123 epilogue_cost += add_stmt_cost (target_cost_data, 1,
3124 vec_to_scalar, stmt_info, 0,
3125 vect_epilogue);
3126 }
3127 else
3128 /* Use extracts and reduction op for final reduction. For N
3129 elements, we have N extracts and N-1 reduction ops. */
3130 epilogue_cost += add_stmt_cost (target_cost_data,
3131 nelements + nelements - 1,
3132 vector_stmt, stmt_info, 0,
3133 vect_epilogue);
3134 }
3135 }
3136
3137 if (dump_enabled_p ())
3138 dump_printf (MSG_NOTE,
3139 "vect_model_reduction_cost: inside_cost = %d, "
3140 "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost,
3141 prologue_cost, epilogue_cost);
3142
3143 return true;
3144 }
3145
3146
3147 /* Function vect_model_induction_cost.
3148
3149 Models cost for induction operations. */
3150
3151 static void
3152 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
3153 {
3154 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3155 void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3156 unsigned inside_cost, prologue_cost;
3157
3158 /* loop cost for vec_loop. */
3159 inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3160 stmt_info, 0, vect_body);
3161
3162 /* prologue cost for vec_init and vec_step. */
3163 prologue_cost = add_stmt_cost (target_cost_data, 2, scalar_to_vec,
3164 stmt_info, 0, vect_prologue);
3165
3166 if (dump_enabled_p ())
3167 dump_printf_loc (MSG_NOTE, vect_location,
3168 "vect_model_induction_cost: inside_cost = %d, "
3169 "prologue_cost = %d .\n", inside_cost, prologue_cost);
3170 }
3171
3172
3173 /* Function get_initial_def_for_induction
3174
3175 Input:
3176    IV_PHI - the loop-header phi node that defines the induction variable.
3177
3178    Output:
3179    Return a vector variable, initialized with the first VF values of
3180    the induction variable.  E.g., for an induction variable with
3181    initial value 'X' and evolution (step) S, for a vector of 4 units,
3182    we want to return:
3183    [X, X + S, X + 2*S, X + 3*S].  */
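/* For example, when vectorizing

       for (i = 0; i < n; i++)
         {
           b[i] = j;
           j = j + 2;
         }

   with VF = 4, the induction 'j' has initial value X = j_0 and step
   S = 2, so vec_init = [j_0, j_0+2, j_0+4, j_0+6] and the code below
   builds vec_step = [VF*S, VF*S, VF*S, VF*S] = [8, 8, 8, 8] to advance
   the whole vector by one vector iteration.  */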
3184
3185 static tree
3186 get_initial_def_for_induction (gimple iv_phi)
3187 {
3188 stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
3189 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3190 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3191 tree vectype;
3192 int nunits;
3193 edge pe = loop_preheader_edge (loop);
3194 struct loop *iv_loop;
3195 basic_block new_bb;
3196 tree new_vec, vec_init, vec_step, t;
3197 tree access_fn;
3198 tree new_var;
3199 tree new_name;
3200 gimple init_stmt, induction_phi, new_stmt;
3201 tree induc_def, vec_def, vec_dest;
3202 tree init_expr, step_expr;
3203 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3204 int i;
3205 bool ok;
3206 int ncopies;
3207 tree expr;
3208 stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
3209 bool nested_in_vect_loop = false;
3210 gimple_seq stmts = NULL;
3211 imm_use_iterator imm_iter;
3212 use_operand_p use_p;
3213 gimple exit_phi;
3214 edge latch_e;
3215 tree loop_arg;
3216 gimple_stmt_iterator si;
3217 basic_block bb = gimple_bb (iv_phi);
3218 tree stepvectype;
3219 tree resvectype;
3220
3221 /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop? */
3222 if (nested_in_vect_loop_p (loop, iv_phi))
3223 {
3224 nested_in_vect_loop = true;
3225 iv_loop = loop->inner;
3226 }
3227 else
3228 iv_loop = loop;
3229 gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);
3230
3231 latch_e = loop_latch_edge (iv_loop);
3232 loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
3233
3234 access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
3235 gcc_assert (access_fn);
3236 STRIP_NOPS (access_fn);
3237 ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
3238 &init_expr, &step_expr);
3239 gcc_assert (ok);
3240 pe = loop_preheader_edge (iv_loop);
3241
3242 vectype = get_vectype_for_scalar_type (TREE_TYPE (init_expr));
3243 resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
3244 gcc_assert (vectype);
3245 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3246 ncopies = vf / nunits;
3247
3248 gcc_assert (phi_info);
3249 gcc_assert (ncopies >= 1);
3250
3251 /* Find the first insertion point in the BB. */
3252 si = gsi_after_labels (bb);
3253
3254 /* Create the vector that holds the initial_value of the induction. */
3255 if (nested_in_vect_loop)
3256 {
3257 /* iv_loop is nested in the loop to be vectorized. init_expr had already
3258 been created during vectorization of previous stmts. We obtain it
3259 from the STMT_VINFO_VEC_STMT of the defining stmt. */
3260 tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi,
3261 loop_preheader_edge (iv_loop));
3262 vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
3263 /* If the initial value is not of proper type, convert it. */
3264 if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init)))
3265 {
3266 new_stmt = gimple_build_assign_with_ops
3267 (VIEW_CONVERT_EXPR,
3268 vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_"),
3269 build1 (VIEW_CONVERT_EXPR, vectype, vec_init), NULL_TREE);
3270 vec_init = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3271 gimple_assign_set_lhs (new_stmt, vec_init);
3272 new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop),
3273 new_stmt);
3274 gcc_assert (!new_bb);
3275 set_vinfo_for_stmt (new_stmt,
3276 new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3277 }
3278 }
3279 else
3280 {
3281 vec<constructor_elt, va_gc> *v;
3282
3283 /* iv_loop is the loop to be vectorized. Create:
3284 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
3285 new_var = vect_get_new_vect_var (TREE_TYPE (vectype),
3286 vect_scalar_var, "var_");
3287 new_name = force_gimple_operand (fold_convert (TREE_TYPE (vectype),
3288 init_expr),
3289 &stmts, false, new_var);
3290 if (stmts)
3291 {
3292 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3293 gcc_assert (!new_bb);
3294 }
3295
3296 vec_alloc (v, nunits);
3297 bool constant_p = is_gimple_min_invariant (new_name);
3298 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3299 for (i = 1; i < nunits; i++)
3300 {
3301 /* Create: new_name_i = new_name + step_expr */
3302 new_name = fold_build2 (PLUS_EXPR, TREE_TYPE (new_name),
3303 new_name, step_expr);
3304 if (!is_gimple_min_invariant (new_name))
3305 {
3306 init_stmt = gimple_build_assign (new_var, new_name);
3307 new_name = make_ssa_name (new_var, init_stmt);
3308 gimple_assign_set_lhs (init_stmt, new_name);
3309 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
3310 gcc_assert (!new_bb);
3311 if (dump_enabled_p ())
3312 {
3313 dump_printf_loc (MSG_NOTE, vect_location,
3314 "created new init_stmt: ");
3315 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, init_stmt, 0);
3316 dump_printf (MSG_NOTE, "\n");
3317 }
3318 constant_p = false;
3319 }
3320 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3321 }
3322 /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1] */
3323 if (constant_p)
3324 new_vec = build_vector_from_ctor (vectype, v);
3325 else
3326 new_vec = build_constructor (vectype, v);
3327 vec_init = vect_init_vector (iv_phi, new_vec, vectype, NULL);
3328 }
3329
3330
3331 /* Create the vector that holds the step of the induction. */
3332 if (nested_in_vect_loop)
3333 /* iv_loop is nested in the loop to be vectorized. Generate:
3334 vec_step = [S, S, S, S] */
3335 new_name = step_expr;
3336 else
3337 {
3338 /* iv_loop is the loop to be vectorized. Generate:
3339 vec_step = [VF*S, VF*S, VF*S, VF*S] */
3340 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
3341 {
3342 expr = build_int_cst (integer_type_node, vf);
3343 expr = fold_convert (TREE_TYPE (step_expr), expr);
3344 }
3345 else
3346 expr = build_int_cst (TREE_TYPE (step_expr), vf);
3347 new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3348 expr, step_expr);
3349 if (TREE_CODE (step_expr) == SSA_NAME)
3350 new_name = vect_init_vector (iv_phi, new_name,
3351 TREE_TYPE (step_expr), NULL);
3352 }
3353
3354 t = unshare_expr (new_name);
3355 gcc_assert (CONSTANT_CLASS_P (new_name)
3356 || TREE_CODE (new_name) == SSA_NAME);
3357 stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
3358 gcc_assert (stepvectype);
3359 new_vec = build_vector_from_val (stepvectype, t);
3360 vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3361
3362
3363 /* Create the following def-use cycle:
3364 loop prolog:
3365 vec_init = ...
3366 vec_step = ...
3367 loop:
3368 vec_iv = PHI <vec_init, vec_loop>
3369 ...
3370 STMT
3371 ...
3372 vec_loop = vec_iv + vec_step; */
3373
3374 /* Create the induction-phi that defines the induction-operand. */
3375 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
3376 induction_phi = create_phi_node (vec_dest, iv_loop->header);
3377 set_vinfo_for_stmt (induction_phi,
3378 new_stmt_vec_info (induction_phi, loop_vinfo, NULL));
3379 induc_def = PHI_RESULT (induction_phi);
3380
3381 /* Create the iv update inside the loop */
3382 new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
3383 induc_def, vec_step);
3384 vec_def = make_ssa_name (vec_dest, new_stmt);
3385 gimple_assign_set_lhs (new_stmt, vec_def);
3386 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3387 set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo,
3388 NULL));
3389
3390 /* Set the arguments of the phi node: */
3391 add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
3392 add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
3393 UNKNOWN_LOCATION);
3394
3395
3396   /* In case the vectorization factor (VF) is bigger than the number
3397      of elements that we can fit in a vectype (nunits), we have to generate
3398      more than one vector stmt - i.e., we need to "unroll" the
3399      vector stmt by a factor of VF/nunits.  For more details see the
3400      documentation in vectorizable_operation. */
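  /* Worked example: with VF = 8 and nunits = 4, ncopies = 8 / 4 = 2.
     The first copy is the induction phi created above (covering
     X .. X + 3*S); for the second copy the code below builds a step
     vector of [nunits*S, nunits*S, nunits*S, nunits*S] and adds it to
     the previous copy, so the two vectors together cover X .. X + 7*S.  */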
3401
3402 if (ncopies > 1)
3403 {
3404 stmt_vec_info prev_stmt_vinfo;
3405 /* FORNOW. This restriction should be relaxed. */
3406 gcc_assert (!nested_in_vect_loop);
3407
3408 /* Create the vector that holds the step of the induction. */
3409 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
3410 {
3411 expr = build_int_cst (integer_type_node, nunits);
3412 expr = fold_convert (TREE_TYPE (step_expr), expr);
3413 }
3414 else
3415 expr = build_int_cst (TREE_TYPE (step_expr), nunits);
3416 new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3417 expr, step_expr);
3418 if (TREE_CODE (step_expr) == SSA_NAME)
3419 new_name = vect_init_vector (iv_phi, new_name,
3420 TREE_TYPE (step_expr), NULL);
3421 t = unshare_expr (new_name);
3422 gcc_assert (CONSTANT_CLASS_P (new_name)
3423 || TREE_CODE (new_name) == SSA_NAME);
3424 new_vec = build_vector_from_val (stepvectype, t);
3425 vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3426
3427 vec_def = induc_def;
3428 prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
3429 for (i = 1; i < ncopies; i++)
3430 {
3431 /* vec_i = vec_prev + vec_step */
3432 new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
3433 vec_def, vec_step);
3434 vec_def = make_ssa_name (vec_dest, new_stmt);
3435 gimple_assign_set_lhs (new_stmt, vec_def);
3436
3437 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3438 if (!useless_type_conversion_p (resvectype, vectype))
3439 {
3440 new_stmt = gimple_build_assign_with_ops
3441 (VIEW_CONVERT_EXPR,
3442 vect_get_new_vect_var (resvectype, vect_simple_var,
3443 "vec_iv_"),
3444 build1 (VIEW_CONVERT_EXPR, resvectype,
3445 gimple_assign_lhs (new_stmt)), NULL_TREE);
3446 gimple_assign_set_lhs (new_stmt,
3447 make_ssa_name
3448 (gimple_assign_lhs (new_stmt), new_stmt));
3449 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3450 }
3451 set_vinfo_for_stmt (new_stmt,
3452 new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3453 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
3454 prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
3455 }
3456 }
3457
3458 if (nested_in_vect_loop)
3459 {
3460 /* Find the loop-closed exit-phi of the induction, and record
3461 the final vector of induction results: */
3462 exit_phi = NULL;
3463 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
3464 {
3465 if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (USE_STMT (use_p))))
3466 {
3467 exit_phi = USE_STMT (use_p);
3468 break;
3469 }
3470 }
3471 if (exit_phi)
3472 {
3473 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
3474 /* FORNOW. Currently not supporting the case that an inner-loop induction
3475 is not used in the outer-loop (i.e. only outside the outer-loop). */
3476 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
3477 && !STMT_VINFO_LIVE_P (stmt_vinfo));
3478
3479 STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
3480 if (dump_enabled_p ())
3481 {
3482 dump_printf_loc (MSG_NOTE, vect_location,
3483 "vector of inductions after inner-loop:");
3484 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
3485 dump_printf (MSG_NOTE, "\n");
3486 }
3487 }
3488 }
3489
3490
3491 if (dump_enabled_p ())
3492 {
3493 dump_printf_loc (MSG_NOTE, vect_location,
3494 "transform induction: created def-use cycle: ");
3495 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, induction_phi, 0);
3496 dump_printf (MSG_NOTE, "\n");
3497 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
3498 SSA_NAME_DEF_STMT (vec_def), 0);
3499 dump_printf (MSG_NOTE, "\n");
3500 }
3501
3502 STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
3503 if (!useless_type_conversion_p (resvectype, vectype))
3504 {
3505 new_stmt = gimple_build_assign_with_ops
3506 (VIEW_CONVERT_EXPR,
3507 vect_get_new_vect_var (resvectype, vect_simple_var, "vec_iv_"),
3508 build1 (VIEW_CONVERT_EXPR, resvectype, induc_def), NULL_TREE);
3509 induc_def = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3510 gimple_assign_set_lhs (new_stmt, induc_def);
3511 si = gsi_after_labels (bb);
3512 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3513 set_vinfo_for_stmt (new_stmt,
3514 new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3515 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
3516 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
3517 }
3518
3519 return induc_def;
3520 }
3521
3522
3523 /* Function get_initial_def_for_reduction
3524
3525 Input:
3526 STMT - a stmt that performs a reduction operation in the loop.
3527 INIT_VAL - the initial value of the reduction variable
3528
3529 Output:
3530 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
3531 of the reduction (used for adjusting the epilog - see below).
3532 Return a vector variable, initialized according to the operation that STMT
3533 performs. This vector will be used as the initial value of the
3534 vector of partial results.
3535
3536 Option1 (adjust in epilog): Initialize the vector as follows:
3537 add/bit or/xor: [0,0,...,0,0]
3538 mult/bit and: [1,1,...,1,1]
3539 min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
3540 and when necessary (e.g. add/mult case) let the caller know
3541 that it needs to adjust the result by init_val.
3542
3543 Option2: Initialize the vector as follows:
3544 add/bit or/xor: [init_val,0,0,...,0]
3545 mult/bit and: [init_val,1,1,...,1]
3546 min/max/cond_expr: [init_val,init_val,...,init_val]
3547 and no adjustments are needed.
3548
3549 For example, for the following code:
3550
3551 s = init_val;
3552 for (i=0;i<n;i++)
3553 s = s + a[i];
3554
3555 STMT is 's = s + a[i]', and the reduction variable is 's'.
3556 For a vector of 4 units, we want to return either [0,0,0,init_val],
3557 or [0,0,0,0] and let the caller know that it needs to adjust
3558 the result at the end by 'init_val'.
3559
3560 FORNOW, we use the 'adjust in epilog' scheme (Option1) when ADJUSTMENT_DEF
3561 is not NULL, because this way the initialization vector is simpler (same
3562 element in all entries), and we use Option2 otherwise.
3563
3564 A cost model should help decide between these two schemes. */
3565
3566 tree
3567 get_initial_def_for_reduction (gimple stmt, tree init_val,
3568 tree *adjustment_def)
3569 {
3570 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3571 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3572 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3573 tree scalar_type = TREE_TYPE (init_val);
3574 tree vectype = get_vectype_for_scalar_type (scalar_type);
3575 int nunits;
3576 enum tree_code code = gimple_assign_rhs_code (stmt);
3577 tree def_for_init;
3578 tree init_def;
3579 tree *elts;
3580 int i;
3581 bool nested_in_vect_loop = false;
3582 tree init_value;
3583 REAL_VALUE_TYPE real_init_val = dconst0;
3584 int int_init_val = 0;
3585 gimple def_stmt = NULL;
3586
3587 gcc_assert (vectype);
3588 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3589
3590 gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
3591 || SCALAR_FLOAT_TYPE_P (scalar_type));
3592
3593 if (nested_in_vect_loop_p (loop, stmt))
3594 nested_in_vect_loop = true;
3595 else
3596 gcc_assert (loop == (gimple_bb (stmt))->loop_father);
3597
3598 /* In case of double reduction we only create a vector variable to be put
3599 in the reduction phi node. The actual statement creation is done in
3600 vect_create_epilog_for_reduction. */
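/* E.g., for a double reduction the fresh vector variable created here later
becomes the result of the vector phi built in vect_create_epilog_for_reduction
(the "vs1 = phi <vs0, vs2>" node in the sketch there); only the variable
itself is needed at this point.  */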
3601 if (adjustment_def && nested_in_vect_loop
3602 && TREE_CODE (init_val) == SSA_NAME
3603 && (def_stmt = SSA_NAME_DEF_STMT (init_val))
3604 && gimple_code (def_stmt) == GIMPLE_PHI
3605 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
3606 && vinfo_for_stmt (def_stmt)
3607 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
3608 == vect_double_reduction_def)
3609 {
3610 *adjustment_def = NULL;
3611 return vect_create_destination_var (init_val, vectype);
3612 }
3613
3614 if (TREE_CONSTANT (init_val))
3615 {
3616 if (SCALAR_FLOAT_TYPE_P (scalar_type))
3617 init_value = build_real (scalar_type, TREE_REAL_CST (init_val));
3618 else
3619 init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val));
3620 }
3621 else
3622 init_value = init_val;
3623
3624 switch (code)
3625 {
3626 case WIDEN_SUM_EXPR:
3627 case DOT_PROD_EXPR:
3628 case PLUS_EXPR:
3629 case MINUS_EXPR:
3630 case BIT_IOR_EXPR:
3631 case BIT_XOR_EXPR:
3632 case MULT_EXPR:
3633 case BIT_AND_EXPR:
3634 /* ADJUSTMENT_DEF is NULL when called from
3635 vect_create_epilog_for_reduction to vectorize double reduction. */
3636 if (adjustment_def)
3637 {
3638 if (nested_in_vect_loop)
3639 *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt,
3640 NULL);
3641 else
3642 *adjustment_def = init_val;
3643 }
3644
3645 if (code == MULT_EXPR)
3646 {
3647 real_init_val = dconst1;
3648 int_init_val = 1;
3649 }
3650
3651 if (code == BIT_AND_EXPR)
3652 int_init_val = -1;
3653
3654 if (SCALAR_FLOAT_TYPE_P (scalar_type))
3655 def_for_init = build_real (scalar_type, real_init_val);
3656 else
3657 def_for_init = build_int_cst (scalar_type, int_init_val);
3658
3659 /* Create a vector of '0' or '1' except the first element. */
3660 elts = XALLOCAVEC (tree, nunits);
3661 for (i = nunits - 2; i >= 0; --i)
3662 elts[i + 1] = def_for_init;
3663
3664 /* Option1: the first element is '0' or '1' as well. */
3665 if (adjustment_def)
3666 {
3667 elts[0] = def_for_init;
3668 init_def = build_vector (vectype, elts);
3669 break;
3670 }
3671
3672 /* Option2: the first element is INIT_VAL. */
3673 elts[0] = init_val;
3674 if (TREE_CONSTANT (init_val))
3675 init_def = build_vector (vectype, elts);
3676 else
3677 {
3678 vec<constructor_elt, va_gc> *v;
3679 vec_alloc (v, nunits);
3680 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init_val);
3681 for (i = 1; i < nunits; ++i)
3682 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[i]);
3683 init_def = build_constructor (vectype, v);
3684 }
3685
3686 break;
3687
3688 case MIN_EXPR:
3689 case MAX_EXPR:
3690 case COND_EXPR:
3691 if (adjustment_def)
3692 {
3693 *adjustment_def = NULL_TREE;
3694 init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL);
3695 break;
3696 }
3697
3698 init_def = build_vector_from_val (vectype, init_value);
3699 break;
3700
3701 default:
3702 gcc_unreachable ();
3703 }
3704
3705 return init_def;
3706 }
3707
3708
3709 /* Function vect_create_epilog_for_reduction
3710
3711 Create code at the loop-epilog to finalize the result of a reduction
3712 computation.
3713
3714 VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector
3715 reduction statements.
3716 STMT is the scalar reduction stmt that is being vectorized.
3717 NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
3718 number of elements that we can fit in a vectype (nunits). In this case
3719 we have to generate more than one vector stmt - i.e - we need to "unroll"
3720 the vector stmt by a factor VF/nunits. For more details see documentation
3721 in vectorizable_operation.
3722 REDUC_CODE is the tree-code for the epilog reduction.
3723 REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
3724 computation.
3725 REDUC_INDEX is the index of the operand in the right hand side of the
3726 statement that is defined by REDUCTION_PHI.
3727 DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
3728 SLP_NODE is an SLP node containing a group of reduction statements. The
3729 first one in this group is STMT.
3730
3731 This function:
3732 1. Creates the reduction def-use cycles: sets the arguments for
3733 REDUCTION_PHIS:
3734 The loop-entry argument is the vectorized initial-value of the reduction.
3735 The loop-latch argument is taken from VECT_DEFS - the vector of partial
3736 sums.
3737 2. "Reduces" each vector of partial results VECT_DEFS into a single result,
3738 by applying the operation specified by REDUC_CODE if available, or by
3739 other means (whole-vector shifts or a scalar loop).
3740 The function also creates a new phi node at the loop exit to preserve
3741 loop-closed form, as illustrated below.
3742
3743 The flow at the entry to this function:
3744
3745 loop:
3746 vec_def = phi <null, null> # REDUCTION_PHI
3747 VECT_DEF = vector_stmt # vectorized form of STMT
3748 s_loop = scalar_stmt # (scalar) STMT
3749 loop_exit:
3750 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
3751 use <s_out0>
3752 use <s_out0>
3753
3754 The above is transformed by this function into:
3755
3756 loop:
3757 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
3758 VECT_DEF = vector_stmt # vectorized form of STMT
3759 s_loop = scalar_stmt # (scalar) STMT
3760 loop_exit:
3761 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
3762 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
3763 v_out2 = reduce <v_out1>
3764 s_out3 = extract_field <v_out2, 0>
3765 s_out4 = adjust_result <s_out3>
3766 use <s_out4>
3767 use <s_out4>
3768 */
3769
3770 static void
3771 vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
3772 int ncopies, enum tree_code reduc_code,
3773 vec<gimple> reduction_phis,
3774 int reduc_index, bool double_reduc,
3775 slp_tree slp_node)
3776 {
3777 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3778 stmt_vec_info prev_phi_info;
3779 tree vectype;
3780 enum machine_mode mode;
3781 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3782 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
3783 basic_block exit_bb;
3784 tree scalar_dest;
3785 tree scalar_type;
3786 gimple new_phi = NULL, phi;
3787 gimple_stmt_iterator exit_gsi;
3788 tree vec_dest;
3789 tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
3790 gimple epilog_stmt = NULL;
3791 enum tree_code code = gimple_assign_rhs_code (stmt);
3792 gimple exit_phi;
3793 tree bitsize, bitpos;
3794 tree adjustment_def = NULL;
3795 tree vec_initial_def = NULL;
3796 tree reduction_op, expr, def;
3797 tree orig_name, scalar_result;
3798 imm_use_iterator imm_iter, phi_imm_iter;
3799 use_operand_p use_p, phi_use_p;
3800 bool extract_scalar_result = false;
3801 gimple use_stmt, orig_stmt, reduction_phi = NULL;
3802 bool nested_in_vect_loop = false;
3803 vec<gimple> new_phis = vNULL;
3804 vec<gimple> inner_phis = vNULL;
3805 enum vect_def_type dt = vect_unknown_def_type;
3806 int j, i;
3807 vec<tree> scalar_results = vNULL;
3808 unsigned int group_size = 1, k, ratio;
3809 vec<tree> vec_initial_defs = vNULL;
3810 vec<gimple> phis;
3811 bool slp_reduc = false;
3812 tree new_phi_result;
3813 gimple inner_phi = NULL;
3814
3815 if (slp_node)
3816 group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
3817
3818 if (nested_in_vect_loop_p (loop, stmt))
3819 {
3820 outer_loop = loop;
3821 loop = loop->inner;
3822 nested_in_vect_loop = true;
3823 gcc_assert (!slp_node);
3824 }
3825
3826 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3827 {
3828 case GIMPLE_SINGLE_RHS:
3829 gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
3830 == ternary_op);
3831 reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
3832 break;
3833 case GIMPLE_UNARY_RHS:
3834 reduction_op = gimple_assign_rhs1 (stmt);
3835 break;
3836 case GIMPLE_BINARY_RHS:
3837 reduction_op = reduc_index ?
3838 gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt);
3839 break;
3840 case GIMPLE_TERNARY_RHS:
3841 reduction_op = gimple_op (stmt, reduc_index + 1);
3842 break;
3843 default:
3844 gcc_unreachable ();
3845 }
3846
3847 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3848 gcc_assert (vectype);
3849 mode = TYPE_MODE (vectype);
3850
3851 /* 1. Create the reduction def-use cycle:
3852 Set the arguments of REDUCTION_PHIS, i.e., transform
3853
3854 loop:
3855 vec_def = phi <null, null> # REDUCTION_PHI
3856 VECT_DEF = vector_stmt # vectorized form of STMT
3857 ...
3858
3859 into:
3860
3861 loop:
3862 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
3863 VECT_DEF = vector_stmt # vectorized form of STMT
3864 ...
3865
3866 (in case of SLP, do it for all the phis). */
3867
3868 /* Get the loop-entry arguments. */
3869 if (slp_node)
3870 vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
3871 NULL, slp_node, reduc_index);
3872 else
3873 {
3874 vec_initial_defs.create (1);
3875 /* For the case of reduction, vect_get_vec_def_for_operand returns
3876 the scalar def before the loop, which defines the initial value
3877 of the reduction variable. */
3878 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
3879 &adjustment_def);
3880 vec_initial_defs.quick_push (vec_initial_def);
3881 }
3882
3883 /* Set phi nodes arguments. */
3884 FOR_EACH_VEC_ELT (reduction_phis, i, phi)
3885 {
3886 tree vec_init_def = vec_initial_defs[i];
3887 tree def = vect_defs[i];
3888 for (j = 0; j < ncopies; j++)
3889 {
3890 /* Set the loop-entry arg of the reduction-phi. */
3891 add_phi_arg (phi, vec_init_def, loop_preheader_edge (loop),
3892 UNKNOWN_LOCATION);
3893
3894 /* Set the loop-latch arg for the reduction-phi. */
3895 if (j > 0)
3896 def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);
3897
3898 add_phi_arg (phi, def, loop_latch_edge (loop), UNKNOWN_LOCATION);
3899
3900 if (dump_enabled_p ())
3901 {
3902 dump_printf_loc (MSG_NOTE, vect_location,
3903 "transform reduction: created def-use cycle: ");
3904 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
3905 dump_printf (MSG_NOTE, "\n");
3906 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (def), 0);
3907 dump_printf (MSG_NOTE, "\n");
3908 }
3909
3910 phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
3911 }
3912 }
3913
3914 vec_initial_defs.release ();
3915
3916 /* 2. Create epilog code.
3917 The reduction epilog code operates across the elements of the vector
3918 of partial results computed by the vectorized loop.
3919 The reduction epilog code consists of:
3920
3921 step 1: compute the scalar result in a vector (v_out2)
3922 step 2: extract the scalar result (s_out3) from the vector (v_out2)
3923 step 3: adjust the scalar result (s_out3) if needed.
3924
3925 Step 1 can be accomplished using one of the following three schemes:
3926 (scheme 1) using reduc_code, if available.
3927 (scheme 2) using whole-vector shifts, if available.
3928 (scheme 3) using a scalar loop. In this case steps 1+2 above are
3929 combined.
3930
3931 The overall epilog code looks like this:
3932
3933 s_out0 = phi <s_loop> # original EXIT_PHI
3934 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
3935 v_out2 = reduce <v_out1> # step 1
3936 s_out3 = extract_field <v_out2, 0> # step 2
3937 s_out4 = adjust_result <s_out3> # step 3
3938
3939 (step 3 is optional, and steps 1 and 2 may be combined).
3940 Lastly, the uses of s_out0 are replaced by s_out4. */
3941
3942
3943 /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
3944 v_out1 = phi <VECT_DEF>
3945 Store them in NEW_PHIS. */
3946
3947 exit_bb = single_exit (loop)->dest;
3948 prev_phi_info = NULL;
3949 new_phis.create (vect_defs.length ());
3950 FOR_EACH_VEC_ELT (vect_defs, i, def)
3951 {
3952 for (j = 0; j < ncopies; j++)
3953 {
3954 tree new_def = copy_ssa_name (def, NULL);
3955 phi = create_phi_node (new_def, exit_bb);
3956 set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo, NULL));
3957 if (j == 0)
3958 new_phis.quick_push (phi);
3959 else
3960 {
3961 def = vect_get_vec_def_for_stmt_copy (dt, def);
3962 STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
3963 }
3964
3965 SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
3966 prev_phi_info = vinfo_for_stmt (phi);
3967 }
3968 }
3969
3970 /* The epilogue is created for the outer-loop, i.e., for the loop being
3971 vectorized. Create exit phis for the outer loop. */
3972 if (double_reduc)
3973 {
3974 loop = outer_loop;
3975 exit_bb = single_exit (loop)->dest;
3976 inner_phis.create (vect_defs.length ());
3977 FOR_EACH_VEC_ELT (new_phis, i, phi)
3978 {
3979 tree new_result = copy_ssa_name (PHI_RESULT (phi), NULL);
3980 gimple outer_phi = create_phi_node (new_result, exit_bb);
3981 SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
3982 PHI_RESULT (phi));
3983 set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
3984 loop_vinfo, NULL));
3985 inner_phis.quick_push (phi);
3986 new_phis[i] = outer_phi;
3987 prev_phi_info = vinfo_for_stmt (outer_phi);
3988 while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
3989 {
3990 phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
3991 new_result = copy_ssa_name (PHI_RESULT (phi), NULL);
3992 outer_phi = create_phi_node (new_result, exit_bb);
3993 SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
3994 PHI_RESULT (phi));
3995 set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
3996 loop_vinfo, NULL));
3997 STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
3998 prev_phi_info = vinfo_for_stmt (outer_phi);
3999 }
4000 }
4001 }
4002
4003 exit_gsi = gsi_after_labels (exit_bb);
4004
4005 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
4006 (i.e. when reduc_code is not available) and in the final adjustment
4007 code (if needed). Also get the original scalar reduction variable as
4008 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
4009 represents a reduction pattern), the tree-code and scalar-def are
4010 taken from the original stmt that the pattern-stmt (STMT) replaces.
4011 Otherwise (it is a regular reduction) - the tree-code and scalar-def
4012 are taken from STMT. */
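/* E.g., for the widen-sum pattern described in vectorizable_reduction below
(STMT: int_acc = widen_sum <short_a, int_acc>), the tree-code and scalar-def
come from the original S2: int_acc = plus <int_a, int_acc>, so CODE becomes
PLUS_EXPR.  */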
4013
4014 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4015 if (!orig_stmt)
4016 {
4017 /* Regular reduction */
4018 orig_stmt = stmt;
4019 }
4020 else
4021 {
4022 /* Reduction pattern */
4023 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
4024 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
4025 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4026 }
4027
4028 code = gimple_assign_rhs_code (orig_stmt);
4029 /* For MINUS_EXPR the initial vector is [init_val,0,...,0]; therefore,
4030 partial results are added and not subtracted. */
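/* E.g., for s -= a[i] the vector accumulator starts as [init_val,0,...,0]
and each lane keeps subtracting its own elements, so lane 0 holds init_val
minus its partial sum and the other lanes hold negated partial sums; adding
all the lanes gives init_val minus the total sum, the desired scalar
result.  */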
4031 if (code == MINUS_EXPR)
4032 code = PLUS_EXPR;
4033
4034 scalar_dest = gimple_assign_lhs (orig_stmt);
4035 scalar_type = TREE_TYPE (scalar_dest);
4036 scalar_results.create (group_size);
4037 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
4038 bitsize = TYPE_SIZE (scalar_type);
4039
4040 /* In case this is a reduction in an inner-loop while vectorizing an outer
4041 loop - we don't need to extract a single scalar result at the end of the
4042 inner-loop (unless it is double reduction, i.e., the use of reduction is
4043 outside the outer-loop). The final vector of partial results will be used
4044 in the vectorized outer-loop, or reduced to a scalar result at the end of
4045 the outer-loop. */
4046 if (nested_in_vect_loop && !double_reduc)
4047 goto vect_finalize_reduction;
4048
4049 /* SLP reduction without reduction chain, e.g.,
4050 # a1 = phi <a2, a0>
4051 # b1 = phi <b2, b0>
4052 a2 = operation (a1)
4053 b2 = operation (b1) */
4054 slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
4055
4056 /* In case of reduction chain, e.g.,
4057 # a1 = phi <a3, a0>
4058 a2 = operation (a1)
4059 a3 = operation (a2),
4060
4061 we may end up with more than one vector result. Here we reduce them to
4062 one vector. */
4063 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4064 {
4065 tree first_vect = PHI_RESULT (new_phis[0]);
4066 tree tmp;
4067 gimple new_vec_stmt = NULL;
4068
4069 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4070 for (k = 1; k < new_phis.length (); k++)
4071 {
4072 gimple next_phi = new_phis[k];
4073 tree second_vect = PHI_RESULT (next_phi);
4074
4075 tmp = build2 (code, vectype, first_vect, second_vect);
4076 new_vec_stmt = gimple_build_assign (vec_dest, tmp);
4077 first_vect = make_ssa_name (vec_dest, new_vec_stmt);
4078 gimple_assign_set_lhs (new_vec_stmt, first_vect);
4079 gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
4080 }
4081
4082 new_phi_result = first_vect;
4083 if (new_vec_stmt)
4084 {
4085 new_phis.truncate (0);
4086 new_phis.safe_push (new_vec_stmt);
4087 }
4088 }
4089 else
4090 new_phi_result = PHI_RESULT (new_phis[0]);
4091
4092 /* 2.3 Create the reduction code, using one of the three schemes described
4093 above. In SLP we simply need to extract all the elements from the
4094 vector (without reducing them), so we use scalar shifts. */
4095 if (reduc_code != ERROR_MARK && !slp_reduc)
4096 {
4097 tree tmp;
4098
4099 /*** Case 1: Create:
4100 v_out2 = reduc_expr <v_out1> */
4101
4102 if (dump_enabled_p ())
4103 dump_printf_loc (MSG_NOTE, vect_location,
4104 "Reduce using direct vector reduction.\n");
4105
4106 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4107 tmp = build1 (reduc_code, vectype, new_phi_result);
4108 epilog_stmt = gimple_build_assign (vec_dest, tmp);
4109 new_temp = make_ssa_name (vec_dest, epilog_stmt);
4110 gimple_assign_set_lhs (epilog_stmt, new_temp);
4111 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4112
4113 extract_scalar_result = true;
4114 }
4115 else
4116 {
4117 enum tree_code shift_code = ERROR_MARK;
4118 bool have_whole_vector_shift = true;
4119 int bit_offset;
4120 int element_bitsize = tree_to_uhwi (bitsize);
4121 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4122 tree vec_temp;
4123
4124 if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
4125 shift_code = VEC_RSHIFT_EXPR;
4126 else
4127 have_whole_vector_shift = false;
4128
4129 /* Regardless of whether we have a whole vector shift, if we're
4130 emulating the operation via tree-vect-generic, we don't want
4131 to use it. Only the first round of the reduction is likely
4132 to still be profitable via emulation. */
4133 /* ??? It might be better to emit a reduction tree code here, so that
4134 tree-vect-generic can expand the first round via bit tricks. */
4135 if (!VECTOR_MODE_P (mode))
4136 have_whole_vector_shift = false;
4137 else
4138 {
4139 optab optab = optab_for_tree_code (code, vectype, optab_default);
4140 if (optab_handler (optab, mode) == CODE_FOR_nothing)
4141 have_whole_vector_shift = false;
4142 }
4143
4144 if (have_whole_vector_shift && !slp_reduc)
4145 {
4146 /*** Case 2: Create:
4147 for (offset = VS/2; offset >= element_size; offset/=2)
4148 {
4149 Create: va' = vec_shift <va, offset>
4150 Create: va = vop <va, va'>
4151 } */
4152
4153 if (dump_enabled_p ())
4154 dump_printf_loc (MSG_NOTE, vect_location,
4155 "Reduce using vector shifts\n");
4156
4157 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4158 new_temp = new_phi_result;
4159 for (bit_offset = vec_size_in_bits/2;
4160 bit_offset >= element_bitsize;
4161 bit_offset /= 2)
4162 {
4163 tree bitpos = size_int (bit_offset);
4164
4165 epilog_stmt = gimple_build_assign_with_ops (shift_code,
4166 vec_dest, new_temp, bitpos);
4167 new_name = make_ssa_name (vec_dest, epilog_stmt);
4168 gimple_assign_set_lhs (epilog_stmt, new_name);
4169 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4170
4171 epilog_stmt = gimple_build_assign_with_ops (code, vec_dest,
4172 new_name, new_temp);
4173 new_temp = make_ssa_name (vec_dest, epilog_stmt);
4174 gimple_assign_set_lhs (epilog_stmt, new_temp);
4175 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4176 }
4177
4178 extract_scalar_result = true;
4179 }
4180 else
4181 {
4182 tree rhs;
4183
4184 /*** Case 3: Create:
4185 s = extract_field <v_out2, 0>
4186 for (offset = element_size;
4187 offset < vector_size;
4188 offset += element_size;)
4189 {
4190 Create: s' = extract_field <v_out2, offset>
4191 Create: s = op <s, s'> // For non SLP cases
4192 } */
4193
4194 if (dump_enabled_p ())
4195 dump_printf_loc (MSG_NOTE, vect_location,
4196 "Reduce using scalar code.\n");
4197
4198 vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4199 FOR_EACH_VEC_ELT (new_phis, i, new_phi)
4200 {
4201 if (gimple_code (new_phi) == GIMPLE_PHI)
4202 vec_temp = PHI_RESULT (new_phi);
4203 else
4204 vec_temp = gimple_assign_lhs (new_phi);
4205 rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
4206 bitsize_zero_node);
4207 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4208 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4209 gimple_assign_set_lhs (epilog_stmt, new_temp);
4210 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4211
4212 /* In SLP we don't need to apply reduction operation, so we just
4213 collect s' values in SCALAR_RESULTS. */
4214 if (slp_reduc)
4215 scalar_results.safe_push (new_temp);
4216
4217 for (bit_offset = element_bitsize;
4218 bit_offset < vec_size_in_bits;
4219 bit_offset += element_bitsize)
4220 {
4221 tree bitpos = bitsize_int (bit_offset);
4222 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
4223 bitsize, bitpos);
4224
4225 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4226 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
4227 gimple_assign_set_lhs (epilog_stmt, new_name);
4228 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4229
4230 if (slp_reduc)
4231 {
4232 /* In SLP we don't need to apply reduction operation, so
4233 we just collect s' values in SCALAR_RESULTS. */
4234 new_temp = new_name;
4235 scalar_results.safe_push (new_name);
4236 }
4237 else
4238 {
4239 epilog_stmt = gimple_build_assign_with_ops (code,
4240 new_scalar_dest, new_name, new_temp);
4241 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4242 gimple_assign_set_lhs (epilog_stmt, new_temp);
4243 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4244 }
4245 }
4246 }
4247
4248 /* The only case where we need to reduce scalar results in SLP is
4249 unrolling. If the size of SCALAR_RESULTS is greater than
4250 GROUP_SIZE, we reduce them by combining elements modulo
4251 GROUP_SIZE. */
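/* E.g., with GROUP_SIZE == 2 and four scalar results s0..s3 from a PLUS
reduction, s2 is combined into scalar_results[0] (with s0) and s3 into
scalar_results[1] (with s1), leaving one result per group member.  */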
4252 if (slp_reduc)
4253 {
4254 tree res, first_res, new_res;
4255 gimple new_stmt;
4256
4257 /* Reduce multiple scalar results in case of SLP unrolling. */
4258 for (j = group_size; scalar_results.iterate (j, &res);
4259 j++)
4260 {
4261 first_res = scalar_results[j % group_size];
4262 new_stmt = gimple_build_assign_with_ops (code,
4263 new_scalar_dest, first_res, res);
4264 new_res = make_ssa_name (new_scalar_dest, new_stmt);
4265 gimple_assign_set_lhs (new_stmt, new_res);
4266 gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
4267 scalar_results[j % group_size] = new_res;
4268 }
4269 }
4270 else
4271 /* Not SLP - we have one scalar to keep in SCALAR_RESULTS. */
4272 scalar_results.safe_push (new_temp);
4273
4274 extract_scalar_result = false;
4275 }
4276 }
4277
4278 /* 2.4 Extract the final scalar result. Create:
4279 s_out3 = extract_field <v_out2, bitpos> */
4280
4281 if (extract_scalar_result)
4282 {
4283 tree rhs;
4284
4285 if (dump_enabled_p ())
4286 dump_printf_loc (MSG_NOTE, vect_location,
4287 "extract scalar result\n");
4288
4289 if (BYTES_BIG_ENDIAN)
4290 bitpos = size_binop (MULT_EXPR,
4291 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
4292 TYPE_SIZE (scalar_type));
4293 else
4294 bitpos = bitsize_zero_node;
4295
4296 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
4297 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4298 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4299 gimple_assign_set_lhs (epilog_stmt, new_temp);
4300 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4301 scalar_results.safe_push (new_temp);
4302 }
4303
4304 vect_finalize_reduction:
4305
4306 if (double_reduc)
4307 loop = loop->inner;
4308
4309 /* 2.5 Adjust the final result by the initial value of the reduction
4310 variable. (When such adjustment is not needed, then
4311 'adjustment_def' is zero). For example, if code is PLUS we create:
4312 new_temp = loop_exit_def + adjustment_def */
4313
4314 if (adjustment_def)
4315 {
4316 gcc_assert (!slp_reduc);
4317 if (nested_in_vect_loop)
4318 {
4319 new_phi = new_phis[0];
4320 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
4321 expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
4322 new_dest = vect_create_destination_var (scalar_dest, vectype);
4323 }
4324 else
4325 {
4326 new_temp = scalar_results[0];
4327 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
4328 expr = build2 (code, scalar_type, new_temp, adjustment_def);
4329 new_dest = vect_create_destination_var (scalar_dest, scalar_type);
4330 }
4331
4332 epilog_stmt = gimple_build_assign (new_dest, expr);
4333 new_temp = make_ssa_name (new_dest, epilog_stmt);
4334 gimple_assign_set_lhs (epilog_stmt, new_temp);
4335 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4336 if (nested_in_vect_loop)
4337 {
4338 set_vinfo_for_stmt (epilog_stmt,
4339 new_stmt_vec_info (epilog_stmt, loop_vinfo,
4340 NULL));
4341 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
4342 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
4343
4344 if (!double_reduc)
4345 scalar_results.quick_push (new_temp);
4346 else
4347 scalar_results[0] = new_temp;
4348 }
4349 else
4350 scalar_results[0] = new_temp;
4351
4352 new_phis[0] = epilog_stmt;
4353 }
4354
4355 /* 2.6 Handle the loop-exit phis. Replace the uses of scalar loop-exit
4356 phis with new adjusted scalar results, i.e., replace use <s_out0>
4357 with use <s_out4>.
4358
4359 Transform:
4360 loop_exit:
4361 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4362 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4363 v_out2 = reduce <v_out1>
4364 s_out3 = extract_field <v_out2, 0>
4365 s_out4 = adjust_result <s_out3>
4366 use <s_out0>
4367 use <s_out0>
4368
4369 into:
4370
4371 loop_exit:
4372 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4373 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4374 v_out2 = reduce <v_out1>
4375 s_out3 = extract_field <v_out2, 0>
4376 s_out4 = adjust_result <s_out3>
4377 use <s_out4>
4378 use <s_out4> */
4379
4380
4381 /* In an SLP reduction chain we reduce the vector results into one vector if
4382 necessary, hence we set GROUP_SIZE to 1 here. SCALAR_DEST is the LHS of
4383 the last stmt in the reduction chain, since we are looking for the loop
4384 exit phi node. */
4385 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4386 {
4387 scalar_dest = gimple_assign_lhs (
4388 SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]);
4389 group_size = 1;
4390 }
4391
4392 /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
4393 case GROUP_SIZE is greater than the vectorization factor). Therefore, we
4394 need to match SCALAR_RESULTS with corresponding statements. The first
4395 (GROUP_SIZE / number of new vector stmts) scalar results correspond to
4396 the first vector stmt, etc.
4397 (RATIO is equal to (GROUP_SIZE / number of new vector stmts)). */
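/* E.g., GROUP_SIZE == 4 with two new vector stmts gives RATIO == 2: scalar
results 0-1 are matched with the first vector stmt and scalar results 2-3
with the second.  */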
4398 if (group_size > new_phis.length ())
4399 {
4400 ratio = group_size / new_phis.length ();
4401 gcc_assert (!(group_size % new_phis.length ()));
4402 }
4403 else
4404 ratio = 1;
4405
4406 for (k = 0; k < group_size; k++)
4407 {
4408 if (k % ratio == 0)
4409 {
4410 epilog_stmt = new_phis[k / ratio];
4411 reduction_phi = reduction_phis[k / ratio];
4412 if (double_reduc)
4413 inner_phi = inner_phis[k / ratio];
4414 }
4415
4416 if (slp_reduc)
4417 {
4418 gimple current_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[k];
4419
4420 orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
4421 /* SLP statements can't participate in patterns. */
4422 gcc_assert (!orig_stmt);
4423 scalar_dest = gimple_assign_lhs (current_stmt);
4424 }
4425
4426 phis.create (3);
4427 /* Find the loop-closed-use at the loop exit of the original scalar
4428 result. (The reduction result is expected to have two immediate uses -
4429 one at the latch block, and one at the loop exit). */
4430 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4431 if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))
4432 && !is_gimple_debug (USE_STMT (use_p)))
4433 phis.safe_push (USE_STMT (use_p));
4434
4435 /* While we expect to have found an exit_phi because of loop-closed-ssa
4436 form, we can end up without one if the scalar cycle is dead. */
4437
4438 FOR_EACH_VEC_ELT (phis, i, exit_phi)
4439 {
4440 if (outer_loop)
4441 {
4442 stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
4443 gimple vect_phi;
4444
4445 /* FORNOW. Currently not supporting the case that an inner-loop
4446 reduction is not used in the outer-loop (but only outside the
4447 outer-loop), unless it is double reduction. */
4448 gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
4449 && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
4450 || double_reduc);
4451
4452 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
4453 if (!double_reduc
4454 || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
4455 != vect_double_reduction_def)
4456 continue;
4457
4458 /* Handle double reduction:
4459
4460 stmt1: s1 = phi <s0, s2> - double reduction phi (outer loop)
4461 stmt2: s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
4462 stmt3: s4 = use (s3) - (regular) reduc stmt (inner loop)
4463 stmt4: s2 = phi <s4> - double reduction stmt (outer loop)
4464
4465 At that point the regular reduction (stmt2 and stmt3) is
4466 already vectorized, as well as the exit phi node, stmt4.
4467 Here we vectorize the phi node of double reduction, stmt1, and
4468 update all relevant statements. */
4469
4470 /* Go through all the uses of s2 to find double reduction phi
4471 node, i.e., stmt1 above. */
4472 orig_name = PHI_RESULT (exit_phi);
4473 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4474 {
4475 stmt_vec_info use_stmt_vinfo;
4476 stmt_vec_info new_phi_vinfo;
4477 tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
4478 basic_block bb = gimple_bb (use_stmt);
4479 gimple use;
4480
4481 /* Check that USE_STMT is really a double reduction phi
4482 node. */
4483 if (gimple_code (use_stmt) != GIMPLE_PHI
4484 || gimple_phi_num_args (use_stmt) != 2
4485 || bb->loop_father != outer_loop)
4486 continue;
4487 use_stmt_vinfo = vinfo_for_stmt (use_stmt);
4488 if (!use_stmt_vinfo
4489 || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
4490 != vect_double_reduction_def)
4491 continue;
4492
4493 /* Create vector phi node for double reduction:
4494 vs1 = phi <vs0, vs2>
4495 vs1 was created previously in this function by a call to
4496 vect_get_vec_def_for_operand and is stored in
4497 vec_initial_def;
4498 vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
4499 vs0 is created here. */
4500
4501 /* Create vector phi node. */
4502 vect_phi = create_phi_node (vec_initial_def, bb);
4503 new_phi_vinfo = new_stmt_vec_info (vect_phi,
4504 loop_vec_info_for_loop (outer_loop), NULL);
4505 set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
4506
4507 /* Create vs0 - initial def of the double reduction phi. */
4508 preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
4509 loop_preheader_edge (outer_loop));
4510 init_def = get_initial_def_for_reduction (stmt,
4511 preheader_arg, NULL);
4512 vect_phi_init = vect_init_vector (use_stmt, init_def,
4513 vectype, NULL);
4514
4515 /* Update phi node arguments with vs0 and vs2. */
4516 add_phi_arg (vect_phi, vect_phi_init,
4517 loop_preheader_edge (outer_loop),
4518 UNKNOWN_LOCATION);
4519 add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
4520 loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
4521 if (dump_enabled_p ())
4522 {
4523 dump_printf_loc (MSG_NOTE, vect_location,
4524 "created double reduction phi node: ");
4525 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vect_phi, 0);
4526 dump_printf (MSG_NOTE, "\n");
4527 }
4528
4529 vect_phi_res = PHI_RESULT (vect_phi);
4530
4531 /* Replace the use, i.e., set the correct vs1 in the regular
4532 reduction phi node. FORNOW, NCOPIES is always 1, so the
4533 loop is redundant. */
4534 use = reduction_phi;
4535 for (j = 0; j < ncopies; j++)
4536 {
4537 edge pr_edge = loop_preheader_edge (loop);
4538 SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
4539 use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
4540 }
4541 }
4542 }
4543 }
4544
4545 phis.release ();
4546 if (nested_in_vect_loop)
4547 {
4548 if (double_reduc)
4549 loop = outer_loop;
4550 else
4551 continue;
4552 }
4553
4554 phis.create (3);
4555 /* Find the loop-closed-use at the loop exit of the original scalar
4556 result. (The reduction result is expected to have two immediate uses,
4557 one at the latch block, and one at the loop exit). For double
4558 reductions we are looking for exit phis of the outer loop. */
4559 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4560 {
4561 if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
4562 {
4563 if (!is_gimple_debug (USE_STMT (use_p)))
4564 phis.safe_push (USE_STMT (use_p));
4565 }
4566 else
4567 {
4568 if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
4569 {
4570 tree phi_res = PHI_RESULT (USE_STMT (use_p));
4571
4572 FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
4573 {
4574 if (!flow_bb_inside_loop_p (loop,
4575 gimple_bb (USE_STMT (phi_use_p)))
4576 && !is_gimple_debug (USE_STMT (phi_use_p)))
4577 phis.safe_push (USE_STMT (phi_use_p));
4578 }
4579 }
4580 }
4581 }
4582
4583 FOR_EACH_VEC_ELT (phis, i, exit_phi)
4584 {
4585 /* Replace the uses: */
4586 orig_name = PHI_RESULT (exit_phi);
4587 scalar_result = scalar_results[k];
4588 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4589 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
4590 SET_USE (use_p, scalar_result);
4591 }
4592
4593 phis.release ();
4594 }
4595
4596 scalar_results.release ();
4597 inner_phis.release ();
4598 new_phis.release ();
4599 }
4600
4601
4602 /* Function vectorizable_reduction.
4603
4604 Check if STMT performs a reduction operation that can be vectorized.
4605 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4606 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4607 Return FALSE if not a vectorizable STMT, TRUE otherwise.
4608
4609 This function also handles reduction idioms (patterns) that have been
4610 recognized in advance during vect_pattern_recog. In this case, STMT may be
4611 of this form:
4612 X = pattern_expr (arg0, arg1, ..., X)
4613 and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
4614 sequence that had been detected and replaced by the pattern-stmt (STMT).
4615
4616 In some cases of reduction patterns, the type of the reduction variable X is
4617 different than the type of the other arguments of STMT.
4618 In such cases, the vectype that is used when transforming STMT into a vector
4619 stmt is different than the vectype that is used to determine the
4620 vectorization factor, because it consists of a different number of elements
4621 than the actual number of elements that are being operated upon in parallel.
4622
4623 For example, consider an accumulation of shorts into an int accumulator.
4624 On some targets it's possible to vectorize this pattern operating on 8
4625 shorts at a time (hence, the vectype for purposes of determining the
4626 vectorization factor should be V8HI); on the other hand, the vectype that
4627 is used to create the vector form is actually V4SI (the type of the result).
4628
4629 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
4630 indicates the actual level of parallelism (V8HI in the example), so
4631 that the right vectorization factor would be derived. This vectype
4632 corresponds to the type of arguments to the reduction stmt, and should *NOT*
4633 be used to create the vectorized stmt. The right vectype for the vectorized
4634 stmt is obtained from the type of the result X:
4635 get_vectype_for_scalar_type (TREE_TYPE (X))
4636
4637 This means that, contrary to "regular" reductions (or "regular" stmts in
4638 general), the following equation:
4639 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
4640 does *NOT* necessarily hold for reduction patterns. */
4641
4642 bool
4643 vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
4644 gimple *vec_stmt, slp_tree slp_node)
4645 {
4646 tree vec_dest;
4647 tree scalar_dest;
4648 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
4649 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4650 tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4651 tree vectype_in = NULL_TREE;
4652 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4653 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4654 enum tree_code code, orig_code, epilog_reduc_code;
4655 enum machine_mode vec_mode;
4656 int op_type;
4657 optab optab, reduc_optab;
4658 tree new_temp = NULL_TREE;
4659 tree def;
4660 gimple def_stmt;
4661 enum vect_def_type dt;
4662 gimple new_phi = NULL;
4663 tree scalar_type;
4664 bool is_simple_use;
4665 gimple orig_stmt;
4666 stmt_vec_info orig_stmt_info;
4667 tree expr = NULL_TREE;
4668 int i;
4669 int ncopies;
4670 int epilog_copies;
4671 stmt_vec_info prev_stmt_info, prev_phi_info;
4672 bool single_defuse_cycle = false;
4673 tree reduc_def = NULL_TREE;
4674 gimple new_stmt = NULL;
4675 int j;
4676 tree ops[3];
4677 bool nested_cycle = false, found_nested_cycle_def = false;
4678 gimple reduc_def_stmt = NULL;
4679 /* The default is that the reduction variable is the last in statement. */
4680 int reduc_index = 2;
4681 bool double_reduc = false, dummy;
4682 basic_block def_bb;
4683 struct loop * def_stmt_loop, *outer_loop = NULL;
4684 tree def_arg;
4685 gimple def_arg_stmt;
4686 vec<tree> vec_oprnds0 = vNULL;
4687 vec<tree> vec_oprnds1 = vNULL;
4688 vec<tree> vect_defs = vNULL;
4689 vec<gimple> phis = vNULL;
4690 int vec_num;
4691 tree def0, def1, tem, op0, op1 = NULL_TREE;
4692
4693 /* In case of reduction chain we switch to the first stmt in the chain, but
4694 we don't update STMT_INFO, since only the last stmt is marked as reduction
4695 and has reduction properties. */
4696 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4697 stmt = GROUP_FIRST_ELEMENT (stmt_info);
4698
4699 if (nested_in_vect_loop_p (loop, stmt))
4700 {
4701 outer_loop = loop;
4702 loop = loop->inner;
4703 nested_cycle = true;
4704 }
4705
4706 /* 1. Is vectorizable reduction? */
4707 /* Not supportable if the reduction variable is used in the loop, unless
4708 it's a reduction chain. */
4709 if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
4710 && !GROUP_FIRST_ELEMENT (stmt_info))
4711 return false;
4712
4713 /* Reductions that are not used even in an enclosing outer-loop,
4714 are expected to be "live" (used out of the loop). */
4715 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
4716 && !STMT_VINFO_LIVE_P (stmt_info))
4717 return false;
4718
4719 /* Make sure it was already recognized as a reduction computation. */
4720 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
4721 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle)
4722 return false;
4723
4724 /* 2. Has this been recognized as a reduction pattern?
4725
4726 Check if STMT represents a pattern that has been recognized
4727 in earlier analysis stages. For stmts that represent a pattern,
4728 the STMT_VINFO_RELATED_STMT field records the last stmt in
4729 the original sequence that constitutes the pattern. */
4730
4731 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4732 if (orig_stmt)
4733 {
4734 orig_stmt_info = vinfo_for_stmt (orig_stmt);
4735 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
4736 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
4737 }
4738
4739 /* 3. Check the operands of the operation. The first operands are defined
4740 inside the loop body. The last operand is the reduction variable,
4741 which is defined by the loop-header-phi. */
4742
4743 gcc_assert (is_gimple_assign (stmt));
4744
4745 /* Flatten RHS. */
4746 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
4747 {
4748 case GIMPLE_SINGLE_RHS:
4749 op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
4750 if (op_type == ternary_op)
4751 {
4752 tree rhs = gimple_assign_rhs1 (stmt);
4753 ops[0] = TREE_OPERAND (rhs, 0);
4754 ops[1] = TREE_OPERAND (rhs, 1);
4755 ops[2] = TREE_OPERAND (rhs, 2);
4756 code = TREE_CODE (rhs);
4757 }
4758 else
4759 return false;
4760 break;
4761
4762 case GIMPLE_BINARY_RHS:
4763 code = gimple_assign_rhs_code (stmt);
4764 op_type = TREE_CODE_LENGTH (code);
4765 gcc_assert (op_type == binary_op);
4766 ops[0] = gimple_assign_rhs1 (stmt);
4767 ops[1] = gimple_assign_rhs2 (stmt);
4768 break;
4769
4770 case GIMPLE_TERNARY_RHS:
4771 code = gimple_assign_rhs_code (stmt);
4772 op_type = TREE_CODE_LENGTH (code);
4773 gcc_assert (op_type == ternary_op);
4774 ops[0] = gimple_assign_rhs1 (stmt);
4775 ops[1] = gimple_assign_rhs2 (stmt);
4776 ops[2] = gimple_assign_rhs3 (stmt);
4777 break;
4778
4779 case GIMPLE_UNARY_RHS:
4780 return false;
4781
4782 default:
4783 gcc_unreachable ();
4784 }
4785
4786 if (code == COND_EXPR && slp_node)
4787 return false;
4788
4789 scalar_dest = gimple_assign_lhs (stmt);
4790 scalar_type = TREE_TYPE (scalar_dest);
4791 if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
4792 && !SCALAR_FLOAT_TYPE_P (scalar_type))
4793 return false;
4794
4795 /* Do not try to vectorize bit-precision reductions. */
4796 if ((TYPE_PRECISION (scalar_type)
4797 != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
4798 return false;
4799
4800 /* All uses but the last are expected to be defined in the loop.
4801 The last use is the reduction variable. In case of nested cycle this
4802 assumption is not true: we use reduc_index to record the index of the
4803 reduction variable. */
4804 for (i = 0; i < op_type - 1; i++)
4805 {
4806 /* The condition of COND_EXPR is checked in vectorizable_condition(). */
4807 if (i == 0 && code == COND_EXPR)
4808 continue;
4809
4810 is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
4811 &def_stmt, &def, &dt, &tem);
4812 if (!vectype_in)
4813 vectype_in = tem;
4814 gcc_assert (is_simple_use);
4815
4816 if (dt != vect_internal_def
4817 && dt != vect_external_def
4818 && dt != vect_constant_def
4819 && dt != vect_induction_def
4820 && !(dt == vect_nested_cycle && nested_cycle))
4821 return false;
4822
4823 if (dt == vect_nested_cycle)
4824 {
4825 found_nested_cycle_def = true;
4826 reduc_def_stmt = def_stmt;
4827 reduc_index = i;
4828 }
4829 }
4830
4831 is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
4832 &def_stmt, &def, &dt, &tem);
4833 if (!vectype_in)
4834 vectype_in = tem;
4835 gcc_assert (is_simple_use);
4836 if (!(dt == vect_reduction_def
4837 || dt == vect_nested_cycle
4838 || ((dt == vect_internal_def || dt == vect_external_def
4839 || dt == vect_constant_def || dt == vect_induction_def)
4840 && nested_cycle && found_nested_cycle_def)))
4841 {
4842 /* For pattern recognized stmts, orig_stmt might be a reduction,
4843 but some helper statements for the pattern might not, or
4844 might be COND_EXPRs with reduction uses in the condition. */
4845 gcc_assert (orig_stmt);
4846 return false;
4847 }
4848 if (!found_nested_cycle_def)
4849 reduc_def_stmt = def_stmt;
4850
4851 gcc_assert (gimple_code (reduc_def_stmt) == GIMPLE_PHI);
4852 if (orig_stmt)
4853 gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo,
4854 reduc_def_stmt,
4855 !nested_cycle,
4856 &dummy));
4857 else
4858 {
4859 gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
4860 !nested_cycle, &dummy);
4861 /* We changed STMT to be the first stmt in reduction chain, hence we
4862 check that in this case the first element in the chain is STMT. */
4863 gcc_assert (stmt == tmp
4864 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
4865 }
4866
4867 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
4868 return false;
4869
4870 if (slp_node || PURE_SLP_STMT (stmt_info))
4871 ncopies = 1;
4872 else
4873 ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4874 / TYPE_VECTOR_SUBPARTS (vectype_in));
4875
4876 gcc_assert (ncopies >= 1);
4877
4878 vec_mode = TYPE_MODE (vectype_in);
4879
4880 if (code == COND_EXPR)
4881 {
4882 if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL))
4883 {
4884 if (dump_enabled_p ())
4885 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4886 "unsupported condition in reduction\n");
4887
4888 return false;
4889 }
4890 }
4891 else
4892 {
4893 /* 4. Supportable by target? */
4894
4895 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
4896 || code == LROTATE_EXPR || code == RROTATE_EXPR)
4897 {
4898 /* Shifts and rotates are only supported by vectorizable_shift,
4899 not vectorizable_reduction. */
4900 if (dump_enabled_p ())
4901 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4902 "unsupported shift or rotation.\n");
4903 return false;
4904 }
4905
4906 /* 4.1. check support for the operation in the loop */
4907 optab = optab_for_tree_code (code, vectype_in, optab_default);
4908 if (!optab)
4909 {
4910 if (dump_enabled_p ())
4911 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4912 "no optab.\n");
4913
4914 return false;
4915 }
4916
4917 if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
4918 {
4919 if (dump_enabled_p ())
4920 dump_printf (MSG_NOTE, "op not supported by target.\n");
4921
4922 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4923 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4924 < vect_min_worthwhile_factor (code))
4925 return false;
4926
4927 if (dump_enabled_p ())
4928 dump_printf (MSG_NOTE, "proceeding using word mode.\n");
4929 }
4930
4931 /* Worthwhile without SIMD support? */
4932 if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
4933 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4934 < vect_min_worthwhile_factor (code))
4935 {
4936 if (dump_enabled_p ())
4937 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4938 "not worthwhile without SIMD support.\n");
4939
4940 return false;
4941 }
4942 }
4943
4944 /* 4.2. Check support for the epilog operation.
4945
4946 If STMT represents a reduction pattern, then the type of the
4947 reduction variable may be different than the type of the rest
4948 of the arguments. For example, consider the case of accumulation
4949 of shorts into an int accumulator. The original code:
4950 S1: int_a = (int) short_a;
4951 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
4952
4953 was replaced with:
4954 STMT: int_acc = widen_sum <short_a, int_acc>
4955
4956 This means that:
4957 1. The tree-code that is used to create the vector operation in the
4958 epilog code (that reduces the partial results) is not the
4959 tree-code of STMT, but is rather the tree-code of the original
4960 stmt from the pattern that STMT is replacing. I.e, in the example
4961 above we want to use 'widen_sum' in the loop, but 'plus' in the
4962 epilog.
4963 2. The type (mode) we use to check available target support
4964 for the vector operation to be created in the *epilog*, is
4965 determined by the type of the reduction variable (in the example
4966 above we'd check this: optab_handler (plus_optab, vect_int_mode)).
4967 However the type (mode) we use to check available target support
4968 for the vector operation to be created *inside the loop*, is
4969 determined by the type of the other arguments to STMT (in the
4970 example we'd check this: optab_handler (widen_sum_optab,
4971 vect_short_mode)).
4972
4973 This is contrary to "regular" reductions, in which the types of all
4974 the arguments are the same as the type of the reduction variable.
4975 For "regular" reductions we can therefore use the same vector type
4976 (and also the same tree-code) when generating the epilog code and
4977 when generating the code inside the loop. */
4978
4979 if (orig_stmt)
4980 {
4981 /* This is a reduction pattern: get the vectype from the type of the
4982 reduction variable, and get the tree-code from orig_stmt. */
4983 orig_code = gimple_assign_rhs_code (orig_stmt);
4984 gcc_assert (vectype_out);
4985 vec_mode = TYPE_MODE (vectype_out);
4986 }
4987 else
4988 {
4989 /* Regular reduction: the same vectype and tree-code that are used for
4990 the vector code inside the loop can be used for the epilog code. */
4991 orig_code = code;
4992 }
4993
4994 if (nested_cycle)
4995 {
4996 def_bb = gimple_bb (reduc_def_stmt);
4997 def_stmt_loop = def_bb->loop_father;
4998 def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
4999 loop_preheader_edge (def_stmt_loop));
5000 if (TREE_CODE (def_arg) == SSA_NAME
5001 && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
5002 && gimple_code (def_arg_stmt) == GIMPLE_PHI
5003 && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
5004 && vinfo_for_stmt (def_arg_stmt)
5005 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
5006 == vect_double_reduction_def)
5007 double_reduc = true;
5008 }
5009
5010 epilog_reduc_code = ERROR_MARK;
5011 if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
5012 {
5013 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
5014 optab_default);
5015 if (!reduc_optab)
5016 {
5017 if (dump_enabled_p ())
5018 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5019 "no optab for reduction.\n");
5020
5021 epilog_reduc_code = ERROR_MARK;
5022 }
5023
5024 if (reduc_optab
5025 && optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
5026 {
5027 if (dump_enabled_p ())
5028 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5029 "reduc op not supported by target.\n");
5030
5031 epilog_reduc_code = ERROR_MARK;
5032 }
5033 }
5034 else
5035 {
5036 if (!nested_cycle || double_reduc)
5037 {
5038 if (dump_enabled_p ())
5039 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5040 "no reduc code for scalar code.\n");
5041
5042 return false;
5043 }
5044 }
5045
5046 if (double_reduc && ncopies > 1)
5047 {
5048 if (dump_enabled_p ())
5049 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5050 "multiple types in double reduction\n");
5051
5052 return false;
5053 }
5054
5055 /* In case of widening multiplication by a constant, we update the type
5056 of the constant to be the type of the other operand. We check that the
5057 constant fits the type in the pattern recognition pass. */
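/* E.g., if ops[1] has type short and ops[0] is the INTEGER_CST 3, the
constant is folded to short here, so that both multiplication operands of
the dot-product have the same type.  */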
5058 if (code == DOT_PROD_EXPR
5059 && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
5060 {
5061 if (TREE_CODE (ops[0]) == INTEGER_CST)
5062 ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
5063 else if (TREE_CODE (ops[1]) == INTEGER_CST)
5064 ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
5065 else
5066 {
5067 if (dump_enabled_p ())
5068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5069 "invalid types in dot-prod\n");
5070
5071 return false;
5072 }
5073 }
5074
5075 if (!vec_stmt) /* transformation not required. */
5076 {
5077 if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
5078 return false;
5079 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
5080 return true;
5081 }
5082
5083 /** Transform. **/
5084
5085 if (dump_enabled_p ())
5086 dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n");
5087
5088 /* FORNOW: Multiple types are not supported for condition. */
5089 if (code == COND_EXPR)
5090 gcc_assert (ncopies == 1);
5091
5092 /* Create the destination vector */
5093 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
5094
5095 /* In case the vectorization factor (VF) is bigger than the number
5096 of elements that we can fit in a vectype (nunits), we have to generate
5097 more than one vector stmt - i.e - we need to "unroll" the
5098 vector stmt by a factor VF/nunits. For more details see documentation
5099 in vectorizable_operation. */
5100
5101 /* If the reduction is used in an outer loop we need to generate
5102 VF intermediate results, like so (e.g. for ncopies=2):
5103 r0 = phi (init, r0)
5104 r1 = phi (init, r1)
5105 r0 = x0 + r0;
5106 r1 = x1 + r1;
5107 (i.e. we generate VF results in 2 registers).
5108 In this case we have a separate def-use cycle for each copy, and therefore
5109 for each copy we get the vector def for the reduction variable from the
5110 respective phi node created for this copy.
5111
5112 Otherwise (the reduction is unused in the loop nest), we can combine
5113 together intermediate results, like so (e.g. for ncopies=2):
5114 r = phi (init, r)
5115 r = x0 + r;
5116 r = x1 + r;
5117 (i.e. we generate VF/2 results in a single register).
5118 In this case for each copy we get the vector def for the reduction variable
5119 from the vectorized reduction operation generated in the previous iteration.
5120 */
5121
5122 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope)
5123 {
5124 single_defuse_cycle = true;
5125 epilog_copies = 1;
5126 }
5127 else
5128 epilog_copies = ncopies;
5129
5130 prev_stmt_info = NULL;
5131 prev_phi_info = NULL;
5132 if (slp_node)
5133 {
5134 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5135 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype_out)
5136 == TYPE_VECTOR_SUBPARTS (vectype_in));
5137 }
5138 else
5139 {
5140 vec_num = 1;
5141 vec_oprnds0.create (1);
5142 if (op_type == ternary_op)
5143 vec_oprnds1.create (1);
5144 }
5145
5146 phis.create (vec_num);
5147 vect_defs.create (vec_num);
5148 if (!slp_node)
5149 vect_defs.quick_push (NULL_TREE);
5150
5151 for (j = 0; j < ncopies; j++)
5152 {
5153 if (j == 0 || !single_defuse_cycle)
5154 {
5155 for (i = 0; i < vec_num; i++)
5156 {
5157 /* Create the reduction-phi that defines the reduction
5158 operand. */
5159 new_phi = create_phi_node (vec_dest, loop->header);
5160 set_vinfo_for_stmt (new_phi,
5161 new_stmt_vec_info (new_phi, loop_vinfo,
5162 NULL));
5163 if (j == 0 || slp_node)
5164 phis.quick_push (new_phi);
5165 }
5166 }
5167
5168 if (code == COND_EXPR)
5169 {
5170 gcc_assert (!slp_node);
5171 vectorizable_condition (stmt, gsi, vec_stmt,
5172 PHI_RESULT (phis[0]),
5173 reduc_index, NULL);
5174 /* Multiple types are not supported for condition. */
5175 break;
5176 }
5177
5178 /* Handle uses. */
5179 if (j == 0)
5180 {
5181 op0 = ops[!reduc_index];
5182 if (op_type == ternary_op)
5183 {
5184 if (reduc_index == 0)
5185 op1 = ops[2];
5186 else
5187 op1 = ops[1];
5188 }
5189
5190 if (slp_node)
5191 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5192 slp_node, -1);
5193 else
5194 {
5195 loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
5196 stmt, NULL);
5197 vec_oprnds0.quick_push (loop_vec_def0);
5198 if (op_type == ternary_op)
5199 {
5200 loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt,
5201 NULL);
5202 vec_oprnds1.quick_push (loop_vec_def1);
5203 }
5204 }
5205 }
5206 else
5207 {
5208 if (!slp_node)
5209 {
5210 enum vect_def_type dt;
5211 gimple dummy_stmt;
5212 tree dummy;
5213
5214 vect_is_simple_use (ops[!reduc_index], stmt, loop_vinfo, NULL,
5215 &dummy_stmt, &dummy, &dt);
5216 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
5217 loop_vec_def0);
5218 vec_oprnds0[0] = loop_vec_def0;
5219 if (op_type == ternary_op)
5220 {
5221 vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &dummy_stmt,
5222 &dummy, &dt);
5223 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
5224 loop_vec_def1);
5225 vec_oprnds1[0] = loop_vec_def1;
5226 }
5227 }
5228
5229 if (single_defuse_cycle)
5230 reduc_def = gimple_assign_lhs (new_stmt);
5231
5232 STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
5233 }
5234
5235 FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
5236 {
5237 if (slp_node)
5238 reduc_def = PHI_RESULT (phis[i]);
5239 else
5240 {
5241 if (!single_defuse_cycle || j == 0)
5242 reduc_def = PHI_RESULT (new_phi);
5243 }
5244
5245 def1 = ((op_type == ternary_op)
5246 ? vec_oprnds1[i] : NULL);
5247 if (op_type == binary_op)
5248 {
5249 if (reduc_index == 0)
5250 expr = build2 (code, vectype_out, reduc_def, def0);
5251 else
5252 expr = build2 (code, vectype_out, def0, reduc_def);
5253 }
5254 else
5255 {
5256 if (reduc_index == 0)
5257 expr = build3 (code, vectype_out, reduc_def, def0, def1);
5258 else
5259 {
5260 if (reduc_index == 1)
5261 expr = build3 (code, vectype_out, def0, reduc_def, def1);
5262 else
5263 expr = build3 (code, vectype_out, def0, def1, reduc_def);
5264 }
5265 }
5266
5267 new_stmt = gimple_build_assign (vec_dest, expr);
5268 new_temp = make_ssa_name (vec_dest, new_stmt);
5269 gimple_assign_set_lhs (new_stmt, new_temp);
5270 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5271
5272 if (slp_node)
5273 {
5274 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5275 vect_defs.quick_push (new_temp);
5276 }
5277 else
5278 vect_defs[0] = new_temp;
5279 }
5280
5281 if (slp_node)
5282 continue;
5283
5284 if (j == 0)
5285 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5286 else
5287 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5288
5289 prev_stmt_info = vinfo_for_stmt (new_stmt);
5290 prev_phi_info = vinfo_for_stmt (new_phi);
5291 }
5292
5293 /* Finalize the reduction-phi (set its arguments) and create the
5294 epilog reduction code. */
5295 if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
5296 {
5297 new_temp = gimple_assign_lhs (*vec_stmt);
5298 vect_defs[0] = new_temp;
5299 }
5300
5301 vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
5302 epilog_reduc_code, phis, reduc_index,
5303 double_reduc, slp_node);
5304
5305 phis.release ();
5306 vect_defs.release ();
5307 vec_oprnds0.release ();
5308 vec_oprnds1.release ();
5309
5310 return true;
5311 }
5312
5313 /* Function vect_min_worthwhile_factor.
5314
5315 For a loop where we could vectorize the operation indicated by CODE,
5316 return the minimum vectorization factor that makes it worthwhile
5317 to use generic vectors. */
5318 int
5319 vect_min_worthwhile_factor (enum tree_code code)
5320 {
5321 switch (code)
5322 {
5323 case PLUS_EXPR:
5324 case MINUS_EXPR:
5325 case NEGATE_EXPR:
5326 return 4;
5327
5328 case BIT_AND_EXPR:
5329 case BIT_IOR_EXPR:
5330 case BIT_XOR_EXPR:
5331 case BIT_NOT_EXPR:
5332 return 2;
5333
5334 default:
5335 return INT_MAX;
5336 }
5337 }
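/* A typical use (an illustrative sketch only, not a quotation of the
   callers): when the target has no native vector mode for the chosen
   vectype, a statement is only worth emulating with generic vectors if
   the vectorization factor reaches this threshold, e.g.

	if (!VECTOR_MODE_P (TYPE_MODE (vectype))
	    && vf < vect_min_worthwhile_factor (code))
	  return false;

   so that a 2-element emulated addition is rejected while a 4-element
   one is still attempted.  */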
5338
5339
5340 /* Function vectorizable_induction.
5341
5342 Check if PHI performs an induction computation that can be vectorized.
5343 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
5344 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
5345 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
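/* For illustration (an assumed example, not part of the original
   comment): for the induction

	for (i = 0; i < n; i++)
	  a[i] = i;

   and VF = 4, the vectorized phi is seeded with the initial vector
   {0, 1, 2, 3} and updated by adding the step vector {4, 4, 4, 4} on
   each vector iteration; get_initial_def_for_induction builds these
   defs.  */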
5346
5347 bool
5348 vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5349 gimple *vec_stmt)
5350 {
5351 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
5352 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5353 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5354 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5355 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5356 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5357 tree vec_def;
5358
5359 gcc_assert (ncopies >= 1);
5360 /* FORNOW. These restrictions should be relaxed. */
5361 if (nested_in_vect_loop_p (loop, phi))
5362 {
5363 imm_use_iterator imm_iter;
5364 use_operand_p use_p;
5365 gimple exit_phi;
5366 edge latch_e;
5367 tree loop_arg;
5368
5369 if (ncopies > 1)
5370 {
5371 if (dump_enabled_p ())
5372 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5373 "multiple types in nested loop.\n");
5374 return false;
5375 }
5376
5377 exit_phi = NULL;
5378 latch_e = loop_latch_edge (loop->inner);
5379 loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
5380 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
5381 {
5382 if (!flow_bb_inside_loop_p (loop->inner,
5383 gimple_bb (USE_STMT (use_p))))
5384 {
5385 exit_phi = USE_STMT (use_p);
5386 break;
5387 }
5388 }
5389 if (exit_phi)
5390 {
5391 stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
5392 if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
5393 && !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
5394 {
5395 if (dump_enabled_p ())
5396 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5397 "inner-loop induction only used outside "
5398 "of the outer vectorized loop.\n");
5399 return false;
5400 }
5401 }
5402 }
5403
5404 if (!STMT_VINFO_RELEVANT_P (stmt_info))
5405 return false;
5406
5407 /* FORNOW: SLP not supported. */
5408 if (STMT_SLP_TYPE (stmt_info))
5409 return false;
5410
5411 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
5412
5413 if (gimple_code (phi) != GIMPLE_PHI)
5414 return false;
5415
5416 if (!vec_stmt) /* transformation not required. */
5417 {
5418 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
5419 if (dump_enabled_p ())
5420 dump_printf_loc (MSG_NOTE, vect_location,
5421 "=== vectorizable_induction ===\n");
5422 vect_model_induction_cost (stmt_info, ncopies);
5423 return true;
5424 }
5425
5426 /** Transform. **/
5427
5428 if (dump_enabled_p ())
5429 dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.\n");
5430
5431 vec_def = get_initial_def_for_induction (phi);
5432 *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
5433 return true;
5434 }
5435
5436 /* Function vectorizable_live_operation.
5437
5438 STMT computes a value that is used outside the loop. Check if
5439 it can be supported. */
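/* For illustration (an assumed example, not part of the original
   comment):

	for (i = 0; i < n; i++)
	  {
	    a[i] = b[i] + x;
	    t = x + y;
	  }
	use (t);

   T is live after the loop.  Because both operands of its definition
   are loop invariant, the scalar statement can stay in place and its
   final value be used directly, which is the only case handled below
   (aside from the GOMP_SIMD_LANE special case).  */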
5440
5441 bool
5442 vectorizable_live_operation (gimple stmt,
5443 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5444 gimple *vec_stmt)
5445 {
5446 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5447 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5448 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5449 int i;
5450 int op_type;
5451 tree op;
5452 tree def;
5453 gimple def_stmt;
5454 enum vect_def_type dt;
5455 enum tree_code code;
5456 enum gimple_rhs_class rhs_class;
5457
5458 gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
5459
5460 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
5461 return false;
5462
5463 if (!is_gimple_assign (stmt))
5464 {
5465 if (gimple_call_internal_p (stmt)
5466 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
5467 && gimple_call_lhs (stmt)
5468 && loop->simduid
5469 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
5470 && loop->simduid
5471 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
5472 {
5473 edge e = single_exit (loop);
5474 basic_block merge_bb = e->dest;
5475 imm_use_iterator imm_iter;
5476 use_operand_p use_p;
5477 tree lhs = gimple_call_lhs (stmt);
5478
5479 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
5480 {
5481 gimple use_stmt = USE_STMT (use_p);
5482 if (gimple_code (use_stmt) == GIMPLE_PHI
5483 || gimple_bb (use_stmt) == merge_bb)
5484 {
5485 if (vec_stmt)
5486 {
5487 tree vfm1
5488 = build_int_cst (unsigned_type_node,
5489 loop_vinfo->vectorization_factor - 1);
5490 SET_PHI_ARG_DEF (use_stmt, e->dest_idx, vfm1);
5491 }
5492 return true;
5493 }
5494 }
5495 }
5496
5497 return false;
5498 }
5499
5500 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5501 return false;
5502
5503 /* FORNOW. CHECKME. */
5504 if (nested_in_vect_loop_p (loop, stmt))
5505 return false;
5506
5507 code = gimple_assign_rhs_code (stmt);
5508 op_type = TREE_CODE_LENGTH (code);
5509 rhs_class = get_gimple_rhs_class (code);
5510 gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
5511 gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);
5512
5513 /* FORNOW: support only if all uses are invariant. This means
5514 that the scalar operations can remain in place, unvectorized.
5515 The original last scalar value that they compute will be used. */
5516
5517 for (i = 0; i < op_type; i++)
5518 {
5519 if (rhs_class == GIMPLE_SINGLE_RHS)
5520 op = TREE_OPERAND (gimple_op (stmt, 1), i);
5521 else
5522 op = gimple_op (stmt, i + 1);
5523 if (op
5524 && !vect_is_simple_use (op, stmt, loop_vinfo, NULL, &def_stmt, &def,
5525 &dt))
5526 {
5527 if (dump_enabled_p ())
5528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5529 "use not simple.\n");
5530 return false;
5531 }
5532
5533 if (dt != vect_external_def && dt != vect_constant_def)
5534 return false;
5535 }
5536
5537 /* No transformation is required for the cases we currently support. */
5538 return true;
5539 }
5540
5541 /* Kill any debug uses outside LOOP of SSA names defined in STMT. */
5542
5543 static void
5544 vect_loop_kill_debug_uses (struct loop *loop, gimple stmt)
5545 {
5546 ssa_op_iter op_iter;
5547 imm_use_iterator imm_iter;
5548 def_operand_p def_p;
5549 gimple ustmt;
5550
5551 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
5552 {
5553 FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
5554 {
5555 basic_block bb;
5556
5557 if (!is_gimple_debug (ustmt))
5558 continue;
5559
5560 bb = gimple_bb (ustmt);
5561
5562 if (!flow_bb_inside_loop_p (loop, bb))
5563 {
5564 if (gimple_debug_bind_p (ustmt))
5565 {
5566 if (dump_enabled_p ())
5567 dump_printf_loc (MSG_NOTE, vect_location,
5568 "killing debug use\n");
5569
5570 gimple_debug_bind_reset_value (ustmt);
5571 update_stmt (ustmt);
5572 }
5573 else
5574 gcc_unreachable ();
5575 }
5576 }
5577 }
5578 }
5579
5580 /* Function vect_transform_loop.
5581
5582 The analysis phase has determined that the loop is vectorizable.
5583 Vectorize the loop: create vectorized stmts to replace the scalar
5584 stmts in the loop, and update the loop exit condition. */
5585
5586 void
5587 vect_transform_loop (loop_vec_info loop_vinfo)
5588 {
5589 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5590 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5591 int nbbs = loop->num_nodes;
5592 gimple_stmt_iterator si;
5593 int i;
5594 tree ratio = NULL;
5595 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5596 bool grouped_store;
5597 bool slp_scheduled = false;
5598 unsigned int nunits;
5599 gimple stmt, pattern_stmt;
5600 gimple_seq pattern_def_seq = NULL;
5601 gimple_stmt_iterator pattern_def_si = gsi_none ();
5602 bool transform_pattern_stmt = false;
5603 bool check_profitability = false;
5604 int th;
5605 /* Record the number of iterations before we started tampering with the profile. */
5606 gcov_type expected_iterations = expected_loop_iterations_unbounded (loop);
5607
5608 if (dump_enabled_p ())
5609 dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===\n");
5610
5611 /* If the profile is imprecise, we have a chance to fix it up. */
5612 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5613 expected_iterations = LOOP_VINFO_INT_NITERS (loop_vinfo);
5614
5615 /* Use the more conservative vectorization threshold. If the number
5616 of iterations is constant, assume the cost check has been performed
5617 by our caller. If the threshold makes all loops profitable that
5618 run at least the vectorization factor number of times, checking
5619 is pointless, too. */
5620 th = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
5621 * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
5622 th = MAX (th, LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo));
5623 if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1
5624 && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5625 {
5626 if (dump_enabled_p ())
5627 dump_printf_loc (MSG_NOTE, vect_location,
5628 "Profitability threshold is %d loop iterations.\n",
5629 th);
5630 check_profitability = true;
5631 }
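  /* Worked example (numbers assumed, not taken from params.def): with
     --param min-vect-loop-bound=2 and a vectorization factor of 4, the
     first term above is 2 * 4 - 1 = 7; TH then becomes the maximum of 7
     and the cost model's minimum iteration count.  Since 7 >= 4 - 1 and
     the number of iterations is not known at compile time, a runtime
     profitability check guarded by this threshold is emitted.  */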
5632
5633 /* Version the loop first, if required, so the profitability check
5634 comes first. */
5635
5636 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
5637 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
5638 {
5639 vect_loop_versioning (loop_vinfo, th, check_profitability);
5640 check_profitability = false;
5641 }
5642
5643 /* Peel the loop if there are data refs with unknown alignment.
5644 Only one data ref with unknown alignment is allowed. */
5645
5646 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
5647 {
5648 vect_do_peeling_for_alignment (loop_vinfo, th, check_profitability);
5649 check_profitability = false;
5650 }
5651
5652 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
5653 compile time constant), or it is a constant that is not divisible by the
5654 vectorization factor, then an epilog loop needs to be created.
5655 We therefore duplicate the loop: the original loop will be vectorized,
5656 and will compute the first (n/VF) iterations. The second copy of the loop
5657 will remain scalar and will compute the remaining (n%VF) iterations.
5658 (VF is the vectorization factor). */
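  /* Sketch of the resulting structure (illustrative pseudo source only,
     VF = 4 assumed, the vector body abbreviated as a call):

	niters_vector = n / 4;
	for (i = 0; i < niters_vector * 4; i += 4)
	  vectorized_body (i);              <-- computes the first n/VF iterations
	for (; i < n; i++)
	  scalar_body (i);                  <-- epilog: the remaining n%VF iterations

     vect_do_peeling_for_loop_bound creates the epilog copy; RATIO holds
     the iteration count of the vectorized loop.  */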
5659
5660 if ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
5661 < exact_log2 (vectorization_factor)
5662 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
5663 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio,
5664 th, check_profitability);
5665 else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5666 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
5667 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
5668 else
5669 {
5670 tree ni_name, ratio_mult_vf;
5671 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name, &ratio_mult_vf,
5672 &ratio, NULL);
5673 }
5674
5675 /* 1) Make sure the loop header has exactly two entries
5676 2) Make sure we have a preheader basic block. */
5677
5678 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
5679
5680 split_edge (loop_preheader_edge (loop));
5681
5682 /* FORNOW: the vectorizer supports only loops whose body consists
5683 of one basic block (header + empty latch). When the vectorizer
5684 supports more involved loop forms, the order in which the BBs are
5685 traversed will need to be reconsidered. */
5686
5687 for (i = 0; i < nbbs; i++)
5688 {
5689 basic_block bb = bbs[i];
5690 stmt_vec_info stmt_info;
5691 gimple phi;
5692
5693 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
5694 {
5695 phi = gsi_stmt (si);
5696 if (dump_enabled_p ())
5697 {
5698 dump_printf_loc (MSG_NOTE, vect_location,
5699 "------>vectorizing phi: ");
5700 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
5701 dump_printf (MSG_NOTE, "\n");
5702 }
5703 stmt_info = vinfo_for_stmt (phi);
5704 if (!stmt_info)
5705 continue;
5706
5707 if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
5708 vect_loop_kill_debug_uses (loop, phi);
5709
5710 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5711 && !STMT_VINFO_LIVE_P (stmt_info))
5712 continue;
5713
5714 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
5715 != (unsigned HOST_WIDE_INT) vectorization_factor)
5716 && dump_enabled_p ())
5717 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
5718
5719 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
5720 {
5721 if (dump_enabled_p ())
5722 dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n");
5723 vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
5724 }
5725 }
5726
5727 pattern_stmt = NULL;
5728 for (si = gsi_start_bb (bb); !gsi_end_p (si) || transform_pattern_stmt;)
5729 {
5730 bool is_store;
5731
5732 if (transform_pattern_stmt)
5733 stmt = pattern_stmt;
5734 else
5735 {
5736 stmt = gsi_stmt (si);
5737 /* During vectorization remove existing clobber stmts. */
5738 if (gimple_clobber_p (stmt))
5739 {
5740 unlink_stmt_vdef (stmt);
5741 gsi_remove (&si, true);
5742 release_defs (stmt);
5743 continue;
5744 }
5745 }
5746
5747 if (dump_enabled_p ())
5748 {
5749 dump_printf_loc (MSG_NOTE, vect_location,
5750 "------>vectorizing statement: ");
5751 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5752 dump_printf (MSG_NOTE, "\n");
5753 }
5754
5755 stmt_info = vinfo_for_stmt (stmt);
5756
5757 /* vector stmts created in the outer-loop during vectorization of
5758 stmts in an inner-loop may not have a stmt_info, and do not
5759 need to be vectorized. */
5760 if (!stmt_info)
5761 {
5762 gsi_next (&si);
5763 continue;
5764 }
5765
5766 if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
5767 vect_loop_kill_debug_uses (loop, stmt);
5768
5769 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5770 && !STMT_VINFO_LIVE_P (stmt_info))
5771 {
5772 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5773 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
5774 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5775 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5776 {
5777 stmt = pattern_stmt;
5778 stmt_info = vinfo_for_stmt (stmt);
5779 }
5780 else
5781 {
5782 gsi_next (&si);
5783 continue;
5784 }
5785 }
5786 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5787 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
5788 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5789 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5790 transform_pattern_stmt = true;
5791
5792 /* If pattern statement has def stmts, vectorize them too. */
5793 if (is_pattern_stmt_p (stmt_info))
5794 {
5795 if (pattern_def_seq == NULL)
5796 {
5797 pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
5798 pattern_def_si = gsi_start (pattern_def_seq);
5799 }
5800 else if (!gsi_end_p (pattern_def_si))
5801 gsi_next (&pattern_def_si);
5802 if (pattern_def_seq != NULL)
5803 {
5804 gimple pattern_def_stmt = NULL;
5805 stmt_vec_info pattern_def_stmt_info = NULL;
5806
5807 while (!gsi_end_p (pattern_def_si))
5808 {
5809 pattern_def_stmt = gsi_stmt (pattern_def_si);
5810 pattern_def_stmt_info
5811 = vinfo_for_stmt (pattern_def_stmt);
5812 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
5813 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
5814 break;
5815 gsi_next (&pattern_def_si);
5816 }
5817
5818 if (!gsi_end_p (pattern_def_si))
5819 {
5820 if (dump_enabled_p ())
5821 {
5822 dump_printf_loc (MSG_NOTE, vect_location,
5823 "==> vectorizing pattern def "
5824 "stmt: ");
5825 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
5826 pattern_def_stmt, 0);
5827 dump_printf (MSG_NOTE, "\n");
5828 }
5829
5830 stmt = pattern_def_stmt;
5831 stmt_info = pattern_def_stmt_info;
5832 }
5833 else
5834 {
5835 pattern_def_si = gsi_none ();
5836 transform_pattern_stmt = false;
5837 }
5838 }
5839 else
5840 transform_pattern_stmt = false;
5841 }
5842
5843 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5844 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
5845 STMT_VINFO_VECTYPE (stmt_info));
5846 if (!STMT_SLP_TYPE (stmt_info)
5847 && nunits != (unsigned int) vectorization_factor
5848 && dump_enabled_p ())
5849 /* For SLP, VF is set according to the unrolling factor, not the
5850 vector size, hence for SLP this print is not valid. */
5851 dump_printf_loc (MSG_NOTE, vect_location,
5852 "multiple-types.\n");
5853
5854 /* SLP. Schedule all the SLP instances when the first SLP stmt is
5855 reached. */
5856 if (STMT_SLP_TYPE (stmt_info))
5857 {
5858 if (!slp_scheduled)
5859 {
5860 slp_scheduled = true;
5861
5862 if (dump_enabled_p ())
5863 dump_printf_loc (MSG_NOTE, vect_location,
5864 "=== scheduling SLP instances ===\n");
5865
5866 vect_schedule_slp (loop_vinfo, NULL);
5867 }
5868
5869 /* Hybrid SLP stmts must be vectorized in addition to SLP. */
5870 if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
5871 {
5872 if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
5873 {
5874 pattern_def_seq = NULL;
5875 gsi_next (&si);
5876 }
5877 continue;
5878 }
5879 }
5880
5881 /* -------- vectorize statement ------------ */
5882 if (dump_enabled_p ())
5883 dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");
5884
5885 grouped_store = false;
5886 is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
5887 if (is_store)
5888 {
5889 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5890 {
5891 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
5892 interleaving chain was completed - free all the stores in
5893 the chain. */
5894 gsi_next (&si);
5895 vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
5896 continue;
5897 }
5898 else
5899 {
5900 /* Free the attached stmt_vec_info and remove the stmt. */
5901 gimple store = gsi_stmt (si);
5902 free_stmt_vec_info (store);
5903 unlink_stmt_vdef (store);
5904 gsi_remove (&si, true);
5905 release_defs (store);
5906 continue;
5907 }
5908 }
5909
5910 if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
5911 {
5912 pattern_def_seq = NULL;
5913 gsi_next (&si);
5914 }
5915 } /* stmts in BB */
5916 } /* BBs in loop */
5917
5918 slpeel_make_loop_iterate_ntimes (loop, ratio);
5919
5920 /* Reduce loop iterations by the vectorization factor. */
5921 scale_loop_profile (loop, GCOV_COMPUTE_SCALE (1, vectorization_factor),
5922 expected_iterations / vectorization_factor);
5923 loop->nb_iterations_upper_bound
5924 = loop->nb_iterations_upper_bound.udiv (double_int::from_uhwi (vectorization_factor),
5925 FLOOR_DIV_EXPR);
5926 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
5927 && loop->nb_iterations_upper_bound != double_int_zero)
5928 loop->nb_iterations_upper_bound = loop->nb_iterations_upper_bound - double_int_one;
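  /* E.g. (worked example, numbers assumed): an upper bound of 17 scalar
     iterations and a vectorization factor of 4 yield 17 / 4 = 4 vector
     iterations; when peeling for gaps is required, the last vector
     iteration is handled by the scalar epilog instead, leaving 3.  */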
5929 if (loop->any_estimate)
5930 {
5931 loop->nb_iterations_estimate
5932 = loop->nb_iterations_estimate.udiv (double_int::from_uhwi (vectorization_factor),
5933 FLOOR_DIV_EXPR);
5934 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
5935 && loop->nb_iterations_estimate != double_int_zero)
5936 loop->nb_iterations_estimate = loop->nb_iterations_estimate - double_int_one;
5937 }
5938
5939 if (dump_enabled_p ())
5940 {
5941 dump_printf_loc (MSG_NOTE, vect_location,
5942 "LOOP VECTORIZED\n");
5943 if (loop->inner)
5944 dump_printf_loc (MSG_NOTE, vect_location,
5945 "OUTER LOOP VECTORIZED\n");
5946 dump_printf (MSG_NOTE, "\n");
5947 }
5948 }