gcc/tree-vect-loop.c
/* Loop Vectorization
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com> and
   Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "gimple.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-pass.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "params.h"
#include "diagnostic-core.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "target.h"

/* Loop Vectorization Pass.

   This pass tries to vectorize loops.

   For example, the vectorizer transforms the following simple loop:

        short a[N]; short b[N]; short c[N]; int i;

        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   as if it had been manually vectorized by rewriting the source code into:

        typedef int __attribute__((mode(V8HI))) v8hi;
        short a[N]; short b[N]; short c[N]; int i;
        v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
        v8hi va, vb, vc;

        for (i=0; i<N/8; i++){
          vb = pb[i];
          vc = pc[i];
          va = vb + vc;
          pa[i] = va;
        }

   The main entry to this pass is vectorize_loops(), in which
   the vectorizer applies a set of analyses on a given set of loops,
   followed by the actual vectorization transformation for the loops that
   successfully passed the analysis phase.
   Throughout this pass we make a distinction between two types of
   data: scalars (which are represented by SSA_NAMEs), and memory references
   ("data-refs").  These two types of data require different handling both
   during analysis and transformation.  The types of data-refs that the
   vectorizer currently supports are ARRAY_REFS whose base is an array DECL
   (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
   accesses are required to have a simple (consecutive) access pattern.

   Analysis phase:
   ===============
   The driver for the analysis phase is vect_analyze_loop().
   It applies a set of analyses, some of which rely on the scalar evolution
   analyzer (scev) developed by Sebastian Pop.

   During the analysis phase the vectorizer records some information
   per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
   loop, as well as general information about the loop as a whole, which is
   recorded in a "loop_vec_info" struct attached to each loop.

   Transformation phase:
   =====================
   The loop transformation phase scans all the stmts in the loop, and
   creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
   the loop that needs to be vectorized.  It inserts the vector code sequence
   just before the scalar stmt S, and records a pointer to the vector code
   in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
   attached to S).  This pointer will be used for the vectorization of
   following stmts which use the def of stmt S.  Stmt S is removed if it
   writes to memory; otherwise, we rely on dead code elimination for
   removing it.

   For example, say stmt S1 was vectorized into stmt VS1:

   VS1: vb = px[i];
   S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   S2:  a = b;

   To vectorize stmt S2, the vectorizer first finds the stmt that defines
   the operand 'b' (S1), and gets the relevant vector def 'vb' from the
   vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
   resulting sequence would be:

   VS1: vb = px[i];
   S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   VS2: va = vb;
   S2:  a = b;       STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2

   Operands that are not SSA_NAMEs are data-refs that appear in
   load/store operations (like 'x[i]' in S1), and are handled differently.

   Target modeling:
   =================
   Currently the only target-specific information that is used is the
   size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
   Targets that can support different vector sizes will, for now, need
   to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
   flexibility will be added in the future.

   Since we only vectorize operations whose vector form can be
   expressed using existing tree codes, to verify that an operation is
   supported, the vectorizer checks the relevant optab at the relevant
   machine_mode (e.g., optab_handler (add_optab, V8HImode)).  If
   the value found is CODE_FOR_nothing, then there's no target support, and
   we can't vectorize the stmt.

   For additional information on this project see:
   http://gcc.gnu.org/projects/tree-ssa/vectorization.html
*/

static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);

/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data
   elements that are operated upon in parallel in a single iteration of the
   vectorized loop.  For example, when vectorizing a loop that operates on
   4-byte elements, on a target with a vector size (VS) of 16 bytes, the VF
   is set to 4, since 4 elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated
   upon are of the same size.  Therefore this function currently sets VF
   according to the size of the types operated upon, and fails if there are
   multiple sizes in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
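
   For example (an illustrative sketch, not code handled by this file):
   with a 16-byte vector size, a loop whose stmts all operate on 4-byte
   'int' values gets VF = 4, while

        short s[N];
        for (i=0; i<N; i++)
          s[i] = s[i] + 1;

   operates on 2-byte elements, so nunits is 8 and VF = 8.  The final VF
   is the largest number of units required by any stmt in the loop.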
*/

static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  tree scalar_type;
  gimple phi;
  tree vectype;
  unsigned int nunits;
  stmt_vec_info stmt_info;
  int i;
  HOST_WIDE_INT dummy;
  gimple stmt, pattern_stmt = NULL;
  gimple_seq pattern_def_seq = NULL;
  gimple_stmt_iterator pattern_def_si = gsi_none ();
  bool analyze_pattern_stmt = false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_determine_vectorization_factor ===\n");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          stmt_info = vinfo_for_stmt (phi);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
              dump_printf (MSG_NOTE, "\n");
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
              scalar_type = TREE_TYPE (PHI_RESULT (phi));

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "get vectype for scalar type: ");
                  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
                  dump_printf (MSG_NOTE, "\n");
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "not vectorized: unsupported "
                                       "data-type ");
                      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                         scalar_type);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
                  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
                  dump_printf (MSG_NOTE, "\n");
                }

              nunits = TYPE_VECTOR_SUBPARTS (vectype);
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n",
                                 nunits);

              if (!vectorization_factor
                  || (nunits > vectorization_factor))
                vectorization_factor = nunits;
            }
        }

      for (si = gsi_start_bb (bb); !gsi_end_p (si) || analyze_pattern_stmt;)
        {
          tree vf_vectype;

          if (analyze_pattern_stmt)
            stmt = pattern_stmt;
          else
            stmt = gsi_stmt (si);

          stmt_info = vinfo_for_stmt (stmt);

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "==> examining statement: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
              dump_printf (MSG_NOTE, "\n");
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.  */
          if ((!STMT_VINFO_RELEVANT_P (stmt_info)
               && !STMT_VINFO_LIVE_P (stmt_info))
              || gimple_clobber_p (stmt))
            {
              if (STMT_VINFO_IN_PATTERN_P (stmt_info)
                  && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
                  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
                      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
                {
                  stmt = pattern_stmt;
                  stmt_info = vinfo_for_stmt (pattern_stmt);
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_NOTE, vect_location,
                                       "==> examining pattern statement: ");
                      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
                      dump_printf (MSG_NOTE, "\n");
                    }
                }
              else
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
                  gsi_next (&si);
                  continue;
                }
            }
          else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
            analyze_pattern_stmt = true;

          /* If a pattern statement has def stmts, analyze them too.  */
          if (is_pattern_stmt_p (stmt_info))
            {
              if (pattern_def_seq == NULL)
                {
                  pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
                  pattern_def_si = gsi_start (pattern_def_seq);
                }
              else if (!gsi_end_p (pattern_def_si))
                gsi_next (&pattern_def_si);
              if (pattern_def_seq != NULL)
                {
                  gimple pattern_def_stmt = NULL;
                  stmt_vec_info pattern_def_stmt_info = NULL;

                  while (!gsi_end_p (pattern_def_si))
                    {
                      pattern_def_stmt = gsi_stmt (pattern_def_si);
                      pattern_def_stmt_info
                        = vinfo_for_stmt (pattern_def_stmt);
                      if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
                          || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
                        break;
                      gsi_next (&pattern_def_si);
                    }

                  if (!gsi_end_p (pattern_def_si))
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_NOTE, vect_location,
                                           "==> examining pattern def stmt: ");
                          dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
                                            pattern_def_stmt, 0);
                          dump_printf (MSG_NOTE, "\n");
                        }

                      stmt = pattern_def_stmt;
                      stmt_info = pattern_def_stmt_info;
                    }
                  else
                    {
                      pattern_def_si = gsi_none ();
                      analyze_pattern_stmt = false;
                    }
                }
              else
                analyze_pattern_stmt = false;
            }

          if (gimple_get_lhs (stmt) == NULL_TREE)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not vectorized: irregular stmt.");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
                                    0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return false;
            }

          if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not vectorized: vector stmt in loop:");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return false;
            }

          if (STMT_VINFO_VECTYPE (stmt_info))
            {
              /* The only case in which a vectype has already been set is for
                 stmts that contain a dataref, or for "pattern-stmts" (stmts
                 generated by the vectorizer to represent/replace a certain
                 idiom).  */
              gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
                          || is_pattern_stmt_p (stmt_info)
                          || !gsi_end_p (pattern_def_si));
              vectype = STMT_VINFO_VECTYPE (stmt_info);
            }
          else
            {
              gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
              scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "get vectype for scalar type: ");
                  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
                  dump_printf (MSG_NOTE, "\n");
                }
              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "not vectorized: unsupported "
                                       "data-type ");
                      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                         scalar_type);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  return false;
                }

              STMT_VINFO_VECTYPE (stmt_info) = vectype;

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
                  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
                  dump_printf (MSG_NOTE, "\n");
                }
            }

          /* The vectorization factor is according to the smallest
             scalar type (or the largest vector size, but we only
             support one vector size per loop).  */
          scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
                                                       &dummy);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "get vectype for scalar type: ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
              dump_printf (MSG_NOTE, "\n");
            }
          vf_vectype = get_vectype_for_scalar_type (scalar_type);
          if (!vf_vectype)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not vectorized: unsupported data-type ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                     scalar_type);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return false;
            }

          if ((GET_MODE_SIZE (TYPE_MODE (vectype))
               != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not vectorized: different sized vector "
                                   "types in statement, ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                     vectype);
                  dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                     vf_vectype);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return false;
            }

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, vf_vectype);
              dump_printf (MSG_NOTE, "\n");
            }

          nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n", nunits);
          if (!vectorization_factor
              || (nunits > vectorization_factor))
            vectorization_factor = nunits;

          if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
            {
              pattern_def_seq = NULL;
              gsi_next (&si);
            }
        }
    }

  /* TODO: Analyze cost.  Decide if worth while to vectorize.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = %d\n",
                     vectorization_factor);
  if (vectorization_factor <= 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported data-type\n");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}


/* Function vect_is_simple_iv_evolution.

   FORNOW: A simple evolution of an induction variable in the loop is
   considered a polynomial evolution.  */
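
/* For example (an illustrative sketch): for the IV 'i' in
   "for (i = 0; i < N; i++)" the access function computed by scev is the
   chrec {0, +, 1}_1: its evolution part in loop number 1 is the step '1'
   and its initial condition is '0'.  A step that is itself a chrec, as in
   {0, +, {0, +, 1}_1}_1, makes the evolution a polynomial of degree 2 and
   thus not "simple".  */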

static bool
vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
                             tree * step)
{
  tree init_expr;
  tree step_expr;
  tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
  basic_block bb;

  /* When there is no evolution in this loop, the evolution function
     is not "simple".  */
  if (evolution_part == NULL_TREE)
    return false;

  /* When the evolution is a polynomial of degree >= 2,
     the evolution function is not "simple".  */
  if (tree_is_chrec (evolution_part))
    return false;

  step_expr = evolution_part;
  init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "step: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, step_expr);
      dump_printf (MSG_NOTE, ",  init: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, init_expr);
      dump_printf (MSG_NOTE, "\n");
    }

  *init = init_expr;
  *step = step_expr;

  if (TREE_CODE (step_expr) != INTEGER_CST
      && (TREE_CODE (step_expr) != SSA_NAME
          || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
              && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))
          || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
              && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))
                  || !flag_associative_math)))
      && (TREE_CODE (step_expr) != REAL_CST
          || !flag_associative_math))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "step unknown.\n");
      return false;
    }

  return true;
}

/* Function vect_analyze_scalar_cycles_1.

   Examine the cross iteration def-use cycles of scalar variables
   in LOOP.  LOOP_VINFO represents the loop that is now being
   considered for vectorization (can be LOOP, or an outer-loop
   enclosing LOOP).  */

static void
vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
{
  basic_block bb = loop->header;
  tree init, step;
  vec<gimple> worklist;
  worklist.create (64);
  gimple_stmt_iterator gsi;
  bool double_reduc;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_scalar_cycles ===\n");

  /* First - identify all inductions.  Reduction detection assumes that all
     the inductions have been identified; therefore, this order must not be
     changed.  */
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      /* Skip virtual phis.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
      if (virtual_operand_p (def))
        continue;

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */
      access_fn = analyze_scalar_evolution (loop, def);
      if (access_fn)
        {
          STRIP_NOPS (access_fn);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "Access function of PHI: ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
              dump_printf (MSG_NOTE, "\n");
            }
          STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
            = evolution_part_in_loop_num (access_fn, loop->num);
        }

      if (!access_fn
          || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
          || (LOOP_VINFO_LOOP (loop_vinfo) != loop
              && TREE_CODE (step) != INTEGER_CST))
        {
          worklist.safe_push (phi);
          continue;
        }

      gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n");
      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
    }


  /* Second - identify all reductions and nested cycles.  */
  while (worklist.length () > 0)
    {
      gimple phi = worklist.pop ();
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      gimple reduc_stmt;
      bool nested_cycle;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      gcc_assert (!virtual_operand_p (def)
                  && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);

      nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
      reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
                                                &double_reduc);
      if (reduc_stmt)
        {
          if (double_reduc)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "Detected double reduction.\n");

              STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
              STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                    vect_double_reduction_def;
            }
          else
            {
              if (nested_cycle)
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "Detected vectorizable nested cycle.\n");

                  STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
                  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                             vect_nested_cycle;
                }
              else
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "Detected reduction.\n");

                  STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
                  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                           vect_reduction_def;
                  /* Store the reduction cycles for possible vectorization in
                     loop-aware SLP.  */
                  LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt);
                }
            }
        }
      else
        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Unknown def-use cycle pattern.\n");
    }

  worklist.release ();
}


/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop-header PHIs of scalar variables.  Classify each
   cycle as one of the following: invariant, induction, reduction, unknown.
   We do that for the loop represented by LOOP_VINFO, and also for its
   inner-loop, if it exists.
   Examples for scalar cycles:

   Example1: reduction:

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction:

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;  */

static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  vect_analyze_scalar_cycles_1 (loop_vinfo, loop);

  /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
     Reductions in such an inner-loop therefore have different properties
     than the reductions in the nest that gets vectorized:
     1. When vectorized, they are executed in the same order as in the
        original scalar loop, so we can't change the order of computation
        when vectorizing them.
     2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
        current checks are too strict.  */
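
  /* For example (an illustrative sketch of such a nest):

       for (i = 0; i < N; i++)
         {
           s = 0;
           for (j = 0; j < M; j++)
             s += a[i][j];
           b[i] = s;
         }

     When the outer loop is vectorized, the inner-loop reduction of 's'
     still runs sequentially inside each outer iteration, so its order of
     computation must be preserved.  */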

  if (loop->inner)
    vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
}

/* Function vect_get_loop_niters.

   Determine how many iterations the loop executes.
   If an expression that represents the number of iterations
   can be constructed, place it in NUMBER_OF_ITERATIONS.
   Return the loop exit condition.  */

static gimple
vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
{
  tree niters;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== get_loop_niters ===\n");
  niters = number_of_exit_cond_executions (loop);

  if (niters != NULL_TREE
      && niters != chrec_dont_know)
    {
      *number_of_iterations = niters;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "==> get_loop_niters:");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, *number_of_iterations);
          dump_printf (MSG_NOTE, "\n");
        }
    }

  return get_loop_exit_condition (loop);
}


/* Function bb_in_loop_p

   Used as predicate for dfs order traversal of the loop bbs.  */

static bool
bb_in_loop_p (const_basic_block bb, const void *data)
{
  const struct loop *const loop = (const struct loop *)data;
  if (flow_bb_inside_loop_p (loop, bb))
    return true;
  return false;
}


/* Function new_loop_vec_info.

   Create and initialize a new loop_vec_info struct for LOOP, as well as
   stmt_vec_info structs for all the stmts in LOOP.  */

static loop_vec_info
new_loop_vec_info (struct loop *loop)
{
  loop_vec_info res;
  basic_block *bbs;
  gimple_stmt_iterator si;
  unsigned int i, nbbs;

  res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
  LOOP_VINFO_LOOP (res) = loop;

  bbs = get_loop_body (loop);

  /* Create/Update stmt_info for all stmts in the loop.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      /* BBs in a nested inner-loop will have been already processed (because
         we will have called vect_analyze_loop_form for any nested inner-loop).
         Therefore, for stmts in an inner-loop we just want to update the
         STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
         loop_info of the outer-loop we are currently considering to vectorize
         (instead of the loop_info of the inner-loop).
         For stmts in other BBs we need to create a stmt_info from scratch.  */
      if (bb->loop_father != loop)
        {
          /* Inner-loop bb.  */
          gcc_assert (loop->inner && bb->loop_father == loop->inner);
          for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple phi = gsi_stmt (si);
              stmt_vec_info stmt_info = vinfo_for_stmt (phi);
              loop_vec_info inner_loop_vinfo =
                STMT_VINFO_LOOP_VINFO (stmt_info);
              gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
              STMT_VINFO_LOOP_VINFO (stmt_info) = res;
            }
          for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple stmt = gsi_stmt (si);
              stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
              loop_vec_info inner_loop_vinfo =
                STMT_VINFO_LOOP_VINFO (stmt_info);
              gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
              STMT_VINFO_LOOP_VINFO (stmt_info) = res;
            }
        }
      else
        {
          /* bb in current nest.  */
          for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple phi = gsi_stmt (si);
              gimple_set_uid (phi, 0);
              set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res, NULL));
            }

          for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple stmt = gsi_stmt (si);
              gimple_set_uid (stmt, 0);
              set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res, NULL));
            }
        }
    }

  /* CHECKME: We want to visit all BBs before their successors (except for
     latch blocks, for which this assertion wouldn't hold).  In the simple
     case of the loop forms we allow, a dfs order of the BBs would be the
     same as a reversed postorder traversal, so we are safe.  */

  free (bbs);
  bbs = XCNEWVEC (basic_block, loop->num_nodes);
  nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
                             bbs, loop->num_nodes, loop);
  gcc_assert (nbbs == loop->num_nodes);

  LOOP_VINFO_BBS (res) = bbs;
  LOOP_VINFO_NITERS (res) = NULL;
  LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
  LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
  LOOP_VINFO_VECTORIZABLE_P (res) = 0;
  LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
  LOOP_VINFO_VECT_FACTOR (res) = 0;
  LOOP_VINFO_LOOP_NEST (res).create (3);
  LOOP_VINFO_DATAREFS (res).create (10);
  LOOP_VINFO_DDRS (res).create (10 * 10);
  LOOP_VINFO_UNALIGNED_DR (res) = NULL;
  LOOP_VINFO_MAY_MISALIGN_STMTS (res).create (
             PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
  LOOP_VINFO_MAY_ALIAS_DDRS (res).create (
             PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
  LOOP_VINFO_GROUPED_STORES (res).create (10);
  LOOP_VINFO_REDUCTIONS (res).create (10);
  LOOP_VINFO_REDUCTION_CHAINS (res).create (10);
  LOOP_VINFO_SLP_INSTANCES (res).create (10);
  LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
  LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
  LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
  LOOP_VINFO_OPERANDS_SWAPPED (res) = false;

  return res;
}


/* Function destroy_loop_vec_info.

   Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the loop.  */

void
destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
{
  struct loop *loop;
  basic_block *bbs;
  int nbbs;
  gimple_stmt_iterator si;
  int j;
  vec<slp_instance> slp_instances;
  slp_instance instance;
  bool swapped;

  if (!loop_vinfo)
    return;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  bbs = LOOP_VINFO_BBS (loop_vinfo);
  nbbs = clean_stmts ? loop->num_nodes : 0;
  swapped = LOOP_VINFO_OPERANDS_SWAPPED (loop_vinfo);

  for (j = 0; j < nbbs; j++)
    {
      basic_block bb = bbs[j];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        free_stmt_vec_info (gsi_stmt (si));

      for (si = gsi_start_bb (bb); !gsi_end_p (si); )
        {
          gimple stmt = gsi_stmt (si);

          /* We may have broken canonical form by moving a constant
             into RHS1 of a commutative op.  Fix such occurrences.  */
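          /* For example (an illustrative sketch): an assignment such as
             "t_1 = 1 + x_2" gets its operands swapped back to the
             canonical "t_1 = x_2 + 1".  */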
          if (swapped && is_gimple_assign (stmt))
            {
              enum tree_code code = gimple_assign_rhs_code (stmt);

              if ((code == PLUS_EXPR
                   || code == POINTER_PLUS_EXPR
                   || code == MULT_EXPR)
                  && CONSTANT_CLASS_P (gimple_assign_rhs1 (stmt)))
                swap_ssa_operands (stmt,
                                   gimple_assign_rhs1_ptr (stmt),
                                   gimple_assign_rhs2_ptr (stmt));
            }

          /* Free stmt_vec_info.  */
          free_stmt_vec_info (stmt);
          gsi_next (&si);
        }
    }

  free (LOOP_VINFO_BBS (loop_vinfo));
  vect_destroy_datarefs (loop_vinfo, NULL);
  free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
  LOOP_VINFO_LOOP_NEST (loop_vinfo).release ();
  LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).release ();
  LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).release ();
  slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  FOR_EACH_VEC_ELT (slp_instances, j, instance)
    vect_free_slp_instance (instance);

  LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
  LOOP_VINFO_GROUPED_STORES (loop_vinfo).release ();
  LOOP_VINFO_REDUCTIONS (loop_vinfo).release ();
  LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).release ();

  if (LOOP_VINFO_PEELING_HTAB (loop_vinfo).is_created ())
    LOOP_VINFO_PEELING_HTAB (loop_vinfo).dispose ();

  destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));

  free (loop_vinfo);
  loop->aux = NULL;
}


/* Function vect_analyze_loop_1.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  This is a subset of the analyses applied in
   vect_analyze_loop, to be applied on an inner-loop nested in the loop
   that is now considered for (outer-loop) vectorization.  */

static loop_vec_info
vect_analyze_loop_1 (struct loop *loop)
{
  loop_vec_info loop_vinfo;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "===== analyze_loop_nest_1 =====\n");

  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */

  loop_vinfo = vect_analyze_loop_form (loop);
  if (!loop_vinfo)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bad inner-loop form.\n");
      return NULL;
    }

  return loop_vinfo;
}


/* Function vect_analyze_loop_form.

   Verify that certain CFG restrictions hold, including:
   - the loop has a pre-header
   - the loop has a single entry and exit
   - the loop exit condition is simple enough, and the number of iterations
     can be analyzed (a countable loop).  */

loop_vec_info
vect_analyze_loop_form (struct loop *loop)
{
  loop_vec_info loop_vinfo;
  gimple loop_cond;
  tree number_of_iterations = NULL;
  loop_vec_info inner_loop_vinfo = NULL;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_loop_form ===\n");

  /* Different restrictions apply when we are considering an inner-most loop,
     vs. an outer (nested) loop.
     (FORNOW. May want to relax some of these restrictions in the future).  */

  if (!loop->inner)
    {
      /* Inner-most loop.  We currently require that the number of BBs is
         exactly 2 (the header and latch).  Vectorizable inner-most loops
         look like this:

                        (pre-header)
                           |
                          header <--------+
                           | |            |
                           | +--> latch --+
                           |
                        (exit-bb)  */

      if (loop->num_nodes != 2)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: control flow in loop.\n");
          return NULL;
        }

      if (empty_block_p (loop->header))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: empty loop.\n");
          return NULL;
        }
    }
  else
    {
      struct loop *innerloop = loop->inner;
      edge entryedge;

      /* Nested loop.  We currently require that the loop is doubly-nested,
         contains a single inner loop, and the number of BBs is exactly 5.
         Vectorizable outer-loops look like this:

                        (pre-header)
                           |
                          header <---+
                           |         |
                          inner-loop |
                           |         |
                          tail ------+
                           |
                        (exit-bb)

         The inner-loop has the properties expected of inner-most loops
         as described above.  */

      if ((loop->inner)->inner || (loop->inner)->next)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: multiple nested loops.\n");
          return NULL;
        }

      /* Analyze the inner-loop.  */
      inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
      if (!inner_loop_vinfo)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: Bad inner loop.\n");
          return NULL;
        }

      if (!expr_invariant_in_loop_p (loop,
                                     LOOP_VINFO_NITERS (inner_loop_vinfo)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: inner-loop count not"
                             " invariant.\n");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      if (loop->num_nodes != 5)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: control flow in loop.\n");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
      entryedge = EDGE_PRED (innerloop->header, 0);
      if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
        entryedge = EDGE_PRED (innerloop->header, 1);

      if (entryedge->src != loop->header
          || !single_exit (innerloop)
          || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: unsupported outerloop form.\n");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Considering outer-loop vectorization.\n");
    }

  if (!single_exit (loop)
      || EDGE_COUNT (loop->header->preds) != 2)
    {
      if (dump_enabled_p ())
        {
          if (!single_exit (loop))
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: multiple exits.\n");
          else if (EDGE_COUNT (loop->header->preds) != 2)
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: too many incoming edges.\n");
        }
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  /* We assume that the loop exit condition is at the end of the loop, i.e.,
     that the loop is represented as a do-while (with a proper if-guard
     before the loop if needed), where the loop header contains all the
     executable statements, and the latch is empty.  */
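
  /* For example (an illustrative sketch), such a do-while form is

       i = 0;
       if (i < N)
         do
           {
             a[i] = b[i] + c[i];
             i++;
           }
         while (i < N);

     where the exit test is the last stmt of the header and the latch
     contains no statements.  */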
  if (!empty_block_p (loop->latch)
      || !gimple_seq_empty_p (phi_nodes (loop->latch)))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: latch block not empty.\n");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  /* Make sure there exists a single-predecessor exit bb:  */
  if (!single_pred_p (single_exit (loop)->dest))
    {
      edge e = single_exit (loop);
      if (!(e->flags & EDGE_ABNORMAL))
        {
          split_loop_exit_edge (e);
          if (dump_enabled_p ())
            dump_printf (MSG_NOTE, "split exit edge.\n");
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: abnormal loop exit edge.\n");
          if (inner_loop_vinfo)
            destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }
    }

  loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
  if (!loop_cond)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: complicated exit condition.\n");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (!number_of_iterations)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: number of iterations cannot be "
                         "computed.\n");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (chrec_contains_undetermined (number_of_iterations))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Infinite number of iterations.\n");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (!NITERS_KNOWN_P (number_of_iterations))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Symbolic number of iterations is ");
          dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
          dump_printf (MSG_NOTE, "\n");
        }
    }
  else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: number of iterations = 0.\n");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  loop_vinfo = new_loop_vec_info (loop);
  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
  LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;

  STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;

  /* CHECKME: May want to keep it around in the future.  */
  if (inner_loop_vinfo)
    destroy_loop_vec_info (inner_loop_vinfo, false);

  gcc_assert (!loop->aux);
  loop->aux = loop_vinfo;
  return loop_vinfo;
}


/* Function vect_analyze_loop_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */

static bool
vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  gimple phi;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;
  int min_profitable_iters;
  int min_scalar_loop_bound;
  unsigned int th;
  bool only_slp_in_loop = true, ok;
  HOST_WIDE_INT max_niter;
  HOST_WIDE_INT estimated_niter;
  int min_profitable_estimate;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_loop_operations ===\n");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  if (slp)
    {
      /* If all the stmts in the loop can be SLPed, we perform only SLP, and
         the vectorization factor of the loop is the unrolling factor
         required by the SLP instances.  If that unrolling factor is 1, we
         say that we perform pure SLP on the loop - cross-iteration
         parallelism is not exploited.  */
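      /* For example (an illustrative sketch): a loop VF of 4 combined
         with an SLP unrolling factor of 2 yields
         least_common_multiple (4, 2) = 4, while pure SLP with an
         unrolling factor of 1 sets VF to 1 - no cross-iteration
         parallelism.  */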
      for (i = 0; i < nbbs; i++)
        {
          basic_block bb = bbs[i];
          for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple stmt = gsi_stmt (si);
              stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
              gcc_assert (stmt_info);
              if ((STMT_VINFO_RELEVANT_P (stmt_info)
                   || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
                  && !PURE_SLP_STMT (stmt_info))
                /* STMT needs both SLP and loop-based vectorization.  */
                only_slp_in_loop = false;
            }
        }

      if (only_slp_in_loop)
        vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
      else
        vectorization_factor = least_common_multiple (vectorization_factor,
                                LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));

      LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Updating vectorization factor to %d\n",
                         vectorization_factor);
    }

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          ok = true;

          stmt_info = vinfo_for_stmt (phi);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "examining phi: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
              dump_printf (MSG_NOTE, "\n");
            }

          /* Inner-loop loop-closed exit phi in outer-loop vectorization
             (i.e., a phi in the tail of the outer-loop).  */
          if (! is_loop_header_bb_p (bb))
            {
              /* FORNOW: we currently don't support the case that these phis
                 are not used in the outerloop (unless it is double reduction,
                 i.e., this phi is vect_reduction_def), because this case
                 requires us to actually do something here.  */
              if ((!STMT_VINFO_RELEVANT_P (stmt_info)
                   || STMT_VINFO_LIVE_P (stmt_info))
                  && STMT_VINFO_DEF_TYPE (stmt_info)
                     != vect_double_reduction_def)
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Unsupported loop-closed phi in "
                                     "outer-loop.\n");
                  return false;
                }

              /* If PHI is used in the outer loop, we check that its operand
                 is defined in the inner loop.  */
              if (STMT_VINFO_RELEVANT_P (stmt_info))
                {
                  tree phi_op;
                  gimple op_def_stmt;

                  if (gimple_phi_num_args (phi) != 1)
                    return false;

                  phi_op = PHI_ARG_DEF (phi, 0);
                  if (TREE_CODE (phi_op) != SSA_NAME)
                    return false;

                  op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
                  if (gimple_nop_p (op_def_stmt)
                      || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
                      || !vinfo_for_stmt (op_def_stmt))
                    return false;

                  if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
                        != vect_used_in_outer
                      && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
                           != vect_used_in_outer_by_reduction)
                    return false;
                }

              continue;
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              /* FORNOW: not yet supported.  */
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "not vectorized: value used after loop.\n");
              return false;
            }

          if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
              && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
            {
              /* A scalar-dependence cycle that we don't support.  */
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "not vectorized: scalar dependence cycle.\n");
              return false;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              need_to_vectorize = true;
              if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
                ok = vectorizable_induction (phi, NULL, NULL);
            }

          if (!ok)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not vectorized: relevant phi not "
                                   "supported: ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, phi, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return false;
            }
        }

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          gimple stmt = gsi_stmt (si);
          if (!gimple_clobber_p (stmt)
              && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
            return false;
        }
    } /* bbs */

  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "All the computation can be taken out of the loop.\n");
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: redundant loop. no profit to "
                         "vectorize.\n");
      return false;
    }

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vectorization_factor = %d, niters = "
                     HOST_WIDE_INT_PRINT_DEC "\n", vectorization_factor,
                     LOOP_VINFO_INT_NITERS (loop_vinfo));

  if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
       && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
      || ((max_niter = max_stmt_executions_int (loop)) != -1
          && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: iteration count too small.\n");
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: iteration count smaller than "
                         "vectorization factor.\n");
      return false;
    }

  /* Analyze cost.  Decide if worth while to vectorize.  */

  /* Once VF is set, SLP costs should be updated since the number of created
     vector stmts depends on VF.  */
  vect_update_slp_costs_according_to_vf (loop_vinfo);

  vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
                                      &min_profitable_estimate);
  LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;

  if (min_profitable_iters < 0)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: vectorization not profitable.\n");
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: vector version will never be "
                         "profitable.\n");
      return false;
    }

  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
                            * vectorization_factor) - 1);


  /* Use the cost model only if it is more conservative than the
     user-specified threshold.  */

  th = (unsigned) min_scalar_loop_bound;
  if (min_profitable_iters
      && (!min_scalar_loop_bound
          || min_profitable_iters > min_scalar_loop_bound))
    th = (unsigned) min_profitable_iters;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: vectorization not profitable.\n");
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "not vectorized: iteration count smaller than user "
                         "specified loop bound parameter or minimum profitable "
                         "iterations (whichever is more conservative).\n");
      return false;
    }

  if ((estimated_niter = estimated_stmt_executions_int (loop)) != -1
      && ((unsigned HOST_WIDE_INT) estimated_niter
          <= MAX (th, (unsigned) min_profitable_estimate)))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: estimated iteration count too "
                         "small.\n");
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "not vectorized: estimated iteration count smaller "
                         "than specified loop bound parameter or minimum "
                         "profitable iterations (whichever is more "
                         "conservative).\n");
      return false;
    }

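  /* For example (an illustrative sketch): for a niters of the form
     4 * n, tree_ctz knows that at least two low bits are zero, so with
     VF = 4 (exact_log2 = 2) no epilogue loop is needed; for a niters
     about which nothing is known, tree_ctz returns 0 and an epilogue
     loop must handle the leftover iterations.  */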
  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)
      || ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
          < exact_log2 (vectorization_factor)))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required.\n");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: can't create epilog loop 1.\n");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: can't create epilog loop 2.\n");
          return false;
        }
    }

  return true;
}


/* Function vect_analyze_loop_2.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  */
static bool
vect_analyze_loop_2 (loop_vec_info loop_vinfo)
{
  bool ok, slp = false;
  int max_vf = MAX_VECTORIZATION_FACTOR;
  int min_vf = 2;

  /* Find all data references in the loop (which correspond to vdefs/vuses)
     and analyze their evolution in the loop.  Also adjust the minimal
     vectorization factor according to the loads and stores.

     FORNOW: Handle only simple array references whose alignment can be
     forced, and aligned pointer-references.  */

  ok = vect_analyze_data_refs (loop_vinfo, NULL, &min_vf);
  if (!ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bad data references.\n");
      return false;
    }

  /* Analyze the access patterns of the data-refs in the loop (consecutive,
     complex, etc.).  FORNOW: Only handle consecutive access pattern.  */

  ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
  if (!ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bad data access.\n");
      return false;
    }

  /* Classify all cross-iteration scalar data-flow cycles.
     Cross-iteration cycles caused by virtual phis are analyzed separately.  */

  vect_analyze_scalar_cycles (loop_vinfo);

  vect_pattern_recog (loop_vinfo, NULL);

  /* Data-flow analysis to detect stmts that do not need to be vectorized.  */

  ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
  if (!ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "unexpected pattern.\n");
      return false;
    }

  /* Analyze data dependences between the data-refs in the loop
     and adjust the maximum vectorization factor according to
     the dependences.
     FORNOW: fail at the first data dependence that we encounter.  */

  ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf);
  if (!ok
      || max_vf < min_vf)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bad data dependence.\n");
      return false;
    }

  ok = vect_determine_vectorization_factor (loop_vinfo);
  if (!ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't determine vectorization factor.\n");
      return false;
    }
  if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bad data dependence.\n");
      return false;
    }

  /* Analyze the alignment of the data-refs in the loop.
     Fail if a data reference is found that cannot be vectorized.  */

  ok = vect_analyze_data_refs_alignment (loop_vinfo, NULL);
  if (!ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bad data alignment.\n");
      return false;
    }

  /* Prune the list of ddrs to be tested at run-time by versioning for alias.
     It is important to call pruning after vect_analyze_data_ref_accesses,
     since we use grouping information gathered by interleaving analysis.  */
  ok = vect_prune_runtime_alias_test_list (loop_vinfo);
  if (!ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "too long list of versioning for alias "
                         "run-time tests.\n");
      return false;
    }

  /* This pass will decide on using loop versioning and/or loop peeling in
     order to enhance the alignment of data references in the loop.  */

  ok = vect_enhance_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bad data alignment.\n");
      return false;
    }

  /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
  ok = vect_analyze_slp (loop_vinfo, NULL);
  if (ok)
    {
      /* Decide which possible SLP instances to SLP.  */
      slp = vect_make_slp_decision (loop_vinfo);

      /* Find stmts that need to be both vectorized and SLPed.  */
      vect_detect_hybrid_slp (loop_vinfo);
    }
  else
    return false;

  /* Scan all the operations in the loop and make sure they are
     vectorizable.  */

  ok = vect_analyze_loop_operations (loop_vinfo, slp);
  if (!ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bad operation or unsupported loop bound.\n");
      return false;
    }

  return true;
}

/* Function vect_analyze_loop.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  */
loop_vec_info
vect_analyze_loop (struct loop *loop)
{
  loop_vec_info loop_vinfo;
  unsigned int vector_sizes;

  /* Autodetect first vector size we try.  */
  current_vector_size = 0;
  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "===== analyze_loop_nest =====\n");

  if (loop_outer (loop)
      && loop_vec_info_for_loop (loop_outer (loop))
      && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop already vectorized.\n");
      return NULL;
    }

  while (1)
    {
      /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
      loop_vinfo = vect_analyze_loop_form (loop);
      if (!loop_vinfo)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "bad loop form.\n");
          return NULL;
        }

      if (vect_analyze_loop_2 (loop_vinfo))
        {
          LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;

          return loop_vinfo;
        }

      destroy_loop_vec_info (loop_vinfo, true);

      vector_sizes &= ~current_vector_size;
      if (vector_sizes == 0
          || current_vector_size == 0)
        return NULL;

1824 /* Try the next biggest vector size. */
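/* E.g. (illustrative): if the target advertised sizes 32 and 16
   (bitmask 0x30) and the failed attempt used 32, the mask is now
   0x10, and 1 << floor_log2 (0x10) selects 16. */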
1825 current_vector_size = 1 << floor_log2 (vector_sizes);
1826 if (dump_enabled_p ())
1827 dump_printf_loc (MSG_NOTE, vect_location,
1828 "***** Re-trying analysis with "
1829 "vector size %d\n", current_vector_size);
1830 }
1831 }
1832
1833
1834 /* Function reduction_code_for_scalar_code
1835
1836 Input:
1837 CODE - tree_code of a reduction operation.
1838
1839 Output:
1840 REDUC_CODE - the corresponding tree-code to be used to reduce the
1841 vector of partial results into a single scalar result (which
1842 will also reside in a vector) or ERROR_MARK if the operation is
1843 a supported reduction operation, but does not have such a tree-code.
1844
1845 Return FALSE if CODE currently cannot be vectorized as a reduction. */
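/* For example, for an addition reduction (PLUS_EXPR) the vector of
   partial sums {s0, s1, s2, s3} is collapsed by REDUC_PLUS_EXPR into
   the single scalar s0 + s1 + s2 + s3 (still residing in a vector,
   as noted above); MULT_EXPR is a supported reduction but has no
   such tree-code, hence ERROR_MARK. */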
1846
1847 static bool
1848 reduction_code_for_scalar_code (enum tree_code code,
1849 enum tree_code *reduc_code)
1850 {
1851 switch (code)
1852 {
1853 case MAX_EXPR:
1854 *reduc_code = REDUC_MAX_EXPR;
1855 return true;
1856
1857 case MIN_EXPR:
1858 *reduc_code = REDUC_MIN_EXPR;
1859 return true;
1860
1861 case PLUS_EXPR:
1862 *reduc_code = REDUC_PLUS_EXPR;
1863 return true;
1864
1865 case MULT_EXPR:
1866 case MINUS_EXPR:
1867 case BIT_IOR_EXPR:
1868 case BIT_XOR_EXPR:
1869 case BIT_AND_EXPR:
1870 *reduc_code = ERROR_MARK;
1871 return true;
1872
1873 default:
1874 return false;
1875 }
1876 }
1877
1878
1879 /* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement
1880 STMT is printed with a message MSG. */
1881
1882 static void
1883 report_vect_op (int msg_type, gimple stmt, const char *msg)
1884 {
1885 dump_printf_loc (msg_type, vect_location, "%s", msg);
1886 dump_gimple_stmt (msg_type, TDF_SLIM, stmt, 0);
1887 dump_printf (msg_type, "\n");
1888 }
1889
1890
1891 /* Detect SLP reduction of the form:
1892
1893 #a1 = phi <a5, a0>
1894 a2 = operation (a1)
1895 a3 = operation (a2)
1896 a4 = operation (a3)
1897 a5 = operation (a4)
1898
1899 #a = phi <a5>
1900
1901 PHI is the reduction phi node (#a1 = phi <a5, a0> above)
1902 FIRST_STMT is the first reduction stmt in the chain
1903 (a2 = operation (a1)).
1904
1905 Return TRUE if a reduction chain was detected. */
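/* Such a chain typically arises from a manually unrolled summation,
   e.g. (variable and array names illustrative only):

     for (i = 0; i < n; i += 4)
       {
         s = s + a[i];
         s = s + a[i + 1];
         s = s + a[i + 2];
         s = s + a[i + 3];
       }

   where each "s = s + ..." statement becomes one link of the chain
   rooted at the reduction phi. */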
1906
1907 static bool
1908 vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
1909 {
1910 struct loop *loop = (gimple_bb (phi))->loop_father;
1911 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
1912 enum tree_code code;
1913 gimple current_stmt = NULL, loop_use_stmt = NULL, first, next_stmt;
1914 stmt_vec_info use_stmt_info, current_stmt_info;
1915 tree lhs;
1916 imm_use_iterator imm_iter;
1917 use_operand_p use_p;
1918 int nloop_uses, size = 0, n_out_of_loop_uses;
1919 bool found = false;
1920
1921 if (loop != vect_loop)
1922 return false;
1923
1924 lhs = PHI_RESULT (phi);
1925 code = gimple_assign_rhs_code (first_stmt);
1926 while (1)
1927 {
1928 nloop_uses = 0;
1929 n_out_of_loop_uses = 0;
1930 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1931 {
1932 gimple use_stmt = USE_STMT (use_p);
1933 if (is_gimple_debug (use_stmt))
1934 continue;
1935
1938 /* Check if we got back to the reduction phi. */
1939 if (use_stmt == phi)
1940 {
1941 loop_use_stmt = use_stmt;
1942 found = true;
1943 break;
1944 }
1945
1946 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1947 {
1948 if (vinfo_for_stmt (use_stmt)
1949 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1950 {
1951 loop_use_stmt = use_stmt;
1952 nloop_uses++;
1953 }
1954 }
1955 else
1956 n_out_of_loop_uses++;
1957
1958 /* There can be either a single use in the loop or two uses in
1959 phi nodes. */
1960 if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
1961 return false;
1962 }
1963
1964 if (found)
1965 break;
1966
1967 /* We reached a statement with no loop uses. */
1968 if (nloop_uses == 0)
1969 return false;
1970
1971 /* This is a loop exit phi, and we haven't reached the reduction phi. */
1972 if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
1973 return false;
1974
1975 if (!is_gimple_assign (loop_use_stmt)
1976 || code != gimple_assign_rhs_code (loop_use_stmt)
1977 || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
1978 return false;
1979
1980 /* Insert USE_STMT into reduction chain. */
1981 use_stmt_info = vinfo_for_stmt (loop_use_stmt);
1982 if (current_stmt)
1983 {
1984 current_stmt_info = vinfo_for_stmt (current_stmt);
1985 GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
1986 GROUP_FIRST_ELEMENT (use_stmt_info)
1987 = GROUP_FIRST_ELEMENT (current_stmt_info);
1988 }
1989 else
1990 GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;
1991
1992 lhs = gimple_assign_lhs (loop_use_stmt);
1993 current_stmt = loop_use_stmt;
1994 size++;
1995 }
1996
1997 if (!found || loop_use_stmt != phi || size < 2)
1998 return false;
1999
2000 /* Swap the operands, if needed, to make the reduction operand be the second
2001 operand. */
2002 lhs = PHI_RESULT (phi);
2003 next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2004 while (next_stmt)
2005 {
2006 if (gimple_assign_rhs2 (next_stmt) == lhs)
2007 {
2008 tree op = gimple_assign_rhs1 (next_stmt);
2009 gimple def_stmt = NULL;
2010
2011 if (TREE_CODE (op) == SSA_NAME)
2012 def_stmt = SSA_NAME_DEF_STMT (op);
2013
2014 /* Check that the other def is either defined in the loop
2015 ("vect_internal_def"), or it's an induction (defined by a
2016 loop-header phi-node). */
2017 if (def_stmt
2018 && gimple_bb (def_stmt)
2019 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2020 && (is_gimple_assign (def_stmt)
2021 || is_gimple_call (def_stmt)
2022 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2023 == vect_induction_def
2024 || (gimple_code (def_stmt) == GIMPLE_PHI
2025 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2026 == vect_internal_def
2027 && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2028 {
2029 lhs = gimple_assign_lhs (next_stmt);
2030 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2031 continue;
2032 }
2033
2034 return false;
2035 }
2036 else
2037 {
2038 tree op = gimple_assign_rhs2 (next_stmt);
2039 gimple def_stmt = NULL;
2040
2041 if (TREE_CODE (op) == SSA_NAME)
2042 def_stmt = SSA_NAME_DEF_STMT (op);
2043
2044 /* Check that the other def is either defined in the loop
2045 ("vect_internal_def"), or it's an induction (defined by a
2046 loop-header phi-node). */
2047 if (def_stmt
2048 && gimple_bb (def_stmt)
2049 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2050 && (is_gimple_assign (def_stmt)
2051 || is_gimple_call (def_stmt)
2052 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2053 == vect_induction_def
2054 || (gimple_code (def_stmt) == GIMPLE_PHI
2055 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2056 == vect_internal_def
2057 && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2058 {
2059 if (dump_enabled_p ())
2060 {
2061 dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: ");
2062 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, next_stmt, 0);
2063 dump_printf (MSG_NOTE, "\n");
2064 }
2065
2066 swap_ssa_operands (next_stmt,
2067 gimple_assign_rhs1_ptr (next_stmt),
2068 gimple_assign_rhs2_ptr (next_stmt));
2069 update_stmt (next_stmt);
2070
2071 if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
2072 LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2073 }
2074 else
2075 return false;
2076 }
2077
2078 lhs = gimple_assign_lhs (next_stmt);
2079 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2080 }
2081
2082 /* Save the chain for further analysis in SLP detection. */
2083 first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2084 LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (first);
2085 GROUP_SIZE (vinfo_for_stmt (first)) = size;
2086
2087 return true;
2088 }
2089
2090
2091 /* Function vect_is_simple_reduction_1
2092
2093 (1) Detect a cross-iteration def-use cycle that represents a simple
2094 reduction computation. We look for the following pattern:
2095
2096 loop_header:
2097 a1 = phi < a0, a2 >
2098 a3 = ...
2099 a2 = operation (a3, a1)
2100
2101 or
2102
2103 a3 = ...
2104 loop_header:
2105 a1 = phi < a0, a2 >
2106 a2 = operation (a3, a1)
2107
2108 such that:
2109 1. operation is commutative and associative and it is safe to
2110 change the order of the computation (if CHECK_REDUCTION is true)
2111 2. no uses for a2 in the loop (a2 is used out of the loop)
2112 3. no uses of a1 in the loop besides the reduction operation
2113 4. no uses of a1 outside the loop.
2114
2115 Conditions 1,4 are tested here.
2116 Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
2117
2118 (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
2119 nested cycles, if CHECK_REDUCTION is false.
2120
2121 (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
2122 reductions:
2123
2124 a1 = phi < a0, a2 >
2125 inner loop (def of a3)
2126 a2 = phi < a3 >
2127
2128 If MODIFY is true it tries also to rework the code in-place to enable
2129 detection of more reduction patterns. For the time being we rewrite
2130 "res -= RHS" into "rhs += -RHS" when it seems worthwhile.
2131 */
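/* For example (illustrative code):

     s = init;
     for (i = 0; i < n; i++)
       s += a[i];

   matches the first pattern: a1 is the loop-header phi for s and
   "a2 = a1 + a3" is the reduction statement. A loop that also
   consumes s in its body, e.g. "b[i] = s; s += a[i];", violates
   condition 3 and is rejected. */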
2132
2133 static gimple
2134 vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
2135 bool check_reduction, bool *double_reduc,
2136 bool modify)
2137 {
2138 struct loop *loop = (gimple_bb (phi))->loop_father;
2139 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2140 edge latch_e = loop_latch_edge (loop);
2141 tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
2142 gimple def_stmt, def1 = NULL, def2 = NULL;
2143 enum tree_code orig_code, code;
2144 tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
2145 tree type;
2146 int nloop_uses;
2147 tree name;
2148 imm_use_iterator imm_iter;
2149 use_operand_p use_p;
2150 bool phi_def;
2151
2152 *double_reduc = false;
2153
2154 /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
2155 otherwise, we assume outer loop vectorization. */
2156 gcc_assert ((check_reduction && loop == vect_loop)
2157 || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
2158
2159 name = PHI_RESULT (phi);
2160 nloop_uses = 0;
2161 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2162 {
2163 gimple use_stmt = USE_STMT (use_p);
2164 if (is_gimple_debug (use_stmt))
2165 continue;
2166
2167 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2168 {
2169 if (dump_enabled_p ())
2170 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2171 "intermediate value used outside loop.\n");
2172
2173 return NULL;
2174 }
2175
2176 if (vinfo_for_stmt (use_stmt)
2177 && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2178 nloop_uses++;
2179 if (nloop_uses > 1)
2180 {
2181 if (dump_enabled_p ())
2182 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2183 "reduction used in loop.\n");
2184 return NULL;
2185 }
2186 }
2187
2188 if (TREE_CODE (loop_arg) != SSA_NAME)
2189 {
2190 if (dump_enabled_p ())
2191 {
2192 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2193 "reduction: not ssa_name: ");
2194 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, loop_arg);
2195 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2196 }
2197 return NULL;
2198 }
2199
2200 def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2201 if (!def_stmt)
2202 {
2203 if (dump_enabled_p ())
2204 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2205 "reduction: no def_stmt.\n");
2206 return NULL;
2207 }
2208
2209 if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
2210 {
2211 if (dump_enabled_p ())
2212 {
2213 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2214 dump_printf (MSG_NOTE, "\n");
2215 }
2216 return NULL;
2217 }
2218
2219 if (is_gimple_assign (def_stmt))
2220 {
2221 name = gimple_assign_lhs (def_stmt);
2222 phi_def = false;
2223 }
2224 else
2225 {
2226 name = PHI_RESULT (def_stmt);
2227 phi_def = true;
2228 }
2229
2230 nloop_uses = 0;
2231 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2232 {
2233 gimple use_stmt = USE_STMT (use_p);
2234 if (is_gimple_debug (use_stmt))
2235 continue;
2236 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
2237 && vinfo_for_stmt (use_stmt)
2238 && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2239 nloop_uses++;
2240 if (nloop_uses > 1)
2241 {
2242 if (dump_enabled_p ())
2243 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2244 "reduction used in loop.\n");
2245 return NULL;
2246 }
2247 }
2248
2249 /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2250 defined in the inner loop. */
2251 if (phi_def)
2252 {
2253 op1 = PHI_ARG_DEF (def_stmt, 0);
2254
2255 if (gimple_phi_num_args (def_stmt) != 1
2256 || TREE_CODE (op1) != SSA_NAME)
2257 {
2258 if (dump_enabled_p ())
2259 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2260 "unsupported phi node definition.\n");
2261
2262 return NULL;
2263 }
2264
2265 def1 = SSA_NAME_DEF_STMT (op1);
2266 if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2267 && loop->inner
2268 && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
2269 && is_gimple_assign (def1))
2270 {
2271 if (dump_enabled_p ())
2272 report_vect_op (MSG_NOTE, def_stmt,
2273 "detected double reduction: ");
2274
2275 *double_reduc = true;
2276 return def_stmt;
2277 }
2278
2279 return NULL;
2280 }
2281
2282 code = orig_code = gimple_assign_rhs_code (def_stmt);
2283
2284 /* We can handle "res -= x[i]", which is non-associative, by
2285 simply rewriting it into "res += -x[i]". Avoid changing the
2286 gimple instruction during the first simple tests and only do this
2287 if we're allowed to change the code at all. */
2288 if (code == MINUS_EXPR
2289 && modify
2290 && (op1 = gimple_assign_rhs1 (def_stmt))
2291 && TREE_CODE (op1) == SSA_NAME
2292 && SSA_NAME_DEF_STMT (op1) == phi)
2293 code = PLUS_EXPR;
2294
2295 if (check_reduction
2296 && (!commutative_tree_code (code) || !associative_tree_code (code)))
2297 {
2298 if (dump_enabled_p ())
2299 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2300 "reduction: not commutative/associative: ");
2301 return NULL;
2302 }
2303
2304 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
2305 {
2306 if (code != COND_EXPR)
2307 {
2308 if (dump_enabled_p ())
2309 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2310 "reduction: not binary operation: ");
2311
2312 return NULL;
2313 }
2314
2315 op3 = gimple_assign_rhs1 (def_stmt);
2316 if (COMPARISON_CLASS_P (op3))
2317 {
2318 op4 = TREE_OPERAND (op3, 1);
2319 op3 = TREE_OPERAND (op3, 0);
2320 }
2321
2322 op1 = gimple_assign_rhs2 (def_stmt);
2323 op2 = gimple_assign_rhs3 (def_stmt);
2324
2325 if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2326 {
2327 if (dump_enabled_p ())
2328 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2329 "reduction: uses not ssa_names: ");
2330
2331 return NULL;
2332 }
2333 }
2334 else
2335 {
2336 op1 = gimple_assign_rhs1 (def_stmt);
2337 op2 = gimple_assign_rhs2 (def_stmt);
2338
2339 if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2340 {
2341 if (dump_enabled_p ())
2342 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2343 "reduction: uses not ssa_names: ");
2344
2345 return NULL;
2346 }
2347 }
2348
2349 type = TREE_TYPE (gimple_assign_lhs (def_stmt));
2350 if ((TREE_CODE (op1) == SSA_NAME
2351 && !types_compatible_p (type,TREE_TYPE (op1)))
2352 || (TREE_CODE (op2) == SSA_NAME
2353 && !types_compatible_p (type, TREE_TYPE (op2)))
2354 || (op3 && TREE_CODE (op3) == SSA_NAME
2355 && !types_compatible_p (type, TREE_TYPE (op3)))
2356 || (op4 && TREE_CODE (op4) == SSA_NAME
2357 && !types_compatible_p (type, TREE_TYPE (op4))))
2358 {
2359 if (dump_enabled_p ())
2360 {
2361 dump_printf_loc (MSG_NOTE, vect_location,
2362 "reduction: multiple types: operation type: ");
2363 dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
2364 dump_printf (MSG_NOTE, ", operands types: ");
2365 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2366 TREE_TYPE (op1));
2367 dump_printf (MSG_NOTE, ",");
2368 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2369 TREE_TYPE (op2));
2370 if (op3)
2371 {
2372 dump_printf (MSG_NOTE, ",");
2373 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2374 TREE_TYPE (op3));
2375 }
2376
2377 if (op4)
2378 {
2379 dump_printf (MSG_NOTE, ",");
2380 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2381 TREE_TYPE (op4));
2382 }
2383 dump_printf (MSG_NOTE, "\n");
2384 }
2385
2386 return NULL;
2387 }
2388
2389 /* Check that it's ok to change the order of the computation.
2390 Generally, when vectorizing a reduction we change the order of the
2391 computation. This may change the behavior of the program in some
2392 cases, so we need to check that this is ok. One exception is when
2393 vectorizing an outer-loop: the inner-loop is executed sequentially,
2394 and therefore vectorizing reductions in the inner-loop during
2395 outer-loop vectorization is safe. */
2396
2397 /* CHECKME: check for !flag_finite_math_only too? */
2398 if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
2399 && check_reduction)
2400 {
2401 /* Changing the order of operations changes the semantics. */
2402 if (dump_enabled_p ())
2403 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2404 "reduction: unsafe fp math optimization: ");
2405 return NULL;
2406 }
2407 else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
2408 && check_reduction)
2409 {
2410 /* Changing the order of operations changes the semantics. */
2411 if (dump_enabled_p ())
2412 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2413 "reduction: unsafe int math optimization: ");
2414 return NULL;
2415 }
2416 else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
2417 {
2418 /* Changing the order of operations changes the semantics. */
2419 if (dump_enabled_p ())
2420 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2421 "reduction: unsafe fixed-point math optimization: ");
2422 return NULL;
2423 }
2424
2425 /* If we detected "res -= x[i]" earlier, rewrite it into
2426 "res += -x[i]" now. If this turns out to be useless reassoc
2427 will clean it up again. */
2428 if (orig_code == MINUS_EXPR)
2429 {
2430 tree rhs = gimple_assign_rhs2 (def_stmt);
2431 tree negrhs = make_ssa_name (TREE_TYPE (rhs), NULL);
2432 gimple negate_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, negrhs,
2433 rhs, NULL);
2434 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
2435 set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt,
2436 loop_info, NULL));
2437 gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
2438 gimple_assign_set_rhs2 (def_stmt, negrhs);
2439 gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
2440 update_stmt (def_stmt);
2441 }
2442
2443 /* Reduction is safe. We're dealing with one of the following:
2444 1) integer arithmetic and no trapv
2445 2) floating point arithmetic, and special flags permit this optimization
2446 3) nested cycle (i.e., outer loop vectorization). */
2447 if (TREE_CODE (op1) == SSA_NAME)
2448 def1 = SSA_NAME_DEF_STMT (op1);
2449
2450 if (TREE_CODE (op2) == SSA_NAME)
2451 def2 = SSA_NAME_DEF_STMT (op2);
2452
2453 if (code != COND_EXPR
2454 && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
2455 {
2456 if (dump_enabled_p ())
2457 report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: ");
2458 return NULL;
2459 }
2460
2461 /* Check that one def is the reduction def, defined by PHI,
2462 the other def is either defined in the loop ("vect_internal_def"),
2463 or it's an induction (defined by a loop-header phi-node). */
2464
2465 if (def2 && def2 == phi
2466 && (code == COND_EXPR
2467 || !def1 || gimple_nop_p (def1)
2468 || !flow_bb_inside_loop_p (loop, gimple_bb (def1))
2469 || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
2470 && (is_gimple_assign (def1)
2471 || is_gimple_call (def1)
2472 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2473 == vect_induction_def
2474 || (gimple_code (def1) == GIMPLE_PHI
2475 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2476 == vect_internal_def
2477 && !is_loop_header_bb_p (gimple_bb (def1)))))))
2478 {
2479 if (dump_enabled_p ())
2480 report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2481 return def_stmt;
2482 }
2483
2484 if (def1 && def1 == phi
2485 && (code == COND_EXPR
2486 || !def2 || gimple_nop_p (def2)
2487 || !flow_bb_inside_loop_p (loop, gimple_bb (def2))
2488 || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
2489 && (is_gimple_assign (def2)
2490 || is_gimple_call (def2)
2491 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2492 == vect_induction_def
2493 || (gimple_code (def2) == GIMPLE_PHI
2494 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2495 == vect_internal_def
2496 && !is_loop_header_bb_p (gimple_bb (def2)))))))
2497 {
2498 if (check_reduction)
2499 {
2500 /* Swap operands (just for simplicity - so that the rest of the code
2501 can assume that the reduction variable is always the last (second)
2502 argument). */
2503 if (dump_enabled_p ())
2504 report_vect_op (MSG_NOTE, def_stmt,
2505 "detected reduction: need to swap operands: ");
2506
2507 swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
2508 gimple_assign_rhs2_ptr (def_stmt));
2509
2510 if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
2511 LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2512 }
2513 else
2514 {
2515 if (dump_enabled_p ())
2516 report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2517 }
2518
2519 return def_stmt;
2520 }
2521
2522 /* Try to find SLP reduction chain. */
2523 if (check_reduction && vect_is_slp_reduction (loop_info, phi, def_stmt))
2524 {
2525 if (dump_enabled_p ())
2526 report_vect_op (MSG_NOTE, def_stmt,
2527 "reduction: detected reduction chain: ");
2528
2529 return def_stmt;
2530 }
2531
2532 if (dump_enabled_p ())
2533 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2534 "reduction: unknown pattern: ");
2535
2536 return NULL;
2537 }
2538
2539 /* Wrapper around vect_is_simple_reduction_1, which won't modify code
2540 in-place. Arguments are as for that function. */
2541
2542 static gimple
2543 vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
2544 bool check_reduction, bool *double_reduc)
2545 {
2546 return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2547 double_reduc, false);
2548 }
2549
2550 /* Wrapper around vect_is_simple_reduction_1, which will modify code
2551 in-place if doing so enables detection of more reductions. Arguments
2552 are as for that function. */
2553
2554 gimple
2555 vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
2556 bool check_reduction, bool *double_reduc)
2557 {
2558 return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2559 double_reduc, true);
2560 }
2561
2562 /* Calculate the cost of one scalar iteration of the loop. */
2563 int
2564 vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
2565 {
2566 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2567 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2568 int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
2569 int innerloop_iters, i, stmt_cost;
2570
2571 /* Count statements in scalar loop. Using this as scalar cost for a single
2572 iteration for now.
2573
2574 TODO: Add outer loop support.
2575
2576 TODO: Consider assigning different costs to different scalar
2577 statements. */
2578
2579 /* FORNOW. */
2580 innerloop_iters = 1;
2581 if (loop->inner)
2582 innerloop_iters = 50; /* FIXME */
2583
2584 for (i = 0; i < nbbs; i++)
2585 {
2586 gimple_stmt_iterator si;
2587 basic_block bb = bbs[i];
2588
2589 if (bb->loop_father == loop->inner)
2590 factor = innerloop_iters;
2591 else
2592 factor = 1;
2593
2594 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2595 {
2596 gimple stmt = gsi_stmt (si);
2597 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2598
2599 if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
2600 continue;
2601
2602 /* Skip stmts that are not vectorized inside the loop. */
2603 if (stmt_info
2604 && !STMT_VINFO_RELEVANT_P (stmt_info)
2605 && (!STMT_VINFO_LIVE_P (stmt_info)
2606 || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
2607 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
2608 continue;
2609
2610 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
2611 {
2612 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
2613 stmt_cost = vect_get_stmt_cost (scalar_load);
2614 else
2615 stmt_cost = vect_get_stmt_cost (scalar_store);
2616 }
2617 else
2618 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2619
2620 scalar_single_iter_cost += stmt_cost * factor;
2621 }
2622 }
2623 return scalar_single_iter_cost;
2624 }
2625
2626 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */
2627 int
2628 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
2629 int *peel_iters_epilogue,
2630 int scalar_single_iter_cost,
2631 stmt_vector_for_cost *prologue_cost_vec,
2632 stmt_vector_for_cost *epilogue_cost_vec)
2633 {
2634 int retval = 0;
2635 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2636
2637 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2638 {
2639 *peel_iters_epilogue = vf/2;
2640 if (dump_enabled_p ())
2641 dump_printf_loc (MSG_NOTE, vect_location,
2642 "cost model: epilogue peel iters set to vf/2 "
2643 "because loop iterations are unknown .\n");
2644
2645 /* If peeled iterations are known but the number of scalar loop
2646 iterations is unknown, count a taken branch per peeled loop. */
2647 retval = record_stmt_cost (prologue_cost_vec, 2, cond_branch_taken,
2648 NULL, 0, vect_prologue);
2649 }
2650 else
2651 {
2652 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
2653 peel_iters_prologue = niters < peel_iters_prologue ?
2654 niters : peel_iters_prologue;
2655 *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
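/* E.g. (illustrative): niters = 100, vf = 8 and a prologue of 3
   iterations leave (100 - 3) % 8 = 1 epilogue iteration. */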
2656 /* If we need to peel for gaps, but no peeling is required, we have to
2657 peel VF iterations. */
2658 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
2659 *peel_iters_epilogue = vf;
2660 }
2661
2662 if (peel_iters_prologue)
2663 retval += record_stmt_cost (prologue_cost_vec,
2664 peel_iters_prologue * scalar_single_iter_cost,
2665 scalar_stmt, NULL, 0, vect_prologue);
2666 if (*peel_iters_epilogue)
2667 retval += record_stmt_cost (epilogue_cost_vec,
2668 *peel_iters_epilogue * scalar_single_iter_cost,
2669 scalar_stmt, NULL, 0, vect_epilogue);
2670 return retval;
2671 }
2672
2673 /* Function vect_estimate_min_profitable_iters
2674
2675 Return the number of iterations required for the vector version of the
2676 loop to be profitable relative to the cost of the scalar version of the
2677 loop. */
2678
2679 static void
2680 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
2681 int *ret_min_profitable_niters,
2682 int *ret_min_profitable_estimate)
2683 {
2684 int min_profitable_iters;
2685 int min_profitable_estimate;
2686 int peel_iters_prologue;
2687 int peel_iters_epilogue;
2688 unsigned vec_inside_cost = 0;
2689 int vec_outside_cost = 0;
2690 unsigned vec_prologue_cost = 0;
2691 unsigned vec_epilogue_cost = 0;
2692 int scalar_single_iter_cost = 0;
2693 int scalar_outside_cost = 0;
2694 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2695 int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2696 void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2697
2698 /* Cost model disabled. */
2699 if (unlimited_cost_model ())
2700 {
2701 dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
2702 *ret_min_profitable_niters = 0;
2703 *ret_min_profitable_estimate = 0;
2704 return;
2705 }
2706
2707 /* Requires loop versioning tests to handle misalignment. */
2708 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2709 {
2710 /* FIXME: Make cost depend on complexity of individual check. */
2711 unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
2712 (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
2713 vect_prologue);
2714 dump_printf (MSG_NOTE,
2715 "cost model: Adding cost of checks for loop "
2716 "versioning to treat misalignment.\n");
2717 }
2718
2719 /* Requires loop versioning with alias checks. */
2720 if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2721 {
2722 /* FIXME: Make cost depend on complexity of individual check. */
2723 unsigned len = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).length ();
2724 (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
2725 vect_prologue);
2726 dump_printf (MSG_NOTE,
2727 "cost model: Adding cost of checks for loop "
2728 "versioning aliasing.\n");
2729 }
2730
2731 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2732 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2733 (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, NULL, 0,
2734 vect_prologue);
2735
2736 /* Count statements in scalar loop. Using this as scalar cost for a single
2737 iteration for now.
2738
2739 TODO: Add outer loop support.
2740
2741 TODO: Consider assigning different costs to different scalar
2742 statements. */
2743
2744 scalar_single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
2745
2746 /* Add additional cost for the peeled instructions in prologue and epilogue
2747 loop.
2748
2749 FORNOW: If we don't know the value of peel_iters for prologue or epilogue
2750 at compile-time - we assume it's vf/2 (the worst would be vf-1).
2751
2752 TODO: Build an expression that represents peel_iters for prologue and
2753 epilogue to be used in a run-time test. */
2754
2755 if (npeel < 0)
2756 {
2757 peel_iters_prologue = vf/2;
2758 dump_printf (MSG_NOTE, "cost model: "
2759 "prologue peel iters set to vf/2.\n");
2760
2761 /* If peeling for alignment is unknown, loop bound of main loop becomes
2762 unknown. */
2763 peel_iters_epilogue = vf/2;
2764 dump_printf (MSG_NOTE, "cost model: "
2765 "epilogue peel iters set to vf/2 because "
2766 "peeling for alignment is unknown.\n");
2767
2768 /* If peeled iterations are unknown, count a taken branch and a not taken
2769 branch per peeled loop. Even if scalar loop iterations are known,
2770 vector iterations are not known since peeled prologue iterations are
2771 not known. Hence guards remain the same. */
2772 (void) add_stmt_cost (target_cost_data, 2, cond_branch_taken,
2773 NULL, 0, vect_prologue);
2774 (void) add_stmt_cost (target_cost_data, 2, cond_branch_not_taken,
2775 NULL, 0, vect_prologue);
2776 /* FORNOW: Don't attempt to pass individual scalar instructions to
2777 the model; just assume linear cost for scalar iterations. */
2778 (void) add_stmt_cost (target_cost_data,
2779 peel_iters_prologue * scalar_single_iter_cost,
2780 scalar_stmt, NULL, 0, vect_prologue);
2781 (void) add_stmt_cost (target_cost_data,
2782 peel_iters_epilogue * scalar_single_iter_cost,
2783 scalar_stmt, NULL, 0, vect_epilogue);
2784 }
2785 else
2786 {
2787 stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
2788 stmt_info_for_cost *si;
2789 int j;
2790 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2791
2792 prologue_cost_vec.create (2);
2793 epilogue_cost_vec.create (2);
2794 peel_iters_prologue = npeel;
2795
2796 (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue,
2797 &peel_iters_epilogue,
2798 scalar_single_iter_cost,
2799 &prologue_cost_vec,
2800 &epilogue_cost_vec);
2801
2802 FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
2803 {
2804 struct _stmt_vec_info *stmt_info
2805 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
2806 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
2807 si->misalign, vect_prologue);
2808 }
2809
2810 FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
2811 {
2812 struct _stmt_vec_info *stmt_info
2813 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
2814 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
2815 si->misalign, vect_epilogue);
2816 }
2817
2818 prologue_cost_vec.release ();
2819 epilogue_cost_vec.release ();
2820 }
2821
2822 /* FORNOW: The scalar outside cost is incremented in one of the
2823 following ways:
2824
2825 1. The vectorizer checks for alignment and aliasing and generates
2826 a condition that allows dynamic vectorization. A cost model
2827 check is ANDED with the versioning condition. Hence scalar code
2828 path now has the added cost of the versioning check.
2829
2830 if (cost > th && versioning_check)
2831 jmp to vector code
2832
2833 Hence run-time scalar is incremented by not-taken branch cost.
2834
2835 2. The vectorizer then checks if a prologue is required. If the
2836 cost model check was not done before during versioning, it has to
2837 be done before the prologue check.
2838
2839 if (cost <= th)
2840 prologue = scalar_iters
2841 if (prologue == 0)
2842 jmp to vector code
2843 else
2844 execute prologue
2845 if (prologue == num_iters)
2846 go to exit
2847
2848 Hence the run-time scalar cost is incremented by a taken branch,
2849 plus a not-taken branch, plus a taken branch cost.
2850
2851 3. The vectorizer then checks if an epilogue is required. If the
2852 cost model check was not done before during prologue check, it
2853 has to be done with the epilogue check.
2854
2855 if (prologue == 0)
2856 jmp to vector code
2857 else
2858 execute prologue
2859 if (prologue == num_iters)
2860 go to exit
2861 vector code:
2862 if ((cost <= th) || (scalar_iters-prologue-epilogue == 0))
2863 jmp to epilogue
2864
2865 Hence the run-time scalar cost should be incremented by 2 taken
2866 branches.
2867
2868 TODO: The back end may reorder the BBS's differently and reverse
2869 conditions/branch directions. Change the estimates below to
2870 something more reasonable. */
2871
2872 /* If the number of iterations is known and we do not do versioning, we can
2873 decide whether to vectorize at compile time. Hence the scalar version
2874 does not carry cost model guard costs. */
2875 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2876 || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2877 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2878 {
2879 /* Cost model check occurs at versioning. */
2880 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2881 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2882 scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken);
2883 else
2884 {
2885 /* Cost model check occurs at prologue generation. */
2886 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
2887 scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
2888 + vect_get_stmt_cost (cond_branch_not_taken);
2889 /* Cost model check occurs at epilogue generation. */
2890 else
2891 scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken);
2892 }
2893 }
2894
2895 /* Complete the target-specific cost calculations. */
2896 finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
2897 &vec_inside_cost, &vec_epilogue_cost);
2898
2899 vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
2900
2901 /* Calculate number of iterations required to make the vector version
2902 profitable, relative to the loop bodies only. The following condition
2903 must hold true:
2904 SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
2905 where
2906 SIC = scalar iteration cost, VIC = vector iteration cost,
2907 VOC = vector outside cost, VF = vectorization factor,
2908 PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
2909 SOC = scalar outside cost for run time cost model check. */
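/* Worked example (purely illustrative numbers): with SIC = 4,
   VIC = 8, VF = 4, VOC = 20 and SOC = PL_ITERS = EP_ITERS = 0,
   the condition reads 4 * niters > 8 * (niters / 4) + 20, i.e.
   2 * niters > 20. The division below yields
   (20 * 4) / (4 * 4 - 8) = 10, and the result is bumped to 11
   because at exactly 10 iterations both versions cost 40. */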
2910
2911 if ((scalar_single_iter_cost * vf) > (int) vec_inside_cost)
2912 {
2913 if (vec_outside_cost <= 0)
2914 min_profitable_iters = 1;
2915 else
2916 {
2917 min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
2918 - vec_inside_cost * peel_iters_prologue
2919 - vec_inside_cost * peel_iters_epilogue)
2920 / ((scalar_single_iter_cost * vf)
2921 - vec_inside_cost);
2922
2923 if ((scalar_single_iter_cost * vf * min_profitable_iters)
2924 <= (((int) vec_inside_cost * min_profitable_iters)
2925 + (((int) vec_outside_cost - scalar_outside_cost) * vf)))
2926 min_profitable_iters++;
2927 }
2928 }
2929 /* The vector version will never be profitable. */
2930 else
2931 {
2932 if (dump_enabled_p ())
2933 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2934 "cost model: the vector iteration cost = %d "
2935 "divided by the scalar iteration cost = %d "
2936 "is greater or equal to the vectorization factor = %d"
2937 ".\n",
2938 vec_inside_cost, scalar_single_iter_cost, vf);
2939 *ret_min_profitable_niters = -1;
2940 *ret_min_profitable_estimate = -1;
2941 return;
2942 }
2943
2944 if (dump_enabled_p ())
2945 {
2946 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2947 dump_printf (MSG_NOTE, " Vector inside of loop cost: %d\n",
2948 vec_inside_cost);
2949 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n",
2950 vec_prologue_cost);
2951 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n",
2952 vec_epilogue_cost);
2953 dump_printf (MSG_NOTE, " Scalar iteration cost: %d\n",
2954 scalar_single_iter_cost);
2955 dump_printf (MSG_NOTE, " Scalar outside cost: %d\n",
2956 scalar_outside_cost);
2957 dump_printf (MSG_NOTE, " Vector outside cost: %d\n",
2958 vec_outside_cost);
2959 dump_printf (MSG_NOTE, " prologue iterations: %d\n",
2960 peel_iters_prologue);
2961 dump_printf (MSG_NOTE, " epilogue iterations: %d\n",
2962 peel_iters_epilogue);
2963 dump_printf (MSG_NOTE,
2964 " Calculated minimum iters for profitability: %d\n",
2965 min_profitable_iters);
2966 dump_printf (MSG_NOTE, "\n");
2967 }
2968
2969 min_profitable_iters =
2970 min_profitable_iters < vf ? vf : min_profitable_iters;
2971
2972 /* Because the condition we create is:
2973 if (niters <= min_profitable_iters)
2974 then skip the vectorized loop. */
2975 min_profitable_iters--;
2976
2977 if (dump_enabled_p ())
2978 dump_printf_loc (MSG_NOTE, vect_location,
2979 " Runtime profitability threshold = %d\n",
2980 min_profitable_iters);
2981
2982 *ret_min_profitable_niters = min_profitable_iters;
2983
2984 /* Calculate number of iterations required to make the vector version
2985 profitable, relative to the loop bodies only.
2986
2987 The non-vectorized variant costs SIC * niters and it must win over the vector
2988 variant on the expected loop trip count. The following condition must hold true:
2989 SIC * niters > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC + SOC */
2990
2991 if (vec_outside_cost <= 0)
2992 min_profitable_estimate = 1;
2993 else
2994 {
2995 min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) * vf
2996 - vec_inside_cost * peel_iters_prologue
2997 - vec_inside_cost * peel_iters_epilogue)
2998 / ((scalar_single_iter_cost * vf)
2999 - vec_inside_cost);
3000 }
3001 min_profitable_estimate--;
3002 min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters);
3003 if (dump_enabled_p ())
3004 dump_printf_loc (MSG_NOTE, vect_location,
3005 " Static estimate profitability threshold = %d\n",
3006 min_profitable_estimate);
3007
3008 *ret_min_profitable_estimate = min_profitable_estimate;
3009 }
3010
3011
3012 /* TODO: vect_model_*_cost and the vectorizable_* functions are closely
3013 coupled; design this better to avoid maintenance issues. */
3014
3015 /* Function vect_model_reduction_cost.
3016
3017 Models cost for a reduction operation, including the vector ops
3018 generated within the strip-mine loop, the initial definition before
3019 the loop, and the epilogue code that must be generated. */
3020
3021 static bool
3022 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
3023 int ncopies)
3024 {
3025 int prologue_cost = 0, epilogue_cost = 0;
3026 enum tree_code code;
3027 optab optab;
3028 tree vectype;
3029 gimple stmt, orig_stmt;
3030 tree reduction_op;
3031 enum machine_mode mode;
3032 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3033 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3034 void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3035
3036 /* Cost of reduction op inside loop. */
3037 unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3038 stmt_info, 0, vect_body);
3039 stmt = STMT_VINFO_STMT (stmt_info);
3040
3041 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3042 {
3043 case GIMPLE_SINGLE_RHS:
3044 gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)) == ternary_op);
3045 reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), 2);
3046 break;
3047 case GIMPLE_UNARY_RHS:
3048 reduction_op = gimple_assign_rhs1 (stmt);
3049 break;
3050 case GIMPLE_BINARY_RHS:
3051 reduction_op = gimple_assign_rhs2 (stmt);
3052 break;
3053 case GIMPLE_TERNARY_RHS:
3054 reduction_op = gimple_assign_rhs3 (stmt);
3055 break;
3056 default:
3057 gcc_unreachable ();
3058 }
3059
3060 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3061 if (!vectype)
3062 {
3063 if (dump_enabled_p ())
3064 {
3065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3066 "unsupported data-type ");
3067 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3068 TREE_TYPE (reduction_op));
3069 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3070 }
3071 return false;
3072 }
3073
3074 mode = TYPE_MODE (vectype);
3075 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3076
3077 if (!orig_stmt)
3078 orig_stmt = STMT_VINFO_STMT (stmt_info);
3079
3080 code = gimple_assign_rhs_code (orig_stmt);
3081
3082 /* Add in cost for initial definition. */
3083 prologue_cost += add_stmt_cost (target_cost_data, 1, scalar_to_vec,
3084 stmt_info, 0, vect_prologue);
3085
3086 /* Determine cost of epilogue code.
3087
3088 We have a reduction operator that will reduce the vector in one statement.
3089 Also requires scalar extract. */
3090
3091 if (!nested_in_vect_loop_p (loop, orig_stmt))
3092 {
3093 if (reduc_code != ERROR_MARK)
3094 {
3095 epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
3096 stmt_info, 0, vect_epilogue);
3097 epilogue_cost += add_stmt_cost (target_cost_data, 1, vec_to_scalar,
3098 stmt_info, 0, vect_epilogue);
3099 }
3100 else
3101 {
3102 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
3103 tree bitsize =
3104 TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
3105 int element_bitsize = tree_low_cst (bitsize, 1);
3106 int nelements = vec_size_in_bits / element_bitsize;
3107
3108 optab = optab_for_tree_code (code, vectype, optab_default);
3109
3110 /* We have a whole vector shift available. */
3111 if (VECTOR_MODE_P (mode)
3112 && optab_handler (optab, mode) != CODE_FOR_nothing
3113 && optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
3114 {
3115 /* Final reduction via vector shifts and the reduction operator.
3116 Also requires scalar extract. */
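/* E.g. (illustrative): for nelements = 4 this accounts
   exact_log2 (4) * 2 = 4 vector statements (two shift/reduce
   pairs) plus one vec_to_scalar extract. */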
3117 epilogue_cost += add_stmt_cost (target_cost_data,
3118 exact_log2 (nelements) * 2,
3119 vector_stmt, stmt_info, 0,
3120 vect_epilogue);
3121 epilogue_cost += add_stmt_cost (target_cost_data, 1,
3122 vec_to_scalar, stmt_info, 0,
3123 vect_epilogue);
3124 }
3125 else
3126 /* Use extracts and reduction op for final reduction. For N
3127 elements, we have N extracts and N-1 reduction ops. */
3128 epilogue_cost += add_stmt_cost (target_cost_data,
3129 nelements + nelements - 1,
3130 vector_stmt, stmt_info, 0,
3131 vect_epilogue);
3132 }
3133 }
3134
3135 if (dump_enabled_p ())
3136 dump_printf (MSG_NOTE,
3137 "vect_model_reduction_cost: inside_cost = %d, "
3138 "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost,
3139 prologue_cost, epilogue_cost);
3140
3141 return true;
3142 }
3143
3144
3145 /* Function vect_model_induction_cost.
3146
3147 Models cost for induction operations. */
3148
3149 static void
3150 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
3151 {
3152 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3153 void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3154 unsigned inside_cost, prologue_cost;
3155
3156 /* Loop cost for vec_loop. */
3157 inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3158 stmt_info, 0, vect_body);
3159
3160 /* Prologue cost for vec_init and vec_step. */
3161 prologue_cost = add_stmt_cost (target_cost_data, 2, scalar_to_vec,
3162 stmt_info, 0, vect_prologue);
3163
3164 if (dump_enabled_p ())
3165 dump_printf_loc (MSG_NOTE, vect_location,
3166 "vect_model_induction_cost: inside_cost = %d, "
3167 "prologue_cost = %d .\n", inside_cost, prologue_cost);
3168 }
3169
3170
3171 /* Function get_initial_def_for_induction
3172
3173 Input:
3174 STMT - a stmt that performs an induction operation in the loop.
3175 IV_PHI - the initial value of the induction variable
3176
3177 Output:
3178 Return a vector variable, initialized with the first VF values of
3179 the induction variable. E.g., for an iv with IV_PHI='X' and
3180 evolution S, for a vector of 4 units, we want to return:
3181 [X, X + S, X + 2*S, X + 3*S]. */
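/* For instance (illustrative): vectorizing "for (i = 0; i < n; i++)
   a[i] = i;" with X = 0, S = 1 and 4 units yields the initial
   vector {0, 1, 2, 3}; the step vector built further below is
   {4, 4, 4, 4}, i.e. each vector iteration advances the iv by
   VF * S. */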
3182
3183 static tree
3184 get_initial_def_for_induction (gimple iv_phi)
3185 {
3186 stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
3187 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3188 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3189 tree vectype;
3190 int nunits;
3191 edge pe = loop_preheader_edge (loop);
3192 struct loop *iv_loop;
3193 basic_block new_bb;
3194 tree new_vec, vec_init, vec_step, t;
3195 tree access_fn;
3196 tree new_var;
3197 tree new_name;
3198 gimple init_stmt, induction_phi, new_stmt;
3199 tree induc_def, vec_def, vec_dest;
3200 tree init_expr, step_expr;
3201 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3202 int i;
3203 bool ok;
3204 int ncopies;
3205 tree expr;
3206 stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
3207 bool nested_in_vect_loop = false;
3208 gimple_seq stmts = NULL;
3209 imm_use_iterator imm_iter;
3210 use_operand_p use_p;
3211 gimple exit_phi;
3212 edge latch_e;
3213 tree loop_arg;
3214 gimple_stmt_iterator si;
3215 basic_block bb = gimple_bb (iv_phi);
3216 tree stepvectype;
3217 tree resvectype;
3218
3219 /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop? */
3220 if (nested_in_vect_loop_p (loop, iv_phi))
3221 {
3222 nested_in_vect_loop = true;
3223 iv_loop = loop->inner;
3224 }
3225 else
3226 iv_loop = loop;
3227 gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);
3228
3229 latch_e = loop_latch_edge (iv_loop);
3230 loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
3231
3232 access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
3233 gcc_assert (access_fn);
3234 STRIP_NOPS (access_fn);
3235 ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
3236 &init_expr, &step_expr);
3237 gcc_assert (ok);
3238 pe = loop_preheader_edge (iv_loop);
3239
3240 vectype = get_vectype_for_scalar_type (TREE_TYPE (init_expr));
3241 resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
3242 gcc_assert (vectype);
3243 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3244 ncopies = vf / nunits;
3245
3246 gcc_assert (phi_info);
3247 gcc_assert (ncopies >= 1);
3248
3249 /* Find the first insertion point in the BB. */
3250 si = gsi_after_labels (bb);
3251
3252 /* Create the vector that holds the initial_value of the induction. */
3253 if (nested_in_vect_loop)
3254 {
3255 /* iv_loop is nested in the loop to be vectorized. init_expr had already
3256 been created during vectorization of previous stmts. We obtain it
3257 from the STMT_VINFO_VEC_STMT of the defining stmt. */
3258 tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi,
3259 loop_preheader_edge (iv_loop));
3260 vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
3261 /* If the initial value is not of proper type, convert it. */
3262 if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init)))
3263 {
3264 new_stmt = gimple_build_assign_with_ops
3265 (VIEW_CONVERT_EXPR,
3266 vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_"),
3267 build1 (VIEW_CONVERT_EXPR, vectype, vec_init), NULL_TREE);
3268 vec_init = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3269 gimple_assign_set_lhs (new_stmt, vec_init);
3270 new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop),
3271 new_stmt);
3272 gcc_assert (!new_bb);
3273 set_vinfo_for_stmt (new_stmt,
3274 new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3275 }
3276 }
3277 else
3278 {
3279 vec<constructor_elt, va_gc> *v;
3280
3281 /* iv_loop is the loop to be vectorized. Create:
3282 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
3283 new_var = vect_get_new_vect_var (TREE_TYPE (vectype),
3284 vect_scalar_var, "var_");
3285 new_name = force_gimple_operand (fold_convert (TREE_TYPE (vectype),
3286 init_expr),
3287 &stmts, false, new_var);
3288 if (stmts)
3289 {
3290 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3291 gcc_assert (!new_bb);
3292 }
3293
3294 vec_alloc (v, nunits);
3295 bool constant_p = is_gimple_min_invariant (new_name);
3296 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3297 for (i = 1; i < nunits; i++)
3298 {
3299 /* Create: new_name_i = new_name + step_expr */
3300 new_name = fold_build2 (PLUS_EXPR, TREE_TYPE (new_name),
3301 new_name, step_expr);
3302 if (!is_gimple_min_invariant (new_name))
3303 {
3304 init_stmt = gimple_build_assign (new_var, new_name);
3305 new_name = make_ssa_name (new_var, init_stmt);
3306 gimple_assign_set_lhs (init_stmt, new_name);
3307 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
3308 gcc_assert (!new_bb);
3309 if (dump_enabled_p ())
3310 {
3311 dump_printf_loc (MSG_NOTE, vect_location,
3312 "created new init_stmt: ");
3313 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, init_stmt, 0);
3314 dump_printf (MSG_NOTE, "\n");
3315 }
3316 constant_p = false;
3317 }
3318 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3319 }
3320 /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1]. */
3321 if (constant_p)
3322 new_vec = build_vector_from_ctor (vectype, v);
3323 else
3324 new_vec = build_constructor (vectype, v);
3325 vec_init = vect_init_vector (iv_phi, new_vec, vectype, NULL);
3326 }
3327
3328
3329 /* Create the vector that holds the step of the induction. */
3330 if (nested_in_vect_loop)
3331 /* iv_loop is nested in the loop to be vectorized. Generate:
3332 vec_step = [S, S, S, S] */
3333 new_name = step_expr;
3334 else
3335 {
3336 /* iv_loop is the loop to be vectorized. Generate:
3337 vec_step = [VF*S, VF*S, VF*S, VF*S] */
3338 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
3339 {
3340 expr = build_int_cst (integer_type_node, vf);
3341 expr = fold_convert (TREE_TYPE (step_expr), expr);
3342 }
3343 else
3344 expr = build_int_cst (TREE_TYPE (step_expr), vf);
3345 new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3346 expr, step_expr);
3347 if (TREE_CODE (step_expr) == SSA_NAME)
3348 new_name = vect_init_vector (iv_phi, new_name,
3349 TREE_TYPE (step_expr), NULL);
3350 }
3351
3352 t = unshare_expr (new_name);
3353 gcc_assert (CONSTANT_CLASS_P (new_name)
3354 || TREE_CODE (new_name) == SSA_NAME);
3355 stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
3356 gcc_assert (stepvectype);
3357 new_vec = build_vector_from_val (stepvectype, t);
3358 vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3359
3360
3361 /* Create the following def-use cycle:
3362 loop prolog:
3363 vec_init = ...
3364 vec_step = ...
3365 loop:
3366 vec_iv = PHI <vec_init, vec_loop>
3367 ...
3368 STMT
3369 ...
3370 vec_loop = vec_iv + vec_step; */
3371
3372 /* Create the induction-phi that defines the induction-operand. */
3373 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
3374 induction_phi = create_phi_node (vec_dest, iv_loop->header);
3375 set_vinfo_for_stmt (induction_phi,
3376 new_stmt_vec_info (induction_phi, loop_vinfo, NULL));
3377 induc_def = PHI_RESULT (induction_phi);
3378
3379 /* Create the iv update inside the loop */
3380 new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
3381 induc_def, vec_step);
3382 vec_def = make_ssa_name (vec_dest, new_stmt);
3383 gimple_assign_set_lhs (new_stmt, vec_def);
3384 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3385 set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo,
3386 NULL));
3387
3388 /* Set the arguments of the phi node: */
3389 add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
3390 add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
3391 UNKNOWN_LOCATION);
3392
3393
3394 /* In case that vectorization factor (VF) is bigger than the number
3395 of elements that we can fit in a vectype (nunits), we have to generate
3396 more than one vector stmt - i.e - we need to "unroll" the
3397 vector stmt by a factor VF/nunits. For more details see documentation
3398 in vectorizable_operation. */
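/* E.g. (illustrative): with VF = 8 and nunits = 4 we need
   ncopies = 2: the first copy holds {X, ..., X + 3*S}, the second
   is obtained by adding {4*S, ..., 4*S}, and the loop-carried
   update advances the first copy by {8*S, ..., 8*S}. */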
3399
3400 if (ncopies > 1)
3401 {
3402 stmt_vec_info prev_stmt_vinfo;
3403 /* FORNOW. This restriction should be relaxed. */
3404 gcc_assert (!nested_in_vect_loop);
3405
3406 /* Create the vector that holds the step of the induction. */
3407 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
3408 {
3409 expr = build_int_cst (integer_type_node, nunits);
3410 expr = fold_convert (TREE_TYPE (step_expr), expr);
3411 }
3412 else
3413 expr = build_int_cst (TREE_TYPE (step_expr), nunits);
3414 new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3415 expr, step_expr);
3416 if (TREE_CODE (step_expr) == SSA_NAME)
3417 new_name = vect_init_vector (iv_phi, new_name,
3418 TREE_TYPE (step_expr), NULL);
3419 t = unshare_expr (new_name);
3420 gcc_assert (CONSTANT_CLASS_P (new_name)
3421 || TREE_CODE (new_name) == SSA_NAME);
3422 new_vec = build_vector_from_val (stepvectype, t);
3423 vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3424
3425 vec_def = induc_def;
3426 prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
3427 for (i = 1; i < ncopies; i++)
3428 {
3429 /* vec_i = vec_prev + vec_step */
3430 new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
3431 vec_def, vec_step);
3432 vec_def = make_ssa_name (vec_dest, new_stmt);
3433 gimple_assign_set_lhs (new_stmt, vec_def);
3434
3435 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3436 if (!useless_type_conversion_p (resvectype, vectype))
3437 {
3438 new_stmt = gimple_build_assign_with_ops
3439 (VIEW_CONVERT_EXPR,
3440 vect_get_new_vect_var (resvectype, vect_simple_var,
3441 "vec_iv_"),
3442 build1 (VIEW_CONVERT_EXPR, resvectype,
3443 gimple_assign_lhs (new_stmt)), NULL_TREE);
3444 gimple_assign_set_lhs (new_stmt,
3445 make_ssa_name
3446 (gimple_assign_lhs (new_stmt), new_stmt));
3447 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3448 }
3449 set_vinfo_for_stmt (new_stmt,
3450 new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3451 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
3452 prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
3453 }
3454 }
3455
3456 if (nested_in_vect_loop)
3457 {
3458 /* Find the loop-closed exit-phi of the induction, and record
3459 the final vector of induction results: */
3460 exit_phi = NULL;
3461 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
3462 {
3463 if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (USE_STMT (use_p))))
3464 {
3465 exit_phi = USE_STMT (use_p);
3466 break;
3467 }
3468 }
3469 if (exit_phi)
3470 {
3471 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
3472 /* FORNOW. Currently not supporting the case that an inner-loop induction
3473 is not used in the outer-loop (i.e. is used only outside the outer-loop). */
3474 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
3475 && !STMT_VINFO_LIVE_P (stmt_vinfo));
3476
3477 STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
3478 if (dump_enabled_p ())
3479 {
3480 dump_printf_loc (MSG_NOTE, vect_location,
3481 "vector of inductions after inner-loop:");
3482 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
3483 dump_printf (MSG_NOTE, "\n");
3484 }
3485 }
3486 }
3487
3488
3489 if (dump_enabled_p ())
3490 {
3491 dump_printf_loc (MSG_NOTE, vect_location,
3492 "transform induction: created def-use cycle: ");
3493 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, induction_phi, 0);
3494 dump_printf (MSG_NOTE, "\n");
3495 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
3496 SSA_NAME_DEF_STMT (vec_def), 0);
3497 dump_printf (MSG_NOTE, "\n");
3498 }
3499
3500 STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
3501 if (!useless_type_conversion_p (resvectype, vectype))
3502 {
3503 new_stmt = gimple_build_assign_with_ops
3504 (VIEW_CONVERT_EXPR,
3505 vect_get_new_vect_var (resvectype, vect_simple_var, "vec_iv_"),
3506 build1 (VIEW_CONVERT_EXPR, resvectype, induc_def), NULL_TREE);
3507 induc_def = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3508 gimple_assign_set_lhs (new_stmt, induc_def);
3509 si = gsi_after_labels (bb);
3510 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3511 set_vinfo_for_stmt (new_stmt,
3512 new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3513 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
3514 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
3515 }
3516
3517 return induc_def;
3518 }
3519
3520
3521 /* Function get_initial_def_for_reduction
3522
3523 Input:
3524 STMT - a stmt that performs a reduction operation in the loop.
3525 INIT_VAL - the initial value of the reduction variable
3526
3527 Output:
3528 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
3529 of the reduction (used for adjusting the epilog - see below).
3530 Return a vector variable, initialized according to the operation that STMT
3531 performs. This vector will be used as the initial value of the
3532 vector of partial results.
3533
3534 Option1 (adjust in epilog): Initialize the vector as follows:
3535 add/bit or/xor: [0,0,...,0,0]
3536 mult: [1,1,...,1,1]; bit and: [-1,-1,...,-1,-1]
3537 min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
3538 and when necessary (e.g. add/mult case) let the caller know
3539 that it needs to adjust the result by init_val.
3540
3541 Option2: Initialize the vector as follows:
3542 add/bit or/xor: [init_val,0,0,...,0]
3543 mult: [init_val,1,...,1]; bit and: [init_val,-1,...,-1]
3544 min/max/cond_expr: [init_val,init_val,...,init_val]
3545 and no adjustments are needed.
3546
3547 For example, for the following code:
3548
3549 s = init_val;
3550 for (i=0;i<n;i++)
3551 s = s + a[i];
3552
3553 STMT is 's = s + a[i]', and the reduction variable is 's'.
3554 For a vector of 4 units, we want to return either [0,0,0,init_val],
3555 or [0,0,0,0] and let the caller know that it needs to adjust
3556 the result at the end by 'init_val'.
3557
3558 FORNOW, we use the 'adjust in epilog' scheme (Option1) when ADJUSTMENT_DEF
3559 is not NULL, because this way the initialization vector is simpler (the
3560 same element in all entries); otherwise we use Option2.
3561
3562 A cost model should help decide between these two schemes. */
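/* A further illustrative sketch for the multiplicative case: for
   's = s * a[i]' with a 4-element vector and initial value init_val,
   Option1 uses [1,1,1,1] and reports ADJUSTMENT_DEF = init_val (the
   caller later combines the reduced result with init_val using the
   reduction operation), while Option2 uses [init_val,1,1,1] and needs
   no adjustment.  */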
3563
3564 tree
3565 get_initial_def_for_reduction (gimple stmt, tree init_val,
3566 tree *adjustment_def)
3567 {
3568 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3569 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3570 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3571 tree scalar_type = TREE_TYPE (init_val);
3572 tree vectype = get_vectype_for_scalar_type (scalar_type);
3573 int nunits;
3574 enum tree_code code = gimple_assign_rhs_code (stmt);
3575 tree def_for_init;
3576 tree init_def;
3577 tree *elts;
3578 int i;
3579 bool nested_in_vect_loop = false;
3580 tree init_value;
3581 REAL_VALUE_TYPE real_init_val = dconst0;
3582 int int_init_val = 0;
3583 gimple def_stmt = NULL;
3584
3585 gcc_assert (vectype);
3586 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3587
3588 gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
3589 || SCALAR_FLOAT_TYPE_P (scalar_type));
3590
3591 if (nested_in_vect_loop_p (loop, stmt))
3592 nested_in_vect_loop = true;
3593 else
3594 gcc_assert (loop == (gimple_bb (stmt))->loop_father);
3595
3596 /* In case of double reduction we only create a vector variable to be put
3597 in the reduction phi node. The actual statement creation is done in
3598 vect_create_epilog_for_reduction. */
3599 if (adjustment_def && nested_in_vect_loop
3600 && TREE_CODE (init_val) == SSA_NAME
3601 && (def_stmt = SSA_NAME_DEF_STMT (init_val))
3602 && gimple_code (def_stmt) == GIMPLE_PHI
3603 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
3604 && vinfo_for_stmt (def_stmt)
3605 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
3606 == vect_double_reduction_def)
3607 {
3608 *adjustment_def = NULL;
3609 return vect_create_destination_var (init_val, vectype);
3610 }
3611
3612 if (TREE_CONSTANT (init_val))
3613 {
3614 if (SCALAR_FLOAT_TYPE_P (scalar_type))
3615 init_value = build_real (scalar_type, TREE_REAL_CST (init_val));
3616 else
3617 init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val));
3618 }
3619 else
3620 init_value = init_val;
3621
3622 switch (code)
3623 {
3624 case WIDEN_SUM_EXPR:
3625 case DOT_PROD_EXPR:
3626 case PLUS_EXPR:
3627 case MINUS_EXPR:
3628 case BIT_IOR_EXPR:
3629 case BIT_XOR_EXPR:
3630 case MULT_EXPR:
3631 case BIT_AND_EXPR:
3632 /* ADJUSTMENT_DEF is NULL when called from
3633 vect_create_epilog_for_reduction to vectorize double reduction. */
3634 if (adjustment_def)
3635 {
3636 if (nested_in_vect_loop)
3637 *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt,
3638 NULL);
3639 else
3640 *adjustment_def = init_val;
3641 }
3642
3643 if (code == MULT_EXPR)
3644 {
3645 real_init_val = dconst1;
3646 int_init_val = 1;
3647 }
3648
3649 if (code == BIT_AND_EXPR)
3650 int_init_val = -1;
3651
3652 if (SCALAR_FLOAT_TYPE_P (scalar_type))
3653 def_for_init = build_real (scalar_type, real_init_val);
3654 else
3655 def_for_init = build_int_cst (scalar_type, int_init_val);
3656
3657 /* Fill all but the first element of the vector with DEF_FOR_INIT. */
3658 elts = XALLOCAVEC (tree, nunits);
3659 for (i = nunits - 2; i >= 0; --i)
3660 elts[i + 1] = def_for_init;
3661
3662 /* Option1: the first element is '0' or '1' as well. */
3663 if (adjustment_def)
3664 {
3665 elts[0] = def_for_init;
3666 init_def = build_vector (vectype, elts);
3667 break;
3668 }
3669
3670 /* Option2: the first element is INIT_VAL. */
3671 elts[0] = init_val;
3672 if (TREE_CONSTANT (init_val))
3673 init_def = build_vector (vectype, elts);
3674 else
3675 {
3676 vec<constructor_elt, va_gc> *v;
3677 vec_alloc (v, nunits);
3678 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init_val);
3679 for (i = 1; i < nunits; ++i)
3680 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[i]);
3681 init_def = build_constructor (vectype, v);
3682 }
3683
3684 break;
3685
3686 case MIN_EXPR:
3687 case MAX_EXPR:
3688 case COND_EXPR:
3689 if (adjustment_def)
3690 {
3691 *adjustment_def = NULL_TREE;
3692 init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL);
3693 break;
3694 }
3695
3696 init_def = build_vector_from_val (vectype, init_value);
3697 break;
3698
3699 default:
3700 gcc_unreachable ();
3701 }
3702
3703 return init_def;
3704 }
3705
3706
3707 /* Function vect_create_epilog_for_reduction
3708
3709 Create code at the loop-epilog to finalize the result of a reduction
3710 computation.
3711
3712 VECT_DEFS is the list of vectors of partial results, i.e., the lhs's of the
3713 vector reduction statements.
3714 STMT is the scalar reduction stmt that is being vectorized.
3715 NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
3716 number of elements that we can fit in a vectype (nunits). In this case
3717 we have to generate more than one vector stmt, i.e., we need to "unroll"
3718 the vector stmt by a factor VF/nunits. For more details see documentation
3719 in vectorizable_operation.
3720 REDUC_CODE is the tree-code for the epilog reduction.
3721 REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
3722 computation.
3723 REDUC_INDEX is the index of the operand in the right hand side of the
3724 statement that is defined by REDUCTION_PHI.
3725 DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
3726 SLP_NODE is an SLP node containing a group of reduction statements. The
3727 first one in this group is STMT.
3728
3729 This function:
3730 1. Creates the reduction def-use cycles: sets the arguments for
3731 REDUCTION_PHIS:
3732 The loop-entry argument is the vectorized initial-value of the reduction.
3733 The loop-latch argument is taken from VECT_DEFS - the vector of partial
3734 sums.
3735 2. "Reduces" each vector of partial results VECT_DEFS into a single result,
3736 by applying the operation specified by REDUC_CODE if available, or by
3737 other means (whole-vector shifts or a scalar loop).
3738 The function also creates a new phi node at the loop exit to preserve
3739 loop-closed form, as illustrated below.
3740
3741 The flow at the entry to this function:
3742
3743 loop:
3744 vec_def = phi <null, null> # REDUCTION_PHI
3745 VECT_DEF = vector_stmt # vectorized form of STMT
3746 s_loop = scalar_stmt # (scalar) STMT
3747 loop_exit:
3748 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
3749 use <s_out0>
3750 use <s_out0>
3751
3752 The above is transformed by this function into:
3753
3754 loop:
3755 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
3756 VECT_DEF = vector_stmt # vectorized form of STMT
3757 s_loop = scalar_stmt # (scalar) STMT
3758 loop_exit:
3759 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
3760 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
3761 v_out2 = reduce <v_out1>
3762 s_out3 = extract_field <v_out2, 0>
3763 s_out4 = adjust_result <s_out3>
3764 use <s_out4>
3765 use <s_out4>
3766 */
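
/* For example (an illustrative sketch), for 's += a[i]' with a
   4-element vector the epilog computes:
     v_out2 = reduce <v_out1>            (v[0]+v[1]+v[2]+v[3])
     s_out3 = extract_field <v_out2, 0>
     s_out4 = s_out3 + adjustment        (the Option1 init_val, if any)
   and the uses of s_out0 are then replaced by s_out4.  */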
3767
3768 static void
3769 vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
3770 int ncopies, enum tree_code reduc_code,
3771 vec<gimple> reduction_phis,
3772 int reduc_index, bool double_reduc,
3773 slp_tree slp_node)
3774 {
3775 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3776 stmt_vec_info prev_phi_info;
3777 tree vectype;
3778 enum machine_mode mode;
3779 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3780 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
3781 basic_block exit_bb;
3782 tree scalar_dest;
3783 tree scalar_type;
3784 gimple new_phi = NULL, phi;
3785 gimple_stmt_iterator exit_gsi;
3786 tree vec_dest;
3787 tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
3788 gimple epilog_stmt = NULL;
3789 enum tree_code code = gimple_assign_rhs_code (stmt);
3790 gimple exit_phi;
3791 tree bitsize, bitpos;
3792 tree adjustment_def = NULL;
3793 tree vec_initial_def = NULL;
3794 tree reduction_op, expr, def;
3795 tree orig_name, scalar_result;
3796 imm_use_iterator imm_iter, phi_imm_iter;
3797 use_operand_p use_p, phi_use_p;
3798 bool extract_scalar_result = false;
3799 gimple use_stmt, orig_stmt, reduction_phi = NULL;
3800 bool nested_in_vect_loop = false;
3801 vec<gimple> new_phis = vNULL;
3802 vec<gimple> inner_phis = vNULL;
3803 enum vect_def_type dt = vect_unknown_def_type;
3804 int j, i;
3805 vec<tree> scalar_results = vNULL;
3806 unsigned int group_size = 1, k, ratio;
3807 vec<tree> vec_initial_defs = vNULL;
3808 vec<gimple> phis;
3809 bool slp_reduc = false;
3810 tree new_phi_result;
3811 gimple inner_phi = NULL;
3812
3813 if (slp_node)
3814 group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
3815
3816 if (nested_in_vect_loop_p (loop, stmt))
3817 {
3818 outer_loop = loop;
3819 loop = loop->inner;
3820 nested_in_vect_loop = true;
3821 gcc_assert (!slp_node);
3822 }
3823
3824 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3825 {
3826 case GIMPLE_SINGLE_RHS:
3827 gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
3828 == ternary_op);
3829 reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
3830 break;
3831 case GIMPLE_UNARY_RHS:
3832 reduction_op = gimple_assign_rhs1 (stmt);
3833 break;
3834 case GIMPLE_BINARY_RHS:
3835 reduction_op = reduc_index ?
3836 gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt);
3837 break;
3838 case GIMPLE_TERNARY_RHS:
3839 reduction_op = gimple_op (stmt, reduc_index + 1);
3840 break;
3841 default:
3842 gcc_unreachable ();
3843 }
3844
3845 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3846 gcc_assert (vectype);
3847 mode = TYPE_MODE (vectype);
3848
3849 /* 1. Create the reduction def-use cycle:
3850 Set the arguments of REDUCTION_PHIS, i.e., transform
3851
3852 loop:
3853 vec_def = phi <null, null> # REDUCTION_PHI
3854 VECT_DEF = vector_stmt # vectorized form of STMT
3855 ...
3856
3857 into:
3858
3859 loop:
3860 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
3861 VECT_DEF = vector_stmt # vectorized form of STMT
3862 ...
3863
3864 (in case of SLP, do it for all the phis). */
3865
3866 /* Get the loop-entry arguments. */
3867 if (slp_node)
3868 vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
3869 NULL, slp_node, reduc_index);
3870 else
3871 {
3872 vec_initial_defs.create (1);
3873 /* For the case of reduction, vect_get_vec_def_for_operand returns
3874 the scalar def before the loop, which defines the initial value
3875 of the reduction variable. */
3876 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
3877 &adjustment_def);
3878 vec_initial_defs.quick_push (vec_initial_def);
3879 }
3880
3881 /* Set the arguments of the phi nodes. */
3882 FOR_EACH_VEC_ELT (reduction_phis, i, phi)
3883 {
3884 tree vec_init_def = vec_initial_defs[i];
3885 tree def = vect_defs[i];
3886 for (j = 0; j < ncopies; j++)
3887 {
3888 /* Set the loop-entry arg of the reduction-phi. */
3889 add_phi_arg (phi, vec_init_def, loop_preheader_edge (loop),
3890 UNKNOWN_LOCATION);
3891
3892 /* Set the loop-latch arg for the reduction-phi. */
3893 if (j > 0)
3894 def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);
3895
3896 add_phi_arg (phi, def, loop_latch_edge (loop), UNKNOWN_LOCATION);
3897
3898 if (dump_enabled_p ())
3899 {
3900 dump_printf_loc (MSG_NOTE, vect_location,
3901 "transform reduction: created def-use cycle: ");
3902 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
3903 dump_printf (MSG_NOTE, "\n");
3904 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (def), 0);
3905 dump_printf (MSG_NOTE, "\n");
3906 }
3907
3908 phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
3909 }
3910 }
3911
3912 vec_initial_defs.release ();
3913
3914 /* 2. Create epilog code.
3915 The reduction epilog code operates across the elements of the vector
3916 of partial results computed by the vectorized loop.
3917 The reduction epilog code consists of:
3918
3919 step 1: compute the scalar result in a vector (v_out2)
3920 step 2: extract the scalar result (s_out3) from the vector (v_out2)
3921 step 3: adjust the scalar result (s_out3) if needed.
3922
3923 Step 1 can be accomplished using one of the following three schemes:
3924 (scheme 1) using reduc_code, if available.
3925 (scheme 2) using whole-vector shifts, if available.
3926 (scheme 3) using a scalar loop. In this case steps 1+2 above are
3927 combined.
3928
3929 The overall epilog code looks like this:
3930
3931 s_out0 = phi <s_loop> # original EXIT_PHI
3932 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
3933 v_out2 = reduce <v_out1> # step 1
3934 s_out3 = extract_field <v_out2, 0> # step 2
3935 s_out4 = adjust_result <s_out3> # step 3
3936
3937 (step 3 is optional, and steps 1 and 2 may be combined).
3938 Lastly, the uses of s_out0 are replaced by s_out4. */
3939
3940
3941 /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
3942 v_out1 = phi <VECT_DEF>
3943 Store them in NEW_PHIS. */
3944
3945 exit_bb = single_exit (loop)->dest;
3946 prev_phi_info = NULL;
3947 new_phis.create (vect_defs.length ());
3948 FOR_EACH_VEC_ELT (vect_defs, i, def)
3949 {
3950 for (j = 0; j < ncopies; j++)
3951 {
3952 tree new_def = copy_ssa_name (def, NULL);
3953 phi = create_phi_node (new_def, exit_bb);
3954 set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo, NULL));
3955 if (j == 0)
3956 new_phis.quick_push (phi);
3957 else
3958 {
3959 def = vect_get_vec_def_for_stmt_copy (dt, def);
3960 STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
3961 }
3962
3963 SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
3964 prev_phi_info = vinfo_for_stmt (phi);
3965 }
3966 }
3967
3968 /* The epilogue is created for the outer-loop, i.e., for the loop being
3969 vectorized. Create exit phis for the outer loop. */
3970 if (double_reduc)
3971 {
3972 loop = outer_loop;
3973 exit_bb = single_exit (loop)->dest;
3974 inner_phis.create (vect_defs.length ());
3975 FOR_EACH_VEC_ELT (new_phis, i, phi)
3976 {
3977 tree new_result = copy_ssa_name (PHI_RESULT (phi), NULL);
3978 gimple outer_phi = create_phi_node (new_result, exit_bb);
3979 SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
3980 PHI_RESULT (phi));
3981 set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
3982 loop_vinfo, NULL));
3983 inner_phis.quick_push (phi);
3984 new_phis[i] = outer_phi;
3985 prev_phi_info = vinfo_for_stmt (outer_phi);
3986 while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
3987 {
3988 phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
3989 new_result = copy_ssa_name (PHI_RESULT (phi), NULL);
3990 outer_phi = create_phi_node (new_result, exit_bb);
3991 SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
3992 PHI_RESULT (phi));
3993 set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
3994 loop_vinfo, NULL));
3995 STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
3996 prev_phi_info = vinfo_for_stmt (outer_phi);
3997 }
3998 }
3999 }
4000
4001 exit_gsi = gsi_after_labels (exit_bb);
4002
4003 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
4004 (i.e. when reduc_code is not available) and in the final adjustment
4005 code (if needed). Also get the original scalar reduction variable as
4006 defined in the loop. In case STMT is a "pattern-stmt" (i.e., it
4007 represents a reduction pattern), the tree-code and scalar-def are
4008 taken from the original stmt that the pattern-stmt (STMT) replaces.
4009 Otherwise (it is a regular reduction), the tree-code and scalar-def
4010 are taken from STMT. */
4011
4012 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4013 if (!orig_stmt)
4014 {
4015 /* Regular reduction */
4016 orig_stmt = stmt;
4017 }
4018 else
4019 {
4020 /* Reduction pattern */
4021 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
4022 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
4023 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4024 }
4025
4026 code = gimple_assign_rhs_code (orig_stmt);
4027 /* For MINUS_EXPR the initial vector is [init_val,0,...,0]; therefore the
4028 partial results are added and not subtracted. */
4029 if (code == MINUS_EXPR)
4030 code = PLUS_EXPR;
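/* E.g. (illustrative), for 's = s - a[i]' the vectorized loop
   subtracts into the partial-result vector, so starting from
   [init_val,0,...,0] each lane already carries its (negated)
   contributions; the lanes are therefore combined with PLUS_EXPR.  */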
4031
4032 scalar_dest = gimple_assign_lhs (orig_stmt);
4033 scalar_type = TREE_TYPE (scalar_dest);
4034 scalar_results.create (group_size);
4035 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
4036 bitsize = TYPE_SIZE (scalar_type);
4037
4038 /* In case this is a reduction in an inner-loop while vectorizing an outer
4039 loop, we don't need to extract a single scalar result at the end of the
4040 inner-loop (unless it is a double reduction, i.e., the reduction is used
4041 outside the outer-loop). The final vector of partial results will be used
4042 in the vectorized outer-loop, or reduced to a scalar result at the end of
4043 the outer-loop. */
4044 if (nested_in_vect_loop && !double_reduc)
4045 goto vect_finalize_reduction;
4046
4047 /* SLP reduction without reduction chain, e.g.,
4048 # a1 = phi <a2, a0>
4049 # b1 = phi <b2, b0>
4050 a2 = operation (a1)
4051 b2 = operation (b1) */
4052 slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
4053
4054 /* In case of reduction chain, e.g.,
4055 # a1 = phi <a3, a0>
4056 a2 = operation (a1)
4057 a3 = operation (a2),
4058
4059 we may end up with more than one vector result. Here we reduce them to
4060 one vector. */
4061 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4062 {
4063 tree first_vect = PHI_RESULT (new_phis[0]);
4064 tree tmp;
4065 gimple new_vec_stmt = NULL;
4066
4067 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4068 for (k = 1; k < new_phis.length (); k++)
4069 {
4070 gimple next_phi = new_phis[k];
4071 tree second_vect = PHI_RESULT (next_phi);
4072
4073 tmp = build2 (code, vectype, first_vect, second_vect);
4074 new_vec_stmt = gimple_build_assign (vec_dest, tmp);
4075 first_vect = make_ssa_name (vec_dest, new_vec_stmt);
4076 gimple_assign_set_lhs (new_vec_stmt, first_vect);
4077 gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
4078 }
4079
4080 new_phi_result = first_vect;
4081 if (new_vec_stmt)
4082 {
4083 new_phis.truncate (0);
4084 new_phis.safe_push (new_vec_stmt);
4085 }
4086 }
4087 else
4088 new_phi_result = PHI_RESULT (new_phis[0]);
4089
4090 /* 2.3 Create the reduction code, using one of the three schemes described
4091 above. In SLP we simply need to extract all the elements from the
4092 vector (without reducing them), so we use scalar shifts. */
4093 if (reduc_code != ERROR_MARK && !slp_reduc)
4094 {
4095 tree tmp;
4096
4097 /*** Case 1: Create:
4098 v_out2 = reduc_expr <v_out1> */
4099
4100 if (dump_enabled_p ())
4101 dump_printf_loc (MSG_NOTE, vect_location,
4102 "Reduce using direct vector reduction.\n");
4103
4104 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4105 tmp = build1 (reduc_code, vectype, new_phi_result);
4106 epilog_stmt = gimple_build_assign (vec_dest, tmp);
4107 new_temp = make_ssa_name (vec_dest, epilog_stmt);
4108 gimple_assign_set_lhs (epilog_stmt, new_temp);
4109 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4110
4111 extract_scalar_result = true;
4112 }
4113 else
4114 {
4115 enum tree_code shift_code = ERROR_MARK;
4116 bool have_whole_vector_shift = true;
4117 int bit_offset;
4118 int element_bitsize = tree_low_cst (bitsize, 1);
4119 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
4120 tree vec_temp;
4121
4122 if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
4123 shift_code = VEC_RSHIFT_EXPR;
4124 else
4125 have_whole_vector_shift = false;
4126
4127 /* Regardless of whether we have a whole vector shift, if we're
4128 emulating the operation via tree-vect-generic, we don't want
4129 to use it. Only the first round of the reduction is likely
4130 to still be profitable via emulation. */
4131 /* ??? It might be better to emit a reduction tree code here, so that
4132 tree-vect-generic can expand the first round via bit tricks. */
4133 if (!VECTOR_MODE_P (mode))
4134 have_whole_vector_shift = false;
4135 else
4136 {
4137 optab optab = optab_for_tree_code (code, vectype, optab_default);
4138 if (optab_handler (optab, mode) == CODE_FOR_nothing)
4139 have_whole_vector_shift = false;
4140 }
4141
4142 if (have_whole_vector_shift && !slp_reduc)
4143 {
4144 /*** Case 2: Create:
4145 for (offset = VS/2; offset >= element_size; offset/=2)
4146 {
4147 Create: va' = vec_shift <va, offset>
4148 Create: va = vop <va, va'>
4149 } */
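/* A concrete sketch (assuming a 128-bit vector of four 32-bit
   elements): the loop below emits two rounds,
     va' = vec_shift <va, 64>;  va = vop <va, va'>;
     va' = vec_shift <va, 32>;  va = vop <va, va'>;
   after which the full reduction value resides in one element of va.  */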
4150
4151 if (dump_enabled_p ())
4152 dump_printf_loc (MSG_NOTE, vect_location,
4153 "Reduce using vector shifts\n");
4154
4155 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4156 new_temp = new_phi_result;
4157 for (bit_offset = vec_size_in_bits/2;
4158 bit_offset >= element_bitsize;
4159 bit_offset /= 2)
4160 {
4161 tree bitpos = size_int (bit_offset);
4162
4163 epilog_stmt = gimple_build_assign_with_ops (shift_code,
4164 vec_dest, new_temp, bitpos);
4165 new_name = make_ssa_name (vec_dest, epilog_stmt);
4166 gimple_assign_set_lhs (epilog_stmt, new_name);
4167 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4168
4169 epilog_stmt = gimple_build_assign_with_ops (code, vec_dest,
4170 new_name, new_temp);
4171 new_temp = make_ssa_name (vec_dest, epilog_stmt);
4172 gimple_assign_set_lhs (epilog_stmt, new_temp);
4173 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4174 }
4175
4176 extract_scalar_result = true;
4177 }
4178 else
4179 {
4180 tree rhs;
4181
4182 /*** Case 3: Create:
4183 s = extract_field <v_out2, 0>
4184 for (offset = element_size;
4185 offset < vector_size;
4186 offset += element_size)
4187 {
4188 Create: s' = extract_field <v_out2, offset>
4189 Create: s = op <s, s'> // For non SLP cases
4190 } */
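/* A concrete sketch for one 4-element vector v of 32-bit elements:
     s  = BIT_FIELD_REF <v, 32, 0>;
     s' = BIT_FIELD_REF <v, 32, 32>;  s = op <s, s'>;
     s' = BIT_FIELD_REF <v, 32, 64>;  s = op <s, s'>;
     s' = BIT_FIELD_REF <v, 32, 96>;  s = op <s, s'>;  */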
4191
4192 if (dump_enabled_p ())
4193 dump_printf_loc (MSG_NOTE, vect_location,
4194 "Reduce using scalar code.\n");
4195
4196 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
4197 FOR_EACH_VEC_ELT (new_phis, i, new_phi)
4198 {
4199 if (gimple_code (new_phi) == GIMPLE_PHI)
4200 vec_temp = PHI_RESULT (new_phi);
4201 else
4202 vec_temp = gimple_assign_lhs (new_phi);
4203 rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
4204 bitsize_zero_node);
4205 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4206 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4207 gimple_assign_set_lhs (epilog_stmt, new_temp);
4208 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4209
4210 /* In SLP we don't need to apply the reduction operation, so we just
4211 collect s' values in SCALAR_RESULTS. */
4212 if (slp_reduc)
4213 scalar_results.safe_push (new_temp);
4214
4215 for (bit_offset = element_bitsize;
4216 bit_offset < vec_size_in_bits;
4217 bit_offset += element_bitsize)
4218 {
4219 tree bitpos = bitsize_int (bit_offset);
4220 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
4221 bitsize, bitpos);
4222
4223 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4224 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
4225 gimple_assign_set_lhs (epilog_stmt, new_name);
4226 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4227
4228 if (slp_reduc)
4229 {
4230 /* In SLP we don't need to apply the reduction operation, so
4231 we just collect s' values in SCALAR_RESULTS. */
4232 new_temp = new_name;
4233 scalar_results.safe_push (new_name);
4234 }
4235 else
4236 {
4237 epilog_stmt = gimple_build_assign_with_ops (code,
4238 new_scalar_dest, new_name, new_temp);
4239 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4240 gimple_assign_set_lhs (epilog_stmt, new_temp);
4241 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4242 }
4243 }
4244 }
4245
4246 /* The only case in which we need to reduce scalar results in SLP is
4247 unrolling. If the size of SCALAR_RESULTS is greater than
4248 GROUP_SIZE, we reduce them by combining elements modulo
4249 GROUP_SIZE. */
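/* E.g. (illustrative), with GROUP_SIZE 2 and four collected scalar
   results s0,s1,s2,s3 we emit s0 = op <s0, s2> and s1 = op <s1, s3>,
   leaving one result per group member.  */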
4250 if (slp_reduc)
4251 {
4252 tree res, first_res, new_res;
4253 gimple new_stmt;
4254
4255 /* Reduce multiple scalar results in case of SLP unrolling. */
4256 for (j = group_size; scalar_results.iterate (j, &res);
4257 j++)
4258 {
4259 first_res = scalar_results[j % group_size];
4260 new_stmt = gimple_build_assign_with_ops (code,
4261 new_scalar_dest, first_res, res);
4262 new_res = make_ssa_name (new_scalar_dest, new_stmt);
4263 gimple_assign_set_lhs (new_stmt, new_res);
4264 gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
4265 scalar_results[j % group_size] = new_res;
4266 }
4267 }
4268 else
4269 /* Not SLP - we have one scalar to keep in SCALAR_RESULTS. */
4270 scalar_results.safe_push (new_temp);
4271
4272 extract_scalar_result = false;
4273 }
4274 }
4275
4276 /* 2.4 Extract the final scalar result. Create:
4277 s_out3 = extract_field <v_out2, bitpos> */
4278
4279 if (extract_scalar_result)
4280 {
4281 tree rhs;
4282
4283 if (dump_enabled_p ())
4284 dump_printf_loc (MSG_NOTE, vect_location,
4285 "extract scalar result\n");
4286
4287 if (BYTES_BIG_ENDIAN)
4288 bitpos = size_binop (MULT_EXPR,
4289 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
4290 TYPE_SIZE (scalar_type));
4291 else
4292 bitpos = bitsize_zero_node;
4293
4294 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
4295 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4296 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4297 gimple_assign_set_lhs (epilog_stmt, new_temp);
4298 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4299 scalar_results.safe_push (new_temp);
4300 }
4301
4302 vect_finalize_reduction:
4303
4304 if (double_reduc)
4305 loop = loop->inner;
4306
4307 /* 2.5 Adjust the final result by the initial value of the reduction
4308 variable. (When no such adjustment is needed, 'adjustment_def'
4309 is zero.) For example, if code is PLUS we create:
4310 new_temp = loop_exit_def + adjustment_def */
4311
4312 if (adjustment_def)
4313 {
4314 gcc_assert (!slp_reduc);
4315 if (nested_in_vect_loop)
4316 {
4317 new_phi = new_phis[0];
4318 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
4319 expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
4320 new_dest = vect_create_destination_var (scalar_dest, vectype);
4321 }
4322 else
4323 {
4324 new_temp = scalar_results[0];
4325 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
4326 expr = build2 (code, scalar_type, new_temp, adjustment_def);
4327 new_dest = vect_create_destination_var (scalar_dest, scalar_type);
4328 }
4329
4330 epilog_stmt = gimple_build_assign (new_dest, expr);
4331 new_temp = make_ssa_name (new_dest, epilog_stmt);
4332 gimple_assign_set_lhs (epilog_stmt, new_temp);
4333 SSA_NAME_DEF_STMT (new_temp) = epilog_stmt;
4334 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4335 if (nested_in_vect_loop)
4336 {
4337 set_vinfo_for_stmt (epilog_stmt,
4338 new_stmt_vec_info (epilog_stmt, loop_vinfo,
4339 NULL));
4340 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
4341 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
4342
4343 if (!double_reduc)
4344 scalar_results.quick_push (new_temp);
4345 else
4346 scalar_results[0] = new_temp;
4347 }
4348 else
4349 scalar_results[0] = new_temp;
4350
4351 new_phis[0] = epilog_stmt;
4352 }
4353
4354 /* 2.6 Handle the loop-exit phis. Replace the uses of scalar loop-exit
4355 phis with new adjusted scalar results, i.e., replace use <s_out0>
4356 with use <s_out4>.
4357
4358 Transform:
4359 loop_exit:
4360 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4361 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4362 v_out2 = reduce <v_out1>
4363 s_out3 = extract_field <v_out2, 0>
4364 s_out4 = adjust_result <s_out3>
4365 use <s_out0>
4366 use <s_out0>
4367
4368 into:
4369
4370 loop_exit:
4371 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4372 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4373 v_out2 = reduce <v_out1>
4374 s_out3 = extract_field <v_out2, 0>
4375 s_out4 = adjust_result <s_out3>
4376 use <s_out4>
4377 use <s_out4> */
4378
4379
4380 /* In an SLP reduction chain we reduce the vector results into one vector if
4381 necessary; hence we set GROUP_SIZE to 1 here. SCALAR_DEST is the LHS of
4382 the last stmt in the reduction chain, since we are looking for the loop
4383 exit phi node. */
4384 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4385 {
4386 scalar_dest = gimple_assign_lhs (
4387 SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]);
4388 group_size = 1;
4389 }
4390
4391 /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
4392 case GROUP_SIZE is greater than the vectorization factor). Therefore, we
4393 need to match SCALAR_RESULTS with corresponding statements. The first
4394 (GROUP_SIZE / number of new vector stmts) scalar results correspond to
4395 the first vector stmt, etc.
4396 (RATIO is equal to (GROUP_SIZE / number of new vector stmts)). */
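/* E.g. (illustrative), GROUP_SIZE 4 with two new vector stmts gives
   RATIO 2: scalar results 0 and 1 correspond to the first vector
   stmt, results 2 and 3 to the second.  */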
4397 if (group_size > new_phis.length ())
4398 {
4399 ratio = group_size / new_phis.length ();
4400 gcc_assert (!(group_size % new_phis.length ()));
4401 }
4402 else
4403 ratio = 1;
4404
4405 for (k = 0; k < group_size; k++)
4406 {
4407 if (k % ratio == 0)
4408 {
4409 epilog_stmt = new_phis[k / ratio];
4410 reduction_phi = reduction_phis[k / ratio];
4411 if (double_reduc)
4412 inner_phi = inner_phis[k / ratio];
4413 }
4414
4415 if (slp_reduc)
4416 {
4417 gimple current_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[k];
4418
4419 orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
4420 /* SLP statements can't participate in patterns. */
4421 gcc_assert (!orig_stmt);
4422 scalar_dest = gimple_assign_lhs (current_stmt);
4423 }
4424
4425 phis.create (3);
4426 /* Find the loop-closed-use at the loop exit of the original scalar
4427 result. (The reduction result is expected to have two immediate uses -
4428 one at the latch block, and one at the loop exit). */
4429 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4430 if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))
4431 && !is_gimple_debug (USE_STMT (use_p)))
4432 phis.safe_push (USE_STMT (use_p));
4433
4434 /* While we expect to have found an exit_phi because of loop-closed-ssa
4435 form, we can end up without one if the scalar cycle is dead. */
4436
4437 FOR_EACH_VEC_ELT (phis, i, exit_phi)
4438 {
4439 if (outer_loop)
4440 {
4441 stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
4442 gimple vect_phi;
4443
4444 /* FORNOW. Currently not supporting the case in which an inner-loop
4445 reduction is used only outside the outer-loop (i.e. not in the
4446 outer-loop itself), unless it is a double reduction. */
4447 gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
4448 && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
4449 || double_reduc);
4450
4451 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
4452 if (!double_reduc
4453 || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
4454 != vect_double_reduction_def)
4455 continue;
4456
4457 /* Handle double reduction:
4458
4459 stmt1: s1 = phi <s0, s2> - double reduction phi (outer loop)
4460 stmt2: s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
4461 stmt3: s4 = use (s3) - (regular) reduc stmt (inner loop)
4462 stmt4: s2 = phi <s4> - double reduction stmt (outer loop)
4463
4464 At that point the regular reduction (stmt2 and stmt3) is
4465 already vectorized, as well as the exit phi node, stmt4.
4466 Here we vectorize the phi node of double reduction, stmt1, and
4467 update all relevant statements. */
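
/* An illustrative source form of such a double reduction (a sketch):

     s = s0;
     for (i = 0; i < n; i++)       // outer loop
       for (j = 0; j < m; j++)     // inner loop
         s = s + a[i][j];

   Here s corresponds to stmt1/stmt4 in the outer loop and to
   stmt2/stmt3 in the inner loop of the scheme above.  */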
4468
4469 /* Go through all the uses of s2 to find double reduction phi
4470 node, i.e., stmt1 above. */
4471 orig_name = PHI_RESULT (exit_phi);
4472 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4473 {
4474 stmt_vec_info use_stmt_vinfo;
4475 stmt_vec_info new_phi_vinfo;
4476 tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
4477 basic_block bb = gimple_bb (use_stmt);
4478 gimple use;
4479
4480 /* Check that USE_STMT is really a double reduction phi
4481 node. */
4482 if (gimple_code (use_stmt) != GIMPLE_PHI
4483 || gimple_phi_num_args (use_stmt) != 2
4484 || bb->loop_father != outer_loop)
4485 continue;
4486 use_stmt_vinfo = vinfo_for_stmt (use_stmt);
4487 if (!use_stmt_vinfo
4488 || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
4489 != vect_double_reduction_def)
4490 continue;
4491
4492 /* Create vector phi node for double reduction:
4493 vs1 = phi <vs0, vs2>
4494 vs1 was created previously in this function by a call to
4495 vect_get_vec_def_for_operand and is stored in
4496 vec_initial_def;
4497 vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
4498 vs0 is created here. */
4499
4500 /* Create vector phi node. */
4501 vect_phi = create_phi_node (vec_initial_def, bb);
4502 new_phi_vinfo = new_stmt_vec_info (vect_phi,
4503 loop_vec_info_for_loop (outer_loop), NULL);
4504 set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
4505
4506 /* Create vs0 - initial def of the double reduction phi. */
4507 preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
4508 loop_preheader_edge (outer_loop));
4509 init_def = get_initial_def_for_reduction (stmt,
4510 preheader_arg, NULL);
4511 vect_phi_init = vect_init_vector (use_stmt, init_def,
4512 vectype, NULL);
4513
4514 /* Update phi node arguments with vs0 and vs2. */
4515 add_phi_arg (vect_phi, vect_phi_init,
4516 loop_preheader_edge (outer_loop),
4517 UNKNOWN_LOCATION);
4518 add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
4519 loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
4520 if (dump_enabled_p ())
4521 {
4522 dump_printf_loc (MSG_NOTE, vect_location,
4523 "created double reduction phi node: ");
4524 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vect_phi, 0);
4525 dump_printf (MSG_NOTE, "\n");
4526 }
4527
4528 vect_phi_res = PHI_RESULT (vect_phi);
4529
4530 /* Replace the use, i.e., set the correct vs1 in the regular
4531 reduction phi node. FORNOW, NCOPIES is always 1, so the
4532 loop is redundant. */
4533 use = reduction_phi;
4534 for (j = 0; j < ncopies; j++)
4535 {
4536 edge pr_edge = loop_preheader_edge (loop);
4537 SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
4538 use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
4539 }
4540 }
4541 }
4542 }
4543
4544 phis.release ();
4545 if (nested_in_vect_loop)
4546 {
4547 if (double_reduc)
4548 loop = outer_loop;
4549 else
4550 continue;
4551 }
4552
4553 phis.create (3);
4554 /* Find the loop-closed-use at the loop exit of the original scalar
4555 result. (The reduction result is expected to have two immediate uses,
4556 one at the latch block, and one at the loop exit). For double
4557 reductions we are looking for exit phis of the outer loop. */
4558 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4559 {
4560 if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
4561 {
4562 if (!is_gimple_debug (USE_STMT (use_p)))
4563 phis.safe_push (USE_STMT (use_p));
4564 }
4565 else
4566 {
4567 if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
4568 {
4569 tree phi_res = PHI_RESULT (USE_STMT (use_p));
4570
4571 FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
4572 {
4573 if (!flow_bb_inside_loop_p (loop,
4574 gimple_bb (USE_STMT (phi_use_p)))
4575 && !is_gimple_debug (USE_STMT (phi_use_p)))
4576 phis.safe_push (USE_STMT (phi_use_p));
4577 }
4578 }
4579 }
4580 }
4581
4582 FOR_EACH_VEC_ELT (phis, i, exit_phi)
4583 {
4584 /* Replace the uses: */
4585 orig_name = PHI_RESULT (exit_phi);
4586 scalar_result = scalar_results[k];
4587 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4588 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
4589 SET_USE (use_p, scalar_result);
4590 }
4591
4592 phis.release ();
4593 }
4594
4595 scalar_results.release ();
4596 inner_phis.release ();
4597 new_phis.release ();
4598 }
4599
4600
4601 /* Function vectorizable_reduction.
4602
4603 Check if STMT performs a reduction operation that can be vectorized.
4604 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4605 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4606 Return FALSE if not a vectorizable STMT, TRUE otherwise.
4607
4608 This function also handles reduction idioms (patterns) that have been
4609 recognized in advance during vect_pattern_recog. In this case, STMT may be
4610 of this form:
4611 X = pattern_expr (arg0, arg1, ..., X)
4612 and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
4613 sequence that had been detected and replaced by the pattern-stmt (STMT).
4614
4615 In some cases of reduction patterns, the type of the reduction variable X is
4616 different than the type of the other arguments of STMT.
4617 In such cases, the vectype that is used when transforming STMT into a vector
4618 stmt is different than the vectype that is used to determine the
4619 vectorization factor, because it consists of a different number of elements
4620 than the actual number of elements that are being operated upon in parallel.
4621
4622 For example, consider an accumulation of shorts into an int accumulator.
4623 On some targets it's possible to vectorize this pattern operating on 8
4624 shorts at a time (hence, the vectype for purposes of determining the
4625 vectorization factor should be V8HI); on the other hand, the vectype that
4626 is used to create the vector form is actually V4SI (the type of the result).
4627
4628 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
4629 indicates the actual level of parallelism (V8HI in the example), so
4630 that the right vectorization factor is derived. This vectype
4631 corresponds to the type of arguments to the reduction stmt, and should *NOT*
4632 be used to create the vectorized stmt. The right vectype for the vectorized
4633 stmt is obtained from the type of the result X:
4634 get_vectype_for_scalar_type (TREE_TYPE (X))
4635
4636 This means that, contrary to "regular" reductions (or "regular" stmts in
4637 general), the following equation:
4638 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
4639 does *NOT* necessarily hold for reduction patterns. */
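
/* An illustrative source sketch of the accumulation example above
   (for exposition only):

     short a[N]; int sum = 0;
     for (i = 0; i < N; i++)
       sum += a[i];     // STMT: sum = widen_sum <a[i], sum>

   Here STMT_VINFO_VECTYPE would record V8HI (eight shorts processed
   in parallel), while the vectorized stmt itself is created with
   get_vectype_for_scalar_type (int), i.e. V4SI.  */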
4640
4641 bool
4642 vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
4643 gimple *vec_stmt, slp_tree slp_node)
4644 {
4645 tree vec_dest;
4646 tree scalar_dest;
4647 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
4648 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4649 tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4650 tree vectype_in = NULL_TREE;
4651 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4652 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4653 enum tree_code code, orig_code, epilog_reduc_code;
4654 enum machine_mode vec_mode;
4655 int op_type;
4656 optab optab, reduc_optab;
4657 tree new_temp = NULL_TREE;
4658 tree def;
4659 gimple def_stmt;
4660 enum vect_def_type dt;
4661 gimple new_phi = NULL;
4662 tree scalar_type;
4663 bool is_simple_use;
4664 gimple orig_stmt;
4665 stmt_vec_info orig_stmt_info;
4666 tree expr = NULL_TREE;
4667 int i;
4668 int ncopies;
4669 int epilog_copies;
4670 stmt_vec_info prev_stmt_info, prev_phi_info;
4671 bool single_defuse_cycle = false;
4672 tree reduc_def = NULL_TREE;
4673 gimple new_stmt = NULL;
4674 int j;
4675 tree ops[3];
4676 bool nested_cycle = false, found_nested_cycle_def = false;
4677 gimple reduc_def_stmt = NULL;
4678 /* The default is that the reduction variable is the last operand in the statement. */
4679 int reduc_index = 2;
4680 bool double_reduc = false, dummy;
4681 basic_block def_bb;
4682 struct loop * def_stmt_loop, *outer_loop = NULL;
4683 tree def_arg;
4684 gimple def_arg_stmt;
4685 vec<tree> vec_oprnds0 = vNULL;
4686 vec<tree> vec_oprnds1 = vNULL;
4687 vec<tree> vect_defs = vNULL;
4688 vec<gimple> phis = vNULL;
4689 int vec_num;
4690 tree def0, def1, tem, op0, op1 = NULL_TREE;
4691
4692 /* In case of reduction chain we switch to the first stmt in the chain, but
4693 we don't update STMT_INFO, since only the last stmt is marked as reduction
4694 and has reduction properties. */
4695 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4696 stmt = GROUP_FIRST_ELEMENT (stmt_info);
4697
4698 if (nested_in_vect_loop_p (loop, stmt))
4699 {
4700 outer_loop = loop;
4701 loop = loop->inner;
4702 nested_cycle = true;
4703 }
4704
4705 /* 1. Is vectorizable reduction? */
4706 /* Not supportable if the reduction variable is used in the loop, unless
4707 it's a reduction chain. */
4708 if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
4709 && !GROUP_FIRST_ELEMENT (stmt_info))
4710 return false;
4711
4712 /* Reductions that are not used even in an enclosing outer-loop
4713 are expected to be "live" (used out of the loop). */
4714 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
4715 && !STMT_VINFO_LIVE_P (stmt_info))
4716 return false;
4717
4718 /* Make sure it was already recognized as a reduction computation. */
4719 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
4720 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle)
4721 return false;
4722
4723 /* 2. Has this been recognized as a reduction pattern?
4724
4725 Check if STMT represents a pattern that has been recognized
4726 in earlier analysis stages. For stmts that represent a pattern,
4727 the STMT_VINFO_RELATED_STMT field records the last stmt in
4728 the original sequence that constitutes the pattern. */
4729
4730 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4731 if (orig_stmt)
4732 {
4733 orig_stmt_info = vinfo_for_stmt (orig_stmt);
4734 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
4735 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
4736 }
4737
4738 /* 3. Check the operands of the operation. The first operands are defined
4739 inside the loop body. The last operand is the reduction variable,
4740 which is defined by the loop-header-phi. */
4741
4742 gcc_assert (is_gimple_assign (stmt));
4743
4744 /* Flatten RHS. */
4745 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
4746 {
4747 case GIMPLE_SINGLE_RHS:
4748 op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
4749 if (op_type == ternary_op)
4750 {
4751 tree rhs = gimple_assign_rhs1 (stmt);
4752 ops[0] = TREE_OPERAND (rhs, 0);
4753 ops[1] = TREE_OPERAND (rhs, 1);
4754 ops[2] = TREE_OPERAND (rhs, 2);
4755 code = TREE_CODE (rhs);
4756 }
4757 else
4758 return false;
4759 break;
4760
4761 case GIMPLE_BINARY_RHS:
4762 code = gimple_assign_rhs_code (stmt);
4763 op_type = TREE_CODE_LENGTH (code);
4764 gcc_assert (op_type == binary_op);
4765 ops[0] = gimple_assign_rhs1 (stmt);
4766 ops[1] = gimple_assign_rhs2 (stmt);
4767 break;
4768
4769 case GIMPLE_TERNARY_RHS:
4770 code = gimple_assign_rhs_code (stmt);
4771 op_type = TREE_CODE_LENGTH (code);
4772 gcc_assert (op_type == ternary_op);
4773 ops[0] = gimple_assign_rhs1 (stmt);
4774 ops[1] = gimple_assign_rhs2 (stmt);
4775 ops[2] = gimple_assign_rhs3 (stmt);
4776 break;
4777
4778 case GIMPLE_UNARY_RHS:
4779 return false;
4780
4781 default:
4782 gcc_unreachable ();
4783 }
4784
4785 if (code == COND_EXPR && slp_node)
4786 return false;
4787
4788 scalar_dest = gimple_assign_lhs (stmt);
4789 scalar_type = TREE_TYPE (scalar_dest);
4790 if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
4791 && !SCALAR_FLOAT_TYPE_P (scalar_type))
4792 return false;
4793
4794 /* Do not try to vectorize bit-precision reductions. */
4795 if ((TYPE_PRECISION (scalar_type)
4796 != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
4797 return false;
4798
4799 /* All uses but the last are expected to be defined in the loop.
4800 The last use is the reduction variable. In case of a nested cycle this
4801 assumption is not true: we use reduc_index to record the index of the
4802 reduction variable. */
4803 for (i = 0; i < op_type - 1; i++)
4804 {
4805 /* The condition of COND_EXPR is checked in vectorizable_condition(). */
4806 if (i == 0 && code == COND_EXPR)
4807 continue;
4808
4809 is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
4810 &def_stmt, &def, &dt, &tem);
4811 if (!vectype_in)
4812 vectype_in = tem;
4813 gcc_assert (is_simple_use);
4814
4815 if (dt != vect_internal_def
4816 && dt != vect_external_def
4817 && dt != vect_constant_def
4818 && dt != vect_induction_def
4819 && !(dt == vect_nested_cycle && nested_cycle))
4820 return false;
4821
4822 if (dt == vect_nested_cycle)
4823 {
4824 found_nested_cycle_def = true;
4825 reduc_def_stmt = def_stmt;
4826 reduc_index = i;
4827 }
4828 }
4829
4830 is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
4831 &def_stmt, &def, &dt, &tem);
4832 if (!vectype_in)
4833 vectype_in = tem;
4834 gcc_assert (is_simple_use);
4835 if (!(dt == vect_reduction_def
4836 || dt == vect_nested_cycle
4837 || ((dt == vect_internal_def || dt == vect_external_def
4838 || dt == vect_constant_def || dt == vect_induction_def)
4839 && nested_cycle && found_nested_cycle_def)))
4840 {
4841 /* For pattern recognized stmts, orig_stmt might be a reduction,
4842 but some helper statements for the pattern might not, or
4843 might be COND_EXPRs with reduction uses in the condition. */
4844 gcc_assert (orig_stmt);
4845 return false;
4846 }
4847 if (!found_nested_cycle_def)
4848 reduc_def_stmt = def_stmt;
4849
4850 gcc_assert (gimple_code (reduc_def_stmt) == GIMPLE_PHI);
4851 if (orig_stmt)
4852 gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo,
4853 reduc_def_stmt,
4854 !nested_cycle,
4855 &dummy));
4856 else
4857 {
4858 gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
4859 !nested_cycle, &dummy);
4860 /* We changed STMT to be the first stmt in reduction chain, hence we
4861 check that in this case the first element in the chain is STMT. */
4862 gcc_assert (stmt == tmp
4863 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
4864 }
4865
4866 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
4867 return false;
4868
4869 if (slp_node || PURE_SLP_STMT (stmt_info))
4870 ncopies = 1;
4871 else
4872 ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4873 / TYPE_VECTOR_SUBPARTS (vectype_in));
4874
4875 gcc_assert (ncopies >= 1);
4876
4877 vec_mode = TYPE_MODE (vectype_in);
4878
4879 if (code == COND_EXPR)
4880 {
4881 if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL))
4882 {
4883 if (dump_enabled_p ())
4884 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4885 "unsupported condition in reduction\n");
4886
4887 return false;
4888 }
4889 }
4890 else
4891 {
4892 /* 4. Supportable by target? */
4893
4894 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
4895 || code == LROTATE_EXPR || code == RROTATE_EXPR)
4896 {
4897 /* Shifts and rotates are only supported by vectorizable_shift,
4898 not vectorizable_reduction. */
4899 if (dump_enabled_p ())
4900 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4901 "unsupported shift or rotation.\n");
4902 return false;
4903 }
4904
4905 /* 4.1. check support for the operation in the loop */
4906 optab = optab_for_tree_code (code, vectype_in, optab_default);
4907 if (!optab)
4908 {
4909 if (dump_enabled_p ())
4910 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4911 "no optab.\n");
4912
4913 return false;
4914 }
4915
4916 if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
4917 {
4918 if (dump_enabled_p ())
4919 dump_printf (MSG_NOTE, "op not supported by target.\n");
4920
4921 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4922 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4923 < vect_min_worthwhile_factor (code))
4924 return false;
4925
4926 if (dump_enabled_p ())
4927 dump_printf (MSG_NOTE, "proceeding using word mode.\n");
4928 }
4929
4930 /* Worthwhile without SIMD support? */
4931 if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
4932 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4933 < vect_min_worthwhile_factor (code))
4934 {
4935 if (dump_enabled_p ())
4936 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4937 "not worthwhile without SIMD support.\n");
4938
4939 return false;
4940 }
4941 }
4942
4943 /* 4.2. Check support for the epilog operation.
4944
4945 If STMT represents a reduction pattern, then the type of the
4946 reduction variable may be different than the type of the rest
4947 of the arguments. For example, consider the case of accumulation
4948 of shorts into an int accumulator. The original code:
4949 S1: int_a = (int) short_a;
4950 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
4951
4952 was replaced with:
4953 STMT: int_acc = widen_sum <short_a, int_acc>
4954
4955 This means that:
4956 1. The tree-code that is used to create the vector operation in the
4957 epilog code (that reduces the partial results) is not the
4958 tree-code of STMT, but is rather the tree-code of the original
4959 stmt from the pattern that STMT is replacing. I.e., in the example
4960 above we want to use 'widen_sum' in the loop, but 'plus' in the
4961 epilog.
4962 2. The type (mode) we use to check available target support
4963 for the vector operation to be created in the *epilog*, is
4964 determined by the type of the reduction variable (in the example
4965 above we'd check this: optab_handler (plus_optab, vect_int_mode)).
4966 However the type (mode) we use to check available target support
4967 for the vector operation to be created *inside the loop*, is
4968 determined by the type of the other arguments to STMT (in the
4969 example we'd check this: optab_handler (widen_sum_optab,
4970 vect_short_mode)).
4971
4972 This is contrary to "regular" reductions, in which the types of all
4973 the arguments are the same as the type of the reduction variable.
4974 For "regular" reductions we can therefore use the same vector type
4975 (and also the same tree-code) when generating the epilog code and
4976 when generating the code inside the loop. */
4977
4978 if (orig_stmt)
4979 {
4980 /* This is a reduction pattern: get the vectype from the type of the
4981 reduction variable, and get the tree-code from orig_stmt. */
4982 orig_code = gimple_assign_rhs_code (orig_stmt);
4983 gcc_assert (vectype_out);
4984 vec_mode = TYPE_MODE (vectype_out);
4985 }
4986 else
4987 {
4988 /* Regular reduction: the same vectype and tree-code used for the
4989 vector code inside the loop can also be used for the epilog code. */
4990 orig_code = code;
4991 }
4992
4993 if (nested_cycle)
4994 {
4995 def_bb = gimple_bb (reduc_def_stmt);
4996 def_stmt_loop = def_bb->loop_father;
4997 def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
4998 loop_preheader_edge (def_stmt_loop));
4999 if (TREE_CODE (def_arg) == SSA_NAME
5000 && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
5001 && gimple_code (def_arg_stmt) == GIMPLE_PHI
5002 && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
5003 && vinfo_for_stmt (def_arg_stmt)
5004 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
5005 == vect_double_reduction_def)
5006 double_reduc = true;
5007 }
5008
5009 epilog_reduc_code = ERROR_MARK;
5010 if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
5011 {
5012 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
5013 optab_default);
5014 if (!reduc_optab)
5015 {
5016 if (dump_enabled_p ())
5017 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5018 "no optab for reduction.\n");
5019
5020 epilog_reduc_code = ERROR_MARK;
5021 }
5022
5023 if (reduc_optab
5024 && optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
5025 {
5026 if (dump_enabled_p ())
5027 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5028 "reduc op not supported by target.\n");
5029
5030 epilog_reduc_code = ERROR_MARK;
5031 }
5032 }
5033 else
5034 {
5035 if (!nested_cycle || double_reduc)
5036 {
5037 if (dump_enabled_p ())
5038 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5039 "no reduc code for scalar code.\n");
5040
5041 return false;
5042 }
5043 }
5044
5045 if (double_reduc && ncopies > 1)
5046 {
5047 if (dump_enabled_p ())
5048 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5049 "multiple types in double reduction\n");
5050
5051 return false;
5052 }
5053
5054 /* In case of widening multiplication by a constant, we update the type
5055 of the constant to be the type of the other operand. We check that the
5056 constant fits the type in the pattern recognition pass. */
5057 if (code == DOT_PROD_EXPR
5058 && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
5059 {
5060 if (TREE_CODE (ops[0]) == INTEGER_CST)
5061 ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
5062 else if (TREE_CODE (ops[1]) == INTEGER_CST)
5063 ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
5064 else
5065 {
5066 if (dump_enabled_p ())
5067 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5068 "invalid types in dot-prod\n");
5069
5070 return false;
5071 }
5072 }
5073
5074 if (!vec_stmt) /* transformation not required. */
5075 {
5076 if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
5077 return false;
5078 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
5079 return true;
5080 }
5081
5082 /** Transform. **/
5083
5084 if (dump_enabled_p ())
5085 dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n");
5086
5087 /* FORNOW: Multiple types are not supported for condition. */
5088 if (code == COND_EXPR)
5089 gcc_assert (ncopies == 1);
5090
5091 /* Create the destination vector. */
5092 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
5093
5094 /* In case the vectorization factor (VF) is bigger than the number
5095 of elements that we can fit in a vectype (nunits), we have to generate
5096 more than one vector stmt, i.e., we need to "unroll" the
5097 vector stmt by a factor VF/nunits. For more details see documentation
5098 in vectorizable_operation. */
5099
5100 /* If the reduction is used in an outer loop we need to generate
5101 VF intermediate results, like so (e.g. for ncopies=2):
5102 r0 = phi (init, r0)
5103 r1 = phi (init, r1)
5104 r0 = x0 + r0;
5105 r1 = x1 + r1;
5106 (i.e. we generate VF results in 2 registers).
5107 In this case we have a separate def-use cycle for each copy, and therefore
5108 for each copy we get the vector def for the reduction variable from the
5109 respective phi node created for this copy.
5110
5111 Otherwise (the reduction is unused in the loop nest), we can combine
5112 together intermediate results, like so (e.g. for ncopies=2):
5113 r = phi (init, r)
5114 r = x0 + r;
5115 r = x1 + r;
5116 (i.e. we generate VF/2 results in a single register).
5117 In this case for each copy we get the vector def for the reduction variable
5118 from the vectorized reduction operation generated in the previous iteration.
5119 */
5120
5121 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope)
5122 {
5123 single_defuse_cycle = true;
5124 epilog_copies = 1;
5125 }
5126 else
5127 epilog_copies = ncopies;
5128
5129 prev_stmt_info = NULL;
5130 prev_phi_info = NULL;
5131 if (slp_node)
5132 {
5133 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5134 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype_out)
5135 == TYPE_VECTOR_SUBPARTS (vectype_in));
5136 }
5137 else
5138 {
5139 vec_num = 1;
5140 vec_oprnds0.create (1);
5141 if (op_type == ternary_op)
5142 vec_oprnds1.create (1);
5143 }
5144
5145 phis.create (vec_num);
5146 vect_defs.create (vec_num);
5147 if (!slp_node)
5148 vect_defs.quick_push (NULL_TREE);
5149
5150 for (j = 0; j < ncopies; j++)
5151 {
5152 if (j == 0 || !single_defuse_cycle)
5153 {
5154 for (i = 0; i < vec_num; i++)
5155 {
5156 /* Create the reduction-phi that defines the reduction
5157 operand. */
5158 new_phi = create_phi_node (vec_dest, loop->header);
5159 set_vinfo_for_stmt (new_phi,
5160 new_stmt_vec_info (new_phi, loop_vinfo,
5161 NULL));
5162 if (j == 0 || slp_node)
5163 phis.quick_push (new_phi);
5164 }
5165 }
5166
5167 if (code == COND_EXPR)
5168 {
5169 gcc_assert (!slp_node);
5170 vectorizable_condition (stmt, gsi, vec_stmt,
5171 PHI_RESULT (phis[0]),
5172 reduc_index, NULL);
5173 /* Multiple types are not supported for condition. */
5174 break;
5175 }
5176
5177 /* Handle uses. */
5178 if (j == 0)
5179 {
5180 op0 = ops[!reduc_index];
5181 if (op_type == ternary_op)
5182 {
5183 if (reduc_index == 0)
5184 op1 = ops[2];
5185 else
5186 op1 = ops[1];
5187 }
5188
5189 if (slp_node)
5190 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5191 slp_node, -1);
5192 else
5193 {
5194 loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
5195 stmt, NULL);
5196 vec_oprnds0.quick_push (loop_vec_def0);
5197 if (op_type == ternary_op)
5198 {
5199 loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt,
5200 NULL);
5201 vec_oprnds1.quick_push (loop_vec_def1);
5202 }
5203 }
5204 }
5205 else
5206 {
5207 if (!slp_node)
5208 {
5209 enum vect_def_type dt;
5210 gimple dummy_stmt;
5211 tree dummy;
5212
5213 vect_is_simple_use (ops[!reduc_index], stmt, loop_vinfo, NULL,
5214 &dummy_stmt, &dummy, &dt);
5215 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
5216 loop_vec_def0);
5217 vec_oprnds0[0] = loop_vec_def0;
5218 if (op_type == ternary_op)
5219 {
5220 vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &dummy_stmt,
5221 &dummy, &dt);
5222 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
5223 loop_vec_def1);
5224 vec_oprnds1[0] = loop_vec_def1;
5225 }
5226 }
5227
5228 if (single_defuse_cycle)
5229 reduc_def = gimple_assign_lhs (new_stmt);
5230
5231 STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
5232 }
5233
5234 FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
5235 {
5236 if (slp_node)
5237 reduc_def = PHI_RESULT (phis[i]);
5238 else
5239 {
5240 if (!single_defuse_cycle || j == 0)
5241 reduc_def = PHI_RESULT (new_phi);
5242 }
5243
5244 def1 = ((op_type == ternary_op)
5245 ? vec_oprnds1[i] : NULL);
5246 if (op_type == binary_op)
5247 {
5248 if (reduc_index == 0)
5249 expr = build2 (code, vectype_out, reduc_def, def0);
5250 else
5251 expr = build2 (code, vectype_out, def0, reduc_def);
5252 }
5253 else
5254 {
5255 if (reduc_index == 0)
5256 expr = build3 (code, vectype_out, reduc_def, def0, def1);
5257 else
5258 {
5259 if (reduc_index == 1)
5260 expr = build3 (code, vectype_out, def0, reduc_def, def1);
5261 else
5262 expr = build3 (code, vectype_out, def0, def1, reduc_def);
5263 }
5264 }
5265
5266 new_stmt = gimple_build_assign (vec_dest, expr);
5267 new_temp = make_ssa_name (vec_dest, new_stmt);
5268 gimple_assign_set_lhs (new_stmt, new_temp);
5269 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5270
5271 if (slp_node)
5272 {
5273 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5274 vect_defs.quick_push (new_temp);
5275 }
5276 else
5277 vect_defs[0] = new_temp;
5278 }
5279
5280 if (slp_node)
5281 continue;
5282
5283 if (j == 0)
5284 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5285 else
5286 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5287
5288 prev_stmt_info = vinfo_for_stmt (new_stmt);
5289 prev_phi_info = vinfo_for_stmt (new_phi);
5290 }
5291
5292 /* Finalize the reduction-phi (set its arguments) and create the
5293 epilog reduction code. */
5294 if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
5295 {
5296 new_temp = gimple_assign_lhs (*vec_stmt);
5297 vect_defs[0] = new_temp;
5298 }
5299
5300 vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
5301 epilog_reduc_code, phis, reduc_index,
5302 double_reduc, slp_node);
5303
5304 phis.release ();
5305 vect_defs.release ();
5306 vec_oprnds0.release ();
5307 vec_oprnds1.release ();
5308
5309 return true;
5310 }
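/* For example (a sketch, assuming VF = 4, ncopies = 1 and an initial
   sum of 0), the scalar reduction

     for (i = 0; i < n; i++)
       sum += a[i];

   is transformed above into a vectorized def-use cycle

     vsum_1 = PHI <{0,0,0,0} (preheader), vsum_2 (latch)>
     vsum_2 = va_3 + vsum_1;

   and vect_create_epilog_for_reduction then emits code after the loop
   that combines the four lanes of vsum_2 into the final scalar sum.  */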
5311
5312 /* Function vect_min_worthwhile_factor.
5313
5314 For a loop where we could vectorize the operation indicated by CODE,
5315 return the minimum vectorization factor that makes it worthwhile
5316 to use generic vectors. */
5317 int
5318 vect_min_worthwhile_factor (enum tree_code code)
5319 {
5320 switch (code)
5321 {
5322 case PLUS_EXPR:
5323 case MINUS_EXPR:
5324 case NEGATE_EXPR:
5325 return 4;
5326
5327 case BIT_AND_EXPR:
5328 case BIT_IOR_EXPR:
5329 case BIT_XOR_EXPR:
5330 case BIT_NOT_EXPR:
5331 return 2;
5332
5333 default:
5334 return INT_MAX;
5335 }
5336 }
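/* Usage sketch (hedged; this mirrors how the analysis code is expected
   to consult the threshold):

     if (!VECTOR_MODE_P (TYPE_MODE (vectype))
         && vf < vect_min_worthwhile_factor (code))
       return false;   -- generic vectors not worthwhile

   i.e. a loop of additions needs at least 4 elements per generic
   vector to pay off, cheap bitwise operations already pay off at 2,
   and any other operation is never emulated with generic vectors.  */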
5337
5338
5339 /* Function vectorizable_induction
5340
5341 Check if PHI performs an induction computation that can be vectorized.
5342 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
5343 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
5344 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
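/* For example (a sketch, for VF = 4, initial value 0 and step 1),
   the scalar induction

     i_1 = PHI <0 (preheader), i_2 (latch)>
     i_2 = i_1 + 1;

   is replaced by a vector induction built from the initial vector
   {0,1,2,3} and the step vector {4,4,4,4}:

     vi_1 = PHI <{0,1,2,3} (preheader), vi_2 (latch)>
     vi_2 = vi_1 + {4,4,4,4};  */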
5345
5346 bool
5347 vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5348 gimple *vec_stmt)
5349 {
5350 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
5351 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5352 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5353 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5354 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5355 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5356 tree vec_def;
5357
5358 gcc_assert (ncopies >= 1);
5359 /* FORNOW. These restrictions should be relaxed. */
5360 if (nested_in_vect_loop_p (loop, phi))
5361 {
5362 imm_use_iterator imm_iter;
5363 use_operand_p use_p;
5364 gimple exit_phi;
5365 edge latch_e;
5366 tree loop_arg;
5367
5368 if (ncopies > 1)
5369 {
5370 if (dump_enabled_p ())
5371 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5372 "multiple types in nested loop.\n");
5373 return false;
5374 }
5375
5376 exit_phi = NULL;
5377 latch_e = loop_latch_edge (loop->inner);
5378 loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
5379 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
5380 {
5381 if (!flow_bb_inside_loop_p (loop->inner,
5382 gimple_bb (USE_STMT (use_p))))
5383 {
5384 exit_phi = USE_STMT (use_p);
5385 break;
5386 }
5387 }
5388 if (exit_phi)
5389 {
5390 stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
5391 if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
5392 && !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
5393 {
5394 if (dump_enabled_p ())
5395 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5396 "inner-loop induction only used outside "
5397 "of the outer vectorized loop.\n");
5398 return false;
5399 }
5400 }
5401 }
5402
5403 if (!STMT_VINFO_RELEVANT_P (stmt_info))
5404 return false;
5405
5406 /* FORNOW: SLP not supported. */
5407 if (STMT_SLP_TYPE (stmt_info))
5408 return false;
5409
5410 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
5411
5412 if (gimple_code (phi) != GIMPLE_PHI)
5413 return false;
5414
5415 if (!vec_stmt) /* transformation not required. */
5416 {
5417 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
5418 if (dump_enabled_p ())
5419 dump_printf_loc (MSG_NOTE, vect_location,
5420 "=== vectorizable_induction ===\n");
5421 vect_model_induction_cost (stmt_info, ncopies);
5422 return true;
5423 }
5424
5425 /** Transform. **/
5426
5427 if (dump_enabled_p ())
5428 dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.\n");
5429
5430 vec_def = get_initial_def_for_induction (phi);
5431 *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
5432 return true;
5433 }
5434
5435 /* Function vectorizable_live_operation.
5436
5437 STMT computes a value that is used outside the loop. Check if
5438 it can be supported. */
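/* For example (a sketch): with loop-invariant operands x_1 and y_2,

     loop:
       t_3 = x_1 + y_2;        <-- STMT, live after the loop
     exit:
       use (t_3);

   t_3 computes the same value in every iteration, so the scalar stmt
   can simply remain in place, unvectorized.  */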
5439
5440 bool
5441 vectorizable_live_operation (gimple stmt,
5442 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5443 gimple *vec_stmt)
5444 {
5445 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5446 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5447 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5448 int i;
5449 int op_type;
5450 tree op;
5451 tree def;
5452 gimple def_stmt;
5453 enum vect_def_type dt;
5454 enum tree_code code;
5455 enum gimple_rhs_class rhs_class;
5456
5457 gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
5458
5459 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
5460 return false;
5461
5462 if (!is_gimple_assign (stmt))
5463 {
5464 if (gimple_call_internal_p (stmt)
5465 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
5466 && gimple_call_lhs (stmt)
5467 && loop->simduid
5468 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
5469 && loop->simduid
5470 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
5471 {
5472 edge e = single_exit (loop);
5473 basic_block merge_bb = e->dest;
5474 imm_use_iterator imm_iter;
5475 use_operand_p use_p;
5476 tree lhs = gimple_call_lhs (stmt);
5477
5478 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
5479 {
5480 gimple use_stmt = USE_STMT (use_p);
5481 if (gimple_code (use_stmt) == GIMPLE_PHI
5482 || gimple_bb (use_stmt) == merge_bb)
5483 {
5484 if (vec_stmt)
5485 {
5486 tree vfm1
5487 = build_int_cst (unsigned_type_node,
5488 loop_vinfo->vectorization_factor - 1);
5489 SET_PHI_ARG_DEF (use_stmt, e->dest_idx, vfm1);
5490 }
5491 return true;
5492 }
5493 }
5494 }
5495
5496 return false;
5497 }
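/* A sketch of the case handled above (assuming it arises for
   privatized variables in an OpenMP simd loop): the GOMP_SIMD_LANE
   result selects a lane, and a use in the merge block after the loop
   must see the last lane, so the PHI argument on the exit edge is
   simply replaced by the constant VF - 1.  */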
5498
5499 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5500 return false;
5501
5502 /* FORNOW. CHECKME. */
5503 if (nested_in_vect_loop_p (loop, stmt))
5504 return false;
5505
5506 code = gimple_assign_rhs_code (stmt);
5507 op_type = TREE_CODE_LENGTH (code);
5508 rhs_class = get_gimple_rhs_class (code);
5509 gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
5510 gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);
5511
5512 /* FORNOW: support only if all uses are invariant. This means
5513 that the scalar operations can remain in place, unvectorized.
5514 The original last scalar value that they compute will be used. */
5515
5516 for (i = 0; i < op_type; i++)
5517 {
5518 if (rhs_class == GIMPLE_SINGLE_RHS)
5519 op = TREE_OPERAND (gimple_op (stmt, 1), i);
5520 else
5521 op = gimple_op (stmt, i + 1);
5522 if (op
5523 && !vect_is_simple_use (op, stmt, loop_vinfo, NULL, &def_stmt, &def,
5524 &dt))
5525 {
5526 if (dump_enabled_p ())
5527 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5528 "use not simple.\n");
5529 return false;
5530 }
5531
5532 if (dt != vect_external_def && dt != vect_constant_def)
5533 return false;
5534 }
5535
5536 /* No transformation is required for the cases we currently support. */
5537 return true;
5538 }
5539
5540 /* Kill any debug uses outside LOOP of SSA names defined in STMT. */
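/* For example (a sketch): a debug bind outside LOOP such as

     # DEBUG x => i_4

   where i_4 is defined by STMT inside the loop, is reset to

     # DEBUG x => NULL

   because the scalar value will no longer be computed once the loop
   is vectorized.  */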
5541
5542 static void
5543 vect_loop_kill_debug_uses (struct loop *loop, gimple stmt)
5544 {
5545 ssa_op_iter op_iter;
5546 imm_use_iterator imm_iter;
5547 def_operand_p def_p;
5548 gimple ustmt;
5549
5550 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
5551 {
5552 FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
5553 {
5554 basic_block bb;
5555
5556 if (!is_gimple_debug (ustmt))
5557 continue;
5558
5559 bb = gimple_bb (ustmt);
5560
5561 if (!flow_bb_inside_loop_p (loop, bb))
5562 {
5563 if (gimple_debug_bind_p (ustmt))
5564 {
5565 if (dump_enabled_p ())
5566 dump_printf_loc (MSG_NOTE, vect_location,
5567 "killing debug use\n");
5568
5569 gimple_debug_bind_reset_value (ustmt);
5570 update_stmt (ustmt);
5571 }
5572 else
5573 gcc_unreachable ();
5574 }
5575 }
5576 }
5577 }
5578
5579 /* Function vect_transform_loop.
5580
5581 The analysis phase has determined that the loop is vectorizable.
5582 Vectorize the loop - create vectorized stmts to replace the scalar
5583 stmts in the loop, and update the loop exit condition. */
5584
5585 void
5586 vect_transform_loop (loop_vec_info loop_vinfo)
5587 {
5588 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5589 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5590 int nbbs = loop->num_nodes;
5591 gimple_stmt_iterator si;
5592 int i;
5593 tree ratio = NULL;
5594 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5595 bool grouped_store;
5596 bool slp_scheduled = false;
5597 unsigned int nunits;
5598 gimple stmt, pattern_stmt;
5599 gimple_seq pattern_def_seq = NULL;
5600 gimple_stmt_iterator pattern_def_si = gsi_none ();
5601 bool transform_pattern_stmt = false;
5602 bool check_profitability = false;
5603 int th;
5604 /* Record the number of iterations before we start tampering with the profile. */
5605 gcov_type expected_iterations = expected_loop_iterations_unbounded (loop);
5606
5607 if (dump_enabled_p ())
5608 dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===\n");
5609
5610 /* If the profile is imprecise, we have a chance to fix it up. */
5611 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5612 expected_iterations = LOOP_VINFO_INT_NITERS (loop_vinfo);
5613
5614 /* Use the more conservative vectorization threshold. If the number
5615 of iterations is constant assume the cost check has been performed
5616 by our caller. If the threshold makes all loops profitable that
5617 run at least the vectorization factor number of times, checking
5618 is pointless, too. */
5619 th = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
5620 * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
5621 th = MAX (th, LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo));
5622 if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1
5623 && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5624 {
5625 if (dump_enabled_p ())
5626 dump_printf_loc (MSG_NOTE, vect_location,
5627 "Profitability threshold is %d loop iterations.\n",
5628 th);
5629 check_profitability = true;
5630 }
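/* E.g., with --param min-vect-loop-bound=2 and VF = 4 the threshold
   computed above is 2 * 4 - 1 = 7 (or the cost model's minimum, if
   larger), and the runtime profitability check is emitted only when
   the iteration count is not known at compile time.  */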
5631
5632 /* Version the loop first, if required, so the profitability check
5633 comes first. */
5634
5635 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
5636 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
5637 {
5638 vect_loop_versioning (loop_vinfo, th, check_profitability);
5639 check_profitability = false;
5640 }
5641
5642 /* Peel the loop if there are data refs with unknown alignment.
5643 Only one store data ref with unknown alignment is allowed. */
5644
5645 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
5646 {
5647 vect_do_peeling_for_alignment (loop_vinfo, th, check_profitability);
5648 check_profitability = false;
5649 }
5650
5651 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
5652 compile time constant), or it is a constant that is not divisible by the
5653 vectorization factor, then an epilog loop needs to be created.
5654 We therefore duplicate the loop: the original loop will be vectorized,
5655 and will compute the first (n/VF) iterations. The second copy of the loop
5656 will remain scalar and will compute the remaining (n%VF) iterations.
5657 (VF is the vectorization factor). */
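/* E.g., for n = 103 and VF = 8 the vectorized loop executes
   floor (103/8) = 12 iterations covering 96 elements, and the scalar
   epilog loop executes the remaining 103 % 8 = 7 iterations.  */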
5658
5659 if ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
5660 < exact_log2 (vectorization_factor)
5661 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
5662 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio,
5663 th, check_profitability);
5664 else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5665 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
5666 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
5667 else
5668 {
5669 tree ni_name, ratio_mult_vf;
5670 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name, &ratio_mult_vf,
5671 &ratio, NULL);
5672 }
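/* E.g., if the iteration count is the known constant 64 and VF = 4,
   RATIO is simply the constant 16; for a symbolic count the ratio is
   computed on the preheader instead (by vect_do_peeling_for_loop_bound
   or vect_generate_tmps_on_preheader above).  */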
5673
5674 /* 1) Make sure the loop header has exactly two entries
5675 2) Make sure we have a preheader basic block. */
5676
5677 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
5678
5679 split_edge (loop_preheader_edge (loop));
5680
5681 /* FORNOW: the vectorizer supports only loops whose body consists
5682 of one basic block (header + empty latch). When the vectorizer
5683 supports more involved loop forms, the order in which the BBs are
5684 traversed will need to be reconsidered. */
5685
5686 for (i = 0; i < nbbs; i++)
5687 {
5688 basic_block bb = bbs[i];
5689 stmt_vec_info stmt_info;
5690 gimple phi;
5691
5692 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
5693 {
5694 phi = gsi_stmt (si);
5695 if (dump_enabled_p ())
5696 {
5697 dump_printf_loc (MSG_NOTE, vect_location,
5698 "------>vectorizing phi: ");
5699 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
5700 dump_printf (MSG_NOTE, "\n");
5701 }
5702 stmt_info = vinfo_for_stmt (phi);
5703 if (!stmt_info)
5704 continue;
5705
5706 if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
5707 vect_loop_kill_debug_uses (loop, phi);
5708
5709 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5710 && !STMT_VINFO_LIVE_P (stmt_info))
5711 continue;
5712
5713 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
5714 != (unsigned HOST_WIDE_INT) vectorization_factor)
5715 && dump_enabled_p ())
5716 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
5717
5718 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
5719 {
5720 if (dump_enabled_p ())
5721 dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n");
5722 vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
5723 }
5724 }
5725
5726 pattern_stmt = NULL;
5727 for (si = gsi_start_bb (bb); !gsi_end_p (si) || transform_pattern_stmt;)
5728 {
5729 bool is_store;
5730
5731 if (transform_pattern_stmt)
5732 stmt = pattern_stmt;
5733 else
5734 {
5735 stmt = gsi_stmt (si);
5736 /* During vectorization remove existing clobber stmts. */
5737 if (gimple_clobber_p (stmt))
5738 {
5739 unlink_stmt_vdef (stmt);
5740 gsi_remove (&si, true);
5741 release_defs (stmt);
5742 continue;
5743 }
5744 }
5745
5746 if (dump_enabled_p ())
5747 {
5748 dump_printf_loc (MSG_NOTE, vect_location,
5749 "------>vectorizing statement: ");
5750 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5751 dump_printf (MSG_NOTE, "\n");
5752 }
5753
5754 stmt_info = vinfo_for_stmt (stmt);
5755
5756 /* Vector stmts created in the outer loop during vectorization of
5757 stmts in an inner loop may not have a stmt_info, and do not
5758 need to be vectorized. */
5759 if (!stmt_info)
5760 {
5761 gsi_next (&si);
5762 continue;
5763 }
5764
5765 if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
5766 vect_loop_kill_debug_uses (loop, stmt);
5767
5768 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5769 && !STMT_VINFO_LIVE_P (stmt_info))
5770 {
5771 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5772 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
5773 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5774 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5775 {
5776 stmt = pattern_stmt;
5777 stmt_info = vinfo_for_stmt (stmt);
5778 }
5779 else
5780 {
5781 gsi_next (&si);
5782 continue;
5783 }
5784 }
5785 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5786 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
5787 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5788 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5789 transform_pattern_stmt = true;
5790
5791 /* If pattern statement has def stmts, vectorize them too. */
5792 if (is_pattern_stmt_p (stmt_info))
5793 {
5794 if (pattern_def_seq == NULL)
5795 {
5796 pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
5797 pattern_def_si = gsi_start (pattern_def_seq);
5798 }
5799 else if (!gsi_end_p (pattern_def_si))
5800 gsi_next (&pattern_def_si);
5801 if (pattern_def_seq != NULL)
5802 {
5803 gimple pattern_def_stmt = NULL;
5804 stmt_vec_info pattern_def_stmt_info = NULL;
5805
5806 while (!gsi_end_p (pattern_def_si))
5807 {
5808 pattern_def_stmt = gsi_stmt (pattern_def_si);
5809 pattern_def_stmt_info
5810 = vinfo_for_stmt (pattern_def_stmt);
5811 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
5812 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
5813 break;
5814 gsi_next (&pattern_def_si);
5815 }
5816
5817 if (!gsi_end_p (pattern_def_si))
5818 {
5819 if (dump_enabled_p ())
5820 {
5821 dump_printf_loc (MSG_NOTE, vect_location,
5822 "==> vectorizing pattern def "
5823 "stmt: ");
5824 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
5825 pattern_def_stmt, 0);
5826 dump_printf (MSG_NOTE, "\n");
5827 }
5828
5829 stmt = pattern_def_stmt;
5830 stmt_info = pattern_def_stmt_info;
5831 }
5832 else
5833 {
5834 pattern_def_si = gsi_none ();
5835 transform_pattern_stmt = false;
5836 }
5837 }
5838 else
5839 transform_pattern_stmt = false;
5840 }
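/* A sketch of the situation handled above: a recognized pattern stmt
   may carry a def sequence of auxiliary stmts, e.g. (for a
   hypothetical widening operation)

     patt_t = (int) a_1;        <-- pattern def stmt
     patt_r = patt_t * b_2;     <-- main pattern stmt

   in which case each relevant def stmt is vectorized here before the
   main pattern stmt itself.  */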
5841
5842 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5843 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
5844 STMT_VINFO_VECTYPE (stmt_info));
5845 if (!STMT_SLP_TYPE (stmt_info)
5846 && nunits != (unsigned int) vectorization_factor
5847 && dump_enabled_p ())
5848 /* For SLP, VF is set according to the unrolling factor, and not
5849 to the vector size, hence for SLP this diagnostic is not valid. */
5850 dump_printf_loc (MSG_NOTE, vect_location,
5851 "multiple-types.\n");
5852
5853 /* SLP. Schedule all the SLP instances when the first SLP stmt is
5854 reached. */
5855 if (STMT_SLP_TYPE (stmt_info))
5856 {
5857 if (!slp_scheduled)
5858 {
5859 slp_scheduled = true;
5860
5861 if (dump_enabled_p ())
5862 dump_printf_loc (MSG_NOTE, vect_location,
5863 "=== scheduling SLP instances ===\n");
5864
5865 vect_schedule_slp (loop_vinfo, NULL);
5866 }
5867
5868 /* Hybrid SLP stmts must be vectorized in addition to SLP. */
5869 if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
5870 {
5871 if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
5872 {
5873 pattern_def_seq = NULL;
5874 gsi_next (&si);
5875 }
5876 continue;
5877 }
5878 }
5879
5880 /* -------- vectorize statement ------------ */
5881 if (dump_enabled_p ())
5882 dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");
5883
5884 grouped_store = false;
5885 is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
5886 if (is_store)
5887 {
5888 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5889 {
5890 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
5891 interleaving chain was completed - free all the stores in
5892 the chain. */
5893 gsi_next (&si);
5894 vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
5895 continue;
5896 }
5897 else
5898 {
5899 /* Free the attached stmt_vec_info and remove the stmt. */
5900 gimple store = gsi_stmt (si);
5901 free_stmt_vec_info (store);
5902 unlink_stmt_vdef (store);
5903 gsi_remove (&si, true);
5904 release_defs (store);
5905 continue;
5906 }
5907 }
5908
5909 if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
5910 {
5911 pattern_def_seq = NULL;
5912 gsi_next (&si);
5913 }
5914 } /* stmts in BB */
5915 } /* BBs in loop */
5916
5917 slpeel_make_loop_iterate_ntimes (loop, ratio);
5918
5919 /* Reduce loop iterations by the vectorization factor. */
5920 scale_loop_profile (loop, GCOV_COMPUTE_SCALE (1, vectorization_factor),
5921 expected_iterations / vectorization_factor);
5922 loop->nb_iterations_upper_bound
5923 = loop->nb_iterations_upper_bound.udiv (double_int::from_uhwi (vectorization_factor),
5924 FLOOR_DIV_EXPR);
5925 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
5926 && loop->nb_iterations_upper_bound != double_int_zero)
5927 loop->nb_iterations_upper_bound = loop->nb_iterations_upper_bound - double_int_one;
5928 if (loop->any_estimate)
5929 {
5930 loop->nb_iterations_estimate
5931 = loop->nb_iterations_estimate.udiv (double_int::from_uhwi (vectorization_factor),
5932 FLOOR_DIV_EXPR);
5933 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
5934 && loop->nb_iterations_estimate != double_int_zero)
5935 loop->nb_iterations_estimate = loop->nb_iterations_estimate - double_int_one;
5936 }
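/* E.g., an upper bound of 100 scalar iterations with VF = 4 becomes
   floor (100/4) = 25 vector iterations, minus one more when peeling
   for gaps guarantees that the final iterations run in the scalar
   epilog loop.  */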
5937
5938 if (dump_enabled_p ())
5939 {
5940 dump_printf_loc (MSG_NOTE, vect_location,
5941 "LOOP VECTORIZED\n");
5942 if (loop->inner)
5943 dump_printf_loc (MSG_NOTE, vect_location,
5944 "OUTER LOOP VECTORIZED\n");
5945 dump_printf (MSG_NOTE, "\n");
5946 }
5947 }