gcc/tree-vect-loop.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003-2013 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com> and
   4    Ira Rosen <irar@il.ibm.com>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "dumpfile.h"
  26 #include "tm.h"
  27 #include "ggc.h"
  28 #include "tree.h"
  29 #include "basic-block.h"
  30 #include "gimple-pretty-print.h"
  31 #include "tree-flow.h"
  32 #include "tree-pass.h"
  33 #include "cfgloop.h"
  34 #include "expr.h"
  35 #include "recog.h"
  36 #include "optabs.h"
  37 #include "params.h"
  38 #include "diagnostic-core.h"
  39 #include "tree-chrec.h"
  40 #include "tree-scalar-evolution.h"
  41 #include "tree-vectorizer.h"
  42 #include "target.h"
  43
  44 /* Loop Vectorization Pass.
  45
  46    This pass tries to vectorize loops.
  47
  48    For example, the vectorizer transforms the following simple loop:
  49
  50         short a[N]; short b[N]; short c[N]; int i;
  51
  52         for (i=0; i<N; i++){
  53           a[i] = b[i] + c[i];
  54         }
  55
  56    as if it was manually vectorized by rewriting the source code into:
  57
  58         typedef int __attribute__((mode(V8HI))) v8hi;
  59         short a[N];  short b[N]; short c[N];   int i;
  60         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  61         v8hi va, vb, vc;
  62
  63         for (i=0; i<N/8; i++){
  64           vb = pb[i];
  65           vc = pc[i];
  66           va = vb + vc;
  67           pa[i] = va;
  68         }
  69
  70         The main entry to this pass is vectorize_loops(), in which
  71    the vectorizer applies a set of analyses on a given set of loops,
  72    followed by the actual vectorization transformation for the loops that
  73    had successfully passed the analysis phase.
  74         Throughout this pass we make a distinction between two types of
  75    data: scalars (which are represented by SSA_NAMES), and memory references
  76    ("data-refs").  These two types of data require different handling both
  77    during analysis and transformation. The types of data-refs that the
  78    vectorizer currently supports are ARRAY_REFS which base is an array DECL
  79    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  80    accesses are required to have a simple (consecutive) access pattern.
  81
  82    Analysis phase:
  83    ===============
  84         The driver for the analysis phase is vect_analyze_loop().
  85    It applies a set of analyses, some of which rely on the scalar evolution
  86    analyzer (scev) developed by Sebastian Pop.
  87
  88         During the analysis phase the vectorizer records some information
  89    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  90    loop, as well as general information about the loop as a whole, which is
  91    recorded in a "loop_vec_info" struct attached to each loop.
  92
  93    Transformation phase:
  94    =====================
  95         The loop transformation phase scans all the stmts in the loop, and
  96    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
  97    the loop that needs to be vectorized.  It inserts the vector code sequence
  98    just before the scalar stmt S, and records a pointer to the vector code
  99    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
 100    attached to S).  This pointer will be used for the vectorization of following
 101    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
 102    otherwise, we rely on dead code elimination for removing it.
 103
 104         For example, say stmt S1 was vectorized into stmt VS1:
 105
 106    VS1: vb = px[i];
 107    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
 108    S2:  a = b;
 109
 110    To vectorize stmt S2, the vectorizer first finds the stmt that defines
 111    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
 112    vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
 113    resulting sequence would be:
 114
 115    VS1: vb = px[i];
 116    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
 117    VS2: va = vb;
 118    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 119
 120         Operands that are not SSA_NAMEs, are data-refs that appear in
 121    load/store operations (like 'x[i]' in S1), and are handled differently.
 122
 123    Target modeling:
 124    =================
 125         Currently the only target specific information that is used is the
 126    size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
 127    Targets that can support different sizes of vectors, for now will need
 128    to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
 129    flexibility will be added in the future.
 130
 131         Since we only vectorize operations which vector form can be
 132    expressed using existing tree codes, to verify that an operation is
 133    supported, the vectorizer checks the relevant optab at the relevant
 134    machine_mode (e.g, optab_handler (add_optab, V8HImode)).  If
 135    the value found is CODE_FOR_nothing, then there's no target support, and
 136    we can't vectorize the stmt.
 137
 138    For additional information on this project see:
 139    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 140 */
 141
 142 static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
 143
 144 /* Function vect_determine_vectorization_factor
 145
 146    Determine the vectorization factor (VF).  VF is the number of data elements
 147    that are operated upon in parallel in a single iteration of the vectorized
 148    loop.  For example, when vectorizing a loop that operates on 4byte elements,
 149    on a target with vector size (VS) 16byte, the VF is set to 4, since 4
 150    elements can fit in a single vector register.
 151
 152    We currently support vectorization of loops in which all types operated upon
 153    are of the same size.  Therefore this function currently sets VF according to
 154    the size of the types operated upon, and fails if there are multiple sizes
 155    in the loop.
 156
 157    VF is also the factor by which the loop iterations are strip-mined, e.g.:
 158    original loop:
 159         for (i=0; i<N; i++){
 160           a[i] = b[i] + c[i];
 161         }
 162
 163    vectorized loop:
 164         for (i=0; i<N; i+=VF){
 165           a[i:VF] = b[i:VF] + c[i:VF];
 166         }
 167 */
 168
 169 static bool
 170 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
 171 {
 172   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 173   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
 174   int nbbs = loop->num_nodes;
 175   gimple_stmt_iterator si;
 176   unsigned int vectorization_factor = 0;
 177   tree scalar_type;
 178   gimple phi;
 179   tree vectype;
 180   unsigned int nunits;
 181   stmt_vec_info stmt_info;
 182   int i;
 183   HOST_WIDE_INT dummy;
 184   gimple stmt, pattern_stmt = NULL;
 185   gimple_seq pattern_def_seq = NULL;
 186   gimple_stmt_iterator pattern_def_si = gsi_none ();
 187   bool analyze_pattern_stmt = false;
 188
 189   if (dump_enabled_p ())
 190     dump_printf_loc (MSG_NOTE, vect_location,
 191                      "=== vect_determine_vectorization_factor ===");
 192
 193   for (i = 0; i < nbbs; i++)
 194     {
 195       basic_block bb = bbs[i];
 196
 197       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
 198         {
 199           phi = gsi_stmt (si);
 200           stmt_info = vinfo_for_stmt (phi);
 201           if (dump_enabled_p ())
 202             {
 203               dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: ");
 204               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 205             }
 206
 207           gcc_assert (stmt_info);
 208
 209           if (STMT_VINFO_RELEVANT_P (stmt_info))
 210             {
 211               gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
 212               scalar_type = TREE_TYPE (PHI_RESULT (phi));
 213
 214               if (dump_enabled_p ())
 215                 {
 216                   dump_printf_loc (MSG_NOTE, vect_location,
 217                                    "get vectype for scalar type:  ");
 218                   dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
 219                 }
 220
 221               vectype = get_vectype_for_scalar_type (scalar_type);
 222               if (!vectype)
 223                 {
 224                   if (dump_enabled_p ())
 225                     {
 226                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 227                                        "not vectorized: unsupported "
 228                                        "data-type ");
 229                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 230                                          scalar_type);
 231                     }
 232                   return false;
 233                 }
 234               STMT_VINFO_VECTYPE (stmt_info) = vectype;
 235
 236               if (dump_enabled_p ())
 237                 {
 238                   dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
 239                   dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
 240                 }
 241
 242               nunits = TYPE_VECTOR_SUBPARTS (vectype);
 243               if (dump_enabled_p ())
 244                 dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d", nunits);
 245
 246               if (!vectorization_factor
 247                   || (nunits > vectorization_factor))
 248                 vectorization_factor = nunits;
 249             }
 250         }
 251
 252       for (si = gsi_start_bb (bb); !gsi_end_p (si) || analyze_pattern_stmt;)
 253         {
 254           tree vf_vectype;
 255
 256           if (analyze_pattern_stmt)
 257             stmt = pattern_stmt;
 258           else
 259             stmt = gsi_stmt (si);
 260
 261           stmt_info = vinfo_for_stmt (stmt);
 262
 263           if (dump_enabled_p ())
 264             {
 265               dump_printf_loc (MSG_NOTE, vect_location,
 266                                "==> examining statement: ");
 267               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
 268             }
 269
 270           gcc_assert (stmt_info);
 271
 272           /* Skip stmts which do not need to be vectorized.  */
 273           if ((!STMT_VINFO_RELEVANT_P (stmt_info)
 274                && !STMT_VINFO_LIVE_P (stmt_info))
 275               || gimple_clobber_p (stmt))
 276             {
 277               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
 278                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
 279                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
 280                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
 281                 {
 282                   stmt = pattern_stmt;
 283                   stmt_info = vinfo_for_stmt (pattern_stmt);
 284                   if (dump_enabled_p ())
 285                     {
 286                       dump_printf_loc (MSG_NOTE, vect_location,
 287                                        "==> examining pattern statement: ");
 288                       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
 289                     }
 290                 }
 291               else
 292                 {
 293                   if (dump_enabled_p ())
 294                     dump_printf_loc (MSG_NOTE, vect_location, "skip.");
 295                   gsi_next (&si);
 296                   continue;
 297                 }
 298             }
 299           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
 300                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
 301                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
 302                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
 303             analyze_pattern_stmt = true;
 304
 305           /* If a pattern statement has def stmts, analyze them too.  */
 306           if (is_pattern_stmt_p (stmt_info))
 307             {
 308               if (pattern_def_seq == NULL)
 309                 {
 310                   pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
 311                   pattern_def_si = gsi_start (pattern_def_seq);
 312                 }
 313               else if (!gsi_end_p (pattern_def_si))
 314                 gsi_next (&pattern_def_si);
 315               if (pattern_def_seq != NULL)
 316                 {
 317                   gimple pattern_def_stmt = NULL;
 318                   stmt_vec_info pattern_def_stmt_info = NULL;
 319
 320                   while (!gsi_end_p (pattern_def_si))
 321                     {
 322                       pattern_def_stmt = gsi_stmt (pattern_def_si);
 323                       pattern_def_stmt_info
 324                         = vinfo_for_stmt (pattern_def_stmt);
 325                       if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
 326                           || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
 327                         break;
 328                       gsi_next (&pattern_def_si);
 329                     }
 330
 331                   if (!gsi_end_p (pattern_def_si))
 332                     {
 333                       if (dump_enabled_p ())
 334                         {
 335                           dump_printf_loc (MSG_NOTE, vect_location,
 336                                            "==> examining pattern def stmt: ");
 337                           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
 338                                             pattern_def_stmt, 0);
 339                         }
 340
 341                       stmt = pattern_def_stmt;
 342                       stmt_info = pattern_def_stmt_info;
 343                     }
 344                   else
 345                     {
 346                       pattern_def_si = gsi_none ();
 347                       analyze_pattern_stmt = false;
 348                     }
 349                 }
 350               else
 351                 analyze_pattern_stmt = false;
 352             }
 353
 354           if (gimple_get_lhs (stmt) == NULL_TREE)
 355             {
 356               if (dump_enabled_p ())
 357                 {
 358                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 359                                    "not vectorized: irregular stmt.");
 360                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,  TDF_SLIM, stmt,
 361                                     0);
 362                 }
 363               return false;
 364             }
 365
 366           if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
 367             {
 368               if (dump_enabled_p ())
 369                 {
 370                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 371                                    "not vectorized: vector stmt in loop:");
 372                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
 373                 }
 374               return false;
 375             }
 376
 377           if (STMT_VINFO_VECTYPE (stmt_info))
 378             {
 379               /* The only case when a vectype had been already set is for stmts
 380                  that contain a dataref, or for "pattern-stmts" (stmts
 381                  generated by the vectorizer to represent/replace a certain
 382                  idiom).  */
 383               gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
 384                           || is_pattern_stmt_p (stmt_info)
 385                           || !gsi_end_p (pattern_def_si));
 386               vectype = STMT_VINFO_VECTYPE (stmt_info);
 387             }
 388           else
 389             {
 390               gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
 391               scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
 392               if (dump_enabled_p ())
 393                 {
 394                   dump_printf_loc (MSG_NOTE, vect_location,
 395                                    "get vectype for scalar type:  ");
 396                   dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
 397                 }
 398               vectype = get_vectype_for_scalar_type (scalar_type);
 399               if (!vectype)
 400                 {
 401                   if (dump_enabled_p ())
 402                     {
 403                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 404                                        "not vectorized: unsupported "
 405                                        "data-type ");
 406                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 407                                          scalar_type);
 408                     }
 409                   return false;
 410                 }
 411
 412               STMT_VINFO_VECTYPE (stmt_info) = vectype;
 413
 414               if (dump_enabled_p ())
 415                 {
 416                   dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
 417                   dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
 418                 }
 419             }
 420
 421           /* The vectorization factor is according to the smallest
 422              scalar type (or the largest vector size, but we only
 423              support one vector size per loop).  */
 424           scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
 425                                                        &dummy);
 426           if (dump_enabled_p ())
 427             {
 428               dump_printf_loc (MSG_NOTE, vect_location,
 429                                "get vectype for scalar type:  ");
 430               dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
 431             }
 432           vf_vectype = get_vectype_for_scalar_type (scalar_type);
 433           if (!vf_vectype)
 434             {
 435               if (dump_enabled_p ())
 436                 {
 437                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 438                                    "not vectorized: unsupported data-type ");
 439                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 440                                      scalar_type);
 441                 }
 442               return false;
 443             }
 444
 445           if ((GET_MODE_SIZE (TYPE_MODE (vectype))
 446                != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
 447             {
 448               if (dump_enabled_p ())
 449                 {
 450                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 451                                    "not vectorized: different sized vector "
 452                                    "types in statement, ");
 453                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 454                                      vectype);
 455                   dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
 456                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 457                                      vf_vectype);
 458                 }
 459               return false;
 460             }
 461
 462           if (dump_enabled_p ())
 463             {
 464               dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
 465               dump_generic_expr (MSG_NOTE, TDF_SLIM, vf_vectype);
 466             }
 467
 468           nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
 469           if (dump_enabled_p ())
 470             dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d", nunits);
 471           if (!vectorization_factor
 472               || (nunits > vectorization_factor))
 473             vectorization_factor = nunits;
 474
 475           if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
 476             {
 477               pattern_def_seq = NULL;
 478               gsi_next (&si);
 479             }
 480         }
 481     }
 482
 483   /* TODO: Analyze cost. Decide if worth while to vectorize.  */
 484   if (dump_enabled_p ())
 485     dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = %d",
 486                      vectorization_factor);
 487   if (vectorization_factor <= 1)
 488     {
 489       if (dump_enabled_p ())
 490         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 491                          "not vectorized: unsupported data-type");
 492       return false;
 493     }
 494   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
 495
 496   return true;
 497 }
 498
 499
 500 /* Function vect_is_simple_iv_evolution.
 501
 502    FORNOW: A simple evolution of an induction variables in the loop is
 503    considered a polynomial evolution.  */
 504
 505 static bool
 506 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
 507                              tree * step)
 508 {
 509   tree init_expr;
 510   tree step_expr;
 511   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
 512   basic_block bb;
 513
 514   /* When there is no evolution in this loop, the evolution function
 515      is not "simple".  */
 516   if (evolution_part == NULL_TREE)
 517     return false;
 518
 519   /* When the evolution is a polynomial of degree >= 2
 520      the evolution function is not "simple".  */
 521   if (tree_is_chrec (evolution_part))
 522     return false;
 523
 524   step_expr = evolution_part;
 525   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
 526
 527   if (dump_enabled_p ())
 528     {
 529       dump_printf_loc (MSG_NOTE, vect_location, "step: ");
 530       dump_generic_expr (MSG_NOTE, TDF_SLIM, step_expr);
 531       dump_printf (MSG_NOTE, ",  init: ");
 532       dump_generic_expr (MSG_NOTE, TDF_SLIM, init_expr);
 533     }
 534
 535   *init = init_expr;
 536   *step = step_expr;
 537
 538   if (TREE_CODE (step_expr) != INTEGER_CST
 539       && (TREE_CODE (step_expr) != SSA_NAME
 540           || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
 541               && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))
 542           || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
 543               && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))
 544                   || !flag_associative_math)))
 545       && (TREE_CODE (step_expr) != REAL_CST
 546           || !flag_associative_math))
 547     {
 548       if (dump_enabled_p ())
 549         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 550                          "step unknown.");
 551       return false;
 552     }
 553
 554   return true;
 555 }
 556
 557 /* Function vect_analyze_scalar_cycles_1.
 558
 559    Examine the cross iteration def-use cycles of scalar variables
 560    in LOOP.  LOOP_VINFO represents the loop that is now being
 561    considered for vectorization (can be LOOP, or an outer-loop
 562    enclosing LOOP).  */
 563
 564 static void
 565 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
 566 {
 567   basic_block bb = loop->header;
 568   tree init, step;
 569   vec<gimple> worklist;
 570   worklist.create (64);
 571   gimple_stmt_iterator gsi;
 572   bool double_reduc;
 573
 574   if (dump_enabled_p ())
 575     dump_printf_loc (MSG_NOTE, vect_location,
 576                      "=== vect_analyze_scalar_cycles ===");
 577
 578   /* First - identify all inductions.  Reduction detection assumes that all the
 579      inductions have been identified, therefore, this order must not be
 580      changed.  */
 581   for (gsi = gsi_start_phis  (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 582     {
 583       gimple phi = gsi_stmt (gsi);
 584       tree access_fn = NULL;
 585       tree def = PHI_RESULT (phi);
 586       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
 587
 588       if (dump_enabled_p ())
 589         {
 590           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
 591           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 592         }
 593
 594       /* Skip virtual phi's.  The data dependences that are associated with
 595          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
 596       if (virtual_operand_p (def))
 597         continue;
 598
 599       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
 600
 601       /* Analyze the evolution function.  */
 602       access_fn = analyze_scalar_evolution (loop, def);
 603       if (access_fn)
 604         {
 605           STRIP_NOPS (access_fn);
 606           if (dump_enabled_p ())
 607             {
 608               dump_printf_loc (MSG_NOTE, vect_location,
 609                                "Access function of PHI: ");
 610               dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
 611             }
 612           STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
 613             = evolution_part_in_loop_num (access_fn, loop->num);
 614         }
 615
 616       if (!access_fn
 617           || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
 618           || (LOOP_VINFO_LOOP (loop_vinfo) != loop
 619               && TREE_CODE (step) != INTEGER_CST))
 620         {
 621           worklist.safe_push (phi);
 622           continue;
 623         }
 624
 625       gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
 626
 627       if (dump_enabled_p ())
 628         dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.");
 629       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
 630     }
 631
 632
 633   /* Second - identify all reductions and nested cycles.  */
 634   while (worklist.length () > 0)
 635     {
 636       gimple phi = worklist.pop ();
 637       tree def = PHI_RESULT (phi);
 638       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
 639       gimple reduc_stmt;
 640       bool nested_cycle;
 641
 642       if (dump_enabled_p ())
 643         {
 644           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
 645           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 646         }
 647
 648       gcc_assert (!virtual_operand_p (def)
 649                   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
 650
 651       nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
 652       reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
 653                                                 &double_reduc);
 654       if (reduc_stmt)
 655         {
 656           if (double_reduc)
 657             {
 658               if (dump_enabled_p ())
 659                 dump_printf_loc (MSG_NOTE, vect_location,
 660                                  "Detected double reduction.");
 661
 662               STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
 663               STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 664                                                     vect_double_reduction_def;
 665             }
 666           else
 667             {
 668               if (nested_cycle)
 669                 {
 670                   if (dump_enabled_p ())
 671                     dump_printf_loc (MSG_NOTE, vect_location,
 672                                      "Detected vectorizable nested cycle.");
 673
 674                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
 675                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 676                                                              vect_nested_cycle;
 677                 }
 678               else
 679                 {
 680                   if (dump_enabled_p ())
 681                     dump_printf_loc (MSG_NOTE, vect_location,
 682                                      "Detected reduction.");
 683
 684                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
 685                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 686                                                            vect_reduction_def;
 687                   /* Store the reduction cycles for possible vectorization in
 688                      loop-aware SLP.  */
 689                   LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt);
 690                 }
 691             }
 692         }
 693       else
 694         if (dump_enabled_p ())
 695           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 696                            "Unknown def-use cycle pattern.");
 697     }
 698
 699   worklist.release ();
 700 }
 701
 702
 703 /* Function vect_analyze_scalar_cycles.
 704
 705    Examine the cross iteration def-use cycles of scalar variables, by
 706    analyzing the loop-header PHIs of scalar variables.  Classify each
 707    cycle as one of the following: invariant, induction, reduction, unknown.
 708    We do that for the loop represented by LOOP_VINFO, and also to its
 709    inner-loop, if exists.
 710    Examples for scalar cycles:
 711
 712    Example1: reduction:
 713
 714               loop1:
 715               for (i=0; i<N; i++)
 716                  sum += a[i];
 717
 718    Example2: induction:
 719
 720               loop2:
 721               for (i=0; i<N; i++)
 722                  a[i] = i;  */
 723
 724 static void
 725 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
 726 {
 727   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 728
 729   vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
 730
 731   /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
 732      Reductions in such inner-loop therefore have different properties than
 733      the reductions in the nest that gets vectorized:
 734      1. When vectorized, they are executed in the same order as in the original
 735         scalar loop, so we can't change the order of computation when
 736         vectorizing them.
 737      2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
 738         current checks are too strict.  */
 739
 740   if (loop->inner)
 741     vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
 742 }
 743
 744 /* Function vect_get_loop_niters.
 745
 746    Determine how many iterations the loop is executed.
 747    If an expression that represents the number of iterations
 748    can be constructed, place it in NUMBER_OF_ITERATIONS.
 749    Return the loop exit condition.  */
 750
 751 static gimple
 752 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
 753 {
 754   tree niters;
 755
 756   if (dump_enabled_p ())
 757     dump_printf_loc (MSG_NOTE, vect_location,
 758                      "=== get_loop_niters ===");
 759   niters = number_of_exit_cond_executions (loop);
 760
 761   if (niters != NULL_TREE
 762       && niters != chrec_dont_know)
 763     {
 764       *number_of_iterations = niters;
 765
 766       if (dump_enabled_p ())
 767         {
 768           dump_printf_loc (MSG_NOTE, vect_location, "==> get_loop_niters:");
 769           dump_generic_expr (MSG_NOTE, TDF_SLIM, *number_of_iterations);
 770         }
 771     }
 772
 773   return get_loop_exit_condition (loop);
 774 }
 775
 776
 777 /* Function bb_in_loop_p
 778
 779    Used as predicate for dfs order traversal of the loop bbs.  */
 780
 781 static bool
 782 bb_in_loop_p (const_basic_block bb, const void *data)
 783 {
 784   const struct loop *const loop = (const struct loop *)data;
 785   if (flow_bb_inside_loop_p (loop, bb))
 786     return true;
 787   return false;
 788 }
 789
 790
 791 /* Function new_loop_vec_info.
 792
 793    Create and initialize a new loop_vec_info struct for LOOP, as well as
 794    stmt_vec_info structs for all the stmts in LOOP.  */
 795
 796 static loop_vec_info
 797 new_loop_vec_info (struct loop *loop)
 798 {
 799   loop_vec_info res;
 800   basic_block *bbs;
 801   gimple_stmt_iterator si;
 802   unsigned int i, nbbs;
 803
 804   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
 805   LOOP_VINFO_LOOP (res) = loop;
 806
 807   bbs = get_loop_body (loop);
 808
 809   /* Create/Update stmt_info for all stmts in the loop.  */
 810   for (i = 0; i < loop->num_nodes; i++)
 811     {
 812       basic_block bb = bbs[i];
 813
 814       /* BBs in a nested inner-loop will have been already processed (because
 815          we will have called vect_analyze_loop_form for any nested inner-loop).
 816          Therefore, for stmts in an inner-loop we just want to update the
 817          STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
 818          loop_info of the outer-loop we are currently considering to vectorize
 819          (instead of the loop_info of the inner-loop).
 820          For stmts in other BBs we need to create a stmt_info from scratch.  */
 821       if (bb->loop_father != loop)
 822         {
 823           /* Inner-loop bb.  */
 824           gcc_assert (loop->inner && bb->loop_father == loop->inner);
 825           for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
 826             {
 827               gimple phi = gsi_stmt (si);
 828               stmt_vec_info stmt_info = vinfo_for_stmt (phi);
 829               loop_vec_info inner_loop_vinfo =
 830                 STMT_VINFO_LOOP_VINFO (stmt_info);
 831               gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
 832               STMT_VINFO_LOOP_VINFO (stmt_info) = res;
 833             }
 834           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
 835            {
 836               gimple stmt = gsi_stmt (si);
 837               stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 838               loop_vec_info inner_loop_vinfo =
 839                  STMT_VINFO_LOOP_VINFO (stmt_info);
 840               gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
 841               STMT_VINFO_LOOP_VINFO (stmt_info) = res;
 842            }
 843         }
 844       else
 845         {
 846           /* bb in current nest.  */
 847           for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
 848             {
 849               gimple phi = gsi_stmt (si);
 850               gimple_set_uid (phi, 0);
 851               set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res, NULL));
 852             }
 853
 854           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
 855             {
 856               gimple stmt = gsi_stmt (si);
 857               gimple_set_uid (stmt, 0);
 858               set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res, NULL));
 859             }
 860         }
 861     }
 862
 863   /* CHECKME: We want to visit all BBs before their successors (except for
 864      latch blocks, for which this assertion wouldn't hold).  In the simple
 865      case of the loop forms we allow, a dfs order of the BBs would the same
 866      as reversed postorder traversal, so we are safe.  */
 867
 868    free (bbs);
 869    bbs = XCNEWVEC (basic_block, loop->num_nodes);
 870    nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
 871                               bbs, loop->num_nodes, loop);
 872    gcc_assert (nbbs == loop->num_nodes);
 873
 874   LOOP_VINFO_BBS (res) = bbs;
 875   LOOP_VINFO_NITERS (res) = NULL;
 876   LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
 877   LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
 878   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
 879   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
 880   LOOP_VINFO_VECT_FACTOR (res) = 0;
 881   LOOP_VINFO_LOOP_NEST (res).create (3);
 882   LOOP_VINFO_DATAREFS (res).create (10);
 883   LOOP_VINFO_DDRS (res).create (10 * 10);
 884   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
 885   LOOP_VINFO_MAY_MISALIGN_STMTS (res).create (
 886              PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
 887   LOOP_VINFO_MAY_ALIAS_DDRS (res).create (
 888              PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
 889   LOOP_VINFO_GROUPED_STORES (res).create (10);
 890   LOOP_VINFO_REDUCTIONS (res).create (10);
 891   LOOP_VINFO_REDUCTION_CHAINS (res).create (10);
 892   LOOP_VINFO_SLP_INSTANCES (res).create (10);
 893   LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
 894   LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
 895   LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
 896   LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
 897
 898   return res;
 899 }
 900
 901
 902 /* Function destroy_loop_vec_info.
 903
 904    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
 905    stmts in the loop.  */
 906
 907 void
 908 destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
 909 {
 910   struct loop *loop;
 911   basic_block *bbs;
 912   int nbbs;
 913   gimple_stmt_iterator si;
 914   int j;
 915   vec<slp_instance> slp_instances;
 916   slp_instance instance;
 917   bool swapped;
 918
 919   if (!loop_vinfo)
 920     return;
 921
 922   loop = LOOP_VINFO_LOOP (loop_vinfo);
 923
 924   bbs = LOOP_VINFO_BBS (loop_vinfo);
 925   nbbs = clean_stmts ? loop->num_nodes : 0;
 926   swapped = LOOP_VINFO_OPERANDS_SWAPPED (loop_vinfo);
 927
 928   for (j = 0; j < nbbs; j++)
 929     {
 930       basic_block bb = bbs[j];
 931       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
 932         free_stmt_vec_info (gsi_stmt (si));
 933
 934       for (si = gsi_start_bb (bb); !gsi_end_p (si); )
 935         {
 936           gimple stmt = gsi_stmt (si);
 937
 938           /* We may have broken canonical form by moving a constant
 939              into RHS1 of a commutative op.  Fix such occurrences.  */
 940           if (swapped && is_gimple_assign (stmt))
 941             {
 942               enum tree_code code = gimple_assign_rhs_code (stmt);
 943
 944               if ((code == PLUS_EXPR
 945                    || code == POINTER_PLUS_EXPR
 946                    || code == MULT_EXPR)
 947                   && CONSTANT_CLASS_P (gimple_assign_rhs1 (stmt)))
 948                 swap_tree_operands (stmt,
 949                                     gimple_assign_rhs1_ptr (stmt),
 950                                     gimple_assign_rhs2_ptr (stmt));
 951             }
 952
 953           /* Free stmt_vec_info.  */
 954           free_stmt_vec_info (stmt);
 955           gsi_next (&si);
 956         }
 957     }
 958
 959   free (LOOP_VINFO_BBS (loop_vinfo));
 960   free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
 961   free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
 962   LOOP_VINFO_LOOP_NEST (loop_vinfo).release ();
 963   LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).release ();
 964   LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).release ();
 965   slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
 966   FOR_EACH_VEC_ELT (slp_instances, j, instance)
 967     vect_free_slp_instance (instance);
 968
 969   LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
 970   LOOP_VINFO_GROUPED_STORES (loop_vinfo).release ();
 971   LOOP_VINFO_REDUCTIONS (loop_vinfo).release ();
 972   LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).release ();
 973
 974   if (LOOP_VINFO_PEELING_HTAB (loop_vinfo).is_created ())
 975     LOOP_VINFO_PEELING_HTAB (loop_vinfo).dispose ();
 976
 977   destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
 978
 979   free (loop_vinfo);
 980   loop->aux = NULL;
 981 }
 982
 983
 984 /* Function vect_analyze_loop_1.
 985
 986    Apply a set of analyses on LOOP, and create a loop_vec_info struct
 987    for it. The different analyses will record information in the
 988    loop_vec_info struct.  This is a subset of the analyses applied in
 989    vect_analyze_loop, to be applied on an inner-loop nested in the loop
 990    that is now considered for (outer-loop) vectorization.  */
 991
 992 static loop_vec_info
 993 vect_analyze_loop_1 (struct loop *loop)
 994 {
 995   loop_vec_info loop_vinfo;
 996
 997   if (dump_enabled_p ())
 998     dump_printf_loc (MSG_NOTE, vect_location,
 999                      "===== analyze_loop_nest_1 =====");
1000
1001   /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.  */
1002
1003   loop_vinfo = vect_analyze_loop_form (loop);
1004   if (!loop_vinfo)
1005     {
1006       if (dump_enabled_p ())
1007         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1008                          "bad inner-loop form.");
1009       return NULL;
1010     }
1011
1012   return loop_vinfo;
1013 }
1014
1015
1016 /* Function vect_analyze_loop_form.
1017
1018    Verify that certain CFG restrictions hold, including:
1019    - the loop has a pre-header
1020    - the loop has a single entry and exit
1021    - the loop exit condition is simple enough, and the number of iterations
1022      can be analyzed (a countable loop).  */
1023
1024 loop_vec_info
1025 vect_analyze_loop_form (struct loop *loop)
1026 {
1027   loop_vec_info loop_vinfo;
1028   gimple loop_cond;
1029   tree number_of_iterations = NULL;
1030   loop_vec_info inner_loop_vinfo = NULL;
1031
1032   if (dump_enabled_p ())
1033     dump_printf_loc (MSG_NOTE, vect_location,
1034                      "=== vect_analyze_loop_form ===");
1035
1036   /* Different restrictions apply when we are considering an inner-most loop,
1037      vs. an outer (nested) loop.
1038      (FORNOW. May want to relax some of these restrictions in the future).  */
1039
1040   if (!loop->inner)
1041     {
1042       /* Inner-most loop.  We currently require that the number of BBs is
1043          exactly 2 (the header and latch).  Vectorizable inner-most loops
1044          look like this:
1045
1046                         (pre-header)
1047                            |
1048                           header <--------+
1049                            | |            |
1050                            | +--> latch --+
1051                            |
1052                         (exit-bb)  */
1053
1054       if (loop->num_nodes != 2)
1055         {
1056           if (dump_enabled_p ())
1057             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1058                              "not vectorized: control flow in loop.");
1059           return NULL;
1060         }
1061
1062       if (empty_block_p (loop->header))
1063     {
1064           if (dump_enabled_p ())
1065             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1066                              "not vectorized: empty loop.");
1067       return NULL;
1068     }
1069     }
1070   else
1071     {
1072       struct loop *innerloop = loop->inner;
1073       edge entryedge;
1074
1075       /* Nested loop. We currently require that the loop is doubly-nested,
1076          contains a single inner loop, and the number of BBs is exactly 5.
1077          Vectorizable outer-loops look like this:
1078
1079                         (pre-header)
1080                            |
1081                           header <---+
1082                            |         |
1083                           inner-loop |
1084                            |         |
1085                           tail ------+
1086                            |
1087                         (exit-bb)
1088
1089          The inner-loop has the properties expected of inner-most loops
1090          as described above.  */
1091
1092       if ((loop->inner)->inner || (loop->inner)->next)
1093         {
1094           if (dump_enabled_p ())
1095             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1096                              "not vectorized: multiple nested loops.");
1097           return NULL;
1098         }
1099
1100       /* Analyze the inner-loop.  */
1101       inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
1102       if (!inner_loop_vinfo)
1103         {
1104           if (dump_enabled_p ())
1105             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1106                              "not vectorized: Bad inner loop.");
1107           return NULL;
1108         }
1109
1110       if (!expr_invariant_in_loop_p (loop,
1111                                         LOOP_VINFO_NITERS (inner_loop_vinfo)))
1112         {
1113           if (dump_enabled_p ())
1114             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1115                              "not vectorized: inner-loop count not invariant.");
1116           destroy_loop_vec_info (inner_loop_vinfo, true);
1117           return NULL;
1118         }
1119
1120       if (loop->num_nodes != 5)
1121         {
1122           if (dump_enabled_p ())
1123             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1124                              "not vectorized: control flow in loop.");
1125           destroy_loop_vec_info (inner_loop_vinfo, true);
1126           return NULL;
1127         }
1128
1129       gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
1130       entryedge = EDGE_PRED (innerloop->header, 0);
1131       if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
1132         entryedge = EDGE_PRED (innerloop->header, 1);
1133
1134       if (entryedge->src != loop->header
1135           || !single_exit (innerloop)
1136           || single_exit (innerloop)->dest !=  EDGE_PRED (loop->latch, 0)->src)
1137         {
1138           if (dump_enabled_p ())
1139             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1140                              "not vectorized: unsupported outerloop form.");
1141           destroy_loop_vec_info (inner_loop_vinfo, true);
1142           return NULL;
1143         }
1144
1145       if (dump_enabled_p ())
1146         dump_printf_loc (MSG_NOTE, vect_location,
1147                          "Considering outer-loop vectorization.");
1148     }
1149
1150   if (!single_exit (loop)
1151       || EDGE_COUNT (loop->header->preds) != 2)
1152     {
1153       if (dump_enabled_p ())
1154         {
1155           if (!single_exit (loop))
1156             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1157                              "not vectorized: multiple exits.");
1158           else if (EDGE_COUNT (loop->header->preds) != 2)
1159             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1160                              "not vectorized: too many incoming edges.");
1161         }
1162       if (inner_loop_vinfo)
1163         destroy_loop_vec_info (inner_loop_vinfo, true);
1164       return NULL;
1165     }
1166
1167   /* We assume that the loop exit condition is at the end of the loop. i.e,
1168      that the loop is represented as a do-while (with a proper if-guard
1169      before the loop if needed), where the loop header contains all the
1170      executable statements, and the latch is empty.  */
1171   if (!empty_block_p (loop->latch)
1172       || !gimple_seq_empty_p (phi_nodes (loop->latch)))
1173     {
1174       if (dump_enabled_p ())
1175         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1176                          "not vectorized: latch block not empty.");
1177       if (inner_loop_vinfo)
1178         destroy_loop_vec_info (inner_loop_vinfo, true);
1179       return NULL;
1180     }
1181
1182   /* Make sure there exists a single-predecessor exit bb:  */
1183   if (!single_pred_p (single_exit (loop)->dest))
1184     {
1185       edge e = single_exit (loop);
1186       if (!(e->flags & EDGE_ABNORMAL))
1187         {
1188           split_loop_exit_edge (e);
1189           if (dump_enabled_p ())
1190             dump_printf (MSG_NOTE, "split exit edge.");
1191         }
1192       else
1193         {
1194           if (dump_enabled_p ())
1195             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1196                              "not vectorized: abnormal loop exit edge.");
1197           if (inner_loop_vinfo)
1198             destroy_loop_vec_info (inner_loop_vinfo, true);
1199           return NULL;
1200         }
1201     }
1202
1203   loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
1204   if (!loop_cond)
1205     {
1206       if (dump_enabled_p ())
1207         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1208                          "not vectorized: complicated exit condition.");
1209       if (inner_loop_vinfo)
1210         destroy_loop_vec_info (inner_loop_vinfo, true);
1211       return NULL;
1212     }
1213
1214   if (!number_of_iterations)
1215     {
1216       if (dump_enabled_p ())
1217         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1218                          "not vectorized: number of iterations cannot be "
1219                          "computed.");
1220       if (inner_loop_vinfo)
1221         destroy_loop_vec_info (inner_loop_vinfo, true);
1222       return NULL;
1223     }
1224
1225   if (chrec_contains_undetermined (number_of_iterations))
1226     {
1227       if (dump_enabled_p ())
1228             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1229                              "Infinite number of iterations.");
1230       if (inner_loop_vinfo)
1231         destroy_loop_vec_info (inner_loop_vinfo, true);
1232       return NULL;
1233     }
1234
1235   if (!NITERS_KNOWN_P (number_of_iterations))
1236     {
1237       if (dump_enabled_p ())
1238         {
1239           dump_printf_loc (MSG_NOTE, vect_location,
1240                            "Symbolic number of iterations is ");
1241           dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
1242         }
1243     }
1244   else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
1245     {
1246       if (dump_enabled_p ())
1247         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1248                          "not vectorized: number of iterations = 0.");
1249       if (inner_loop_vinfo)
1250         destroy_loop_vec_info (inner_loop_vinfo, true);
1251       return NULL;
1252     }
1253
1254   loop_vinfo = new_loop_vec_info (loop);
1255   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1256   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
1257
1258   STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
1259
1260   /* CHECKME: May want to keep it around it in the future.  */
1261   if (inner_loop_vinfo)
1262     destroy_loop_vec_info (inner_loop_vinfo, false);
1263
1264   gcc_assert (!loop->aux);
1265   loop->aux = loop_vinfo;
1266   return loop_vinfo;
1267 }
1268
1269
1270 /* Function vect_analyze_loop_operations.
1271
1272    Scan the loop stmts and make sure they are all vectorizable.  */
1273
1274 static bool
1275 vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
1276 {
1277   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1278   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1279   int nbbs = loop->num_nodes;
1280   gimple_stmt_iterator si;
1281   unsigned int vectorization_factor = 0;
1282   int i;
1283   gimple phi;
1284   stmt_vec_info stmt_info;
1285   bool need_to_vectorize = false;
1286   int min_profitable_iters;
1287   int min_scalar_loop_bound;
1288   unsigned int th;
1289   bool only_slp_in_loop = true, ok;
1290   HOST_WIDE_INT max_niter;
1291   HOST_WIDE_INT estimated_niter;
1292   int min_profitable_estimate;
1293
1294   if (dump_enabled_p ())
1295     dump_printf_loc (MSG_NOTE, vect_location,
1296                      "=== vect_analyze_loop_operations ===");
1297
1298   gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1299   vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1300   if (slp)
1301     {
1302       /* If all the stmts in the loop can be SLPed, we perform only SLP, and
1303          vectorization factor of the loop is the unrolling factor required by
1304          the SLP instances.  If that unrolling factor is 1, we say, that we
1305          perform pure SLP on loop - cross iteration parallelism is not
1306          exploited.  */
1307       for (i = 0; i < nbbs; i++)
1308         {
1309           basic_block bb = bbs[i];
1310           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1311             {
1312               gimple stmt = gsi_stmt (si);
1313               stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1314               gcc_assert (stmt_info);
1315               if ((STMT_VINFO_RELEVANT_P (stmt_info)
1316                    || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1317                   && !PURE_SLP_STMT (stmt_info))
1318                 /* STMT needs both SLP and loop-based vectorization.  */
1319                 only_slp_in_loop = false;
1320             }
1321         }
1322
1323       if (only_slp_in_loop)
1324         vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
1325       else
1326         vectorization_factor = least_common_multiple (vectorization_factor,
1327                                 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
1328
1329       LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1330       if (dump_enabled_p ())
1331         dump_printf_loc (MSG_NOTE, vect_location,
1332                          "Updating vectorization factor to %d ",
1333                          vectorization_factor);
1334     }
1335
1336   for (i = 0; i < nbbs; i++)
1337     {
1338       basic_block bb = bbs[i];
1339
1340       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1341         {
1342           phi = gsi_stmt (si);
1343           ok = true;
1344
1345           stmt_info = vinfo_for_stmt (phi);
1346           if (dump_enabled_p ())
1347             {
1348               dump_printf_loc (MSG_NOTE, vect_location, "examining phi: ");
1349               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1350             }
1351
1352           /* Inner-loop loop-closed exit phi in outer-loop vectorization
1353              (i.e., a phi in the tail of the outer-loop).  */
1354           if (! is_loop_header_bb_p (bb))
1355             {
1356               /* FORNOW: we currently don't support the case that these phis
1357                  are not used in the outerloop (unless it is double reduction,
1358                  i.e., this phi is vect_reduction_def), cause this case
1359                  requires to actually do something here.  */
1360               if ((!STMT_VINFO_RELEVANT_P (stmt_info)
1361                    || STMT_VINFO_LIVE_P (stmt_info))
1362                   && STMT_VINFO_DEF_TYPE (stmt_info)
1363                      != vect_double_reduction_def)
1364                 {
1365                   if (dump_enabled_p ())
1366                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1367                                      "Unsupported loop-closed phi in "
1368                                      "outer-loop.");
1369                   return false;
1370                 }
1371
1372               /* If PHI is used in the outer loop, we check that its operand
1373                  is defined in the inner loop.  */
1374               if (STMT_VINFO_RELEVANT_P (stmt_info))
1375                 {
1376                   tree phi_op;
1377                   gimple op_def_stmt;
1378
1379                   if (gimple_phi_num_args (phi) != 1)
1380                     return false;
1381
1382                   phi_op = PHI_ARG_DEF (phi, 0);
1383                   if (TREE_CODE (phi_op) != SSA_NAME)
1384                     return false;
1385
1386                   op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
1387                   if (!op_def_stmt
1388                       || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
1389                       || !vinfo_for_stmt (op_def_stmt))
1390                     return false;
1391
1392                   if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1393                         != vect_used_in_outer
1394                       && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1395                            != vect_used_in_outer_by_reduction)
1396                     return false;
1397                 }
1398
1399               continue;
1400             }
1401
1402           gcc_assert (stmt_info);
1403
1404           if (STMT_VINFO_LIVE_P (stmt_info))
1405             {
1406               /* FORNOW: not yet supported.  */
1407               if (dump_enabled_p ())
1408                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1409                                  "not vectorized: value used after loop.");
1410               return false;
1411             }
1412
1413           if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1414               && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1415             {
1416               /* A scalar-dependence cycle that we don't support.  */
1417               if (dump_enabled_p ())
1418                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1419                                  "not vectorized: scalar dependence cycle.");
1420               return false;
1421             }
1422
1423           if (STMT_VINFO_RELEVANT_P (stmt_info))
1424             {
1425               need_to_vectorize = true;
1426               if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
1427                 ok = vectorizable_induction (phi, NULL, NULL);
1428             }
1429
1430           if (!ok)
1431             {
1432               if (dump_enabled_p ())
1433                 {
1434                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1435                                    "not vectorized: relevant phi not "
1436                                    "supported: ");
1437                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, phi, 0);
1438                 }
1439               return false;
1440             }
1441         }
1442
1443       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1444         {
1445           gimple stmt = gsi_stmt (si);
1446           if (!gimple_clobber_p (stmt)
1447               && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
1448             return false;
1449         }
1450     } /* bbs */
1451
1452   /* All operations in the loop are either irrelevant (deal with loop
1453      control, or dead), or only used outside the loop and can be moved
1454      out of the loop (e.g. invariants, inductions).  The loop can be
1455      optimized away by scalar optimizations.  We're better off not
1456      touching this loop.  */
1457   if (!need_to_vectorize)
1458     {
1459       if (dump_enabled_p ())
1460         dump_printf_loc (MSG_NOTE, vect_location,
1461                          "All the computation can be taken out of the loop.");
1462       if (dump_enabled_p ())
1463         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1464                          "not vectorized: redundant loop. no profit to "
1465                          "vectorize.");
1466       return false;
1467     }
1468
1469   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
1470     dump_printf_loc (MSG_NOTE, vect_location,
1471                      "vectorization_factor = %d, niters = "
1472                      HOST_WIDE_INT_PRINT_DEC, vectorization_factor,
1473                      LOOP_VINFO_INT_NITERS (loop_vinfo));
1474
1475   if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1476        && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
1477       || ((max_niter = max_stmt_executions_int (loop)) != -1
1478           && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor))
1479     {
1480       if (dump_enabled_p ())
1481         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1482                          "not vectorized: iteration count too small.");
1483       if (dump_enabled_p ())
1484         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1485                          "not vectorized: iteration count smaller than "
1486                          "vectorization factor.");
1487       return false;
1488     }
1489
1490   /* Analyze cost.  Decide if worth while to vectorize.  */
1491
1492   /* Once VF is set, SLP costs should be updated since the number of created
1493      vector stmts depends on VF.  */
1494   vect_update_slp_costs_according_to_vf (loop_vinfo);
1495
1496   vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
1497                                       &min_profitable_estimate);
1498   LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
1499
1500   if (min_profitable_iters < 0)
1501     {
1502       if (dump_enabled_p ())
1503         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1504                          "not vectorized: vectorization not profitable.");
1505       if (dump_enabled_p ())
1506         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1507                          "not vectorized: vector version will never be "
1508                          "profitable.");
1509       return false;
1510     }
1511
1512   min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1513                             * vectorization_factor) - 1);
1514
1515
1516   /* Use the cost model only if it is more conservative than user specified
1517      threshold.  */
1518
1519   th = (unsigned) min_scalar_loop_bound;
1520   if (min_profitable_iters
1521       && (!min_scalar_loop_bound
1522           || min_profitable_iters > min_scalar_loop_bound))
1523     th = (unsigned) min_profitable_iters;
1524
1525   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1526       && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
1527     {
1528       if (dump_enabled_p ())
1529         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1530                          "not vectorized: vectorization not profitable.");
1531       if (dump_enabled_p ())
1532         dump_printf_loc (MSG_NOTE, vect_location,
1533                          "not vectorized: iteration count smaller than user "
1534                          "specified loop bound parameter or minimum profitable "
1535                          "iterations (whichever is more conservative).");
1536       return false;
1537     }
1538
1539   if ((estimated_niter = estimated_stmt_executions_int (loop)) != -1
1540       && ((unsigned HOST_WIDE_INT) estimated_niter
1541           <= MAX (th, (unsigned)min_profitable_estimate)))
1542     {
1543       if (dump_enabled_p ())
1544         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1545                          "not vectorized: estimated iteration count too "
1546                          "small.");
1547       if (dump_enabled_p ())
1548         dump_printf_loc (MSG_NOTE, vect_location,
1549                          "not vectorized: estimated iteration count smaller "
1550                          "than specified loop bound parameter or minimum "
1551                          "profitable iterations (whichever is more "
1552                          "conservative).");
1553       return false;
1554     }
1555
1556   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1557       || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
1558       || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
1559     {
1560       if (dump_enabled_p ())
1561         dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required.");
1562       if (!vect_can_advance_ivs_p (loop_vinfo))
1563         {
1564           if (dump_enabled_p ())
1565             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1566                              "not vectorized: can't create epilog loop 1.");
1567           return false;
1568         }
1569       if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1570         {
1571           if (dump_enabled_p ())
1572             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1573                              "not vectorized: can't create epilog loop 2.");
1574           return false;
1575         }
1576     }
1577
1578   return true;
1579 }
1580
1581
1582 /* Function vect_analyze_loop_2.
1583
1584    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1585    for it.  The different analyses will record information in the
1586    loop_vec_info struct.  */
1587 static bool
1588 vect_analyze_loop_2 (loop_vec_info loop_vinfo)
1589 {
1590   bool ok, slp = false;
1591   int max_vf = MAX_VECTORIZATION_FACTOR;
1592   int min_vf = 2;
1593
1594   /* Find all data references in the loop (which correspond to vdefs/vuses)
1595      and analyze their evolution in the loop.  Also adjust the minimal
1596      vectorization factor according to the loads and stores.
1597
1598      FORNOW: Handle only simple, array references, which
1599      alignment can be forced, and aligned pointer-references.  */
1600
1601   ok = vect_analyze_data_refs (loop_vinfo, NULL, &min_vf);
1602   if (!ok)
1603     {
1604       if (dump_enabled_p ())
1605         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1606                          "bad data references.");
1607       return false;
1608     }
1609
1610   /* Analyze the access patterns of the data-refs in the loop (consecutive,
1611      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
1612
1613   ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
1614   if (!ok)
1615     {
1616       if (dump_enabled_p ())
1617         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1618                          "bad data access.");
1619       return false;
1620     }
1621
1622   /* Classify all cross-iteration scalar data-flow cycles.
1623      Cross-iteration cycles caused by virtual phis are analyzed separately.  */
1624
1625   vect_analyze_scalar_cycles (loop_vinfo);
1626
1627   vect_pattern_recog (loop_vinfo, NULL);
1628
1629   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
1630
1631   ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1632   if (!ok)
1633     {
1634       if (dump_enabled_p ())
1635         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1636                          "unexpected pattern.");
1637       return false;
1638     }
1639
1640   /* Analyze data dependences between the data-refs in the loop
1641      and adjust the maximum vectorization factor according to
1642      the dependences.
1643      FORNOW: fail at the first data dependence that we encounter.  */
1644
1645   ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf);
1646   if (!ok
1647       || max_vf < min_vf)
1648     {
1649       if (dump_enabled_p ())
1650             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1651                              "bad data dependence.");
1652       return false;
1653     }
1654
1655   ok = vect_determine_vectorization_factor (loop_vinfo);
1656   if (!ok)
1657     {
1658       if (dump_enabled_p ())
1659         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1660                          "can't determine vectorization factor.");
1661       return false;
1662     }
1663   if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1664     {
1665       if (dump_enabled_p ())
1666         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1667                          "bad data dependence.");
1668       return false;
1669     }
1670
1671   /* Analyze the alignment of the data-refs in the loop.
1672      Fail if a data reference is found that cannot be vectorized.  */
1673
1674   ok = vect_analyze_data_refs_alignment (loop_vinfo, NULL);
1675   if (!ok)
1676     {
1677       if (dump_enabled_p ())
1678         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1679                          "bad data alignment.");
1680       return false;
1681     }
1682
1683   /* Prune the list of ddrs to be tested at run-time by versioning for alias.
1684      It is important to call pruning after vect_analyze_data_ref_accesses,
1685      since we use grouping information gathered by interleaving analysis.  */
1686   ok = vect_prune_runtime_alias_test_list (loop_vinfo);
1687   if (!ok)
1688     {
1689       if (dump_enabled_p ())
1690         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1691                          "too long list of versioning for alias "
1692                          "run-time tests.");
1693       return false;
1694     }
1695
1696   /* This pass will decide on using loop versioning and/or loop peeling in
1697      order to enhance the alignment of data references in the loop.  */
1698
1699   ok = vect_enhance_data_refs_alignment (loop_vinfo);
1700   if (!ok)
1701     {
1702       if (dump_enabled_p ())
1703         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1704                          "bad data alignment.");
1705       return false;
1706     }
1707
1708   /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
1709   ok = vect_analyze_slp (loop_vinfo, NULL);
1710   if (ok)
1711     {
1712       /* Decide which possible SLP instances to SLP.  */
1713       slp = vect_make_slp_decision (loop_vinfo);
1714
1715       /* Find stmts that need to be both vectorized and SLPed.  */
1716       vect_detect_hybrid_slp (loop_vinfo);
1717     }
1718   else
1719     return false;
1720
1721   /* Scan all the operations in the loop and make sure they are
1722      vectorizable.  */
1723
1724   ok = vect_analyze_loop_operations (loop_vinfo, slp);
1725   if (!ok)
1726     {
1727       if (dump_enabled_p ())
1728         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1729                          "bad operation or unsupported loop bound.");
1730       return false;
1731     }
1732
1733   return true;
1734 }
1735
1736 /* Function vect_analyze_loop.
1737
1738    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1739    for it.  The different analyses will record information in the
1740    loop_vec_info struct.  */
1741 loop_vec_info
1742 vect_analyze_loop (struct loop *loop)
1743 {
1744   loop_vec_info loop_vinfo;
1745   unsigned int vector_sizes;
1746
1747   /* Autodetect first vector size we try.  */
1748   current_vector_size = 0;
1749   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
1750
1751   if (dump_enabled_p ())
1752     dump_printf_loc (MSG_NOTE, vect_location,
1753                      "===== analyze_loop_nest =====");
1754
1755   if (loop_outer (loop)
1756       && loop_vec_info_for_loop (loop_outer (loop))
1757       && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
1758     {
1759       if (dump_enabled_p ())
1760         dump_printf_loc (MSG_NOTE, vect_location,
1761                          "outer-loop already vectorized.");
1762       return NULL;
1763     }
1764
1765   while (1)
1766     {
1767       /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
1768       loop_vinfo = vect_analyze_loop_form (loop);
1769       if (!loop_vinfo)
1770         {
1771           if (dump_enabled_p ())
1772             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1773                              "bad loop form.");
1774           return NULL;
1775         }
1776
1777       if (vect_analyze_loop_2 (loop_vinfo))
1778         {
1779           LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
1780
1781           return loop_vinfo;
1782         }
1783
1784       destroy_loop_vec_info (loop_vinfo, true);
1785
1786       vector_sizes &= ~current_vector_size;
1787       if (vector_sizes == 0
1788           || current_vector_size == 0)
1789         return NULL;
1790
1791       /* Try the next biggest vector size.  */
1792       current_vector_size = 1 << floor_log2 (vector_sizes);
1793       if (dump_enabled_p ())
1794         dump_printf_loc (MSG_NOTE, vect_location,
1795                          "***** Re-trying analysis with "
1796                          "vector size %d\n", current_vector_size);
1797     }
1798 }
1799
1800
1801 /* Function reduction_code_for_scalar_code
1802
1803    Input:
1804    CODE - tree_code of a reduction operations.
1805
1806    Output:
1807    REDUC_CODE - the corresponding tree-code to be used to reduce the
1808       vector of partial results into a single scalar result (which
1809       will also reside in a vector) or ERROR_MARK if the operation is
1810       a supported reduction operation, but does not have such tree-code.
1811
1812    Return FALSE if CODE currently cannot be vectorized as reduction.  */
1813
1814 static bool
1815 reduction_code_for_scalar_code (enum tree_code code,
1816                                 enum tree_code *reduc_code)
1817 {
1818   switch (code)
1819     {
1820       case MAX_EXPR:
1821         *reduc_code = REDUC_MAX_EXPR;
1822         return true;
1823
1824       case MIN_EXPR:
1825         *reduc_code = REDUC_MIN_EXPR;
1826         return true;
1827
1828       case PLUS_EXPR:
1829         *reduc_code = REDUC_PLUS_EXPR;
1830         return true;
1831
1832       case MULT_EXPR:
1833       case MINUS_EXPR:
1834       case BIT_IOR_EXPR:
1835       case BIT_XOR_EXPR:
1836       case BIT_AND_EXPR:
1837         *reduc_code = ERROR_MARK;
1838         return true;
1839
1840       default:
1841        return false;
1842     }
1843 }
1844
1845
1846 /* Error reporting helper for vect_is_simple_reduction below.  GIMPLE statement
1847    STMT is printed with a message MSG. */
1848
1849 static void
1850 report_vect_op (int msg_type, gimple stmt, const char *msg)
1851 {
1852   dump_printf_loc (msg_type, vect_location, "%s", msg);
1853   dump_gimple_stmt (msg_type, TDF_SLIM, stmt, 0);
1854 }
1855
1856
1857 /* Detect SLP reduction of the form:
1858
1859    #a1 = phi <a5, a0>
1860    a2 = operation (a1)
1861    a3 = operation (a2)
1862    a4 = operation (a3)
1863    a5 = operation (a4)
1864
1865    #a = phi <a5>
1866
1867    PHI is the reduction phi node (#a1 = phi <a5, a0> above)
1868    FIRST_STMT is the first reduction stmt in the chain
1869    (a2 = operation (a1)).
1870
1871    Return TRUE if a reduction chain was detected.  */
1872
1873 static bool
1874 vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
1875 {
1876   struct loop *loop = (gimple_bb (phi))->loop_father;
1877   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
1878   enum tree_code code;
1879   gimple current_stmt = NULL, loop_use_stmt = NULL, first, next_stmt;
1880   stmt_vec_info use_stmt_info, current_stmt_info;
1881   tree lhs;
1882   imm_use_iterator imm_iter;
1883   use_operand_p use_p;
1884   int nloop_uses, size = 0, n_out_of_loop_uses;
1885   bool found = false;
1886
1887   if (loop != vect_loop)
1888     return false;
1889
1890   lhs = PHI_RESULT (phi);
1891   code = gimple_assign_rhs_code (first_stmt);
1892   while (1)
1893     {
1894       nloop_uses = 0;
1895       n_out_of_loop_uses = 0;
1896       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1897         {
1898           gimple use_stmt = USE_STMT (use_p);
1899           if (is_gimple_debug (use_stmt))
1900             continue;
1901
1902           use_stmt = USE_STMT (use_p);
1903
1904           /* Check if we got back to the reduction phi.  */
1905           if (use_stmt == phi)
1906             {
1907               loop_use_stmt = use_stmt;
1908               found = true;
1909               break;
1910             }
1911
1912           if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1913             {
1914               if (vinfo_for_stmt (use_stmt)
1915                   && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1916                 {
1917                   loop_use_stmt = use_stmt;
1918                   nloop_uses++;
1919                 }
1920             }
1921            else
1922              n_out_of_loop_uses++;
1923
1924            /* There are can be either a single use in the loop or two uses in
1925               phi nodes.  */
1926            if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
1927              return false;
1928         }
1929
1930       if (found)
1931         break;
1932
1933       /* We reached a statement with no loop uses.  */
1934       if (nloop_uses == 0)
1935         return false;
1936
1937       /* This is a loop exit phi, and we haven't reached the reduction phi.  */
1938       if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
1939         return false;
1940
1941       if (!is_gimple_assign (loop_use_stmt)
1942           || code != gimple_assign_rhs_code (loop_use_stmt)
1943           || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
1944         return false;
1945
1946       /* Insert USE_STMT into reduction chain.  */
1947       use_stmt_info = vinfo_for_stmt (loop_use_stmt);
1948       if (current_stmt)
1949         {
1950           current_stmt_info = vinfo_for_stmt (current_stmt);
1951           GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
1952           GROUP_FIRST_ELEMENT (use_stmt_info)
1953             = GROUP_FIRST_ELEMENT (current_stmt_info);
1954         }
1955       else
1956         GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;
1957
1958       lhs = gimple_assign_lhs (loop_use_stmt);
1959       current_stmt = loop_use_stmt;
1960       size++;
1961    }
1962
1963   if (!found || loop_use_stmt != phi || size < 2)
1964     return false;
1965
1966   /* Swap the operands, if needed, to make the reduction operand be the second
1967      operand.  */
1968   lhs = PHI_RESULT (phi);
1969   next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
1970   while (next_stmt)
1971     {
1972       if (gimple_assign_rhs2 (next_stmt) == lhs)
1973         {
1974           tree op = gimple_assign_rhs1 (next_stmt);
1975           gimple def_stmt = NULL;
1976
1977           if (TREE_CODE (op) == SSA_NAME)
1978             def_stmt = SSA_NAME_DEF_STMT (op);
1979
1980           /* Check that the other def is either defined in the loop
1981              ("vect_internal_def"), or it's an induction (defined by a
1982              loop-header phi-node).  */
1983           if (def_stmt
1984               && gimple_bb (def_stmt)
1985               && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
1986               && (is_gimple_assign (def_stmt)
1987                   || is_gimple_call (def_stmt)
1988                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
1989                            == vect_induction_def
1990                   || (gimple_code (def_stmt) == GIMPLE_PHI
1991                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
1992                                   == vect_internal_def
1993                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
1994             {
1995               lhs = gimple_assign_lhs (next_stmt);
1996               next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1997               continue;
1998             }
1999
2000           return false;
2001         }
2002       else
2003         {
2004           tree op = gimple_assign_rhs2 (next_stmt);
2005           gimple def_stmt = NULL;
2006
2007           if (TREE_CODE (op) == SSA_NAME)
2008             def_stmt = SSA_NAME_DEF_STMT (op);
2009
2010           /* Check that the other def is either defined in the loop
2011             ("vect_internal_def"), or it's an induction (defined by a
2012             loop-header phi-node).  */
2013           if (def_stmt
2014               && gimple_bb (def_stmt)
2015               && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2016               && (is_gimple_assign (def_stmt)
2017                   || is_gimple_call (def_stmt)
2018                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2019                               == vect_induction_def
2020                   || (gimple_code (def_stmt) == GIMPLE_PHI
2021                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2022                                   == vect_internal_def
2023                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2024             {
2025               if (dump_enabled_p ())
2026                 {
2027                   dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: ");
2028                   dump_gimple_stmt (MSG_NOTE, TDF_SLIM, next_stmt, 0);
2029                 }
2030
2031               swap_tree_operands (next_stmt,
2032                                   gimple_assign_rhs1_ptr (next_stmt),
2033                                   gimple_assign_rhs2_ptr (next_stmt));
2034               update_stmt (next_stmt);
2035
2036               if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
2037                 LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2038             }
2039           else
2040             return false;
2041         }
2042
2043       lhs = gimple_assign_lhs (next_stmt);
2044       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2045     }
2046
2047   /* Save the chain for further analysis in SLP detection.  */
2048   first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2049   LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (first);
2050   GROUP_SIZE (vinfo_for_stmt (first)) = size;
2051
2052   return true;
2053 }
2054
2055
2056 /* Function vect_is_simple_reduction_1
2057
2058    (1) Detect a cross-iteration def-use cycle that represents a simple
2059    reduction computation.  We look for the following pattern:
2060
2061    loop_header:
2062      a1 = phi < a0, a2 >
2063      a3 = ...
2064      a2 = operation (a3, a1)
2065
2066    such that:
2067    1. operation is commutative and associative and it is safe to
2068       change the order of the computation (if CHECK_REDUCTION is true)
2069    2. no uses for a2 in the loop (a2 is used out of the loop)
2070    3. no uses of a1 in the loop besides the reduction operation
2071    4. no uses of a1 outside the loop.
2072
2073    Conditions 1,4 are tested here.
2074    Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
2075
2076    (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
2077    nested cycles, if CHECK_REDUCTION is false.
2078
2079    (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
2080    reductions:
2081
2082      a1 = phi < a0, a2 >
2083      inner loop (def of a3)
2084      a2 = phi < a3 >
2085
2086    If MODIFY is true it tries also to rework the code in-place to enable
2087    detection of more reduction patterns.  For the time being we rewrite
2088    "res -= RHS" into "rhs += -RHS" when it seems worthwhile.
2089 */
2090
2091 static gimple
2092 vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
2093                             bool check_reduction, bool *double_reduc,
2094                             bool modify)
2095 {
2096   struct loop *loop = (gimple_bb (phi))->loop_father;
2097   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2098   edge latch_e = loop_latch_edge (loop);
2099   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
2100   gimple def_stmt, def1 = NULL, def2 = NULL;
2101   enum tree_code orig_code, code;
2102   tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
2103   tree type;
2104   int nloop_uses;
2105   tree name;
2106   imm_use_iterator imm_iter;
2107   use_operand_p use_p;
2108   bool phi_def;
2109
2110   *double_reduc = false;
2111
2112   /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
2113      otherwise, we assume outer loop vectorization.  */
2114   gcc_assert ((check_reduction && loop == vect_loop)
2115               || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
2116
2117   name = PHI_RESULT (phi);
2118   nloop_uses = 0;
2119   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2120     {
2121       gimple use_stmt = USE_STMT (use_p);
2122       if (is_gimple_debug (use_stmt))
2123         continue;
2124
2125       if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2126         {
2127           if (dump_enabled_p ())
2128             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2129                              "intermediate value used outside loop.");
2130
2131           return NULL;
2132         }
2133
2134       if (vinfo_for_stmt (use_stmt)
2135           && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2136         nloop_uses++;
2137       if (nloop_uses > 1)
2138         {
2139           if (dump_enabled_p ())
2140             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2141                              "reduction used in loop.");
2142           return NULL;
2143         }
2144     }
2145
2146   if (TREE_CODE (loop_arg) != SSA_NAME)
2147     {
2148       if (dump_enabled_p ())
2149         {
2150           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2151                            "reduction: not ssa_name: ");
2152           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, loop_arg);
2153         }
2154       return NULL;
2155     }
2156
2157   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2158   if (!def_stmt)
2159     {
2160       if (dump_enabled_p ())
2161         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2162                          "reduction: no def_stmt.");
2163       return NULL;
2164     }
2165
2166   if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
2167     {
2168       if (dump_enabled_p ())
2169         dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2170       return NULL;
2171     }
2172
2173   if (is_gimple_assign (def_stmt))
2174     {
2175       name = gimple_assign_lhs (def_stmt);
2176       phi_def = false;
2177     }
2178   else
2179     {
2180       name = PHI_RESULT (def_stmt);
2181       phi_def = true;
2182     }
2183
2184   nloop_uses = 0;
2185   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2186     {
2187       gimple use_stmt = USE_STMT (use_p);
2188       if (is_gimple_debug (use_stmt))
2189         continue;
2190       if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
2191           && vinfo_for_stmt (use_stmt)
2192           && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2193         nloop_uses++;
2194       if (nloop_uses > 1)
2195         {
2196           if (dump_enabled_p ())
2197             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2198                              "reduction used in loop.");
2199           return NULL;
2200         }
2201     }
2202
2203   /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2204      defined in the inner loop.  */
2205   if (phi_def)
2206     {
2207       op1 = PHI_ARG_DEF (def_stmt, 0);
2208
2209       if (gimple_phi_num_args (def_stmt) != 1
2210           || TREE_CODE (op1) != SSA_NAME)
2211         {
2212           if (dump_enabled_p ())
2213             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2214                              "unsupported phi node definition.");
2215
2216           return NULL;
2217         }
2218
2219       def1 = SSA_NAME_DEF_STMT (op1);
2220       if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2221           && loop->inner
2222           && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
2223           && is_gimple_assign (def1))
2224         {
2225           if (dump_enabled_p ())
2226             report_vect_op (MSG_NOTE, def_stmt,
2227                             "detected double reduction: ");
2228
2229           *double_reduc = true;
2230           return def_stmt;
2231         }
2232
2233       return NULL;
2234     }
2235
2236   code = orig_code = gimple_assign_rhs_code (def_stmt);
2237
2238   /* We can handle "res -= x[i]", which is non-associative by
2239      simply rewriting this into "res += -x[i]".  Avoid changing
2240      gimple instruction for the first simple tests and only do this
2241      if we're allowed to change code at all.  */
2242   if (code == MINUS_EXPR
2243       && modify
2244       && (op1 = gimple_assign_rhs1 (def_stmt))
2245       && TREE_CODE (op1) == SSA_NAME
2246       && SSA_NAME_DEF_STMT (op1) == phi)
2247     code = PLUS_EXPR;
2248
2249   if (check_reduction
2250       && (!commutative_tree_code (code) || !associative_tree_code (code)))
2251     {
2252       if (dump_enabled_p ())
2253         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2254                         "reduction: not commutative/associative: ");
2255       return NULL;
2256     }
2257
2258   if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
2259     {
2260       if (code != COND_EXPR)
2261         {
2262           if (dump_enabled_p ())
2263             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2264                             "reduction: not binary operation: ");
2265
2266           return NULL;
2267         }
2268
2269       op3 = gimple_assign_rhs1 (def_stmt);
2270       if (COMPARISON_CLASS_P (op3))
2271         {
2272           op4 = TREE_OPERAND (op3, 1);
2273           op3 = TREE_OPERAND (op3, 0);
2274         }
2275
2276       op1 = gimple_assign_rhs2 (def_stmt);
2277       op2 = gimple_assign_rhs3 (def_stmt);
2278
2279       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2280         {
2281           if (dump_enabled_p ())
2282             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2283                             "reduction: uses not ssa_names: ");
2284
2285           return NULL;
2286         }
2287     }
2288   else
2289     {
2290       op1 = gimple_assign_rhs1 (def_stmt);
2291       op2 = gimple_assign_rhs2 (def_stmt);
2292
2293       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2294         {
2295           if (dump_enabled_p ())
2296             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2297                             "reduction: uses not ssa_names: ");
2298
2299           return NULL;
2300         }
2301    }
2302
2303   type = TREE_TYPE (gimple_assign_lhs (def_stmt));
2304   if ((TREE_CODE (op1) == SSA_NAME
2305        && !types_compatible_p (type,TREE_TYPE (op1)))
2306       || (TREE_CODE (op2) == SSA_NAME
2307           && !types_compatible_p (type, TREE_TYPE (op2)))
2308       || (op3 && TREE_CODE (op3) == SSA_NAME
2309           && !types_compatible_p (type, TREE_TYPE (op3)))
2310       || (op4 && TREE_CODE (op4) == SSA_NAME
2311           && !types_compatible_p (type, TREE_TYPE (op4))))
2312     {
2313       if (dump_enabled_p ())
2314         {
2315           dump_printf_loc (MSG_NOTE, vect_location,
2316                            "reduction: multiple types: operation type: ");
2317           dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
2318           dump_printf (MSG_NOTE, ", operands types: ");
2319           dump_generic_expr (MSG_NOTE, TDF_SLIM,
2320                              TREE_TYPE (op1));
2321           dump_printf (MSG_NOTE, ",");
2322           dump_generic_expr (MSG_NOTE, TDF_SLIM,
2323                              TREE_TYPE (op2));
2324           if (op3)
2325             {
2326               dump_printf (MSG_NOTE, ",");
2327               dump_generic_expr (MSG_NOTE, TDF_SLIM,
2328                                  TREE_TYPE (op3));
2329             }
2330
2331           if (op4)
2332             {
2333               dump_printf (MSG_NOTE, ",");
2334               dump_generic_expr (MSG_NOTE, TDF_SLIM,
2335                                  TREE_TYPE (op4));
2336             }
2337         }
2338
2339       return NULL;
2340     }
2341
2342   /* Check that it's ok to change the order of the computation.
2343      Generally, when vectorizing a reduction we change the order of the
2344      computation.  This may change the behavior of the program in some
2345      cases, so we need to check that this is ok.  One exception is when
2346      vectorizing an outer-loop: the inner-loop is executed sequentially,
2347      and therefore vectorizing reductions in the inner-loop during
2348      outer-loop vectorization is safe.  */
2349
2350   /* CHECKME: check for !flag_finite_math_only too?  */
2351   if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
2352       && check_reduction)
2353     {
2354       /* Changing the order of operations changes the semantics.  */
2355       if (dump_enabled_p ())
2356         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2357                         "reduction: unsafe fp math optimization: ");
2358       return NULL;
2359     }
2360   else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
2361            && check_reduction)
2362     {
2363       /* Changing the order of operations changes the semantics.  */
2364       if (dump_enabled_p ())
2365         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2366                         "reduction: unsafe int math optimization: ");
2367       return NULL;
2368     }
2369   else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
2370     {
2371       /* Changing the order of operations changes the semantics.  */
2372       if (dump_enabled_p ())
2373         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2374                         "reduction: unsafe fixed-point math optimization: ");
2375       return NULL;
2376     }
2377
2378   /* If we detected "res -= x[i]" earlier, rewrite it into
2379      "res += -x[i]" now.  If this turns out to be useless reassoc
2380      will clean it up again.  */
2381   if (orig_code == MINUS_EXPR)
2382     {
2383       tree rhs = gimple_assign_rhs2 (def_stmt);
2384       tree negrhs = make_ssa_name (TREE_TYPE (rhs), NULL);
2385       gimple negate_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, negrhs,
2386                                                          rhs, NULL);
2387       gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
2388       set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt,
2389                                                           loop_info, NULL));
2390       gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
2391       gimple_assign_set_rhs2 (def_stmt, negrhs);
2392       gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
2393       update_stmt (def_stmt);
2394     }
2395
2396   /* Reduction is safe. We're dealing with one of the following:
2397      1) integer arithmetic and no trapv
2398      2) floating point arithmetic, and special flags permit this optimization
2399      3) nested cycle (i.e., outer loop vectorization).  */
2400   if (TREE_CODE (op1) == SSA_NAME)
2401     def1 = SSA_NAME_DEF_STMT (op1);
2402
2403   if (TREE_CODE (op2) == SSA_NAME)
2404     def2 = SSA_NAME_DEF_STMT (op2);
2405
2406   if (code != COND_EXPR
2407       && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
2408     {
2409       if (dump_enabled_p ())
2410         report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: ");
2411       return NULL;
2412     }
2413
2414   /* Check that one def is the reduction def, defined by PHI,
2415      the other def is either defined in the loop ("vect_internal_def"),
2416      or it's an induction (defined by a loop-header phi-node).  */
2417
2418   if (def2 && def2 == phi
2419       && (code == COND_EXPR
2420           || !def1 || gimple_nop_p (def1)
2421           || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
2422               && (is_gimple_assign (def1)
2423                   || is_gimple_call (def1)
2424                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2425                       == vect_induction_def
2426                   || (gimple_code (def1) == GIMPLE_PHI
2427                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2428                           == vect_internal_def
2429                       && !is_loop_header_bb_p (gimple_bb (def1)))))))
2430     {
2431       if (dump_enabled_p ())
2432         report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2433       return def_stmt;
2434     }
2435
2436   if (def1 && def1 == phi
2437       && (code == COND_EXPR
2438           || !def2 || gimple_nop_p (def2)
2439           || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
2440               && (is_gimple_assign (def2)
2441                   || is_gimple_call (def2)
2442                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2443                       == vect_induction_def
2444                   || (gimple_code (def2) == GIMPLE_PHI
2445                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2446                           == vect_internal_def
2447                       && !is_loop_header_bb_p (gimple_bb (def2)))))))
2448     {
2449       if (check_reduction)
2450         {
2451           /* Swap operands (just for simplicity - so that the rest of the code
2452              can assume that the reduction variable is always the last (second)
2453              argument).  */
2454           if (dump_enabled_p ())
2455             report_vect_op (MSG_NOTE, def_stmt,
2456                             "detected reduction: need to swap operands: ");
2457
2458           swap_tree_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
2459                               gimple_assign_rhs2_ptr (def_stmt));
2460
2461           if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
2462             LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2463         }
2464       else
2465         {
2466           if (dump_enabled_p ())
2467             report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2468         }
2469
2470       return def_stmt;
2471     }
2472
2473   /* Try to find SLP reduction chain.  */
2474   if (check_reduction && vect_is_slp_reduction (loop_info, phi, def_stmt))
2475     {
2476       if (dump_enabled_p ())
2477         report_vect_op (MSG_NOTE, def_stmt,
2478                         "reduction: detected reduction chain: ");
2479
2480       return def_stmt;
2481     }
2482
2483   if (dump_enabled_p ())
2484     report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2485                     "reduction: unknown pattern: ");
2486
2487   return NULL;
2488 }
2489
2490 /* Wrapper around vect_is_simple_reduction_1, that won't modify code
2491    in-place.  Arguments as there.  */
2492
2493 static gimple
2494 vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
2495                           bool check_reduction, bool *double_reduc)
2496 {
2497   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2498                                      double_reduc, false);
2499 }
2500
2501 /* Wrapper around vect_is_simple_reduction_1, which will modify code
2502    in-place if it enables detection of more reductions.  Arguments
2503    as there.  */
2504
2505 gimple
2506 vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
2507                           bool check_reduction, bool *double_reduc)
2508 {
2509   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2510                                      double_reduc, true);
2511 }
2512
2513 /* Calculate the cost of one scalar iteration of the loop.  */
2514 int
2515 vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
2516 {
2517   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2518   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2519   int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
2520   int innerloop_iters, i, stmt_cost;
2521
2522   /* Count statements in scalar loop.  Using this as scalar cost for a single
2523      iteration for now.
2524
2525      TODO: Add outer loop support.
2526
2527      TODO: Consider assigning different costs to different scalar
2528      statements.  */
2529
2530   /* FORNOW.  */
2531   innerloop_iters = 1;
2532   if (loop->inner)
2533     innerloop_iters = 50; /* FIXME */
2534
2535   for (i = 0; i < nbbs; i++)
2536     {
2537       gimple_stmt_iterator si;
2538       basic_block bb = bbs[i];
2539
2540       if (bb->loop_father == loop->inner)
2541         factor = innerloop_iters;
2542       else
2543         factor = 1;
2544
2545       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2546         {
2547           gimple stmt = gsi_stmt (si);
2548           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2549
2550           if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
2551             continue;
2552
2553           /* Skip stmts that are not vectorized inside the loop.  */
2554           if (stmt_info
2555               && !STMT_VINFO_RELEVANT_P (stmt_info)
2556               && (!STMT_VINFO_LIVE_P (stmt_info)
2557                   || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
2558               && !STMT_VINFO_IN_PATTERN_P (stmt_info))
2559             continue;
2560
2561           if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
2562             {
2563               if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
2564                stmt_cost = vect_get_stmt_cost (scalar_load);
2565              else
2566                stmt_cost = vect_get_stmt_cost (scalar_store);
2567             }
2568           else
2569             stmt_cost = vect_get_stmt_cost (scalar_stmt);
2570
2571           scalar_single_iter_cost += stmt_cost * factor;
2572         }
2573     }
2574   return scalar_single_iter_cost;
2575 }
2576
2577 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */
2578 int
2579 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
2580                              int *peel_iters_epilogue,
2581                              int scalar_single_iter_cost,
2582                              stmt_vector_for_cost *prologue_cost_vec,
2583                              stmt_vector_for_cost *epilogue_cost_vec)
2584 {
2585   int retval = 0;
2586   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2587
2588   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2589     {
2590       *peel_iters_epilogue = vf/2;
2591       if (dump_enabled_p ())
2592         dump_printf_loc (MSG_NOTE, vect_location,
2593                          "cost model: epilogue peel iters set to vf/2 "
2594                          "because loop iterations are unknown .");
2595
2596       /* If peeled iterations are known but number of scalar loop
2597          iterations are unknown, count a taken branch per peeled loop.  */
2598       retval = record_stmt_cost (prologue_cost_vec, 2, cond_branch_taken,
2599                                  NULL, 0, vect_prologue);
2600     }
2601   else
2602     {
2603       int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
2604       peel_iters_prologue = niters < peel_iters_prologue ?
2605                             niters : peel_iters_prologue;
2606       *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
2607       /* If we need to peel for gaps, but no peeling is required, we have to
2608          peel VF iterations.  */
2609       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
2610         *peel_iters_epilogue = vf;
2611     }
2612
2613   if (peel_iters_prologue)
2614     retval += record_stmt_cost (prologue_cost_vec,
2615                                 peel_iters_prologue * scalar_single_iter_cost,
2616                                 scalar_stmt, NULL, 0, vect_prologue);
2617   if (*peel_iters_epilogue)
2618     retval += record_stmt_cost (epilogue_cost_vec,
2619                                 *peel_iters_epilogue * scalar_single_iter_cost,
2620                                 scalar_stmt, NULL, 0, vect_epilogue);
2621   return retval;
2622 }
2623
2624 /* Function vect_estimate_min_profitable_iters
2625
2626    Return the number of iterations required for the vector version of the
2627    loop to be profitable relative to the cost of the scalar version of the
2628    loop.  */
2629
2630 static void
2631 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
2632                                     int *ret_min_profitable_niters,
2633                                     int *ret_min_profitable_estimate)
2634 {
2635   int min_profitable_iters;
2636   int min_profitable_estimate;
2637   int peel_iters_prologue;
2638   int peel_iters_epilogue;
2639   unsigned vec_inside_cost = 0;
2640   int vec_outside_cost = 0;
2641   unsigned vec_prologue_cost = 0;
2642   unsigned vec_epilogue_cost = 0;
2643   int scalar_single_iter_cost = 0;
2644   int scalar_outside_cost = 0;
2645   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2646   int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2647   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2648
2649   /* Cost model disabled.  */
2650   if (!flag_vect_cost_model)
2651     {
2652       dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.");
2653       *ret_min_profitable_niters = 0;
2654       *ret_min_profitable_estimate = 0;
2655       return;
2656     }
2657
2658   /* Requires loop versioning tests to handle misalignment.  */
2659   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2660     {
2661       /*  FIXME: Make cost depend on complexity of individual check.  */
2662       unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
2663       (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
2664                             vect_prologue);
2665       dump_printf (MSG_NOTE,
2666                    "cost model: Adding cost of checks for loop "
2667                    "versioning to treat misalignment.\n");
2668     }
2669
2670   /* Requires loop versioning with alias checks.  */
2671   if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2672     {
2673       /*  FIXME: Make cost depend on complexity of individual check.  */
2674       unsigned len = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).length ();
2675       (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
2676                             vect_prologue);
2677       dump_printf (MSG_NOTE,
2678                    "cost model: Adding cost of checks for loop "
2679                    "versioning aliasing.\n");
2680     }
2681
2682   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2683       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2684     (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, NULL, 0,
2685                           vect_prologue);
2686
2687   /* Count statements in scalar loop.  Using this as scalar cost for a single
2688      iteration for now.
2689
2690      TODO: Add outer loop support.
2691
2692      TODO: Consider assigning different costs to different scalar
2693      statements.  */
2694
2695   scalar_single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
2696
2697   /* Add additional cost for the peeled instructions in prologue and epilogue
2698      loop.
2699
2700      FORNOW: If we don't know the value of peel_iters for prologue or epilogue
2701      at compile-time - we assume it's vf/2 (the worst would be vf-1).
2702
2703      TODO: Build an expression that represents peel_iters for prologue and
2704      epilogue to be used in a run-time test.  */
2705
2706   if (npeel  < 0)
2707     {
2708       peel_iters_prologue = vf/2;
2709       dump_printf (MSG_NOTE, "cost model: "
2710                    "prologue peel iters set to vf/2.");
2711
2712       /* If peeling for alignment is unknown, loop bound of main loop becomes
2713          unknown.  */
2714       peel_iters_epilogue = vf/2;
2715       dump_printf (MSG_NOTE, "cost model: "
2716                    "epilogue peel iters set to vf/2 because "
2717                    "peeling for alignment is unknown.");
2718
2719       /* If peeled iterations are unknown, count a taken branch and a not taken
2720          branch per peeled loop. Even if scalar loop iterations are known,
2721          vector iterations are not known since peeled prologue iterations are
2722          not known. Hence guards remain the same.  */
2723       (void) add_stmt_cost (target_cost_data, 2, cond_branch_taken,
2724                             NULL, 0, vect_prologue);
2725       (void) add_stmt_cost (target_cost_data, 2, cond_branch_not_taken,
2726                             NULL, 0, vect_prologue);
2727       /* FORNOW: Don't attempt to pass individual scalar instructions to
2728          the model; just assume linear cost for scalar iterations.  */
2729       (void) add_stmt_cost (target_cost_data,
2730                             peel_iters_prologue * scalar_single_iter_cost,
2731                             scalar_stmt, NULL, 0, vect_prologue);
2732       (void) add_stmt_cost (target_cost_data,
2733                             peel_iters_epilogue * scalar_single_iter_cost,
2734                             scalar_stmt, NULL, 0, vect_epilogue);
2735     }
2736   else
2737     {
2738       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
2739       stmt_info_for_cost *si;
2740       int j;
2741       void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2742
2743       prologue_cost_vec.create (2);
2744       epilogue_cost_vec.create (2);
2745       peel_iters_prologue = npeel;
2746
2747       (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue,
2748                                           &peel_iters_epilogue,
2749                                           scalar_single_iter_cost,
2750                                           &prologue_cost_vec,
2751                                           &epilogue_cost_vec);
2752
2753       FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
2754         {
2755           struct _stmt_vec_info *stmt_info
2756             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
2757           (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
2758                                 si->misalign, vect_prologue);
2759         }
2760
2761       FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
2762         {
2763           struct _stmt_vec_info *stmt_info
2764             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
2765           (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
2766                                 si->misalign, vect_epilogue);
2767         }
2768
2769       prologue_cost_vec.release ();
2770       epilogue_cost_vec.release ();
2771     }
2772
2773   /* FORNOW: The scalar outside cost is incremented in one of the
2774      following ways:
2775
2776      1. The vectorizer checks for alignment and aliasing and generates
2777      a condition that allows dynamic vectorization.  A cost model
2778      check is ANDED with the versioning condition.  Hence scalar code
2779      path now has the added cost of the versioning check.
2780
2781        if (cost > th & versioning_check)
2782          jmp to vector code
2783
2784      Hence run-time scalar is incremented by not-taken branch cost.
2785
2786      2. The vectorizer then checks if a prologue is required.  If the
2787      cost model check was not done before during versioning, it has to
2788      be done before the prologue check.
2789
2790        if (cost <= th)
2791          prologue = scalar_iters
2792        if (prologue == 0)
2793          jmp to vector code
2794        else
2795          execute prologue
2796        if (prologue == num_iters)
2797          go to exit
2798
2799      Hence the run-time scalar cost is incremented by a taken branch,
2800      plus a not-taken branch, plus a taken branch cost.
2801
2802      3. The vectorizer then checks if an epilogue is required.  If the
2803      cost model check was not done before during prologue check, it
2804      has to be done with the epilogue check.
2805
2806        if (prologue == 0)
2807          jmp to vector code
2808        else
2809          execute prologue
2810        if (prologue == num_iters)
2811          go to exit
2812        vector code:
2813          if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
2814            jmp to epilogue
2815
2816      Hence the run-time scalar cost should be incremented by 2 taken
2817      branches.
2818
2819      TODO: The back end may reorder the BBS's differently and reverse
2820      conditions/branch directions.  Change the estimates below to
2821      something more reasonable.  */
2822
2823   /* If the number of iterations is known and we do not do versioning, we can
2824      decide whether to vectorize at compile time.  Hence the scalar version
2825      do not carry cost model guard costs.  */
2826   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2827       || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2828       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2829     {
2830       /* Cost model check occurs at versioning.  */
2831       if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2832           || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2833         scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken);
2834       else
2835         {
2836           /* Cost model check occurs at prologue generation.  */
2837           if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
2838             scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
2839               + vect_get_stmt_cost (cond_branch_not_taken);
2840           /* Cost model check occurs at epilogue generation.  */
2841           else
2842             scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken);
2843         }
2844     }
2845
2846   /* Complete the target-specific cost calculations.  */
2847   finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
2848                &vec_inside_cost, &vec_epilogue_cost);
2849
2850   vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
2851
2852   /* Calculate number of iterations required to make the vector version
2853      profitable, relative to the loop bodies only.  The following condition
2854      must hold true:
2855      SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
2856      where
2857      SIC = scalar iteration cost, VIC = vector iteration cost,
2858      VOC = vector outside cost, VF = vectorization factor,
2859      PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
2860      SOC = scalar outside cost for run time cost model check.  */
2861
2862   if ((scalar_single_iter_cost * vf) > (int) vec_inside_cost)
2863     {
2864       if (vec_outside_cost <= 0)
2865         min_profitable_iters = 1;
2866       else
2867         {
2868           min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
2869                                   - vec_inside_cost * peel_iters_prologue
2870                                   - vec_inside_cost * peel_iters_epilogue)
2871                                  / ((scalar_single_iter_cost * vf)
2872                                     - vec_inside_cost);
2873
2874           if ((scalar_single_iter_cost * vf * min_profitable_iters)
2875               <= (((int) vec_inside_cost * min_profitable_iters)
2876                   + (((int) vec_outside_cost - scalar_outside_cost) * vf)))
2877             min_profitable_iters++;
2878         }
2879     }
2880   /* vector version will never be profitable.  */
2881   else
2882     {
2883       if (dump_enabled_p ())
2884         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2885                          "cost model: the vector iteration cost = %d "
2886                          "divided by the scalar iteration cost = %d "
2887                          "is greater or equal to the vectorization factor = %d.",
2888                          vec_inside_cost, scalar_single_iter_cost, vf);
2889       *ret_min_profitable_niters = -1;
2890       *ret_min_profitable_estimate = -1;
2891       return;
2892     }
2893
2894   if (dump_enabled_p ())
2895     {
2896       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2897       dump_printf (MSG_NOTE, "  Vector inside of loop cost: %d\n",
2898                    vec_inside_cost);
2899       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n",
2900                    vec_prologue_cost);
2901       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n",
2902                    vec_epilogue_cost);
2903       dump_printf (MSG_NOTE, "  Scalar iteration cost: %d\n",
2904                    scalar_single_iter_cost);
2905       dump_printf (MSG_NOTE, "  Scalar outside cost: %d\n",
2906                    scalar_outside_cost);
2907       dump_printf (MSG_NOTE, "  Vector outside cost: %d\n",
2908                    vec_outside_cost);
2909       dump_printf (MSG_NOTE, "  prologue iterations: %d\n",
2910                    peel_iters_prologue);
2911       dump_printf (MSG_NOTE, "  epilogue iterations: %d\n",
2912                    peel_iters_epilogue);
2913       dump_printf (MSG_NOTE,
2914                    "  Calculated minimum iters for profitability: %d\n",
2915                    min_profitable_iters);
2916     }
2917
2918   min_profitable_iters =
2919         min_profitable_iters < vf ? vf : min_profitable_iters;
2920
2921   /* Because the condition we create is:
2922      if (niters <= min_profitable_iters)
2923        then skip the vectorized loop.  */
2924   min_profitable_iters--;
2925
2926   if (dump_enabled_p ())
2927     dump_printf_loc (MSG_NOTE, vect_location,
2928                      "  Runtime profitability threshold = %d\n", min_profitable_iters);
2929
2930   *ret_min_profitable_niters = min_profitable_iters;
2931
2932   /* Calculate number of iterations required to make the vector version
2933      profitable, relative to the loop bodies only.
2934
2935      Non-vectorized variant is SIC * niters and it must win over vector
2936      variant on the expected loop trip count.  The following condition must hold true:
2937      SIC * niters > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC + SOC  */
2938
2939   if (vec_outside_cost <= 0)
2940     min_profitable_estimate = 1;
2941   else
2942     {
2943       min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) * vf
2944                                  - vec_inside_cost * peel_iters_prologue
2945                                  - vec_inside_cost * peel_iters_epilogue)
2946                                  / ((scalar_single_iter_cost * vf)
2947                                    - vec_inside_cost);
2948     }
2949   min_profitable_estimate --;
2950   min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters);
2951   if (dump_enabled_p ())
2952     dump_printf_loc (MSG_NOTE, vect_location,
2953                      "  Static estimate profitability threshold = %d\n",
2954                       min_profitable_iters);
2955
2956   *ret_min_profitable_estimate = min_profitable_estimate;
2957 }
2958
2959
2960 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
2961    functions. Design better to avoid maintenance issues.  */
2962
2963 /* Function vect_model_reduction_cost.
2964
2965    Models cost for a reduction operation, including the vector ops
2966    generated within the strip-mine loop, the initial definition before
2967    the loop, and the epilogue code that must be generated.  */
2968
2969 static bool
2970 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
2971                            int ncopies)
2972 {
2973   int prologue_cost = 0, epilogue_cost = 0;
2974   enum tree_code code;
2975   optab optab;
2976   tree vectype;
2977   gimple stmt, orig_stmt;
2978   tree reduction_op;
2979   enum machine_mode mode;
2980   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2981   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2982   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2983
2984   /* Cost of reduction op inside loop.  */
2985   unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
2986                                         stmt_info, 0, vect_body);
2987   stmt = STMT_VINFO_STMT (stmt_info);
2988
2989   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
2990     {
2991     case GIMPLE_SINGLE_RHS:
2992       gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)) == ternary_op);
2993       reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), 2);
2994       break;
2995     case GIMPLE_UNARY_RHS:
2996       reduction_op = gimple_assign_rhs1 (stmt);
2997       break;
2998     case GIMPLE_BINARY_RHS:
2999       reduction_op = gimple_assign_rhs2 (stmt);
3000       break;
3001     case GIMPLE_TERNARY_RHS:
3002       reduction_op = gimple_assign_rhs3 (stmt);
3003       break;
3004     default:
3005       gcc_unreachable ();
3006     }
3007
3008   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3009   if (!vectype)
3010     {
3011       if (dump_enabled_p ())
3012         {
3013           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3014                            "unsupported data-type ");
3015           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3016                              TREE_TYPE (reduction_op));
3017         }
3018       return false;
3019    }
3020
3021   mode = TYPE_MODE (vectype);
3022   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3023
3024   if (!orig_stmt)
3025     orig_stmt = STMT_VINFO_STMT (stmt_info);
3026
3027   code = gimple_assign_rhs_code (orig_stmt);
3028
3029   /* Add in cost for initial definition.  */
3030   prologue_cost += add_stmt_cost (target_cost_data, 1, scalar_to_vec,
3031                                   stmt_info, 0, vect_prologue);
3032
3033   /* Determine cost of epilogue code.
3034
3035      We have a reduction operator that will reduce the vector in one statement.
3036      Also requires scalar extract.  */
3037
3038   if (!nested_in_vect_loop_p (loop, orig_stmt))
3039     {
3040       if (reduc_code != ERROR_MARK)
3041         {
3042           epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
3043                                           stmt_info, 0, vect_epilogue);
3044           epilogue_cost += add_stmt_cost (target_cost_data, 1, vec_to_scalar,
3045                                           stmt_info, 0, vect_epilogue);
3046         }
3047       else
3048         {
3049           int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
3050           tree bitsize =
3051             TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
3052           int element_bitsize = tree_low_cst (bitsize, 1);
3053           int nelements = vec_size_in_bits / element_bitsize;
3054
3055           optab = optab_for_tree_code (code, vectype, optab_default);
3056
3057           /* We have a whole vector shift available.  */
3058           if (VECTOR_MODE_P (mode)
3059               && optab_handler (optab, mode) != CODE_FOR_nothing
3060               && optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
3061             {
3062               /* Final reduction via vector shifts and the reduction operator.
3063                  Also requires scalar extract.  */
3064               epilogue_cost += add_stmt_cost (target_cost_data,
3065                                               exact_log2 (nelements) * 2,
3066                                               vector_stmt, stmt_info, 0,
3067                                               vect_epilogue);
3068               epilogue_cost += add_stmt_cost (target_cost_data, 1,
3069                                               vec_to_scalar, stmt_info, 0,
3070                                               vect_epilogue);
3071             }
3072           else
3073             /* Use extracts and reduction op for final reduction.  For N
3074                elements, we have N extracts and N-1 reduction ops.  */
3075             epilogue_cost += add_stmt_cost (target_cost_data,
3076                                             nelements + nelements - 1,
3077                                             vector_stmt, stmt_info, 0,
3078                                             vect_epilogue);
3079         }
3080     }
3081
3082   if (dump_enabled_p ())
3083     dump_printf (MSG_NOTE,
3084                  "vect_model_reduction_cost: inside_cost = %d, "
3085                  "prologue_cost = %d, epilogue_cost = %d .", inside_cost,
3086                  prologue_cost, epilogue_cost);
3087
3088   return true;
3089 }
3090
3091
3092 /* Function vect_model_induction_cost.
3093
3094    Models cost for induction operations.  */
3095
3096 static void
3097 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
3098 {
3099   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3100   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3101   unsigned inside_cost, prologue_cost;
3102
3103   /* loop cost for vec_loop.  */
3104   inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3105                                stmt_info, 0, vect_body);
3106
3107   /* prologue cost for vec_init and vec_step.  */
3108   prologue_cost = add_stmt_cost (target_cost_data, 2, scalar_to_vec,
3109                                  stmt_info, 0, vect_prologue);
3110
3111   if (dump_enabled_p ())
3112     dump_printf_loc (MSG_NOTE, vect_location,
3113                      "vect_model_induction_cost: inside_cost = %d, "
3114                      "prologue_cost = %d .", inside_cost, prologue_cost);
3115 }
3116
3117
3118 /* Function get_initial_def_for_induction
3119
3120    Input:
3121    STMT - a stmt that performs an induction operation in the loop.
3122    IV_PHI - the initial value of the induction variable
3123
3124    Output:
3125    Return a vector variable, initialized with the first VF values of
3126    the induction variable.  E.g., for an iv with IV_PHI='X' and
3127    evolution S, for a vector of 4 units, we want to return:
3128    [X, X + S, X + 2*S, X + 3*S].  */
3129
3130 static tree
3131 get_initial_def_for_induction (gimple iv_phi)
3132 {
3133   stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
3134   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3135   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3136   tree scalar_type;
3137   tree vectype;
3138   int nunits;
3139   edge pe = loop_preheader_edge (loop);
3140   struct loop *iv_loop;
3141   basic_block new_bb;
3142   tree new_vec, vec_init, vec_step, t;
3143   tree access_fn;
3144   tree new_var;
3145   tree new_name;
3146   gimple init_stmt, induction_phi, new_stmt;
3147   tree induc_def, vec_def, vec_dest;
3148   tree init_expr, step_expr;
3149   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3150   int i;
3151   bool ok;
3152   int ncopies;
3153   tree expr;
3154   stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
3155   bool nested_in_vect_loop = false;
3156   gimple_seq stmts = NULL;
3157   imm_use_iterator imm_iter;
3158   use_operand_p use_p;
3159   gimple exit_phi;
3160   edge latch_e;
3161   tree loop_arg;
3162   gimple_stmt_iterator si;
3163   basic_block bb = gimple_bb (iv_phi);
3164   tree stepvectype;
3165   tree resvectype;
3166
3167   /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop?  */
3168   if (nested_in_vect_loop_p (loop, iv_phi))
3169     {
3170       nested_in_vect_loop = true;
3171       iv_loop = loop->inner;
3172     }
3173   else
3174     iv_loop = loop;
3175   gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);
3176
3177   latch_e = loop_latch_edge (iv_loop);
3178   loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
3179
3180   access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
3181   gcc_assert (access_fn);
3182   STRIP_NOPS (access_fn);
3183   ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
3184                                     &init_expr, &step_expr);
3185   gcc_assert (ok);
3186   pe = loop_preheader_edge (iv_loop);
3187
3188   scalar_type = TREE_TYPE (init_expr);
3189   vectype = get_vectype_for_scalar_type (scalar_type);
3190   resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
3191   gcc_assert (vectype);
3192   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3193   ncopies = vf / nunits;
3194
3195   gcc_assert (phi_info);
3196   gcc_assert (ncopies >= 1);
3197
3198   /* Find the first insertion point in the BB.  */
3199   si = gsi_after_labels (bb);
3200
3201   /* Create the vector that holds the initial_value of the induction.  */
3202   if (nested_in_vect_loop)
3203     {
3204       /* iv_loop is nested in the loop to be vectorized.  init_expr had already
3205          been created during vectorization of previous stmts.  We obtain it
3206          from the STMT_VINFO_VEC_STMT of the defining stmt.  */
3207       tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi,
3208                                            loop_preheader_edge (iv_loop));
3209       vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
3210       /* If the initial value is not of proper type, convert it.  */
3211       if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init)))
3212         {
3213           new_stmt = gimple_build_assign_with_ops
3214               (VIEW_CONVERT_EXPR,
3215                vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_"),
3216                build1 (VIEW_CONVERT_EXPR, vectype, vec_init), NULL_TREE);
3217           vec_init = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3218           gimple_assign_set_lhs (new_stmt, vec_init);
3219           new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop),
3220                                                  new_stmt);
3221           gcc_assert (!new_bb);
3222           set_vinfo_for_stmt (new_stmt,
3223                               new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3224         }
3225     }
3226   else
3227     {
3228       vec<constructor_elt, va_gc> *v;
3229
3230       /* iv_loop is the loop to be vectorized. Create:
3231          vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr)  */
3232       new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
3233       new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
3234       if (stmts)
3235         {
3236           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3237           gcc_assert (!new_bb);
3238         }
3239
3240       vec_alloc (v, nunits);
3241       bool constant_p = is_gimple_min_invariant (new_name);
3242       CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3243       for (i = 1; i < nunits; i++)
3244         {
3245           /* Create: new_name_i = new_name + step_expr  */
3246           enum tree_code code = POINTER_TYPE_P (scalar_type)
3247                                 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3248           new_name = fold_build2 (code, scalar_type, new_name, step_expr);
3249           if (!is_gimple_min_invariant (new_name))
3250             {
3251               init_stmt = gimple_build_assign (new_var, new_name);
3252               new_name = make_ssa_name (new_var, init_stmt);
3253               gimple_assign_set_lhs (init_stmt, new_name);
3254               new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
3255               gcc_assert (!new_bb);
3256               if (dump_enabled_p ())
3257                 {
3258                   dump_printf_loc (MSG_NOTE, vect_location,
3259                                    "created new init_stmt: ");
3260                   dump_gimple_stmt (MSG_NOTE, TDF_SLIM, init_stmt, 0);
3261                 }
3262               constant_p = false;
3263             }
3264           CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3265         }
3266       /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1]  */
3267       if (constant_p)
3268         new_vec = build_vector_from_ctor (vectype, v);
3269       else
3270         new_vec = build_constructor (vectype, v);
3271       vec_init = vect_init_vector (iv_phi, new_vec, vectype, NULL);
3272     }
3273
3274
3275   /* Create the vector that holds the step of the induction.  */
3276   if (nested_in_vect_loop)
3277     /* iv_loop is nested in the loop to be vectorized. Generate:
3278        vec_step = [S, S, S, S]  */
3279     new_name = step_expr;
3280   else
3281     {
3282       /* iv_loop is the loop to be vectorized. Generate:
3283           vec_step = [VF*S, VF*S, VF*S, VF*S]  */
3284       if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
3285         {
3286           expr = build_int_cst (integer_type_node, vf);
3287           expr = fold_convert (TREE_TYPE (step_expr), expr);
3288         }
3289       else
3290         expr = build_int_cst (TREE_TYPE (step_expr), vf);
3291       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3292                               expr, step_expr);
3293       if (TREE_CODE (step_expr) == SSA_NAME)
3294         new_name = vect_init_vector (iv_phi, new_name,
3295                                      TREE_TYPE (step_expr), NULL);
3296     }
3297
3298   t = unshare_expr (new_name);
3299   gcc_assert (CONSTANT_CLASS_P (new_name)
3300               || TREE_CODE (new_name) == SSA_NAME);
3301   stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
3302   gcc_assert (stepvectype);
3303   new_vec = build_vector_from_val (stepvectype, t);
3304   vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3305
3306
3307   /* Create the following def-use cycle:
3308      loop prolog:
3309          vec_init = ...
3310          vec_step = ...
3311      loop:
3312          vec_iv = PHI <vec_init, vec_loop>
3313          ...
3314          STMT
3315          ...
3316          vec_loop = vec_iv + vec_step;  */
3317
3318   /* Create the induction-phi that defines the induction-operand.  */
3319   vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
3320   induction_phi = create_phi_node (vec_dest, iv_loop->header);
3321   set_vinfo_for_stmt (induction_phi,
3322                       new_stmt_vec_info (induction_phi, loop_vinfo, NULL));
3323   induc_def = PHI_RESULT (induction_phi);
3324
3325   /* Create the iv update inside the loop  */
3326   new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
3327                                            induc_def, vec_step);
3328   vec_def = make_ssa_name (vec_dest, new_stmt);
3329   gimple_assign_set_lhs (new_stmt, vec_def);
3330   gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3331   set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo,
3332                                                    NULL));
3333
3334   /* Set the arguments of the phi node:  */
3335   add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
3336   add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
3337                UNKNOWN_LOCATION);
3338
3339
3340   /* In case that vectorization factor (VF) is bigger than the number
3341      of elements that we can fit in a vectype (nunits), we have to generate
3342      more than one vector stmt - i.e - we need to "unroll" the
3343      vector stmt by a factor VF/nunits.  For more details see documentation
3344      in vectorizable_operation.  */
3345
3346   if (ncopies > 1)
3347     {
3348       stmt_vec_info prev_stmt_vinfo;
3349       /* FORNOW. This restriction should be relaxed.  */
3350       gcc_assert (!nested_in_vect_loop);
3351
3352       /* Create the vector that holds the step of the induction.  */
3353       if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
3354         {
3355           expr = build_int_cst (integer_type_node, nunits);
3356           expr = fold_convert (TREE_TYPE (step_expr), expr);
3357         }
3358       else
3359         expr = build_int_cst (TREE_TYPE (step_expr), nunits);
3360       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3361                               expr, step_expr);
3362       if (TREE_CODE (step_expr) == SSA_NAME)
3363         new_name = vect_init_vector (iv_phi, new_name,
3364                                      TREE_TYPE (step_expr), NULL);
3365       t = unshare_expr (new_name);
3366       gcc_assert (CONSTANT_CLASS_P (new_name)
3367                   || TREE_CODE (new_name) == SSA_NAME);
3368       new_vec = build_vector_from_val (stepvectype, t);
3369       vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3370
3371       vec_def = induc_def;
3372       prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
3373       for (i = 1; i < ncopies; i++)
3374         {
3375           /* vec_i = vec_prev + vec_step  */
3376           new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
3377                                                    vec_def, vec_step);
3378           vec_def = make_ssa_name (vec_dest, new_stmt);
3379           gimple_assign_set_lhs (new_stmt, vec_def);
3380
3381           gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3382           if (!useless_type_conversion_p (resvectype, vectype))
3383             {
3384               new_stmt = gimple_build_assign_with_ops
3385                   (VIEW_CONVERT_EXPR,
3386                    vect_get_new_vect_var (resvectype, vect_simple_var,
3387                                           "vec_iv_"),
3388                    build1 (VIEW_CONVERT_EXPR, resvectype,
3389                            gimple_assign_lhs (new_stmt)), NULL_TREE);
3390               gimple_assign_set_lhs (new_stmt,
3391                                      make_ssa_name
3392                                        (gimple_assign_lhs (new_stmt), new_stmt));
3393               gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3394             }
3395           set_vinfo_for_stmt (new_stmt,
3396                               new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3397           STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
3398           prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
3399         }
3400     }
3401
3402   if (nested_in_vect_loop)
3403     {
3404       /* Find the loop-closed exit-phi of the induction, and record
3405          the final vector of induction results:  */
3406       exit_phi = NULL;
3407       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
3408         {
3409           if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (USE_STMT (use_p))))
3410             {
3411               exit_phi = USE_STMT (use_p);
3412               break;
3413             }
3414         }
3415       if (exit_phi)
3416         {
3417           stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
3418           /* FORNOW. Currently not supporting the case that an inner-loop induction
3419              is not used in the outer-loop (i.e. only outside the outer-loop).  */
3420           gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
3421                       && !STMT_VINFO_LIVE_P (stmt_vinfo));
3422
3423           STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
3424           if (dump_enabled_p ())
3425             {
3426               dump_printf_loc (MSG_NOTE, vect_location,
3427                                "vector of inductions after inner-loop:");
3428               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
3429             }
3430         }
3431     }
3432
3433
3434   if (dump_enabled_p ())
3435     {
3436       dump_printf_loc (MSG_NOTE, vect_location,
3437                        "transform induction: created def-use cycle: ");
3438       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, induction_phi, 0);
3439       dump_printf (MSG_NOTE, "\n");
3440       dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
3441                         SSA_NAME_DEF_STMT (vec_def), 0);
3442     }
3443
3444   STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
3445   if (!useless_type_conversion_p (resvectype, vectype))
3446     {
3447       new_stmt = gimple_build_assign_with_ops
3448          (VIEW_CONVERT_EXPR,
3449           vect_get_new_vect_var (resvectype, vect_simple_var, "vec_iv_"),
3450           build1 (VIEW_CONVERT_EXPR, resvectype, induc_def), NULL_TREE);
3451       induc_def = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3452       gimple_assign_set_lhs (new_stmt, induc_def);
3453       si = gsi_after_labels (bb);
3454       gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3455       set_vinfo_for_stmt (new_stmt,
3456                           new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3457       STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
3458         = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
3459     }
3460
3461   return induc_def;
3462 }
3463
3464
3465 /* Function get_initial_def_for_reduction
3466
3467    Input:
3468    STMT - a stmt that performs a reduction operation in the loop.
3469    INIT_VAL - the initial value of the reduction variable
3470
3471    Output:
3472    ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
3473         of the reduction (used for adjusting the epilog - see below).
3474    Return a vector variable, initialized according to the operation that STMT
3475         performs. This vector will be used as the initial value of the
3476         vector of partial results.
3477
3478    Option1 (adjust in epilog): Initialize the vector as follows:
3479      add/bit or/xor:    [0,0,...,0,0]
3480      mult/bit and:      [1,1,...,1,1]
3481      min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
3482    and when necessary (e.g. add/mult case) let the caller know
3483    that it needs to adjust the result by init_val.
3484
3485    Option2: Initialize the vector as follows:
3486      add/bit or/xor:    [init_val,0,0,...,0]
3487      mult/bit and:      [init_val,1,1,...,1]
3488      min/max/cond_expr: [init_val,init_val,...,init_val]
3489    and no adjustments are needed.
3490
3491    For example, for the following code:
3492
3493    s = init_val;
3494    for (i=0;i<n;i++)
3495      s = s + a[i];
3496
3497    STMT is 's = s + a[i]', and the reduction variable is 's'.
3498    For a vector of 4 units, we want to return either [0,0,0,init_val],
3499    or [0,0,0,0] and let the caller know that it needs to adjust
3500    the result at the end by 'init_val'.
3501
3502    FORNOW, we are using the 'adjust in epilog' scheme, because this way the
3503    initialization vector is simpler (same element in all entries), if
3504    ADJUSTMENT_DEF is not NULL, and Option2 otherwise.
3505
3506    A cost model should help decide between these two schemes.  */
3507
3508 tree
3509 get_initial_def_for_reduction (gimple stmt, tree init_val,
3510                                tree *adjustment_def)
3511 {
3512   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3513   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3514   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3515   tree scalar_type = TREE_TYPE (init_val);
3516   tree vectype = get_vectype_for_scalar_type (scalar_type);
3517   int nunits;
3518   enum tree_code code = gimple_assign_rhs_code (stmt);
3519   tree def_for_init;
3520   tree init_def;
3521   tree *elts;
3522   int i;
3523   bool nested_in_vect_loop = false;
3524   tree init_value;
3525   REAL_VALUE_TYPE real_init_val = dconst0;
3526   int int_init_val = 0;
3527   gimple def_stmt = NULL;
3528
3529   gcc_assert (vectype);
3530   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3531
3532   gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
3533               || SCALAR_FLOAT_TYPE_P (scalar_type));
3534
3535   if (nested_in_vect_loop_p (loop, stmt))
3536     nested_in_vect_loop = true;
3537   else
3538     gcc_assert (loop == (gimple_bb (stmt))->loop_father);
3539
3540   /* In case of double reduction we only create a vector variable to be put
3541      in the reduction phi node.  The actual statement creation is done in
3542      vect_create_epilog_for_reduction.  */
3543   if (adjustment_def && nested_in_vect_loop
3544       && TREE_CODE (init_val) == SSA_NAME
3545       && (def_stmt = SSA_NAME_DEF_STMT (init_val))
3546       && gimple_code (def_stmt) == GIMPLE_PHI
3547       && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
3548       && vinfo_for_stmt (def_stmt)
3549       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
3550           == vect_double_reduction_def)
3551     {
3552       *adjustment_def = NULL;
3553       return vect_create_destination_var (init_val, vectype);
3554     }
3555
3556   if (TREE_CONSTANT (init_val))
3557     {
3558       if (SCALAR_FLOAT_TYPE_P (scalar_type))
3559         init_value = build_real (scalar_type, TREE_REAL_CST (init_val));
3560       else
3561         init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val));
3562     }
3563   else
3564     init_value = init_val;
3565
3566   switch (code)
3567     {
3568       case WIDEN_SUM_EXPR:
3569       case DOT_PROD_EXPR:
3570       case PLUS_EXPR:
3571       case MINUS_EXPR:
3572       case BIT_IOR_EXPR:
3573       case BIT_XOR_EXPR:
3574       case MULT_EXPR:
3575       case BIT_AND_EXPR:
3576         /* ADJUSMENT_DEF is NULL when called from
3577            vect_create_epilog_for_reduction to vectorize double reduction.  */
3578         if (adjustment_def)
3579           {
3580             if (nested_in_vect_loop)
3581               *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt,
3582                                                               NULL);
3583             else
3584               *adjustment_def = init_val;
3585           }
3586
3587         if (code == MULT_EXPR)
3588           {
3589             real_init_val = dconst1;
3590             int_init_val = 1;
3591           }
3592
3593         if (code == BIT_AND_EXPR)
3594           int_init_val = -1;
3595
3596         if (SCALAR_FLOAT_TYPE_P (scalar_type))
3597           def_for_init = build_real (scalar_type, real_init_val);
3598         else
3599           def_for_init = build_int_cst (scalar_type, int_init_val);
3600
3601         /* Create a vector of '0' or '1' except the first element.  */
3602         elts = XALLOCAVEC (tree, nunits);
3603         for (i = nunits - 2; i >= 0; --i)
3604           elts[i + 1] = def_for_init;
3605
3606         /* Option1: the first element is '0' or '1' as well.  */
3607         if (adjustment_def)
3608           {
3609             elts[0] = def_for_init;
3610             init_def = build_vector (vectype, elts);
3611             break;
3612           }
3613
3614         /* Option2: the first element is INIT_VAL.  */
3615         elts[0] = init_val;
3616         if (TREE_CONSTANT (init_val))
3617           init_def = build_vector (vectype, elts);
3618         else
3619           {
3620             vec<constructor_elt, va_gc> *v;
3621             vec_alloc (v, nunits);
3622             CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init_val);
3623             for (i = 1; i < nunits; ++i)
3624               CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[i]);
3625             init_def = build_constructor (vectype, v);
3626           }
3627
3628         break;
3629
3630       case MIN_EXPR:
3631       case MAX_EXPR:
3632       case COND_EXPR:
3633         if (adjustment_def)
3634           {
3635             *adjustment_def = NULL_TREE;
3636             init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL);
3637             break;
3638           }
3639
3640         init_def = build_vector_from_val (vectype, init_value);
3641         break;
3642
3643       default:
3644         gcc_unreachable ();
3645     }
3646
3647   return init_def;
3648 }
3649
3650
3651 /* Function vect_create_epilog_for_reduction
3652
3653    Create code at the loop-epilog to finalize the result of a reduction
3654    computation.
3655
3656    VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector
3657      reduction statements.
3658    STMT is the scalar reduction stmt that is being vectorized.
3659    NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
3660      number of elements that we can fit in a vectype (nunits).  In this case
3661      we have to generate more than one vector stmt - i.e - we need to "unroll"
3662      the vector stmt by a factor VF/nunits.  For more details see documentation
3663      in vectorizable_operation.
3664    REDUC_CODE is the tree-code for the epilog reduction.
3665    REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
3666      computation.
3667    REDUC_INDEX is the index of the operand in the right hand side of the
3668      statement that is defined by REDUCTION_PHI.
3669    DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
3670    SLP_NODE is an SLP node containing a group of reduction statements. The
3671      first one in this group is STMT.
3672
3673    This function:
3674    1. Creates the reduction def-use cycles: sets the arguments for
3675       REDUCTION_PHIS:
3676       The loop-entry argument is the vectorized initial-value of the reduction.
3677       The loop-latch argument is taken from VECT_DEFS - the vector of partial
3678       sums.
3679    2. "Reduces" each vector of partial results VECT_DEFS into a single result,
3680       by applying the operation specified by REDUC_CODE if available, or by
3681       other means (whole-vector shifts or a scalar loop).
3682       The function also creates a new phi node at the loop exit to preserve
3683       loop-closed form, as illustrated below.
3684
3685      The flow at the entry to this function:
3686
3687         loop:
3688           vec_def = phi <null, null>            # REDUCTION_PHI
3689           VECT_DEF = vector_stmt                # vectorized form of STMT
3690           s_loop = scalar_stmt                  # (scalar) STMT
3691         loop_exit:
3692           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
3693           use <s_out0>
3694           use <s_out0>
3695
3696      The above is transformed by this function into:
3697
3698         loop:
3699           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
3700           VECT_DEF = vector_stmt                # vectorized form of STMT
3701           s_loop = scalar_stmt                  # (scalar) STMT
3702         loop_exit:
3703           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
3704           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
3705           v_out2 = reduce <v_out1>
3706           s_out3 = extract_field <v_out2, 0>
3707           s_out4 = adjust_result <s_out3>
3708           use <s_out4>
3709           use <s_out4>
3710 */
3711
3712 static void
3713 vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
3714                                   int ncopies, enum tree_code reduc_code,
3715                                   vec<gimple> reduction_phis,
3716                                   int reduc_index, bool double_reduc,
3717                                   slp_tree slp_node)
3718 {
3719   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3720   stmt_vec_info prev_phi_info;
3721   tree vectype;
3722   enum machine_mode mode;
3723   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3724   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
3725   basic_block exit_bb;
3726   tree scalar_dest;
3727   tree scalar_type;
3728   gimple new_phi = NULL, phi;
3729   gimple_stmt_iterator exit_gsi;
3730   tree vec_dest;
3731   tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
3732   gimple epilog_stmt = NULL;
3733   enum tree_code code = gimple_assign_rhs_code (stmt);
3734   gimple exit_phi;
3735   tree bitsize, bitpos;
3736   tree adjustment_def = NULL;
3737   tree vec_initial_def = NULL;
3738   tree reduction_op, expr, def;
3739   tree orig_name, scalar_result;
3740   imm_use_iterator imm_iter, phi_imm_iter;
3741   use_operand_p use_p, phi_use_p;
3742   bool extract_scalar_result = false;
3743   gimple use_stmt, orig_stmt, reduction_phi = NULL;
3744   bool nested_in_vect_loop = false;
3745   vec<gimple> new_phis = vNULL;
3746   vec<gimple> inner_phis = vNULL;
3747   enum vect_def_type dt = vect_unknown_def_type;
3748   int j, i;
3749   vec<tree> scalar_results = vNULL;
3750   unsigned int group_size = 1, k, ratio;
3751   vec<tree> vec_initial_defs = vNULL;
3752   vec<gimple> phis;
3753   bool slp_reduc = false;
3754   tree new_phi_result;
3755   gimple inner_phi = NULL;
3756
3757   if (slp_node)
3758     group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
3759
3760   if (nested_in_vect_loop_p (loop, stmt))
3761     {
3762       outer_loop = loop;
3763       loop = loop->inner;
3764       nested_in_vect_loop = true;
3765       gcc_assert (!slp_node);
3766     }
3767
3768   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3769     {
3770     case GIMPLE_SINGLE_RHS:
3771       gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
3772                   == ternary_op);
3773       reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
3774       break;
3775     case GIMPLE_UNARY_RHS:
3776       reduction_op = gimple_assign_rhs1 (stmt);
3777       break;
3778     case GIMPLE_BINARY_RHS:
3779       reduction_op = reduc_index ?
3780                      gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt);
3781       break;
3782     case GIMPLE_TERNARY_RHS:
3783       reduction_op = gimple_op (stmt, reduc_index + 1);
3784       break;
3785     default:
3786       gcc_unreachable ();
3787     }
3788
3789   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3790   gcc_assert (vectype);
3791   mode = TYPE_MODE (vectype);
3792
3793   /* 1. Create the reduction def-use cycle:
3794      Set the arguments of REDUCTION_PHIS, i.e., transform
3795
3796         loop:
3797           vec_def = phi <null, null>            # REDUCTION_PHI
3798           VECT_DEF = vector_stmt                # vectorized form of STMT
3799           ...
3800
3801      into:
3802
3803         loop:
3804           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
3805           VECT_DEF = vector_stmt                # vectorized form of STMT
3806           ...
3807
3808      (in case of SLP, do it for all the phis). */
3809
3810   /* Get the loop-entry arguments.  */
3811   if (slp_node)
3812     vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
3813                        NULL, slp_node, reduc_index);
3814   else
3815     {
3816       vec_initial_defs.create (1);
3817      /* For the case of reduction, vect_get_vec_def_for_operand returns
3818         the scalar def before the loop, that defines the initial value
3819         of the reduction variable.  */
3820       vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
3821                                                       &adjustment_def);
3822       vec_initial_defs.quick_push (vec_initial_def);
3823     }
3824
3825   /* Set phi nodes arguments.  */
3826   FOR_EACH_VEC_ELT (reduction_phis, i, phi)
3827     {
3828       tree vec_init_def = vec_initial_defs[i];
3829       tree def = vect_defs[i];
3830       for (j = 0; j < ncopies; j++)
3831         {
3832           /* Set the loop-entry arg of the reduction-phi.  */
3833           add_phi_arg (phi, vec_init_def, loop_preheader_edge (loop),
3834                        UNKNOWN_LOCATION);
3835
3836           /* Set the loop-latch arg for the reduction-phi.  */
3837           if (j > 0)
3838             def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);
3839
3840           add_phi_arg (phi, def, loop_latch_edge (loop), UNKNOWN_LOCATION);
3841
3842           if (dump_enabled_p ())
3843             {
3844               dump_printf_loc (MSG_NOTE, vect_location,
3845                                "transform reduction: created def-use cycle: ");
3846               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
3847               dump_printf (MSG_NOTE, "\n");
3848               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (def), 0);
3849             }
3850
3851           phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
3852         }
3853     }
3854
3855   vec_initial_defs.release ();
3856
3857   /* 2. Create epilog code.
3858         The reduction epilog code operates across the elements of the vector
3859         of partial results computed by the vectorized loop.
3860         The reduction epilog code consists of:
3861
3862         step 1: compute the scalar result in a vector (v_out2)
3863         step 2: extract the scalar result (s_out3) from the vector (v_out2)
3864         step 3: adjust the scalar result (s_out3) if needed.
3865
3866         Step 1 can be accomplished using one the following three schemes:
3867           (scheme 1) using reduc_code, if available.
3868           (scheme 2) using whole-vector shifts, if available.
3869           (scheme 3) using a scalar loop. In this case steps 1+2 above are
3870                      combined.
3871
3872           The overall epilog code looks like this:
3873
3874           s_out0 = phi <s_loop>         # original EXIT_PHI
3875           v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
3876           v_out2 = reduce <v_out1>              # step 1
3877           s_out3 = extract_field <v_out2, 0>    # step 2
3878           s_out4 = adjust_result <s_out3>       # step 3
3879
3880           (step 3 is optional, and steps 1 and 2 may be combined).
3881           Lastly, the uses of s_out0 are replaced by s_out4.  */
3882
3883
3884   /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
3885          v_out1 = phi <VECT_DEF>
3886          Store them in NEW_PHIS.  */
3887
3888   exit_bb = single_exit (loop)->dest;
3889   prev_phi_info = NULL;
3890   new_phis.create (vect_defs.length ());
3891   FOR_EACH_VEC_ELT (vect_defs, i, def)
3892     {
3893       for (j = 0; j < ncopies; j++)
3894         {
3895           tree new_def = copy_ssa_name (def, NULL);
3896           phi = create_phi_node (new_def, exit_bb);
3897           set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo, NULL));
3898           if (j == 0)
3899             new_phis.quick_push (phi);
3900           else
3901             {
3902               def = vect_get_vec_def_for_stmt_copy (dt, def);
3903               STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
3904             }
3905
3906           SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
3907           prev_phi_info = vinfo_for_stmt (phi);
3908         }
3909     }
3910
3911   /* The epilogue is created for the outer-loop, i.e., for the loop being
3912      vectorized.  Create exit phis for the outer loop.  */
3913   if (double_reduc)
3914     {
3915       loop = outer_loop;
3916       exit_bb = single_exit (loop)->dest;
3917       inner_phis.create (vect_defs.length ());
3918       FOR_EACH_VEC_ELT (new_phis, i, phi)
3919         {
3920           tree new_result = copy_ssa_name (PHI_RESULT (phi), NULL);
3921           gimple outer_phi = create_phi_node (new_result, exit_bb);
3922           SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
3923                            PHI_RESULT (phi));
3924           set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
3925                                                             loop_vinfo, NULL));
3926           inner_phis.quick_push (phi);
3927           new_phis[i] = outer_phi;
3928           prev_phi_info = vinfo_for_stmt (outer_phi);
3929           while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
3930             {
3931               phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
3932               new_result = copy_ssa_name (PHI_RESULT (phi), NULL);
3933               outer_phi = create_phi_node (new_result, exit_bb);
3934               SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
3935                                PHI_RESULT (phi));
3936               set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
3937                                                         loop_vinfo, NULL));
3938               STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
3939               prev_phi_info = vinfo_for_stmt (outer_phi);
3940             }
3941         }
3942     }
3943
3944   exit_gsi = gsi_after_labels (exit_bb);
3945
3946   /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
3947          (i.e. when reduc_code is not available) and in the final adjustment
3948          code (if needed).  Also get the original scalar reduction variable as
3949          defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
3950          represents a reduction pattern), the tree-code and scalar-def are
3951          taken from the original stmt that the pattern-stmt (STMT) replaces.
3952          Otherwise (it is a regular reduction) - the tree-code and scalar-def
3953          are taken from STMT.  */
3954
3955   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3956   if (!orig_stmt)
3957     {
3958       /* Regular reduction  */
3959       orig_stmt = stmt;
3960     }
3961   else
3962     {
3963       /* Reduction pattern  */
3964       stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
3965       gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
3966       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
3967     }
3968
3969   code = gimple_assign_rhs_code (orig_stmt);
3970   /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
3971      partial results are added and not subtracted.  */
3972   if (code == MINUS_EXPR)
3973     code = PLUS_EXPR;
3974
3975   scalar_dest = gimple_assign_lhs (orig_stmt);
3976   scalar_type = TREE_TYPE (scalar_dest);
3977   scalar_results.create (group_size);
3978   new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
3979   bitsize = TYPE_SIZE (scalar_type);
3980
3981   /* In case this is a reduction in an inner-loop while vectorizing an outer
3982      loop - we don't need to extract a single scalar result at the end of the
3983      inner-loop (unless it is double reduction, i.e., the use of reduction is
3984      outside the outer-loop).  The final vector of partial results will be used
3985      in the vectorized outer-loop, or reduced to a scalar result at the end of
3986      the outer-loop.  */
3987   if (nested_in_vect_loop && !double_reduc)
3988     goto vect_finalize_reduction;
3989
3990   /* SLP reduction without reduction chain, e.g.,
3991      # a1 = phi <a2, a0>
3992      # b1 = phi <b2, b0>
3993      a2 = operation (a1)
3994      b2 = operation (b1)  */
3995   slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
3996
3997   /* In case of reduction chain, e.g.,
3998      # a1 = phi <a3, a0>
3999      a2 = operation (a1)
4000      a3 = operation (a2),
4001
4002      we may end up with more than one vector result.  Here we reduce them to
4003      one vector.  */
4004   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4005     {
4006       tree first_vect = PHI_RESULT (new_phis[0]);
4007       tree tmp;
4008       gimple new_vec_stmt = NULL;
4009
4010       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4011       for (k = 1; k < new_phis.length (); k++)
4012         {
4013           gimple next_phi = new_phis[k];
4014           tree second_vect = PHI_RESULT (next_phi);
4015
4016           tmp = build2 (code, vectype,  first_vect, second_vect);
4017           new_vec_stmt = gimple_build_assign (vec_dest, tmp);
4018           first_vect = make_ssa_name (vec_dest, new_vec_stmt);
4019           gimple_assign_set_lhs (new_vec_stmt, first_vect);
4020           gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
4021         }
4022
4023       new_phi_result = first_vect;
4024       if (new_vec_stmt)
4025         {
4026           new_phis.truncate (0);
4027           new_phis.safe_push (new_vec_stmt);
4028         }
4029     }
4030   else
4031     new_phi_result = PHI_RESULT (new_phis[0]);
4032
4033   /* 2.3 Create the reduction code, using one of the three schemes described
4034          above. In SLP we simply need to extract all the elements from the
4035          vector (without reducing them), so we use scalar shifts.  */
4036   if (reduc_code != ERROR_MARK && !slp_reduc)
4037     {
4038       tree tmp;
4039
4040       /*** Case 1:  Create:
4041            v_out2 = reduc_expr <v_out1>  */
4042
4043       if (dump_enabled_p ())
4044         dump_printf_loc (MSG_NOTE, vect_location,
4045                          "Reduce using direct vector reduction.");
4046
4047       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4048       tmp = build1 (reduc_code, vectype, new_phi_result);
4049       epilog_stmt = gimple_build_assign (vec_dest, tmp);
4050       new_temp = make_ssa_name (vec_dest, epilog_stmt);
4051       gimple_assign_set_lhs (epilog_stmt, new_temp);
4052       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4053
4054       extract_scalar_result = true;
4055     }
4056   else
4057     {
4058       enum tree_code shift_code = ERROR_MARK;
4059       bool have_whole_vector_shift = true;
4060       int bit_offset;
4061       int element_bitsize = tree_low_cst (bitsize, 1);
4062       int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
4063       tree vec_temp;
4064
4065       if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
4066         shift_code = VEC_RSHIFT_EXPR;
4067       else
4068         have_whole_vector_shift = false;
4069
4070       /* Regardless of whether we have a whole vector shift, if we're
4071          emulating the operation via tree-vect-generic, we don't want
4072          to use it.  Only the first round of the reduction is likely
4073          to still be profitable via emulation.  */
4074       /* ??? It might be better to emit a reduction tree code here, so that
4075          tree-vect-generic can expand the first round via bit tricks.  */
4076       if (!VECTOR_MODE_P (mode))
4077         have_whole_vector_shift = false;
4078       else
4079         {
4080           optab optab = optab_for_tree_code (code, vectype, optab_default);
4081           if (optab_handler (optab, mode) == CODE_FOR_nothing)
4082             have_whole_vector_shift = false;
4083         }
4084
4085       if (have_whole_vector_shift && !slp_reduc)
4086         {
4087           /*** Case 2: Create:
4088              for (offset = VS/2; offset >= element_size; offset/=2)
4089                 {
4090                   Create:  va' = vec_shift <va, offset>
4091                   Create:  va = vop <va, va'>
4092                 }  */
4093
4094           if (dump_enabled_p ())
4095             dump_printf_loc (MSG_NOTE, vect_location,
4096                              "Reduce using vector shifts");
4097
4098           vec_dest = vect_create_destination_var (scalar_dest, vectype);
4099           new_temp = new_phi_result;
4100           for (bit_offset = vec_size_in_bits/2;
4101                bit_offset >= element_bitsize;
4102                bit_offset /= 2)
4103             {
4104               tree bitpos = size_int (bit_offset);
4105
4106               epilog_stmt = gimple_build_assign_with_ops (shift_code,
4107                                                vec_dest, new_temp, bitpos);
4108               new_name = make_ssa_name (vec_dest, epilog_stmt);
4109               gimple_assign_set_lhs (epilog_stmt, new_name);
4110               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4111
4112               epilog_stmt = gimple_build_assign_with_ops (code, vec_dest,
4113                                                           new_name, new_temp);
4114               new_temp = make_ssa_name (vec_dest, epilog_stmt);
4115               gimple_assign_set_lhs (epilog_stmt, new_temp);
4116               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4117             }
4118
4119           extract_scalar_result = true;
4120         }
4121       else
4122         {
4123           tree rhs;
4124
4125           /*** Case 3: Create:
4126              s = extract_field <v_out2, 0>
4127              for (offset = element_size;
4128                   offset < vector_size;
4129                   offset += element_size;)
4130                {
4131                  Create:  s' = extract_field <v_out2, offset>
4132                  Create:  s = op <s, s'>  // For non SLP cases
4133                }  */
4134
4135           if (dump_enabled_p ())
4136             dump_printf_loc (MSG_NOTE, vect_location,
4137                              "Reduce using scalar code. ");
4138
4139           vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
4140           FOR_EACH_VEC_ELT (new_phis, i, new_phi)
4141             {
4142               if (gimple_code (new_phi) == GIMPLE_PHI)
4143                 vec_temp = PHI_RESULT (new_phi);
4144               else
4145                 vec_temp = gimple_assign_lhs (new_phi);
4146               rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
4147                             bitsize_zero_node);
4148               epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4149               new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4150               gimple_assign_set_lhs (epilog_stmt, new_temp);
4151               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4152
4153               /* In SLP we don't need to apply reduction operation, so we just
4154                  collect s' values in SCALAR_RESULTS.  */
4155               if (slp_reduc)
4156                 scalar_results.safe_push (new_temp);
4157
4158               for (bit_offset = element_bitsize;
4159                    bit_offset < vec_size_in_bits;
4160                    bit_offset += element_bitsize)
4161                 {
4162                   tree bitpos = bitsize_int (bit_offset);
4163                   tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
4164                                      bitsize, bitpos);
4165
4166                   epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4167                   new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
4168                   gimple_assign_set_lhs (epilog_stmt, new_name);
4169                   gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4170
4171                   if (slp_reduc)
4172                     {
4173                       /* In SLP we don't need to apply reduction operation, so
4174                          we just collect s' values in SCALAR_RESULTS.  */
4175                       new_temp = new_name;
4176                       scalar_results.safe_push (new_name);
4177                     }
4178                   else
4179                     {
4180                       epilog_stmt = gimple_build_assign_with_ops (code,
4181                                           new_scalar_dest, new_name, new_temp);
4182                       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4183                       gimple_assign_set_lhs (epilog_stmt, new_temp);
4184                       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4185                     }
4186                 }
4187             }
4188
4189           /* The only case where we need to reduce scalar results in SLP, is
4190              unrolling.  If the size of SCALAR_RESULTS is greater than
4191              GROUP_SIZE, we reduce them combining elements modulo
4192              GROUP_SIZE.  */
4193           if (slp_reduc)
4194             {
4195               tree res, first_res, new_res;
4196               gimple new_stmt;
4197
4198               /* Reduce multiple scalar results in case of SLP unrolling.  */
4199               for (j = group_size; scalar_results.iterate (j, &res);
4200                    j++)
4201                 {
4202                   first_res = scalar_results[j % group_size];
4203                   new_stmt = gimple_build_assign_with_ops (code,
4204                                               new_scalar_dest, first_res, res);
4205                   new_res = make_ssa_name (new_scalar_dest, new_stmt);
4206                   gimple_assign_set_lhs (new_stmt, new_res);
4207                   gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
4208                   scalar_results[j % group_size] = new_res;
4209                 }
4210             }
4211           else
4212             /* Not SLP - we have one scalar to keep in SCALAR_RESULTS.  */
4213             scalar_results.safe_push (new_temp);
4214
4215           extract_scalar_result = false;
4216         }
4217     }
4218
4219   /* 2.4  Extract the final scalar result.  Create:
4220           s_out3 = extract_field <v_out2, bitpos>  */
4221
4222   if (extract_scalar_result)
4223     {
4224       tree rhs;
4225
4226       if (dump_enabled_p ())
4227         dump_printf_loc (MSG_NOTE, vect_location,
4228                          "extract scalar result");
4229
4230       if (BYTES_BIG_ENDIAN)
4231         bitpos = size_binop (MULT_EXPR,
4232                              bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
4233                              TYPE_SIZE (scalar_type));
4234       else
4235         bitpos = bitsize_zero_node;
4236
4237       rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
4238       epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4239       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4240       gimple_assign_set_lhs (epilog_stmt, new_temp);
4241       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4242       scalar_results.safe_push (new_temp);
4243     }
4244
4245 vect_finalize_reduction:
4246
4247   if (double_reduc)
4248     loop = loop->inner;
4249
4250   /* 2.5 Adjust the final result by the initial value of the reduction
4251          variable. (When such adjustment is not needed, then
4252          'adjustment_def' is zero).  For example, if code is PLUS we create:
4253          new_temp = loop_exit_def + adjustment_def  */
4254
4255   if (adjustment_def)
4256     {
4257       gcc_assert (!slp_reduc);
4258       if (nested_in_vect_loop)
4259         {
4260           new_phi = new_phis[0];
4261           gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
4262           expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
4263           new_dest = vect_create_destination_var (scalar_dest, vectype);
4264         }
4265       else
4266         {
4267           new_temp = scalar_results[0];
4268           gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
4269           expr = build2 (code, scalar_type, new_temp, adjustment_def);
4270           new_dest = vect_create_destination_var (scalar_dest, scalar_type);
4271         }
4272
4273       epilog_stmt = gimple_build_assign (new_dest, expr);
4274       new_temp = make_ssa_name (new_dest, epilog_stmt);
4275       gimple_assign_set_lhs (epilog_stmt, new_temp);
4276       SSA_NAME_DEF_STMT (new_temp) = epilog_stmt;
4277       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4278       if (nested_in_vect_loop)
4279         {
4280           set_vinfo_for_stmt (epilog_stmt,
4281                               new_stmt_vec_info (epilog_stmt, loop_vinfo,
4282                                                  NULL));
4283           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
4284                 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
4285
4286           if (!double_reduc)
4287             scalar_results.quick_push (new_temp);
4288           else
4289             scalar_results[0] = new_temp;
4290         }
4291       else
4292         scalar_results[0] = new_temp;
4293
4294       new_phis[0] = epilog_stmt;
4295     }
4296
4297   /* 2.6  Handle the loop-exit phis.  Replace the uses of scalar loop-exit
4298           phis with new adjusted scalar results, i.e., replace use <s_out0>
4299           with use <s_out4>.
4300
4301      Transform:
4302         loop_exit:
4303           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4304           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4305           v_out2 = reduce <v_out1>
4306           s_out3 = extract_field <v_out2, 0>
4307           s_out4 = adjust_result <s_out3>
4308           use <s_out0>
4309           use <s_out0>
4310
4311      into:
4312
4313         loop_exit:
4314           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4315           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4316           v_out2 = reduce <v_out1>
4317           s_out3 = extract_field <v_out2, 0>
4318           s_out4 = adjust_result <s_out3>
4319           use <s_out4>
4320           use <s_out4> */
4321
4322
4323   /* In SLP reduction chain we reduce vector results into one vector if
4324      necessary, hence we set here GROUP_SIZE to 1.  SCALAR_DEST is the LHS of
4325      the last stmt in the reduction chain, since we are looking for the loop
4326      exit phi node.  */
4327   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4328     {
4329       scalar_dest = gimple_assign_lhs (
4330                         SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]);
4331       group_size = 1;
4332     }
4333
4334   /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
4335      case that GROUP_SIZE is greater than vectorization factor).  Therefore, we
4336      need to match SCALAR_RESULTS with corresponding statements.  The first
4337      (GROUP_SIZE / number of new vector stmts) scalar results correspond to
4338      the first vector stmt, etc.
4339      (RATIO is equal to (GROUP_SIZE / number of new vector stmts)).  */
4340   if (group_size > new_phis.length ())
4341     {
4342       ratio = group_size / new_phis.length ();
4343       gcc_assert (!(group_size % new_phis.length ()));
4344     }
4345   else
4346     ratio = 1;
4347
4348   for (k = 0; k < group_size; k++)
4349     {
4350       if (k % ratio == 0)
4351         {
4352           epilog_stmt = new_phis[k / ratio];
4353           reduction_phi = reduction_phis[k / ratio];
4354           if (double_reduc)
4355             inner_phi = inner_phis[k / ratio];
4356         }
4357
4358       if (slp_reduc)
4359         {
4360           gimple current_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[k];
4361
4362           orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
4363           /* SLP statements can't participate in patterns.  */
4364           gcc_assert (!orig_stmt);
4365           scalar_dest = gimple_assign_lhs (current_stmt);
4366         }
4367
4368       phis.create (3);
4369       /* Find the loop-closed-use at the loop exit of the original scalar
4370          result.  (The reduction result is expected to have two immediate uses -
4371          one at the latch block, and one at the loop exit).  */
4372       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4373         if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
4374           phis.safe_push (USE_STMT (use_p));
4375
4376       /* While we expect to have found an exit_phi because of loop-closed-ssa
4377          form we can end up without one if the scalar cycle is dead.  */
4378
4379       FOR_EACH_VEC_ELT (phis, i, exit_phi)
4380         {
4381           if (outer_loop)
4382             {
4383               stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
4384               gimple vect_phi;
4385
4386               /* FORNOW. Currently not supporting the case that an inner-loop
4387                  reduction is not used in the outer-loop (but only outside the
4388                  outer-loop), unless it is double reduction.  */
4389               gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
4390                            && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
4391                           || double_reduc);
4392
4393               STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
4394               if (!double_reduc
4395                   || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
4396                       != vect_double_reduction_def)
4397                 continue;
4398
4399               /* Handle double reduction:
4400
4401                  stmt1: s1 = phi <s0, s2>  - double reduction phi (outer loop)
4402                  stmt2:   s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
4403                  stmt3:   s4 = use (s3)     - (regular) reduc stmt (inner loop)
4404                  stmt4: s2 = phi <s4>      - double reduction stmt (outer loop)
4405
4406                  At that point the regular reduction (stmt2 and stmt3) is
4407                  already vectorized, as well as the exit phi node, stmt4.
4408                  Here we vectorize the phi node of double reduction, stmt1, and
4409                  update all relevant statements.  */
4410
4411               /* Go through all the uses of s2 to find double reduction phi
4412                  node, i.e., stmt1 above.  */
4413               orig_name = PHI_RESULT (exit_phi);
4414               FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4415                 {
4416                   stmt_vec_info use_stmt_vinfo;
4417                   stmt_vec_info new_phi_vinfo;
4418                   tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
4419                   basic_block bb = gimple_bb (use_stmt);
4420                   gimple use;
4421
4422                   /* Check that USE_STMT is really double reduction phi
4423                      node.  */
4424                   if (gimple_code (use_stmt) != GIMPLE_PHI
4425                       || gimple_phi_num_args (use_stmt) != 2
4426                       || bb->loop_father != outer_loop)
4427                     continue;
4428                   use_stmt_vinfo = vinfo_for_stmt (use_stmt);
4429                   if (!use_stmt_vinfo
4430                       || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
4431                           != vect_double_reduction_def)
4432                     continue;
4433
4434                   /* Create vector phi node for double reduction:
4435                      vs1 = phi <vs0, vs2>
4436                      vs1 was created previously in this function by a call to
4437                        vect_get_vec_def_for_operand and is stored in
4438                        vec_initial_def;
4439                      vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
4440                      vs0 is created here.  */
4441
4442                   /* Create vector phi node.  */
4443                   vect_phi = create_phi_node (vec_initial_def, bb);
4444                   new_phi_vinfo = new_stmt_vec_info (vect_phi,
4445                                     loop_vec_info_for_loop (outer_loop), NULL);
4446                   set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
4447
4448                   /* Create vs0 - initial def of the double reduction phi.  */
4449                   preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
4450                                              loop_preheader_edge (outer_loop));
4451                   init_def = get_initial_def_for_reduction (stmt,
4452                                                           preheader_arg, NULL);
4453                   vect_phi_init = vect_init_vector (use_stmt, init_def,
4454                                                     vectype, NULL);
4455
4456                   /* Update phi node arguments with vs0 and vs2.  */
4457                   add_phi_arg (vect_phi, vect_phi_init,
4458                                loop_preheader_edge (outer_loop),
4459                                UNKNOWN_LOCATION);
4460                   add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
4461                                loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
4462                   if (dump_enabled_p ())
4463                     {
4464                       dump_printf_loc (MSG_NOTE, vect_location,
4465                                        "created double reduction phi node: ");
4466                       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vect_phi, 0);
4467                     }
4468
4469                   vect_phi_res = PHI_RESULT (vect_phi);
4470
4471                   /* Replace the use, i.e., set the correct vs1 in the regular
4472                      reduction phi node.  FORNOW, NCOPIES is always 1, so the
4473                      loop is redundant.  */
4474                   use = reduction_phi;
4475                   for (j = 0; j < ncopies; j++)
4476                     {
4477                       edge pr_edge = loop_preheader_edge (loop);
4478                       SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
4479                       use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
4480                     }
4481                 }
4482             }
4483         }
4484
4485       phis.release ();
4486       if (nested_in_vect_loop)
4487         {
4488           if (double_reduc)
4489             loop = outer_loop;
4490           else
4491             continue;
4492         }
4493
4494       phis.create (3);
4495       /* Find the loop-closed-use at the loop exit of the original scalar
4496          result.  (The reduction result is expected to have two immediate uses,
4497          one at the latch block, and one at the loop exit).  For double
4498          reductions we are looking for exit phis of the outer loop.  */
4499       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4500         {
4501           if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
4502             phis.safe_push (USE_STMT (use_p));
4503           else
4504             {
4505               if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
4506                 {
4507                   tree phi_res = PHI_RESULT (USE_STMT (use_p));
4508
4509                   FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
4510                     {
4511                       if (!flow_bb_inside_loop_p (loop,
4512                                              gimple_bb (USE_STMT (phi_use_p))))
4513                         phis.safe_push (USE_STMT (phi_use_p));
4514                     }
4515                 }
4516             }
4517         }
4518
4519       FOR_EACH_VEC_ELT (phis, i, exit_phi)
4520         {
4521           /* Replace the uses:  */
4522           orig_name = PHI_RESULT (exit_phi);
4523           scalar_result = scalar_results[k];
4524           FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4525             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
4526               SET_USE (use_p, scalar_result);
4527         }
4528
4529       phis.release ();
4530     }
4531
4532   scalar_results.release ();
4533   inner_phis.release ();
4534   new_phis.release ();
4535 }
4536
4537
4538 /* Function vectorizable_reduction.
4539
4540    Check if STMT performs a reduction operation that can be vectorized.
4541    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4542    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4543    Return FALSE if not a vectorizable STMT, TRUE otherwise.
4544
4545    This function also handles reduction idioms (patterns) that have been
4546    recognized in advance during vect_pattern_recog.  In this case, STMT may be
4547    of this form:
4548      X = pattern_expr (arg0, arg1, ..., X)
4549    and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
4550    sequence that had been detected and replaced by the pattern-stmt (STMT).
4551
4552    In some cases of reduction patterns, the type of the reduction variable X is
4553    different than the type of the other arguments of STMT.
4554    In such cases, the vectype that is used when transforming STMT into a vector
4555    stmt is different than the vectype that is used to determine the
4556    vectorization factor, because it consists of a different number of elements
4557    than the actual number of elements that are being operated upon in parallel.
4558
4559    For example, consider an accumulation of shorts into an int accumulator.
4560    On some targets it's possible to vectorize this pattern operating on 8
4561    shorts at a time (hence, the vectype for purposes of determining the
4562    vectorization factor should be V8HI); on the other hand, the vectype that
4563    is used to create the vector form is actually V4SI (the type of the result).
4564
4565    Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
4566    indicates what is the actual level of parallelism (V8HI in the example), so
4567    that the right vectorization factor would be derived.  This vectype
4568    corresponds to the type of arguments to the reduction stmt, and should *NOT*
4569    be used to create the vectorized stmt.  The right vectype for the vectorized
4570    stmt is obtained from the type of the result X:
4571         get_vectype_for_scalar_type (TREE_TYPE (X))
4572
4573    This means that, contrary to "regular" reductions (or "regular" stmts in
4574    general), the following equation:
4575       STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
4576    does *NOT* necessarily hold for reduction patterns.  */
4577
4578 bool
4579 vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
4580                         gimple *vec_stmt, slp_tree slp_node)
4581 {
4582   tree vec_dest;
4583   tree scalar_dest;
4584   tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
4585   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4586   tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4587   tree vectype_in = NULL_TREE;
4588   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4589   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4590   enum tree_code code, orig_code, epilog_reduc_code;
4591   enum machine_mode vec_mode;
4592   int op_type;
4593   optab optab, reduc_optab;
4594   tree new_temp = NULL_TREE;
4595   tree def;
4596   gimple def_stmt;
4597   enum vect_def_type dt;
4598   gimple new_phi = NULL;
4599   tree scalar_type;
4600   bool is_simple_use;
4601   gimple orig_stmt;
4602   stmt_vec_info orig_stmt_info;
4603   tree expr = NULL_TREE;
4604   int i;
4605   int ncopies;
4606   int epilog_copies;
4607   stmt_vec_info prev_stmt_info, prev_phi_info;
4608   bool single_defuse_cycle = false;
4609   tree reduc_def = NULL_TREE;
4610   gimple new_stmt = NULL;
4611   int j;
4612   tree ops[3];
4613   bool nested_cycle = false, found_nested_cycle_def = false;
4614   gimple reduc_def_stmt = NULL;
4615   /* The default is that the reduction variable is the last in statement.  */
4616   int reduc_index = 2;
4617   bool double_reduc = false, dummy;
4618   basic_block def_bb;
4619   struct loop * def_stmt_loop, *outer_loop = NULL;
4620   tree def_arg;
4621   gimple def_arg_stmt;
4622   vec<tree> vec_oprnds0 = vNULL;
4623   vec<tree> vec_oprnds1 = vNULL;
4624   vec<tree> vect_defs = vNULL;
4625   vec<gimple> phis = vNULL;
4626   int vec_num;
4627   tree def0, def1, tem, op0, op1 = NULL_TREE;
4628
4629   /* In case of reduction chain we switch to the first stmt in the chain, but
4630      we don't update STMT_INFO, since only the last stmt is marked as reduction
4631      and has reduction properties.  */
4632   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4633     stmt = GROUP_FIRST_ELEMENT (stmt_info);
4634
4635   if (nested_in_vect_loop_p (loop, stmt))
4636     {
4637       outer_loop = loop;
4638       loop = loop->inner;
4639       nested_cycle = true;
4640     }
4641
4642   /* 1. Is vectorizable reduction?  */
4643   /* Not supportable if the reduction variable is used in the loop, unless
4644      it's a reduction chain.  */
4645   if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
4646       && !GROUP_FIRST_ELEMENT (stmt_info))
4647     return false;
4648
4649   /* Reductions that are not used even in an enclosing outer-loop,
4650      are expected to be "live" (used out of the loop).  */
4651   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
4652       && !STMT_VINFO_LIVE_P (stmt_info))
4653     return false;
4654
4655   /* Make sure it was already recognized as a reduction computation.  */
4656   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
4657       && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle)
4658     return false;
4659
4660   /* 2. Has this been recognized as a reduction pattern?
4661
4662      Check if STMT represents a pattern that has been recognized
4663      in earlier analysis stages.  For stmts that represent a pattern,
4664      the STMT_VINFO_RELATED_STMT field records the last stmt in
4665      the original sequence that constitutes the pattern.  */
4666
4667   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4668   if (orig_stmt)
4669     {
4670       orig_stmt_info = vinfo_for_stmt (orig_stmt);
4671       gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
4672       gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
4673     }
4674
4675   /* 3. Check the operands of the operation.  The first operands are defined
4676         inside the loop body. The last operand is the reduction variable,
4677         which is defined by the loop-header-phi.  */
4678
4679   gcc_assert (is_gimple_assign (stmt));
4680
4681   /* Flatten RHS.  */
4682   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
4683     {
4684     case GIMPLE_SINGLE_RHS:
4685       op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
4686       if (op_type == ternary_op)
4687         {
4688           tree rhs = gimple_assign_rhs1 (stmt);
4689           ops[0] = TREE_OPERAND (rhs, 0);
4690           ops[1] = TREE_OPERAND (rhs, 1);
4691           ops[2] = TREE_OPERAND (rhs, 2);
4692           code = TREE_CODE (rhs);
4693         }
4694       else
4695         return false;
4696       break;
4697
4698     case GIMPLE_BINARY_RHS:
4699       code = gimple_assign_rhs_code (stmt);
4700       op_type = TREE_CODE_LENGTH (code);
4701       gcc_assert (op_type == binary_op);
4702       ops[0] = gimple_assign_rhs1 (stmt);
4703       ops[1] = gimple_assign_rhs2 (stmt);
4704       break;
4705
4706     case GIMPLE_TERNARY_RHS:
4707       code = gimple_assign_rhs_code (stmt);
4708       op_type = TREE_CODE_LENGTH (code);
4709       gcc_assert (op_type == ternary_op);
4710       ops[0] = gimple_assign_rhs1 (stmt);
4711       ops[1] = gimple_assign_rhs2 (stmt);
4712       ops[2] = gimple_assign_rhs3 (stmt);
4713       break;
4714
4715     case GIMPLE_UNARY_RHS:
4716       return false;
4717
4718     default:
4719       gcc_unreachable ();
4720     }
4721
4722   if (code == COND_EXPR && slp_node)
4723     return false;
4724
4725   scalar_dest = gimple_assign_lhs (stmt);
4726   scalar_type = TREE_TYPE (scalar_dest);
4727   if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
4728       && !SCALAR_FLOAT_TYPE_P (scalar_type))
4729     return false;
4730
4731   /* Do not try to vectorize bit-precision reductions.  */
4732   if ((TYPE_PRECISION (scalar_type)
4733        != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
4734     return false;
4735
4736   /* All uses but the last are expected to be defined in the loop.
4737      The last use is the reduction variable.  In case of nested cycle this
4738      assumption is not true: we use reduc_index to record the index of the
4739      reduction variable.  */
4740   for (i = 0; i < op_type - 1; i++)
4741     {
4742       /* The condition of COND_EXPR is checked in vectorizable_condition().  */
4743       if (i == 0 && code == COND_EXPR)
4744         continue;
4745
4746       is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
4747                                             &def_stmt, &def, &dt, &tem);
4748       if (!vectype_in)
4749         vectype_in = tem;
4750       gcc_assert (is_simple_use);
4751
4752       if (dt != vect_internal_def
4753           && dt != vect_external_def
4754           && dt != vect_constant_def
4755           && dt != vect_induction_def
4756           && !(dt == vect_nested_cycle && nested_cycle))
4757         return false;
4758
4759       if (dt == vect_nested_cycle)
4760         {
4761           found_nested_cycle_def = true;
4762           reduc_def_stmt = def_stmt;
4763           reduc_index = i;
4764         }
4765     }
4766
4767   is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
4768                                         &def_stmt, &def, &dt, &tem);
4769   if (!vectype_in)
4770     vectype_in = tem;
4771   gcc_assert (is_simple_use);
4772   if (!(dt == vect_reduction_def
4773         || dt == vect_nested_cycle
4774         || ((dt == vect_internal_def || dt == vect_external_def
4775              || dt == vect_constant_def || dt == vect_induction_def)
4776             && nested_cycle && found_nested_cycle_def)))
4777     {
4778       /* For pattern recognized stmts, orig_stmt might be a reduction,
4779          but some helper statements for the pattern might not, or
4780          might be COND_EXPRs with reduction uses in the condition.  */
4781       gcc_assert (orig_stmt);
4782       return false;
4783     }
4784   if (!found_nested_cycle_def)
4785     reduc_def_stmt = def_stmt;
4786
4787   gcc_assert (gimple_code (reduc_def_stmt) == GIMPLE_PHI);
4788   if (orig_stmt)
4789     gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo,
4790                                                        reduc_def_stmt,
4791                                                        !nested_cycle,
4792                                                        &dummy));
4793   else
4794     {
4795       gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
4796                                              !nested_cycle, &dummy);
4797       /* We changed STMT to be the first stmt in reduction chain, hence we
4798          check that in this case the first element in the chain is STMT.  */
4799       gcc_assert (stmt == tmp
4800                   || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
4801     }
4802
4803   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
4804     return false;
4805
4806   if (slp_node || PURE_SLP_STMT (stmt_info))
4807     ncopies = 1;
4808   else
4809     ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4810                / TYPE_VECTOR_SUBPARTS (vectype_in));
4811
4812   gcc_assert (ncopies >= 1);
4813
4814   vec_mode = TYPE_MODE (vectype_in);
4815
4816   if (code == COND_EXPR)
4817     {
4818       if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL))
4819         {
4820           if (dump_enabled_p ())
4821             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4822                              "unsupported condition in reduction");
4823
4824             return false;
4825         }
4826     }
4827   else
4828     {
4829       /* 4. Supportable by target?  */
4830
4831       if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
4832           || code == LROTATE_EXPR || code == RROTATE_EXPR)
4833         {
4834           /* Shifts and rotates are only supported by vectorizable_shifts,
4835              not vectorizable_reduction.  */
4836           if (dump_enabled_p ())
4837             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4838                              "unsupported shift or rotation.");
4839           return false;
4840         }
4841
4842       /* 4.1. check support for the operation in the loop  */
4843       optab = optab_for_tree_code (code, vectype_in, optab_default);
4844       if (!optab)
4845         {
4846           if (dump_enabled_p ())
4847             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4848                              "no optab.");
4849
4850           return false;
4851         }
4852
4853       if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
4854         {
4855           if (dump_enabled_p ())
4856             dump_printf (MSG_NOTE, "op not supported by target.");
4857
4858           if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4859               || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4860                   < vect_min_worthwhile_factor (code))
4861             return false;
4862
4863           if (dump_enabled_p ())
4864             dump_printf (MSG_NOTE, "proceeding using word mode.");
4865         }
4866
4867       /* Worthwhile without SIMD support?  */
4868       if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
4869           && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4870              < vect_min_worthwhile_factor (code))
4871         {
4872           if (dump_enabled_p ())
4873             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4874                              "not worthwhile without SIMD support.");
4875
4876           return false;
4877         }
4878     }
4879
4880   /* 4.2. Check support for the epilog operation.
4881
4882           If STMT represents a reduction pattern, then the type of the
4883           reduction variable may be different than the type of the rest
4884           of the arguments.  For example, consider the case of accumulation
4885           of shorts into an int accumulator; The original code:
4886                         S1: int_a = (int) short_a;
4887           orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;
4888
4889           was replaced with:
4890                         STMT: int_acc = widen_sum <short_a, int_acc>
4891
4892           This means that:
4893           1. The tree-code that is used to create the vector operation in the
4894              epilog code (that reduces the partial results) is not the
4895              tree-code of STMT, but is rather the tree-code of the original
4896              stmt from the pattern that STMT is replacing.  I.e, in the example
4897              above we want to use 'widen_sum' in the loop, but 'plus' in the
4898              epilog.
4899           2. The type (mode) we use to check available target support
4900              for the vector operation to be created in the *epilog*, is
4901              determined by the type of the reduction variable (in the example
4902              above we'd check this: optab_handler (plus_optab, vect_int_mode])).
4903              However the type (mode) we use to check available target support
4904              for the vector operation to be created *inside the loop*, is
4905              determined by the type of the other arguments to STMT (in the
4906              example we'd check this: optab_handler (widen_sum_optab,
4907              vect_short_mode)).
4908
4909           This is contrary to "regular" reductions, in which the types of all
4910           the arguments are the same as the type of the reduction variable.
4911           For "regular" reductions we can therefore use the same vector type
4912           (and also the same tree-code) when generating the epilog code and
4913           when generating the code inside the loop.  */
4914
4915   if (orig_stmt)
4916     {
4917       /* This is a reduction pattern: get the vectype from the type of the
4918          reduction variable, and get the tree-code from orig_stmt.  */
4919       orig_code = gimple_assign_rhs_code (orig_stmt);
4920       gcc_assert (vectype_out);
4921       vec_mode = TYPE_MODE (vectype_out);
4922     }
4923   else
4924     {
4925       /* Regular reduction: use the same vectype and tree-code as used for
4926          the vector code inside the loop can be used for the epilog code. */
4927       orig_code = code;
4928     }
4929
4930   if (nested_cycle)
4931     {
4932       def_bb = gimple_bb (reduc_def_stmt);
4933       def_stmt_loop = def_bb->loop_father;
4934       def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
4935                                        loop_preheader_edge (def_stmt_loop));
4936       if (TREE_CODE (def_arg) == SSA_NAME
4937           && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
4938           && gimple_code (def_arg_stmt) == GIMPLE_PHI
4939           && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
4940           && vinfo_for_stmt (def_arg_stmt)
4941           && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
4942               == vect_double_reduction_def)
4943         double_reduc = true;
4944     }
4945
4946   epilog_reduc_code = ERROR_MARK;
4947   if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
4948     {
4949       reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
4950                                          optab_default);
4951       if (!reduc_optab)
4952         {
4953           if (dump_enabled_p ())
4954             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4955                              "no optab for reduction.");
4956
4957           epilog_reduc_code = ERROR_MARK;
4958         }
4959
4960       if (reduc_optab
4961           && optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
4962         {
4963           if (dump_enabled_p ())
4964             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4965                              "reduc op not supported by target.");
4966
4967           epilog_reduc_code = ERROR_MARK;
4968         }
4969     }
4970   else
4971     {
4972       if (!nested_cycle || double_reduc)
4973         {
4974           if (dump_enabled_p ())
4975             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4976                              "no reduc code for scalar code.");
4977
4978           return false;
4979         }
4980     }
4981
4982   if (double_reduc && ncopies > 1)
4983     {
4984       if (dump_enabled_p ())
4985         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4986                          "multiple types in double reduction");
4987
4988       return false;
4989     }
4990
4991   /* In case of widenning multiplication by a constant, we update the type
4992      of the constant to be the type of the other operand.  We check that the
4993      constant fits the type in the pattern recognition pass.  */
4994   if (code == DOT_PROD_EXPR
4995       && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
4996     {
4997       if (TREE_CODE (ops[0]) == INTEGER_CST)
4998         ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
4999       else if (TREE_CODE (ops[1]) == INTEGER_CST)
5000         ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
5001       else
5002         {
5003           if (dump_enabled_p ())
5004             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5005                              "invalid types in dot-prod");
5006
5007           return false;
5008         }
5009     }
5010
5011   if (!vec_stmt) /* transformation not required.  */
5012     {
5013       if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
5014         return false;
5015       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
5016       return true;
5017     }
5018
5019   /** Transform.  **/
5020
5021   if (dump_enabled_p ())
5022     dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.");
5023
5024   /* FORNOW: Multiple types are not supported for condition.  */
5025   if (code == COND_EXPR)
5026     gcc_assert (ncopies == 1);
5027
5028   /* Create the destination vector  */
5029   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
5030
5031   /* In case the vectorization factor (VF) is bigger than the number
5032      of elements that we can fit in a vectype (nunits), we have to generate
5033      more than one vector stmt - i.e - we need to "unroll" the
5034      vector stmt by a factor VF/nunits.  For more details see documentation
5035      in vectorizable_operation.  */
5036
5037   /* If the reduction is used in an outer loop we need to generate
5038      VF intermediate results, like so (e.g. for ncopies=2):
5039         r0 = phi (init, r0)
5040         r1 = phi (init, r1)
5041         r0 = x0 + r0;
5042         r1 = x1 + r1;
5043     (i.e. we generate VF results in 2 registers).
5044     In this case we have a separate def-use cycle for each copy, and therefore
5045     for each copy we get the vector def for the reduction variable from the
5046     respective phi node created for this copy.
5047
5048     Otherwise (the reduction is unused in the loop nest), we can combine
5049     together intermediate results, like so (e.g. for ncopies=2):
5050         r = phi (init, r)
5051         r = x0 + r;
5052         r = x1 + r;
5053    (i.e. we generate VF/2 results in a single register).
5054    In this case for each copy we get the vector def for the reduction variable
5055    from the vectorized reduction operation generated in the previous iteration.
5056   */
5057
5058   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope)
5059     {
5060       single_defuse_cycle = true;
5061       epilog_copies = 1;
5062     }
5063   else
5064     epilog_copies = ncopies;
5065
5066   prev_stmt_info = NULL;
5067   prev_phi_info = NULL;
5068   if (slp_node)
5069     {
5070       vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5071       gcc_assert (TYPE_VECTOR_SUBPARTS (vectype_out)
5072                   == TYPE_VECTOR_SUBPARTS (vectype_in));
5073     }
5074   else
5075     {
5076       vec_num = 1;
5077       vec_oprnds0.create (1);
5078       if (op_type == ternary_op)
5079         vec_oprnds1.create (1);
5080     }
5081
5082   phis.create (vec_num);
5083   vect_defs.create (vec_num);
5084   if (!slp_node)
5085     vect_defs.quick_push (NULL_TREE);
5086
5087   for (j = 0; j < ncopies; j++)
5088     {
5089       if (j == 0 || !single_defuse_cycle)
5090         {
5091           for (i = 0; i < vec_num; i++)
5092             {
5093               /* Create the reduction-phi that defines the reduction
5094                  operand.  */
5095               new_phi = create_phi_node (vec_dest, loop->header);
5096               set_vinfo_for_stmt (new_phi,
5097                                   new_stmt_vec_info (new_phi, loop_vinfo,
5098                                                      NULL));
5099                if (j == 0 || slp_node)
5100                  phis.quick_push (new_phi);
5101             }
5102         }
5103
5104       if (code == COND_EXPR)
5105         {
5106           gcc_assert (!slp_node);
5107           vectorizable_condition (stmt, gsi, vec_stmt,
5108                                   PHI_RESULT (phis[0]),
5109                                   reduc_index, NULL);
5110           /* Multiple types are not supported for condition.  */
5111           break;
5112         }
5113
5114       /* Handle uses.  */
5115       if (j == 0)
5116         {
5117           op0 = ops[!reduc_index];
5118           if (op_type == ternary_op)
5119             {
5120               if (reduc_index == 0)
5121                 op1 = ops[2];
5122               else
5123                 op1 = ops[1];
5124             }
5125
5126           if (slp_node)
5127             vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5128                                slp_node, -1);
5129           else
5130             {
5131               loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
5132                                                             stmt, NULL);
5133               vec_oprnds0.quick_push (loop_vec_def0);
5134               if (op_type == ternary_op)
5135                {
5136                  loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt,
5137                                                                NULL);
5138                  vec_oprnds1.quick_push (loop_vec_def1);
5139                }
5140             }
5141         }
5142       else
5143         {
5144           if (!slp_node)
5145             {
5146               enum vect_def_type dt;
5147               gimple dummy_stmt;
5148               tree dummy;
5149
5150               vect_is_simple_use (ops[!reduc_index], stmt, loop_vinfo, NULL,
5151                                   &dummy_stmt, &dummy, &dt);
5152               loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
5153                                                               loop_vec_def0);
5154               vec_oprnds0[0] = loop_vec_def0;
5155               if (op_type == ternary_op)
5156                 {
5157                   vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &dummy_stmt,
5158                                       &dummy, &dt);
5159                   loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
5160                                                                 loop_vec_def1);
5161                   vec_oprnds1[0] = loop_vec_def1;
5162                 }
5163             }
5164
5165           if (single_defuse_cycle)
5166             reduc_def = gimple_assign_lhs (new_stmt);
5167
5168           STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
5169         }
5170
5171       FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
5172         {
5173           if (slp_node)
5174             reduc_def = PHI_RESULT (phis[i]);
5175           else
5176             {
5177               if (!single_defuse_cycle || j == 0)
5178                 reduc_def = PHI_RESULT (new_phi);
5179             }
5180
5181           def1 = ((op_type == ternary_op)
5182                   ? vec_oprnds1[i] : NULL);
5183           if (op_type == binary_op)
5184             {
5185               if (reduc_index == 0)
5186                 expr = build2 (code, vectype_out, reduc_def, def0);
5187               else
5188                 expr = build2 (code, vectype_out, def0, reduc_def);
5189             }
5190           else
5191             {
5192               if (reduc_index == 0)
5193                 expr = build3 (code, vectype_out, reduc_def, def0, def1);
5194               else
5195                 {
5196                   if (reduc_index == 1)
5197                     expr = build3 (code, vectype_out, def0, reduc_def, def1);
5198                   else
5199                     expr = build3 (code, vectype_out, def0, def1, reduc_def);
5200                 }
5201             }
5202
5203           new_stmt = gimple_build_assign (vec_dest, expr);
5204           new_temp = make_ssa_name (vec_dest, new_stmt);
5205           gimple_assign_set_lhs (new_stmt, new_temp);
5206           vect_finish_stmt_generation (stmt, new_stmt, gsi);
5207
5208           if (slp_node)
5209             {
5210               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5211               vect_defs.quick_push (new_temp);
5212             }
5213           else
5214             vect_defs[0] = new_temp;
5215         }
5216
5217       if (slp_node)
5218         continue;
5219
5220       if (j == 0)
5221         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5222       else
5223         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5224
5225       prev_stmt_info = vinfo_for_stmt (new_stmt);
5226       prev_phi_info = vinfo_for_stmt (new_phi);
5227     }
5228
5229   /* Finalize the reduction-phi (set its arguments) and create the
5230      epilog reduction code.  */
5231   if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
5232     {
5233       new_temp = gimple_assign_lhs (*vec_stmt);
5234       vect_defs[0] = new_temp;
5235     }
5236
5237   vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
5238                                     epilog_reduc_code, phis, reduc_index,
5239                                     double_reduc, slp_node);
5240
5241   phis.release ();
5242   vect_defs.release ();
5243   vec_oprnds0.release ();
5244   vec_oprnds1.release ();
5245
5246   return true;
5247 }
5248
5249 /* Function vect_min_worthwhile_factor.
5250
5251    For a loop where we could vectorize the operation indicated by CODE,
5252    return the minimum vectorization factor that makes it worthwhile
5253    to use generic vectors.  */
5254 int
5255 vect_min_worthwhile_factor (enum tree_code code)
5256 {
5257   switch (code)
5258     {
5259     case PLUS_EXPR:
5260     case MINUS_EXPR:
5261     case NEGATE_EXPR:
5262       return 4;
5263
5264     case BIT_AND_EXPR:
5265     case BIT_IOR_EXPR:
5266     case BIT_XOR_EXPR:
5267     case BIT_NOT_EXPR:
5268       return 2;
5269
5270     default:
5271       return INT_MAX;
5272     }
5273 }
5274
5275
5276 /* Function vectorizable_induction
5277
5278    Check if PHI performs an induction computation that can be vectorized.
5279    If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
5280    phi to replace it, put it in VEC_STMT, and add it to the same basic block.
5281    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5282
5283 bool
5284 vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5285                         gimple *vec_stmt)
5286 {
5287   stmt_vec_info stmt_info = vinfo_for_stmt (phi);
5288   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5289   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5290   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5291   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5292   int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5293   tree vec_def;
5294
5295   gcc_assert (ncopies >= 1);
5296   /* FORNOW. These restrictions should be relaxed.  */
5297   if (nested_in_vect_loop_p (loop, phi))
5298     {
5299       imm_use_iterator imm_iter;
5300       use_operand_p use_p;
5301       gimple exit_phi;
5302       edge latch_e;
5303       tree loop_arg;
5304
5305       if (ncopies > 1)
5306         {
5307           if (dump_enabled_p ())
5308             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5309                              "multiple types in nested loop.");
5310           return false;
5311         }
5312
5313       exit_phi = NULL;
5314       latch_e = loop_latch_edge (loop->inner);
5315       loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
5316       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
5317         {
5318           if (!flow_bb_inside_loop_p (loop->inner,
5319                                       gimple_bb (USE_STMT (use_p))))
5320             {
5321               exit_phi = USE_STMT (use_p);
5322               break;
5323             }
5324         }
5325       if (exit_phi)
5326         {
5327           stmt_vec_info exit_phi_vinfo  = vinfo_for_stmt (exit_phi);
5328           if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
5329                 && !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
5330             {
5331               if (dump_enabled_p ())
5332                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5333                                  "inner-loop induction only used outside "
5334                                  "of the outer vectorized loop.");
5335               return false;
5336             }
5337         }
5338     }
5339
5340   if (!STMT_VINFO_RELEVANT_P (stmt_info))
5341     return false;
5342
5343   /* FORNOW: SLP not supported.  */
5344   if (STMT_SLP_TYPE (stmt_info))
5345     return false;
5346
5347   gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
5348
5349   if (gimple_code (phi) != GIMPLE_PHI)
5350     return false;
5351
5352   if (!vec_stmt) /* transformation not required.  */
5353     {
5354       STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
5355       if (dump_enabled_p ())
5356         dump_printf_loc (MSG_NOTE, vect_location,
5357                          "=== vectorizable_induction ===");
5358       vect_model_induction_cost (stmt_info, ncopies);
5359       return true;
5360     }
5361
5362   /** Transform.  **/
5363
5364   if (dump_enabled_p ())
5365     dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.");
5366
5367   vec_def = get_initial_def_for_induction (phi);
5368   *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
5369   return true;
5370 }
5371
5372 /* Function vectorizable_live_operation.
5373
5374    STMT computes a value that is used outside the loop.  Check if
5375    it can be supported.  */
5376
5377 bool
5378 vectorizable_live_operation (gimple stmt,
5379                              gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5380                              gimple *vec_stmt)
5381 {
5382   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5383   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5384   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5385   int i;
5386   int op_type;
5387   tree op;
5388   tree def;
5389   gimple def_stmt;
5390   enum vect_def_type dt;
5391   enum tree_code code;
5392   enum gimple_rhs_class rhs_class;
5393
5394   gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
5395
5396   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
5397     return false;
5398
5399   if (!is_gimple_assign (stmt))
5400     {
5401       if (gimple_call_internal_p (stmt)
5402           && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
5403           && gimple_call_lhs (stmt)
5404           && loop->simduid
5405           && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
5406           && loop->simduid
5407              == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
5408         {
5409           edge e = single_exit (loop);
5410           basic_block merge_bb = e->dest;
5411           imm_use_iterator imm_iter;
5412           use_operand_p use_p;
5413           tree lhs = gimple_call_lhs (stmt);
5414
5415           FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
5416             {
5417               gimple use_stmt = USE_STMT (use_p);
5418               if (gimple_code (use_stmt) == GIMPLE_PHI
5419                   || gimple_bb (use_stmt) == merge_bb)
5420                 {
5421                   if (vec_stmt)
5422                     {
5423                       tree vfm1
5424                         = build_int_cst (unsigned_type_node,
5425                                          loop_vinfo->vectorization_factor - 1);
5426                       SET_PHI_ARG_DEF (use_stmt, e->dest_idx, vfm1);
5427                     }
5428                   return true;
5429                 }
5430             }
5431         }
5432
5433       return false;
5434     }
5435
5436   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5437     return false;
5438
5439   /* FORNOW. CHECKME. */
5440   if (nested_in_vect_loop_p (loop, stmt))
5441     return false;
5442
5443   code = gimple_assign_rhs_code (stmt);
5444   op_type = TREE_CODE_LENGTH (code);
5445   rhs_class = get_gimple_rhs_class (code);
5446   gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
5447   gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);
5448
5449   /* FORNOW: support only if all uses are invariant.  This means
5450      that the scalar operations can remain in place, unvectorized.
5451      The original last scalar value that they compute will be used.  */
5452
5453   for (i = 0; i < op_type; i++)
5454     {
5455       if (rhs_class == GIMPLE_SINGLE_RHS)
5456         op = TREE_OPERAND (gimple_op (stmt, 1), i);
5457       else
5458         op = gimple_op (stmt, i + 1);
5459       if (op
5460           && !vect_is_simple_use (op, stmt, loop_vinfo, NULL, &def_stmt, &def,
5461                                   &dt))
5462         {
5463           if (dump_enabled_p ())
5464             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5465                              "use not simple.");
5466           return false;
5467         }
5468
5469       if (dt != vect_external_def && dt != vect_constant_def)
5470         return false;
5471     }
5472
5473   /* No transformation is required for the cases we currently support.  */
5474   return true;
5475 }
5476
5477 /* Kill any debug uses outside LOOP of SSA names defined in STMT.  */
5478
5479 static void
5480 vect_loop_kill_debug_uses (struct loop *loop, gimple stmt)
5481 {
5482   ssa_op_iter op_iter;
5483   imm_use_iterator imm_iter;
5484   def_operand_p def_p;
5485   gimple ustmt;
5486
5487   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
5488     {
5489       FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
5490         {
5491           basic_block bb;
5492
5493           if (!is_gimple_debug (ustmt))
5494             continue;
5495
5496           bb = gimple_bb (ustmt);
5497
5498           if (!flow_bb_inside_loop_p (loop, bb))
5499             {
5500               if (gimple_debug_bind_p (ustmt))
5501                 {
5502                   if (dump_enabled_p ())
5503                     dump_printf_loc (MSG_NOTE, vect_location,
5504                                      "killing debug use");
5505
5506                   gimple_debug_bind_reset_value (ustmt);
5507                   update_stmt (ustmt);
5508                 }
5509               else
5510                 gcc_unreachable ();
5511             }
5512         }
5513     }
5514 }
5515
5516 /* Function vect_transform_loop.
5517
5518    The analysis phase has determined that the loop is vectorizable.
5519    Vectorize the loop - created vectorized stmts to replace the scalar
5520    stmts in the loop, and update the loop exit condition.  */
5521
5522 void
5523 vect_transform_loop (loop_vec_info loop_vinfo)
5524 {
5525   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5526   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5527   int nbbs = loop->num_nodes;
5528   gimple_stmt_iterator si;
5529   int i;
5530   tree ratio = NULL;
5531   int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5532   bool grouped_store;
5533   bool slp_scheduled = false;
5534   unsigned int nunits;
5535   gimple stmt, pattern_stmt;
5536   gimple_seq pattern_def_seq = NULL;
5537   gimple_stmt_iterator pattern_def_si = gsi_none ();
5538   bool transform_pattern_stmt = false;
5539   bool check_profitability = false;
5540   int th;
5541   /* Record number of iterations before we started tampering with the profile. */
5542   gcov_type expected_iterations = expected_loop_iterations_unbounded (loop);
5543
5544   if (dump_enabled_p ())
5545     dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===");
5546
5547   /* If profile is inprecise, we have chance to fix it up.  */
5548   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5549     expected_iterations = LOOP_VINFO_INT_NITERS (loop_vinfo);
5550
5551   /* Use the more conservative vectorization threshold.  If the number
5552      of iterations is constant assume the cost check has been performed
5553      by our caller.  If the threshold makes all loops profitable that
5554      run at least the vectorization factor number of times checking
5555      is pointless, too.  */
5556   th = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
5557          * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
5558   th = MAX (th, LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo));
5559   if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1
5560       && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5561     {
5562       if (dump_enabled_p ())
5563         dump_printf_loc (MSG_NOTE, vect_location,
5564                          "Profitability threshold is %d loop iterations.", th);
5565       check_profitability = true;
5566     }
5567
5568   /* Version the loop first, if required, so the profitability check
5569      comes first.  */
5570
5571   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
5572       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
5573     {
5574       vect_loop_versioning (loop_vinfo, th, check_profitability);
5575       check_profitability = false;
5576     }
5577
5578   /* Peel the loop if there are data refs with unknown alignment.
5579      Only one data ref with unknown store is allowed.  */
5580
5581   if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
5582     {
5583       vect_do_peeling_for_alignment (loop_vinfo, th, check_profitability);
5584       check_profitability = false;
5585     }
5586
5587   /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
5588      compile time constant), or it is a constant that doesn't divide by the
5589      vectorization factor, then an epilog loop needs to be created.
5590      We therefore duplicate the loop: the original loop will be vectorized,
5591      and will compute the first (n/VF) iterations.  The second copy of the loop
5592      will remain scalar and will compute the remaining (n%VF) iterations.
5593      (VF is the vectorization factor).  */
5594
5595   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5596        || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5597            && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
5598        || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
5599     vect_do_peeling_for_loop_bound (loop_vinfo, &ratio,
5600                                     th, check_profitability);
5601   else
5602     ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
5603                 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
5604
5605   /* 1) Make sure the loop header has exactly two entries
5606      2) Make sure we have a preheader basic block.  */
5607
5608   gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
5609
5610   split_edge (loop_preheader_edge (loop));
5611
5612   /* FORNOW: the vectorizer supports only loops which body consist
5613      of one basic block (header + empty latch). When the vectorizer will
5614      support more involved loop forms, the order by which the BBs are
5615      traversed need to be reconsidered.  */
5616
5617   for (i = 0; i < nbbs; i++)
5618     {
5619       basic_block bb = bbs[i];
5620       stmt_vec_info stmt_info;
5621       gimple phi;
5622
5623       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
5624         {
5625           phi = gsi_stmt (si);
5626           if (dump_enabled_p ())
5627             {
5628               dump_printf_loc (MSG_NOTE, vect_location,
5629                                "------>vectorizing phi: ");
5630               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
5631             }
5632           stmt_info = vinfo_for_stmt (phi);
5633           if (!stmt_info)
5634             continue;
5635
5636           if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
5637             vect_loop_kill_debug_uses (loop, phi);
5638
5639           if (!STMT_VINFO_RELEVANT_P (stmt_info)
5640               && !STMT_VINFO_LIVE_P (stmt_info))
5641             continue;
5642
5643           if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
5644                 != (unsigned HOST_WIDE_INT) vectorization_factor)
5645               && dump_enabled_p ())
5646             dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.");
5647
5648           if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
5649             {
5650               if (dump_enabled_p ())
5651                 dump_printf_loc (MSG_NOTE, vect_location, "transform phi.");
5652               vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
5653             }
5654         }
5655
5656       pattern_stmt = NULL;
5657       for (si = gsi_start_bb (bb); !gsi_end_p (si) || transform_pattern_stmt;)
5658         {
5659           bool is_store;
5660
5661           if (transform_pattern_stmt)
5662             stmt = pattern_stmt;
5663           else
5664             {
5665               stmt = gsi_stmt (si);
5666               /* During vectorization remove existing clobber stmts.  */
5667               if (gimple_clobber_p (stmt))
5668                 {
5669                   unlink_stmt_vdef (stmt);
5670                   gsi_remove (&si, true);
5671                   release_defs (stmt);
5672                   continue;
5673                 }
5674             }
5675
5676           if (dump_enabled_p ())
5677             {
5678               dump_printf_loc (MSG_NOTE, vect_location,
5679                                "------>vectorizing statement: ");
5680               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5681             }
5682
5683           stmt_info = vinfo_for_stmt (stmt);
5684
5685           /* vector stmts created in the outer-loop during vectorization of
5686              stmts in an inner-loop may not have a stmt_info, and do not
5687              need to be vectorized.  */
5688           if (!stmt_info)
5689             {
5690               gsi_next (&si);
5691               continue;
5692             }
5693
5694           if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
5695             vect_loop_kill_debug_uses (loop, stmt);
5696
5697           if (!STMT_VINFO_RELEVANT_P (stmt_info)
5698               && !STMT_VINFO_LIVE_P (stmt_info))
5699             {
5700               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5701                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
5702                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5703                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5704                 {
5705                   stmt = pattern_stmt;
5706                   stmt_info = vinfo_for_stmt (stmt);
5707                 }
5708               else
5709                 {
5710                   gsi_next (&si);
5711                   continue;
5712                 }
5713             }
5714           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5715                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
5716                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5717                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5718             transform_pattern_stmt = true;
5719
5720           /* If pattern statement has def stmts, vectorize them too.  */
5721           if (is_pattern_stmt_p (stmt_info))
5722             {
5723               if (pattern_def_seq == NULL)
5724                 {
5725                   pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
5726                   pattern_def_si = gsi_start (pattern_def_seq);
5727                 }
5728               else if (!gsi_end_p (pattern_def_si))
5729                 gsi_next (&pattern_def_si);
5730               if (pattern_def_seq != NULL)
5731                 {
5732                   gimple pattern_def_stmt = NULL;
5733                   stmt_vec_info pattern_def_stmt_info = NULL;
5734
5735                   while (!gsi_end_p (pattern_def_si))
5736                     {
5737                       pattern_def_stmt = gsi_stmt (pattern_def_si);
5738                       pattern_def_stmt_info
5739                         = vinfo_for_stmt (pattern_def_stmt);
5740                       if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
5741                           || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
5742                         break;
5743                       gsi_next (&pattern_def_si);
5744                     }
5745
5746                   if (!gsi_end_p (pattern_def_si))
5747                     {
5748                       if (dump_enabled_p ())
5749                         {
5750                           dump_printf_loc (MSG_NOTE, vect_location,
5751                                            "==> vectorizing pattern def "
5752                                            "stmt: ");
5753                           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
5754                                             pattern_def_stmt, 0);
5755                         }
5756
5757                       stmt = pattern_def_stmt;
5758                       stmt_info = pattern_def_stmt_info;
5759                     }
5760                   else
5761                     {
5762                       pattern_def_si = gsi_none ();
5763                       transform_pattern_stmt = false;
5764                     }
5765                 }
5766               else
5767                 transform_pattern_stmt = false;
5768             }
5769
5770           gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5771           nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
5772                                                STMT_VINFO_VECTYPE (stmt_info));
5773           if (!STMT_SLP_TYPE (stmt_info)
5774               && nunits != (unsigned int) vectorization_factor
5775               && dump_enabled_p ())
5776             /* For SLP VF is set according to unrolling factor, and not to
5777                vector size, hence for SLP this print is not valid.  */
5778             dump_printf_loc (MSG_NOTE, vect_location,
5779                              "multiple-types.");
5780
5781           /* SLP. Schedule all the SLP instances when the first SLP stmt is
5782              reached.  */
5783           if (STMT_SLP_TYPE (stmt_info))
5784             {
5785               if (!slp_scheduled)
5786                 {
5787                   slp_scheduled = true;
5788
5789                   if (dump_enabled_p ())
5790                     dump_printf_loc (MSG_NOTE, vect_location,
5791                                      "=== scheduling SLP instances ===");
5792
5793                   vect_schedule_slp (loop_vinfo, NULL);
5794                 }
5795
5796               /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
5797               if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
5798                 {
5799                   if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
5800                     {
5801                       pattern_def_seq = NULL;
5802                       gsi_next (&si);
5803                     }
5804                   continue;
5805                 }
5806             }
5807
5808           /* -------- vectorize statement ------------ */
5809           if (dump_enabled_p ())
5810             dump_printf_loc (MSG_NOTE, vect_location, "transform statement.");
5811
5812           grouped_store = false;
5813           is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
5814           if (is_store)
5815             {
5816               if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5817                 {
5818                   /* Interleaving. If IS_STORE is TRUE, the vectorization of the
5819                      interleaving chain was completed - free all the stores in
5820                      the chain.  */
5821                   gsi_next (&si);
5822                   vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
5823                   continue;
5824                 }
5825               else
5826                 {
5827                   /* Free the attached stmt_vec_info and remove the stmt.  */
5828                   gimple store = gsi_stmt (si);
5829                   free_stmt_vec_info (store);
5830                   unlink_stmt_vdef (store);
5831                   gsi_remove (&si, true);
5832                   release_defs (store);
5833                   continue;
5834                 }
5835             }
5836
5837           if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
5838             {
5839               pattern_def_seq = NULL;
5840               gsi_next (&si);
5841             }
5842         }                       /* stmts in BB */
5843     }                           /* BBs in loop */
5844
5845   slpeel_make_loop_iterate_ntimes (loop, ratio);
5846
5847   /* Reduce loop iterations by the vectorization factor.  */
5848   scale_loop_profile (loop, GCOV_COMPUTE_SCALE (1, vectorization_factor),
5849                       expected_iterations / vectorization_factor);
5850   loop->nb_iterations_upper_bound
5851     = loop->nb_iterations_upper_bound.udiv (double_int::from_uhwi (vectorization_factor),
5852                                             FLOOR_DIV_EXPR);
5853   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
5854       && loop->nb_iterations_upper_bound != double_int_zero)
5855     loop->nb_iterations_upper_bound = loop->nb_iterations_upper_bound - double_int_one;
5856   if (loop->any_estimate)
5857     {
5858       loop->nb_iterations_estimate
5859         = loop->nb_iterations_estimate.udiv (double_int::from_uhwi (vectorization_factor),
5860                                              FLOOR_DIV_EXPR);
5861        if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
5862            && loop->nb_iterations_estimate != double_int_zero)
5863          loop->nb_iterations_estimate = loop->nb_iterations_estimate - double_int_one;
5864     }
5865
5866   if (dump_enabled_p ())
5867     {
5868       dump_printf_loc (MSG_NOTE, vect_location,
5869                        "LOOP VECTORIZED\n");
5870       if (loop->inner)
5871         dump_printf_loc (MSG_NOTE, vect_location,
5872                          "OUTER LOOP VECTORIZED\n");
5873     }
5874 }