gcc/tree-vect-loop.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003-2016 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com> and
   4    Ira Rosen <irar@il.ibm.com>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "target.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "gimple.h"
  30 #include "cfghooks.h"
  31 #include "tree-pass.h"
  32 #include "ssa.h"
  33 #include "optabs-tree.h"
  34 #include "diagnostic-core.h"
  35 #include "fold-const.h"
  36 #include "stor-layout.h"
  37 #include "cfganal.h"
  38 #include "gimplify.h"
  39 #include "gimple-iterator.h"
  40 #include "gimplify-me.h"
  41 #include "tree-ssa-loop-ivopts.h"
  42 #include "tree-ssa-loop-manip.h"
  43 #include "tree-ssa-loop-niter.h"
  44 #include "cfgloop.h"
  45 #include "params.h"
  46 #include "tree-scalar-evolution.h"
  47 #include "tree-vectorizer.h"
  48 #include "gimple-fold.h"
  49 #include "cgraph.h"
  50
  51 /* Loop Vectorization Pass.
  52
  53    This pass tries to vectorize loops.
  54
  55    For example, the vectorizer transforms the following simple loop:
  56
  57         short a[N]; short b[N]; short c[N]; int i;
  58
  59         for (i=0; i<N; i++){
  60           a[i] = b[i] + c[i];
  61         }
  62
  63    as if it was manually vectorized by rewriting the source code into:
  64
  65         typedef int __attribute__((mode(V8HI))) v8hi;
  66         short a[N];  short b[N]; short c[N];   int i;
  67         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  68         v8hi va, vb, vc;
  69
  70         for (i=0; i<N/8; i++){
  71           vb = pb[i];
  72           vc = pc[i];
  73           va = vb + vc;
  74           pa[i] = va;
  75         }
  76
  77         The main entry to this pass is vectorize_loops(), in which
  78    the vectorizer applies a set of analyses on a given set of loops,
  79    followed by the actual vectorization transformation for the loops that
  80    had successfully passed the analysis phase.
  81         Throughout this pass we make a distinction between two types of
  82    data: scalars (which are represented by SSA_NAMES), and memory references
  83    ("data-refs").  These two types of data require different handling both
   84    during analysis and transformation.  The types of data-refs that the
   85    vectorizer currently supports are ARRAY_REFS whose base is an array DECL
   86    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
   87    accesses are required to have a simple (consecutive) access pattern.
  88
  89    Analysis phase:
  90    ===============
  91         The driver for the analysis phase is vect_analyze_loop().
  92    It applies a set of analyses, some of which rely on the scalar evolution
  93    analyzer (scev) developed by Sebastian Pop.
  94
  95         During the analysis phase the vectorizer records some information
  96    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  97    loop, as well as general information about the loop as a whole, which is
  98    recorded in a "loop_vec_info" struct attached to each loop.
  99
 100    Transformation phase:
 101    =====================
 102         The loop transformation phase scans all the stmts in the loop, and
 103    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
 104    the loop that needs to be vectorized.  It inserts the vector code sequence
 105    just before the scalar stmt S, and records a pointer to the vector code
 106    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
 107    attached to S).  This pointer will be used for the vectorization of following
 108    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
 109    otherwise, we rely on dead code elimination for removing it.
 110
 111         For example, say stmt S1 was vectorized into stmt VS1:
 112
 113    VS1: vb = px[i];
 114    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
 115    S2:  a = b;
 116
 117    To vectorize stmt S2, the vectorizer first finds the stmt that defines
 118    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
 119    vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
 120    resulting sequence would be:
 121
 122    VS1: vb = px[i];
 123    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
 124    VS2: va = vb;
 125    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 126
  127         Operands that are not SSA_NAMEs are data-refs that appear in
  128    load/store operations (like 'x[i]' in S1), and are handled differently.
 129
 130    Target modeling:
 131    =================
 132         Currently the only target specific information that is used is the
 133    size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
 134    Targets that can support different sizes of vectors, for now will need
 135    to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
 136    flexibility will be added in the future.
 137
  138         Since we only vectorize operations whose vector form can be
  139    expressed using existing tree codes, to verify that an operation is
  140    supported, the vectorizer checks the relevant optab at the relevant
  141    machine_mode (e.g., optab_handler (add_optab, V8HImode)).  If
  142    the value found is CODE_FOR_nothing, then there's no target support, and
  143    we can't vectorize the stmt.
 144
 145    For additional information on this project see:
 146    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 147 */
 148
 149 static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
 150
  151 /* Function vect_determine_vectorization_factor
  152
  153    Determine the vectorization factor (VF).  VF is the number of data elements
  154    that are operated upon in parallel in a single iteration of the vectorized
  155    loop.  For example, when vectorizing a loop that operates on 4byte elements,
  156    on a target with vector size (VS) 16byte, the VF is set to 4, since 4
  157    elements can fit in a single vector register.
  158
  159    The VF stored in LOOP_VINFO is the largest number of vector subparts found
  160    among the relevant PHIs and the examined stmts of the loop.  Return false
  161    if anything is rejected (the reason is dumped as "not vectorized: ...")
  162    or if the resulting VF is <= 1; return true otherwise.
  163
  164    VF is also the factor by which the loop iterations are strip-mined, e.g.:
  165    original loop:
  166         for (i=0; i<N; i++){
  167           a[i] = b[i] + c[i];
  168         }
  169
  170    vectorized loop:
  171         for (i=0; i<N; i+=VF){
  172           a[i:VF] = b[i:VF] + c[i:VF];
  173         }
  174 */
  175
  176 static bool
  177 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
  178 {
  179   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  180   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  181   unsigned nbbs = loop->num_nodes;
  182   unsigned int vectorization_factor = 0;
  183   tree scalar_type;
  184   gphi *phi;
  185   tree vectype;
  186   unsigned int nunits;
  187   stmt_vec_info stmt_info;
  188   unsigned i;
  189   HOST_WIDE_INT dummy;  /* Out-parameter sink; never read.  */
  190   gimple *stmt, *pattern_stmt = NULL;
  191   gimple_seq pattern_def_seq = NULL;
  192   gimple_stmt_iterator pattern_def_si = gsi_none ();
  193   bool analyze_pattern_stmt = false;
  194   bool bool_result;
  195   auto_vec<stmt_vec_info> mask_producers;  /* Relevant bool-result stmts.  */
  196
  197   if (dump_enabled_p ())
  198     dump_printf_loc (MSG_NOTE, vect_location,
  199                      "=== vect_determine_vectorization_factor ===\n");
  200   /* Visit each basic block of the loop: its PHIs first, then its stmts.  */
  201   for (i = 0; i < nbbs; i++)
  202     {
  203       basic_block bb = bbs[i];
  204       /* PHIs: give each relevant PHI a vectype and update the VF.  */
  205       for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
  206            gsi_next (&si))
  207         {
  208           phi = si.phi ();
  209           stmt_info = vinfo_for_stmt (phi);
  210           if (dump_enabled_p ())
  211             {
  212               dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: ");
  213               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
  214               dump_printf (MSG_NOTE, "\n");
  215             }
  216
  217           gcc_assert (stmt_info);
  218
  219           if (STMT_VINFO_RELEVANT_P (stmt_info))
  220             {
  221               gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
  222               scalar_type = TREE_TYPE (PHI_RESULT (phi));
  223
  224               if (dump_enabled_p ())
  225                 {
  226                   dump_printf_loc (MSG_NOTE, vect_location,
  227                                    "get vectype for scalar type:  ");
  228                   dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
  229                   dump_printf (MSG_NOTE, "\n");
  230                 }
  231
  232               vectype = get_vectype_for_scalar_type (scalar_type);
  233               if (!vectype)
  234                 {
  235                   if (dump_enabled_p ())
  236                     {
  237                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  238                                        "not vectorized: unsupported "
  239                                        "data-type ");
  240                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  241                                          scalar_type);
  242                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
  243                     }
  244                   return false;
  245                 }
  246               STMT_VINFO_VECTYPE (stmt_info) = vectype;
  247
  248               if (dump_enabled_p ())
  249                 {
  250                   dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
  251                   dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
  252                   dump_printf (MSG_NOTE, "\n");
  253                 }
  254
  255               nunits = TYPE_VECTOR_SUBPARTS (vectype);
  256               if (dump_enabled_p ())
  257                 dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n",
  258                                  nunits);
  259               /* The VF is the maximum nunits seen so far.  */
  260               if (!vectorization_factor
  261                   || (nunits > vectorization_factor))
  262                 vectorization_factor = nunits;
  263             }
  264         }
  265       /* Stmts.  SI is advanced explicitly; one stmt may take several iterations.  */
  266       for (gimple_stmt_iterator si = gsi_start_bb (bb);
  267            !gsi_end_p (si) || analyze_pattern_stmt;)
  268         {
  269           tree vf_vectype;
  270           /* Use the pattern stmt queued by the previous iteration, if any.  */
  271           if (analyze_pattern_stmt)
  272             stmt = pattern_stmt;
  273           else
  274             stmt = gsi_stmt (si);
  275
  276           stmt_info = vinfo_for_stmt (stmt);
  277
  278           if (dump_enabled_p ())
  279             {
  280               dump_printf_loc (MSG_NOTE, vect_location,
  281                                "==> examining statement: ");
  282               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
  283               dump_printf (MSG_NOTE, "\n");
  284             }
  285
  286           gcc_assert (stmt_info);
  287
  288           /* Skip stmts which do not need to be vectorized.  */
  289           if ((!STMT_VINFO_RELEVANT_P (stmt_info)
  290                && !STMT_VINFO_LIVE_P (stmt_info))
  291               || gimple_clobber_p (stmt))
  292             {
  293               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
  294                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
  295                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
  296                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
  297                 {
  298                   stmt = pattern_stmt;
  299                   stmt_info = vinfo_for_stmt (pattern_stmt);
  300                   if (dump_enabled_p ())
  301                     {
  302                       dump_printf_loc (MSG_NOTE, vect_location,
  303                                        "==> examining pattern statement: ");
  304                       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
  305                       dump_printf (MSG_NOTE, "\n");
  306                     }
  307                 }
  308               else
  309                 {
  310                   if (dump_enabled_p ())
  311                     dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
  312                   gsi_next (&si);
  313                   continue;
  314                 }
  315             }
  316           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
  317                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
  318                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
  319                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
  320             analyze_pattern_stmt = true;  /* Examine it next iteration.  */
  321
  322           /* If a pattern statement has def stmts, analyze them too.  */
  323           if (is_pattern_stmt_p (stmt_info))
  324             {
  325               if (pattern_def_seq == NULL)
  326                 {
  327                   pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
  328                   pattern_def_si = gsi_start (pattern_def_seq);
  329                 }
  330               else if (!gsi_end_p (pattern_def_si))
  331                 gsi_next (&pattern_def_si);
  332               if (pattern_def_seq != NULL)
  333                 {
  334                   gimple *pattern_def_stmt = NULL;
  335                   stmt_vec_info pattern_def_stmt_info = NULL;
  336                   /* Find the next relevant or live pattern def stmt.  */
  337                   while (!gsi_end_p (pattern_def_si))
  338                     {
  339                       pattern_def_stmt = gsi_stmt (pattern_def_si);
  340                       pattern_def_stmt_info
  341                         = vinfo_for_stmt (pattern_def_stmt);
  342                       if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
  343                           || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
  344                         break;
  345                       gsi_next (&pattern_def_si);
  346                     }
  347                   /* Analyze the def stmt found, or finish this pattern stmt.  */
  348                   if (!gsi_end_p (pattern_def_si))
  349                     {
  350                       if (dump_enabled_p ())
  351                         {
  352                           dump_printf_loc (MSG_NOTE, vect_location,
  353                                            "==> examining pattern def stmt: ");
  354                           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
  355                                             pattern_def_stmt, 0);
  356                           dump_printf (MSG_NOTE, "\n");
  357                         }
  358
  359                       stmt = pattern_def_stmt;
  360                       stmt_info = pattern_def_stmt_info;
  361                     }
  362                   else
  363                     {
  364                       pattern_def_si = gsi_none ();
  365                       analyze_pattern_stmt = false;
  366                     }
  367                 }
  368               else
  369                 analyze_pattern_stmt = false;
  370             }
  371           /* No lhs: analyze MASK_STORE, skip other calls, reject the rest.  */
  372           if (gimple_get_lhs (stmt) == NULL_TREE
  373               /* MASK_STORE has no lhs, but is ok.  */
  374               && (!is_gimple_call (stmt)
  375                   || !gimple_call_internal_p (stmt)
  376                   || gimple_call_internal_fn (stmt) != IFN_MASK_STORE))
  377             {
  378               if (is_gimple_call (stmt))
  379                 {
  380                   /* Ignore calls with no lhs.  These must be calls to
  381                      #pragma omp simd functions, and what vectorization factor
  382                      it really needs can't be determined until
  383                      vectorizable_simd_clone_call.  */
  384                   if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
  385                     {
  386                       pattern_def_seq = NULL;
  387                       gsi_next (&si);
  388                     }
  389                   continue;
  390                 }
  391               if (dump_enabled_p ())
  392                 {
  393                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  394                                    "not vectorized: irregular stmt.");
  395                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,  TDF_SLIM, stmt,
  396                                     0);
  397                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
  398                 }
  399               return false;
  400             }
  401           /* A stmt whose type already has a vector mode is rejected.  */
  402           if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
  403             {
  404               if (dump_enabled_p ())
  405                 {
  406                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  407                                    "not vectorized: vector stmt in loop:");
  408                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
  409                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
  410                 }
  411               return false;
  412             }
  413
  414           bool_result = false;
  415           /* Get VECTYPE: already recorded, or derived from a scalar type.  */
  416           if (STMT_VINFO_VECTYPE (stmt_info))
  417             {
  418               /* The only case when a vectype had been already set is for stmts
  419                  that contain a dataref, or for "pattern-stmts" (stmts
  420                  generated by the vectorizer to represent/replace a certain
  421                  idiom).  */
  422               gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
  423                           || is_pattern_stmt_p (stmt_info)
  424                           || !gsi_end_p (pattern_def_si));
  425               vectype = STMT_VINFO_VECTYPE (stmt_info);
  426             }
  427           else
  428             {
  429               gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
  430               if (is_gimple_call (stmt)
  431                   && gimple_call_internal_p (stmt)
  432                   && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
  433                 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
  434               else
  435                 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
  436
  437               /* Bool ops don't participate in vectorization factor
  438                  computation.  For comparison use compared types to
  439                  compute a factor.  */
  440               if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
  441                 {
  442                   if (STMT_VINFO_RELEVANT_P (stmt_info))
  443                     mask_producers.safe_push (stmt_info);
  444                   bool_result = true;
  445
  446                   if (gimple_code (stmt) == GIMPLE_ASSIGN
  447                       && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
  448                          == tcc_comparison
  449                       && TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (stmt)))
  450                          != BOOLEAN_TYPE)
  451                     scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
  452                   else
  453                     {
  454                       if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
  455                         {
  456                           pattern_def_seq = NULL;
  457                           gsi_next (&si);
  458                         }
  459                       continue;
  460                     }
  461                 }
  462
  463               if (dump_enabled_p ())
  464                 {
  465                   dump_printf_loc (MSG_NOTE, vect_location,
  466                                    "get vectype for scalar type:  ");
  467                   dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
  468                   dump_printf (MSG_NOTE, "\n");
  469                 }
  470               vectype = get_vectype_for_scalar_type (scalar_type);
  471               if (!vectype)
  472                 {
  473                   if (dump_enabled_p ())
  474                     {
  475                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  476                                        "not vectorized: unsupported "
  477                                        "data-type ");
  478                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  479                                          scalar_type);
  480                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
  481                     }
  482                   return false;
  483                 }
  484               /* A boolean result is not given this (operand-derived) vectype.  */
  485               if (!bool_result)
  486                 STMT_VINFO_VECTYPE (stmt_info) = vectype;
  487
  488               if (dump_enabled_p ())
  489                 {
  490                   dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
  491                   dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
  492                   dump_printf (MSG_NOTE, "\n");
  493                 }
  494             }
  495
  496           /* Don't try to compute VF out of scalar types if the stmt
  497              produces a boolean vector.  Use result vectype instead.  */
  498           if (VECTOR_BOOLEAN_TYPE_P (vectype))
  499             vf_vectype = vectype;
  500           else
  501             {
  502               /* The vectorization factor is according to the smallest
  503                  scalar type (or the largest vector size, but we only
  504                  support one vector size per loop).  */
  505               if (!bool_result)
  506                 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
  507                                                              &dummy);
  508               if (dump_enabled_p ())
  509                 {
  510                   dump_printf_loc (MSG_NOTE, vect_location,
  511                                    "get vectype for scalar type:  ");
  512                   dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
  513                   dump_printf (MSG_NOTE, "\n");
  514                 }
  515               vf_vectype = get_vectype_for_scalar_type (scalar_type);
  516             }
  517           if (!vf_vectype)
  518             {
  519               if (dump_enabled_p ())
  520                 {
  521                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  522                                    "not vectorized: unsupported data-type ");
  523                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  524                                      scalar_type);
  525                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
  526                 }
  527               return false;
  528             }
  529           /* VECTYPE and VF_VECTYPE must have modes of the same size.  */
  530           if ((GET_MODE_SIZE (TYPE_MODE (vectype))
  531                != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
  532             {
  533               if (dump_enabled_p ())
  534                 {
  535                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  536                                    "not vectorized: different sized vector "
  537                                    "types in statement, ");
  538                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  539                                      vectype);
  540                   dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
  541                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  542                                      vf_vectype);
  543                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
  544                 }
  545               return false;
  546             }
  547
  548           if (dump_enabled_p ())
  549             {
  550               dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
  551               dump_generic_expr (MSG_NOTE, TDF_SLIM, vf_vectype);
  552               dump_printf (MSG_NOTE, "\n");
  553             }
  554
  555           nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
  556           if (dump_enabled_p ())
  557             dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n", nunits);
  558           if (!vectorization_factor
  559               || (nunits > vectorization_factor))
  560             vectorization_factor = nunits;
  561           /* Advance SI unless pattern work for this stmt is still pending.  */
  562           if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
  563             {
  564               pattern_def_seq = NULL;
  565               gsi_next (&si);
  566             }
  567         }
  568     }
  569
  570   /* TODO: Analyze cost. Decide if worth while to vectorize.  */
  571   if (dump_enabled_p ())
  572     dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = %d\n",
  573                      vectorization_factor);
  574   if (vectorization_factor <= 1)
  575     {
  576       if (dump_enabled_p ())
  577         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  578                          "not vectorized: unsupported data-type\n");
  579       return false;
  580     }
  581   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
  582   /* With the VF recorded, assign a vectype to each mask producer.  */
  583   for (i = 0; i < mask_producers.length (); i++)
  584     {
  585       tree mask_type = NULL;
  586
  587       stmt = STMT_VINFO_STMT (mask_producers[i]);
  588       /* Comparison of non-boolean operands: use the operands' mask type.  */
  589       if (gimple_code (stmt) == GIMPLE_ASSIGN
  590           && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
  591           && TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (stmt))) != BOOLEAN_TYPE)
  592         {
  593           scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
  594           mask_type = get_mask_type_for_scalar_type (scalar_type);
  595
  596           if (!mask_type)
  597             {
  598               if (dump_enabled_p ())
  599                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  600                                  "not vectorized: unsupported mask\n");
  601               return false;
  602             }
  603         }
  604       else
  605         {
  606           tree rhs;
  607           ssa_op_iter iter;
  608           gimple *def_stmt;
  609           enum vect_def_type dt;
  610           /* Otherwise take the mask type from the vectypes of the SSA uses.  */
  611           FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
  612             {
  613               if (!vect_is_simple_use (rhs, mask_producers[i]->vinfo,
  614                                        &def_stmt, &dt, &vectype))
  615                 {
  616                   if (dump_enabled_p ())
  617                     {
  618                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  619                                        "not vectorized: can't compute mask type "
  620                                        "for statement, ");
  621                       dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,  TDF_SLIM, stmt,
  622                                         0);
  623                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
  624                     }
  625                   return false;
  626                 }
  627
  628               /* No vectype probably means external definition.
  629                  Allow it in case there is another operand which
  630                  allows to determine mask type.  */
  631               if (!vectype)
  632                 continue;
  633               /* Operands must agree in nunits and in being mask types.  */
  634               if (!mask_type)
  635                 mask_type = vectype;
  636               else if (TYPE_VECTOR_SUBPARTS (mask_type)
  637                        != TYPE_VECTOR_SUBPARTS (vectype))
  638                 {
  639                   if (dump_enabled_p ())
  640                     {
  641                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  642                                        "not vectorized: different sized masks "
  643                                        "types in statement, ");
  644                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  645                                          mask_type);
  646                       dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
  647                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  648                                          vectype);
  649                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
  650                     }
  651                   return false;
  652                 }
  653               else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
  654                        != VECTOR_BOOLEAN_TYPE_P (vectype))
  655                 {
  656                   if (dump_enabled_p ())
  657                     {
  658                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  659                                        "not vectorized: mixed mask and "
  660                                        "nonmask vector types in statement, ");
  661                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  662                                          mask_type);
  663                       dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
  664                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  665                                          vectype);
  666                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
  667                     }
  668                   return false;
  669                 }
  670             }
  671
  672           /* We may compare boolean value loaded as vector of integers.
  673              Fix mask_type in such case.  */
  674           if (mask_type
  675               && !VECTOR_BOOLEAN_TYPE_P (mask_type)
  676               && gimple_code (stmt) == GIMPLE_ASSIGN
  677               && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
  678             mask_type = build_same_sized_truth_vector_type (mask_type);
  679         }
  680
  681       /* No mask_type should mean loop invariant predicate.
  682          This is probably a subject for optimization in
  683          if-conversion.  */
  684       if (!mask_type)
  685         {
  686           if (dump_enabled_p ())
  687             {
  688               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  689                                "not vectorized: can't compute mask type "
  690                                "for statement, ");
  691               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,  TDF_SLIM, stmt,
  692                                 0);
  693               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
  694             }
  695           return false;
  696         }
  697       /* Record the result as the producer's vectype.  */
  698       STMT_VINFO_VECTYPE (mask_producers[i]) = mask_type;
  699     }
  700
  701   return true;
  702 }
 703
 704
 705 /* Function vect_is_simple_iv_evolution.
 706
 707    FORNOW: A simple evolution of an induction variables in the loop is
 708    considered a polynomial evolution.  */
 709
 710 static bool
 711 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
 712                              tree * step)
 713 {
 714   tree init_expr;
 715   tree step_expr;
 716   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
 717   basic_block bb;
 718
 719   /* When there is no evolution in this loop, the evolution function
 720      is not "simple".  */
 721   if (evolution_part == NULL_TREE)
 722     return false;
 723
 724   /* When the evolution is a polynomial of degree >= 2
 725      the evolution function is not "simple".  */
 726   if (tree_is_chrec (evolution_part))
 727     return false;
 728
 729   step_expr = evolution_part;
 730   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
 731
 732   if (dump_enabled_p ())
 733     {
 734       dump_printf_loc (MSG_NOTE, vect_location, "step: ");
 735       dump_generic_expr (MSG_NOTE, TDF_SLIM, step_expr);
 736       dump_printf (MSG_NOTE, ",  init: ");
 737       dump_generic_expr (MSG_NOTE, TDF_SLIM, init_expr);
 738       dump_printf (MSG_NOTE, "\n");
 739     }
 740
 741   *init = init_expr;
 742   *step = step_expr;
 743
 744   if (TREE_CODE (step_expr) != INTEGER_CST
 745       && (TREE_CODE (step_expr) != SSA_NAME
 746           || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
 747               && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))
 748           || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
 749               && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))
 750                   || !flag_associative_math)))
 751       && (TREE_CODE (step_expr) != REAL_CST
 752           || !flag_associative_math))
 753     {
 754       if (dump_enabled_p ())
 755         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 756                          "step unknown.\n");
 757       return false;
 758     }
 759
 760   return true;
 761 }
 762
 763 /* Function vect_analyze_scalar_cycles_1.
 764
 765    Examine the cross iteration def-use cycles of scalar variables
 766    in LOOP.  LOOP_VINFO represents the loop that is now being
 767    considered for vectorization (can be LOOP, or an outer-loop
 768    enclosing LOOP).  */
 769
 770 static void
 771 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
 772 {
 773   basic_block bb = loop->header;
 774   tree init, step;
 775   auto_vec<gimple *, 64> worklist;
 776   gphi_iterator gsi;
 777   bool double_reduc;
 778
 779   if (dump_enabled_p ())
 780     dump_printf_loc (MSG_NOTE, vect_location,
 781                      "=== vect_analyze_scalar_cycles ===\n");
 782
 783   /* First - identify all inductions.  Reduction detection assumes that all the
 784      inductions have been identified, therefore, this order must not be
 785      changed.  */
 786   for (gsi = gsi_start_phis  (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 787     {
 788       gphi *phi = gsi.phi ();
 789       tree access_fn = NULL;
 790       tree def = PHI_RESULT (phi);
 791       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
 792
 793       if (dump_enabled_p ())
 794         {
 795           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
 796           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 797           dump_printf (MSG_NOTE, "\n");
 798         }
 799
 800       /* Skip virtual phi's.  The data dependences that are associated with
 801          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
 802       if (virtual_operand_p (def))
 803         continue;
 804
 805       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
 806
 807       /* Analyze the evolution function.  */
 808       access_fn = analyze_scalar_evolution (loop, def);
 809       if (access_fn)
 810         {
 811           STRIP_NOPS (access_fn);
 812           if (dump_enabled_p ())
 813             {
 814               dump_printf_loc (MSG_NOTE, vect_location,
 815                                "Access function of PHI: ");
 816               dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
 817               dump_printf (MSG_NOTE, "\n");
 818             }
 819           STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
 820             = initial_condition_in_loop_num (access_fn, loop->num);
 821           STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
 822             = evolution_part_in_loop_num (access_fn, loop->num);
 823         }
 824
 825       if (!access_fn
 826           || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
 827           || (LOOP_VINFO_LOOP (loop_vinfo) != loop
 828               && TREE_CODE (step) != INTEGER_CST))
 829         {
 830           worklist.safe_push (phi);
 831           continue;
 832         }
 833
 834       gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
 835                   != NULL_TREE);
 836       gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
 837
 838       if (dump_enabled_p ())
 839         dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n");
 840       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
 841     }
 842
 843
 844   /* Second - identify all reductions and nested cycles.  */
 845   while (worklist.length () > 0)
 846     {
 847       gimple *phi = worklist.pop ();
 848       tree def = PHI_RESULT (phi);
 849       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
 850       gimple *reduc_stmt;
 851       bool nested_cycle;
 852
 853       if (dump_enabled_p ())
 854         {
 855           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
 856           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 857           dump_printf (MSG_NOTE, "\n");
 858         }
 859
 860       gcc_assert (!virtual_operand_p (def)
 861                   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
 862
 863       nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
 864       reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
 865                                                 &double_reduc, false);
 866       if (reduc_stmt)
 867         {
 868           if (double_reduc)
 869             {
 870               if (dump_enabled_p ())
 871                 dump_printf_loc (MSG_NOTE, vect_location,
 872                                  "Detected double reduction.\n");
 873
 874               STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
 875               STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 876                                                     vect_double_reduction_def;
 877             }
 878           else
 879             {
 880               if (nested_cycle)
 881                 {
 882                   if (dump_enabled_p ())
 883                     dump_printf_loc (MSG_NOTE, vect_location,
 884                                      "Detected vectorizable nested cycle.\n");
 885
 886                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
 887                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 888                                                              vect_nested_cycle;
 889                 }
 890               else
 891                 {
 892                   if (dump_enabled_p ())
 893                     dump_printf_loc (MSG_NOTE, vect_location,
 894                                      "Detected reduction.\n");
 895
 896                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
 897                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 898                                                            vect_reduction_def;
 899                   /* Store the reduction cycles for possible vectorization in
 900                      loop-aware SLP.  */
 901                   LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt);
 902                 }
 903             }
 904         }
 905       else
 906         if (dump_enabled_p ())
 907           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 908                            "Unknown def-use cycle pattern.\n");
 909     }
 910 }
 911
 912
 913 /* Function vect_analyze_scalar_cycles.
 914
 915    Examine the cross iteration def-use cycles of scalar variables, by
 916    analyzing the loop-header PHIs of scalar variables.  Classify each
 917    cycle as one of the following: invariant, induction, reduction, unknown.
 918    We do that for the loop represented by LOOP_VINFO, and also to its
 919    inner-loop, if exists.
 920    Examples for scalar cycles:
 921
 922    Example1: reduction:
 923
 924               loop1:
 925               for (i=0; i<N; i++)
 926                  sum += a[i];
 927
 928    Example2: induction:
 929
 930               loop2:
 931               for (i=0; i<N; i++)
 932                  a[i] = i;  */
 933
 934 static void
 935 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
 936 {
 937   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 938
 939   vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
 940
 941   /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
 942      Reductions in such inner-loop therefore have different properties than
 943      the reductions in the nest that gets vectorized:
 944      1. When vectorized, they are executed in the same order as in the original
 945         scalar loop, so we can't change the order of computation when
 946         vectorizing them.
 947      2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
 948         current checks are too strict.  */
 949
 950   if (loop->inner)
 951     vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
 952 }
 953
 954 /* Transfer group and reduction information from STMT to its pattern stmt.  */
 955
 956 static void
 957 vect_fixup_reduc_chain (gimple *stmt)
 958 {
 959   gimple *firstp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
 960   gimple *stmtp;
 961   gcc_assert (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (firstp))
 962               && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
 963   GROUP_SIZE (vinfo_for_stmt (firstp)) = GROUP_SIZE (vinfo_for_stmt (stmt));
 964   do
 965     {
 966       stmtp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
 967       GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmtp)) = firstp;
 968       stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
 969       if (stmt)
 970         GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmtp))
 971           = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
 972     }
 973   while (stmt);
 974   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmtp)) = vect_reduction_def;
 975 }
 976
 977 /* Fixup scalar cycles that now have their stmts detected as patterns.  */
 978
 979 static void
 980 vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
 981 {
 982   gimple *first;
 983   unsigned i;
 984
 985   FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
 986     if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first)))
 987       {
 988         gimple *next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first));
 989         while (next)
 990           {
 991             if (! STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next)))
 992               break;
 993             next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
 994           }
 995         /* If not all stmt in the chain are patterns try to handle
 996            the chain without patterns.  */
 997         if (! next)
 998           {
 999             vect_fixup_reduc_chain (first);
1000             LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
1001               = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first));
1002           }
1003       }
1004 }
1005
1006 /* Function vect_get_loop_niters.
1007
1008    Determine how many iterations the loop is executed and place it
1009    in NUMBER_OF_ITERATIONS.  Place the number of latch iterations
1010    in NUMBER_OF_ITERATIONSM1.
1011
1012    Return the loop exit condition.  */
1013
1014
1015 static gcond *
1016 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations,
1017                       tree *number_of_iterationsm1)
1018 {
1019   tree niters;
1020
1021   if (dump_enabled_p ())
1022     dump_printf_loc (MSG_NOTE, vect_location,
1023                      "=== get_loop_niters ===\n");
1024
1025   niters = number_of_latch_executions (loop);
1026   *number_of_iterationsm1 = niters;
1027
1028   /* We want the number of loop header executions which is the number
1029      of latch executions plus one.
1030      ???  For UINT_MAX latch executions this number overflows to zero
1031      for loops like do { n++; } while (n != 0);  */
1032   if (niters && !chrec_contains_undetermined (niters))
1033     niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), unshare_expr (niters),
1034                           build_int_cst (TREE_TYPE (niters), 1));
1035   *number_of_iterations = niters;
1036
1037   return get_loop_exit_condition (loop);
1038 }
1039
1040
1041 /* Function bb_in_loop_p
1042
1043    Used as predicate for dfs order traversal of the loop bbs.  */
1044
1045 static bool
1046 bb_in_loop_p (const_basic_block bb, const void *data)
1047 {
1048   const struct loop *const loop = (const struct loop *)data;
1049   if (flow_bb_inside_loop_p (loop, bb))
1050     return true;
1051   return false;
1052 }
1053
1054
1055 /* Function new_loop_vec_info.
1056
1057    Create and initialize a new loop_vec_info struct for LOOP, as well as
1058    stmt_vec_info structs for all the stmts in LOOP.  */
1059
1060 static loop_vec_info
1061 new_loop_vec_info (struct loop *loop)
1062 {
1063   loop_vec_info res;
1064   basic_block *bbs;
1065   gimple_stmt_iterator si;
1066   unsigned int i, nbbs;
1067
1068   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1069   res->kind = vec_info::loop;
1070   LOOP_VINFO_LOOP (res) = loop;
1071
1072   bbs = get_loop_body (loop);
1073
1074   /* Create/Update stmt_info for all stmts in the loop.  */
1075   for (i = 0; i < loop->num_nodes; i++)
1076     {
1077       basic_block bb = bbs[i];
1078
1079       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1080         {
1081           gimple *phi = gsi_stmt (si);
1082           gimple_set_uid (phi, 0);
1083           set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res));
1084         }
1085
1086       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1087         {
1088           gimple *stmt = gsi_stmt (si);
1089           gimple_set_uid (stmt, 0);
1090           set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res));
1091         }
1092     }
1093
1094   /* CHECKME: We want to visit all BBs before their successors (except for
1095      latch blocks, for which this assertion wouldn't hold).  In the simple
1096      case of the loop forms we allow, a dfs order of the BBs would the same
1097      as reversed postorder traversal, so we are safe.  */
1098
1099    free (bbs);
1100    bbs = XCNEWVEC (basic_block, loop->num_nodes);
1101    nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
1102                               bbs, loop->num_nodes, loop);
1103    gcc_assert (nbbs == loop->num_nodes);
1104
1105   LOOP_VINFO_BBS (res) = bbs;
1106   LOOP_VINFO_NITERSM1 (res) = NULL;
1107   LOOP_VINFO_NITERS (res) = NULL;
1108   LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
1109   LOOP_VINFO_COST_MODEL_THRESHOLD (res) = 0;
1110   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1111   LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0;
1112   LOOP_VINFO_VECT_FACTOR (res) = 0;
1113   LOOP_VINFO_LOOP_NEST (res) = vNULL;
1114   LOOP_VINFO_DATAREFS (res) = vNULL;
1115   LOOP_VINFO_DDRS (res) = vNULL;
1116   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1117   LOOP_VINFO_MAY_MISALIGN_STMTS (res) = vNULL;
1118   LOOP_VINFO_MAY_ALIAS_DDRS (res) = vNULL;
1119   LOOP_VINFO_GROUPED_STORES (res) = vNULL;
1120   LOOP_VINFO_REDUCTIONS (res) = vNULL;
1121   LOOP_VINFO_REDUCTION_CHAINS (res) = vNULL;
1122   LOOP_VINFO_SLP_INSTANCES (res) = vNULL;
1123   LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
1124   LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
1125   LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
1126   LOOP_VINFO_PEELING_FOR_NITER (res) = false;
1127   LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
1128
1129   return res;
1130 }
1131
1132
1133 /* Function destroy_loop_vec_info.
1134
1135    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1136    stmts in the loop.  */
1137
1138 void
1139 destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
1140 {
1141   struct loop *loop;
1142   basic_block *bbs;
1143   int nbbs;
1144   gimple_stmt_iterator si;
1145   int j;
1146   vec<slp_instance> slp_instances;
1147   slp_instance instance;
1148   bool swapped;
1149
1150   if (!loop_vinfo)
1151     return;
1152
1153   loop = LOOP_VINFO_LOOP (loop_vinfo);
1154
1155   bbs = LOOP_VINFO_BBS (loop_vinfo);
1156   nbbs = clean_stmts ? loop->num_nodes : 0;
1157   swapped = LOOP_VINFO_OPERANDS_SWAPPED (loop_vinfo);
1158
1159   for (j = 0; j < nbbs; j++)
1160     {
1161       basic_block bb = bbs[j];
1162       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1163         free_stmt_vec_info (gsi_stmt (si));
1164
1165       for (si = gsi_start_bb (bb); !gsi_end_p (si); )
1166         {
1167           gimple *stmt = gsi_stmt (si);
1168
1169           /* We may have broken canonical form by moving a constant
1170              into RHS1 of a commutative op.  Fix such occurrences.  */
1171           if (swapped && is_gimple_assign (stmt))
1172             {
1173               enum tree_code code = gimple_assign_rhs_code (stmt);
1174
1175               if ((code == PLUS_EXPR
1176                    || code == POINTER_PLUS_EXPR
1177                    || code == MULT_EXPR)
1178                   && CONSTANT_CLASS_P (gimple_assign_rhs1 (stmt)))
1179                 swap_ssa_operands (stmt,
1180                                    gimple_assign_rhs1_ptr (stmt),
1181                                    gimple_assign_rhs2_ptr (stmt));
1182             }
1183
1184           /* Free stmt_vec_info.  */
1185           free_stmt_vec_info (stmt);
1186           gsi_next (&si);
1187         }
1188     }
1189
1190   free (LOOP_VINFO_BBS (loop_vinfo));
1191   vect_destroy_datarefs (loop_vinfo);
1192   free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
1193   LOOP_VINFO_LOOP_NEST (loop_vinfo).release ();
1194   LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).release ();
1195   LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release ();
1196   LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).release ();
1197   slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1198   FOR_EACH_VEC_ELT (slp_instances, j, instance)
1199     vect_free_slp_instance (instance);
1200
1201   LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
1202   LOOP_VINFO_GROUPED_STORES (loop_vinfo).release ();
1203   LOOP_VINFO_REDUCTIONS (loop_vinfo).release ();
1204   LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).release ();
1205
1206   destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
1207   loop_vinfo->scalar_cost_vec.release ();
1208
1209   free (loop_vinfo);
1210   loop->aux = NULL;
1211 }
1212
1213
1214 /* Calculate the cost of one scalar iteration of the loop.  */
1215 static void
1216 vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
1217 {
1218   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1219   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1220   int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
1221   int innerloop_iters, i;
1222
1223   /* Count statements in scalar loop.  Using this as scalar cost for a single
1224      iteration for now.
1225
1226      TODO: Add outer loop support.
1227
1228      TODO: Consider assigning different costs to different scalar
1229      statements.  */
1230
1231   /* FORNOW.  */
1232   innerloop_iters = 1;
1233   if (loop->inner)
1234     innerloop_iters = 50; /* FIXME */
1235
1236   for (i = 0; i < nbbs; i++)
1237     {
1238       gimple_stmt_iterator si;
1239       basic_block bb = bbs[i];
1240
1241       if (bb->loop_father == loop->inner)
1242         factor = innerloop_iters;
1243       else
1244         factor = 1;
1245
1246       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1247         {
1248           gimple *stmt = gsi_stmt (si);
1249           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1250
1251           if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
1252             continue;
1253
1254           /* Skip stmts that are not vectorized inside the loop.  */
1255           if (stmt_info
1256               && !STMT_VINFO_RELEVANT_P (stmt_info)
1257               && (!STMT_VINFO_LIVE_P (stmt_info)
1258                   || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1259               && !STMT_VINFO_IN_PATTERN_P (stmt_info))
1260             continue;
1261
1262           vect_cost_for_stmt kind;
1263           if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
1264             {
1265               if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
1266                kind = scalar_load;
1267              else
1268                kind = scalar_store;
1269             }
1270           else
1271             kind = scalar_stmt;
1272
1273           scalar_single_iter_cost
1274             += record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1275                                  factor, kind, NULL, 0, vect_prologue);
1276         }
1277     }
1278   LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo)
1279     = scalar_single_iter_cost;
1280 }
1281
1282
1283 /* Function vect_analyze_loop_form_1.
1284
1285    Verify that certain CFG restrictions hold, including:
1286    - the loop has a pre-header
1287    - the loop has a single entry and exit
1288    - the loop exit condition is simple enough, and the number of iterations
1289      can be analyzed (a countable loop).  */
1290
1291 bool
1292 vect_analyze_loop_form_1 (struct loop *loop, gcond **loop_cond,
1293                           tree *number_of_iterationsm1,
1294                           tree *number_of_iterations, gcond **inner_loop_cond)
1295 {
1296   if (dump_enabled_p ())
1297     dump_printf_loc (MSG_NOTE, vect_location,
1298                      "=== vect_analyze_loop_form ===\n");
1299
1300   /* Different restrictions apply when we are considering an inner-most loop,
1301      vs. an outer (nested) loop.
1302      (FORNOW. May want to relax some of these restrictions in the future).  */
1303
1304   if (!loop->inner)
1305     {
1306       /* Inner-most loop.  We currently require that the number of BBs is
1307          exactly 2 (the header and latch).  Vectorizable inner-most loops
1308          look like this:
1309
1310                         (pre-header)
1311                            |
1312                           header <--------+
1313                            | |            |
1314                            | +--> latch --+
1315                            |
1316                         (exit-bb)  */
1317
1318       if (loop->num_nodes != 2)
1319         {
1320           if (dump_enabled_p ())
1321             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1322                              "not vectorized: control flow in loop.\n");
1323           return false;
1324         }
1325
1326       if (empty_block_p (loop->header))
1327         {
1328           if (dump_enabled_p ())
1329             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1330                              "not vectorized: empty loop.\n");
1331           return false;
1332         }
1333     }
1334   else
1335     {
1336       struct loop *innerloop = loop->inner;
1337       edge entryedge;
1338
1339       /* Nested loop. We currently require that the loop is doubly-nested,
1340          contains a single inner loop, and the number of BBs is exactly 5.
1341          Vectorizable outer-loops look like this:
1342
1343                         (pre-header)
1344                            |
1345                           header <---+
1346                            |         |
1347                           inner-loop |
1348                            |         |
1349                           tail ------+
1350                            |
1351                         (exit-bb)
1352
1353          The inner-loop has the properties expected of inner-most loops
1354          as described above.  */
1355
1356       if ((loop->inner)->inner || (loop->inner)->next)
1357         {
1358           if (dump_enabled_p ())
1359             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1360                              "not vectorized: multiple nested loops.\n");
1361           return false;
1362         }
1363
1364       if (loop->num_nodes != 5)
1365         {
1366           if (dump_enabled_p ())
1367             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1368                              "not vectorized: control flow in loop.\n");
1369           return false;
1370         }
1371
1372       entryedge = loop_preheader_edge (innerloop);
1373       if (entryedge->src != loop->header
1374           || !single_exit (innerloop)
1375           || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
1376         {
1377           if (dump_enabled_p ())
1378             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1379                              "not vectorized: unsupported outerloop form.\n");
1380           return false;
1381         }
1382
1383       /* Analyze the inner-loop.  */
1384       tree inner_niterm1, inner_niter;
1385       if (! vect_analyze_loop_form_1 (loop->inner, inner_loop_cond,
1386                                       &inner_niterm1, &inner_niter, NULL))
1387         {
1388           if (dump_enabled_p ())
1389             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1390                              "not vectorized: Bad inner loop.\n");
1391           return false;
1392         }
1393
1394       if (!expr_invariant_in_loop_p (loop, inner_niter))
1395         {
1396           if (dump_enabled_p ())
1397             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1398                              "not vectorized: inner-loop count not"
1399                              " invariant.\n");
1400           return false;
1401         }
1402
1403       if (dump_enabled_p ())
1404         dump_printf_loc (MSG_NOTE, vect_location,
1405                          "Considering outer-loop vectorization.\n");
1406     }
1407
1408   if (!single_exit (loop)
1409       || EDGE_COUNT (loop->header->preds) != 2)
1410     {
1411       if (dump_enabled_p ())
1412         {
1413           if (!single_exit (loop))
1414             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1415                              "not vectorized: multiple exits.\n");
1416           else if (EDGE_COUNT (loop->header->preds) != 2)
1417             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1418                              "not vectorized: too many incoming edges.\n");
1419         }
1420       return false;
1421     }
1422
1423   /* We assume that the loop exit condition is at the end of the loop. i.e,
1424      that the loop is represented as a do-while (with a proper if-guard
1425      before the loop if needed), where the loop header contains all the
1426      executable statements, and the latch is empty.  */
1427   if (!empty_block_p (loop->latch)
1428       || !gimple_seq_empty_p (phi_nodes (loop->latch)))
1429     {
1430       if (dump_enabled_p ())
1431         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1432                          "not vectorized: latch block not empty.\n");
1433       return false;
1434     }
1435
1436   /* Make sure there exists a single-predecessor exit bb:  */
1437   if (!single_pred_p (single_exit (loop)->dest))
1438     {
1439       edge e = single_exit (loop);
1440       if (!(e->flags & EDGE_ABNORMAL))
1441         {
1442           split_loop_exit_edge (e);
1443           if (dump_enabled_p ())
1444             dump_printf (MSG_NOTE, "split exit edge.\n");
1445         }
1446       else
1447         {
1448           if (dump_enabled_p ())
1449             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1450                              "not vectorized: abnormal loop exit edge.\n");
1451           return false;
1452         }
1453     }
1454
1455   *loop_cond = vect_get_loop_niters (loop, number_of_iterations,
1456                                      number_of_iterationsm1);
1457   if (!*loop_cond)
1458     {
1459       if (dump_enabled_p ())
1460         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1461                          "not vectorized: complicated exit condition.\n");
1462       return false;
1463     }
1464
1465   if (!*number_of_iterations
1466       || chrec_contains_undetermined (*number_of_iterations))
1467     {
1468       if (dump_enabled_p ())
1469         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1470                          "not vectorized: number of iterations cannot be "
1471                          "computed.\n");
1472       return false;
1473     }
1474
1475   if (integer_zerop (*number_of_iterations))
1476     {
1477       if (dump_enabled_p ())
1478         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1479                          "not vectorized: number of iterations = 0.\n");
1480       return false;
1481     }
1482
1483   return true;
1484 }
1485
1486 /* Analyze LOOP form and return a loop_vec_info if it is of suitable form.  */
1487
1488 loop_vec_info
1489 vect_analyze_loop_form (struct loop *loop)
1490 {
1491   tree number_of_iterations, number_of_iterationsm1;
1492   gcond *loop_cond, *inner_loop_cond = NULL;
1493
1494   if (! vect_analyze_loop_form_1 (loop, &loop_cond, &number_of_iterationsm1,
1495                                   &number_of_iterations, &inner_loop_cond))
1496     return NULL;
1497
1498   loop_vec_info loop_vinfo = new_loop_vec_info (loop);
1499   LOOP_VINFO_NITERSM1 (loop_vinfo) = number_of_iterationsm1;
1500   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1501   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
1502
1503   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
1504     {
1505       if (dump_enabled_p ())
1506         {
1507           dump_printf_loc (MSG_NOTE, vect_location,
1508                            "Symbolic number of iterations is ");
1509           dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
1510           dump_printf (MSG_NOTE, "\n");
1511         }
1512     }
1513
1514   STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
1515   if (inner_loop_cond)
1516     STMT_VINFO_TYPE (vinfo_for_stmt (inner_loop_cond))
1517       = loop_exit_ctrl_vec_info_type;
1518
1519   gcc_assert (!loop->aux);
1520   loop->aux = loop_vinfo;
1521   return loop_vinfo;
1522 }
1523
1524
1525
1526 /* Scan the loop stmts and dependent on whether there are any (non-)SLP
1527    statements update the vectorization factor.  */
1528
1529 static void
1530 vect_update_vf_for_slp (loop_vec_info loop_vinfo)
1531 {
1532   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1533   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1534   int nbbs = loop->num_nodes;
1535   unsigned int vectorization_factor;
1536   int i;
1537
1538   if (dump_enabled_p ())
1539     dump_printf_loc (MSG_NOTE, vect_location,
1540                      "=== vect_update_vf_for_slp ===\n");
1541
1542   vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1543   gcc_assert (vectorization_factor != 0);
1544
1545   /* If all the stmts in the loop can be SLPed, we perform only SLP, and
1546      vectorization factor of the loop is the unrolling factor required by
1547      the SLP instances.  If that unrolling factor is 1, we say, that we
1548      perform pure SLP on loop - cross iteration parallelism is not
1549      exploited.  */
1550   bool only_slp_in_loop = true;
1551   for (i = 0; i < nbbs; i++)
1552     {
1553       basic_block bb = bbs[i];
1554       for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1555            gsi_next (&si))
1556         {
1557           gimple *stmt = gsi_stmt (si);
1558           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1559           if (STMT_VINFO_IN_PATTERN_P (stmt_info)
1560               && STMT_VINFO_RELATED_STMT (stmt_info))
1561             {
1562               stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1563               stmt_info = vinfo_for_stmt (stmt);
1564             }
1565           if ((STMT_VINFO_RELEVANT_P (stmt_info)
1566                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1567               && !PURE_SLP_STMT (stmt_info))
1568             /* STMT needs both SLP and loop-based vectorization.  */
1569             only_slp_in_loop = false;
1570         }
1571     }
1572
1573   if (only_slp_in_loop)
1574     vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
1575   else
1576     vectorization_factor
1577       = least_common_multiple (vectorization_factor,
1578                                LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
1579
1580   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1581   if (dump_enabled_p ())
1582     dump_printf_loc (MSG_NOTE, vect_location,
1583                      "Updating vectorization factor to %d\n",
1584                      vectorization_factor);
1585 }
1586
1587 /* Function vect_analyze_loop_operations.
1588
1589    Scan the loop stmts and make sure they are all vectorizable.  */
1590
1591 static bool
1592 vect_analyze_loop_operations (loop_vec_info loop_vinfo)
1593 {
1594   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1595   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1596   int nbbs = loop->num_nodes;
1597   int i;
1598   stmt_vec_info stmt_info;
1599   bool need_to_vectorize = false;
1600   bool ok;
1601
1602   if (dump_enabled_p ())
1603     dump_printf_loc (MSG_NOTE, vect_location,
1604                      "=== vect_analyze_loop_operations ===\n");
1605
1606   for (i = 0; i < nbbs; i++)
1607     {
1608       basic_block bb = bbs[i];
1609
1610       for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
1611            gsi_next (&si))
1612         {
1613           gphi *phi = si.phi ();
1614           ok = true;
1615
1616           stmt_info = vinfo_for_stmt (phi);
1617           if (dump_enabled_p ())
1618             {
1619               dump_printf_loc (MSG_NOTE, vect_location, "examining phi: ");
1620               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1621               dump_printf (MSG_NOTE, "\n");
1622             }
1623           if (virtual_operand_p (gimple_phi_result (phi)))
1624             continue;
1625
1626           /* Inner-loop loop-closed exit phi in outer-loop vectorization
1627              (i.e., a phi in the tail of the outer-loop).  */
1628           if (! is_loop_header_bb_p (bb))
1629             {
1630               /* FORNOW: we currently don't support the case that these phis
1631                  are not used in the outerloop (unless it is double reduction,
1632                  i.e., this phi is vect_reduction_def), cause this case
1633                  requires to actually do something here.  */
1634               if ((!STMT_VINFO_RELEVANT_P (stmt_info)
1635                    || STMT_VINFO_LIVE_P (stmt_info))
1636                   && STMT_VINFO_DEF_TYPE (stmt_info)
1637                      != vect_double_reduction_def)
1638                 {
1639                   if (dump_enabled_p ())
1640                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1641                                      "Unsupported loop-closed phi in "
1642                                      "outer-loop.\n");
1643                   return false;
1644                 }
1645
1646               /* If PHI is used in the outer loop, we check that its operand
1647                  is defined in the inner loop.  */
1648               if (STMT_VINFO_RELEVANT_P (stmt_info))
1649                 {
1650                   tree phi_op;
1651                   gimple *op_def_stmt;
1652
1653                   if (gimple_phi_num_args (phi) != 1)
1654                     return false;
1655
1656                   phi_op = PHI_ARG_DEF (phi, 0);
1657                   if (TREE_CODE (phi_op) != SSA_NAME)
1658                     return false;
1659
1660                   op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
1661                   if (gimple_nop_p (op_def_stmt)
1662                       || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
1663                       || !vinfo_for_stmt (op_def_stmt))
1664                     return false;
1665
1666                   if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1667                         != vect_used_in_outer
1668                       && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1669                            != vect_used_in_outer_by_reduction)
1670                     return false;
1671                 }
1672
1673               continue;
1674             }
1675
1676           gcc_assert (stmt_info);
1677
1678           if (STMT_VINFO_LIVE_P (stmt_info))
1679             {
1680               /* FORNOW: not yet supported.  */
1681               if (dump_enabled_p ())
1682                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1683                                  "not vectorized: value used after loop.\n");
1684               return false;
1685             }
1686
1687           if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1688               && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1689             {
1690               /* A scalar-dependence cycle that we don't support.  */
1691               if (dump_enabled_p ())
1692                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1693                                  "not vectorized: scalar dependence cycle.\n");
1694               return false;
1695             }
1696
1697           if (STMT_VINFO_RELEVANT_P (stmt_info))
1698             {
1699               need_to_vectorize = true;
1700               if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
1701                 ok = vectorizable_induction (phi, NULL, NULL);
1702             }
1703
1704           if (!ok)
1705             {
1706               if (dump_enabled_p ())
1707                 {
1708                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1709                                    "not vectorized: relevant phi not "
1710                                    "supported: ");
1711                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, phi, 0);
1712                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1713                 }
1714               return false;
1715             }
1716         }
1717
1718       for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1719            gsi_next (&si))
1720         {
1721           gimple *stmt = gsi_stmt (si);
1722           if (!gimple_clobber_p (stmt)
1723               && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
1724             return false;
1725         }
1726     } /* bbs */
1727
1728   /* All operations in the loop are either irrelevant (deal with loop
1729      control, or dead), or only used outside the loop and can be moved
1730      out of the loop (e.g. invariants, inductions).  The loop can be
1731      optimized away by scalar optimizations.  We're better off not
1732      touching this loop.  */
1733   if (!need_to_vectorize)
1734     {
1735       if (dump_enabled_p ())
1736         dump_printf_loc (MSG_NOTE, vect_location,
1737                          "All the computation can be taken out of the loop.\n");
1738       if (dump_enabled_p ())
1739         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1740                          "not vectorized: redundant loop. no profit to "
1741                          "vectorize.\n");
1742       return false;
1743     }
1744
1745   return true;
1746 }
1747
1748
1749 /* Function vect_analyze_loop_2.
1750
1751    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1752    for it.  The different analyses will record information in the
1753    loop_vec_info struct.  */
1754 static bool
1755 vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
1756 {
1757   bool ok;
1758   int max_vf = MAX_VECTORIZATION_FACTOR;
1759   int min_vf = 2;
1760   unsigned int n_stmts = 0;
1761
1762   /* The first group of checks is independent of the vector size.  */
1763   fatal = true;
1764
1765   /* Find all data references in the loop (which correspond to vdefs/vuses)
1766      and analyze their evolution in the loop.  */
1767
1768   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1769
1770   loop_p loop = LOOP_VINFO_LOOP (loop_vinfo);
1771   if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo)))
1772     {
1773       if (dump_enabled_p ())
1774         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1775                          "not vectorized: loop contains function calls"
1776                          " or data references that cannot be analyzed\n");
1777       return false;
1778     }
1779
1780   for (unsigned i = 0; i < loop->num_nodes; i++)
1781     for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
1782          !gsi_end_p (gsi); gsi_next (&gsi))
1783       {
1784         gimple *stmt = gsi_stmt (gsi);
1785         if (is_gimple_debug (stmt))
1786           continue;
1787         ++n_stmts;
1788         if (!find_data_references_in_stmt (loop, stmt,
1789                                            &LOOP_VINFO_DATAREFS (loop_vinfo)))
1790           {
1791             if (is_gimple_call (stmt) && loop->safelen)
1792               {
1793                 tree fndecl = gimple_call_fndecl (stmt), op;
1794                 if (fndecl != NULL_TREE)
1795                   {
1796                     cgraph_node *node = cgraph_node::get (fndecl);
1797                     if (node != NULL && node->simd_clones != NULL)
1798                       {
1799                         unsigned int j, n = gimple_call_num_args (stmt);
1800                         for (j = 0; j < n; j++)
1801                           {
1802                             op = gimple_call_arg (stmt, j);
1803                             if (DECL_P (op)
1804                                 || (REFERENCE_CLASS_P (op)
1805                                     && get_base_address (op)))
1806                               break;
1807                           }
1808                         op = gimple_call_lhs (stmt);
1809                         /* Ignore #pragma omp declare simd functions
1810                            if they don't have data references in the
1811                            call stmt itself.  */
1812                         if (j == n
1813                             && !(op
1814                                  && (DECL_P (op)
1815                                      || (REFERENCE_CLASS_P (op)
1816                                          && get_base_address (op)))))
1817                           continue;
1818                       }
1819                   }
1820               }
1821             if (dump_enabled_p ())
1822               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1823                                "not vectorized: loop contains function "
1824                                "calls or data references that cannot "
1825                                "be analyzed\n");
1826             return false;
1827           }
1828       }
1829
1830   /* Analyze the data references and also adjust the minimal
1831      vectorization factor according to the loads and stores.  */
1832
1833   ok = vect_analyze_data_refs (loop_vinfo, &min_vf);
1834   if (!ok)
1835     {
1836       if (dump_enabled_p ())
1837         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1838                          "bad data references.\n");
1839       return false;
1840     }
1841
1842   /* Classify all cross-iteration scalar data-flow cycles.
1843      Cross-iteration cycles caused by virtual phis are analyzed separately.  */
1844   vect_analyze_scalar_cycles (loop_vinfo);
1845
1846   vect_pattern_recog (loop_vinfo);
1847
1848   vect_fixup_scalar_cycles_with_patterns (loop_vinfo);
1849
1850   /* Analyze the access patterns of the data-refs in the loop (consecutive,
1851      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
1852
1853   ok = vect_analyze_data_ref_accesses (loop_vinfo);
1854   if (!ok)
1855     {
1856       if (dump_enabled_p ())
1857         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1858                          "bad data access.\n");
1859       return false;
1860     }
1861
1862   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
1863
1864   ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1865   if (!ok)
1866     {
1867       if (dump_enabled_p ())
1868         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1869                          "unexpected pattern.\n");
1870       return false;
1871     }
1872
1873   /* While the rest of the analysis below depends on it in some way.  */
1874   fatal = false;
1875
1876   /* Analyze data dependences between the data-refs in the loop
1877      and adjust the maximum vectorization factor according to
1878      the dependences.
1879      FORNOW: fail at the first data dependence that we encounter.  */
1880
1881   ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf);
1882   if (!ok
1883       || max_vf < min_vf)
1884     {
1885       if (dump_enabled_p ())
1886             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1887                              "bad data dependence.\n");
1888       return false;
1889     }
1890
1891   ok = vect_determine_vectorization_factor (loop_vinfo);
1892   if (!ok)
1893     {
1894       if (dump_enabled_p ())
1895         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1896                          "can't determine vectorization factor.\n");
1897       return false;
1898     }
1899   if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1900     {
1901       if (dump_enabled_p ())
1902         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1903                          "bad data dependence.\n");
1904       return false;
1905     }
1906
1907   /* Compute the scalar iteration cost.  */
1908   vect_compute_single_scalar_iteration_cost (loop_vinfo);
1909
1910   int saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1911   HOST_WIDE_INT estimated_niter;
1912   unsigned th;
1913   int min_scalar_loop_bound;
1914
1915   /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
1916   ok = vect_analyze_slp (loop_vinfo, n_stmts);
1917   if (!ok)
1918     return false;
1919
1920   /* If there are any SLP instances mark them as pure_slp.  */
1921   bool slp = vect_make_slp_decision (loop_vinfo);
1922   if (slp)
1923     {
1924       /* Find stmts that need to be both vectorized and SLPed.  */
1925       vect_detect_hybrid_slp (loop_vinfo);
1926
1927       /* Update the vectorization factor based on the SLP decision.  */
1928       vect_update_vf_for_slp (loop_vinfo);
1929     }
1930
1931   /* This is the point where we can re-start analysis with SLP forced off.  */
1932 start_over:
1933
1934   /* Now the vectorization factor is final.  */
1935   unsigned vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1936   gcc_assert (vectorization_factor != 0);
1937
1938   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
1939     dump_printf_loc (MSG_NOTE, vect_location,
1940                      "vectorization_factor = %d, niters = "
1941                      HOST_WIDE_INT_PRINT_DEC "\n", vectorization_factor,
1942                      LOOP_VINFO_INT_NITERS (loop_vinfo));
1943
1944   HOST_WIDE_INT max_niter
1945     = max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
1946   if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1947        && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
1948       || (max_niter != -1
1949           && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor))
1950     {
1951       if (dump_enabled_p ())
1952         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1953                          "not vectorized: iteration count smaller than "
1954                          "vectorization factor.\n");
1955       return false;
1956     }
1957
1958   /* Analyze the alignment of the data-refs in the loop.
1959      Fail if a data reference is found that cannot be vectorized.  */
1960
1961   ok = vect_analyze_data_refs_alignment (loop_vinfo);
1962   if (!ok)
1963     {
1964       if (dump_enabled_p ())
1965         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1966                          "bad data alignment.\n");
1967       return false;
1968     }
1969
1970   /* Prune the list of ddrs to be tested at run-time by versioning for alias.
1971      It is important to call pruning after vect_analyze_data_ref_accesses,
1972      since we use grouping information gathered by interleaving analysis.  */
1973   ok = vect_prune_runtime_alias_test_list (loop_vinfo);
1974   if (!ok)
1975     {
1976       if (dump_enabled_p ())
1977         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1978                          "number of versioning for alias "
1979                          "run-time tests exceeds %d "
1980                          "(--param vect-max-version-for-alias-checks)\n",
1981                          PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
1982       return false;
1983     }
1984
1985   /* This pass will decide on using loop versioning and/or loop peeling in
1986      order to enhance the alignment of data references in the loop.  */
1987   ok = vect_enhance_data_refs_alignment (loop_vinfo);
1988   if (!ok)
1989     {
1990       if (dump_enabled_p ())
1991         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1992                          "bad data alignment.\n");
1993       return false;
1994     }
1995
1996   if (slp)
1997     {
1998       /* Analyze operations in the SLP instances.  Note this may
1999          remove unsupported SLP instances which makes the above
2000          SLP kind detection invalid.  */
2001       unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
2002       vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
2003                                    LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
2004       if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
2005         goto again;
2006     }
2007
2008   /* Scan all the remaining operations in the loop that are not subject
2009      to SLP and make sure they are vectorizable.  */
2010   ok = vect_analyze_loop_operations (loop_vinfo);
2011   if (!ok)
2012     {
2013       if (dump_enabled_p ())
2014         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2015                          "bad operation or unsupported loop bound.\n");
2016       return false;
2017     }
2018
2019   /* Analyze cost.  Decide if worth while to vectorize.  */
2020   int min_profitable_estimate, min_profitable_iters;
2021   vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
2022                                       &min_profitable_estimate);
2023
2024   if (min_profitable_iters < 0)
2025     {
2026       if (dump_enabled_p ())
2027         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2028                          "not vectorized: vectorization not profitable.\n");
2029       if (dump_enabled_p ())
2030         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2031                          "not vectorized: vector version will never be "
2032                          "profitable.\n");
2033       goto again;
2034     }
2035
2036   min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
2037                             * vectorization_factor) - 1);
2038
2039   /* Use the cost model only if it is more conservative than user specified
2040      threshold.  */
2041   th = (unsigned) min_scalar_loop_bound;
2042   if (min_profitable_iters
2043       && (!min_scalar_loop_bound
2044           || min_profitable_iters > min_scalar_loop_bound))
2045     th = (unsigned) min_profitable_iters;
2046
2047   LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th;
2048
2049   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2050       && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
2051     {
2052       if (dump_enabled_p ())
2053         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2054                          "not vectorized: vectorization not profitable.\n");
2055       if (dump_enabled_p ())
2056         dump_printf_loc (MSG_NOTE, vect_location,
2057                          "not vectorized: iteration count smaller than user "
2058                          "specified loop bound parameter or minimum profitable "
2059                          "iterations (whichever is more conservative).\n");
2060       goto again;
2061     }
2062
2063   estimated_niter
2064     = estimated_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
2065   if (estimated_niter != -1
2066       && ((unsigned HOST_WIDE_INT) estimated_niter
2067           <= MAX (th, (unsigned)min_profitable_estimate)))
2068     {
2069       if (dump_enabled_p ())
2070         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2071                          "not vectorized: estimated iteration count too "
2072                          "small.\n");
2073       if (dump_enabled_p ())
2074         dump_printf_loc (MSG_NOTE, vect_location,
2075                          "not vectorized: estimated iteration count smaller "
2076                          "than specified loop bound parameter or minimum "
2077                          "profitable iterations (whichever is more "
2078                          "conservative).\n");
2079       goto again;
2080     }
2081
2082   /* Decide whether we need to create an epilogue loop to handle
2083      remaining scalar iterations.  */
2084   th = ((LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) + 1)
2085         / LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2086        * LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2087
2088   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2089       && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2090     {
2091       if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo)
2092                    - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
2093           < exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
2094         LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
2095     }
2096   else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
2097            || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
2098                < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2099                /* In case of versioning, check if the maximum number of
2100                   iterations is greater than th.  If they are identical,
2101                   the epilogue is unnecessary.  */
2102                && ((!LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)
2103                     && !LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2104                    || (unsigned HOST_WIDE_INT) max_niter > th)))
2105     LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
2106
2107   /* If an epilogue loop is required make sure we can create one.  */
2108   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
2109       || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
2110     {
2111       if (dump_enabled_p ())
2112         dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
2113       if (!vect_can_advance_ivs_p (loop_vinfo)
2114           || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
2115                                            single_exit (LOOP_VINFO_LOOP
2116                                                          (loop_vinfo))))
2117         {
2118           if (dump_enabled_p ())
2119             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2120                              "not vectorized: can't create required "
2121                              "epilog loop\n");
2122           goto again;
2123         }
2124     }
2125
2126   gcc_assert (vectorization_factor
2127               == (unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2128
2129   /* Ok to vectorize!  */
2130   return true;
2131
2132 again:
2133   /* Try again with SLP forced off but if we didn't do any SLP there is
2134      no point in re-trying.  */
2135   if (!slp)
2136     return false;
2137
2138   /* If there are reduction chains re-trying will fail anyway.  */
2139   if (! LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).is_empty ())
2140     return false;
2141
2142   /* Likewise if the grouped loads or stores in the SLP cannot be handled
2143      via interleaving or lane instructions.  */
2144   slp_instance instance;
2145   slp_tree node;
2146   unsigned i, j;
2147   FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)
2148     {
2149       stmt_vec_info vinfo;
2150       vinfo = vinfo_for_stmt
2151           (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0]);
2152       if (! STMT_VINFO_GROUPED_ACCESS (vinfo))
2153         continue;
2154       vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo));
2155       unsigned int size = STMT_VINFO_GROUP_SIZE (vinfo);
2156       tree vectype = STMT_VINFO_VECTYPE (vinfo);
2157       if (! vect_store_lanes_supported (vectype, size)
2158           && ! vect_grouped_store_supported (vectype, size))
2159         return false;
2160       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, node)
2161         {
2162           vinfo = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
2163           vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo));
2164           size = STMT_VINFO_GROUP_SIZE (vinfo);
2165           vectype = STMT_VINFO_VECTYPE (vinfo);
2166           if (! vect_load_lanes_supported (vectype, size)
2167               && ! vect_grouped_load_supported (vectype, size))
2168             return false;
2169         }
2170     }
2171
2172   if (dump_enabled_p ())
2173     dump_printf_loc (MSG_NOTE, vect_location,
2174                      "re-trying with SLP disabled\n");
2175
2176   /* Roll back state appropriately.  No SLP this time.  */
2177   slp = false;
2178   /* Restore vectorization factor as it were without SLP.  */
2179   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor;
2180   /* Free the SLP instances.  */
2181   FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance)
2182     vect_free_slp_instance (instance);
2183   LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
2184   /* Reset SLP type to loop_vect on all stmts.  */
2185   for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2186     {
2187       basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2188       for (gimple_stmt_iterator si = gsi_start_bb (bb);
2189            !gsi_end_p (si); gsi_next (&si))
2190         {
2191           stmt_vec_info stmt_info = vinfo_for_stmt (gsi_stmt (si));
2192           STMT_SLP_TYPE (stmt_info) = loop_vect;
2193           if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2194             {
2195               stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2196               STMT_SLP_TYPE (stmt_info) = loop_vect;
2197               for (gimple_stmt_iterator pi
2198                      = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
2199                    !gsi_end_p (pi); gsi_next (&pi))
2200                 {
2201                   gimple *pstmt = gsi_stmt (pi);
2202                   STMT_SLP_TYPE (vinfo_for_stmt (pstmt)) = loop_vect;
2203                 }
2204             }
2205         }
2206     }
2207   /* Free optimized alias test DDRS.  */
2208   LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release ();
2209   /* Reset target cost data.  */
2210   destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
2211   LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
2212     = init_cost (LOOP_VINFO_LOOP (loop_vinfo));
2213   /* Reset assorted flags.  */
2214   LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
2215   LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false;
2216   LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0;
2217
2218   goto start_over;
2219 }
2220
2221 /* Function vect_analyze_loop.
2222
2223    Apply a set of analyses on LOOP, and create a loop_vec_info struct
2224    for it.  The different analyses will record information in the
2225    loop_vec_info struct.  */
2226 loop_vec_info
2227 vect_analyze_loop (struct loop *loop)
2228 {
2229   loop_vec_info loop_vinfo;
2230   unsigned int vector_sizes;
2231
2232   /* Autodetect first vector size we try.  */
2233   current_vector_size = 0;
2234   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2235
2236   if (dump_enabled_p ())
2237     dump_printf_loc (MSG_NOTE, vect_location,
2238                      "===== analyze_loop_nest =====\n");
2239
2240   if (loop_outer (loop)
2241       && loop_vec_info_for_loop (loop_outer (loop))
2242       && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
2243     {
2244       if (dump_enabled_p ())
2245         dump_printf_loc (MSG_NOTE, vect_location,
2246                          "outer-loop already vectorized.\n");
2247       return NULL;
2248     }
2249
2250   while (1)
2251     {
2252       /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
2253       loop_vinfo = vect_analyze_loop_form (loop);
2254       if (!loop_vinfo)
2255         {
2256           if (dump_enabled_p ())
2257             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2258                              "bad loop form.\n");
2259           return NULL;
2260         }
2261
2262       bool fatal = false;
2263       if (vect_analyze_loop_2 (loop_vinfo, fatal))
2264         {
2265           LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2266
2267           return loop_vinfo;
2268         }
2269
2270       destroy_loop_vec_info (loop_vinfo, true);
2271
2272       vector_sizes &= ~current_vector_size;
2273       if (fatal
2274           || vector_sizes == 0
2275           || current_vector_size == 0)
2276         return NULL;
2277
2278       /* Try the next biggest vector size.  */
2279       current_vector_size = 1 << floor_log2 (vector_sizes);
2280       if (dump_enabled_p ())
2281         dump_printf_loc (MSG_NOTE, vect_location,
2282                          "***** Re-trying analysis with "
2283                          "vector size %d\n", current_vector_size);
2284     }
2285 }
2286
2287
2288 /* Function reduction_code_for_scalar_code
2289
2290    Input:
2291    CODE - tree_code of a reduction operations.
2292
2293    Output:
2294    REDUC_CODE - the corresponding tree-code to be used to reduce the
2295       vector of partial results into a single scalar result, or ERROR_MARK
2296       if the operation is a supported reduction operation, but does not have
2297       such a tree-code.
2298
2299    Return FALSE if CODE currently cannot be vectorized as reduction.  */
2300
2301 static bool
2302 reduction_code_for_scalar_code (enum tree_code code,
2303                                 enum tree_code *reduc_code)
2304 {
2305   switch (code)
2306     {
2307       case MAX_EXPR:
2308         *reduc_code = REDUC_MAX_EXPR;
2309         return true;
2310
2311       case MIN_EXPR:
2312         *reduc_code = REDUC_MIN_EXPR;
2313         return true;
2314
2315       case PLUS_EXPR:
2316         *reduc_code = REDUC_PLUS_EXPR;
2317         return true;
2318
2319       case MULT_EXPR:
2320       case MINUS_EXPR:
2321       case BIT_IOR_EXPR:
2322       case BIT_XOR_EXPR:
2323       case BIT_AND_EXPR:
2324         *reduc_code = ERROR_MARK;
2325         return true;
2326
2327       default:
2328        return false;
2329     }
2330 }
2331
2332
2333 /* Error reporting helper for vect_is_simple_reduction below.  GIMPLE statement
2334    STMT is printed with a message MSG.  MSG is emitted first, tagged with
        vect_location, then STMT in TDF_SLIM form, then a newline; all three
        pieces use dump kind MSG_TYPE.  This helper does not test
        dump_enabled_p itself; the callers in this file do so before calling.  */
2335
2336 static void
2337 report_vect_op (int msg_type, gimple *stmt, const char *msg)
2338 {
2339   dump_printf_loc (msg_type, vect_location, "%s", msg);
2340   dump_gimple_stmt (msg_type, TDF_SLIM, stmt, 0);
2341   dump_printf (msg_type, "\n");
2342 }
2343
2344
2345 /* Detect SLP reduction of the form:
2346
2347    #a1 = phi <a5, a0>
2348    a2 = operation (a1)
2349    a3 = operation (a2)
2350    a4 = operation (a3)
2351    a5 = operation (a4)
2352
2353    #a = phi <a5>
2354
2355    PHI is the reduction phi node (#a1 = phi <a5, a0> above)
2356    FIRST_STMT is the first reduction stmt in the chain
2357    (a2 = operation (a1)).  Only the rhs code of FIRST_STMT is read here;
        every statement of the chain must use that same code.
2358
        On success the chain statements are linked through
        GROUP_FIRST_ELEMENT / GROUP_NEXT_ELEMENT, the head of the chain is
        pushed onto LOOP_VINFO_REDUCTION_CHAINS (LOOP_INFO) and GROUP_SIZE of
        the head is set to the number of statements in the chain.  Operands of
        chain statements may be swapped in place so that the reduction operand
        is the second one.

2359    Return TRUE if a reduction chain was detected.  */
2360
2361 static bool
2362 vect_is_slp_reduction (loop_vec_info loop_info, gimple *phi,
2363                        gimple *first_stmt)
2364 {
2365   struct loop *loop = (gimple_bb (phi))->loop_father;
2366   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2367   enum tree_code code;
2368   gimple *current_stmt = NULL, *loop_use_stmt = NULL, *first, *next_stmt;
2369   stmt_vec_info use_stmt_info, current_stmt_info;
2370   tree lhs;
2371   imm_use_iterator imm_iter;
2372   use_operand_p use_p;
2373   int nloop_uses, size = 0, n_out_of_loop_uses;
2374   bool found = false;
2375
       /* Only handle a PHI whose containing loop is the loop recorded in
          LOOP_INFO.  */
2376   if (loop != vect_loop)
2377     return false;
2378
       /* Walk the single-use chain starting at the PHI result until a use
          leads back to PHI.  */
2379   lhs = PHI_RESULT (phi);
2380   code = gimple_assign_rhs_code (first_stmt);
2381   while (1)
2382     {
2383       nloop_uses = 0;
2384       n_out_of_loop_uses = 0;
2385       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2386         {
2387           gimple *use_stmt = USE_STMT (use_p);
2388           if (is_gimple_debug (use_stmt))
2389             continue;
2390
2391           /* Check if we got back to the reduction phi.  */
2392           if (use_stmt == phi)
2393             {
2394               loop_use_stmt = use_stmt;
2395               found = true;
2396               break;
2397             }
2398
2399           if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2400             {
2401               loop_use_stmt = use_stmt;
2402               nloop_uses++;
2403             }
2404            else
2405              n_out_of_loop_uses++;
2406
2407            /* There can be either a single use in the loop or two uses in
2408               phi nodes.  */
2409            if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
2410              return false;
2411         }
2412
2413       if (found)
2414         break;
2415
2416       /* We reached a statement with no loop uses.  */
2417       if (nloop_uses == 0)
2418         return false;
2419
2420       /* This is a loop exit phi, and we haven't reached the reduction phi.  */
2421       if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
2422         return false;
2423
           /* Every link of the chain must be an assignment inside LOOP using
              the same operation code as FIRST_STMT.  */
2424       if (!is_gimple_assign (loop_use_stmt)
2425           || code != gimple_assign_rhs_code (loop_use_stmt)
2426           || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
2427         return false;
2428
2429       /* Insert USE_STMT into reduction chain.  */
2430       use_stmt_info = vinfo_for_stmt (loop_use_stmt);
2431       if (current_stmt)
2432         {
2433           current_stmt_info = vinfo_for_stmt (current_stmt);
2434           GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
2435           GROUP_FIRST_ELEMENT (use_stmt_info)
2436             = GROUP_FIRST_ELEMENT (current_stmt_info);
2437         }
2438       else
2439         GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;
2440
2441       lhs = gimple_assign_lhs (loop_use_stmt);
2442       current_stmt = loop_use_stmt;
2443       size++;
2444    }
2445
       /* A chain needs at least two statements and must close on PHI.
          NOTE(review): the GROUP_FIRST_ELEMENT / GROUP_NEXT_ELEMENT links set
          in the loop above are not cleared on this or any later failure
          return -- confirm that callers tolerate a half-built group.  */
2446   if (!found || loop_use_stmt != phi || size < 2)
2447     return false;
2448
2449   /* Swap the operands, if needed, to make the reduction operand be the second
2450      operand.  */
2451   lhs = PHI_RESULT (phi);
2452   next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2453   while (next_stmt)
2454     {
           /* The reduction operand is already second: only validate the
              definition of the other operand.  */
2455       if (gimple_assign_rhs2 (next_stmt) == lhs)
2456         {
2457           tree op = gimple_assign_rhs1 (next_stmt);
2458           gimple *def_stmt = NULL;
2459
2460           if (TREE_CODE (op) == SSA_NAME)
2461             def_stmt = SSA_NAME_DEF_STMT (op);
2462
2463           /* Check that the other def is either defined in the loop
2464              ("vect_internal_def"), or it's an induction (defined by a
2465              loop-header phi-node).  */
2466           if (def_stmt
2467               && gimple_bb (def_stmt)
2468               && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2469               && (is_gimple_assign (def_stmt)
2470                   || is_gimple_call (def_stmt)
2471                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2472                            == vect_induction_def
2473                   || (gimple_code (def_stmt) == GIMPLE_PHI
2474                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2475                                   == vect_internal_def
2476                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2477             {
2478               lhs = gimple_assign_lhs (next_stmt);
2479               next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2480               continue;
2481             }
2482
2483           return false;
2484         }
2485       else
2486         {
               /* The reduction operand is first: validate the second operand
                  and, if acceptable, swap the two operands.  */
2487           tree op = gimple_assign_rhs2 (next_stmt);
2488           gimple *def_stmt = NULL;
2489
2490           if (TREE_CODE (op) == SSA_NAME)
2491             def_stmt = SSA_NAME_DEF_STMT (op);
2492
2493           /* Check that the other def is either defined in the loop
2494             ("vect_internal_def"), or it's an induction (defined by a
2495             loop-header phi-node).  */
2496           if (def_stmt
2497               && gimple_bb (def_stmt)
2498               && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2499               && (is_gimple_assign (def_stmt)
2500                   || is_gimple_call (def_stmt)
2501                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2502                               == vect_induction_def
2503                   || (gimple_code (def_stmt) == GIMPLE_PHI
2504                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2505                                   == vect_internal_def
2506                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2507             {
2508               if (dump_enabled_p ())
2509                 {
2510                   dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: ");
2511                   dump_gimple_stmt (MSG_NOTE, TDF_SLIM, next_stmt, 0);
2512                   dump_printf (MSG_NOTE, "\n");
2513                 }
2514
2515               swap_ssa_operands (next_stmt,
2516                                  gimple_assign_rhs1_ptr (next_stmt),
2517                                  gimple_assign_rhs2_ptr (next_stmt));
2518               update_stmt (next_stmt);
2519
                   /* Record on LOOP_INFO that a swap left a constant as the
                      first operand.  */
2520               if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
2521                 LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2522             }
2523           else
2524             return false;
2525         }
2526
2527       lhs = gimple_assign_lhs (next_stmt);
2528       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2529     }
2530
2531   /* Save the chain for further analysis in SLP detection.  */
2532   first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2533   LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (first);
2534   GROUP_SIZE (vinfo_for_stmt (first)) = size;
2535
2536   return true;
2537 }
2538
2539
2540 /* Function vect_is_simple_reduction
2541
2542    (1) Detect a cross-iteration def-use cycle that represents a simple
2543    reduction computation.  We look for the following pattern:
2544
2545    loop_header:
2546      a1 = phi < a0, a2 >
2547      a3 = ...
2548      a2 = operation (a3, a1)
2549
2550    or
2551
2552    a3 = ...
2553    loop_header:
2554      a1 = phi < a0, a2 >
2555      a2 = operation (a3, a1)
2556
2557    such that:
2558    1. operation is commutative and associative and it is safe to
2559       change the order of the computation (if CHECK_REDUCTION is true)
2560    2. no uses for a2 in the loop (a2 is used out of the loop)
2561    3. no uses of a1 in the loop besides the reduction operation
2562    4. no uses of a1 outside the loop.
2563
2564    Conditions 1,4 are tested here.
2565    Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
2566
2567    (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
2568    nested cycles, if CHECK_REDUCTION is false.
2569
2570    (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
2571    reductions:
2572
2573      a1 = phi < a0, a2 >
2574      inner loop (def of a3)
2575      a2 = phi < a3 >
2576
2577    (4) Detect condition expressions, ie:
2578      for (int i = 0; i < N; i++)
2579        if (a[i] < val)
2580         ret_val = a[i];
2581
        LOOP_INFO describes the loop being vectorized and PHI is the candidate
        reduction phi.  *DOUBLE_REDUC is cleared on entry and set when
        pattern (3) is found.  *V_REDUC_TYPE starts as TREE_CODE_REDUCTION and
        becomes COND_REDUCTION when CHECK_REDUCTION is true and the defining
        statement is a COND_EXPR.  If NEED_WRAPPING_INTEGRAL_OVERFLOW is true,
        a checked (CHECK_REDUCTION) non-conditional integral reduction is
        rejected when its type does not wrap on overflow and the operation
        can overflow.

        Return the statement defining the latch value of PHI when a supported
        pattern is found, NULL otherwise.  The operands of that statement may
        be swapped in place so that the reduction operand comes second.

2582 */
2583
2584 static gimple *
2585 vect_is_simple_reduction (loop_vec_info loop_info, gimple *phi,
2586                           bool check_reduction, bool *double_reduc,
2587                           bool need_wrapping_integral_overflow,
2588                           enum vect_reduction_type *v_reduc_type)
2589 {
2590   struct loop *loop = (gimple_bb (phi))->loop_father;
2591   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2592   edge latch_e = loop_latch_edge (loop);
2593   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
2594   gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt = NULL;
2595   enum tree_code orig_code, code;
2596   tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
2597   tree type;
2598   int nloop_uses;
2599   tree name;
2600   imm_use_iterator imm_iter;
2601   use_operand_p use_p;
2602   bool phi_def;
2603
2604   *double_reduc = false;
2605   *v_reduc_type = TREE_CODE_REDUCTION;
2606
2607   /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
2608      otherwise, we assume outer loop vectorization.  */
2609   gcc_assert ((check_reduction && loop == vect_loop)
2610               || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
2611
2612   name = PHI_RESULT (phi);
2613   /* ???  If there are no uses of the PHI result the inner loop reduction
2614      won't be detected as possibly double-reduction by vectorizable_reduction
2615      because that tries to walk the PHI arg from the preheader edge which
2616      can be constant.  See PR60382.  */
2617   if (has_zero_uses (name))
2618     return NULL;
       /* Every non-debug use of the PHI result must be inside LOOP and there
          may be only one of them; it is remembered in PHI_USE_STMT.  */
2619   nloop_uses = 0;
2620   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2621     {
2622       gimple *use_stmt = USE_STMT (use_p);
2623       if (is_gimple_debug (use_stmt))
2624         continue;
2625
2626       if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2627         {
2628           if (dump_enabled_p ())
2629             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2630                              "intermediate value used outside loop.\n");
2631
2632           return NULL;
2633         }
2634
2635       nloop_uses++;
2636       if (nloop_uses > 1)
2637         {
2638           if (dump_enabled_p ())
2639             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2640                              "reduction used in loop.\n");
2641           return NULL;
2642         }
2643
2644       phi_use_stmt = use_stmt;
2645     }
2646
       /* The value coming in over the latch edge must be an SSA name defined
          by an assignment or by another PHI.  */
2647   if (TREE_CODE (loop_arg) != SSA_NAME)
2648     {
2649       if (dump_enabled_p ())
2650         {
2651           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2652                            "reduction: not ssa_name: ");
2653           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, loop_arg);
2654           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2655         }
2656       return NULL;
2657     }
2658
2659   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2660   if (!def_stmt)
2661     {
2662       if (dump_enabled_p ())
2663         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2664                          "reduction: no def_stmt.\n");
2665       return NULL;
2666     }
2667
2668   if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
2669     {
2670       if (dump_enabled_p ())
2671         {
2672           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2673           dump_printf (MSG_NOTE, "\n");
2674         }
2675       return NULL;
2676     }
2677
2678   if (is_gimple_assign (def_stmt))
2679     {
2680       name = gimple_assign_lhs (def_stmt);
2681       phi_def = false;
2682     }
2683   else
2684     {
2685       name = PHI_RESULT (def_stmt);
2686       phi_def = true;
2687     }
2688
       /* NAME is now the value defined by DEF_STMT; allow at most one
          non-debug use of it inside LOOP.  */
2689   nloop_uses = 0;
2690   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2691     {
2692       gimple *use_stmt = USE_STMT (use_p);
2693       if (is_gimple_debug (use_stmt))
2694         continue;
2695       if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2696         nloop_uses++;
2697       if (nloop_uses > 1)
2698         {
2699           if (dump_enabled_p ())
2700             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2701                              "reduction used in loop.\n");
2702           return NULL;
2703         }
2704     }
2705
2706   /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2707      defined in the inner loop.  */
2708   if (phi_def)
2709     {
2710       op1 = PHI_ARG_DEF (def_stmt, 0);
2711
2712       if (gimple_phi_num_args (def_stmt) != 1
2713           || TREE_CODE (op1) != SSA_NAME)
2714         {
2715           if (dump_enabled_p ())
2716             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2717                              "unsupported phi node definition.\n");
2718
2719           return NULL;
2720         }
2721
           /* Double reduction: DEF_STMT lies in LOOP, its single argument is
              defined by an assignment in LOOP->inner, and the use of the PHI
              result is in LOOP->inner as well.  */
2722       def1 = SSA_NAME_DEF_STMT (op1);
2723       if (gimple_bb (def1)
2724           && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2725           && loop->inner
2726           && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
2727           && is_gimple_assign (def1)
2728           && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt)))
2729         {
2730           if (dump_enabled_p ())
2731             report_vect_op (MSG_NOTE, def_stmt,
2732                             "detected double reduction: ");
2733
2734           *double_reduc = true;
2735           return def_stmt;
2736         }
2737
2738       return NULL;
2739     }
2740
2741   code = orig_code = gimple_assign_rhs_code (def_stmt);
2742
2743   /* We can handle "res -= x[i]", which is non-associative by
2744      simply rewriting this into "res += -x[i]".  Avoid changing
2745      gimple instruction for the first simple tests and only do this
2746      if we're allowed to change code at all.  */
2747   if (code == MINUS_EXPR
2748       && (op1 = gimple_assign_rhs1 (def_stmt))
2749       && TREE_CODE (op1) == SSA_NAME
2750       && SSA_NAME_DEF_STMT (op1) == phi)
2751     code = PLUS_EXPR;
2752
2753   if (check_reduction)
2754     {
2755       if (code == COND_EXPR)
2756         *v_reduc_type = COND_REDUCTION;
2757       else if (!commutative_tree_code (code) || !associative_tree_code (code))
2758         {
2759           if (dump_enabled_p ())
2760             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2761                             "reduction: not commutative/associative: ");
2762           return NULL;
2763         }
2764     }
2765
       /* Collect the operands.  For a COND_EXPR, OP3/OP4 are the operands of
          the condition (when it is a comparison) and OP1/OP2 the two
          selected values; otherwise OP1/OP2 are the binary operands.  */
2766   if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
2767     {
2768       if (code != COND_EXPR)
2769         {
2770           if (dump_enabled_p ())
2771             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2772                             "reduction: not binary operation: ");
2773
2774           return NULL;
2775         }
2776
2777       op3 = gimple_assign_rhs1 (def_stmt);
2778       if (COMPARISON_CLASS_P (op3))
2779         {
2780           op4 = TREE_OPERAND (op3, 1);
2781           op3 = TREE_OPERAND (op3, 0);
2782         }
2783
2784       op1 = gimple_assign_rhs2 (def_stmt);
2785       op2 = gimple_assign_rhs3 (def_stmt);
2786
2787       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2788         {
2789           if (dump_enabled_p ())
2790             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2791                             "reduction: uses not ssa_names: ");
2792
2793           return NULL;
2794         }
2795     }
2796   else
2797     {
2798       op1 = gimple_assign_rhs1 (def_stmt);
2799       op2 = gimple_assign_rhs2 (def_stmt);
2800
2801       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2802         {
2803           if (dump_enabled_p ())
2804             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2805                             "reduction: uses not ssa_names: ");
2806
2807           return NULL;
2808         }
2809    }
2810
       /* All SSA-name operands must have a type compatible with the type of
          the result.  */
2811   type = TREE_TYPE (gimple_assign_lhs (def_stmt));
2812   if ((TREE_CODE (op1) == SSA_NAME
2813        && !types_compatible_p (type,TREE_TYPE (op1)))
2814       || (TREE_CODE (op2) == SSA_NAME
2815           && !types_compatible_p (type, TREE_TYPE (op2)))
2816       || (op3 && TREE_CODE (op3) == SSA_NAME
2817           && !types_compatible_p (type, TREE_TYPE (op3)))
2818       || (op4 && TREE_CODE (op4) == SSA_NAME
2819           && !types_compatible_p (type, TREE_TYPE (op4))))
2820     {
2821       if (dump_enabled_p ())
2822         {
2823           dump_printf_loc (MSG_NOTE, vect_location,
2824                            "reduction: multiple types: operation type: ");
2825           dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
2826           dump_printf (MSG_NOTE, ", operands types: ");
2827           dump_generic_expr (MSG_NOTE, TDF_SLIM,
2828                              TREE_TYPE (op1));
2829           dump_printf (MSG_NOTE, ",");
2830           dump_generic_expr (MSG_NOTE, TDF_SLIM,
2831                              TREE_TYPE (op2));
2832           if (op3)
2833             {
2834               dump_printf (MSG_NOTE, ",");
2835               dump_generic_expr (MSG_NOTE, TDF_SLIM,
2836                                  TREE_TYPE (op3));
2837             }
2838
2839           if (op4)
2840             {
2841               dump_printf (MSG_NOTE, ",");
2842               dump_generic_expr (MSG_NOTE, TDF_SLIM,
2843                                  TREE_TYPE (op4));
2844             }
2845           dump_printf (MSG_NOTE, "\n");
2846         }
2847
2848       return NULL;
2849     }
2850
2851   /* Check that it's ok to change the order of the computation.
2852      Generally, when vectorizing a reduction we change the order of the
2853      computation.  This may change the behavior of the program in some
2854      cases, so we need to check that this is ok.  One exception is when
2855      vectorizing an outer-loop: the inner-loop is executed sequentially,
2856      and therefore vectorizing reductions in the inner-loop during
2857      outer-loop vectorization is safe.  */
2858
2859   if (*v_reduc_type != COND_REDUCTION)
2860     {
2861       /* CHECKME: check for !flag_finite_math_only too?  */
2862       if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
2863           && check_reduction)
2864         {
2865           /* Changing the order of operations changes the semantics.  */
2866           if (dump_enabled_p ())
2867             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2868                         "reduction: unsafe fp math optimization: ");
2869           return NULL;
2870         }
2871       else if (INTEGRAL_TYPE_P (type) && check_reduction)
2872         {
2873           if (!operation_no_trapping_overflow (type, code))
2874             {
2875               /* Changing the order of operations changes the semantics.  */
2876               if (dump_enabled_p ())
2877                 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2878                                 "reduction: unsafe int math optimization"
2879                                 " (overflow traps): ");
2880               return NULL;
2881             }
2882           if (need_wrapping_integral_overflow
2883               && !TYPE_OVERFLOW_WRAPS (type)
2884               && operation_can_overflow (code))
2885             {
2886               /* Changing the order of operations changes the semantics.  */
2887               if (dump_enabled_p ())
2888                 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2889                                 "reduction: unsafe int math optimization"
2890                                 " (overflow doesn't wrap): ");
2891               return NULL;
2892             }
2893         }
2894       else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
2895         {
2896           /* Changing the order of operations changes the semantics.  */
2897           if (dump_enabled_p ())
2898           report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2899                           "reduction: unsafe fixed-point math optimization: ");
2900           return NULL;
2901         }
2902     }
2903
2904   /* Reduction is safe. We're dealing with one of the following:
2905      1) integer arithmetic and no trapv
2906      2) floating point arithmetic, and special flags permit this optimization
2907      3) nested cycle (i.e., outer loop vectorization).  */
2908   if (TREE_CODE (op1) == SSA_NAME)
2909     def1 = SSA_NAME_DEF_STMT (op1);
2910
2911   if (TREE_CODE (op2) == SSA_NAME)
2912     def2 = SSA_NAME_DEF_STMT (op2);
2913
2914   if (code != COND_EXPR
2915       && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
2916     {
2917       if (dump_enabled_p ())
2918         report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: ");
2919       return NULL;
2920     }
2921
2922   /* Check that one def is the reduction def, defined by PHI,
2923      the other def is either defined in the loop ("vect_internal_def"),
2924      or it's an induction (defined by a loop-header phi-node).  */
2925
       /* Case 1: the reduction value is already the second operand.  */
2926   if (def2 && def2 == phi
2927       && (code == COND_EXPR
2928           || !def1 || gimple_nop_p (def1)
2929           || !flow_bb_inside_loop_p (loop, gimple_bb (def1))
2930           || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
2931               && (is_gimple_assign (def1)
2932                   || is_gimple_call (def1)
2933                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2934                       == vect_induction_def
2935                   || (gimple_code (def1) == GIMPLE_PHI
2936                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2937                           == vect_internal_def
2938                       && !is_loop_header_bb_p (gimple_bb (def1)))))))
2939     {
2940       if (dump_enabled_p ())
2941         report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2942       return def_stmt;
2943     }
2944
       /* Case 2: the reduction value is the first operand; swap the operands
          when checking a reduction whose original code is not MINUS_EXPR.  */
2945   if (def1 && def1 == phi
2946       && (code == COND_EXPR
2947           || !def2 || gimple_nop_p (def2)
2948           || !flow_bb_inside_loop_p (loop, gimple_bb (def2))
2949           || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
2950               && (is_gimple_assign (def2)
2951                   || is_gimple_call (def2)
2952                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2953                       == vect_induction_def
2954                   || (gimple_code (def2) == GIMPLE_PHI
2955                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2956                           == vect_internal_def
2957                       && !is_loop_header_bb_p (gimple_bb (def2)))))))
2958     {
2959       if (check_reduction
2960           && orig_code != MINUS_EXPR)
2961         {
2962           if (code == COND_EXPR)
2963             {
2964               /* No current known use where this case would be useful.  */
2965               if (dump_enabled_p ())
2966                 report_vect_op (MSG_NOTE, def_stmt,
2967                                 "detected reduction: cannot currently swap "
2968                                 "operands for cond_expr");
2969               return NULL;
2970             }
2971
2972           /* Swap operands (just for simplicity - so that the rest of the code
2973              can assume that the reduction variable is always the last (second)
2974              argument).  */
2975           if (dump_enabled_p ())
2976             report_vect_op (MSG_NOTE, def_stmt,
2977                             "detected reduction: need to swap operands: ");
2978
2979           swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
2980                              gimple_assign_rhs2_ptr (def_stmt));
2981
2982           if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
2983             LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2984         }
2985       else
2986         {
2987           if (dump_enabled_p ())
2988             report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2989         }
2990
2991       return def_stmt;
2992     }
2993
2994   /* Try to find SLP reduction chain.  */
2995   if (check_reduction && code != COND_EXPR
2996       && vect_is_slp_reduction (loop_info, phi, def_stmt))
2997     {
2998       if (dump_enabled_p ())
2999         report_vect_op (MSG_NOTE, def_stmt,
3000                         "reduction: detected reduction chain: ");
3001
3002       return def_stmt;
3003     }
3004
3005   if (dump_enabled_p ())
3006     report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
3007                     "reduction: unknown pattern: ");
3008
3009   return NULL;
3010 }
3011
3012 /* Wrapper around vect_is_simple_reduction, which will modify code
3013    in-place if it enables detection of more reductions.  Arguments
3014    as there, except that the detected reduction type is stored in a
        local variable and is not made available to the caller.  */
3015
3016 gimple *
3017 vect_force_simple_reduction (loop_vec_info loop_info, gimple *phi,
3018                              bool check_reduction, bool *double_reduc,
3019                              bool need_wrapping_integral_overflow)
3020 {
3021   enum vect_reduction_type v_reduc_type;
3022   return vect_is_simple_reduction (loop_info, phi, check_reduction,
3023                                    double_reduc,
3024                                    need_wrapping_integral_overflow,
3025                                    &v_reduc_type);
3026 }
3027
3028 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.

        *PEEL_ITERS_EPILOGUE receives the number of epilogue iterations: VF/2
        when the iteration count of the loop is not known at compile time,
        otherwise what remains after the prologue modulo VF (or VF when
        peeling for gaps is required and nothing would remain).  Each entry of
        SCALAR_COST_VEC is scaled by the prologue and epilogue peel counts and
        recorded in PROLOGUE_COST_VEC and EPILOGUE_COST_VEC respectively.
        Return the sum of all costs recorded by this call.  */
3029 int
3030 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
3031                              int *peel_iters_epilogue,
3032                              stmt_vector_for_cost *scalar_cost_vec,
3033                              stmt_vector_for_cost *prologue_cost_vec,
3034                              stmt_vector_for_cost *epilogue_cost_vec)
3035 {
3036   int retval = 0;
3037   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3038
3039   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
3040     {
3041       *peel_iters_epilogue = vf/2;
3042       if (dump_enabled_p ())
3043         dump_printf_loc (MSG_NOTE, vect_location,
3044                          "cost model: epilogue peel iters set to vf/2 "
3045                          "because loop iterations are unknown .\n");
3046
3047       /* If peeled iterations are known but number of scalar loop
3048          iterations are unknown, count a taken branch per peeled loop.
              The epilogue branch is recorded in EPILOGUE_COST_VEC and added
              to RETVAL, so that both branches are accounted for and each one
              lands in the cost vector matching its location.  */
3049       retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
3050                                  NULL, 0, vect_prologue);
3051       retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken,
3052                                  NULL, 0, vect_epilogue);
3053     }
3054   else
3055     {
           /* The prologue cannot peel more iterations than the loop has.  */
3056       int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
3057       peel_iters_prologue = niters < peel_iters_prologue ?
3058                             niters : peel_iters_prologue;
3059       *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
3060       /* If we need to peel for gaps, but no peeling is required, we have to
3061          peel VF iterations.  */
3062       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
3063         *peel_iters_epilogue = vf;
3064     }
3065
       /* Charge one scalar iteration per peeled iteration.  */
3066   stmt_info_for_cost *si;
3067   int j;
3068   if (peel_iters_prologue)
3069     FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
3070       retval += record_stmt_cost (prologue_cost_vec,
3071                                   si->count * peel_iters_prologue,
3072                                   si->kind, NULL, si->misalign,
3073                                   vect_prologue);
3074   if (*peel_iters_epilogue)
3075     FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
3076       retval += record_stmt_cost (epilogue_cost_vec,
3077                                   si->count * *peel_iters_epilogue,
3078                                   si->kind, NULL, si->misalign,
3079                                   vect_epilogue);
3080
3081   return retval;
3082 }
3083
3084 /* Function vect_estimate_min_profitable_iters
3085
3086    Return the number of iterations required for the vector version of the
3087    loop to be profitable relative to the cost of the scalar version of the
3088    loop.  */
3089
3090 static void
3091 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
3092                                     int *ret_min_profitable_niters,
3093                                     int *ret_min_profitable_estimate)
3094 {
3095   int min_profitable_iters;
3096   int min_profitable_estimate;
3097   int peel_iters_prologue;
3098   int peel_iters_epilogue;
3099   unsigned vec_inside_cost = 0;
3100   int vec_outside_cost = 0;
3101   unsigned vec_prologue_cost = 0;
3102   unsigned vec_epilogue_cost = 0;
3103   int scalar_single_iter_cost = 0;
3104   int scalar_outside_cost = 0;
3105   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3106   int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
3107   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3108
3109   /* Cost model disabled.  */
3110   if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
3111     {
3112       dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
3113       *ret_min_profitable_niters = 0;
3114       *ret_min_profitable_estimate = 0;
3115       return;
3116     }
3117
3118   /* Requires loop versioning tests to handle misalignment.  */
3119   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
3120     {
3121       /*  FIXME: Make cost depend on complexity of individual check.  */
3122       unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
3123       (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
3124                             vect_prologue);
3125       dump_printf (MSG_NOTE,
3126                    "cost model: Adding cost of checks for loop "
3127                    "versioning to treat misalignment.\n");
3128     }
3129
3130   /* Requires loop versioning with alias checks.  */
3131   if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3132     {
3133       /*  FIXME: Make cost depend on complexity of individual check.  */
3134       unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();
3135       (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
3136                             vect_prologue);
3137       dump_printf (MSG_NOTE,
3138                    "cost model: Adding cost of checks for loop "
3139                    "versioning aliasing.\n");
3140     }
3141
3142   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
3143       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3144     (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, NULL, 0,
3145                           vect_prologue);
3146
3147   /* Count statements in scalar loop.  Using this as scalar cost for a single
3148      iteration for now.
3149
3150      TODO: Add outer loop support.
3151
3152      TODO: Consider assigning different costs to different scalar
3153      statements.  */
3154
3155   scalar_single_iter_cost
3156     = LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo);
3157
3158   /* Add additional cost for the peeled instructions in prologue and epilogue
3159      loop.
3160
3161      FORNOW: If we don't know the value of peel_iters for prologue or epilogue
3162      at compile-time - we assume it's vf/2 (the worst would be vf-1).
3163
3164      TODO: Build an expression that represents peel_iters for prologue and
3165      epilogue to be used in a run-time test.  */
3166
3167   if (npeel  < 0)
3168     {
3169       peel_iters_prologue = vf/2;
3170       dump_printf (MSG_NOTE, "cost model: "
3171                    "prologue peel iters set to vf/2.\n");
3172
3173       /* If peeling for alignment is unknown, loop bound of main loop becomes
3174          unknown.  */
3175       peel_iters_epilogue = vf/2;
3176       dump_printf (MSG_NOTE, "cost model: "
3177                    "epilogue peel iters set to vf/2 because "
3178                    "peeling for alignment is unknown.\n");
3179
3180       /* If peeled iterations are unknown, count a taken branch and a not taken
3181          branch per peeled loop. Even if scalar loop iterations are known,
3182          vector iterations are not known since peeled prologue iterations are
3183          not known. Hence guards remain the same.  */
3184       (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
3185                             NULL, 0, vect_prologue);
3186       (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
3187                             NULL, 0, vect_prologue);
3188       (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
3189                             NULL, 0, vect_epilogue);
3190       (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
3191                             NULL, 0, vect_epilogue);
3192       stmt_info_for_cost *si;
3193       int j;
3194       FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
3195         {
3196           struct _stmt_vec_info *stmt_info
3197             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3198           (void) add_stmt_cost (target_cost_data,
3199                                 si->count * peel_iters_prologue,
3200                                 si->kind, stmt_info, si->misalign,
3201                                 vect_prologue);
3202           (void) add_stmt_cost (target_cost_data,
3203                                 si->count * peel_iters_epilogue,
3204                                 si->kind, stmt_info, si->misalign,
3205                                 vect_epilogue);
3206         }
3207     }
3208   else
3209     {
3210       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
3211       stmt_info_for_cost *si;
3212       int j;
3213       void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3214
3215       prologue_cost_vec.create (2);
3216       epilogue_cost_vec.create (2);
3217       peel_iters_prologue = npeel;
3218
3219       (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue,
3220                                           &peel_iters_epilogue,
3221                                           &LOOP_VINFO_SCALAR_ITERATION_COST
3222                                             (loop_vinfo),
3223                                           &prologue_cost_vec,
3224                                           &epilogue_cost_vec);
3225
3226       FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
3227         {
3228           struct _stmt_vec_info *stmt_info
3229             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3230           (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
3231                                 si->misalign, vect_prologue);
3232         }
3233
3234       FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
3235         {
3236           struct _stmt_vec_info *stmt_info
3237             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3238           (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
3239                                 si->misalign, vect_epilogue);
3240         }
3241
3242       prologue_cost_vec.release ();
3243       epilogue_cost_vec.release ();
3244     }
3245
3246   /* FORNOW: The scalar outside cost is incremented in one of the
3247      following ways:
3248
3249      1. The vectorizer checks for alignment and aliasing and generates
3250      a condition that allows dynamic vectorization.  A cost model
3251      check is ANDED with the versioning condition.  Hence scalar code
3252      path now has the added cost of the versioning check.
3253
3254        if (cost > th & versioning_check)
3255          jmp to vector code
3256
3257      Hence run-time scalar is incremented by not-taken branch cost.
3258
3259      2. The vectorizer then checks if a prologue is required.  If the
3260      cost model check was not done before during versioning, it has to
3261      be done before the prologue check.
3262
3263        if (cost <= th)
3264          prologue = scalar_iters
3265        if (prologue == 0)
3266          jmp to vector code
3267        else
3268          execute prologue
3269        if (prologue == num_iters)
3270          go to exit
3271
3272      Hence the run-time scalar cost is incremented by a taken branch,
3273      plus a not-taken branch, plus a taken branch cost.
3274
3275      3. The vectorizer then checks if an epilogue is required.  If the
3276      cost model check was not done before during prologue check, it
3277      has to be done with the epilogue check.
3278
3279        if (prologue == 0)
3280          jmp to vector code
3281        else
3282          execute prologue
3283        if (prologue == num_iters)
3284          go to exit
3285        vector code:
3286          if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
3287            jmp to epilogue
3288
3289      Hence the run-time scalar cost should be incremented by 2 taken
3290      branches.
3291
3292      TODO: The back end may reorder the BBS's differently and reverse
3293      conditions/branch directions.  Change the estimates below to
3294      something more reasonable.  */
3295
3296   /* If the number of iterations is known and we do not do versioning, we can
3297      decide whether to vectorize at compile time.  Hence the scalar version
3298      do not carry cost model guard costs.  */
3299   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3300       || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
3301       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3302     {
3303       /* Cost model check occurs at versioning.  */
3304       if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
3305           || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3306         scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken);
3307       else
3308         {
3309           /* Cost model check occurs at prologue generation.  */
3310           if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
3311             scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
3312               + vect_get_stmt_cost (cond_branch_not_taken);
3313           /* Cost model check occurs at epilogue generation.  */
3314           else
3315             scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken);
3316         }
3317     }
3318
3319   /* Complete the target-specific cost calculations.  */
3320   finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
3321                &vec_inside_cost, &vec_epilogue_cost);
3322
3323   vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
3324
3325   if (dump_enabled_p ())
3326     {
3327       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
3328       dump_printf (MSG_NOTE, "  Vector inside of loop cost: %d\n",
3329                    vec_inside_cost);
3330       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n",
3331                    vec_prologue_cost);
3332       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n",
3333                    vec_epilogue_cost);
3334       dump_printf (MSG_NOTE, "  Scalar iteration cost: %d\n",
3335                    scalar_single_iter_cost);
3336       dump_printf (MSG_NOTE, "  Scalar outside cost: %d\n",
3337                    scalar_outside_cost);
3338       dump_printf (MSG_NOTE, "  Vector outside cost: %d\n",
3339                    vec_outside_cost);
3340       dump_printf (MSG_NOTE, "  prologue iterations: %d\n",
3341                    peel_iters_prologue);
3342       dump_printf (MSG_NOTE, "  epilogue iterations: %d\n",
3343                    peel_iters_epilogue);
3344     }
3345
3346   /* Calculate number of iterations required to make the vector version
3347      profitable, relative to the loop bodies only.  The following condition
3348      must hold true:
3349      SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
3350      where
3351      SIC = scalar iteration cost, VIC = vector iteration cost,
3352      VOC = vector outside cost, VF = vectorization factor,
3353      PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
3354      SOC = scalar outside cost for run time cost model check.  */
3355
3356   if ((scalar_single_iter_cost * vf) > (int) vec_inside_cost)
3357     {
3358       if (vec_outside_cost <= 0)
3359         min_profitable_iters = 1;
3360       else
3361         {
3362           min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
3363                                   - vec_inside_cost * peel_iters_prologue
3364                                   - vec_inside_cost * peel_iters_epilogue)
3365                                  / ((scalar_single_iter_cost * vf)
3366                                     - vec_inside_cost);
3367
3368           if ((scalar_single_iter_cost * vf * min_profitable_iters)
3369               <= (((int) vec_inside_cost * min_profitable_iters)
3370                   + (((int) vec_outside_cost - scalar_outside_cost) * vf)))
3371             min_profitable_iters++;
3372         }
3373     }
3374   /* vector version will never be profitable.  */
3375   else
3376     {
3377       if (LOOP_VINFO_LOOP (loop_vinfo)->force_vectorize)
3378         warning_at (vect_location, OPT_Wopenmp_simd, "vectorization "
3379                     "did not happen for a simd loop");
3380
3381       if (dump_enabled_p ())
3382         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3383                          "cost model: the vector iteration cost = %d "
3384                          "divided by the scalar iteration cost = %d "
3385                          "is greater or equal to the vectorization factor = %d"
3386                          ".\n",
3387                          vec_inside_cost, scalar_single_iter_cost, vf);
3388       *ret_min_profitable_niters = -1;
3389       *ret_min_profitable_estimate = -1;
3390       return;
3391     }
3392
3393   dump_printf (MSG_NOTE,
3394                "  Calculated minimum iters for profitability: %d\n",
3395                min_profitable_iters);
3396
3397   min_profitable_iters =
3398         min_profitable_iters < vf ? vf : min_profitable_iters;
3399
3400   /* Because the condition we create is:
3401      if (niters <= min_profitable_iters)
3402        then skip the vectorized loop.  */
3403   min_profitable_iters--;
3404
3405   if (dump_enabled_p ())
3406     dump_printf_loc (MSG_NOTE, vect_location,
3407                      "  Runtime profitability threshold = %d\n",
3408                      min_profitable_iters);
3409
3410   *ret_min_profitable_niters = min_profitable_iters;
3411
3412   /* Calculate number of iterations required to make the vector version
3413      profitable, relative to the loop bodies only.
3414
3415      Non-vectorized variant is SIC * niters and it must win over vector
3416      variant on the expected loop trip count.  The following condition must hold true:
3417      SIC * niters > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC + SOC  */
3418
3419   if (vec_outside_cost <= 0)
3420     min_profitable_estimate = 1;
3421   else
3422     {
3423       min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) * vf
3424                                  - vec_inside_cost * peel_iters_prologue
3425                                  - vec_inside_cost * peel_iters_epilogue)
3426                                  / ((scalar_single_iter_cost * vf)
3427                                    - vec_inside_cost);
3428     }
3429   min_profitable_estimate --;
3430   min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters);
3431   if (dump_enabled_p ())
3432     dump_printf_loc (MSG_NOTE, vect_location,
3433                      "  Static estimate profitability threshold = %d\n",
3434                       min_profitable_iters);
3435
3436   *ret_min_profitable_estimate = min_profitable_estimate;
3437 }
3438
3439 /* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET
3440    vector elements (not bits) for a vector of mode MODE.  */
3441 static void
3442 calc_vec_perm_mask_for_shift (enum machine_mode mode, unsigned int offset,
3443                               unsigned char *sel)
3444 {
3445   unsigned int i, nelt = GET_MODE_NUNITS (mode);
3446
3447   for (i = 0; i < nelt; i++)
3448     sel[i] = (i + offset) & (2*nelt - 1);
3449 }
3450
3451 /* Checks whether the target supports whole-vector shifts for vectors of mode
3452    MODE.  This is the case if _either_ the platform handles vec_shr_optab, _or_
3453    it supports vec_perm_const with masks for all necessary shift amounts.  */
3454 static bool
3455 have_whole_vector_shift (enum machine_mode mode)
3456 {
3457   if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
3458     return true;
3459
3460   if (direct_optab_handler (vec_perm_const_optab, mode) == CODE_FOR_nothing)
3461     return false;
3462
3463   unsigned int i, nelt = GET_MODE_NUNITS (mode);
3464   unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
3465
3466   for (i = nelt/2; i >= 1; i/=2)
3467     {
3468       calc_vec_perm_mask_for_shift (mode, i, sel);
3469       if (!can_vec_perm_p (mode, false, sel))
3470         return false;
3471     }
3472   return true;
3473 }
3474
3475 /* Return the reduction operand (with index REDUC_INDEX) of STMT.  */
3476
3477 static tree
3478 get_reduction_op (gimple *stmt, int reduc_index)
3479 {
3480   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3481     {
3482     case GIMPLE_SINGLE_RHS:
3483       gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
3484                   == ternary_op);
3485       return TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
3486     case GIMPLE_UNARY_RHS:
3487       return gimple_assign_rhs1 (stmt);
3488     case GIMPLE_BINARY_RHS:
3489       return (reduc_index
3490               ? gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt));
3491     case GIMPLE_TERNARY_RHS:
3492       return gimple_op (stmt, reduc_index + 1);
3493     default:
3494       gcc_unreachable ();
3495     }
3496 }
3497
3498 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
3499    functions. Design better to avoid maintenance issues.  */
3500
3501 /* Function vect_model_reduction_cost.
3502
3503    Models cost for a reduction operation, including the vector ops
3504    generated within the strip-mine loop, the initial definition before
3505    the loop, and the epilogue code that must be generated.  */
3506
3507 static bool
3508 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
3509                            int ncopies, int reduc_index)
3510 {
3511   int prologue_cost = 0, epilogue_cost = 0;
3512   enum tree_code code;
3513   optab optab;
3514   tree vectype;
3515   gimple *stmt, *orig_stmt;
3516   tree reduction_op;
3517   machine_mode mode;
3518   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3519   struct loop *loop = NULL;
3520   void *target_cost_data;
3521
3522   if (loop_vinfo)
3523     {
3524       loop = LOOP_VINFO_LOOP (loop_vinfo);
3525       target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3526     }
3527   else
3528     target_cost_data = BB_VINFO_TARGET_COST_DATA (STMT_VINFO_BB_VINFO (stmt_info));
3529
3530   /* Condition reductions generate two reductions in the loop.  */
3531   if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
3532     ncopies *= 2;
3533
3534   /* Cost of reduction op inside loop.  */
3535   unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3536                                         stmt_info, 0, vect_body);
3537   stmt = STMT_VINFO_STMT (stmt_info);
3538
3539   reduction_op = get_reduction_op (stmt, reduc_index);
3540
3541   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3542   if (!vectype)
3543     {
3544       if (dump_enabled_p ())
3545         {
3546           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3547                            "unsupported data-type ");
3548           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3549                              TREE_TYPE (reduction_op));
3550           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3551         }
3552       return false;
3553    }
3554
3555   mode = TYPE_MODE (vectype);
3556   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3557
3558   if (!orig_stmt)
3559     orig_stmt = STMT_VINFO_STMT (stmt_info);
3560
3561   code = gimple_assign_rhs_code (orig_stmt);
3562
3563   /* Add in cost for initial definition.
3564      For cond reduction we have four vectors: initial index, step, initial
3565      result of the data reduction, initial value of the index reduction.  */
3566   int prologue_stmts = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
3567                        == COND_REDUCTION ? 4 : 1;
3568   prologue_cost += add_stmt_cost (target_cost_data, prologue_stmts,
3569                                   scalar_to_vec, stmt_info, 0,
3570                                   vect_prologue);
3571
3572   /* Determine cost of epilogue code.
3573
3574      We have a reduction operator that will reduce the vector in one statement.
3575      Also requires scalar extract.  */
3576
3577   if (!loop || !nested_in_vect_loop_p (loop, orig_stmt))
3578     {
3579       if (reduc_code != ERROR_MARK)
3580         {
3581           if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
3582             {
3583               /* An EQ stmt and an COND_EXPR stmt.  */
3584               epilogue_cost += add_stmt_cost (target_cost_data, 2,
3585                                               vector_stmt, stmt_info, 0,
3586                                               vect_epilogue);
3587               /* Reduction of the max index and a reduction of the found
3588                  values.  */
3589               epilogue_cost += add_stmt_cost (target_cost_data, 2,
3590                                               vec_to_scalar, stmt_info, 0,
3591                                               vect_epilogue);
3592               /* A broadcast of the max value.  */
3593               epilogue_cost += add_stmt_cost (target_cost_data, 1,
3594                                               scalar_to_vec, stmt_info, 0,
3595                                               vect_epilogue);
3596             }
3597           else
3598             {
3599               epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
3600                                               stmt_info, 0, vect_epilogue);
3601               epilogue_cost += add_stmt_cost (target_cost_data, 1,
3602                                               vec_to_scalar, stmt_info, 0,
3603                                               vect_epilogue);
3604             }
3605         }
3606       else
3607         {
3608           int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
3609           tree bitsize =
3610             TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
3611           int element_bitsize = tree_to_uhwi (bitsize);
3612           int nelements = vec_size_in_bits / element_bitsize;
3613
3614           optab = optab_for_tree_code (code, vectype, optab_default);
3615
3616           /* We have a whole vector shift available.  */
3617           if (VECTOR_MODE_P (mode)
3618               && optab_handler (optab, mode) != CODE_FOR_nothing
3619               && have_whole_vector_shift (mode))
3620             {
3621               /* Final reduction via vector shifts and the reduction operator.
3622                  Also requires scalar extract.  */
3623               epilogue_cost += add_stmt_cost (target_cost_data,
3624                                               exact_log2 (nelements) * 2,
3625                                               vector_stmt, stmt_info, 0,
3626                                               vect_epilogue);
3627               epilogue_cost += add_stmt_cost (target_cost_data, 1,
3628                                               vec_to_scalar, stmt_info, 0,
3629                                               vect_epilogue);
3630             }
3631           else
3632             /* Use extracts and reduction op for final reduction.  For N
3633                elements, we have N extracts and N-1 reduction ops.  */
3634             epilogue_cost += add_stmt_cost (target_cost_data,
3635                                             nelements + nelements - 1,
3636                                             vector_stmt, stmt_info, 0,
3637                                             vect_epilogue);
3638         }
3639     }
3640
3641   if (dump_enabled_p ())
3642     dump_printf (MSG_NOTE,
3643                  "vect_model_reduction_cost: inside_cost = %d, "
3644                  "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost,
3645                  prologue_cost, epilogue_cost);
3646
3647   return true;
3648 }
3649
3650
3651 /* Function vect_model_induction_cost.
3652
3653    Models cost for induction operations.  */
3654
3655 static void
3656 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
3657 {
3658   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3659   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3660   unsigned inside_cost, prologue_cost;
3661
3662   /* loop cost for vec_loop.  */
3663   inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3664                                stmt_info, 0, vect_body);
3665
3666   /* prologue cost for vec_init and vec_step.  */
3667   prologue_cost = add_stmt_cost (target_cost_data, 2, scalar_to_vec,
3668                                  stmt_info, 0, vect_prologue);
3669
3670   if (dump_enabled_p ())
3671     dump_printf_loc (MSG_NOTE, vect_location,
3672                      "vect_model_induction_cost: inside_cost = %d, "
3673                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
3674 }
3675
3676
3677 /* Function get_initial_def_for_induction
3678
3679    Input:
3680    STMT - a stmt that performs an induction operation in the loop.
3681    IV_PHI - the initial value of the induction variable
3682
3683    Output:
3684    Return a vector variable, initialized with the first VF values of
3685    the induction variable.  E.g., for an iv with IV_PHI='X' and
3686    evolution S, for a vector of 4 units, we want to return:
3687    [X, X + S, X + 2*S, X + 3*S].  */
3688
3689 static tree
3690 get_initial_def_for_induction (gimple *iv_phi)
3691 {
3692   stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
3693   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3694   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3695   tree vectype;
3696   int nunits;
3697   edge pe = loop_preheader_edge (loop);
3698   struct loop *iv_loop;
3699   basic_block new_bb;
3700   tree new_vec, vec_init, vec_step, t;
3701   tree new_name;
3702   gimple *new_stmt;
3703   gphi *induction_phi;
3704   tree induc_def, vec_def, vec_dest;
3705   tree init_expr, step_expr;
3706   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3707   int i;
3708   int ncopies;
3709   tree expr;
3710   stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
3711   bool nested_in_vect_loop = false;
3712   gimple_seq stmts;
3713   imm_use_iterator imm_iter;
3714   use_operand_p use_p;
3715   gimple *exit_phi;
3716   edge latch_e;
3717   tree loop_arg;
3718   gimple_stmt_iterator si;
3719   basic_block bb = gimple_bb (iv_phi);
3720   tree stepvectype;
3721   tree resvectype;
3722
3723   /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop?  */
3724   if (nested_in_vect_loop_p (loop, iv_phi))
3725     {
3726       nested_in_vect_loop = true;
3727       iv_loop = loop->inner;
3728     }
3729   else
3730     iv_loop = loop;
3731   gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);
3732
3733   latch_e = loop_latch_edge (iv_loop);
3734   loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
3735
3736   step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
3737   gcc_assert (step_expr != NULL_TREE);
3738
3739   pe = loop_preheader_edge (iv_loop);
3740   init_expr = PHI_ARG_DEF_FROM_EDGE (iv_phi,
3741                                      loop_preheader_edge (iv_loop));
3742
3743   vectype = get_vectype_for_scalar_type (TREE_TYPE (init_expr));
3744   resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
3745   gcc_assert (vectype);
3746   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3747   ncopies = vf / nunits;
3748
3749   gcc_assert (phi_info);
3750   gcc_assert (ncopies >= 1);
3751
3752   /* Convert the step to the desired type.  */
3753   stmts = NULL;
3754   step_expr = gimple_convert (&stmts, TREE_TYPE (vectype), step_expr);
3755   if (stmts)
3756     {
3757       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3758       gcc_assert (!new_bb);
3759     }
3760
3761   /* Find the first insertion point in the BB.  */
3762   si = gsi_after_labels (bb);
3763
3764   /* Create the vector that holds the initial_value of the induction.  */
3765   if (nested_in_vect_loop)
3766     {
3767       /* iv_loop is nested in the loop to be vectorized.  init_expr had already
3768          been created during vectorization of previous stmts.  We obtain it
3769          from the STMT_VINFO_VEC_STMT of the defining stmt.  */
3770       vec_init = vect_get_vec_def_for_operand (init_expr, iv_phi);
3771       /* If the initial value is not of proper type, convert it.  */
3772       if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init)))
3773         {
3774           new_stmt
3775             = gimple_build_assign (vect_get_new_ssa_name (vectype,
3776                                                           vect_simple_var,
3777                                                           "vec_iv_"),
3778                                    VIEW_CONVERT_EXPR,
3779                                    build1 (VIEW_CONVERT_EXPR, vectype,
3780                                            vec_init));
3781           vec_init = gimple_assign_lhs (new_stmt);
3782           new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop),
3783                                                  new_stmt);
3784           gcc_assert (!new_bb);
3785           set_vinfo_for_stmt (new_stmt,
3786                               new_stmt_vec_info (new_stmt, loop_vinfo));
3787         }
3788     }
3789   else
3790     {
3791       vec<constructor_elt, va_gc> *v;
3792
3793       /* iv_loop is the loop to be vectorized. Create:
3794          vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr)  */
3795       stmts = NULL;
3796       new_name = gimple_convert (&stmts, TREE_TYPE (vectype), init_expr);
3797
3798       vec_alloc (v, nunits);
3799       bool constant_p = is_gimple_min_invariant (new_name);
3800       CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3801       for (i = 1; i < nunits; i++)
3802         {
3803           /* Create: new_name_i = new_name + step_expr  */
3804           new_name = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (new_name),
3805                                    new_name, step_expr);
3806           if (!is_gimple_min_invariant (new_name))
3807             constant_p = false;
3808           CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3809         }
3810       if (stmts)
3811         {
3812           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3813           gcc_assert (!new_bb);
3814         }
3815
3816       /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1]  */
3817       if (constant_p)
3818         new_vec = build_vector_from_ctor (vectype, v);
3819       else
3820         new_vec = build_constructor (vectype, v);
3821       vec_init = vect_init_vector (iv_phi, new_vec, vectype, NULL);
3822     }
3823
3824
3825   /* Create the vector that holds the step of the induction.  */
3826   if (nested_in_vect_loop)
3827     /* iv_loop is nested in the loop to be vectorized. Generate:
3828        vec_step = [S, S, S, S]  */
3829     new_name = step_expr;
3830   else
3831     {
3832       /* iv_loop is the loop to be vectorized. Generate:
3833           vec_step = [VF*S, VF*S, VF*S, VF*S]  */
3834       if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
3835         {
3836           expr = build_int_cst (integer_type_node, vf);
3837           expr = fold_convert (TREE_TYPE (step_expr), expr);
3838         }
3839       else
3840         expr = build_int_cst (TREE_TYPE (step_expr), vf);
3841       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3842                               expr, step_expr);
3843       if (TREE_CODE (step_expr) == SSA_NAME)
3844         new_name = vect_init_vector (iv_phi, new_name,
3845                                      TREE_TYPE (step_expr), NULL);
3846     }
3847
3848   t = unshare_expr (new_name);
3849   gcc_assert (CONSTANT_CLASS_P (new_name)
3850               || TREE_CODE (new_name) == SSA_NAME);
3851   stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
3852   gcc_assert (stepvectype);
3853   new_vec = build_vector_from_val (stepvectype, t);
3854   vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3855
3856
3857   /* Create the following def-use cycle:
3858      loop prolog:
3859          vec_init = ...
3860          vec_step = ...
3861      loop:
3862          vec_iv = PHI <vec_init, vec_loop>
3863          ...
3864          STMT
3865          ...
3866          vec_loop = vec_iv + vec_step;  */
3867
3868   /* Create the induction-phi that defines the induction-operand.  */
3869   vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
3870   induction_phi = create_phi_node (vec_dest, iv_loop->header);
3871   set_vinfo_for_stmt (induction_phi,
3872                       new_stmt_vec_info (induction_phi, loop_vinfo));
3873   induc_def = PHI_RESULT (induction_phi);
3874
3875   /* Create the iv update inside the loop  */
3876   new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR, induc_def, vec_step);
3877   vec_def = make_ssa_name (vec_dest, new_stmt);
3878   gimple_assign_set_lhs (new_stmt, vec_def);
3879   gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3880   set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo));
3881
3882   /* Set the arguments of the phi node:  */
3883   add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
3884   add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
3885                UNKNOWN_LOCATION);
3886
3887
3888   /* In case that vectorization factor (VF) is bigger than the number
3889      of elements that we can fit in a vectype (nunits), we have to generate
3890      more than one vector stmt - i.e - we need to "unroll" the
3891      vector stmt by a factor VF/nunits.  For more details see documentation
3892      in vectorizable_operation.  */
3893
3894   if (ncopies > 1)
3895     {
3896       stmt_vec_info prev_stmt_vinfo;
3897       /* FORNOW. This restriction should be relaxed.  */
3898       gcc_assert (!nested_in_vect_loop);
3899
3900       /* Create the vector that holds the step of the induction.  */
3901       if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
3902         {
3903           expr = build_int_cst (integer_type_node, nunits);
3904           expr = fold_convert (TREE_TYPE (step_expr), expr);
3905         }
3906       else
3907         expr = build_int_cst (TREE_TYPE (step_expr), nunits);
3908       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3909                               expr, step_expr);
3910       if (TREE_CODE (step_expr) == SSA_NAME)
3911         new_name = vect_init_vector (iv_phi, new_name,
3912                                      TREE_TYPE (step_expr), NULL);
3913       t = unshare_expr (new_name);
3914       gcc_assert (CONSTANT_CLASS_P (new_name)
3915                   || TREE_CODE (new_name) == SSA_NAME);
3916       new_vec = build_vector_from_val (stepvectype, t);
3917       vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3918
3919       vec_def = induc_def;
3920       prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
3921       for (i = 1; i < ncopies; i++)
3922         {
3923           /* vec_i = vec_prev + vec_step  */
3924           new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR,
3925                                           vec_def, vec_step);
3926           vec_def = make_ssa_name (vec_dest, new_stmt);
3927           gimple_assign_set_lhs (new_stmt, vec_def);
3928
3929           gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3930           if (!useless_type_conversion_p (resvectype, vectype))
3931             {
3932               new_stmt
3933                 = gimple_build_assign
3934                         (vect_get_new_vect_var (resvectype, vect_simple_var,
3935                                                 "vec_iv_"),
3936                          VIEW_CONVERT_EXPR,
3937                          build1 (VIEW_CONVERT_EXPR, resvectype,
3938                                  gimple_assign_lhs (new_stmt)));
3939               gimple_assign_set_lhs (new_stmt,
3940                                      make_ssa_name
3941                                        (gimple_assign_lhs (new_stmt), new_stmt));
3942               gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3943             }
3944           set_vinfo_for_stmt (new_stmt,
3945                               new_stmt_vec_info (new_stmt, loop_vinfo));
3946           STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
3947           prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
3948         }
3949     }
3950
3951   if (nested_in_vect_loop)
3952     {
3953       /* Find the loop-closed exit-phi of the induction, and record
3954          the final vector of induction results:  */
3955       exit_phi = NULL;
3956       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
3957         {
3958           gimple *use_stmt = USE_STMT (use_p);
3959           if (is_gimple_debug (use_stmt))
3960             continue;
3961
3962           if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (use_stmt)))
3963             {
3964               exit_phi = use_stmt;
3965               break;
3966             }
3967         }
3968       if (exit_phi)
3969         {
3970           stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
3971           /* FORNOW. Currently not supporting the case that an inner-loop induction
3972              is not used in the outer-loop (i.e. only outside the outer-loop).  */
3973           gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
3974                       && !STMT_VINFO_LIVE_P (stmt_vinfo));
3975
3976           STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
3977           if (dump_enabled_p ())
3978             {
3979               dump_printf_loc (MSG_NOTE, vect_location,
3980                                "vector of inductions after inner-loop:");
3981               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
3982               dump_printf (MSG_NOTE, "\n");
3983             }
3984         }
3985     }
3986
3987
3988   if (dump_enabled_p ())
3989     {
3990       dump_printf_loc (MSG_NOTE, vect_location,
3991                        "transform induction: created def-use cycle: ");
3992       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, induction_phi, 0);
3993       dump_printf (MSG_NOTE, "\n");
3994       dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
3995                         SSA_NAME_DEF_STMT (vec_def), 0);
3996       dump_printf (MSG_NOTE, "\n");
3997     }
3998
3999   STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
4000   if (!useless_type_conversion_p (resvectype, vectype))
4001     {
4002       new_stmt = gimple_build_assign (vect_get_new_vect_var (resvectype,
4003                                                              vect_simple_var,
4004                                                              "vec_iv_"),
4005                                       VIEW_CONVERT_EXPR,
4006                                       build1 (VIEW_CONVERT_EXPR, resvectype,
4007                                               induc_def));
4008       induc_def = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
4009       gimple_assign_set_lhs (new_stmt, induc_def);
4010       si = gsi_after_labels (bb);
4011       gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
4012       set_vinfo_for_stmt (new_stmt,
4013                           new_stmt_vec_info (new_stmt, loop_vinfo));
4014       STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
4015         = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
4016     }
4017
4018   return induc_def;
4019 }
4020
4021
4022 /* Function get_initial_def_for_reduction
4023
4024    Input:
4025    STMT - a stmt that performs a reduction operation in the loop.
4026    INIT_VAL - the initial value of the reduction variable.
4027
4028    Output:
4029    ADJUSTMENT_DEF - if not NULL, set to a tree holding the value with which
4030         the epilog must adjust the final result, or to NULL when no adjustment
4031         is needed (min/max/cond_expr, double reduction).  NULL selects Option2.
4032    Return a vector initialized according to the operation that STMT performs.
4033         It is used as the initial value of the vector of partial results.
4034
4035    Option1 (adjust in epilog): Initialize the vector as follows:
4036      add/sub/bit or/xor:  [0,0,...,0,0]  (also widen_sum/dot_prod/sad)
4037      mult / bit and:      [1,1,...,1,1] / [-1,-1,...,-1,-1] (all bits set)
4038      min/max/cond_expr:   [init_val,init_val,..,init_val,init_val]
4039    and when necessary (e.g. add/mult case) let the caller know
4040    that it needs to adjust the result by init_val.
4041
4042    Option2: Initialize the vector as follows:
4043      add/sub/bit or/xor:  [init_val,0,0,...,0]
4044      mult / bit and:      [init_val,1,1,...,1] / [init_val,-1,-1,...,-1]
4045      min/max/cond_expr:   [init_val,init_val,...,init_val]
4046    and no adjustments are needed.
4047
4048    For example, for the following code:
4049
4050    s = init_val;
4051    for (i=0;i<n;i++)
4052      s = s + a[i];
4053
4054    STMT is 's = s + a[i]', and the reduction variable is 's'.
4055    For a vector of 4 units, we want to return either [init_val,0,0,0],
4056    or [0,0,0,0] and let the caller know that it needs to adjust
4057    the result at the end by 'init_val'.
4058
4059    FORNOW, Option1 ('adjust in epilog') is used whenever ADJUSTMENT_DEF is
4060    not NULL, because that way the initialization vector is simpler (same
4061    element in all entries); Option2 is used otherwise.
4062
4063    A cost model should help decide between these two schemes.  */
4064
4065 tree
4066 get_initial_def_for_reduction (gimple *stmt, tree init_val,
4067                                tree *adjustment_def)
4068 {
4069   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
4070   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
4071   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4072   tree scalar_type = TREE_TYPE (init_val);
4073   tree vectype = get_vectype_for_scalar_type (scalar_type);
4074   int nunits;  /* Number of elements in VECTYPE.  */
4075   enum tree_code code = gimple_assign_rhs_code (stmt);
4076   tree def_for_init;  /* Scalar filler element (see the two values below).  */
4077   tree init_def;  /* The vector returned to the caller.  */
4078   tree *elts;
4079   int i;
4080   bool nested_in_vect_loop = false;
4081   REAL_VALUE_TYPE real_init_val = dconst0;  /* Float filler: 0, 1 for mult.  */
4082   int int_init_val = 0;  /* Non-float filler: 0, 1 for mult, -1 for and.  */
4083   gimple *def_stmt = NULL;
4084   gimple_seq stmts = NULL;  /* Conversion stmts for the min/max/cond case.  */
4085
4086   gcc_assert (vectype);
4087   nunits = TYPE_VECTOR_SUBPARTS (vectype);
4088
4089   gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
4090               || SCALAR_FLOAT_TYPE_P (scalar_type));
4091
4092   if (nested_in_vect_loop_p (loop, stmt))
4093     nested_in_vect_loop = true;
4094   else
4095     gcc_assert (loop == (gimple_bb (stmt))->loop_father);
4096
4097   /* In case of double reduction we only create a vector variable to be put
4098      in the reduction phi node.  The actual statement creation is done in
4099      vect_create_epilog_for_reduction.  */
4100   if (adjustment_def && nested_in_vect_loop
4101       && TREE_CODE (init_val) == SSA_NAME
4102       && (def_stmt = SSA_NAME_DEF_STMT (init_val))
4103       && gimple_code (def_stmt) == GIMPLE_PHI
4104       && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
4105       && vinfo_for_stmt (def_stmt)
4106       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
4107           == vect_double_reduction_def)
4108     {
4109       *adjustment_def = NULL;  /* No epilog adjustment in this case.  */
4110       return vect_create_destination_var (init_val, vectype);
4111     }
4112
4113   switch (code)
4114     {
4115       case WIDEN_SUM_EXPR:
4116       case DOT_PROD_EXPR:
4117       case SAD_EXPR:
4118       case PLUS_EXPR:
4119       case MINUS_EXPR:
4120       case BIT_IOR_EXPR:
4121       case BIT_XOR_EXPR:
4122       case MULT_EXPR:
4123       case BIT_AND_EXPR:
4124         /* ADJUSTMENT_DEF is NULL when called from
4125            vect_create_epilog_for_reduction to vectorize double reduction.  */
4126         if (adjustment_def)
4127           {
4128             if (nested_in_vect_loop)
4129               *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt);
4130             else
4131               *adjustment_def = init_val;
4132           }
4133
4134         if (code == MULT_EXPR)
4135           {
4136             real_init_val = dconst1;
4137             int_init_val = 1;
4138           }
4139
4140         if (code == BIT_AND_EXPR)
4141           int_init_val = -1;
4142
4143         if (SCALAR_FLOAT_TYPE_P (scalar_type))
4144           def_for_init = build_real (scalar_type, real_init_val);
4145         else
4146           def_for_init = build_int_cst (scalar_type, int_init_val);
4147
4148         /* Fill elements 1 .. NUNITS-1 with DEF_FOR_INIT (0, 1 or -1).  */
4149         elts = XALLOCAVEC (tree, nunits);
4150         for (i = nunits - 2; i >= 0; --i)
4151           elts[i + 1] = def_for_init;
4152
4153         /* Option1: the first element is DEF_FOR_INIT as well.  */
4154         if (adjustment_def)
4155           {
4156             elts[0] = def_for_init;
4157             init_def = build_vector (vectype, elts);
4158             break;
4159           }
4160
4161         /* Option2: the first element is INIT_VAL.  */
4162         elts[0] = init_val;
4163         if (TREE_CONSTANT (init_val))
4164           init_def = build_vector (vectype, elts);
4165         else  /* INIT_VAL is not constant: build a CONSTRUCTOR instead.  */
4166           {
4167             vec<constructor_elt, va_gc> *v;
4168             vec_alloc (v, nunits);
4169             CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init_val);
4170             for (i = 1; i < nunits; ++i)
4171               CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[i]);
4172             init_def = build_constructor (vectype, v);
4173           }
4174
4175         break;
4176
4177       case MIN_EXPR:
4178       case MAX_EXPR:
4179       case COND_EXPR:
4180         if (adjustment_def)
4181           {
4182             *adjustment_def = NULL_TREE;  /* No epilog adjustment needed.  */
4183             if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_vinfo) != COND_REDUCTION)
4184               {
4185                 init_def = vect_get_vec_def_for_operand (init_val, stmt);
4186                 break;
4187               }
4188           }
4189         init_val = gimple_convert (&stmts, TREE_TYPE (vectype), init_val);
4190         if (! gimple_seq_empty_p (stmts))  /* Emit any conversion stmts.  */
4191           gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4192         init_def = build_vector_from_val (vectype, init_val);
4193         break;
4194
4195       default:
4196         gcc_unreachable ();  /* No other reduction code is handled here.  */
4197     }
4198
4199   return init_def;
4200 }
4201
4202 /* Function vect_create_epilog_for_reduction
4203
4204    Create code at the loop-epilog to finalize the result of a reduction
4205    computation.
4206
4207    VECT_DEFS is a list of vectors of partial results, i.e., the lhs's of the
4208      vector reduction statements.
4209    STMT is the scalar reduction stmt that is being vectorized.
4210    NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
4211      number of elements that we can fit in a vectype (nunits).  In this case
4212      we have to generate more than one vector stmt - i.e - we need to "unroll"
4213      the vector stmt by a factor VF/nunits.  For more details see documentation
4214      in vectorizable_operation.
4215    REDUC_CODE is the tree-code for the epilog reduction.
4216    REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
4217      computation.
4218    REDUC_INDEX is the index of the operand in the right hand side of the
4219      statement that is defined by REDUCTION_PHI.
4220    DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
4221    SLP_NODE is an SLP node containing a group of reduction statements. The
4222      first one in this group is STMT.
4223    INDUCTION_INDEX is the index of the loop for condition reductions.
4224      Otherwise it is undefined.
4225
4226    This function:
4227    1. Creates the reduction def-use cycles: sets the arguments for
4228       REDUCTION_PHIS:
4229       The loop-entry argument is the vectorized initial-value of the reduction.
4230       The loop-latch argument is taken from VECT_DEFS - the vector of partial
4231       sums.
4232    2. "Reduces" each vector of partial results VECT_DEFS into a single result,
4233       by applying the operation specified by REDUC_CODE if available, or by
4234       other means (whole-vector shifts or a scalar loop).
4235       The function also creates a new phi node at the loop exit to preserve
4236       loop-closed form, as illustrated below.
4237
4238      The flow at the entry to this function:
4239
4240         loop:
4241           vec_def = phi <null, null>            # REDUCTION_PHI
4242           VECT_DEF = vector_stmt                # vectorized form of STMT
4243           s_loop = scalar_stmt                  # (scalar) STMT
4244         loop_exit:
4245           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4246           use <s_out0>
4247           use <s_out0>
4248
4249      The above is transformed by this function into:
4250
4251         loop:
4252           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
4253           VECT_DEF = vector_stmt                # vectorized form of STMT
4254           s_loop = scalar_stmt                  # (scalar) STMT
4255         loop_exit:
4256           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4257           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4258           v_out2 = reduce <v_out1>
4259           s_out3 = extract_field <v_out2, 0>
4260           s_out4 = adjust_result <s_out3>
4261           use <s_out4>
4262           use <s_out4>
4263 */
4264
4265 static void
4266 vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
4267                                   int ncopies, enum tree_code reduc_code,
4268                                   vec<gimple *> reduction_phis,
4269                                   int reduc_index, bool double_reduc,
4270                                   slp_tree slp_node, tree induction_index)
4271 {
4272   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4273   stmt_vec_info prev_phi_info;
4274   tree vectype;
4275   machine_mode mode;
4276   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4277   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
4278   basic_block exit_bb;
4279   tree scalar_dest;
4280   tree scalar_type;
4281   gimple *new_phi = NULL, *phi;
4282   gimple_stmt_iterator exit_gsi;
4283   tree vec_dest;
4284   tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
4285   gimple *epilog_stmt = NULL;
4286   enum tree_code code = gimple_assign_rhs_code (stmt);
4287   gimple *exit_phi;
4288   tree bitsize;
4289   tree adjustment_def = NULL;
4290   tree vec_initial_def = NULL;
4291   tree reduction_op, expr, def, initial_def = NULL;
4292   tree orig_name, scalar_result;
4293   imm_use_iterator imm_iter, phi_imm_iter;
4294   use_operand_p use_p, phi_use_p;
4295   gimple *use_stmt, *orig_stmt, *reduction_phi = NULL;
4296   bool nested_in_vect_loop = false;
4297   auto_vec<gimple *> new_phis;
4298   auto_vec<gimple *> inner_phis;
4299   enum vect_def_type dt = vect_unknown_def_type;
4300   int j, i;
4301   auto_vec<tree> scalar_results;
4302   unsigned int group_size = 1, k, ratio;
4303   auto_vec<tree> vec_initial_defs;
4304   auto_vec<gimple *> phis;
4305   bool slp_reduc = false;
4306   tree new_phi_result;
4307   gimple *inner_phi = NULL;
4308
4309   if (slp_node)
4310     group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
4311
4312   if (nested_in_vect_loop_p (loop, stmt))
4313     {
4314       outer_loop = loop;
4315       loop = loop->inner;
4316       nested_in_vect_loop = true;
4317       gcc_assert (!slp_node);
4318     }
4319
4320   reduction_op = get_reduction_op (stmt, reduc_index);
4321
4322   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
4323   gcc_assert (vectype);
4324   mode = TYPE_MODE (vectype);
4325
4326   /* 1. Create the reduction def-use cycle:
4327      Set the arguments of REDUCTION_PHIS, i.e., transform
4328
4329         loop:
4330           vec_def = phi <null, null>            # REDUCTION_PHI
4331           VECT_DEF = vector_stmt                # vectorized form of STMT
4332           ...
4333
4334      into:
4335
4336         loop:
4337           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
4338           VECT_DEF = vector_stmt                # vectorized form of STMT
4339           ...
4340
4341      (in case of SLP, do it for all the phis). */
4342
4343   /* Get the loop-entry arguments.  */
4344   if (slp_node)
4345     vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
4346                        NULL, slp_node, reduc_index);
4347   else
4348     {
4349       /* Get at the scalar def before the loop, that defines the initial value
4350          of the reduction variable.  */
4351       gimple *def_stmt = SSA_NAME_DEF_STMT (reduction_op);
4352       initial_def = PHI_ARG_DEF_FROM_EDGE (def_stmt,
4353                                            loop_preheader_edge (loop));
4354       vec_initial_defs.create (1);
4355       vec_initial_def = get_initial_def_for_reduction (stmt, initial_def,
4356                                                        &adjustment_def);
4357       vec_initial_defs.quick_push (vec_initial_def);
4358     }
4359
4360   /* Set phi nodes arguments.  */
4361   FOR_EACH_VEC_ELT (reduction_phis, i, phi)
4362     {
4363       tree vec_init_def, def;
4364       gimple_seq stmts;
4365       vec_init_def = force_gimple_operand (vec_initial_defs[i], &stmts,
4366                                            true, NULL_TREE);
4367       gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4368       def = vect_defs[i];
4369       for (j = 0; j < ncopies; j++)
4370         {
4371           /* Set the loop-entry arg of the reduction-phi.  */
4372
4373           if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
4374               == INTEGER_INDUC_COND_REDUCTION)
4375             {
4376               /* Initialise the reduction phi to zero.  This prevents non-zero
4377                  initial values interfering with the reduction op.  */
4378               gcc_assert (ncopies == 1);
4379               gcc_assert (i == 0);
4380
4381               tree vec_init_def_type = TREE_TYPE (vec_init_def);
4382               tree zero_vec = build_zero_cst (vec_init_def_type);
4383
4384               add_phi_arg (as_a <gphi *> (phi), zero_vec,
4385                            loop_preheader_edge (loop), UNKNOWN_LOCATION);
4386             }
4387           else
4388             add_phi_arg (as_a <gphi *> (phi), vec_init_def,
4389                          loop_preheader_edge (loop), UNKNOWN_LOCATION);
4390
4391           /* Set the loop-latch arg for the reduction-phi.  */
4392           if (j > 0)
4393             def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);
4394
4395           add_phi_arg (as_a <gphi *> (phi), def, loop_latch_edge (loop),
4396                        UNKNOWN_LOCATION);
4397
4398           if (dump_enabled_p ())
4399             {
4400               dump_printf_loc (MSG_NOTE, vect_location,
4401                                "transform reduction: created def-use cycle: ");
4402               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
4403               dump_printf (MSG_NOTE, "\n");
4404               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (def), 0);
4405               dump_printf (MSG_NOTE, "\n");
4406             }
4407
4408           phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
4409         }
4410     }
4411
4412   /* 2. Create epilog code.
4413         The reduction epilog code operates across the elements of the vector
4414         of partial results computed by the vectorized loop.
4415         The reduction epilog code consists of:
4416
4417         step 1: compute the scalar result in a vector (v_out2)
4418         step 2: extract the scalar result (s_out3) from the vector (v_out2)
4419         step 3: adjust the scalar result (s_out3) if needed.
4420
4421         Step 1 can be accomplished using one of the following three schemes:
4422           (scheme 1) using reduc_code, if available.
4423           (scheme 2) using whole-vector shifts, if available.
4424           (scheme 3) using a scalar loop. In this case steps 1+2 above are
4425                      combined.
4426
4427           The overall epilog code looks like this:
4428
4429           s_out0 = phi <s_loop>         # original EXIT_PHI
4430           v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
4431           v_out2 = reduce <v_out1>              # step 1
4432           s_out3 = extract_field <v_out2, 0>    # step 2
4433           s_out4 = adjust_result <s_out3>       # step 3
4434
4435           (step 3 is optional, and steps 1 and 2 may be combined).
4436           Lastly, the uses of s_out0 are replaced by s_out4.  */
4437
4438
4439   /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
4440          v_out1 = phi <VECT_DEF>
4441          Store them in NEW_PHIS.  */
4442
4443   exit_bb = single_exit (loop)->dest;
4444   prev_phi_info = NULL;
4445   new_phis.create (vect_defs.length ());
4446   FOR_EACH_VEC_ELT (vect_defs, i, def)
4447     {
4448       for (j = 0; j < ncopies; j++)
4449         {
4450           tree new_def = copy_ssa_name (def);
4451           phi = create_phi_node (new_def, exit_bb);
4452           set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo));
4453           if (j == 0)
4454             new_phis.quick_push (phi);
4455           else
4456             {
4457               def = vect_get_vec_def_for_stmt_copy (dt, def);
4458               STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
4459             }
4460
4461           SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
4462           prev_phi_info = vinfo_for_stmt (phi);
4463         }
4464     }
4465
4466   /* The epilogue is created for the outer-loop, i.e., for the loop being
4467      vectorized.  Create exit phis for the outer loop.  */
4468   if (double_reduc)
4469     {
4470       loop = outer_loop;
4471       exit_bb = single_exit (loop)->dest;
4472       inner_phis.create (vect_defs.length ());
4473       FOR_EACH_VEC_ELT (new_phis, i, phi)
4474         {
4475           tree new_result = copy_ssa_name (PHI_RESULT (phi));
4476           gphi *outer_phi = create_phi_node (new_result, exit_bb);
4477           SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
4478                            PHI_RESULT (phi));
4479           set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
4480                                                             loop_vinfo));
4481           inner_phis.quick_push (phi);
4482           new_phis[i] = outer_phi;
4483           prev_phi_info = vinfo_for_stmt (outer_phi);
4484           while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
4485             {
4486               phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
4487               new_result = copy_ssa_name (PHI_RESULT (phi));
4488               outer_phi = create_phi_node (new_result, exit_bb);
4489               SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
4490                                PHI_RESULT (phi));
4491               set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
4492                                                                 loop_vinfo));
4493               STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
4494               prev_phi_info = vinfo_for_stmt (outer_phi);
4495             }
4496         }
4497     }
4498
4499   exit_gsi = gsi_after_labels (exit_bb);
4500
4501   /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
4502          (i.e. when reduc_code is not available) and in the final adjustment
4503          code (if needed).  Also get the original scalar reduction variable as
4504          defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
4505          represents a reduction pattern), the tree-code and scalar-def are
4506          taken from the original stmt that the pattern-stmt (STMT) replaces.
4507          Otherwise (it is a regular reduction) - the tree-code and scalar-def
4508          are taken from STMT.  */
4509
4510   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4511   if (!orig_stmt)
4512     {
4513       /* Regular reduction  */
4514       orig_stmt = stmt;
4515     }
4516   else
4517     {
4518       /* Reduction pattern  */
4519       stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
4520       gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
4521       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4522     }
4523
4524   code = gimple_assign_rhs_code (orig_stmt);
4525   /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
4526      partial results are added and not subtracted.  */
4527   if (code == MINUS_EXPR)
4528     code = PLUS_EXPR;
4529
4530   scalar_dest = gimple_assign_lhs (orig_stmt);
4531   scalar_type = TREE_TYPE (scalar_dest);
4532   scalar_results.create (group_size);
4533   new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
4534   bitsize = TYPE_SIZE (scalar_type);
4535
4536   /* In case this is a reduction in an inner-loop while vectorizing an outer
4537      loop - we don't need to extract a single scalar result at the end of the
4538      inner-loop (unless it is double reduction, i.e., the use of reduction is
4539      outside the outer-loop).  The final vector of partial results will be used
4540      in the vectorized outer-loop, or reduced to a scalar result at the end of
4541      the outer-loop.  */
4542   if (nested_in_vect_loop && !double_reduc)
4543     goto vect_finalize_reduction;
4544
4545   /* SLP reduction without reduction chain, e.g.,
4546      # a1 = phi <a2, a0>
4547      # b1 = phi <b2, b0>
4548      a2 = operation (a1)
4549      b2 = operation (b1)  */
4550   slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
4551
4552   /* In case of reduction chain, e.g.,
4553      # a1 = phi <a3, a0>
4554      a2 = operation (a1)
4555      a3 = operation (a2),
4556
4557      we may end up with more than one vector result.  Here we reduce them to
4558      one vector.  */
4559   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4560     {
4561       tree first_vect = PHI_RESULT (new_phis[0]);
4562       tree tmp;
4563       gassign *new_vec_stmt = NULL;
4564
4565       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4566       for (k = 1; k < new_phis.length (); k++)
4567         {
4568           gimple *next_phi = new_phis[k];
4569           tree second_vect = PHI_RESULT (next_phi);
4570
4571           tmp = build2 (code, vectype,  first_vect, second_vect);
4572           new_vec_stmt = gimple_build_assign (vec_dest, tmp);
4573           first_vect = make_ssa_name (vec_dest, new_vec_stmt);
4574           gimple_assign_set_lhs (new_vec_stmt, first_vect);
4575           gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
4576         }
4577
4578       new_phi_result = first_vect;
4579       if (new_vec_stmt)
4580         {
4581           new_phis.truncate (0);
4582           new_phis.safe_push (new_vec_stmt);
4583         }
4584     }
4585   else
4586     new_phi_result = PHI_RESULT (new_phis[0]);
4587
4588   if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
4589     {
4590       /* For condition reductions, we have a vector (NEW_PHI_RESULT) containing
4591          various data values where the condition matched and another vector
4592          (INDUCTION_INDEX) containing all the indexes of those matches.  We
4593          need to extract the last matching index (which will be the index with
4594          highest value) and use this to index into the data vector.
4595          For the case where there were no matches, the data vector will contain
4596          all default values and the index vector will be all zeros.  */
4597
4598       /* Get various versions of the type of the vector of indexes.  */
4599       tree index_vec_type = TREE_TYPE (induction_index);
4600       gcc_checking_assert (TYPE_UNSIGNED (index_vec_type));
4601       tree index_scalar_type = TREE_TYPE (index_vec_type);
4602       tree index_vec_cmp_type = build_same_sized_truth_vector_type
4603         (index_vec_type);
4604
4605       /* Get an unsigned integer version of the type of the data vector.  */
4606       int scalar_precision = GET_MODE_PRECISION (TYPE_MODE (scalar_type));
4607       tree scalar_type_unsigned = make_unsigned_type (scalar_precision);
4608       tree vectype_unsigned = build_vector_type
4609         (scalar_type_unsigned, TYPE_VECTOR_SUBPARTS (vectype));
4610
4611       /* First we need to create a vector (ZERO_VEC) of zeros and another
4612          vector (MAX_INDEX_VEC) filled with the last matching index, which we
4613          can create using a MAX reduction and then expanding.
4614          In the case where the loop never made any matches, the max index will
4615          be zero.  */
4616
4617       /* Vector of {0, 0, 0,...}.  */
4618       tree zero_vec = make_ssa_name (vectype);
4619       tree zero_vec_rhs = build_zero_cst (vectype);
4620       gimple *zero_vec_stmt = gimple_build_assign (zero_vec, zero_vec_rhs);
4621       gsi_insert_before (&exit_gsi, zero_vec_stmt, GSI_SAME_STMT);
4622
4623       /* Find maximum value from the vector of found indexes.  */
4624       tree max_index = make_ssa_name (index_scalar_type);
4625       gimple *max_index_stmt = gimple_build_assign (max_index, REDUC_MAX_EXPR,
4626                                                     induction_index);
4627       gsi_insert_before (&exit_gsi, max_index_stmt, GSI_SAME_STMT);
4628
4629       /* Vector of {max_index, max_index, max_index,...}.  */
4630       tree max_index_vec = make_ssa_name (index_vec_type);
4631       tree max_index_vec_rhs = build_vector_from_val (index_vec_type,
4632                                                       max_index);
4633       gimple *max_index_vec_stmt = gimple_build_assign (max_index_vec,
4634                                                         max_index_vec_rhs);
4635       gsi_insert_before (&exit_gsi, max_index_vec_stmt, GSI_SAME_STMT);
4636
4637       /* Next we compare the new vector (MAX_INDEX_VEC) full of max indexes
4638          with the vector (INDUCTION_INDEX) of found indexes, choosing values
4639          from the data vector (NEW_PHI_RESULT) for matches, 0 (ZERO_VEC)
4640          otherwise.  Only one value should match, resulting in a vector
4641          (VEC_COND) with one data value and the rest zeros.
4642          In the case where the loop never made any matches, every index will
4643          match, resulting in a vector with all data values (which will all be
4644          the default value).  */
4645
4646       /* Compare the max index vector to the vector of found indexes to find
4647          the position of the max value.  */
4648       tree vec_compare = make_ssa_name (index_vec_cmp_type);
4649       gimple *vec_compare_stmt = gimple_build_assign (vec_compare, EQ_EXPR,
4650                                                       induction_index,
4651                                                       max_index_vec);
4652       gsi_insert_before (&exit_gsi, vec_compare_stmt, GSI_SAME_STMT);
4653
4654       /* Use the compare to choose either values from the data vector or
4655          zero.  */
4656       tree vec_cond = make_ssa_name (vectype);
4657       gimple *vec_cond_stmt = gimple_build_assign (vec_cond, VEC_COND_EXPR,
4658                                                    vec_compare, new_phi_result,
4659                                                    zero_vec);
4660       gsi_insert_before (&exit_gsi, vec_cond_stmt, GSI_SAME_STMT);
4661
4662       /* Finally we need to extract the data value from the vector (VEC_COND)
4663          into a scalar (MATCHED_DATA_REDUC).  Logically we want to do a OR
4664          reduction, but because this doesn't exist, we can use a MAX reduction
4665          instead.  The data value might be signed or a float so we need to cast
4666          it first.
4667          In the case where the loop never made any matches, the data values are
4668          all identical, and so will reduce down correctly.  */
4669
4670       /* Make the matched data values unsigned.  */
4671       tree vec_cond_cast = make_ssa_name (vectype_unsigned);
4672       tree vec_cond_cast_rhs = build1 (VIEW_CONVERT_EXPR, vectype_unsigned,
4673                                        vec_cond);
4674       gimple *vec_cond_cast_stmt = gimple_build_assign (vec_cond_cast,
4675                                                         VIEW_CONVERT_EXPR,
4676                                                         vec_cond_cast_rhs);
4677       gsi_insert_before (&exit_gsi, vec_cond_cast_stmt, GSI_SAME_STMT);
4678
4679       /* Reduce down to a scalar value.  */
4680       tree data_reduc = make_ssa_name (scalar_type_unsigned);
4681       optab ot = optab_for_tree_code (REDUC_MAX_EXPR, vectype_unsigned,
4682                                       optab_default);
4683       gcc_assert (optab_handler (ot, TYPE_MODE (vectype_unsigned))
4684                   != CODE_FOR_nothing);
4685       gimple *data_reduc_stmt = gimple_build_assign (data_reduc,
4686                                                      REDUC_MAX_EXPR,
4687                                                      vec_cond_cast);
4688       gsi_insert_before (&exit_gsi, data_reduc_stmt, GSI_SAME_STMT);
4689
4690       /* Convert the reduced value back to the result type and set as the
4691          result.  */
4692       tree data_reduc_cast = build1 (VIEW_CONVERT_EXPR, scalar_type,
4693                                      data_reduc);
4694       epilog_stmt = gimple_build_assign (new_scalar_dest, data_reduc_cast);
4695       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4696       gimple_assign_set_lhs (epilog_stmt, new_temp);
4697       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4698       scalar_results.safe_push (new_temp);
4699     }
4700
4701   /* 2.3 Create the reduction code, using one of the three schemes described
4702          above. In SLP we simply need to extract all the elements from the
4703          vector (without reducing them), so we use scalar shifts.  */
4704   else if (reduc_code != ERROR_MARK && !slp_reduc)
4705     {
4706       tree tmp;
4707       tree vec_elem_type;
4708
4709       /*** Case 1:  Create:
4710            v_out2 = reduc_expr <v_out1>  */
4711
4712       if (dump_enabled_p ())
4713         dump_printf_loc (MSG_NOTE, vect_location,
4714                          "Reduce using direct vector reduction.\n");
4715
4716       vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result));
4717       if (!useless_type_conversion_p (scalar_type, vec_elem_type))
4718         {
4719           tree tmp_dest =
4720               vect_create_destination_var (scalar_dest, vec_elem_type);
4721           tmp = build1 (reduc_code, vec_elem_type, new_phi_result);
4722           epilog_stmt = gimple_build_assign (tmp_dest, tmp);
4723           new_temp = make_ssa_name (tmp_dest, epilog_stmt);
4724           gimple_assign_set_lhs (epilog_stmt, new_temp);
4725           gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4726
4727           tmp = build1 (NOP_EXPR, scalar_type, new_temp);
4728         }
4729       else
4730         tmp = build1 (reduc_code, scalar_type, new_phi_result);
4731
4732       epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
4733       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4734       gimple_assign_set_lhs (epilog_stmt, new_temp);
4735       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4736
4737       if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
4738           == INTEGER_INDUC_COND_REDUCTION)
4739         {
4740           /* Earlier we set the initial value to be zero.  Check the result
4741              and if it is zero then replace with the original initial
4742              value.  */
4743           tree zero = build_zero_cst (scalar_type);
4744           tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, zero);
4745
4746           tmp = make_ssa_name (new_scalar_dest);
4747           epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare,
4748                                              initial_def, new_temp);
4749           gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4750           new_temp = tmp;
4751         }
4752
4753       scalar_results.safe_push (new_temp);
4754     }
4755   else
4756     {
4757       bool reduce_with_shift = have_whole_vector_shift (mode);
4758       int element_bitsize = tree_to_uhwi (bitsize);
4759       int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4760       tree vec_temp;
4761
4762       /* Regardless of whether we have a whole vector shift, if we're
4763          emulating the operation via tree-vect-generic, we don't want
4764          to use it.  Only the first round of the reduction is likely
4765          to still be profitable via emulation.  */
4766       /* ??? It might be better to emit a reduction tree code here, so that
4767          tree-vect-generic can expand the first round via bit tricks.  */
4768       if (!VECTOR_MODE_P (mode))
4769         reduce_with_shift = false;
4770       else
4771         {
4772           optab optab = optab_for_tree_code (code, vectype, optab_default);
4773           if (optab_handler (optab, mode) == CODE_FOR_nothing)
4774             reduce_with_shift = false;
4775         }
4776
4777       if (reduce_with_shift && !slp_reduc)
4778         {
4779           int nelements = vec_size_in_bits / element_bitsize;
4780           unsigned char *sel = XALLOCAVEC (unsigned char, nelements);
4781
4782           int elt_offset;
4783
4784           tree zero_vec = build_zero_cst (vectype);
4785           /*** Case 2: Create:
4786              for (offset = nelements/2; offset >= 1; offset/=2)
4787                 {
4788                   Create:  va' = vec_shift <va, offset>
4789                   Create:  va = vop <va, va'>
4790                 }  */
4791
4792           tree rhs;
4793
4794           if (dump_enabled_p ())
4795             dump_printf_loc (MSG_NOTE, vect_location,
4796                              "Reduce using vector shifts\n");
4797
4798           vec_dest = vect_create_destination_var (scalar_dest, vectype);
4799           new_temp = new_phi_result;
4800           for (elt_offset = nelements / 2;
4801                elt_offset >= 1;
4802                elt_offset /= 2)
4803             {
4804               calc_vec_perm_mask_for_shift (mode, elt_offset, sel);
4805               tree mask = vect_gen_perm_mask_any (vectype, sel);
4806               epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
4807                                                  new_temp, zero_vec, mask);
4808               new_name = make_ssa_name (vec_dest, epilog_stmt);
4809               gimple_assign_set_lhs (epilog_stmt, new_name);
4810               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4811
4812               epilog_stmt = gimple_build_assign (vec_dest, code, new_name,
4813                                                  new_temp);
4814               new_temp = make_ssa_name (vec_dest, epilog_stmt);
4815               gimple_assign_set_lhs (epilog_stmt, new_temp);
4816               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4817             }
4818
4819           /* 2.4  Extract the final scalar result.  Create:
4820              s_out3 = extract_field <v_out2, bitpos>  */
4821
4822           if (dump_enabled_p ())
4823             dump_printf_loc (MSG_NOTE, vect_location,
4824                              "extract scalar result\n");
4825
4826           rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp,
4827                         bitsize, bitsize_zero_node);
4828           epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4829           new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4830           gimple_assign_set_lhs (epilog_stmt, new_temp);
4831           gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4832           scalar_results.safe_push (new_temp);
4833         }
4834       else
4835         {
4836           /*** Case 3: Create:
4837              s = extract_field <v_out2, 0>
4838              for (offset = element_size;
4839                   offset < vector_size;
4840                   offset += element_size;)
4841                {
4842                  Create:  s' = extract_field <v_out2, offset>
4843                  Create:  s = op <s, s'>  // For non SLP cases
4844                }  */
4845
4846           if (dump_enabled_p ())
4847             dump_printf_loc (MSG_NOTE, vect_location,
4848                              "Reduce using scalar code.\n");
4849
4850           vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4851           FOR_EACH_VEC_ELT (new_phis, i, new_phi)
4852             {
4853               int bit_offset;
4854               if (gimple_code (new_phi) == GIMPLE_PHI)
4855                 vec_temp = PHI_RESULT (new_phi);
4856               else
4857                 vec_temp = gimple_assign_lhs (new_phi);
4858               tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
4859                             bitsize_zero_node);
4860               epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4861               new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4862               gimple_assign_set_lhs (epilog_stmt, new_temp);
4863               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4864
4865               /* In SLP we don't need to apply reduction operation, so we just
4866                  collect s' values in SCALAR_RESULTS.  */
4867               if (slp_reduc)
4868                 scalar_results.safe_push (new_temp);
4869
4870               for (bit_offset = element_bitsize;
4871                    bit_offset < vec_size_in_bits;
4872                    bit_offset += element_bitsize)
4873                 {
4874                   tree bitpos = bitsize_int (bit_offset);
4875                   tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
4876                                      bitsize, bitpos);
4877
4878                   epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4879                   new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
4880                   gimple_assign_set_lhs (epilog_stmt, new_name);
4881                   gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4882
4883                   if (slp_reduc)
4884                     {
4885                       /* In SLP we don't need to apply reduction operation, so
4886                          we just collect s' values in SCALAR_RESULTS.  */
4887                       new_temp = new_name;
4888                       scalar_results.safe_push (new_name);
4889                     }
4890                   else
4891                     {
4892                       epilog_stmt = gimple_build_assign (new_scalar_dest, code,
4893                                                          new_name, new_temp);
4894                       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4895                       gimple_assign_set_lhs (epilog_stmt, new_temp);
4896                       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4897                     }
4898                 }
4899             }
4900
4901           /* The only case where we need to reduce scalar results in SLP, is
4902              unrolling.  If the size of SCALAR_RESULTS is greater than
4903              GROUP_SIZE, we reduce them combining elements modulo
4904              GROUP_SIZE.  */
4905           if (slp_reduc)
4906             {
4907               tree res, first_res, new_res;
4908               gimple *new_stmt;
4909
4910               /* Reduce multiple scalar results in case of SLP unrolling.  */
4911               for (j = group_size; scalar_results.iterate (j, &res);
4912                    j++)
4913                 {
4914                   first_res = scalar_results[j % group_size];
4915                   new_stmt = gimple_build_assign (new_scalar_dest, code,
4916                                                   first_res, res);
4917                   new_res = make_ssa_name (new_scalar_dest, new_stmt);
4918                   gimple_assign_set_lhs (new_stmt, new_res);
4919                   gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
4920                   scalar_results[j % group_size] = new_res;
4921                 }
4922             }
4923           else
4924             /* Not SLP - we have one scalar to keep in SCALAR_RESULTS.  */
4925             scalar_results.safe_push (new_temp);
4926         }
4927     }
4928
4929 vect_finalize_reduction:
4930
4931   if (double_reduc)
4932     loop = loop->inner;
4933
4934   /* 2.5 Adjust the final result by the initial value of the reduction
4935          variable. (When such adjustment is not needed, then
4936          'adjustment_def' is zero).  For example, if code is PLUS we create:
4937          new_temp = loop_exit_def + adjustment_def  */
4938
4939   if (adjustment_def)
4940     {
4941       gcc_assert (!slp_reduc);
4942       if (nested_in_vect_loop)
4943         {
4944           new_phi = new_phis[0];
4945           gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
4946           expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
4947           new_dest = vect_create_destination_var (scalar_dest, vectype);
4948         }
4949       else
4950         {
4951           new_temp = scalar_results[0];
4952           gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
4953           expr = build2 (code, scalar_type, new_temp, adjustment_def);
4954           new_dest = vect_create_destination_var (scalar_dest, scalar_type);
4955         }
4956
4957       epilog_stmt = gimple_build_assign (new_dest, expr);
4958       new_temp = make_ssa_name (new_dest, epilog_stmt);
4959       gimple_assign_set_lhs (epilog_stmt, new_temp);
4960       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4961       if (nested_in_vect_loop)
4962         {
4963           set_vinfo_for_stmt (epilog_stmt,
4964                               new_stmt_vec_info (epilog_stmt, loop_vinfo));
4965           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
4966                 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
4967
4968           if (!double_reduc)
4969             scalar_results.quick_push (new_temp);
4970           else
4971             scalar_results[0] = new_temp;
4972         }
4973       else
4974         scalar_results[0] = new_temp;
4975
4976       new_phis[0] = epilog_stmt;
4977     }
4978
4979   /* 2.6  Handle the loop-exit phis.  Replace the uses of scalar loop-exit
4980           phis with new adjusted scalar results, i.e., replace use <s_out0>
4981           with use <s_out4>.
4982
4983      Transform:
4984         loop_exit:
4985           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4986           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4987           v_out2 = reduce <v_out1>
4988           s_out3 = extract_field <v_out2, 0>
4989           s_out4 = adjust_result <s_out3>
4990           use <s_out0>
4991           use <s_out0>
4992
4993      into:
4994
4995         loop_exit:
4996           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4997           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4998           v_out2 = reduce <v_out1>
4999           s_out3 = extract_field <v_out2, 0>
5000           s_out4 = adjust_result <s_out3>
5001           use <s_out4>
5002           use <s_out4> */
5003
5004
5005   /* In SLP reduction chain we reduce vector results into one vector if
5006      necessary, hence we set here GROUP_SIZE to 1.  SCALAR_DEST is the LHS of
5007      the last stmt in the reduction chain, since we are looking for the loop
5008      exit phi node.  */
5009   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
5010     {
5011       gimple *dest_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
5012       /* Handle reduction patterns.  */
5013       if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt)))
5014         dest_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt));
5015
5016       scalar_dest = gimple_assign_lhs (dest_stmt);
5017       group_size = 1;
5018     }
5019
5020   /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
5021      case that GROUP_SIZE is greater than vectorization factor).  Therefore, we
5022      need to match SCALAR_RESULTS with corresponding statements.  The first
5023      (GROUP_SIZE / number of new vector stmts) scalar results correspond to
5024      the first vector stmt, etc.
5025      (RATIO is equal to (GROUP_SIZE / number of new vector stmts)).  */
5026   if (group_size > new_phis.length ())
5027     {
5028       ratio = group_size / new_phis.length ();
5029       gcc_assert (!(group_size % new_phis.length ()));
5030     }
5031   else
5032     ratio = 1;
5033
5034   for (k = 0; k < group_size; k++)
5035     {
5036       if (k % ratio == 0)
5037         {
5038           epilog_stmt = new_phis[k / ratio];
5039           reduction_phi = reduction_phis[k / ratio];
5040           if (double_reduc)
5041             inner_phi = inner_phis[k / ratio];
5042         }
5043
5044       if (slp_reduc)
5045         {
5046           gimple *current_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[k];
5047
5048           orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
5049           /* SLP statements can't participate in patterns.  */
5050           gcc_assert (!orig_stmt);
5051           scalar_dest = gimple_assign_lhs (current_stmt);
5052         }
5053
5054       phis.create (3);
5055       /* Find the loop-closed-use at the loop exit of the original scalar
5056          result.  (The reduction result is expected to have two immediate uses -
5057          one at the latch block, and one at the loop exit).  */
5058       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5059         if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))
5060             && !is_gimple_debug (USE_STMT (use_p)))
5061           phis.safe_push (USE_STMT (use_p));
5062
5063       /* While we expect to have found an exit_phi because of loop-closed-ssa
5064          form we can end up without one if the scalar cycle is dead.  */
5065
5066       FOR_EACH_VEC_ELT (phis, i, exit_phi)
5067         {
5068           if (outer_loop)
5069             {
5070               stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
5071               gphi *vect_phi;
5072
5073               /* FORNOW. Currently not supporting the case that an inner-loop
5074                  reduction is not used in the outer-loop (but only outside the
5075                  outer-loop), unless it is double reduction.  */
5076               gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
5077                            && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
5078                           || double_reduc);
5079
5080               if (double_reduc)
5081                 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = inner_phi;
5082               else
5083                 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
5084               if (!double_reduc
5085                   || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
5086                       != vect_double_reduction_def)
5087                 continue;
5088
5089               /* Handle double reduction:
5090
5091                  stmt1: s1 = phi <s0, s2>  - double reduction phi (outer loop)
5092                  stmt2:   s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
5093                  stmt3:   s4 = use (s3)     - (regular) reduc stmt (inner loop)
5094                  stmt4: s2 = phi <s4>      - double reduction stmt (outer loop)
5095
5096                  At that point the regular reduction (stmt2 and stmt3) is
5097                  already vectorized, as well as the exit phi node, stmt4.
5098                  Here we vectorize the phi node of double reduction, stmt1, and
5099                  update all relevant statements.  */
5100
5101               /* Go through all the uses of s2 to find double reduction phi
5102                  node, i.e., stmt1 above.  */
5103               orig_name = PHI_RESULT (exit_phi);
5104               FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
5105                 {
5106                   stmt_vec_info use_stmt_vinfo;
5107                   stmt_vec_info new_phi_vinfo;
5108                   tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
5109                   basic_block bb = gimple_bb (use_stmt);
5110                   gimple *use;
5111
5112                   /* Check that USE_STMT is really double reduction phi
5113                      node.  */
5114                   if (gimple_code (use_stmt) != GIMPLE_PHI
5115                       || gimple_phi_num_args (use_stmt) != 2
5116                       || bb->loop_father != outer_loop)
5117                     continue;
5118                   use_stmt_vinfo = vinfo_for_stmt (use_stmt);
5119                   if (!use_stmt_vinfo
5120                       || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
5121                           != vect_double_reduction_def)
5122                     continue;
5123
5124                   /* Create vector phi node for double reduction:
5125                      vs1 = phi <vs0, vs2>
5126                      vs1 was created previously in this function by a call to
5127                        vect_get_vec_def_for_operand and is stored in
5128                        vec_initial_def;
5129                      vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
5130                      vs0 is created here.  */
5131
5132                   /* Create vector phi node.  */
5133                   vect_phi = create_phi_node (vec_initial_def, bb);
5134                   new_phi_vinfo = new_stmt_vec_info (vect_phi,
5135                                     loop_vec_info_for_loop (outer_loop));
5136                   set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
5137
5138                   /* Create vs0 - initial def of the double reduction phi.  */
5139                   preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
5140                                              loop_preheader_edge (outer_loop));
5141                   init_def = get_initial_def_for_reduction (stmt,
5142                                                           preheader_arg, NULL);
5143                   vect_phi_init = vect_init_vector (use_stmt, init_def,
5144                                                     vectype, NULL);
5145
5146                   /* Update phi node arguments with vs0 and vs2.  */
5147                   add_phi_arg (vect_phi, vect_phi_init,
5148                                loop_preheader_edge (outer_loop),
5149                                UNKNOWN_LOCATION);
5150                   add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
5151                                loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
5152                   if (dump_enabled_p ())
5153                     {
5154                       dump_printf_loc (MSG_NOTE, vect_location,
5155                                        "created double reduction phi node: ");
5156                       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vect_phi, 0);
5157                       dump_printf (MSG_NOTE, "\n");
5158                     }
5159
5160                   vect_phi_res = PHI_RESULT (vect_phi);
5161
5162                   /* Replace the use, i.e., set the correct vs1 in the regular
5163                      reduction phi node.  FORNOW, NCOPIES is always 1, so the
5164                      loop is redundant.  */
5165                   use = reduction_phi;
5166                   for (j = 0; j < ncopies; j++)
5167                     {
5168                       edge pr_edge = loop_preheader_edge (loop);
5169                       SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
5170                       use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
5171                     }
5172                 }
5173             }
5174         }
5175
5176       phis.release ();
5177       if (nested_in_vect_loop)
5178         {
5179           if (double_reduc)
5180             loop = outer_loop;
5181           else
5182             continue;
5183         }
5184
5185       phis.create (3);
5186       /* Find the loop-closed-use at the loop exit of the original scalar
5187          result.  (The reduction result is expected to have two immediate uses,
5188          one at the latch block, and one at the loop exit).  For double
5189          reductions we are looking for exit phis of the outer loop.  */
5190       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5191         {
5192           if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
5193             {
5194               if (!is_gimple_debug (USE_STMT (use_p)))
5195                 phis.safe_push (USE_STMT (use_p));
5196             }
5197           else
5198             {
5199               if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
5200                 {
5201                   tree phi_res = PHI_RESULT (USE_STMT (use_p));
5202
5203                   FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
5204                     {
5205                       if (!flow_bb_inside_loop_p (loop,
5206                                              gimple_bb (USE_STMT (phi_use_p)))
5207                           && !is_gimple_debug (USE_STMT (phi_use_p)))
5208                         phis.safe_push (USE_STMT (phi_use_p));
5209                     }
5210                 }
5211             }
5212         }
5213
5214       FOR_EACH_VEC_ELT (phis, i, exit_phi)
5215         {
5216           /* Replace the uses:  */
5217           orig_name = PHI_RESULT (exit_phi);
5218           scalar_result = scalar_results[k];
5219           FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
5220             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
5221               SET_USE (use_p, scalar_result);
5222         }
5223
5224       phis.release ();
5225     }
5226 }
5227
5228
5229 /* Function is_nonwrapping_integer_induction.
5230
5231    Check if STMT (which is part of loop LOOP) both increments and
5232    does not cause overflow.  */
5233
5234 static bool
5235 is_nonwrapping_integer_induction (gimple *stmt, struct loop *loop)
5236 {
5237   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
5238   tree base = STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo);
5239   tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo);
5240   tree lhs_type = TREE_TYPE (gimple_phi_result (stmt));
5241   widest_int ni, max_loop_value, lhs_max;
5242   bool overflow = false;
5243
5244   /* Make sure the loop is integer based.  */
5245   if (TREE_CODE (base) != INTEGER_CST
5246       || TREE_CODE (step) != INTEGER_CST)
5247     return false;
5248
5249   /* Check that the induction increments.  */
5250   if (tree_int_cst_sgn (step) == -1)
5251     return false;
5252
5253   /* Check that the max size of the loop will not wrap.  */
5254
5255   if (TYPE_OVERFLOW_UNDEFINED (lhs_type))
5256     return true;
5257
5258   if (! max_stmt_executions (loop, &ni))
5259     return false;
5260
5261   max_loop_value = wi::mul (wi::to_widest (step), ni, TYPE_SIGN (lhs_type),
5262                             &overflow);
5263   if (overflow)
5264     return false;
5265
5266   max_loop_value = wi::add (wi::to_widest (base), max_loop_value,
5267                             TYPE_SIGN (lhs_type), &overflow);
5268   if (overflow)
5269     return false;
5270
5271   return (wi::min_precision (max_loop_value, TYPE_SIGN (lhs_type))
5272           <= TYPE_PRECISION (lhs_type));
5273 }
5274
5275 /* Function vectorizable_reduction.
5276
5277    Check if STMT performs a reduction operation that can be vectorized.
5278    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5279    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5280    Return FALSE if not a vectorizable STMT, TRUE otherwise.
5281
5282    This function also handles reduction idioms (patterns) that have been
5283    recognized in advance during vect_pattern_recog.  In this case, STMT may be
5284    of this form:
5285      X = pattern_expr (arg0, arg1, ..., X)
5286    and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
5287    sequence that had been detected and replaced by the pattern-stmt (STMT).
5288
5289    This function also handles reduction of condition expressions, for example:
5290      for (int i = 0; i < N; i++)
5291        if (a[i] < value)
5292          last = a[i];
5293    This is handled by vectorising the loop and creating an additional vector
5294    containing the loop indexes for which "a[i] < value" was true.  In the
5295    function epilogue this is reduced to a single max value and then used to
5296    index into the vector of results.
5297
5298    In some cases of reduction patterns, the type of the reduction variable X is
5299    different than the type of the other arguments of STMT.
5300    In such cases, the vectype that is used when transforming STMT into a vector
5301    stmt is different than the vectype that is used to determine the
5302    vectorization factor, because it consists of a different number of elements
5303    than the actual number of elements that are being operated upon in parallel.
5304
5305    For example, consider an accumulation of shorts into an int accumulator.
5306    On some targets it's possible to vectorize this pattern operating on 8
5307    shorts at a time (hence, the vectype for purposes of determining the
5308    vectorization factor should be V8HI); on the other hand, the vectype that
5309    is used to create the vector form is actually V4SI (the type of the result).
5310
5311    Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
5312    indicates what is the actual level of parallelism (V8HI in the example), so
5313    that the right vectorization factor would be derived.  This vectype
5314    corresponds to the type of arguments to the reduction stmt, and should *NOT*
5315    be used to create the vectorized stmt.  The right vectype for the vectorized
5316    stmt is obtained from the type of the result X:
5317         get_vectype_for_scalar_type (TREE_TYPE (X))
5318
5319    This means that, contrary to "regular" reductions (or "regular" stmts in
5320    general), the following equation:
5321       STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
5322    does *NOT* necessarily hold for reduction patterns.  */
5323
5324 bool
5325 vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
5326                         gimple **vec_stmt, slp_tree slp_node)
5327 {
5328   tree vec_dest;
5329   tree scalar_dest;
5330   tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
5331   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5332   tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5333   tree vectype_in = NULL_TREE;
5334   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5335   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5336   enum tree_code code, orig_code, epilog_reduc_code;
5337   machine_mode vec_mode;
5338   int op_type;
5339   optab optab, reduc_optab;
5340   tree new_temp = NULL_TREE;
5341   gimple *def_stmt;
5342   enum vect_def_type dt;
5343   gphi *new_phi = NULL;
5344   tree scalar_type;
5345   bool is_simple_use;
5346   gimple *orig_stmt;
5347   stmt_vec_info orig_stmt_info;
5348   tree expr = NULL_TREE;
5349   int i;
5350   int ncopies;
5351   int epilog_copies;
5352   stmt_vec_info prev_stmt_info, prev_phi_info;
5353   bool single_defuse_cycle = false;
5354   tree reduc_def = NULL_TREE;
5355   gimple *new_stmt = NULL;
5356   int j;
5357   tree ops[3];
5358   bool nested_cycle = false, found_nested_cycle_def = false;
5359   gimple *reduc_def_stmt = NULL;
5360   bool double_reduc = false, dummy;
5361   basic_block def_bb;
5362   struct loop * def_stmt_loop, *outer_loop = NULL;
5363   tree def_arg;
5364   gimple *def_arg_stmt;
5365   auto_vec<tree> vec_oprnds0;
5366   auto_vec<tree> vec_oprnds1;
5367   auto_vec<tree> vect_defs;
5368   auto_vec<gimple *> phis;
5369   int vec_num;
5370   tree def0, def1, tem, op0, op1 = NULL_TREE;
5371   bool first_p = true;
5372   tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
5373   gimple *cond_expr_induction_def_stmt = NULL;
5374
5375   /* In case of reduction chain we switch to the first stmt in the chain, but
5376      we don't update STMT_INFO, since only the last stmt is marked as reduction
5377      and has reduction properties.  */
5378   if (GROUP_FIRST_ELEMENT (stmt_info)
5379       && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
5380     {
5381       stmt = GROUP_FIRST_ELEMENT (stmt_info);
5382       first_p = false;
5383     }
5384
5385   if (nested_in_vect_loop_p (loop, stmt))
5386     {
5387       outer_loop = loop;
5388       loop = loop->inner;
5389       nested_cycle = true;
5390     }
5391
5392   /* 1. Is vectorizable reduction?  */
5393   /* Not supportable if the reduction variable is used in the loop, unless
5394      it's a reduction chain.  */
5395   if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
5396       && !GROUP_FIRST_ELEMENT (stmt_info))
5397     return false;
5398
5399   /* Reductions that are not used even in an enclosing outer-loop,
5400      are expected to be "live" (used out of the loop).  */
5401   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
5402       && !STMT_VINFO_LIVE_P (stmt_info))
5403     return false;
5404
5405   /* Make sure it was already recognized as a reduction computation.  */
5406   if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_reduction_def
5407       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_nested_cycle)
5408     return false;
5409
5410   /* 2. Has this been recognized as a reduction pattern?
5411
5412      Check if STMT represents a pattern that has been recognized
5413      in earlier analysis stages.  For stmts that represent a pattern,
5414      the STMT_VINFO_RELATED_STMT field records the last stmt in
5415      the original sequence that constitutes the pattern.  */
5416
5417   orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
5418   if (orig_stmt)
5419     {
5420       orig_stmt_info = vinfo_for_stmt (orig_stmt);
5421       gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
5422       gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
5423     }
5424
5425   /* 3. Check the operands of the operation.  The first operands are defined
5426         inside the loop body. The last operand is the reduction variable,
5427         which is defined by the loop-header-phi.  */
5428
5429   gcc_assert (is_gimple_assign (stmt));
5430
5431   /* Flatten RHS.  */
5432   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
5433     {
5434     case GIMPLE_SINGLE_RHS:
5435       op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
5436       if (op_type == ternary_op)
5437         {
5438           tree rhs = gimple_assign_rhs1 (stmt);
5439           ops[0] = TREE_OPERAND (rhs, 0);
5440           ops[1] = TREE_OPERAND (rhs, 1);
5441           ops[2] = TREE_OPERAND (rhs, 2);
5442           code = TREE_CODE (rhs);
5443         }
5444       else
5445         return false;
5446       break;
5447
5448     case GIMPLE_BINARY_RHS:
5449       code = gimple_assign_rhs_code (stmt);
5450       op_type = TREE_CODE_LENGTH (code);
5451       gcc_assert (op_type == binary_op);
5452       ops[0] = gimple_assign_rhs1 (stmt);
5453       ops[1] = gimple_assign_rhs2 (stmt);
5454       break;
5455
5456     case GIMPLE_TERNARY_RHS:
5457       code = gimple_assign_rhs_code (stmt);
5458       op_type = TREE_CODE_LENGTH (code);
5459       gcc_assert (op_type == ternary_op);
5460       ops[0] = gimple_assign_rhs1 (stmt);
5461       ops[1] = gimple_assign_rhs2 (stmt);
5462       ops[2] = gimple_assign_rhs3 (stmt);
5463       break;
5464
5465     case GIMPLE_UNARY_RHS:
5466       return false;
5467
5468     default:
5469       gcc_unreachable ();
5470     }
5471   /* The default is that the reduction variable is the last in statement.  */
5472   int reduc_index = op_type - 1;
5473   if (code == MINUS_EXPR)
5474     reduc_index = 0;
5475
5476   if (code == COND_EXPR && slp_node)
5477     return false;
5478
5479   scalar_dest = gimple_assign_lhs (stmt);
5480   scalar_type = TREE_TYPE (scalar_dest);
5481   if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
5482       && !SCALAR_FLOAT_TYPE_P (scalar_type))
5483     return false;
5484
5485   /* Do not try to vectorize bit-precision reductions.  */
5486   if ((TYPE_PRECISION (scalar_type)
5487        != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
5488     return false;
5489
5490   /* All uses but the last are expected to be defined in the loop.
5491      The last use is the reduction variable.  In case of nested cycle this
5492      assumption is not true: we use reduc_index to record the index of the
5493      reduction variable.  */
5494   for (i = 0; i < op_type; i++)
5495     {
5496       if (i == reduc_index)
5497         continue;
5498
5499       /* The condition of COND_EXPR is checked in vectorizable_condition().  */
5500       if (i == 0 && code == COND_EXPR)
5501         continue;
5502
5503       is_simple_use = vect_is_simple_use (ops[i], loop_vinfo,
5504                                           &def_stmt, &dt, &tem);
5505       if (!vectype_in)
5506         vectype_in = tem;
5507       gcc_assert (is_simple_use);
5508
5509       if (dt != vect_internal_def
5510           && dt != vect_external_def
5511           && dt != vect_constant_def
5512           && dt != vect_induction_def
5513           && !(dt == vect_nested_cycle && nested_cycle))
5514         return false;
5515
5516       if (dt == vect_nested_cycle)
5517         {
5518           found_nested_cycle_def = true;
5519           reduc_def_stmt = def_stmt;
5520           reduc_index = i;
5521         }
5522
5523       if (i == 1 && code == COND_EXPR && dt == vect_induction_def)
5524         cond_expr_induction_def_stmt = def_stmt;
5525     }
5526
5527   is_simple_use = vect_is_simple_use (ops[reduc_index], loop_vinfo,
5528                                       &def_stmt, &dt, &tem);
5529   if (!vectype_in)
5530     vectype_in = tem;
5531   gcc_assert (is_simple_use);
5532   if (!found_nested_cycle_def)
5533     reduc_def_stmt = def_stmt;
5534
5535   if (reduc_def_stmt && gimple_code (reduc_def_stmt) != GIMPLE_PHI)
5536     return false;
5537
5538   if (!(dt == vect_reduction_def
5539         || dt == vect_nested_cycle
5540         || ((dt == vect_internal_def || dt == vect_external_def
5541              || dt == vect_constant_def || dt == vect_induction_def)
5542             && nested_cycle && found_nested_cycle_def)))
5543     {
5544       /* For pattern recognized stmts, orig_stmt might be a reduction,
5545          but some helper statements for the pattern might not, or
5546          might be COND_EXPRs with reduction uses in the condition.  */
5547       gcc_assert (orig_stmt);
5548       return false;
5549     }
5550
5551   enum vect_reduction_type v_reduc_type;
5552   gimple *tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
5553                                           !nested_cycle, &dummy, false,
5554                                           &v_reduc_type);
5555
5556   /* If we have a condition reduction, see if we can simplify it further.  */
5557   if (v_reduc_type == COND_REDUCTION
5558       && cond_expr_induction_def_stmt != NULL
5559       && is_nonwrapping_integer_induction (cond_expr_induction_def_stmt, loop))
5560     {
5561       if (dump_enabled_p ())
5562         dump_printf_loc (MSG_NOTE, vect_location,
5563                          "condition expression based on integer induction.\n");
5564       STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = INTEGER_INDUC_COND_REDUCTION;
5565     }
5566   else
5567    STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = v_reduc_type;
5568
5569   if (orig_stmt)
5570     gcc_assert (tmp == orig_stmt
5571                 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
5572   else
5573     /* We changed STMT to be the first stmt in reduction chain, hence we
5574        check that in this case the first element in the chain is STMT.  */
5575     gcc_assert (stmt == tmp
5576                 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
5577
5578   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
5579     return false;
5580
5581   if (slp_node || PURE_SLP_STMT (stmt_info))
5582     ncopies = 1;
5583   else
5584     ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5585                / TYPE_VECTOR_SUBPARTS (vectype_in));
5586
5587   gcc_assert (ncopies >= 1);
5588
5589   vec_mode = TYPE_MODE (vectype_in);
5590
5591   if (code == COND_EXPR)
5592     {
5593       /* Only call during the analysis stage, otherwise we'll lose
5594          STMT_VINFO_TYPE.  */
5595       if (!vec_stmt && !vectorizable_condition (stmt, gsi, NULL,
5596                                                 ops[reduc_index], 0, NULL))
5597         {
5598           if (dump_enabled_p ())
5599             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5600                              "unsupported condition in reduction\n");
5601           return false;
5602         }
5603     }
5604   else
5605     {
5606       /* 4. Supportable by target?  */
5607
5608       if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
5609           || code == LROTATE_EXPR || code == RROTATE_EXPR)
5610         {
5611           /* Shifts and rotates are only supported by vectorizable_shifts,
5612              not vectorizable_reduction.  */
5613           if (dump_enabled_p ())
5614             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5615                              "unsupported shift or rotation.\n");
5616           return false;
5617         }
5618
5619       /* 4.1. check support for the operation in the loop  */
5620       optab = optab_for_tree_code (code, vectype_in, optab_default);
5621       if (!optab)
5622         {
5623           if (dump_enabled_p ())
5624             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5625                              "no optab.\n");
5626
5627           return false;
5628         }
5629
5630       if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
5631         {
5632           if (dump_enabled_p ())
5633             dump_printf (MSG_NOTE, "op not supported by target.\n");
5634
5635           if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5636               || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5637                   < vect_min_worthwhile_factor (code))
5638             return false;
5639
5640           if (dump_enabled_p ())
5641             dump_printf (MSG_NOTE, "proceeding using word mode.\n");
5642         }
5643
5644       /* Worthwhile without SIMD support?  */
5645       if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
5646           && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5647              < vect_min_worthwhile_factor (code))
5648         {
5649           if (dump_enabled_p ())
5650             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5651                              "not worthwhile without SIMD support.\n");
5652
5653           return false;
5654         }
5655     }
5656
5657   /* 4.2. Check support for the epilog operation.
5658
5659           If STMT represents a reduction pattern, then the type of the
5660           reduction variable may be different than the type of the rest
5661           of the arguments.  For example, consider the case of accumulation
5662           of shorts into an int accumulator; The original code:
5663                         S1: int_a = (int) short_a;
5664           orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;
5665
5666           was replaced with:
5667                         STMT: int_acc = widen_sum <short_a, int_acc>
5668
5669           This means that:
5670           1. The tree-code that is used to create the vector operation in the
5671              epilog code (that reduces the partial results) is not the
5672              tree-code of STMT, but is rather the tree-code of the original
5673              stmt from the pattern that STMT is replacing.  I.e, in the example
5674              above we want to use 'widen_sum' in the loop, but 'plus' in the
5675              epilog.
5676           2. The type (mode) we use to check available target support
5677              for the vector operation to be created in the *epilog*, is
5678              determined by the type of the reduction variable (in the example
5679              above we'd check this: optab_handler (plus_optab, vect_int_mode)).
5680              However the type (mode) we use to check available target support
5681              for the vector operation to be created *inside the loop*, is
5682              determined by the type of the other arguments to STMT (in the
5683              example we'd check this: optab_handler (widen_sum_optab,
5684              vect_short_mode)).
5685
5686           This is contrary to "regular" reductions, in which the types of all
5687           the arguments are the same as the type of the reduction variable.
5688           For "regular" reductions we can therefore use the same vector type
5689           (and also the same tree-code) when generating the epilog code and
5690           when generating the code inside the loop.  */
5691
5692   if (orig_stmt)
5693     {
5694       /* This is a reduction pattern: get the vectype from the type of the
5695          reduction variable, and get the tree-code from orig_stmt.  */
5696       gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5697                   == TREE_CODE_REDUCTION);
5698       orig_code = gimple_assign_rhs_code (orig_stmt);
5699       gcc_assert (vectype_out);
5700       vec_mode = TYPE_MODE (vectype_out);
5701     }
5702   else
5703     {
5704       /* Regular reduction: the same vectype and tree-code as used for
5705          the vector code inside the loop can be used for the epilog code. */
5706       orig_code = code;
5707
5708       if (code == MINUS_EXPR)
5709         orig_code = PLUS_EXPR;
5710
5711       /* For simple condition reductions, replace with the actual expression
5712          we want to base our reduction around.  */
5713       if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5714           == INTEGER_INDUC_COND_REDUCTION)
5715         orig_code = MAX_EXPR;
5716     }
5717
5718   if (nested_cycle)
5719     {
5720       def_bb = gimple_bb (reduc_def_stmt);
5721       def_stmt_loop = def_bb->loop_father;
5722       def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
5723                                        loop_preheader_edge (def_stmt_loop));
5724       if (TREE_CODE (def_arg) == SSA_NAME
5725           && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
5726           && gimple_code (def_arg_stmt) == GIMPLE_PHI
5727           && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
5728           && vinfo_for_stmt (def_arg_stmt)
5729           && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
5730               == vect_double_reduction_def)
5731         double_reduc = true;
5732     }
5733
5734   epilog_reduc_code = ERROR_MARK;
5735
5736   if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION
5737       || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5738                 == INTEGER_INDUC_COND_REDUCTION)
5739     {
5740       if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
5741         {
5742           reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
5743                                          optab_default);
5744           if (!reduc_optab)
5745             {
5746               if (dump_enabled_p ())
5747                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5748                                  "no optab for reduction.\n");
5749
5750               epilog_reduc_code = ERROR_MARK;
5751             }
5752           else if (optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
5753             {
5754               if (dump_enabled_p ())
5755                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5756                                  "reduc op not supported by target.\n");
5757
5758               epilog_reduc_code = ERROR_MARK;
5759             }
5760
5761           /* When epilog_reduc_code is ERROR_MARK then a reduction will be
5762              generated in the epilog using multiple expressions.  This does not
5763              work for condition reductions.  */
5764           if (epilog_reduc_code == ERROR_MARK
5765               && STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5766                         == INTEGER_INDUC_COND_REDUCTION)
5767             {
5768               if (dump_enabled_p ())
5769                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5770                                  "no reduc code for scalar code.\n");
5771               return false;
5772             }
5773         }
5774       else
5775         {
5776           if (!nested_cycle || double_reduc)
5777             {
5778               if (dump_enabled_p ())
5779                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5780                                  "no reduc code for scalar code.\n");
5781
5782               return false;
5783             }
5784         }
5785     }
5786   else
5787     {
5788       int scalar_precision = GET_MODE_PRECISION (TYPE_MODE (scalar_type));
5789       cr_index_scalar_type = make_unsigned_type (scalar_precision);
5790       cr_index_vector_type = build_vector_type
5791         (cr_index_scalar_type, TYPE_VECTOR_SUBPARTS (vectype_out));
5792
5793       epilog_reduc_code = REDUC_MAX_EXPR;
5794       optab = optab_for_tree_code (REDUC_MAX_EXPR, cr_index_vector_type,
5795                                    optab_default);
5796       if (optab_handler (optab, TYPE_MODE (cr_index_vector_type))
5797           == CODE_FOR_nothing)
5798         {
5799           if (dump_enabled_p ())
5800             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5801                              "reduc max op not supported by target.\n");
5802           return false;
5803         }
5804     }
5805
5806   if ((double_reduc
5807        || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
5808        || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5809                 == INTEGER_INDUC_COND_REDUCTION)
5810       && ncopies > 1)
5811     {
5812       if (dump_enabled_p ())
5813         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5814                          "multiple types in double reduction or condition "
5815                          "reduction.\n");
5816       return false;
5817     }
5818
5819   /* In case of widening multiplication by a constant, we update the type
5820      of the constant to be the type of the other operand.  We check that the
5821      constant fits the type in the pattern recognition pass.  */
5822   if (code == DOT_PROD_EXPR
5823       && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
5824     {
5825       if (TREE_CODE (ops[0]) == INTEGER_CST)
5826         ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
5827       else if (TREE_CODE (ops[1]) == INTEGER_CST)
5828         ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
5829       else
5830         {
5831           if (dump_enabled_p ())
5832             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5833                              "invalid types in dot-prod\n");
5834
5835           return false;
5836         }
5837     }
5838
5839   if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
5840     {
5841       widest_int ni;
5842
5843       if (! max_loop_iterations (loop, &ni))
5844         {
5845           if (dump_enabled_p ())
5846             dump_printf_loc (MSG_NOTE, vect_location,
5847                              "loop count not known, cannot create cond "
5848                              "reduction.\n");
5849           return false;
5850         }
5851       /* Convert backedges to iterations.  */
5852       ni += 1;
5853
5854       /* The additional index will be the same type as the condition.  Check
5855          that the loop can fit into this less one (because we'll use up the
5856          zero slot for when there are no matches).  */
5857       tree max_index = TYPE_MAX_VALUE (cr_index_scalar_type);
5858       if (wi::geu_p (ni, wi::to_widest (max_index)))
5859         {
5860           if (dump_enabled_p ())
5861             dump_printf_loc (MSG_NOTE, vect_location,
5862                              "loop size is greater than data size.\n");
5863           return false;
5864         }
5865     }
5866
5867   if (!vec_stmt) /* transformation not required.  */
5868     {
5869       if (first_p
5870           && !vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies,
5871                                          reduc_index))
5872         return false;
5873       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
5874       return true;
5875     }
5876
5877   /** Transform.  **/
5878
5879   if (dump_enabled_p ())
5880     dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n");
5881
5882   /* FORNOW: Multiple types are not supported for condition.  */
5883   if (code == COND_EXPR)
5884     gcc_assert (ncopies == 1);
5885
5886   /* Create the destination vector  */
5887   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
5888
5889   /* In case the vectorization factor (VF) is bigger than the number
5890      of elements that we can fit in a vectype (nunits), we have to generate
5891      more than one vector stmt - i.e - we need to "unroll" the
5892      vector stmt by a factor VF/nunits.  For more details see documentation
5893      in vectorizable_operation.  */
5894
5895   /* If the reduction is used in an outer loop we need to generate
5896      VF intermediate results, like so (e.g. for ncopies=2):
5897         r0 = phi (init, r0)
5898         r1 = phi (init, r1)
5899         r0 = x0 + r0;
5900         r1 = x1 + r1;
5901     (i.e. we generate VF results in 2 registers).
5902     In this case we have a separate def-use cycle for each copy, and therefore
5903     for each copy we get the vector def for the reduction variable from the
5904     respective phi node created for this copy.
5905
5906     Otherwise (the reduction is unused in the loop nest), we can combine
5907     together intermediate results, like so (e.g. for ncopies=2):
5908         r = phi (init, r)
5909         r = x0 + r;
5910         r = x1 + r;
5911    (i.e. we generate VF/2 results in a single register).
5912    In this case for each copy we get the vector def for the reduction variable
5913    from the vectorized reduction operation generated in the previous iteration.
5914   */
5915
5916   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope)
5917     {
5918       single_defuse_cycle = true;
5919       epilog_copies = 1;
5920     }
5921   else
5922     epilog_copies = ncopies;
5923
5924   prev_stmt_info = NULL;
5925   prev_phi_info = NULL;
5926   if (slp_node)
5927     vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5928   else
5929     {
5930       vec_num = 1;
5931       vec_oprnds0.create (1);
5932       if (op_type == ternary_op)
5933         vec_oprnds1.create (1);
5934     }
5935
5936   phis.create (vec_num);
5937   vect_defs.create (vec_num);
5938   if (!slp_node)
5939     vect_defs.quick_push (NULL_TREE);
5940
5941   for (j = 0; j < ncopies; j++)
5942     {
5943       if (j == 0 || !single_defuse_cycle)
5944         {
5945           for (i = 0; i < vec_num; i++)
5946             {
5947               /* Create the reduction-phi that defines the reduction
5948                  operand.  */
5949               new_phi = create_phi_node (vec_dest, loop->header);
5950               set_vinfo_for_stmt (new_phi,
5951                                   new_stmt_vec_info (new_phi, loop_vinfo));
5952                if (j == 0 || slp_node)
5953                  phis.quick_push (new_phi);
5954             }
5955         }
5956
5957       if (code == COND_EXPR)
5958         {
5959           gcc_assert (!slp_node);
5960           vectorizable_condition (stmt, gsi, vec_stmt,
5961                                   PHI_RESULT (phis[0]),
5962                                   reduc_index, NULL);
5963           /* Multiple types are not supported for condition.  */
5964           break;
5965         }
5966
5967       /* Handle uses.  */
5968       if (j == 0)
5969         {
5970           op0 = ops[!reduc_index];
5971           if (op_type == ternary_op)
5972             {
5973               if (reduc_index == 0)
5974                 op1 = ops[2];
5975               else
5976                 op1 = ops[1];
5977             }
5978
5979           if (slp_node)
5980             vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5981                                slp_node, -1);
5982           else
5983             {
5984               loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
5985                                                             stmt);
5986               vec_oprnds0.quick_push (loop_vec_def0);
5987               if (op_type == ternary_op)
5988                {
5989                  loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt);
5990                  vec_oprnds1.quick_push (loop_vec_def1);
5991                }
5992             }
5993         }
5994       else
5995         {
5996           if (!slp_node)
5997             {
5998               enum vect_def_type dt;
5999               gimple *dummy_stmt;
6000
6001               vect_is_simple_use (ops[!reduc_index], loop_vinfo,
6002                                   &dummy_stmt, &dt);
6003               loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
6004                                                               loop_vec_def0);
6005               vec_oprnds0[0] = loop_vec_def0;
6006               if (op_type == ternary_op)
6007                 {
6008                   vect_is_simple_use (op1, loop_vinfo, &dummy_stmt, &dt);
6009                   loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
6010                                                                 loop_vec_def1);
6011                   vec_oprnds1[0] = loop_vec_def1;
6012                 }
6013             }
6014
6015           if (single_defuse_cycle)
6016             reduc_def = gimple_assign_lhs (new_stmt);
6017
6018           STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
6019         }
6020
6021       FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
6022         {
6023           if (slp_node)
6024             reduc_def = PHI_RESULT (phis[i]);
6025           else
6026             {
6027               if (!single_defuse_cycle || j == 0)
6028                 reduc_def = PHI_RESULT (new_phi);
6029             }
6030
6031           def1 = ((op_type == ternary_op)
6032                   ? vec_oprnds1[i] : NULL);
6033           if (op_type == binary_op)
6034             {
6035               if (reduc_index == 0)
6036                 expr = build2 (code, vectype_out, reduc_def, def0);
6037               else
6038                 expr = build2 (code, vectype_out, def0, reduc_def);
6039             }
6040           else
6041             {
6042               if (reduc_index == 0)
6043                 expr = build3 (code, vectype_out, reduc_def, def0, def1);
6044               else
6045                 {
6046                   if (reduc_index == 1)
6047                     expr = build3 (code, vectype_out, def0, reduc_def, def1);
6048                   else
6049                     expr = build3 (code, vectype_out, def0, def1, reduc_def);
6050                 }
6051             }
6052
6053           new_stmt = gimple_build_assign (vec_dest, expr);
6054           new_temp = make_ssa_name (vec_dest, new_stmt);
6055           gimple_assign_set_lhs (new_stmt, new_temp);
6056           vect_finish_stmt_generation (stmt, new_stmt, gsi);
6057
6058           if (slp_node)
6059             {
6060               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6061               vect_defs.quick_push (new_temp);
6062             }
6063           else
6064             vect_defs[0] = new_temp;
6065         }
6066
6067       if (slp_node)
6068         continue;
6069
6070       if (j == 0)
6071         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6072       else
6073         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6074
6075       prev_stmt_info = vinfo_for_stmt (new_stmt);
6076       prev_phi_info = vinfo_for_stmt (new_phi);
6077     }
6078
6079   tree indx_before_incr, indx_after_incr, cond_name = NULL;
6080
6081   /* Finalize the reduction-phi (set its arguments) and create the
6082      epilog reduction code.  */
6083   if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
6084     {
6085       new_temp = gimple_assign_lhs (*vec_stmt);
6086       vect_defs[0] = new_temp;
6087
6088       /* For cond reductions we want to create a new vector (INDEX_COND_EXPR)
6089          which is updated with the current index of the loop for every match of
6090          the original loop's cond_expr (VEC_STMT).  This results in a vector
6091          containing the last time the condition passed for that vector lane.
6092          The first match will be a 1 to allow 0 to be used for non-matching
6093          indexes.  If there are no matches at all then the vector will be all
6094          zeroes.  */
6095       if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
6096         {
6097           int nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6098           int k;
6099
6100           gcc_assert (gimple_assign_rhs_code (*vec_stmt) == VEC_COND_EXPR);
6101
6102           /* First we create a simple vector induction variable which starts
6103              with the values {1,2,3,...} (SERIES_VECT) and increments by the
6104              vector size (STEP).  */
6105
6106           /* Create a {1,2,3,...} vector.  */
6107           tree *vtemp = XALLOCAVEC (tree, nunits_out);
6108           for (k = 0; k < nunits_out; ++k)
6109             vtemp[k] = build_int_cst (cr_index_scalar_type, k + 1);
6110           tree series_vect = build_vector (cr_index_vector_type, vtemp);
6111
6112           /* Create a vector of the step value.  */
6113           tree step = build_int_cst (cr_index_scalar_type, nunits_out);
6114           tree vec_step = build_vector_from_val (cr_index_vector_type, step);
6115
6116           /* Create an induction variable.  */
6117           gimple_stmt_iterator incr_gsi;
6118           bool insert_after;
6119           standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6120           create_iv (series_vect, vec_step, NULL_TREE, loop, &incr_gsi,
6121                      insert_after, &indx_before_incr, &indx_after_incr);
6122
6123           /* Next create a new phi node vector (NEW_PHI_TREE) which starts
6124              filled with zeros (VEC_ZERO).  */
6125
6126           /* Create a vector of 0s.  */
6127           tree zero = build_zero_cst (cr_index_scalar_type);
6128           tree vec_zero = build_vector_from_val (cr_index_vector_type, zero);
6129
6130           /* Create a vector phi node.  */
6131           tree new_phi_tree = make_ssa_name (cr_index_vector_type);
6132           new_phi = create_phi_node (new_phi_tree, loop->header);
6133           set_vinfo_for_stmt (new_phi,
6134                               new_stmt_vec_info (new_phi, loop_vinfo));
6135           add_phi_arg (new_phi, vec_zero, loop_preheader_edge (loop),
6136                        UNKNOWN_LOCATION);
6137
6138           /* Now take the condition from the loop's original cond_expr
6139              (VEC_STMT) and produce a new cond_expr (INDEX_COND_EXPR) which for
6140              every match uses values from the induction variable
6141              (INDEX_BEFORE_INCR) otherwise uses values from the phi node
6142              (NEW_PHI_TREE).
6143              Finally, we update the phi (NEW_PHI_TREE) to take the value of
6144              the new cond_expr (INDEX_COND_EXPR).  */
6145
6146           /* Turn the condition from vec_stmt into an ssa name.  */
6147           gimple_stmt_iterator vec_stmt_gsi = gsi_for_stmt (*vec_stmt);
6148           tree ccompare = gimple_assign_rhs1 (*vec_stmt);
6149           tree ccompare_name = make_ssa_name (TREE_TYPE (ccompare));
6150           gimple *ccompare_stmt = gimple_build_assign (ccompare_name,
6151                                                        ccompare);
6152           gsi_insert_before (&vec_stmt_gsi, ccompare_stmt, GSI_SAME_STMT);
6153           gimple_assign_set_rhs1 (*vec_stmt, ccompare_name);
6154           update_stmt (*vec_stmt);
6155
6156           /* Create a conditional, where the condition is taken from vec_stmt
6157              (CCOMPARE_NAME), then is the induction index (INDEX_BEFORE_INCR)
6158              and else is the phi (NEW_PHI_TREE).  */
6159           tree index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type,
6160                                          ccompare_name, indx_before_incr,
6161                                          new_phi_tree);
6162           cond_name = make_ssa_name (cr_index_vector_type);
6163           gimple *index_condition = gimple_build_assign (cond_name,
6164                                                          index_cond_expr);
6165           gsi_insert_before (&incr_gsi, index_condition, GSI_SAME_STMT);
6166           stmt_vec_info index_vec_info = new_stmt_vec_info (index_condition,
6167                                                             loop_vinfo);
6168           STMT_VINFO_VECTYPE (index_vec_info) = cr_index_vector_type;
6169           set_vinfo_for_stmt (index_condition, index_vec_info);
6170
6171           /* Update the phi with the vec cond.  */
6172           add_phi_arg (new_phi, cond_name, loop_latch_edge (loop),
6173                        UNKNOWN_LOCATION);
6174         }
6175     }
6176
6177   vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
6178                                     epilog_reduc_code, phis, reduc_index,
6179                                     double_reduc, slp_node, cond_name);
6180
6181   return true;
6182 }
6183
6184 /* Function vect_min_worthwhile_factor.
6185
6186    Return the smallest vectorization factor at which vectorizing the
6187    operation indicated by CODE with generic vectors is considered
6188    worthwhile.  */
6189 int
6190 vect_min_worthwhile_factor (enum tree_code code)
6191 {
6192   /* Additive arithmetic and negation: the minimum factor is 4.  */
6193   if (code == PLUS_EXPR
6194       || code == MINUS_EXPR
6195       || code == NEGATE_EXPR)
6196     return 4;
6197
6198   /* Bitwise logic: the minimum factor is 2.  */
6199   if (code == BIT_AND_EXPR
6200       || code == BIT_IOR_EXPR
6201       || code == BIT_XOR_EXPR
6202       || code == BIT_NOT_EXPR)
6203     return 2;
6204
6205   /* Every other operation code yields INT_MAX, the largest value
6206      the return type can hold.  */
6207   return INT_MAX;
6208 }
6209
6210
6211 /* Function vectorizable_induction
6212
6213    Check if PHI performs an induction computation that can be vectorized.
6214    If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
6215    phi to replace it, put it in *VEC_STMT, and add it to the same basic block.
6216    GSI is unused.  Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
6217
6218 bool
6219 vectorizable_induction (gimple *phi,
6220                         gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
6221                         gimple **vec_stmt)
6222 {
6223   stmt_vec_info stmt_info = vinfo_for_stmt (phi);
6224   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6225   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6226   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6227   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6228   int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6229   tree vec_def;
6230   /* The vectorization factor must cover at least one full vector.  */
6231   gcc_assert (ncopies >= 1);
6232   /* FORNOW. These restrictions on PHIs nested in LOOP should be relaxed.  */
6233   if (nested_in_vect_loop_p (loop, phi))
6234     {
6235       imm_use_iterator imm_iter;
6236       use_operand_p use_p;
6237       gimple *exit_phi;
6238       edge latch_e;
6239       tree loop_arg;
6240       /* More than one vector copy is not supported here.  */
6241       if (ncopies > 1)
6242         {
6243           if (dump_enabled_p ())
6244             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6245                              "multiple types in nested loop.\n");
6246           return false;
6247         }
6248       /* Find a non-debug use of the latch value outside the inner loop.  */
6249       exit_phi = NULL;
6250       latch_e = loop_latch_edge (loop->inner);
6251       loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
6252       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
6253         {
6254           gimple *use_stmt = USE_STMT (use_p);
6255           if (is_gimple_debug (use_stmt))
6256             continue;
6257
6258           if (!flow_bb_inside_loop_p (loop->inner, gimple_bb (use_stmt)))
6259             {
6260               exit_phi = use_stmt;
6261               break;
6262             }
6263         }
6264       if (exit_phi)
6265         {
6266           stmt_vec_info exit_phi_vinfo  = vinfo_for_stmt (exit_phi);
6267           if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
6268                 && !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
6269             {
6270               if (dump_enabled_p ())
6271                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6272                                  "inner-loop induction only used outside "
6273                                  "of the outer vectorized loop.\n");
6274               return false;
6275             }
6276         }
6277     }
6278
6279   if (!STMT_VINFO_RELEVANT_P (stmt_info))
6280     return false;
6281
6282   /* FORNOW: SLP not supported.  */
6283   if (STMT_SLP_TYPE (stmt_info))
6284     return false;
6285
6286   gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
6287
6288   if (gimple_code (phi) != GIMPLE_PHI)
6289     return false;
6290
6291   if (!vec_stmt) /* Analysis only: set the stmt type and model the cost.  */
6292     {
6293       STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
6294       if (dump_enabled_p ())
6295         dump_printf_loc (MSG_NOTE, vect_location,
6296                          "=== vectorizable_induction ===\n");
6297       vect_model_induction_cost (stmt_info, ncopies);
6298       return true;
6299     }
6300
6301   /** Transform.  **/
6302
6303   if (dump_enabled_p ())
6304     dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.\n");
6305   /* Report the stmt that defines the initial vector def.  */
6306   vec_def = get_initial_def_for_induction (phi);
6307   *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
6308   return true;
6309 }
6310
6311 /* Function vectorizable_live_operation.
6312
6313    STMT computes a value that is used outside the loop.  Check if it can
6314    be supported.  GSI is unused; VEC_STMT non-NULL requests the transform.  */
6315
6316 bool
6317 vectorizable_live_operation (gimple *stmt,
6318                              gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
6319                              gimple **vec_stmt)
6320 {
6321   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6322   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6323   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6324   tree op;
6325   gimple *def_stmt;
6326   ssa_op_iter iter;
6327
6328   gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
6329   /* Reductions are not handled by this function.  */
6330   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
6331     return false;
6332   /* Non-assignments: only an IFN_GOMP_SIMD_LANE call for LOOP's simduid.  */
6333   if (!is_gimple_assign (stmt))
6334     {
6335       if (gimple_call_internal_p (stmt)
6336           && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
6337           && gimple_call_lhs (stmt)
6338           && loop->simduid
6339           && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
6340           && loop->simduid
6341              == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
6342         {
6343           edge e = single_exit (loop);
6344           basic_block merge_bb = e->dest;
6345           imm_use_iterator imm_iter;
6346           use_operand_p use_p;
6347           tree lhs = gimple_call_lhs (stmt);
6348           /* On transform, the PHI in E's destination receives VF - 1.  */
6349           FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
6350             {
6351               gimple *use_stmt = USE_STMT (use_p);
6352               if (gimple_code (use_stmt) == GIMPLE_PHI
6353                   && gimple_bb (use_stmt) == merge_bb)
6354                 {
6355                   if (vec_stmt)
6356                     {
6357                       tree vfm1
6358                         = build_int_cst (unsigned_type_node,
6359                                          loop_vinfo->vectorization_factor - 1);
6360                       SET_PHI_ARG_DEF (use_stmt, e->dest_idx, vfm1);
6361                     }
6362                   return true;
6363                 }
6364             }
6365         }
6366
6367       return false;
6368     }
6369
6370   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6371     return false;
6372
6373   /* FORNOW. CHECKME. */
6374   if (nested_in_vect_loop_p (loop, stmt))
6375     return false;
6376
6377   /* FORNOW: support only if all uses are invariant.  This means
6378      that the scalar operations can remain in place, unvectorized.
6379      The original last scalar value that they compute will be used.  */
6380   FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
6381     {
6382       enum vect_def_type dt = vect_uninitialized_def;
6383
6384       if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
6385         {
6386           if (dump_enabled_p ())
6387             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6388                              "use not simple.\n");
6389           return false;
6390         }
6391       /* Only external and constant defs are accepted.  */
6392       if (dt != vect_external_def && dt != vect_constant_def)
6393         return false;
6394     }
6395
6396   /* No transformation is required for the cases we currently support.  */
6397   return true;
6398 }
6399
6400 /* Reset the value of every debug bind stmt that lies outside LOOP
6401    and uses an SSA name defined by STMT (a PHI or an ordinary stmt).
6402    Any other kind of debug use outside LOOP is not expected.  */
6403
6404 static void
6405 vect_loop_kill_debug_uses (struct loop *loop, gimple *stmt)
6406 {
6407   def_operand_p def;
6408   ssa_op_iter def_iter;
6409
6410   FOR_EACH_PHI_OR_STMT_DEF (def, stmt, def_iter, SSA_OP_DEF)
6411     {
6412       gimple *use_stmt;
6413       imm_use_iterator use_iter;
6414
6415       FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def))
6416         {
6417           /* Non-debug uses are left alone.  */
6418           if (!is_gimple_debug (use_stmt))
6419             continue;
6420
6421           /* So are debug uses located inside LOOP.  */
6422           if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
6423             continue;
6424
6425           /* Debug binds are the only kind expected here.  */
6426           if (!gimple_debug_bind_p (use_stmt))
6427             gcc_unreachable ();
6428
6429           if (dump_enabled_p ())
6430             dump_printf_loc (MSG_NOTE, vect_location,
6431                              "killing debug use\n");
6432
6433           gimple_debug_bind_reset_value (use_stmt);
6434           update_stmt (use_stmt);
6435         }
6436     }
6437 }
6438
6439
6440 /* Build and return the number of iterations of the loop described by
6441    LOOP_VINFO.  An INTEGER_CST count is returned as is; otherwise the
6442    stmts needed to compute it, if any, are inserted on the loop
6443    preheader edge.  */
6444
6445 static tree
6446 vect_build_loop_niters (loop_vec_info loop_vinfo)
6447 {
6448   tree niters = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
6449
6450   /* A constant count needs no code.  */
6451   if (TREE_CODE (niters) == INTEGER_CST)
6452     return niters;
6453
6454   gimple_seq seq = NULL;
6455   edge preheader = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
6456   tree tmp = create_tmp_var (TREE_TYPE (niters), "niters");
6457   tree result = force_gimple_operand (niters, &seq, false, tmp);
6458   if (seq)
6459     gsi_insert_seq_on_edge_immediate (preheader, seq);
6460
6461   return result;
6462 }
6463
6464
6465 /* Function vect_generate_tmps_on_preheader.
6466
6467    Build the following values for the loop described by LOOP_VINFO:
6468
6469      ratio = ((ni - vf) >> log2 (vf)) + 1
6470      ratio_mult_vf = ratio << log2 (vf)
6471
6472    where vf is the vectorization factor and ni is NI_NAME, or
6473    NI_NAME - 1 when peeling for gaps is required.  Any stmts needed to
6474    compute them are inserted on the loop preheader edge.
6475
6476    RATIO is stored in *RATIO_NAME_PTR.  RATIO_MULT_VF is only built
6477    when RATIO_MULT_VF_NAME_PTR is non-NULL, in which case it is stored
6478    in *RATIO_MULT_VF_NAME_PTR.  */
6479
6480 static void
6481 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
6482                                  tree ni_name,
6483                                  tree *ratio_mult_vf_name_ptr,
6484                                  tree *ratio_name_ptr)
6485 {
6486   /* Every value below is built in the type of NI_NAME.  */
6487   tree niters_type = TREE_TYPE (ni_name);
6488   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6489   edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
6490
6491   /* Shift count used for both RATIO and RATIO_MULT_VF.  */
6492   tree log_vf = build_int_cst (niters_type, exact_log2 (vf));
6493
6494   /* If an epilogue loop is required because of data accesses with gaps,
6495      one iteration is subtracted from the total here so that RATIO is
6496      computed correctly.  */
6497   tree niters = ni_name;
6498   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
6499     {
6500       niters = fold_build2 (MINUS_EXPR, niters_type, ni_name,
6501                             build_one_cst (niters_type));
6502       if (!is_gimple_val (niters))
6503         {
6504           /* Not a gimple value: compute it into a temporary.  */
6505           tree tmp = create_tmp_var (niters_type, "ni_gap");
6506           gimple *seq = NULL;
6507           niters = force_gimple_operand (niters, &seq, true, tmp);
6508           gsi_insert_seq_on_edge_immediate (pe, seq);
6509         }
6510     }
6511
6512   /* Create: ratio = ((ni - vf) >> log2 (vf)) + 1.  */
6513   /* ???  NI is the number of latch executions + 1 and so could have
6514      overflown to zero.  As RATIO is known to be at least one, it is
6515      computed as above instead of as ni >> log2 (vf).  */
6516   tree ni_minus_vf = fold_build2 (MINUS_EXPR, niters_type, niters,
6517                                   build_int_cst (niters_type, vf));
6518   tree shifted = fold_build2 (RSHIFT_EXPR, niters_type, ni_minus_vf,
6519                               log_vf);
6520   tree ratio = fold_build2 (PLUS_EXPR, niters_type, shifted,
6521                             build_int_cst (niters_type, 1));
6522
6523   if (!is_gimple_val (ratio))
6524     {
6525       /* Not a gimple value: compute it into a temporary.  */
6526       tree tmp = create_tmp_var (niters_type, "bnd");
6527       gimple *seq = NULL;
6528       ratio = force_gimple_operand (ratio, &seq, true, tmp);
6529       gsi_insert_seq_on_edge_immediate (pe, seq);
6530     }
6531   *ratio_name_ptr = ratio;
6532
6533   /* The caller did not ask for RATIO_MULT_VF.  */
6534   if (!ratio_mult_vf_name_ptr)
6535     return;
6536
6537   /* Create: ratio_mult_vf = ratio << log2 (vf).  */
6538   tree ratio_mult_vf = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio),
6539                                     ratio, log_vf);
6540
6541   if (!is_gimple_val (ratio_mult_vf))
6542     {
6543       /* Not a gimple value: compute it into a temporary.  */
6544       tree tmp = create_tmp_var (niters_type, "ratio_mult_vf");
6545       gimple *seq = NULL;
6546       ratio_mult_vf = force_gimple_operand (ratio_mult_vf, &seq, true, tmp);
6547       gsi_insert_seq_on_edge_immediate (pe, seq);
6548     }
6549   *ratio_mult_vf_name_ptr = ratio_mult_vf;
6550 }
6551
6552
6553 /* Function vect_transform_loop.
6554
6555    The analysis phase has determined that the loop is vectorizable.
6556    Vectorize the loop - create vectorized stmts to replace the scalar
6557    stmts in the loop, and update the loop exit condition.  */
6558
6559 void
6560 vect_transform_loop (loop_vec_info loop_vinfo)
6561 {
6562   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6563   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
6564   int nbbs = loop->num_nodes;
6565   int i;
6566   tree ratio = NULL;
6567   int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6568   bool grouped_store;
6569   bool slp_scheduled = false;
6570   gimple *stmt, *pattern_stmt;
6571   gimple_seq pattern_def_seq = NULL;
6572   gimple_stmt_iterator pattern_def_si = gsi_none ();
6573   bool transform_pattern_stmt = false;
6574   bool check_profitability = false;
6575   int th;
6576   /* Record number of iterations before we started tampering with the profile. */
6577   gcov_type expected_iterations = expected_loop_iterations_unbounded (loop);
6578
6579   if (dump_enabled_p ())
6580     dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===\n");
6581
6582   /* If the profile is imprecise, we have a chance to fix it up.  */
6583   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6584     expected_iterations = LOOP_VINFO_INT_NITERS (loop_vinfo);
6585
6586   /* Use the more conservative vectorization threshold.  If the number
6587      of iterations is constant assume the cost check has been performed
6588      by our caller.  If the threshold makes all loops profitable that
6589      run at least the vectorization factor number of times checking
6590      is pointless, too.  */
6591   th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
6592   if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1
6593       && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6594     {
6595       if (dump_enabled_p ())
6596         dump_printf_loc (MSG_NOTE, vect_location,
6597                          "Profitability threshold is %d loop iterations.\n",
6598                          th);
6599       check_profitability = true;
6600     }
6601
6602   /* Version the loop first, if required, so the profitability check
6603      comes first.  */
6604
6605   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
6606       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
6607     {
6608       vect_loop_versioning (loop_vinfo, th, check_profitability);
6609       check_profitability = false;
6610     }
6611
6612   tree ni_name = vect_build_loop_niters (loop_vinfo);
6613   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = ni_name;
6614
6615   /* Peel the loop if there are data refs with unknown alignment.
6616      Only one data ref with unknown store is allowed.  */
6617
6618   if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
6619     {
6620       vect_do_peeling_for_alignment (loop_vinfo, ni_name,
6621                                      th, check_profitability);
6622       check_profitability = false;
6623       /* The above adjusts LOOP_VINFO_NITERS, so cause ni_name to
6624          be re-computed.  */
6625       ni_name = NULL_TREE;
6626     }
6627
6628   /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
6629      compile time constant), or it is a constant that doesn't divide by the
6630      vectorization factor, then an epilog loop needs to be created.
6631      We therefore duplicate the loop: the original loop will be vectorized,
6632      and will compute the first (n/VF) iterations.  The second copy of the loop
6633      will remain scalar and will compute the remaining (n%VF) iterations.
6634      (VF is the vectorization factor).  */
6635
6636   if (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
6637       || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
6638     {
6639       tree ratio_mult_vf;
6640       if (!ni_name)
6641         ni_name = vect_build_loop_niters (loop_vinfo);
6642       vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf,
6643                                        &ratio);
6644       vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf,
6645                                       th, check_profitability);
6646     }
6647   else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6648     ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
6649                 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
6650   else
6651     {
6652       if (!ni_name)
6653         ni_name = vect_build_loop_niters (loop_vinfo);
6654       vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL, &ratio);
6655     }
6656
6657   /* 1) Make sure the loop header has exactly two entries
6658      2) Make sure we have a preheader basic block.  */
6659
6660   gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
6661
6662   split_edge (loop_preheader_edge (loop));
6663
6664   /* FORNOW: the vectorizer supports only loops whose body consists
6665      of one basic block (header + empty latch).  When the vectorizer
6666      supports more involved loop forms, the order in which the BBs are
6667      traversed needs to be reconsidered.  */
6668
6669   for (i = 0; i < nbbs; i++)
6670     {
6671       basic_block bb = bbs[i];
6672       stmt_vec_info stmt_info;
6673
6674       for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
6675            gsi_next (&si))
6676         {
6677           gphi *phi = si.phi ();
6678           if (dump_enabled_p ())
6679             {
6680               dump_printf_loc (MSG_NOTE, vect_location,
6681                                "------>vectorizing phi: ");
6682               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
6683               dump_printf (MSG_NOTE, "\n");
6684             }
6685           stmt_info = vinfo_for_stmt (phi);
6686           if (!stmt_info)
6687             continue;
6688
6689           if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
6690             vect_loop_kill_debug_uses (loop, phi);
6691
6692           if (!STMT_VINFO_RELEVANT_P (stmt_info)
6693               && !STMT_VINFO_LIVE_P (stmt_info))
6694             continue;
6695
6696           if (STMT_VINFO_VECTYPE (stmt_info)
6697               && (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
6698                   != (unsigned HOST_WIDE_INT) vectorization_factor)
6699               && dump_enabled_p ())
6700             dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
6701
6702           if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
6703             {
6704               if (dump_enabled_p ())
6705                 dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n");
6706               vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
6707             }
6708         }
6709
6710       pattern_stmt = NULL;
6711       for (gimple_stmt_iterator si = gsi_start_bb (bb);
6712            !gsi_end_p (si) || transform_pattern_stmt;)
6713         {
6714           bool is_store;
6715
6716           if (transform_pattern_stmt)
6717             stmt = pattern_stmt;
6718           else
6719             {
6720               stmt = gsi_stmt (si);
6721               /* During vectorization remove existing clobber stmts.  */
6722               if (gimple_clobber_p (stmt))
6723                 {
6724                   unlink_stmt_vdef (stmt);
6725                   gsi_remove (&si, true);
6726                   release_defs (stmt);
6727                   continue;
6728                 }
6729             }
6730
6731           if (dump_enabled_p ())
6732             {
6733               dump_printf_loc (MSG_NOTE, vect_location,
6734                                "------>vectorizing statement: ");
6735               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6736               dump_printf (MSG_NOTE, "\n");
6737             }
6738
6739           stmt_info = vinfo_for_stmt (stmt);
6740
6741           /* vector stmts created in the outer-loop during vectorization of
6742              stmts in an inner-loop may not have a stmt_info, and do not
6743              need to be vectorized.  */
6744           if (!stmt_info)
6745             {
6746               gsi_next (&si);
6747               continue;
6748             }
6749
6750           if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
6751             vect_loop_kill_debug_uses (loop, stmt);
6752
6753           if (!STMT_VINFO_RELEVANT_P (stmt_info)
6754               && !STMT_VINFO_LIVE_P (stmt_info))
6755             {
6756               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6757                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
6758                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6759                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6760                 {
6761                   stmt = pattern_stmt;
6762                   stmt_info = vinfo_for_stmt (stmt);
6763                 }
6764               else
6765                 {
6766                   gsi_next (&si);
6767                   continue;
6768                 }
6769             }
6770           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6771                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
6772                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6773                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6774             transform_pattern_stmt = true;
6775
6776           /* If pattern statement has def stmts, vectorize them too.  */
6777           if (is_pattern_stmt_p (stmt_info))
6778             {
6779               if (pattern_def_seq == NULL)
6780                 {
6781                   pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
6782                   pattern_def_si = gsi_start (pattern_def_seq);
6783                 }
6784               else if (!gsi_end_p (pattern_def_si))
6785                 gsi_next (&pattern_def_si);
6786               if (pattern_def_seq != NULL)
6787                 {
6788                   gimple *pattern_def_stmt = NULL;
6789                   stmt_vec_info pattern_def_stmt_info = NULL;
6790
6791                   while (!gsi_end_p (pattern_def_si))
6792                     {
6793                       pattern_def_stmt = gsi_stmt (pattern_def_si);
6794                       pattern_def_stmt_info
6795                         = vinfo_for_stmt (pattern_def_stmt);
6796                       if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
6797                           || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
6798                         break;
6799                       gsi_next (&pattern_def_si);
6800                     }
6801
6802                   if (!gsi_end_p (pattern_def_si))
6803                     {
6804                       if (dump_enabled_p ())
6805                         {
6806                           dump_printf_loc (MSG_NOTE, vect_location,
6807                                            "==> vectorizing pattern def "
6808                                            "stmt: ");
6809                           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
6810                                             pattern_def_stmt, 0);
6811                           dump_printf (MSG_NOTE, "\n");
6812                         }
6813
6814                       stmt = pattern_def_stmt;
6815                       stmt_info = pattern_def_stmt_info;
6816                     }
6817                   else
6818                     {
6819                       pattern_def_si = gsi_none ();
6820                       transform_pattern_stmt = false;
6821                     }
6822                 }
6823               else
6824                 transform_pattern_stmt = false;
6825             }
6826
6827           if (STMT_VINFO_VECTYPE (stmt_info))
6828             {
6829               unsigned int nunits
6830                 = (unsigned int)
6831                   TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
6832               if (!STMT_SLP_TYPE (stmt_info)
6833                   && nunits != (unsigned int) vectorization_factor
6834                   && dump_enabled_p ())
6835                   /* For SLP VF is set according to unrolling factor, and not
6836                      to vector size, hence for SLP this print is not valid.  */
6837                 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
6838             }
6839
6840           /* SLP. Schedule all the SLP instances when the first SLP stmt is
6841              reached.  */
6842           if (STMT_SLP_TYPE (stmt_info))
6843             {
6844               if (!slp_scheduled)
6845                 {
6846                   slp_scheduled = true;
6847
6848                   if (dump_enabled_p ())
6849                     dump_printf_loc (MSG_NOTE, vect_location,
6850                                      "=== scheduling SLP instances ===\n");
6851
6852                   vect_schedule_slp (loop_vinfo);
6853                 }
6854
6855               /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
6856               if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
6857                 {
6858                   if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
6859                     {
6860                       pattern_def_seq = NULL;
6861                       gsi_next (&si);
6862                     }
6863                   continue;
6864                 }
6865             }
6866
6867           /* -------- vectorize statement ------------ */
6868           if (dump_enabled_p ())
6869             dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");
6870
6871           grouped_store = false;
6872           is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
6873           if (is_store)
6874             {
6875               if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6876                 {
6877                   /* Interleaving. If IS_STORE is TRUE, the vectorization of the
6878                      interleaving chain was completed - free all the stores in
6879                      the chain.  */
6880                   gsi_next (&si);
6881                   vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
6882                 }
6883               else
6884                 {
6885                   /* Free the attached stmt_vec_info and remove the stmt.  */
6886                   gimple *store = gsi_stmt (si);
6887                   free_stmt_vec_info (store);
6888                   unlink_stmt_vdef (store);
6889                   gsi_remove (&si, true);
6890                   release_defs (store);
6891                 }
6892
6893               /* Stores can only appear at the end of pattern statements.  */
6894               gcc_assert (!transform_pattern_stmt);
6895               pattern_def_seq = NULL;
6896             }
6897           else if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
6898             {
6899               pattern_def_seq = NULL;
6900               gsi_next (&si);
6901             }
6902         }                       /* stmts in BB */
6903     }                           /* BBs in loop */
6904
6905   slpeel_make_loop_iterate_ntimes (loop, ratio);
6906
6907   /* Reduce loop iterations by the vectorization factor.  */
6908   scale_loop_profile (loop, GCOV_COMPUTE_SCALE (1, vectorization_factor),
6909                       expected_iterations / vectorization_factor);
6910   loop->nb_iterations_upper_bound
6911     = wi::udiv_floor (loop->nb_iterations_upper_bound, vectorization_factor);
6912   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
6913       && loop->nb_iterations_upper_bound != 0)
6914     loop->nb_iterations_upper_bound = loop->nb_iterations_upper_bound - 1;
6915   if (loop->any_estimate)
6916     {
6917       loop->nb_iterations_estimate
6918         = wi::udiv_floor (loop->nb_iterations_estimate, vectorization_factor);
6919        if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
6920            && loop->nb_iterations_estimate != 0)
6921          loop->nb_iterations_estimate = loop->nb_iterations_estimate - 1;
6922     }
6923
6924   if (dump_enabled_p ())
6925     {
6926       dump_printf_loc (MSG_NOTE, vect_location,
6927                        "LOOP VECTORIZED\n");
6928       if (loop->inner)
6929         dump_printf_loc (MSG_NOTE, vect_location,
6930                          "OUTER LOOP VECTORIZED\n");
6931       dump_printf (MSG_NOTE, "\n");
6932     }
6933
6934   /* Free SLP instances here because otherwise stmt reference counting
6935      won't work.  */
6936   slp_instance instance;
6937   FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)
6938     vect_free_slp_instance (instance);
6939   LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
6940 }