gcc/tree-vect-loop.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   3    Free Software Foundation, Inc.
   4    Contributed by Dorit Naishlos <dorit@il.ibm.com> and
   5    Ira Rosen <irar@il.ibm.com>
   6
   7 This file is part of GCC.
   8
   9 GCC is free software; you can redistribute it and/or modify it under
  10 the terms of the GNU General Public License as published by the Free
  11 Software Foundation; either version 3, or (at your option) any later
  12 version.
  13
  14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  17 for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with GCC; see the file COPYING3.  If not see
  21 <http://www.gnu.org/licenses/>.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "coretypes.h"
  26 #include "tm.h"
  27 #include "ggc.h"
  28 #include "tree.h"
  29 #include "basic-block.h"
  30 #include "tree-pretty-print.h"
  31 #include "gimple-pretty-print.h"
  32 #include "tree-flow.h"
  33 #include "tree-dump.h"
  34 #include "cfgloop.h"
  35 #include "cfglayout.h"
  36 #include "expr.h"
  37 #include "recog.h"
  38 #include "optabs.h"
  39 #include "params.h"
  40 #include "diagnostic-core.h"
  41 #include "tree-chrec.h"
  42 #include "tree-scalar-evolution.h"
  43 #include "tree-vectorizer.h"
  44 #include "target.h"
  45
  46 /* Loop Vectorization Pass.
  47
  48    This pass tries to vectorize loops.
  49
  50    For example, the vectorizer transforms the following simple loop:
  51
  52         short a[N]; short b[N]; short c[N]; int i;
  53
  54         for (i=0; i<N; i++){
  55           a[i] = b[i] + c[i];
  56         }
  57
  58    as if it was manually vectorized by rewriting the source code into:
  59
  60         typedef int __attribute__((mode(V8HI))) v8hi;
  61         short a[N];  short b[N]; short c[N];   int i;
  62         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  63         v8hi va, vb, vc;
  64
  65         for (i=0; i<N/8; i++){
  66           vb = pb[i];
  67           vc = pc[i];
  68           va = vb + vc;
  69           pa[i] = va;
  70         }
  71
  72         The main entry to this pass is vectorize_loops(), in which
  73    the vectorizer applies a set of analyses on a given set of loops,
  74    followed by the actual vectorization transformation for the loops that
  75    had successfully passed the analysis phase.
  76         Throughout this pass we make a distinction between two types of
  77    data: scalars (which are represented by SSA_NAMES), and memory references
  78    ("data-refs").  These two types of data require different handling both
  79    during analysis and transformation. The types of data-refs that the
  80    vectorizer currently supports are ARRAY_REFS which base is an array DECL
  81    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  82    accesses are required to have a simple (consecutive) access pattern.
  83
  84    Analysis phase:
  85    ===============
  86         The driver for the analysis phase is vect_analyze_loop().
  87    It applies a set of analyses, some of which rely on the scalar evolution
  88    analyzer (scev) developed by Sebastian Pop.
  89
  90         During the analysis phase the vectorizer records some information
  91    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  92    loop, as well as general information about the loop as a whole, which is
  93    recorded in a "loop_vec_info" struct attached to each loop.
  94
  95    Transformation phase:
  96    =====================
  97         The loop transformation phase scans all the stmts in the loop, and
  98    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
  99    the loop that needs to be vectorized.  It inserts the vector code sequence
 100    just before the scalar stmt S, and records a pointer to the vector code
 101    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
 102    attached to S).  This pointer will be used for the vectorization of following
 103    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
 104    otherwise, we rely on dead code elimination for removing it.
 105
 106         For example, say stmt S1 was vectorized into stmt VS1:
 107
 108    VS1: vb = px[i];
 109    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
 110    S2:  a = b;
 111
 112    To vectorize stmt S2, the vectorizer first finds the stmt that defines
 113    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
 114    vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
 115    resulting sequence would be:
 116
 117    VS1: vb = px[i];
 118    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
 119    VS2: va = vb;
 120    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 121
 122         Operands that are not SSA_NAMEs, are data-refs that appear in
 123    load/store operations (like 'x[i]' in S1), and are handled differently.
 124
 125    Target modeling:
 126    =================
 127         Currently the only target specific information that is used is the
 128    size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
 129    Targets that can support different sizes of vectors, for now will need
 130    to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
 131    flexibility will be added in the future.
 132
 133         Since we only vectorize operations which vector form can be
 134    expressed using existing tree codes, to verify that an operation is
 135    supported, the vectorizer checks the relevant optab at the relevant
 136    machine_mode (e.g, optab_handler (add_optab, V8HImode)).  If
 137    the value found is CODE_FOR_nothing, then there's no target support, and
 138    we can't vectorize the stmt.
 139
 140    For additional information on this project see:
 141    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 142 */
 143
 144 /* Function vect_determine_vectorization_factor
 145
 146    Determine the vectorization factor (VF).  VF is the number of data elements
 147    that are operated upon in parallel in a single iteration of the vectorized
 148    loop.  For example, when vectorizing a loop that operates on 4byte elements,
 149    on a target with vector size (VS) 16byte, the VF is set to 4, since 4
 150    elements can fit in a single vector register.
 151
 152    We currently support vectorization of loops in which all types operated upon
 153    are of the same size.  Therefore this function currently sets VF according to
 154    the size of the types operated upon, and fails if there are multiple sizes
 155    in the loop.
 156
 157    VF is also the factor by which the loop iterations are strip-mined, e.g.:
 158    original loop:
 159         for (i=0; i<N; i++){
 160           a[i] = b[i] + c[i];
 161         }
 162
 163    vectorized loop:
 164         for (i=0; i<N; i+=VF){
 165           a[i:VF] = b[i:VF] + c[i:VF];
 166         }
 167 */
 168
 169 static bool
 170 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
 171 {
 172   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 173   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
 174   int nbbs = loop->num_nodes;
 175   gimple_stmt_iterator si;
 176   unsigned int vectorization_factor = 0;
 177   tree scalar_type;
 178   gimple phi;
 179   tree vectype;
 180   unsigned int nunits;
 181   stmt_vec_info stmt_info;
 182   int i;
 183   HOST_WIDE_INT dummy;
 184   gimple stmt, pattern_stmt = NULL;
 185   gimple_seq pattern_def_seq = NULL;
 186   gimple_stmt_iterator pattern_def_si = gsi_none ();
 187   bool analyze_pattern_stmt = false;
 188
 189   if (vect_print_dump_info (REPORT_DETAILS))
 190     fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
 191
 192   for (i = 0; i < nbbs; i++)
 193     {
 194       basic_block bb = bbs[i];
 195
 196       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
 197         {
 198           phi = gsi_stmt (si);
 199           stmt_info = vinfo_for_stmt (phi);
 200           if (vect_print_dump_info (REPORT_DETAILS))
 201             {
 202               fprintf (vect_dump, "==> examining phi: ");
 203               print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
 204             }
 205
 206           gcc_assert (stmt_info);
 207
 208           if (STMT_VINFO_RELEVANT_P (stmt_info))
 209             {
 210               gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
 211               scalar_type = TREE_TYPE (PHI_RESULT (phi));
 212
 213               if (vect_print_dump_info (REPORT_DETAILS))
 214                 {
 215                   fprintf (vect_dump, "get vectype for scalar type:  ");
 216                   print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
 217                 }
 218
 219               vectype = get_vectype_for_scalar_type (scalar_type);
 220               if (!vectype)
 221                 {
 222                   if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
 223                     {
 224                       fprintf (vect_dump,
 225                                "not vectorized: unsupported data-type ");
 226                       print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
 227                     }
 228                   return false;
 229                 }
 230               STMT_VINFO_VECTYPE (stmt_info) = vectype;
 231
 232               if (vect_print_dump_info (REPORT_DETAILS))
 233                 {
 234                   fprintf (vect_dump, "vectype: ");
 235                   print_generic_expr (vect_dump, vectype, TDF_SLIM);
 236                 }
 237
 238               nunits = TYPE_VECTOR_SUBPARTS (vectype);
 239               if (vect_print_dump_info (REPORT_DETAILS))
 240                 fprintf (vect_dump, "nunits = %d", nunits);
 241
 242               if (!vectorization_factor
 243                   || (nunits > vectorization_factor))
 244                 vectorization_factor = nunits;
 245             }
 246         }
 247
 248       for (si = gsi_start_bb (bb); !gsi_end_p (si) || analyze_pattern_stmt;)
 249         {
 250           tree vf_vectype;
 251
 252           if (analyze_pattern_stmt)
 253             stmt = pattern_stmt;
 254           else
 255             stmt = gsi_stmt (si);
 256
 257           stmt_info = vinfo_for_stmt (stmt);
 258
 259           if (vect_print_dump_info (REPORT_DETAILS))
 260             {
 261               fprintf (vect_dump, "==> examining statement: ");
 262               print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
 263             }
 264
 265           gcc_assert (stmt_info);
 266
 267           /* Skip stmts which do not need to be vectorized.  */
 268           if (!STMT_VINFO_RELEVANT_P (stmt_info)
 269               && !STMT_VINFO_LIVE_P (stmt_info))
 270             {
 271               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
 272                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
 273                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
 274                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
 275                 {
 276                   stmt = pattern_stmt;
 277                   stmt_info = vinfo_for_stmt (pattern_stmt);
 278                   if (vect_print_dump_info (REPORT_DETAILS))
 279                     {
 280                       fprintf (vect_dump, "==> examining pattern statement: ");
 281                       print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
 282                     }
 283                 }
 284               else
 285                 {
 286                   if (vect_print_dump_info (REPORT_DETAILS))
 287                     fprintf (vect_dump, "skip.");
 288                   gsi_next (&si);
 289                   continue;
 290                 }
 291             }
 292           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
 293                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
 294                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
 295                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
 296             analyze_pattern_stmt = true;
 297
 298           /* If a pattern statement has def stmts, analyze them too.  */
 299           if (is_pattern_stmt_p (stmt_info))
 300             {
 301               if (pattern_def_seq == NULL)
 302                 {
 303                   pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
 304                   pattern_def_si = gsi_start (pattern_def_seq);
 305                 }
 306               else if (!gsi_end_p (pattern_def_si))
 307                 gsi_next (&pattern_def_si);
 308               if (pattern_def_seq != NULL)
 309                 {
 310                   gimple pattern_def_stmt = NULL;
 311                   stmt_vec_info pattern_def_stmt_info = NULL;
 312
 313                   while (!gsi_end_p (pattern_def_si))
 314                     {
 315                       pattern_def_stmt = gsi_stmt (pattern_def_si);
 316                       pattern_def_stmt_info
 317                         = vinfo_for_stmt (pattern_def_stmt);
 318                       if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
 319                           || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
 320                         break;
 321                       gsi_next (&pattern_def_si);
 322                     }
 323
 324                   if (!gsi_end_p (pattern_def_si))
 325                     {
 326                       if (vect_print_dump_info (REPORT_DETAILS))
 327                         {
 328                           fprintf (vect_dump,
 329                                    "==> examining pattern def stmt: ");
 330                           print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
 331                                              TDF_SLIM);
 332                         }
 333
 334                       stmt = pattern_def_stmt;
 335                       stmt_info = pattern_def_stmt_info;
 336                     }
 337                   else
 338                     {
 339                       pattern_def_si = gsi_none ();
 340                       analyze_pattern_stmt = false;
 341                     }
 342                 }
 343               else
 344                 analyze_pattern_stmt = false;
 345             }
 346
 347           if (gimple_get_lhs (stmt) == NULL_TREE)
 348             {
 349               if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
 350                 {
 351                   fprintf (vect_dump, "not vectorized: irregular stmt.");
 352                   print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
 353                 }
 354               return false;
 355             }
 356
 357           if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
 358             {
 359               if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
 360                 {
 361                   fprintf (vect_dump, "not vectorized: vector stmt in loop:");
 362                   print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
 363                 }
 364               return false;
 365             }
 366
 367           if (STMT_VINFO_VECTYPE (stmt_info))
 368             {
 369               /* The only case when a vectype had been already set is for stmts
 370                  that contain a dataref, or for "pattern-stmts" (stmts
 371                  generated by the vectorizer to represent/replace a certain
 372                  idiom).  */
 373               gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
 374                           || is_pattern_stmt_p (stmt_info)
 375                           || !gsi_end_p (pattern_def_si));
 376               vectype = STMT_VINFO_VECTYPE (stmt_info);
 377             }
 378           else
 379             {
 380               gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
 381               scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
 382               if (vect_print_dump_info (REPORT_DETAILS))
 383                 {
 384                   fprintf (vect_dump, "get vectype for scalar type:  ");
 385                   print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
 386                 }
 387               vectype = get_vectype_for_scalar_type (scalar_type);
 388               if (!vectype)
 389                 {
 390                   if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
 391                     {
 392                       fprintf (vect_dump,
 393                                "not vectorized: unsupported data-type ");
 394                       print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
 395                     }
 396                   return false;
 397                 }
 398
 399               STMT_VINFO_VECTYPE (stmt_info) = vectype;
 400             }
 401
 402           /* The vectorization factor is according to the smallest
 403              scalar type (or the largest vector size, but we only
 404              support one vector size per loop).  */
 405           scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
 406                                                        &dummy);
 407           if (vect_print_dump_info (REPORT_DETAILS))
 408             {
 409               fprintf (vect_dump, "get vectype for scalar type:  ");
 410               print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
 411             }
 412           vf_vectype = get_vectype_for_scalar_type (scalar_type);
 413           if (!vf_vectype)
 414             {
 415               if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
 416                 {
 417                   fprintf (vect_dump,
 418                            "not vectorized: unsupported data-type ");
 419                   print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
 420                 }
 421               return false;
 422             }
 423
 424           if ((GET_MODE_SIZE (TYPE_MODE (vectype))
 425                != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
 426             {
 427               if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
 428                 {
 429                   fprintf (vect_dump,
 430                            "not vectorized: different sized vector "
 431                            "types in statement, ");
 432                   print_generic_expr (vect_dump, vectype, TDF_SLIM);
 433                   fprintf (vect_dump, " and ");
 434                   print_generic_expr (vect_dump, vf_vectype, TDF_SLIM);
 435                 }
 436               return false;
 437             }
 438
 439           if (vect_print_dump_info (REPORT_DETAILS))
 440             {
 441               fprintf (vect_dump, "vectype: ");
 442               print_generic_expr (vect_dump, vf_vectype, TDF_SLIM);
 443             }
 444
 445           nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
 446           if (vect_print_dump_info (REPORT_DETAILS))
 447             fprintf (vect_dump, "nunits = %d", nunits);
 448
 449           if (!vectorization_factor
 450               || (nunits > vectorization_factor))
 451             vectorization_factor = nunits;
 452
 453           if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
 454             {
 455               pattern_def_seq = NULL;
 456               gsi_next (&si);
 457             }
 458         }
 459     }
 460
 461   /* TODO: Analyze cost. Decide if worth while to vectorize.  */
 462   if (vect_print_dump_info (REPORT_DETAILS))
 463     fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
 464   if (vectorization_factor <= 1)
 465     {
 466       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
 467         fprintf (vect_dump, "not vectorized: unsupported data-type");
 468       return false;
 469     }
 470   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
 471
 472   return true;
 473 }
 474
 475
 476 /* Function vect_is_simple_iv_evolution.
 477
 478    FORNOW: A simple evolution of an induction variables in the loop is
 479    considered a polynomial evolution with constant step.  */
 480
 481 static bool
 482 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
 483                              tree * step)
 484 {
 485   tree init_expr;
 486   tree step_expr;
 487   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
 488
 489   /* When there is no evolution in this loop, the evolution function
 490      is not "simple".  */
 491   if (evolution_part == NULL_TREE)
 492     return false;
 493
 494   /* When the evolution is a polynomial of degree >= 2
 495      the evolution function is not "simple".  */
 496   if (tree_is_chrec (evolution_part))
 497     return false;
 498
 499   step_expr = evolution_part;
 500   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
 501
 502   if (vect_print_dump_info (REPORT_DETAILS))
 503     {
 504       fprintf (vect_dump, "step: ");
 505       print_generic_expr (vect_dump, step_expr, TDF_SLIM);
 506       fprintf (vect_dump, ",  init: ");
 507       print_generic_expr (vect_dump, init_expr, TDF_SLIM);
 508     }
 509
 510   *init = init_expr;
 511   *step = step_expr;
 512
 513   if (TREE_CODE (step_expr) != INTEGER_CST)
 514     {
 515       if (vect_print_dump_info (REPORT_DETAILS))
 516         fprintf (vect_dump, "step unknown.");
 517       return false;
 518     }
 519
 520   return true;
 521 }
 522
 523 /* Function vect_analyze_scalar_cycles_1.
 524
 525    Examine the cross iteration def-use cycles of scalar variables
 526    in LOOP.  LOOP_VINFO represents the loop that is now being
 527    considered for vectorization (can be LOOP, or an outer-loop
 528    enclosing LOOP).  */
 529
 530 static void
 531 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
 532 {
 533   basic_block bb = loop->header;
 534   tree dumy;
 535   VEC(gimple,heap) *worklist = VEC_alloc (gimple, heap, 64);
 536   gimple_stmt_iterator gsi;
 537   bool double_reduc;
 538
 539   if (vect_print_dump_info (REPORT_DETAILS))
 540     fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
 541
 542   /* First - identify all inductions.  Reduction detection assumes that all the
 543      inductions have been identified, therefore, this order must not be
 544      changed.  */
 545   for (gsi = gsi_start_phis  (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 546     {
 547       gimple phi = gsi_stmt (gsi);
 548       tree access_fn = NULL;
 549       tree def = PHI_RESULT (phi);
 550       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
 551
 552       if (vect_print_dump_info (REPORT_DETAILS))
 553         {
 554           fprintf (vect_dump, "Analyze phi: ");
 555           print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
 556         }
 557
 558       /* Skip virtual phi's.  The data dependences that are associated with
 559          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
 560       if (!is_gimple_reg (SSA_NAME_VAR (def)))
 561         continue;
 562
 563       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
 564
 565       /* Analyze the evolution function.  */
 566       access_fn = analyze_scalar_evolution (loop, def);
 567       if (access_fn)
 568         {
 569           STRIP_NOPS (access_fn);
 570           if (vect_print_dump_info (REPORT_DETAILS))
 571             {
 572               fprintf (vect_dump, "Access function of PHI: ");
 573               print_generic_expr (vect_dump, access_fn, TDF_SLIM);
 574             }
 575           STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
 576             = evolution_part_in_loop_num (access_fn, loop->num);
 577         }
 578
 579       if (!access_fn
 580           || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
 581         {
 582           VEC_safe_push (gimple, heap, worklist, phi);
 583           continue;
 584         }
 585
 586       gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
 587
 588       if (vect_print_dump_info (REPORT_DETAILS))
 589         fprintf (vect_dump, "Detected induction.");
 590       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
 591     }
 592
 593
 594   /* Second - identify all reductions and nested cycles.  */
 595   while (VEC_length (gimple, worklist) > 0)
 596     {
 597       gimple phi = VEC_pop (gimple, worklist);
 598       tree def = PHI_RESULT (phi);
 599       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
 600       gimple reduc_stmt;
 601       bool nested_cycle;
 602
 603       if (vect_print_dump_info (REPORT_DETAILS))
 604         {
 605           fprintf (vect_dump, "Analyze phi: ");
 606           print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
 607         }
 608
 609       gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
 610       gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
 611
 612       nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
 613       reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
 614                                                 &double_reduc);
 615       if (reduc_stmt)
 616         {
 617           if (double_reduc)
 618             {
 619               if (vect_print_dump_info (REPORT_DETAILS))
 620                 fprintf (vect_dump, "Detected double reduction.");
 621
 622               STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
 623               STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 624                                                     vect_double_reduction_def;
 625             }
 626           else
 627             {
 628               if (nested_cycle)
 629                 {
 630                   if (vect_print_dump_info (REPORT_DETAILS))
 631                     fprintf (vect_dump, "Detected vectorizable nested cycle.");
 632
 633                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
 634                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 635                                                              vect_nested_cycle;
 636                 }
 637               else
 638                 {
 639                   if (vect_print_dump_info (REPORT_DETAILS))
 640                     fprintf (vect_dump, "Detected reduction.");
 641
 642                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
 643                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 644                                                            vect_reduction_def;
 645                   /* Store the reduction cycles for possible vectorization in
 646                      loop-aware SLP.  */
 647                   VEC_safe_push (gimple, heap,
 648                                  LOOP_VINFO_REDUCTIONS (loop_vinfo),
 649                                  reduc_stmt);
 650                 }
 651             }
 652         }
 653       else
 654         if (vect_print_dump_info (REPORT_DETAILS))
 655           fprintf (vect_dump, "Unknown def-use cycle pattern.");
 656     }
 657
 658   VEC_free (gimple, heap, worklist);
 659 }
 660
 661
 662 /* Function vect_analyze_scalar_cycles.
 663
 664    Examine the cross iteration def-use cycles of scalar variables, by
 665    analyzing the loop-header PHIs of scalar variables.  Classify each
 666    cycle as one of the following: invariant, induction, reduction, unknown.
 667    We do that for the loop represented by LOOP_VINFO, and also to its
 668    inner-loop, if exists.
 669    Examples for scalar cycles:
 670
 671    Example1: reduction:
 672
 673               loop1:
 674               for (i=0; i<N; i++)
 675                  sum += a[i];
 676
 677    Example2: induction:
 678
 679               loop2:
 680               for (i=0; i<N; i++)
 681                  a[i] = i;  */
 682
 683 static void
 684 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
 685 {
 686   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 687
 688   vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
 689
 690   /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
 691      Reductions in such inner-loop therefore have different properties than
 692      the reductions in the nest that gets vectorized:
 693      1. When vectorized, they are executed in the same order as in the original
 694         scalar loop, so we can't change the order of computation when
 695         vectorizing them.
 696      2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
 697         current checks are too strict.  */
 698
 699   if (loop->inner)
 700     vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
 701 }
 702
 703 /* Function vect_get_loop_niters.
 704
 705    Determine how many iterations the loop is executed.
 706    If an expression that represents the number of iterations
 707    can be constructed, place it in NUMBER_OF_ITERATIONS.
 708    Return the loop exit condition.  */
 709
 710 static gimple
 711 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
 712 {
 713   tree niters;
 714
 715   if (vect_print_dump_info (REPORT_DETAILS))
 716     fprintf (vect_dump, "=== get_loop_niters ===");
 717
 718   niters = number_of_exit_cond_executions (loop);
 719
 720   if (niters != NULL_TREE
 721       && niters != chrec_dont_know)
 722     {
 723       *number_of_iterations = niters;
 724
 725       if (vect_print_dump_info (REPORT_DETAILS))
 726         {
 727           fprintf (vect_dump, "==> get_loop_niters:" );
 728           print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
 729         }
 730     }
 731
 732   return get_loop_exit_condition (loop);
 733 }
 734
 735
 736 /* Function bb_in_loop_p
 737
 738    Used as predicate for dfs order traversal of the loop bbs.  */
 739
 740 static bool
 741 bb_in_loop_p (const_basic_block bb, const void *data)
 742 {
 743   const struct loop *const loop = (const struct loop *)data;
 744   if (flow_bb_inside_loop_p (loop, bb))
 745     return true;
 746   return false;
 747 }
 748
 749
 750 /* Function new_loop_vec_info.
 751
 752    Create and initialize a new loop_vec_info struct for LOOP, as well as
 753    stmt_vec_info structs for all the stmts in LOOP.  */
 754
 755 static loop_vec_info
 756 new_loop_vec_info (struct loop *loop)
 757 {
 758   loop_vec_info res;
 759   basic_block *bbs;
 760   gimple_stmt_iterator si;
 761   unsigned int i, nbbs;
 762
 763   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
 764   LOOP_VINFO_LOOP (res) = loop;
 765
 766   bbs = get_loop_body (loop);
 767
 768   /* Create/Update stmt_info for all stmts in the loop.  */
 769   for (i = 0; i < loop->num_nodes; i++)
 770     {
 771       basic_block bb = bbs[i];
 772
 773       /* BBs in a nested inner-loop will have been already processed (because
 774          we will have called vect_analyze_loop_form for any nested inner-loop).
 775          Therefore, for stmts in an inner-loop we just want to update the
 776          STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
 777          loop_info of the outer-loop we are currently considering to vectorize
 778          (instead of the loop_info of the inner-loop).
 779          For stmts in other BBs we need to create a stmt_info from scratch.  */
 780       if (bb->loop_father != loop)
 781         {
 782           /* Inner-loop bb.  */
 783           gcc_assert (loop->inner && bb->loop_father == loop->inner);
 784           for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
 785             {
 786               gimple phi = gsi_stmt (si);
 787               stmt_vec_info stmt_info = vinfo_for_stmt (phi);
 788               loop_vec_info inner_loop_vinfo =
 789                 STMT_VINFO_LOOP_VINFO (stmt_info);
 790               gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
 791               STMT_VINFO_LOOP_VINFO (stmt_info) = res;
 792             }
 793           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
 794            {
 795               gimple stmt = gsi_stmt (si);
 796               stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 797               loop_vec_info inner_loop_vinfo =
 798                  STMT_VINFO_LOOP_VINFO (stmt_info);
 799               gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
 800               STMT_VINFO_LOOP_VINFO (stmt_info) = res;
 801            }
 802         }
 803       else
 804         {
 805           /* bb in current nest.  */
 806           for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
 807             {
 808               gimple phi = gsi_stmt (si);
 809               gimple_set_uid (phi, 0);
 810               set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res, NULL));
 811             }
 812
 813           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
 814             {
 815               gimple stmt = gsi_stmt (si);
 816               gimple_set_uid (stmt, 0);
 817               set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res, NULL));
 818             }
 819         }
 820     }
 821
 822   /* CHECKME: We want to visit all BBs before their successors (except for
 823      latch blocks, for which this assertion wouldn't hold).  In the simple
 824      case of the loop forms we allow, a dfs order of the BBs would the same
 825      as reversed postorder traversal, so we are safe.  */
 826
 827    free (bbs);
 828    bbs = XCNEWVEC (basic_block, loop->num_nodes);
 829    nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
 830                               bbs, loop->num_nodes, loop);
 831    gcc_assert (nbbs == loop->num_nodes);
 832
 833   LOOP_VINFO_BBS (res) = bbs;
 834   LOOP_VINFO_NITERS (res) = NULL;
 835   LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
 836   LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
 837   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
 838   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
 839   LOOP_VINFO_VECT_FACTOR (res) = 0;
 840   LOOP_VINFO_LOOP_NEST (res) = VEC_alloc (loop_p, heap, 3);
 841   LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10);
 842   LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10);
 843   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
 844   LOOP_VINFO_MAY_MISALIGN_STMTS (res) =
 845     VEC_alloc (gimple, heap,
 846                PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
 847   LOOP_VINFO_MAY_ALIAS_DDRS (res) =
 848     VEC_alloc (ddr_p, heap,
 849                PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
 850   LOOP_VINFO_GROUPED_STORES (res) = VEC_alloc (gimple, heap, 10);
 851   LOOP_VINFO_REDUCTIONS (res) = VEC_alloc (gimple, heap, 10);
 852   LOOP_VINFO_REDUCTION_CHAINS (res) = VEC_alloc (gimple, heap, 10);
 853   LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
 854   LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
 855   LOOP_VINFO_PEELING_HTAB (res) = NULL;
 856   LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
 857
 858   return res;
 859 }
 860
 861
 862 /* Function destroy_loop_vec_info.
 863
 864    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
 865    stmts in the loop.  */
 866
 867 void
 868 destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
 869 {
 870   struct loop *loop;
 871   basic_block *bbs;
 872   int nbbs;
 873   gimple_stmt_iterator si;
 874   int j;
 875   VEC (slp_instance, heap) *slp_instances;
 876   slp_instance instance;
 877
 878   if (!loop_vinfo)
 879     return;
 880
 881   loop = LOOP_VINFO_LOOP (loop_vinfo);
 882
 883   bbs = LOOP_VINFO_BBS (loop_vinfo);
 884   nbbs = loop->num_nodes;
 885
 886   if (!clean_stmts)
 887     {
 888       free (LOOP_VINFO_BBS (loop_vinfo));
 889       free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
 890       free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
 891       VEC_free (loop_p, heap, LOOP_VINFO_LOOP_NEST (loop_vinfo));
 892       VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
 893       VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
 894
 895       free (loop_vinfo);
 896       loop->aux = NULL;
 897       return;
 898     }
 899
 900   for (j = 0; j < nbbs; j++)
 901     {
 902       basic_block bb = bbs[j];
 903       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
 904         free_stmt_vec_info (gsi_stmt (si));
 905
 906       for (si = gsi_start_bb (bb); !gsi_end_p (si); )
 907         {
 908           gimple stmt = gsi_stmt (si);
 909           /* Free stmt_vec_info.  */
 910           free_stmt_vec_info (stmt);
 911           gsi_next (&si);
 912         }
 913     }
 914
 915   free (LOOP_VINFO_BBS (loop_vinfo));
 916   free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
 917   free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
 918   VEC_free (loop_p, heap, LOOP_VINFO_LOOP_NEST (loop_vinfo));
 919   VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
 920   VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
 921   slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
 922   FOR_EACH_VEC_ELT (slp_instance, slp_instances, j, instance)
 923     vect_free_slp_instance (instance);
 924
 925   VEC_free (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
 926   VEC_free (gimple, heap, LOOP_VINFO_GROUPED_STORES (loop_vinfo));
 927   VEC_free (gimple, heap, LOOP_VINFO_REDUCTIONS (loop_vinfo));
 928   VEC_free (gimple, heap, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo));
 929
 930   if (LOOP_VINFO_PEELING_HTAB (loop_vinfo))
 931     htab_delete (LOOP_VINFO_PEELING_HTAB (loop_vinfo));
 932
 933   free (loop_vinfo);
 934   loop->aux = NULL;
 935 }
 936
 937
 938 /* Function vect_analyze_loop_1.
 939
 940    Apply a set of analyses on LOOP, and create a loop_vec_info struct
 941    for it. The different analyses will record information in the
 942    loop_vec_info struct.  This is a subset of the analyses applied in
 943    vect_analyze_loop, to be applied on an inner-loop nested in the loop
 944    that is now considered for (outer-loop) vectorization.  */
 945
 946 static loop_vec_info
 947 vect_analyze_loop_1 (struct loop *loop)
 948 {
 949   loop_vec_info loop_vinfo;
 950
 951   if (vect_print_dump_info (REPORT_DETAILS))
 952     fprintf (vect_dump, "===== analyze_loop_nest_1 =====");
 953
 954   /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.  */
 955
 956   loop_vinfo = vect_analyze_loop_form (loop);
 957   if (!loop_vinfo)
 958     {
 959       if (vect_print_dump_info (REPORT_DETAILS))
 960         fprintf (vect_dump, "bad inner-loop form.");
 961       return NULL;
 962     }
 963
 964   return loop_vinfo;
 965 }
 966
 967
 968 /* Function vect_analyze_loop_form.
 969
 970    Verify that certain CFG restrictions hold, including:
 971    - the loop has a pre-header
 972    - the loop has a single entry and exit
 973    - the loop exit condition is simple enough, and the number of iterations
 974      can be analyzed (a countable loop).  */
 975
 976 loop_vec_info
 977 vect_analyze_loop_form (struct loop *loop)
 978 {
 979   loop_vec_info loop_vinfo;
 980   gimple loop_cond;
 981   tree number_of_iterations = NULL;
 982   loop_vec_info inner_loop_vinfo = NULL;
 983
 984   if (vect_print_dump_info (REPORT_DETAILS))
 985     fprintf (vect_dump, "=== vect_analyze_loop_form ===");
 986
 987   /* Different restrictions apply when we are considering an inner-most loop,
 988      vs. an outer (nested) loop.
 989      (FORNOW. May want to relax some of these restrictions in the future).  */
 990
 991   if (!loop->inner)
 992     {
 993       /* Inner-most loop.  We currently require that the number of BBs is
 994          exactly 2 (the header and latch).  Vectorizable inner-most loops
 995          look like this:
 996
 997                         (pre-header)
 998                            |
 999                           header <--------+
1000                            | |            |
1001                            | +--> latch --+
1002                            |
1003                         (exit-bb)  */
1004
1005       if (loop->num_nodes != 2)
1006         {
1007           if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1008             fprintf (vect_dump, "not vectorized: control flow in loop.");
1009           return NULL;
1010         }
1011
1012       if (empty_block_p (loop->header))
1013     {
1014           if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1015             fprintf (vect_dump, "not vectorized: empty loop.");
1016       return NULL;
1017     }
1018     }
1019   else
1020     {
1021       struct loop *innerloop = loop->inner;
1022       edge entryedge;
1023
1024       /* Nested loop. We currently require that the loop is doubly-nested,
1025          contains a single inner loop, and the number of BBs is exactly 5.
1026          Vectorizable outer-loops look like this:
1027
1028                         (pre-header)
1029                            |
1030                           header <---+
1031                            |         |
1032                           inner-loop |
1033                            |         |
1034                           tail ------+
1035                            |
1036                         (exit-bb)
1037
1038          The inner-loop has the properties expected of inner-most loops
1039          as described above.  */
1040
1041       if ((loop->inner)->inner || (loop->inner)->next)
1042         {
1043           if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1044             fprintf (vect_dump, "not vectorized: multiple nested loops.");
1045           return NULL;
1046         }
1047
1048       /* Analyze the inner-loop.  */
1049       inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
1050       if (!inner_loop_vinfo)
1051         {
1052           if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1053             fprintf (vect_dump, "not vectorized: Bad inner loop.");
1054           return NULL;
1055         }
1056
1057       if (!expr_invariant_in_loop_p (loop,
1058                                         LOOP_VINFO_NITERS (inner_loop_vinfo)))
1059         {
1060           if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1061             fprintf (vect_dump,
1062                      "not vectorized: inner-loop count not invariant.");
1063           destroy_loop_vec_info (inner_loop_vinfo, true);
1064           return NULL;
1065         }
1066
1067       if (loop->num_nodes != 5)
1068         {
1069           if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1070             fprintf (vect_dump, "not vectorized: control flow in loop.");
1071           destroy_loop_vec_info (inner_loop_vinfo, true);
1072           return NULL;
1073         }
1074
1075       gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
1076       entryedge = EDGE_PRED (innerloop->header, 0);
1077       if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
1078         entryedge = EDGE_PRED (innerloop->header, 1);
1079
1080       if (entryedge->src != loop->header
1081           || !single_exit (innerloop)
1082           || single_exit (innerloop)->dest !=  EDGE_PRED (loop->latch, 0)->src)
1083         {
1084           if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1085             fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
1086           destroy_loop_vec_info (inner_loop_vinfo, true);
1087           return NULL;
1088         }
1089
1090       if (vect_print_dump_info (REPORT_DETAILS))
1091         fprintf (vect_dump, "Considering outer-loop vectorization.");
1092     }
1093
1094   if (!single_exit (loop)
1095       || EDGE_COUNT (loop->header->preds) != 2)
1096     {
1097       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1098         {
1099           if (!single_exit (loop))
1100             fprintf (vect_dump, "not vectorized: multiple exits.");
1101           else if (EDGE_COUNT (loop->header->preds) != 2)
1102             fprintf (vect_dump, "not vectorized: too many incoming edges.");
1103         }
1104       if (inner_loop_vinfo)
1105         destroy_loop_vec_info (inner_loop_vinfo, true);
1106       return NULL;
1107     }
1108
1109   /* We assume that the loop exit condition is at the end of the loop. i.e,
1110      that the loop is represented as a do-while (with a proper if-guard
1111      before the loop if needed), where the loop header contains all the
1112      executable statements, and the latch is empty.  */
1113   if (!empty_block_p (loop->latch)
1114         || !gimple_seq_empty_p (phi_nodes (loop->latch)))
1115     {
1116       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1117         fprintf (vect_dump, "not vectorized: unexpected loop form.");
1118       if (inner_loop_vinfo)
1119         destroy_loop_vec_info (inner_loop_vinfo, true);
1120       return NULL;
1121     }
1122
1123   /* Make sure there exists a single-predecessor exit bb:  */
1124   if (!single_pred_p (single_exit (loop)->dest))
1125     {
1126       edge e = single_exit (loop);
1127       if (!(e->flags & EDGE_ABNORMAL))
1128         {
1129           split_loop_exit_edge (e);
1130           if (vect_print_dump_info (REPORT_DETAILS))
1131             fprintf (vect_dump, "split exit edge.");
1132         }
1133       else
1134         {
1135           if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1136             fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
1137           if (inner_loop_vinfo)
1138             destroy_loop_vec_info (inner_loop_vinfo, true);
1139           return NULL;
1140         }
1141     }
1142
1143   loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
1144   if (!loop_cond)
1145     {
1146       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1147         fprintf (vect_dump, "not vectorized: complicated exit condition.");
1148       if (inner_loop_vinfo)
1149         destroy_loop_vec_info (inner_loop_vinfo, true);
1150       return NULL;
1151     }
1152
1153   if (!number_of_iterations)
1154     {
1155       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1156         fprintf (vect_dump,
1157                  "not vectorized: number of iterations cannot be computed.");
1158       if (inner_loop_vinfo)
1159         destroy_loop_vec_info (inner_loop_vinfo, true);
1160       return NULL;
1161     }
1162
1163   if (chrec_contains_undetermined (number_of_iterations))
1164     {
1165       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1166         fprintf (vect_dump, "Infinite number of iterations.");
1167       if (inner_loop_vinfo)
1168         destroy_loop_vec_info (inner_loop_vinfo, true);
1169       return NULL;
1170     }
1171
1172   if (!NITERS_KNOWN_P (number_of_iterations))
1173     {
1174       if (vect_print_dump_info (REPORT_DETAILS))
1175         {
1176           fprintf (vect_dump, "Symbolic number of iterations is ");
1177           print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
1178         }
1179     }
1180   else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
1181     {
1182       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1183         fprintf (vect_dump, "not vectorized: number of iterations = 0.");
1184       if (inner_loop_vinfo)
1185         destroy_loop_vec_info (inner_loop_vinfo, false);
1186       return NULL;
1187     }
1188
1189   loop_vinfo = new_loop_vec_info (loop);
1190   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1191   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
1192
1193   STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
1194
1195   /* CHECKME: May want to keep it around it in the future.  */
1196   if (inner_loop_vinfo)
1197     destroy_loop_vec_info (inner_loop_vinfo, false);
1198
1199   gcc_assert (!loop->aux);
1200   loop->aux = loop_vinfo;
1201   return loop_vinfo;
1202 }
1203
1204
1205 /* Get cost by calling cost target builtin.  */
1206
1207 static inline int
1208 vect_get_cost (enum vect_cost_for_stmt type_of_cost)
1209 {
1210   tree dummy_type = NULL;
1211   int dummy = 0;
1212
1213   return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
1214                                                        dummy_type, dummy);
1215 }
1216
1217
1218 /* Function vect_analyze_loop_operations.
1219
1220    Scan the loop stmts and make sure they are all vectorizable.  */
1221
1222 static bool
1223 vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
1224 {
1225   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1226   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1227   int nbbs = loop->num_nodes;
1228   gimple_stmt_iterator si;
1229   unsigned int vectorization_factor = 0;
1230   int i;
1231   gimple phi;
1232   stmt_vec_info stmt_info;
1233   bool need_to_vectorize = false;
1234   int min_profitable_iters;
1235   int min_scalar_loop_bound;
1236   unsigned int th;
1237   bool only_slp_in_loop = true, ok;
1238   HOST_WIDE_INT max_niter;
1239
1240   if (vect_print_dump_info (REPORT_DETAILS))
1241     fprintf (vect_dump, "=== vect_analyze_loop_operations ===");
1242
1243   gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1244   vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1245   if (slp)
1246     {
1247       /* If all the stmts in the loop can be SLPed, we perform only SLP, and
1248          vectorization factor of the loop is the unrolling factor required by
1249          the SLP instances.  If that unrolling factor is 1, we say, that we
1250          perform pure SLP on loop - cross iteration parallelism is not
1251          exploited.  */
1252       for (i = 0; i < nbbs; i++)
1253         {
1254           basic_block bb = bbs[i];
1255           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1256             {
1257               gimple stmt = gsi_stmt (si);
1258               stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1259               gcc_assert (stmt_info);
1260               if ((STMT_VINFO_RELEVANT_P (stmt_info)
1261                    || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1262                   && !PURE_SLP_STMT (stmt_info))
1263                 /* STMT needs both SLP and loop-based vectorization.  */
1264                 only_slp_in_loop = false;
1265             }
1266         }
1267
1268       if (only_slp_in_loop)
1269         vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
1270       else
1271         vectorization_factor = least_common_multiple (vectorization_factor,
1272                                 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
1273
1274       LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1275       if (vect_print_dump_info (REPORT_DETAILS))
1276         fprintf (vect_dump, "Updating vectorization factor to %d ",
1277                             vectorization_factor);
1278     }
1279
1280   for (i = 0; i < nbbs; i++)
1281     {
1282       basic_block bb = bbs[i];
1283
1284       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1285         {
1286           phi = gsi_stmt (si);
1287           ok = true;
1288
1289           stmt_info = vinfo_for_stmt (phi);
1290           if (vect_print_dump_info (REPORT_DETAILS))
1291             {
1292               fprintf (vect_dump, "examining phi: ");
1293               print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1294             }
1295
1296           /* Inner-loop loop-closed exit phi in outer-loop vectorization
1297              (i.e., a phi in the tail of the outer-loop).  */
1298           if (! is_loop_header_bb_p (bb))
1299             {
1300               /* FORNOW: we currently don't support the case that these phis
1301                  are not used in the outerloop (unless it is double reduction,
1302                  i.e., this phi is vect_reduction_def), cause this case
1303                  requires to actually do something here.  */
1304               if ((!STMT_VINFO_RELEVANT_P (stmt_info)
1305                    || STMT_VINFO_LIVE_P (stmt_info))
1306                   && STMT_VINFO_DEF_TYPE (stmt_info)
1307                      != vect_double_reduction_def)
1308                 {
1309                   if (vect_print_dump_info (REPORT_DETAILS))
1310                     fprintf (vect_dump,
1311                              "Unsupported loop-closed phi in outer-loop.");
1312                   return false;
1313                 }
1314
1315               /* If PHI is used in the outer loop, we check that its operand
1316                  is defined in the inner loop.  */
1317               if (STMT_VINFO_RELEVANT_P (stmt_info))
1318                 {
1319                   tree phi_op;
1320                   gimple op_def_stmt;
1321
1322                   if (gimple_phi_num_args (phi) != 1)
1323                     return false;
1324
1325                   phi_op = PHI_ARG_DEF (phi, 0);
1326                   if (TREE_CODE (phi_op) != SSA_NAME)
1327                     return false;
1328
1329                   op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
1330                   if (!op_def_stmt || !vinfo_for_stmt (op_def_stmt))
1331                     return false;
1332
1333                   if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1334                         != vect_used_in_outer
1335                       && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1336                            != vect_used_in_outer_by_reduction)
1337                     return false;
1338                 }
1339
1340               continue;
1341             }
1342
1343           gcc_assert (stmt_info);
1344
1345           if (STMT_VINFO_LIVE_P (stmt_info))
1346             {
1347               /* FORNOW: not yet supported.  */
1348               if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1349                 fprintf (vect_dump, "not vectorized: value used after loop.");
1350               return false;
1351             }
1352
1353           if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1354               && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1355             {
1356               /* A scalar-dependence cycle that we don't support.  */
1357               if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1358                 fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
1359               return false;
1360             }
1361
1362           if (STMT_VINFO_RELEVANT_P (stmt_info))
1363             {
1364               need_to_vectorize = true;
1365               if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
1366                 ok = vectorizable_induction (phi, NULL, NULL);
1367             }
1368
1369           if (!ok)
1370             {
1371               if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1372                 {
1373                   fprintf (vect_dump,
1374                            "not vectorized: relevant phi not supported: ");
1375                   print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1376                 }
1377               return false;
1378             }
1379         }
1380
1381       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1382         {
1383           gimple stmt = gsi_stmt (si);
1384           if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
1385             return false;
1386         }
1387     } /* bbs */
1388
1389   /* All operations in the loop are either irrelevant (deal with loop
1390      control, or dead), or only used outside the loop and can be moved
1391      out of the loop (e.g. invariants, inductions).  The loop can be
1392      optimized away by scalar optimizations.  We're better off not
1393      touching this loop.  */
1394   if (!need_to_vectorize)
1395     {
1396       if (vect_print_dump_info (REPORT_DETAILS))
1397         fprintf (vect_dump,
1398                  "All the computation can be taken out of the loop.");
1399       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1400         fprintf (vect_dump,
1401                  "not vectorized: redundant loop. no profit to vectorize.");
1402       return false;
1403     }
1404
1405   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1406       && vect_print_dump_info (REPORT_DETAILS))
1407     fprintf (vect_dump,
1408         "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
1409         vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
1410
1411   if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1412        && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
1413       || ((max_niter = max_stmt_executions_int (loop)) != -1
1414           && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor))
1415     {
1416       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1417         fprintf (vect_dump, "not vectorized: iteration count too small.");
1418       if (vect_print_dump_info (REPORT_DETAILS))
1419         fprintf (vect_dump,"not vectorized: iteration count smaller than "
1420                  "vectorization factor.");
1421       return false;
1422     }
1423
1424   /* Analyze cost.  Decide if worth while to vectorize.  */
1425
1426   /* Once VF is set, SLP costs should be updated since the number of created
1427      vector stmts depends on VF.  */
1428   vect_update_slp_costs_according_to_vf (loop_vinfo);
1429
1430   min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
1431   LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
1432
1433   if (min_profitable_iters < 0)
1434     {
1435       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1436         fprintf (vect_dump, "not vectorized: vectorization not profitable.");
1437       if (vect_print_dump_info (REPORT_DETAILS))
1438         fprintf (vect_dump, "not vectorized: vector version will never be "
1439                  "profitable.");
1440       return false;
1441     }
1442
1443   min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1444                             * vectorization_factor) - 1);
1445
1446   /* Use the cost model only if it is more conservative than user specified
1447      threshold.  */
1448
1449   th = (unsigned) min_scalar_loop_bound;
1450   if (min_profitable_iters
1451       && (!min_scalar_loop_bound
1452           || min_profitable_iters > min_scalar_loop_bound))
1453     th = (unsigned) min_profitable_iters;
1454
1455   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1456       && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
1457     {
1458       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1459         fprintf (vect_dump, "not vectorized: vectorization not "
1460                  "profitable.");
1461       if (vect_print_dump_info (REPORT_DETAILS))
1462         fprintf (vect_dump, "not vectorized: iteration count smaller than "
1463                  "user specified loop bound parameter or minimum "
1464                  "profitable iterations (whichever is more conservative).");
1465       return false;
1466     }
1467
1468   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1469       || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
1470       || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
1471     {
1472       if (vect_print_dump_info (REPORT_DETAILS))
1473         fprintf (vect_dump, "epilog loop required.");
1474       if (!vect_can_advance_ivs_p (loop_vinfo))
1475         {
1476           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1477             fprintf (vect_dump,
1478                      "not vectorized: can't create epilog loop 1.");
1479           return false;
1480         }
1481       if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1482         {
1483           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1484             fprintf (vect_dump,
1485                      "not vectorized: can't create epilog loop 2.");
1486           return false;
1487         }
1488     }
1489
1490   return true;
1491 }
1492
1493
1494 /* Function vect_analyze_loop_2.
1495
1496    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1497    for it.  The different analyses will record information in the
1498    loop_vec_info struct.  */
1499 static bool
1500 vect_analyze_loop_2 (loop_vec_info loop_vinfo)
1501 {
1502   bool ok, slp = false;
1503   int max_vf = MAX_VECTORIZATION_FACTOR;
1504   int min_vf = 2;
1505
1506   /* Find all data references in the loop (which correspond to vdefs/vuses)
1507      and analyze their evolution in the loop.  Also adjust the minimal
1508      vectorization factor according to the loads and stores.
1509
1510      FORNOW: Handle only simple, array references, which
1511      alignment can be forced, and aligned pointer-references.  */
1512
1513   ok = vect_analyze_data_refs (loop_vinfo, NULL, &min_vf);
1514   if (!ok)
1515     {
1516       if (vect_print_dump_info (REPORT_DETAILS))
1517         fprintf (vect_dump, "bad data references.");
1518       return false;
1519     }
1520
1521   /* Classify all cross-iteration scalar data-flow cycles.
1522      Cross-iteration cycles caused by virtual phis are analyzed separately.  */
1523
1524   vect_analyze_scalar_cycles (loop_vinfo);
1525
1526   vect_pattern_recog (loop_vinfo, NULL);
1527
1528   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
1529
1530   ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1531   if (!ok)
1532     {
1533       if (vect_print_dump_info (REPORT_DETAILS))
1534         fprintf (vect_dump, "unexpected pattern.");
1535       return false;
1536     }
1537
1538   /* Analyze data dependences between the data-refs in the loop
1539      and adjust the maximum vectorization factor according to
1540      the dependences.
1541      FORNOW: fail at the first data dependence that we encounter.  */
1542
1543   ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf);
1544   if (!ok
1545       || max_vf < min_vf)
1546     {
1547       if (vect_print_dump_info (REPORT_DETAILS))
1548         fprintf (vect_dump, "bad data dependence.");
1549       return false;
1550     }
1551
1552   ok = vect_determine_vectorization_factor (loop_vinfo);
1553   if (!ok)
1554     {
1555       if (vect_print_dump_info (REPORT_DETAILS))
1556         fprintf (vect_dump, "can't determine vectorization factor.");
1557       return false;
1558     }
1559   if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1560     {
1561       if (vect_print_dump_info (REPORT_DETAILS))
1562         fprintf (vect_dump, "bad data dependence.");
1563       return false;
1564     }
1565
1566   /* Analyze the alignment of the data-refs in the loop.
1567      Fail if a data reference is found that cannot be vectorized.  */
1568
1569   ok = vect_analyze_data_refs_alignment (loop_vinfo, NULL);
1570   if (!ok)
1571     {
1572       if (vect_print_dump_info (REPORT_DETAILS))
1573         fprintf (vect_dump, "bad data alignment.");
1574       return false;
1575     }
1576
1577   /* Analyze the access patterns of the data-refs in the loop (consecutive,
1578      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
1579
1580   ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
1581   if (!ok)
1582     {
1583       if (vect_print_dump_info (REPORT_DETAILS))
1584         fprintf (vect_dump, "bad data access.");
1585       return false;
1586     }
1587
1588   /* Prune the list of ddrs to be tested at run-time by versioning for alias.
1589      It is important to call pruning after vect_analyze_data_ref_accesses,
1590      since we use grouping information gathered by interleaving analysis.  */
1591   ok = vect_prune_runtime_alias_test_list (loop_vinfo);
1592   if (!ok)
1593     {
1594       if (vect_print_dump_info (REPORT_DETAILS))
1595         fprintf (vect_dump, "too long list of versioning for alias "
1596                             "run-time tests.");
1597       return false;
1598     }
1599
1600   /* This pass will decide on using loop versioning and/or loop peeling in
1601      order to enhance the alignment of data references in the loop.  */
1602
1603   ok = vect_enhance_data_refs_alignment (loop_vinfo);
1604   if (!ok)
1605     {
1606       if (vect_print_dump_info (REPORT_DETAILS))
1607         fprintf (vect_dump, "bad data alignment.");
1608       return false;
1609     }
1610
1611   /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
1612   ok = vect_analyze_slp (loop_vinfo, NULL);
1613   if (ok)
1614     {
1615       /* Decide which possible SLP instances to SLP.  */
1616       slp = vect_make_slp_decision (loop_vinfo);
1617
1618       /* Find stmts that need to be both vectorized and SLPed.  */
1619       vect_detect_hybrid_slp (loop_vinfo);
1620     }
1621   else
1622     return false;
1623
1624   /* Scan all the operations in the loop and make sure they are
1625      vectorizable.  */
1626
1627   ok = vect_analyze_loop_operations (loop_vinfo, slp);
1628   if (!ok)
1629     {
1630       if (vect_print_dump_info (REPORT_DETAILS))
1631         fprintf (vect_dump, "bad operation or unsupported loop bound.");
1632       return false;
1633     }
1634
1635   return true;
1636 }
1637
1638 /* Function vect_analyze_loop.
1639
1640    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1641    for it.  The different analyses will record information in the
1642    loop_vec_info struct.  */
1643 loop_vec_info
1644 vect_analyze_loop (struct loop *loop)
1645 {
1646   loop_vec_info loop_vinfo;
1647   unsigned int vector_sizes;
1648
1649   /* Autodetect first vector size we try.  */
1650   current_vector_size = 0;
1651   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
1652
1653   if (vect_print_dump_info (REPORT_DETAILS))
1654     fprintf (vect_dump, "===== analyze_loop_nest =====");
1655
1656   if (loop_outer (loop)
1657       && loop_vec_info_for_loop (loop_outer (loop))
1658       && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
1659     {
1660       if (vect_print_dump_info (REPORT_DETAILS))
1661         fprintf (vect_dump, "outer-loop already vectorized.");
1662       return NULL;
1663     }
1664
1665   while (1)
1666     {
1667       /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
1668       loop_vinfo = vect_analyze_loop_form (loop);
1669       if (!loop_vinfo)
1670         {
1671           if (vect_print_dump_info (REPORT_DETAILS))
1672             fprintf (vect_dump, "bad loop form.");
1673           return NULL;
1674         }
1675
1676       if (vect_analyze_loop_2 (loop_vinfo))
1677         {
1678           LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
1679
1680           return loop_vinfo;
1681         }
1682
1683       destroy_loop_vec_info (loop_vinfo, true);
1684
1685       vector_sizes &= ~current_vector_size;
1686       if (vector_sizes == 0
1687           || current_vector_size == 0)
1688         return NULL;
1689
1690       /* Try the next biggest vector size.  */
1691       current_vector_size = 1 << floor_log2 (vector_sizes);
1692       if (vect_print_dump_info (REPORT_DETAILS))
1693         fprintf (vect_dump, "***** Re-trying analysis with "
1694                  "vector size %d\n", current_vector_size);
1695     }
1696 }
1697
1698
1699 /* Function reduction_code_for_scalar_code
1700
1701    Input:
1702    CODE - tree_code of a reduction operations.
1703
1704    Output:
1705    REDUC_CODE - the corresponding tree-code to be used to reduce the
1706       vector of partial results into a single scalar result (which
1707       will also reside in a vector) or ERROR_MARK if the operation is
1708       a supported reduction operation, but does not have such tree-code.
1709
1710    Return FALSE if CODE currently cannot be vectorized as reduction.  */
1711
1712 static bool
1713 reduction_code_for_scalar_code (enum tree_code code,
1714                                 enum tree_code *reduc_code)
1715 {
1716   switch (code)
1717     {
1718       case MAX_EXPR:
1719         *reduc_code = REDUC_MAX_EXPR;
1720         return true;
1721
1722       case MIN_EXPR:
1723         *reduc_code = REDUC_MIN_EXPR;
1724         return true;
1725
1726       case PLUS_EXPR:
1727         *reduc_code = REDUC_PLUS_EXPR;
1728         return true;
1729
1730       case MULT_EXPR:
1731       case MINUS_EXPR:
1732       case BIT_IOR_EXPR:
1733       case BIT_XOR_EXPR:
1734       case BIT_AND_EXPR:
1735         *reduc_code = ERROR_MARK;
1736         return true;
1737
1738       default:
1739        return false;
1740     }
1741 }
1742
1743
1744 /* Error reporting helper for vect_is_simple_reduction below.  GIMPLE statement
1745    STMT is printed with a message MSG. */
1746
1747 static void
1748 report_vect_op (gimple stmt, const char *msg)
1749 {
1750   fprintf (vect_dump, "%s", msg);
1751   print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1752 }
1753
1754
1755 /* Detect SLP reduction of the form:
1756
1757    #a1 = phi <a5, a0>
1758    a2 = operation (a1)
1759    a3 = operation (a2)
1760    a4 = operation (a3)
1761    a5 = operation (a4)
1762
1763    #a = phi <a5>
1764
1765    PHI is the reduction phi node (#a1 = phi <a5, a0> above)
1766    FIRST_STMT is the first reduction stmt in the chain
1767    (a2 = operation (a1)).
1768
1769    Return TRUE if a reduction chain was detected.  */
1770
1771 static bool
1772 vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
1773 {
1774   struct loop *loop = (gimple_bb (phi))->loop_father;
1775   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
1776   enum tree_code code;
1777   gimple current_stmt = NULL, loop_use_stmt = NULL, first, next_stmt;
1778   stmt_vec_info use_stmt_info, current_stmt_info;
1779   tree lhs;
1780   imm_use_iterator imm_iter;
1781   use_operand_p use_p;
1782   int nloop_uses, size = 0, n_out_of_loop_uses;
1783   bool found = false;
1784
1785   if (loop != vect_loop)
1786     return false;
1787
1788   lhs = PHI_RESULT (phi);
1789   code = gimple_assign_rhs_code (first_stmt);
1790   while (1)
1791     {
1792       nloop_uses = 0;
1793       n_out_of_loop_uses = 0;
1794       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1795         {
1796           gimple use_stmt = USE_STMT (use_p);
1797           if (is_gimple_debug (use_stmt))
1798             continue;
1799
1800           use_stmt = USE_STMT (use_p);
1801
1802           /* Check if we got back to the reduction phi.  */
1803           if (use_stmt == phi)
1804             {
1805               loop_use_stmt = use_stmt;
1806               found = true;
1807               break;
1808             }
1809
1810           if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1811             {
1812               if (vinfo_for_stmt (use_stmt)
1813                   && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1814                 {
1815                   loop_use_stmt = use_stmt;
1816                   nloop_uses++;
1817                 }
1818             }
1819            else
1820              n_out_of_loop_uses++;
1821
1822            /* There are can be either a single use in the loop or two uses in
1823               phi nodes.  */
1824            if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
1825              return false;
1826         }
1827
1828       if (found)
1829         break;
1830
1831       /* We reached a statement with no loop uses.  */
1832       if (nloop_uses == 0)
1833         return false;
1834
1835       /* This is a loop exit phi, and we haven't reached the reduction phi.  */
1836       if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
1837         return false;
1838
1839       if (!is_gimple_assign (loop_use_stmt)
1840           || code != gimple_assign_rhs_code (loop_use_stmt)
1841           || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
1842         return false;
1843
1844       /* Insert USE_STMT into reduction chain.  */
1845       use_stmt_info = vinfo_for_stmt (loop_use_stmt);
1846       if (current_stmt)
1847         {
1848           current_stmt_info = vinfo_for_stmt (current_stmt);
1849           GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
1850           GROUP_FIRST_ELEMENT (use_stmt_info)
1851             = GROUP_FIRST_ELEMENT (current_stmt_info);
1852         }
1853       else
1854         GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;
1855
1856       lhs = gimple_assign_lhs (loop_use_stmt);
1857       current_stmt = loop_use_stmt;
1858       size++;
1859    }
1860
1861   if (!found || loop_use_stmt != phi || size < 2)
1862     return false;
1863
1864   /* Swap the operands, if needed, to make the reduction operand be the second
1865      operand.  */
1866   lhs = PHI_RESULT (phi);
1867   next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
1868   while (next_stmt)
1869     {
1870       if (gimple_assign_rhs2 (next_stmt) == lhs)
1871         {
1872           tree op = gimple_assign_rhs1 (next_stmt);
1873           gimple def_stmt = NULL;
1874
1875           if (TREE_CODE (op) == SSA_NAME)
1876             def_stmt = SSA_NAME_DEF_STMT (op);
1877
1878           /* Check that the other def is either defined in the loop
1879              ("vect_internal_def"), or it's an induction (defined by a
1880              loop-header phi-node).  */
1881           if (def_stmt
1882               && gimple_bb (def_stmt)
1883               && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
1884               && (is_gimple_assign (def_stmt)
1885                   || is_gimple_call (def_stmt)
1886                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
1887                            == vect_induction_def
1888                   || (gimple_code (def_stmt) == GIMPLE_PHI
1889                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
1890                                   == vect_internal_def
1891                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
1892             {
1893               lhs = gimple_assign_lhs (next_stmt);
1894               next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1895               continue;
1896             }
1897
1898           return false;
1899         }
1900       else
1901         {
1902           tree op = gimple_assign_rhs2 (next_stmt);
1903           gimple def_stmt = NULL;
1904
1905           if (TREE_CODE (op) == SSA_NAME)
1906             def_stmt = SSA_NAME_DEF_STMT (op);
1907
1908           /* Check that the other def is either defined in the loop
1909             ("vect_internal_def"), or it's an induction (defined by a
1910             loop-header phi-node).  */
1911           if (def_stmt
1912               && gimple_bb (def_stmt)
1913               && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
1914               && (is_gimple_assign (def_stmt)
1915                   || is_gimple_call (def_stmt)
1916                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
1917                               == vect_induction_def
1918                   || (gimple_code (def_stmt) == GIMPLE_PHI
1919                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
1920                                   == vect_internal_def
1921                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
1922             {
1923               if (vect_print_dump_info (REPORT_DETAILS))
1924                 {
1925                   fprintf (vect_dump, "swapping oprnds: ");
1926                   print_gimple_stmt (vect_dump, next_stmt, 0, TDF_SLIM);
1927                 }
1928
1929               swap_tree_operands (next_stmt,
1930                                   gimple_assign_rhs1_ptr (next_stmt),
1931                                   gimple_assign_rhs2_ptr (next_stmt));
1932               update_stmt (next_stmt);
1933             }
1934           else
1935             return false;
1936         }
1937
1938       lhs = gimple_assign_lhs (next_stmt);
1939       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1940     }
1941
1942   /* Save the chain for further analysis in SLP detection.  */
1943   first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
1944   VEC_safe_push (gimple, heap, LOOP_VINFO_REDUCTION_CHAINS (loop_info), first);
1945   GROUP_SIZE (vinfo_for_stmt (first)) = size;
1946
1947   return true;
1948 }
1949
1950
1951 /* Function vect_is_simple_reduction_1
1952
1953    (1) Detect a cross-iteration def-use cycle that represents a simple
1954    reduction computation.  We look for the following pattern:
1955
1956    loop_header:
1957      a1 = phi < a0, a2 >
1958      a3 = ...
1959      a2 = operation (a3, a1)
1960
1961    such that:
1962    1. operation is commutative and associative and it is safe to
1963       change the order of the computation (if CHECK_REDUCTION is true)
1964    2. no uses for a2 in the loop (a2 is used out of the loop)
1965    3. no uses of a1 in the loop besides the reduction operation
1966    4. no uses of a1 outside the loop.
1967
1968    Conditions 1,4 are tested here.
1969    Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
1970
1971    (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
1972    nested cycles, if CHECK_REDUCTION is false.
1973
1974    (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
1975    reductions:
1976
1977      a1 = phi < a0, a2 >
1978      inner loop (def of a3)
1979      a2 = phi < a3 >
1980
1981    If MODIFY is true it tries also to rework the code in-place to enable
1982    detection of more reduction patterns.  For the time being we rewrite
1983    "res -= RHS" into "rhs += -RHS" when it seems worthwhile.
1984 */
1985
1986 static gimple
1987 vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
1988                             bool check_reduction, bool *double_reduc,
1989                             bool modify)
1990 {
1991   struct loop *loop = (gimple_bb (phi))->loop_father;
1992   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
1993   edge latch_e = loop_latch_edge (loop);
1994   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
1995   gimple def_stmt, def1 = NULL, def2 = NULL;
1996   enum tree_code orig_code, code;
1997   tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
1998   tree type;
1999   int nloop_uses;
2000   tree name;
2001   imm_use_iterator imm_iter;
2002   use_operand_p use_p;
2003   bool phi_def;
2004
2005   *double_reduc = false;
2006
2007   /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
2008      otherwise, we assume outer loop vectorization.  */
2009   gcc_assert ((check_reduction && loop == vect_loop)
2010               || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
2011
2012   name = PHI_RESULT (phi);
2013   nloop_uses = 0;
2014   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2015     {
2016       gimple use_stmt = USE_STMT (use_p);
2017       if (is_gimple_debug (use_stmt))
2018         continue;
2019
2020       if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2021         {
2022           if (vect_print_dump_info (REPORT_DETAILS))
2023             fprintf (vect_dump, "intermediate value used outside loop.");
2024
2025           return NULL;
2026         }
2027
2028       if (vinfo_for_stmt (use_stmt)
2029           && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2030         nloop_uses++;
2031       if (nloop_uses > 1)
2032         {
2033           if (vect_print_dump_info (REPORT_DETAILS))
2034             fprintf (vect_dump, "reduction used in loop.");
2035           return NULL;
2036         }
2037     }
2038
2039   if (TREE_CODE (loop_arg) != SSA_NAME)
2040     {
2041       if (vect_print_dump_info (REPORT_DETAILS))
2042         {
2043           fprintf (vect_dump, "reduction: not ssa_name: ");
2044           print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
2045         }
2046       return NULL;
2047     }
2048
2049   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2050   if (!def_stmt)
2051     {
2052       if (vect_print_dump_info (REPORT_DETAILS))
2053         fprintf (vect_dump, "reduction: no def_stmt.");
2054       return NULL;
2055     }
2056
2057   if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
2058     {
2059       if (vect_print_dump_info (REPORT_DETAILS))
2060         print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
2061       return NULL;
2062     }
2063
2064   if (is_gimple_assign (def_stmt))
2065     {
2066       name = gimple_assign_lhs (def_stmt);
2067       phi_def = false;
2068     }
2069   else
2070     {
2071       name = PHI_RESULT (def_stmt);
2072       phi_def = true;
2073     }
2074
2075   nloop_uses = 0;
2076   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2077     {
2078       gimple use_stmt = USE_STMT (use_p);
2079       if (is_gimple_debug (use_stmt))
2080         continue;
2081       if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
2082           && vinfo_for_stmt (use_stmt)
2083           && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2084         nloop_uses++;
2085       if (nloop_uses > 1)
2086         {
2087           if (vect_print_dump_info (REPORT_DETAILS))
2088             fprintf (vect_dump, "reduction used in loop.");
2089           return NULL;
2090         }
2091     }
2092
2093   /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2094      defined in the inner loop.  */
2095   if (phi_def)
2096     {
2097       op1 = PHI_ARG_DEF (def_stmt, 0);
2098
2099       if (gimple_phi_num_args (def_stmt) != 1
2100           || TREE_CODE (op1) != SSA_NAME)
2101         {
2102           if (vect_print_dump_info (REPORT_DETAILS))
2103             fprintf (vect_dump, "unsupported phi node definition.");
2104
2105           return NULL;
2106         }
2107
2108       def1 = SSA_NAME_DEF_STMT (op1);
2109       if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2110           && loop->inner
2111           && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
2112           && is_gimple_assign (def1))
2113         {
2114           if (vect_print_dump_info (REPORT_DETAILS))
2115             report_vect_op (def_stmt, "detected double reduction: ");
2116
2117           *double_reduc = true;
2118           return def_stmt;
2119         }
2120
2121       return NULL;
2122     }
2123
2124   code = orig_code = gimple_assign_rhs_code (def_stmt);
2125
2126   /* We can handle "res -= x[i]", which is non-associative by
2127      simply rewriting this into "res += -x[i]".  Avoid changing
2128      gimple instruction for the first simple tests and only do this
2129      if we're allowed to change code at all.  */
2130   if (code == MINUS_EXPR
2131       && modify
2132       && (op1 = gimple_assign_rhs1 (def_stmt))
2133       && TREE_CODE (op1) == SSA_NAME
2134       && SSA_NAME_DEF_STMT (op1) == phi)
2135     code = PLUS_EXPR;
2136
2137   if (check_reduction
2138       && (!commutative_tree_code (code) || !associative_tree_code (code)))
2139     {
2140       if (vect_print_dump_info (REPORT_DETAILS))
2141         report_vect_op (def_stmt, "reduction: not commutative/associative: ");
2142       return NULL;
2143     }
2144
2145   if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
2146     {
2147       if (code != COND_EXPR)
2148         {
2149           if (vect_print_dump_info (REPORT_DETAILS))
2150             report_vect_op (def_stmt, "reduction: not binary operation: ");
2151
2152           return NULL;
2153         }
2154
2155       op3 = gimple_assign_rhs1 (def_stmt);
2156       if (COMPARISON_CLASS_P (op3))
2157         {
2158           op4 = TREE_OPERAND (op3, 1);
2159           op3 = TREE_OPERAND (op3, 0);
2160         }
2161
2162       op1 = gimple_assign_rhs2 (def_stmt);
2163       op2 = gimple_assign_rhs3 (def_stmt);
2164
2165       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2166         {
2167           if (vect_print_dump_info (REPORT_DETAILS))
2168             report_vect_op (def_stmt, "reduction: uses not ssa_names: ");
2169
2170           return NULL;
2171         }
2172     }
2173   else
2174     {
2175       op1 = gimple_assign_rhs1 (def_stmt);
2176       op2 = gimple_assign_rhs2 (def_stmt);
2177
2178       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2179         {
2180           if (vect_print_dump_info (REPORT_DETAILS))
2181             report_vect_op (def_stmt, "reduction: uses not ssa_names: ");
2182
2183           return NULL;
2184         }
2185    }
2186
2187   type = TREE_TYPE (gimple_assign_lhs (def_stmt));
2188   if ((TREE_CODE (op1) == SSA_NAME
2189        && !types_compatible_p (type,TREE_TYPE (op1)))
2190       || (TREE_CODE (op2) == SSA_NAME
2191           && !types_compatible_p (type, TREE_TYPE (op2)))
2192       || (op3 && TREE_CODE (op3) == SSA_NAME
2193           && !types_compatible_p (type, TREE_TYPE (op3)))
2194       || (op4 && TREE_CODE (op4) == SSA_NAME
2195           && !types_compatible_p (type, TREE_TYPE (op4))))
2196     {
2197       if (vect_print_dump_info (REPORT_DETAILS))
2198         {
2199           fprintf (vect_dump, "reduction: multiple types: operation type: ");
2200           print_generic_expr (vect_dump, type, TDF_SLIM);
2201           fprintf (vect_dump, ", operands types: ");
2202           print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
2203           fprintf (vect_dump, ",");
2204           print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
2205           if (op3)
2206             {
2207               fprintf (vect_dump, ",");
2208               print_generic_expr (vect_dump, TREE_TYPE (op3), TDF_SLIM);
2209             }
2210
2211           if (op4)
2212             {
2213               fprintf (vect_dump, ",");
2214               print_generic_expr (vect_dump, TREE_TYPE (op4), TDF_SLIM);
2215             }
2216         }
2217
2218       return NULL;
2219     }
2220
2221   /* Check that it's ok to change the order of the computation.
2222      Generally, when vectorizing a reduction we change the order of the
2223      computation.  This may change the behavior of the program in some
2224      cases, so we need to check that this is ok.  One exception is when
2225      vectorizing an outer-loop: the inner-loop is executed sequentially,
2226      and therefore vectorizing reductions in the inner-loop during
2227      outer-loop vectorization is safe.  */
2228
2229   /* CHECKME: check for !flag_finite_math_only too?  */
2230   if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
2231       && check_reduction)
2232     {
2233       /* Changing the order of operations changes the semantics.  */
2234       if (vect_print_dump_info (REPORT_DETAILS))
2235         report_vect_op (def_stmt, "reduction: unsafe fp math optimization: ");
2236       return NULL;
2237     }
2238   else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
2239            && check_reduction)
2240     {
2241       /* Changing the order of operations changes the semantics.  */
2242       if (vect_print_dump_info (REPORT_DETAILS))
2243         report_vect_op (def_stmt, "reduction: unsafe int math optimization: ");
2244       return NULL;
2245     }
2246   else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
2247     {
2248       /* Changing the order of operations changes the semantics.  */
2249       if (vect_print_dump_info (REPORT_DETAILS))
2250         report_vect_op (def_stmt,
2251                         "reduction: unsafe fixed-point math optimization: ");
2252       return NULL;
2253     }
2254
2255   /* If we detected "res -= x[i]" earlier, rewrite it into
2256      "res += -x[i]" now.  If this turns out to be useless reassoc
2257      will clean it up again.  */
2258   if (orig_code == MINUS_EXPR)
2259     {
2260       tree rhs = gimple_assign_rhs2 (def_stmt);
2261       tree negrhs = make_ssa_name (SSA_NAME_VAR (rhs), NULL);
2262       gimple negate_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, negrhs,
2263                                                          rhs, NULL);
2264       gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
2265       set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt,
2266                                                           loop_info, NULL));
2267       gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
2268       gimple_assign_set_rhs2 (def_stmt, negrhs);
2269       gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
2270       update_stmt (def_stmt);
2271     }
2272
2273   /* Reduction is safe. We're dealing with one of the following:
2274      1) integer arithmetic and no trapv
2275      2) floating point arithmetic, and special flags permit this optimization
2276      3) nested cycle (i.e., outer loop vectorization).  */
2277   if (TREE_CODE (op1) == SSA_NAME)
2278     def1 = SSA_NAME_DEF_STMT (op1);
2279
2280   if (TREE_CODE (op2) == SSA_NAME)
2281     def2 = SSA_NAME_DEF_STMT (op2);
2282
2283   if (code != COND_EXPR
2284       && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
2285     {
2286       if (vect_print_dump_info (REPORT_DETAILS))
2287         report_vect_op (def_stmt, "reduction: no defs for operands: ");
2288       return NULL;
2289     }
2290
2291   /* Check that one def is the reduction def, defined by PHI,
2292      the other def is either defined in the loop ("vect_internal_def"),
2293      or it's an induction (defined by a loop-header phi-node).  */
2294
2295   if (def2 && def2 == phi
2296       && (code == COND_EXPR
2297           || !def1 || gimple_nop_p (def1)
2298           || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
2299               && (is_gimple_assign (def1)
2300                   || is_gimple_call (def1)
2301                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2302                       == vect_induction_def
2303                   || (gimple_code (def1) == GIMPLE_PHI
2304                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2305                           == vect_internal_def
2306                       && !is_loop_header_bb_p (gimple_bb (def1)))))))
2307     {
2308       if (vect_print_dump_info (REPORT_DETAILS))
2309         report_vect_op (def_stmt, "detected reduction: ");
2310       return def_stmt;
2311     }
2312
2313   if (def1 && def1 == phi
2314       && (code == COND_EXPR
2315           || !def2 || gimple_nop_p (def2)
2316           || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
2317               && (is_gimple_assign (def2)
2318                   || is_gimple_call (def2)
2319                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2320                       == vect_induction_def
2321                   || (gimple_code (def2) == GIMPLE_PHI
2322                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2323                           == vect_internal_def
2324                       && !is_loop_header_bb_p (gimple_bb (def2)))))))
2325     {
2326       if (check_reduction)
2327         {
2328           /* Swap operands (just for simplicity - so that the rest of the code
2329              can assume that the reduction variable is always the last (second)
2330              argument).  */
2331           if (vect_print_dump_info (REPORT_DETAILS))
2332             report_vect_op (def_stmt,
2333                             "detected reduction: need to swap operands: ");
2334
2335           swap_tree_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
2336                               gimple_assign_rhs2_ptr (def_stmt));
2337         }
2338       else
2339         {
2340           if (vect_print_dump_info (REPORT_DETAILS))
2341             report_vect_op (def_stmt, "detected reduction: ");
2342         }
2343
2344       return def_stmt;
2345     }
2346
2347   /* Try to find SLP reduction chain.  */
2348   if (check_reduction && vect_is_slp_reduction (loop_info, phi, def_stmt))
2349     {
2350       if (vect_print_dump_info (REPORT_DETAILS))
2351         report_vect_op (def_stmt, "reduction: detected reduction chain: ");
2352
2353       return def_stmt;
2354     }
2355
2356   if (vect_print_dump_info (REPORT_DETAILS))
2357     report_vect_op (def_stmt, "reduction: unknown pattern: ");
2358
2359   return NULL;
2360 }
2361
2362 /* Wrapper around vect_is_simple_reduction_1, that won't modify code
2363    in-place.  Arguments as there.  */
2364
2365 static gimple
2366 vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
2367                           bool check_reduction, bool *double_reduc)
2368 {
2369   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2370                                      double_reduc, false);
2371 }
2372
2373 /* Wrapper around vect_is_simple_reduction_1, which will modify code
2374    in-place if it enables detection of more reductions.  Arguments
2375    as there.  */
2376
2377 gimple
2378 vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
2379                           bool check_reduction, bool *double_reduc)
2380 {
2381   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2382                                      double_reduc, true);
2383 }
2384
2385 /* Calculate the cost of one scalar iteration of the loop.  */
2386 int
2387 vect_get_single_scalar_iteraion_cost (loop_vec_info loop_vinfo)
2388 {
2389   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2390   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2391   int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
2392   int innerloop_iters, i, stmt_cost;
2393
2394   /* Count statements in scalar loop.  Using this as scalar cost for a single
2395      iteration for now.
2396
2397      TODO: Add outer loop support.
2398
2399      TODO: Consider assigning different costs to different scalar
2400      statements.  */
2401
2402   /* FORNOW.  */
2403   innerloop_iters = 1;
2404   if (loop->inner)
2405     innerloop_iters = 50; /* FIXME */
2406
2407   for (i = 0; i < nbbs; i++)
2408     {
2409       gimple_stmt_iterator si;
2410       basic_block bb = bbs[i];
2411
2412       if (bb->loop_father == loop->inner)
2413         factor = innerloop_iters;
2414       else
2415         factor = 1;
2416
2417       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2418         {
2419           gimple stmt = gsi_stmt (si);
2420           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2421
2422           if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
2423             continue;
2424
2425           /* Skip stmts that are not vectorized inside the loop.  */
2426           if (stmt_info
2427               && !STMT_VINFO_RELEVANT_P (stmt_info)
2428               && (!STMT_VINFO_LIVE_P (stmt_info)
2429                   || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
2430               && !STMT_VINFO_IN_PATTERN_P (stmt_info))
2431             continue;
2432
2433           if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
2434             {
2435               if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
2436                stmt_cost = vect_get_cost (scalar_load);
2437              else
2438                stmt_cost = vect_get_cost (scalar_store);
2439             }
2440           else
2441             stmt_cost = vect_get_cost (scalar_stmt);
2442
2443           scalar_single_iter_cost += stmt_cost * factor;
2444         }
2445     }
2446   return scalar_single_iter_cost;
2447 }
2448
2449 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */
2450 int
2451 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
2452                              int *peel_iters_epilogue,
2453                              int scalar_single_iter_cost)
2454 {
2455   int peel_guard_costs = 0;
2456   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2457
2458   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2459     {
2460       *peel_iters_epilogue = vf/2;
2461       if (vect_print_dump_info (REPORT_COST))
2462         fprintf (vect_dump, "cost model: "
2463                             "epilogue peel iters set to vf/2 because "
2464                             "loop iterations are unknown .");
2465
2466       /* If peeled iterations are known but number of scalar loop
2467          iterations are unknown, count a taken branch per peeled loop.  */
2468       peel_guard_costs =  2 * vect_get_cost (cond_branch_taken);
2469     }
2470   else
2471     {
2472       int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
2473       peel_iters_prologue = niters < peel_iters_prologue ?
2474                             niters : peel_iters_prologue;
2475       *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
2476       /* If we need to peel for gaps, but no peeling is required, we have to
2477          peel VF iterations.  */
2478       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
2479         *peel_iters_epilogue = vf;
2480     }
2481
2482    return (peel_iters_prologue * scalar_single_iter_cost)
2483             + (*peel_iters_epilogue * scalar_single_iter_cost)
2484            + peel_guard_costs;
2485 }
2486
2487 /* Function vect_estimate_min_profitable_iters
2488
2489    Return the number of iterations required for the vector version of the
2490    loop to be profitable relative to the cost of the scalar version of the
2491    loop.
2492
2493    TODO: Take profile info into account before making vectorization
2494    decisions, if available.  */
2495
2496 int
2497 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
2498 {
2499   int i;
2500   int min_profitable_iters;
2501   int peel_iters_prologue;
2502   int peel_iters_epilogue;
2503   int vec_inside_cost = 0;
2504   int vec_outside_cost = 0;
2505   int scalar_single_iter_cost = 0;
2506   int scalar_outside_cost = 0;
2507   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2508   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2509   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2510   int nbbs = loop->num_nodes;
2511   int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2512   int peel_guard_costs = 0;
2513   int innerloop_iters = 0, factor;
2514   VEC (slp_instance, heap) *slp_instances;
2515   slp_instance instance;
2516
2517   /* Cost model disabled.  */
2518   if (!flag_vect_cost_model)
2519     {
2520       if (vect_print_dump_info (REPORT_COST))
2521         fprintf (vect_dump, "cost model disabled.");
2522       return 0;
2523     }
2524
2525   /* Requires loop versioning tests to handle misalignment.  */
2526   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2527     {
2528       /*  FIXME: Make cost depend on complexity of individual check.  */
2529       vec_outside_cost +=
2530         VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
2531       if (vect_print_dump_info (REPORT_COST))
2532         fprintf (vect_dump, "cost model: Adding cost of checks for loop "
2533                  "versioning to treat misalignment.\n");
2534     }
2535
2536   /* Requires loop versioning with alias checks.  */
2537   if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2538     {
2539       /*  FIXME: Make cost depend on complexity of individual check.  */
2540       vec_outside_cost +=
2541         VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
2542       if (vect_print_dump_info (REPORT_COST))
2543         fprintf (vect_dump, "cost model: Adding cost of checks for loop "
2544                  "versioning aliasing.\n");
2545     }
2546
2547   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2548       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2549     vec_outside_cost += vect_get_cost (cond_branch_taken);
2550
2551   /* Count statements in scalar loop.  Using this as scalar cost for a single
2552      iteration for now.
2553
2554      TODO: Add outer loop support.
2555
2556      TODO: Consider assigning different costs to different scalar
2557      statements.  */
2558
2559   /* FORNOW.  */
2560   if (loop->inner)
2561     innerloop_iters = 50; /* FIXME */
2562
2563   for (i = 0; i < nbbs; i++)
2564     {
2565       gimple_stmt_iterator si;
2566       basic_block bb = bbs[i];
2567
2568       if (bb->loop_father == loop->inner)
2569         factor = innerloop_iters;
2570       else
2571         factor = 1;
2572
2573       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2574         {
2575           gimple stmt = gsi_stmt (si);
2576           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2577
2578           if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2579             {
2580               stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2581               stmt_info = vinfo_for_stmt (stmt);
2582             }
2583
2584           /* Skip stmts that are not vectorized inside the loop.  */
2585           if (!STMT_VINFO_RELEVANT_P (stmt_info)
2586               && (!STMT_VINFO_LIVE_P (stmt_info)
2587                  || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))))
2588             continue;
2589
2590           vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
2591           /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
2592              some of the "outside" costs are generated inside the outer-loop.  */
2593           vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
2594           if (is_pattern_stmt_p (stmt_info)
2595               && STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))
2596             {
2597               gimple_stmt_iterator gsi;
2598
2599               for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
2600                    !gsi_end_p (gsi); gsi_next (&gsi))
2601                 {
2602                   gimple pattern_def_stmt = gsi_stmt (gsi);
2603                   stmt_vec_info pattern_def_stmt_info
2604                     = vinfo_for_stmt (pattern_def_stmt);
2605                   if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
2606                       || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
2607                     {
2608                       vec_inside_cost
2609                         += STMT_VINFO_INSIDE_OF_LOOP_COST
2610                            (pattern_def_stmt_info) * factor;
2611                       vec_outside_cost
2612                         += STMT_VINFO_OUTSIDE_OF_LOOP_COST
2613                            (pattern_def_stmt_info);
2614                     }
2615                 }
2616             }
2617         }
2618     }
2619
2620   scalar_single_iter_cost = vect_get_single_scalar_iteraion_cost (loop_vinfo);
2621
2622   /* Add additional cost for the peeled instructions in prologue and epilogue
2623      loop.
2624
2625      FORNOW: If we don't know the value of peel_iters for prologue or epilogue
2626      at compile-time - we assume it's vf/2 (the worst would be vf-1).
2627
2628      TODO: Build an expression that represents peel_iters for prologue and
2629      epilogue to be used in a run-time test.  */
2630
2631   if (npeel  < 0)
2632     {
2633       peel_iters_prologue = vf/2;
2634       if (vect_print_dump_info (REPORT_COST))
2635         fprintf (vect_dump, "cost model: "
2636                  "prologue peel iters set to vf/2.");
2637
2638       /* If peeling for alignment is unknown, loop bound of main loop becomes
2639          unknown.  */
2640       peel_iters_epilogue = vf/2;
2641       if (vect_print_dump_info (REPORT_COST))
2642         fprintf (vect_dump, "cost model: "
2643                  "epilogue peel iters set to vf/2 because "
2644                  "peeling for alignment is unknown .");
2645
2646       /* If peeled iterations are unknown, count a taken branch and a not taken
2647          branch per peeled loop. Even if scalar loop iterations are known,
2648          vector iterations are not known since peeled prologue iterations are
2649          not known. Hence guards remain the same.  */
2650       peel_guard_costs +=  2 * (vect_get_cost (cond_branch_taken)
2651                                 + vect_get_cost (cond_branch_not_taken));
2652       vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
2653                            + (peel_iters_epilogue * scalar_single_iter_cost)
2654                            + peel_guard_costs;
2655     }
2656   else
2657     {
2658       peel_iters_prologue = npeel;
2659       vec_outside_cost += vect_get_known_peeling_cost (loop_vinfo,
2660                                     peel_iters_prologue, &peel_iters_epilogue,
2661                                     scalar_single_iter_cost);
2662     }
2663
2664   /* FORNOW: The scalar outside cost is incremented in one of the
2665      following ways:
2666
2667      1. The vectorizer checks for alignment and aliasing and generates
2668      a condition that allows dynamic vectorization.  A cost model
2669      check is ANDED with the versioning condition.  Hence scalar code
2670      path now has the added cost of the versioning check.
2671
2672        if (cost > th & versioning_check)
2673          jmp to vector code
2674
2675      Hence run-time scalar is incremented by not-taken branch cost.
2676
2677      2. The vectorizer then checks if a prologue is required.  If the
2678      cost model check was not done before during versioning, it has to
2679      be done before the prologue check.
2680
2681        if (cost <= th)
2682          prologue = scalar_iters
2683        if (prologue == 0)
2684          jmp to vector code
2685        else
2686          execute prologue
2687        if (prologue == num_iters)
2688          go to exit
2689
2690      Hence the run-time scalar cost is incremented by a taken branch,
2691      plus a not-taken branch, plus a taken branch cost.
2692
2693      3. The vectorizer then checks if an epilogue is required.  If the
2694      cost model check was not done before during prologue check, it
2695      has to be done with the epilogue check.
2696
2697        if (prologue == 0)
2698          jmp to vector code
2699        else
2700          execute prologue
2701        if (prologue == num_iters)
2702          go to exit
2703        vector code:
2704          if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
2705            jmp to epilogue
2706
2707      Hence the run-time scalar cost should be incremented by 2 taken
2708      branches.
2709
2710      TODO: The back end may reorder the BBS's differently and reverse
2711      conditions/branch directions.  Change the estimates below to
2712      something more reasonable.  */
2713
2714   /* If the number of iterations is known and we do not do versioning, we can
2715      decide whether to vectorize at compile time.  Hence the scalar version
2716      do not carry cost model guard costs.  */
2717   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2718       || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2719       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2720     {
2721       /* Cost model check occurs at versioning.  */
2722       if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2723           || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2724         scalar_outside_cost += vect_get_cost (cond_branch_not_taken);
2725       else
2726         {
2727           /* Cost model check occurs at prologue generation.  */
2728           if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
2729             scalar_outside_cost += 2 * vect_get_cost (cond_branch_taken)
2730                                    + vect_get_cost (cond_branch_not_taken);
2731           /* Cost model check occurs at epilogue generation.  */
2732           else
2733             scalar_outside_cost += 2 * vect_get_cost (cond_branch_taken);
2734         }
2735     }
2736
2737   /* Add SLP costs.  */
2738   slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2739   FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2740     {
2741       vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
2742       vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
2743     }
2744
2745   /* Calculate number of iterations required to make the vector version
2746      profitable, relative to the loop bodies only.  The following condition
2747      must hold true:
2748      SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
2749      where
2750      SIC = scalar iteration cost, VIC = vector iteration cost,
2751      VOC = vector outside cost, VF = vectorization factor,
2752      PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
2753      SOC = scalar outside cost for run time cost model check.  */
2754
2755   if ((scalar_single_iter_cost * vf) > vec_inside_cost)
2756     {
2757       if (vec_outside_cost <= 0)
2758         min_profitable_iters = 1;
2759       else
2760         {
2761           min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
2762                                   - vec_inside_cost * peel_iters_prologue
2763                                   - vec_inside_cost * peel_iters_epilogue)
2764                                  / ((scalar_single_iter_cost * vf)
2765                                     - vec_inside_cost);
2766
2767           if ((scalar_single_iter_cost * vf * min_profitable_iters)
2768               <= ((vec_inside_cost * min_profitable_iters)
2769                   + ((vec_outside_cost - scalar_outside_cost) * vf)))
2770             min_profitable_iters++;
2771         }
2772     }
2773   /* vector version will never be profitable.  */
2774   else
2775     {
2776       if (vect_print_dump_info (REPORT_COST))
2777         fprintf (vect_dump, "cost model: the vector iteration cost = %d "
2778                  "divided by the scalar iteration cost = %d "
2779                  "is greater or equal to the vectorization factor = %d.",
2780                  vec_inside_cost, scalar_single_iter_cost, vf);
2781       return -1;
2782     }
2783
2784   if (vect_print_dump_info (REPORT_COST))
2785     {
2786       fprintf (vect_dump, "Cost model analysis: \n");
2787       fprintf (vect_dump, "  Vector inside of loop cost: %d\n",
2788                vec_inside_cost);
2789       fprintf (vect_dump, "  Vector outside of loop cost: %d\n",
2790                vec_outside_cost);
2791       fprintf (vect_dump, "  Scalar iteration cost: %d\n",
2792                scalar_single_iter_cost);
2793       fprintf (vect_dump, "  Scalar outside cost: %d\n", scalar_outside_cost);
2794       fprintf (vect_dump, "  prologue iterations: %d\n",
2795                peel_iters_prologue);
2796       fprintf (vect_dump, "  epilogue iterations: %d\n",
2797                peel_iters_epilogue);
2798       fprintf (vect_dump, "  Calculated minimum iters for profitability: %d\n",
2799                min_profitable_iters);
2800     }
2801
2802   min_profitable_iters =
2803         min_profitable_iters < vf ? vf : min_profitable_iters;
2804
2805   /* Because the condition we create is:
2806      if (niters <= min_profitable_iters)
2807        then skip the vectorized loop.  */
2808   min_profitable_iters--;
2809
2810   if (vect_print_dump_info (REPORT_COST))
2811     fprintf (vect_dump, "  Profitability threshold = %d\n",
2812              min_profitable_iters);
2813
2814   return min_profitable_iters;
2815 }
2816
2817
2818 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
2819    functions. Design better to avoid maintenance issues.  */
2820
2821 /* Function vect_model_reduction_cost.
2822
2823    Models cost for a reduction operation, including the vector ops
2824    generated within the strip-mine loop, the initial definition before
2825    the loop, and the epilogue code that must be generated.  */
2826
2827 static bool
2828 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
2829                            int ncopies)
2830 {
2831   int outer_cost = 0;
2832   enum tree_code code;
2833   optab optab;
2834   tree vectype;
2835   gimple stmt, orig_stmt;
2836   tree reduction_op;
2837   enum machine_mode mode;
2838   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2839   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2840
2841
2842   /* Cost of reduction op inside loop.  */
2843   STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info)
2844     += ncopies * vect_get_cost (vector_stmt);
2845
2846   stmt = STMT_VINFO_STMT (stmt_info);
2847
2848   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
2849     {
2850     case GIMPLE_SINGLE_RHS:
2851       gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)) == ternary_op);
2852       reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), 2);
2853       break;
2854     case GIMPLE_UNARY_RHS:
2855       reduction_op = gimple_assign_rhs1 (stmt);
2856       break;
2857     case GIMPLE_BINARY_RHS:
2858       reduction_op = gimple_assign_rhs2 (stmt);
2859       break;
2860     case GIMPLE_TERNARY_RHS:
2861       reduction_op = gimple_assign_rhs3 (stmt);
2862       break;
2863     default:
2864       gcc_unreachable ();
2865     }
2866
2867   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
2868   if (!vectype)
2869     {
2870       if (vect_print_dump_info (REPORT_COST))
2871         {
2872           fprintf (vect_dump, "unsupported data-type ");
2873           print_generic_expr (vect_dump, TREE_TYPE (reduction_op), TDF_SLIM);
2874         }
2875       return false;
2876    }
2877
2878   mode = TYPE_MODE (vectype);
2879   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2880
2881   if (!orig_stmt)
2882     orig_stmt = STMT_VINFO_STMT (stmt_info);
2883
2884   code = gimple_assign_rhs_code (orig_stmt);
2885
2886   /* Add in cost for initial definition.  */
2887   outer_cost += vect_get_cost (scalar_to_vec);
2888
2889   /* Determine cost of epilogue code.
2890
2891      We have a reduction operator that will reduce the vector in one statement.
2892      Also requires scalar extract.  */
2893
2894   if (!nested_in_vect_loop_p (loop, orig_stmt))
2895     {
2896       if (reduc_code != ERROR_MARK)
2897         outer_cost += vect_get_cost (vector_stmt)
2898                       + vect_get_cost (vec_to_scalar);
2899       else
2900         {
2901           int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
2902           tree bitsize =
2903             TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
2904           int element_bitsize = tree_low_cst (bitsize, 1);
2905           int nelements = vec_size_in_bits / element_bitsize;
2906
2907           optab = optab_for_tree_code (code, vectype, optab_default);
2908
2909           /* We have a whole vector shift available.  */
2910           if (VECTOR_MODE_P (mode)
2911               && optab_handler (optab, mode) != CODE_FOR_nothing
2912               && optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
2913             /* Final reduction via vector shifts and the reduction operator. Also
2914                requires scalar extract.  */
2915             outer_cost += ((exact_log2(nelements) * 2)
2916               * vect_get_cost (vector_stmt)
2917               + vect_get_cost (vec_to_scalar));
2918           else
2919             /* Use extracts and reduction op for final reduction.  For N elements,
2920                we have N extracts and N-1 reduction ops.  */
2921             outer_cost += ((nelements + nelements - 1)
2922               * vect_get_cost (vector_stmt));
2923         }
2924     }
2925
2926   STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
2927
2928   if (vect_print_dump_info (REPORT_COST))
2929     fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
2930              "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
2931              STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
2932
2933   return true;
2934 }
2935
2936
2937 /* Function vect_model_induction_cost.
2938
2939    Models cost for induction operations.  */
2940
2941 static void
2942 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
2943 {
2944   /* loop cost for vec_loop.  */
2945   STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info)
2946     = ncopies * vect_get_cost (vector_stmt);
2947   /* prologue cost for vec_init and vec_step.  */
2948   STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info)
2949     = 2 * vect_get_cost (scalar_to_vec);
2950
2951   if (vect_print_dump_info (REPORT_COST))
2952     fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
2953              "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
2954              STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
2955 }
2956
2957
2958 /* Function get_initial_def_for_induction
2959
2960    Input:
2961    STMT - a stmt that performs an induction operation in the loop.
2962    IV_PHI - the initial value of the induction variable
2963
2964    Output:
2965    Return a vector variable, initialized with the first VF values of
2966    the induction variable.  E.g., for an iv with IV_PHI='X' and
2967    evolution S, for a vector of 4 units, we want to return:
2968    [X, X + S, X + 2*S, X + 3*S].  */
2969
2970 static tree
2971 get_initial_def_for_induction (gimple iv_phi)
2972 {
2973   stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
2974   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2975   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2976   tree scalar_type;
2977   tree vectype;
2978   int nunits;
2979   edge pe = loop_preheader_edge (loop);
2980   struct loop *iv_loop;
2981   basic_block new_bb;
2982   tree vec, vec_init, vec_step, t;
2983   tree access_fn;
2984   tree new_var;
2985   tree new_name;
2986   gimple init_stmt, induction_phi, new_stmt;
2987   tree induc_def, vec_def, vec_dest;
2988   tree init_expr, step_expr;
2989   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2990   int i;
2991   bool ok;
2992   int ncopies;
2993   tree expr;
2994   stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
2995   bool nested_in_vect_loop = false;
2996   gimple_seq stmts = NULL;
2997   imm_use_iterator imm_iter;
2998   use_operand_p use_p;
2999   gimple exit_phi;
3000   edge latch_e;
3001   tree loop_arg;
3002   gimple_stmt_iterator si;
3003   basic_block bb = gimple_bb (iv_phi);
3004   tree stepvectype;
3005   tree resvectype;
3006
3007   /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop?  */
3008   if (nested_in_vect_loop_p (loop, iv_phi))
3009     {
3010       nested_in_vect_loop = true;
3011       iv_loop = loop->inner;
3012     }
3013   else
3014     iv_loop = loop;
3015   gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);
3016
3017   latch_e = loop_latch_edge (iv_loop);
3018   loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
3019
3020   access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
3021   gcc_assert (access_fn);
3022   STRIP_NOPS (access_fn);
3023   ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
3024                                     &init_expr, &step_expr);
3025   gcc_assert (ok);
3026   pe = loop_preheader_edge (iv_loop);
3027
3028   scalar_type = TREE_TYPE (init_expr);
3029   vectype = get_vectype_for_scalar_type (scalar_type);
3030   resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
3031   gcc_assert (vectype);
3032   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3033   ncopies = vf / nunits;
3034
3035   gcc_assert (phi_info);
3036   gcc_assert (ncopies >= 1);
3037
3038   /* Find the first insertion point in the BB.  */
3039   si = gsi_after_labels (bb);
3040
3041   /* Create the vector that holds the initial_value of the induction.  */
3042   if (nested_in_vect_loop)
3043     {
3044       /* iv_loop is nested in the loop to be vectorized.  init_expr had already
3045          been created during vectorization of previous stmts.  We obtain it
3046          from the STMT_VINFO_VEC_STMT of the defining stmt.  */
3047       tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi,
3048                                            loop_preheader_edge (iv_loop));
3049       vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
3050     }
3051   else
3052     {
3053       VEC(constructor_elt,gc) *v;
3054
3055       /* iv_loop is the loop to be vectorized. Create:
3056          vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr)  */
3057       new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
3058       add_referenced_var (new_var);
3059
3060       new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
3061       if (stmts)
3062         {
3063           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3064           gcc_assert (!new_bb);
3065         }
3066
3067       v = VEC_alloc (constructor_elt, gc, nunits);
3068       CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3069       for (i = 1; i < nunits; i++)
3070         {
3071           /* Create: new_name_i = new_name + step_expr  */
3072           enum tree_code code = POINTER_TYPE_P (scalar_type)
3073                                 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3074           init_stmt = gimple_build_assign_with_ops (code, new_var,
3075                                                     new_name, step_expr);
3076           new_name = make_ssa_name (new_var, init_stmt);
3077           gimple_assign_set_lhs (init_stmt, new_name);
3078
3079           new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
3080           gcc_assert (!new_bb);
3081
3082           if (vect_print_dump_info (REPORT_DETAILS))
3083             {
3084               fprintf (vect_dump, "created new init_stmt: ");
3085               print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
3086             }
3087           CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3088         }
3089       /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1]  */
3090       vec = build_constructor (vectype, v);
3091       vec_init = vect_init_vector (iv_phi, vec, vectype, NULL);
3092     }
3093
3094
3095   /* Create the vector that holds the step of the induction.  */
3096   if (nested_in_vect_loop)
3097     /* iv_loop is nested in the loop to be vectorized. Generate:
3098        vec_step = [S, S, S, S]  */
3099     new_name = step_expr;
3100   else
3101     {
3102       /* iv_loop is the loop to be vectorized. Generate:
3103           vec_step = [VF*S, VF*S, VF*S, VF*S]  */
3104       expr = build_int_cst (TREE_TYPE (step_expr), vf);
3105       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3106                               expr, step_expr);
3107     }
3108
3109   t = unshare_expr (new_name);
3110   gcc_assert (CONSTANT_CLASS_P (new_name));
3111   stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
3112   gcc_assert (stepvectype);
3113   vec = build_vector_from_val (stepvectype, t);
3114   vec_step = vect_init_vector (iv_phi, vec, stepvectype, NULL);
3115
3116
3117   /* Create the following def-use cycle:
3118      loop prolog:
3119          vec_init = ...
3120          vec_step = ...
3121      loop:
3122          vec_iv = PHI <vec_init, vec_loop>
3123          ...
3124          STMT
3125          ...
3126          vec_loop = vec_iv + vec_step;  */
3127
3128   /* Create the induction-phi that defines the induction-operand.  */
3129   vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
3130   add_referenced_var (vec_dest);
3131   induction_phi = create_phi_node (vec_dest, iv_loop->header);
3132   set_vinfo_for_stmt (induction_phi,
3133                       new_stmt_vec_info (induction_phi, loop_vinfo, NULL));
3134   induc_def = PHI_RESULT (induction_phi);
3135
3136   /* Create the iv update inside the loop  */
3137   new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
3138                                            induc_def, vec_step);
3139   vec_def = make_ssa_name (vec_dest, new_stmt);
3140   gimple_assign_set_lhs (new_stmt, vec_def);
3141   gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3142   set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo,
3143                                                    NULL));
3144
3145   /* Set the arguments of the phi node:  */
3146   add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
3147   add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
3148                UNKNOWN_LOCATION);
3149
3150
3151   /* In case that vectorization factor (VF) is bigger than the number
3152      of elements that we can fit in a vectype (nunits), we have to generate
3153      more than one vector stmt - i.e - we need to "unroll" the
3154      vector stmt by a factor VF/nunits.  For more details see documentation
3155      in vectorizable_operation.  */
3156
3157   if (ncopies > 1)
3158     {
3159       stmt_vec_info prev_stmt_vinfo;
3160       /* FORNOW. This restriction should be relaxed.  */
3161       gcc_assert (!nested_in_vect_loop);
3162
3163       /* Create the vector that holds the step of the induction.  */
3164       expr = build_int_cst (TREE_TYPE (step_expr), nunits);
3165       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3166                               expr, step_expr);
3167       t = unshare_expr (new_name);
3168       gcc_assert (CONSTANT_CLASS_P (new_name));
3169       vec = build_vector_from_val (stepvectype, t);
3170       vec_step = vect_init_vector (iv_phi, vec, stepvectype, NULL);
3171
3172       vec_def = induc_def;
3173       prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
3174       for (i = 1; i < ncopies; i++)
3175         {
3176           /* vec_i = vec_prev + vec_step  */
3177           new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
3178                                                    vec_def, vec_step);
3179           vec_def = make_ssa_name (vec_dest, new_stmt);
3180           gimple_assign_set_lhs (new_stmt, vec_def);
3181
3182           gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3183           if (!useless_type_conversion_p (resvectype, vectype))
3184             {
3185               new_stmt = gimple_build_assign_with_ops
3186                   (VIEW_CONVERT_EXPR,
3187                    vect_get_new_vect_var (resvectype, vect_simple_var,
3188                                           "vec_iv_"),
3189                    build1 (VIEW_CONVERT_EXPR, resvectype,
3190                            gimple_assign_lhs (new_stmt)), NULL_TREE);
3191               gimple_assign_set_lhs (new_stmt,
3192                                      make_ssa_name
3193                                        (gimple_assign_lhs (new_stmt), new_stmt));
3194               gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3195             }
3196           set_vinfo_for_stmt (new_stmt,
3197                               new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3198           STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
3199           prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
3200         }
3201     }
3202
3203   if (nested_in_vect_loop)
3204     {
3205       /* Find the loop-closed exit-phi of the induction, and record
3206          the final vector of induction results:  */
3207       exit_phi = NULL;
3208       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
3209         {
3210           if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (USE_STMT (use_p))))
3211             {
3212               exit_phi = USE_STMT (use_p);
3213               break;
3214             }
3215         }
3216       if (exit_phi)
3217         {
3218           stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
3219           /* FORNOW. Currently not supporting the case that an inner-loop induction
3220              is not used in the outer-loop (i.e. only outside the outer-loop).  */
3221           gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
3222                       && !STMT_VINFO_LIVE_P (stmt_vinfo));
3223
3224           STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
3225           if (vect_print_dump_info (REPORT_DETAILS))
3226             {
3227               fprintf (vect_dump, "vector of inductions after inner-loop:");
3228               print_gimple_stmt (vect_dump, new_stmt, 0, TDF_SLIM);
3229             }
3230         }
3231     }
3232
3233
3234   if (vect_print_dump_info (REPORT_DETAILS))
3235     {
3236       fprintf (vect_dump, "transform induction: created def-use cycle: ");
3237       print_gimple_stmt (vect_dump, induction_phi, 0, TDF_SLIM);
3238       fprintf (vect_dump, "\n");
3239       print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (vec_def), 0, TDF_SLIM);
3240     }
3241
3242   STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
3243   if (!useless_type_conversion_p (resvectype, vectype))
3244     {
3245       new_stmt = gimple_build_assign_with_ops
3246          (VIEW_CONVERT_EXPR,
3247           vect_get_new_vect_var (resvectype, vect_simple_var, "vec_iv_"),
3248           build1 (VIEW_CONVERT_EXPR, resvectype, induc_def), NULL_TREE);
3249       induc_def = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3250       gimple_assign_set_lhs (new_stmt, induc_def);
3251       si = gsi_start_bb (bb);
3252       gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3253       set_vinfo_for_stmt (new_stmt,
3254                           new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3255       STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
3256         = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
3257     }
3258
3259   return induc_def;
3260 }
3261
3262
3263 /* Function get_initial_def_for_reduction
3264
3265    Input:
3266    STMT - a stmt that performs a reduction operation in the loop.
3267    INIT_VAL - the initial value of the reduction variable
3268
3269    Output:
3270    ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
3271         of the reduction (used for adjusting the epilog - see below).
3272    Return a vector variable, initialized according to the operation that STMT
3273         performs. This vector will be used as the initial value of the
3274         vector of partial results.
3275
3276    Option1 (adjust in epilog): Initialize the vector as follows:
3277      add/bit or/xor:    [0,0,...,0,0]
3278      mult/bit and:      [1,1,...,1,1]
3279      min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
3280    and when necessary (e.g. add/mult case) let the caller know
3281    that it needs to adjust the result by init_val.
3282
3283    Option2: Initialize the vector as follows:
3284      add/bit or/xor:    [init_val,0,0,...,0]
3285      mult/bit and:      [init_val,1,1,...,1]
3286      min/max/cond_expr: [init_val,init_val,...,init_val]
3287    and no adjustments are needed.
3288
3289    For example, for the following code:
3290
3291    s = init_val;
3292    for (i=0;i<n;i++)
3293      s = s + a[i];
3294
3295    STMT is 's = s + a[i]', and the reduction variable is 's'.
3296    For a vector of 4 units, we want to return either [0,0,0,init_val],
3297    or [0,0,0,0] and let the caller know that it needs to adjust
3298    the result at the end by 'init_val'.
3299
3300    FORNOW, we are using the 'adjust in epilog' scheme, because this way the
3301    initialization vector is simpler (same element in all entries), if
3302    ADJUSTMENT_DEF is not NULL, and Option2 otherwise.
3303
3304    A cost model should help decide between these two schemes.  */
3305
3306 tree
3307 get_initial_def_for_reduction (gimple stmt, tree init_val,
3308                                tree *adjustment_def)
3309 {
3310   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3311   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3312   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3313   tree scalar_type = TREE_TYPE (init_val);
3314   tree vectype = get_vectype_for_scalar_type (scalar_type);
3315   int nunits;
3316   enum tree_code code = gimple_assign_rhs_code (stmt);
3317   tree def_for_init;
3318   tree init_def;
3319   tree *elts;
3320   int i;
3321   bool nested_in_vect_loop = false;
3322   tree init_value;
3323   REAL_VALUE_TYPE real_init_val = dconst0;
3324   int int_init_val = 0;
3325   gimple def_stmt = NULL;
3326
3327   gcc_assert (vectype);
3328   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3329
3330   gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
3331               || SCALAR_FLOAT_TYPE_P (scalar_type));
3332
3333   if (nested_in_vect_loop_p (loop, stmt))
3334     nested_in_vect_loop = true;
3335   else
3336     gcc_assert (loop == (gimple_bb (stmt))->loop_father);
3337
3338   /* In case of double reduction we only create a vector variable to be put
3339      in the reduction phi node.  The actual statement creation is done in
3340      vect_create_epilog_for_reduction.  */
3341   if (adjustment_def && nested_in_vect_loop
3342       && TREE_CODE (init_val) == SSA_NAME
3343       && (def_stmt = SSA_NAME_DEF_STMT (init_val))
3344       && gimple_code (def_stmt) == GIMPLE_PHI
3345       && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
3346       && vinfo_for_stmt (def_stmt)
3347       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
3348           == vect_double_reduction_def)
3349     {
3350       *adjustment_def = NULL;
3351       return vect_create_destination_var (init_val, vectype);
3352     }
3353
3354   if (TREE_CONSTANT (init_val))
3355     {
3356       if (SCALAR_FLOAT_TYPE_P (scalar_type))
3357         init_value = build_real (scalar_type, TREE_REAL_CST (init_val));
3358       else
3359         init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val));
3360     }
3361   else
3362     init_value = init_val;
3363
3364   switch (code)
3365     {
3366       case WIDEN_SUM_EXPR:
3367       case DOT_PROD_EXPR:
3368       case PLUS_EXPR:
3369       case MINUS_EXPR:
3370       case BIT_IOR_EXPR:
3371       case BIT_XOR_EXPR:
3372       case MULT_EXPR:
3373       case BIT_AND_EXPR:
3374         /* ADJUSMENT_DEF is NULL when called from
3375            vect_create_epilog_for_reduction to vectorize double reduction.  */
3376         if (adjustment_def)
3377           {
3378             if (nested_in_vect_loop)
3379               *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt,
3380                                                               NULL);
3381             else
3382               *adjustment_def = init_val;
3383           }
3384
3385         if (code == MULT_EXPR)
3386           {
3387             real_init_val = dconst1;
3388             int_init_val = 1;
3389           }
3390
3391         if (code == BIT_AND_EXPR)
3392           int_init_val = -1;
3393
3394         if (SCALAR_FLOAT_TYPE_P (scalar_type))
3395           def_for_init = build_real (scalar_type, real_init_val);
3396         else
3397           def_for_init = build_int_cst (scalar_type, int_init_val);
3398
3399         /* Create a vector of '0' or '1' except the first element.  */
3400         elts = XALLOCAVEC (tree, nunits);
3401         for (i = nunits - 2; i >= 0; --i)
3402           elts[i + 1] = def_for_init;
3403
3404         /* Option1: the first element is '0' or '1' as well.  */
3405         if (adjustment_def)
3406           {
3407             elts[0] = def_for_init;
3408             init_def = build_vector (vectype, elts);
3409             break;
3410           }
3411
3412         /* Option2: the first element is INIT_VAL.  */
3413         elts[0] = init_val;
3414         if (TREE_CONSTANT (init_val))
3415           init_def = build_vector (vectype, elts);
3416         else
3417           {
3418             VEC(constructor_elt,gc) *v;
3419             v = VEC_alloc (constructor_elt, gc, nunits);
3420             CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init_val);
3421             for (i = 1; i < nunits; ++i)
3422               CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[i]);
3423             init_def = build_constructor (vectype, v);
3424           }
3425
3426         break;
3427
3428       case MIN_EXPR:
3429       case MAX_EXPR:
3430       case COND_EXPR:
3431         if (adjustment_def)
3432           {
3433             *adjustment_def = NULL_TREE;
3434             init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL);
3435             break;
3436           }
3437
3438         init_def = build_vector_from_val (vectype, init_value);
3439         break;
3440
3441       default:
3442         gcc_unreachable ();
3443     }
3444
3445   return init_def;
3446 }
3447
3448
3449 /* Function vect_create_epilog_for_reduction
3450
3451    Create code at the loop-epilog to finalize the result of a reduction
3452    computation.
3453
3454    VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector
3455      reduction statements.
3456    STMT is the scalar reduction stmt that is being vectorized.
3457    NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
3458      number of elements that we can fit in a vectype (nunits).  In this case
3459      we have to generate more than one vector stmt - i.e - we need to "unroll"
3460      the vector stmt by a factor VF/nunits.  For more details see documentation
3461      in vectorizable_operation.
3462    REDUC_CODE is the tree-code for the epilog reduction.
3463    REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
3464      computation.
3465    REDUC_INDEX is the index of the operand in the right hand side of the
3466      statement that is defined by REDUCTION_PHI.
3467    DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
3468    SLP_NODE is an SLP node containing a group of reduction statements. The
3469      first one in this group is STMT.
3470
3471    This function:
3472    1. Creates the reduction def-use cycles: sets the arguments for
3473       REDUCTION_PHIS:
3474       The loop-entry argument is the vectorized initial-value of the reduction.
3475       The loop-latch argument is taken from VECT_DEFS - the vector of partial
3476       sums.
3477    2. "Reduces" each vector of partial results VECT_DEFS into a single result,
3478       by applying the operation specified by REDUC_CODE if available, or by
3479       other means (whole-vector shifts or a scalar loop).
3480       The function also creates a new phi node at the loop exit to preserve
3481       loop-closed form, as illustrated below.
3482
3483      The flow at the entry to this function:
3484
3485         loop:
3486           vec_def = phi <null, null>            # REDUCTION_PHI
3487           VECT_DEF = vector_stmt                # vectorized form of STMT
3488           s_loop = scalar_stmt                  # (scalar) STMT
3489         loop_exit:
3490           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
3491           use <s_out0>
3492           use <s_out0>
3493
3494      The above is transformed by this function into:
3495
3496         loop:
3497           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
3498           VECT_DEF = vector_stmt                # vectorized form of STMT
3499           s_loop = scalar_stmt                  # (scalar) STMT
3500         loop_exit:
3501           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
3502           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
3503           v_out2 = reduce <v_out1>
3504           s_out3 = extract_field <v_out2, 0>
3505           s_out4 = adjust_result <s_out3>
3506           use <s_out4>
3507           use <s_out4>
3508 */
3509
3510 static void
3511 vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
3512                                   int ncopies, enum tree_code reduc_code,
3513                                   VEC (gimple, heap) *reduction_phis,
3514                                   int reduc_index, bool double_reduc,
3515                                   slp_tree slp_node)
3516 {
3517   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3518   stmt_vec_info prev_phi_info;
3519   tree vectype;
3520   enum machine_mode mode;
3521   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3522   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
3523   basic_block exit_bb;
3524   tree scalar_dest;
3525   tree scalar_type;
3526   gimple new_phi = NULL, phi;
3527   gimple_stmt_iterator exit_gsi;
3528   tree vec_dest;
3529   tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
3530   gimple epilog_stmt = NULL;
3531   enum tree_code code = gimple_assign_rhs_code (stmt);
3532   gimple exit_phi;
3533   tree bitsize, bitpos;
3534   tree adjustment_def = NULL;
3535   tree vec_initial_def = NULL;
3536   tree reduction_op, expr, def;
3537   tree orig_name, scalar_result;
3538   imm_use_iterator imm_iter, phi_imm_iter;
3539   use_operand_p use_p, phi_use_p;
3540   bool extract_scalar_result = false;
3541   gimple use_stmt, orig_stmt, reduction_phi = NULL;
3542   bool nested_in_vect_loop = false;
3543   VEC (gimple, heap) *new_phis = NULL;
3544   VEC (gimple, heap) *inner_phis = NULL;
3545   enum vect_def_type dt = vect_unknown_def_type;
3546   int j, i;
3547   VEC (tree, heap) *scalar_results = NULL;
3548   unsigned int group_size = 1, k, ratio;
3549   VEC (tree, heap) *vec_initial_defs = NULL;
3550   VEC (gimple, heap) *phis;
3551   bool slp_reduc = false;
3552   tree new_phi_result;
3553   gimple inner_phi = NULL;
3554
3555   if (slp_node)
3556     group_size = VEC_length (gimple, SLP_TREE_SCALAR_STMTS (slp_node));
3557
3558   if (nested_in_vect_loop_p (loop, stmt))
3559     {
3560       outer_loop = loop;
3561       loop = loop->inner;
3562       nested_in_vect_loop = true;
3563       gcc_assert (!slp_node);
3564     }
3565
3566   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3567     {
3568     case GIMPLE_SINGLE_RHS:
3569       gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
3570                   == ternary_op);
3571       reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
3572       break;
3573     case GIMPLE_UNARY_RHS:
3574       reduction_op = gimple_assign_rhs1 (stmt);
3575       break;
3576     case GIMPLE_BINARY_RHS:
3577       reduction_op = reduc_index ?
3578                      gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt);
3579       break;
3580     case GIMPLE_TERNARY_RHS:
3581       reduction_op = gimple_op (stmt, reduc_index + 1);
3582       break;
3583     default:
3584       gcc_unreachable ();
3585     }
3586
3587   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3588   gcc_assert (vectype);
3589   mode = TYPE_MODE (vectype);
3590
3591   /* 1. Create the reduction def-use cycle:
3592      Set the arguments of REDUCTION_PHIS, i.e., transform
3593
3594         loop:
3595           vec_def = phi <null, null>            # REDUCTION_PHI
3596           VECT_DEF = vector_stmt                # vectorized form of STMT
3597           ...
3598
3599      into:
3600
3601         loop:
3602           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
3603           VECT_DEF = vector_stmt                # vectorized form of STMT
3604           ...
3605
3606      (in case of SLP, do it for all the phis). */
3607
3608   /* Get the loop-entry arguments.  */
3609   if (slp_node)
3610     vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
3611                        NULL, slp_node, reduc_index);
3612   else
3613     {
3614       vec_initial_defs = VEC_alloc (tree, heap, 1);
3615      /* For the case of reduction, vect_get_vec_def_for_operand returns
3616         the scalar def before the loop, that defines the initial value
3617         of the reduction variable.  */
3618       vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
3619                                                       &adjustment_def);
3620       VEC_quick_push (tree, vec_initial_defs, vec_initial_def);
3621     }
3622
3623   /* Set phi nodes arguments.  */
3624   FOR_EACH_VEC_ELT (gimple, reduction_phis, i, phi)
3625     {
3626       tree vec_init_def = VEC_index (tree, vec_initial_defs, i);
3627       tree def = VEC_index (tree, vect_defs, i);
3628       for (j = 0; j < ncopies; j++)
3629         {
3630           /* Set the loop-entry arg of the reduction-phi.  */
3631           add_phi_arg (phi, vec_init_def, loop_preheader_edge (loop),
3632                        UNKNOWN_LOCATION);
3633
3634           /* Set the loop-latch arg for the reduction-phi.  */
3635           if (j > 0)
3636             def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);
3637
3638           add_phi_arg (phi, def, loop_latch_edge (loop), UNKNOWN_LOCATION);
3639
3640           if (vect_print_dump_info (REPORT_DETAILS))
3641             {
3642               fprintf (vect_dump, "transform reduction: created def-use"
3643                                   " cycle: ");
3644               print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
3645               fprintf (vect_dump, "\n");
3646               print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (def), 0,
3647                                  TDF_SLIM);
3648             }
3649
3650           phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
3651         }
3652     }
3653
3654   VEC_free (tree, heap, vec_initial_defs);
3655
3656   /* 2. Create epilog code.
3657         The reduction epilog code operates across the elements of the vector
3658         of partial results computed by the vectorized loop.
3659         The reduction epilog code consists of:
3660
3661         step 1: compute the scalar result in a vector (v_out2)
3662         step 2: extract the scalar result (s_out3) from the vector (v_out2)
3663         step 3: adjust the scalar result (s_out3) if needed.
3664
3665         Step 1 can be accomplished using one the following three schemes:
3666           (scheme 1) using reduc_code, if available.
3667           (scheme 2) using whole-vector shifts, if available.
3668           (scheme 3) using a scalar loop. In this case steps 1+2 above are
3669                      combined.
3670
3671           The overall epilog code looks like this:
3672
3673           s_out0 = phi <s_loop>         # original EXIT_PHI
3674           v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
3675           v_out2 = reduce <v_out1>              # step 1
3676           s_out3 = extract_field <v_out2, 0>    # step 2
3677           s_out4 = adjust_result <s_out3>       # step 3
3678
3679           (step 3 is optional, and steps 1 and 2 may be combined).
3680           Lastly, the uses of s_out0 are replaced by s_out4.  */
3681
3682
3683   /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
3684          v_out1 = phi <VECT_DEF>
3685          Store them in NEW_PHIS.  */
3686
3687   exit_bb = single_exit (loop)->dest;
3688   prev_phi_info = NULL;
3689   new_phis = VEC_alloc (gimple, heap, VEC_length (tree, vect_defs));
3690   FOR_EACH_VEC_ELT (tree, vect_defs, i, def)
3691     {
3692       for (j = 0; j < ncopies; j++)
3693         {
3694           phi = create_phi_node (SSA_NAME_VAR (def), exit_bb);
3695           set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo, NULL));
3696           if (j == 0)
3697             VEC_quick_push (gimple, new_phis, phi);
3698           else
3699             {
3700               def = vect_get_vec_def_for_stmt_copy (dt, def);
3701               STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
3702             }
3703
3704           SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
3705           prev_phi_info = vinfo_for_stmt (phi);
3706         }
3707     }
3708
3709   /* The epilogue is created for the outer-loop, i.e., for the loop being
3710      vectorized.  Create exit phis for the outer loop.  */
3711   if (double_reduc)
3712     {
3713       loop = outer_loop;
3714       exit_bb = single_exit (loop)->dest;
3715       inner_phis = VEC_alloc (gimple, heap, VEC_length (tree, vect_defs));
3716       FOR_EACH_VEC_ELT (gimple, new_phis, i, phi)
3717         {
3718           gimple outer_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
3719                                               exit_bb);
3720           SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
3721                            PHI_RESULT (phi));
3722           set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
3723                                                             loop_vinfo, NULL));
3724           VEC_quick_push (gimple, inner_phis, phi);
3725           VEC_replace (gimple, new_phis, i, outer_phi);
3726           prev_phi_info = vinfo_for_stmt (outer_phi);
3727           while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
3728             {
3729               phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
3730               outer_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
3731                                            exit_bb);
3732               SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
3733                                PHI_RESULT (phi));
3734               set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
3735                                                         loop_vinfo, NULL));
3736               STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
3737               prev_phi_info = vinfo_for_stmt (outer_phi);
3738             }
3739         }
3740     }
3741
3742   exit_gsi = gsi_after_labels (exit_bb);
3743
3744   /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
3745          (i.e. when reduc_code is not available) and in the final adjustment
3746          code (if needed).  Also get the original scalar reduction variable as
3747          defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
3748          represents a reduction pattern), the tree-code and scalar-def are
3749          taken from the original stmt that the pattern-stmt (STMT) replaces.
3750          Otherwise (it is a regular reduction) - the tree-code and scalar-def
3751          are taken from STMT.  */
3752
3753   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3754   if (!orig_stmt)
3755     {
3756       /* Regular reduction  */
3757       orig_stmt = stmt;
3758     }
3759   else
3760     {
3761       /* Reduction pattern  */
3762       stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
3763       gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
3764       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
3765     }
3766
3767   code = gimple_assign_rhs_code (orig_stmt);
3768   /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
3769      partial results are added and not subtracted.  */
3770   if (code == MINUS_EXPR)
3771     code = PLUS_EXPR;
3772
3773   scalar_dest = gimple_assign_lhs (orig_stmt);
3774   scalar_type = TREE_TYPE (scalar_dest);
3775   scalar_results = VEC_alloc (tree, heap, group_size);
3776   new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
3777   bitsize = TYPE_SIZE (scalar_type);
3778
3779   /* In case this is a reduction in an inner-loop while vectorizing an outer
3780      loop - we don't need to extract a single scalar result at the end of the
3781      inner-loop (unless it is double reduction, i.e., the use of reduction is
3782      outside the outer-loop).  The final vector of partial results will be used
3783      in the vectorized outer-loop, or reduced to a scalar result at the end of
3784      the outer-loop.  */
3785   if (nested_in_vect_loop && !double_reduc)
3786     goto vect_finalize_reduction;
3787
3788   /* SLP reduction without reduction chain, e.g.,
3789      # a1 = phi <a2, a0>
3790      # b1 = phi <b2, b0>
3791      a2 = operation (a1)
3792      b2 = operation (b1)  */
3793   slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
3794
3795   /* In case of reduction chain, e.g.,
3796      # a1 = phi <a3, a0>
3797      a2 = operation (a1)
3798      a3 = operation (a2),
3799
3800      we may end up with more than one vector result.  Here we reduce them to
3801      one vector.  */
3802   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
3803     {
3804       tree first_vect = PHI_RESULT (VEC_index (gimple, new_phis, 0));
3805       tree tmp;
3806       gimple new_vec_stmt = NULL;
3807
3808       vec_dest = vect_create_destination_var (scalar_dest, vectype);
3809       for (k = 1; k < VEC_length (gimple, new_phis); k++)
3810         {
3811           gimple next_phi = VEC_index (gimple, new_phis, k);
3812           tree second_vect = PHI_RESULT (next_phi);
3813
3814           tmp = build2 (code, vectype,  first_vect, second_vect);
3815           new_vec_stmt = gimple_build_assign (vec_dest, tmp);
3816           first_vect = make_ssa_name (vec_dest, new_vec_stmt);
3817           gimple_assign_set_lhs (new_vec_stmt, first_vect);
3818           gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
3819         }
3820
3821       new_phi_result = first_vect;
3822       if (new_vec_stmt)
3823         {
3824           VEC_truncate (gimple, new_phis, 0);
3825           VEC_safe_push (gimple, heap, new_phis, new_vec_stmt);
3826         }
3827     }
3828   else
3829     new_phi_result = PHI_RESULT (VEC_index (gimple, new_phis, 0));
3830
3831   /* 2.3 Create the reduction code, using one of the three schemes described
3832          above. In SLP we simply need to extract all the elements from the
3833          vector (without reducing them), so we use scalar shifts.  */
3834   if (reduc_code != ERROR_MARK && !slp_reduc)
3835     {
3836       tree tmp;
3837
3838       /*** Case 1:  Create:
3839            v_out2 = reduc_expr <v_out1>  */
3840
3841       if (vect_print_dump_info (REPORT_DETAILS))
3842         fprintf (vect_dump, "Reduce using direct vector reduction.");
3843
3844       vec_dest = vect_create_destination_var (scalar_dest, vectype);
3845       tmp = build1 (reduc_code, vectype, new_phi_result);
3846       epilog_stmt = gimple_build_assign (vec_dest, tmp);
3847       new_temp = make_ssa_name (vec_dest, epilog_stmt);
3848       gimple_assign_set_lhs (epilog_stmt, new_temp);
3849       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3850
3851       extract_scalar_result = true;
3852     }
3853   else
3854     {
3855       enum tree_code shift_code = ERROR_MARK;
3856       bool have_whole_vector_shift = true;
3857       int bit_offset;
3858       int element_bitsize = tree_low_cst (bitsize, 1);
3859       int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
3860       tree vec_temp;
3861
3862       if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
3863         shift_code = VEC_RSHIFT_EXPR;
3864       else
3865         have_whole_vector_shift = false;
3866
3867       /* Regardless of whether we have a whole vector shift, if we're
3868          emulating the operation via tree-vect-generic, we don't want
3869          to use it.  Only the first round of the reduction is likely
3870          to still be profitable via emulation.  */
3871       /* ??? It might be better to emit a reduction tree code here, so that
3872          tree-vect-generic can expand the first round via bit tricks.  */
3873       if (!VECTOR_MODE_P (mode))
3874         have_whole_vector_shift = false;
3875       else
3876         {
3877           optab optab = optab_for_tree_code (code, vectype, optab_default);
3878           if (optab_handler (optab, mode) == CODE_FOR_nothing)
3879             have_whole_vector_shift = false;
3880         }
3881
3882       if (have_whole_vector_shift && !slp_reduc)
3883         {
3884           /*** Case 2: Create:
3885              for (offset = VS/2; offset >= element_size; offset/=2)
3886                 {
3887                   Create:  va' = vec_shift <va, offset>
3888                   Create:  va = vop <va, va'>
3889                 }  */
3890
3891           if (vect_print_dump_info (REPORT_DETAILS))
3892             fprintf (vect_dump, "Reduce using vector shifts");
3893
3894           vec_dest = vect_create_destination_var (scalar_dest, vectype);
3895           new_temp = new_phi_result;
3896           for (bit_offset = vec_size_in_bits/2;
3897                bit_offset >= element_bitsize;
3898                bit_offset /= 2)
3899             {
3900               tree bitpos = size_int (bit_offset);
3901
3902               epilog_stmt = gimple_build_assign_with_ops (shift_code,
3903                                                vec_dest, new_temp, bitpos);
3904               new_name = make_ssa_name (vec_dest, epilog_stmt);
3905               gimple_assign_set_lhs (epilog_stmt, new_name);
3906               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3907
3908               epilog_stmt = gimple_build_assign_with_ops (code, vec_dest,
3909                                                           new_name, new_temp);
3910               new_temp = make_ssa_name (vec_dest, epilog_stmt);
3911               gimple_assign_set_lhs (epilog_stmt, new_temp);
3912               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3913             }
3914
3915           extract_scalar_result = true;
3916         }
3917       else
3918         {
3919           tree rhs;
3920
3921           /*** Case 3: Create:
3922              s = extract_field <v_out2, 0>
3923              for (offset = element_size;
3924                   offset < vector_size;
3925                   offset += element_size;)
3926                {
3927                  Create:  s' = extract_field <v_out2, offset>
3928                  Create:  s = op <s, s'>  // For non SLP cases
3929                }  */
3930
3931           if (vect_print_dump_info (REPORT_DETAILS))
3932             fprintf (vect_dump, "Reduce using scalar code. ");
3933
3934           vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
3935           FOR_EACH_VEC_ELT (gimple, new_phis, i, new_phi)
3936             {
3937               if (gimple_code (new_phi) == GIMPLE_PHI)
3938                 vec_temp = PHI_RESULT (new_phi);
3939               else
3940                 vec_temp = gimple_assign_lhs (new_phi);
3941               rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
3942                             bitsize_zero_node);
3943               epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
3944               new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
3945               gimple_assign_set_lhs (epilog_stmt, new_temp);
3946               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3947
3948               /* In SLP we don't need to apply reduction operation, so we just
3949                  collect s' values in SCALAR_RESULTS.  */
3950               if (slp_reduc)
3951                 VEC_safe_push (tree, heap, scalar_results, new_temp);
3952
3953               for (bit_offset = element_bitsize;
3954                    bit_offset < vec_size_in_bits;
3955                    bit_offset += element_bitsize)
3956                 {
3957                   tree bitpos = bitsize_int (bit_offset);
3958                   tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
3959                                      bitsize, bitpos);
3960
3961                   epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
3962                   new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
3963                   gimple_assign_set_lhs (epilog_stmt, new_name);
3964                   gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3965
3966                   if (slp_reduc)
3967                     {
3968                       /* In SLP we don't need to apply reduction operation, so
3969                          we just collect s' values in SCALAR_RESULTS.  */
3970                       new_temp = new_name;
3971                       VEC_safe_push (tree, heap, scalar_results, new_name);
3972                     }
3973                   else
3974                     {
3975                       epilog_stmt = gimple_build_assign_with_ops (code,
3976                                           new_scalar_dest, new_name, new_temp);
3977                       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
3978                       gimple_assign_set_lhs (epilog_stmt, new_temp);
3979                       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3980                     }
3981                 }
3982             }
3983
3984           /* The only case where we need to reduce scalar results in SLP, is
3985              unrolling.  If the size of SCALAR_RESULTS is greater than
3986              GROUP_SIZE, we reduce them combining elements modulo
3987              GROUP_SIZE.  */
3988           if (slp_reduc)
3989             {
3990               tree res, first_res, new_res;
3991               gimple new_stmt;
3992
3993               /* Reduce multiple scalar results in case of SLP unrolling.  */
3994               for (j = group_size; VEC_iterate (tree, scalar_results, j, res);
3995                    j++)
3996                 {
3997                   first_res = VEC_index (tree, scalar_results, j % group_size);
3998                   new_stmt = gimple_build_assign_with_ops (code,
3999                                               new_scalar_dest, first_res, res);
4000                   new_res = make_ssa_name (new_scalar_dest, new_stmt);
4001                   gimple_assign_set_lhs (new_stmt, new_res);
4002                   gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
4003                   VEC_replace (tree, scalar_results, j % group_size, new_res);
4004                 }
4005             }
4006           else
4007             /* Not SLP - we have one scalar to keep in SCALAR_RESULTS.  */
4008             VEC_safe_push (tree, heap, scalar_results, new_temp);
4009
4010           extract_scalar_result = false;
4011         }
4012     }
4013
4014   /* 2.4  Extract the final scalar result.  Create:
4015           s_out3 = extract_field <v_out2, bitpos>  */
4016
4017   if (extract_scalar_result)
4018     {
4019       tree rhs;
4020
4021       if (vect_print_dump_info (REPORT_DETAILS))
4022         fprintf (vect_dump, "extract scalar result");
4023
4024       if (BYTES_BIG_ENDIAN)
4025         bitpos = size_binop (MULT_EXPR,
4026                              bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
4027                              TYPE_SIZE (scalar_type));
4028       else
4029         bitpos = bitsize_zero_node;
4030
4031       rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
4032       epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4033       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4034       gimple_assign_set_lhs (epilog_stmt, new_temp);
4035       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4036       VEC_safe_push (tree, heap, scalar_results, new_temp);
4037     }
4038
4039 vect_finalize_reduction:
4040
4041   if (double_reduc)
4042     loop = loop->inner;
4043
4044   /* 2.5 Adjust the final result by the initial value of the reduction
4045          variable. (When such adjustment is not needed, then
4046          'adjustment_def' is zero).  For example, if code is PLUS we create:
4047          new_temp = loop_exit_def + adjustment_def  */
4048
4049   if (adjustment_def)
4050     {
4051       gcc_assert (!slp_reduc);
4052       if (nested_in_vect_loop)
4053         {
4054           new_phi = VEC_index (gimple, new_phis, 0);
4055           gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
4056           expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
4057           new_dest = vect_create_destination_var (scalar_dest, vectype);
4058         }
4059       else
4060         {
4061           new_temp = VEC_index (tree, scalar_results, 0);
4062           gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
4063           expr = build2 (code, scalar_type, new_temp, adjustment_def);
4064           new_dest = vect_create_destination_var (scalar_dest, scalar_type);
4065         }
4066
4067       epilog_stmt = gimple_build_assign (new_dest, expr);
4068       new_temp = make_ssa_name (new_dest, epilog_stmt);
4069       gimple_assign_set_lhs (epilog_stmt, new_temp);
4070       SSA_NAME_DEF_STMT (new_temp) = epilog_stmt;
4071       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4072       if (nested_in_vect_loop)
4073         {
4074           set_vinfo_for_stmt (epilog_stmt,
4075                               new_stmt_vec_info (epilog_stmt, loop_vinfo,
4076                                                  NULL));
4077           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
4078                 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
4079
4080           if (!double_reduc)
4081             VEC_quick_push (tree, scalar_results, new_temp);
4082           else
4083             VEC_replace (tree, scalar_results, 0, new_temp);
4084         }
4085       else
4086         VEC_replace (tree, scalar_results, 0, new_temp);
4087
4088       VEC_replace (gimple, new_phis, 0, epilog_stmt);
4089     }
4090
4091   /* 2.6  Handle the loop-exit phis.  Replace the uses of scalar loop-exit
4092           phis with new adjusted scalar results, i.e., replace use <s_out0>
4093           with use <s_out4>.
4094
4095      Transform:
4096         loop_exit:
4097           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4098           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4099           v_out2 = reduce <v_out1>
4100           s_out3 = extract_field <v_out2, 0>
4101           s_out4 = adjust_result <s_out3>
4102           use <s_out0>
4103           use <s_out0>
4104
4105      into:
4106
4107         loop_exit:
4108           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4109           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4110           v_out2 = reduce <v_out1>
4111           s_out3 = extract_field <v_out2, 0>
4112           s_out4 = adjust_result <s_out3>
4113           use <s_out4>
4114           use <s_out4> */
4115
4116
4117   /* In SLP reduction chain we reduce vector results into one vector if
4118      necessary, hence we set here GROUP_SIZE to 1.  SCALAR_DEST is the LHS of
4119      the last stmt in the reduction chain, since we are looking for the loop
4120      exit phi node.  */
4121   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4122     {
4123       scalar_dest = gimple_assign_lhs (VEC_index (gimple,
4124                                        SLP_TREE_SCALAR_STMTS (slp_node),
4125                                        group_size - 1));
4126       group_size = 1;
4127     }
4128
4129   /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
4130      case that GROUP_SIZE is greater than vectorization factor).  Therefore, we
4131      need to match SCALAR_RESULTS with corresponding statements.  The first
4132      (GROUP_SIZE / number of new vector stmts) scalar results correspond to
4133      the first vector stmt, etc.
4134      (RATIO is equal to (GROUP_SIZE / number of new vector stmts)).  */
4135   if (group_size > VEC_length (gimple, new_phis))
4136     {
4137       ratio = group_size / VEC_length (gimple, new_phis);
4138       gcc_assert (!(group_size % VEC_length (gimple, new_phis)));
4139     }
4140   else
4141     ratio = 1;
4142
4143   for (k = 0; k < group_size; k++)
4144     {
4145       if (k % ratio == 0)
4146         {
4147           epilog_stmt = VEC_index (gimple, new_phis, k / ratio);
4148           reduction_phi = VEC_index (gimple, reduction_phis, k / ratio);
4149           if (double_reduc)
4150             inner_phi = VEC_index (gimple, inner_phis, k / ratio);
4151         }
4152
4153       if (slp_reduc)
4154         {
4155           gimple current_stmt = VEC_index (gimple,
4156                                        SLP_TREE_SCALAR_STMTS (slp_node), k);
4157
4158           orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
4159           /* SLP statements can't participate in patterns.  */
4160           gcc_assert (!orig_stmt);
4161           scalar_dest = gimple_assign_lhs (current_stmt);
4162         }
4163
4164       phis = VEC_alloc (gimple, heap, 3);
4165       /* Find the loop-closed-use at the loop exit of the original scalar
4166          result.  (The reduction result is expected to have two immediate uses -
4167          one at the latch block, and one at the loop exit).  */
4168       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4169         if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
4170           VEC_safe_push (gimple, heap, phis, USE_STMT (use_p));
4171
4172       /* We expect to have found an exit_phi because of loop-closed-ssa
4173          form.  */
4174       gcc_assert (!VEC_empty (gimple, phis));
4175
4176       FOR_EACH_VEC_ELT (gimple, phis, i, exit_phi)
4177         {
4178           if (outer_loop)
4179             {
4180               stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
4181               gimple vect_phi;
4182
4183               /* FORNOW. Currently not supporting the case that an inner-loop
4184                  reduction is not used in the outer-loop (but only outside the
4185                  outer-loop), unless it is double reduction.  */
4186               gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
4187                            && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
4188                           || double_reduc);
4189
4190               STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
4191               if (!double_reduc
4192                   || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
4193                       != vect_double_reduction_def)
4194                 continue;
4195
4196               /* Handle double reduction:
4197
4198                  stmt1: s1 = phi <s0, s2>  - double reduction phi (outer loop)
4199                  stmt2:   s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
4200                  stmt3:   s4 = use (s3)     - (regular) reduc stmt (inner loop)
4201                  stmt4: s2 = phi <s4>      - double reduction stmt (outer loop)
4202
4203                  At that point the regular reduction (stmt2 and stmt3) is
4204                  already vectorized, as well as the exit phi node, stmt4.
4205                  Here we vectorize the phi node of double reduction, stmt1, and
4206                  update all relevant statements.  */
4207
4208               /* Go through all the uses of s2 to find double reduction phi
4209                  node, i.e., stmt1 above.  */
4210               orig_name = PHI_RESULT (exit_phi);
4211               FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4212                 {
4213                   stmt_vec_info use_stmt_vinfo = vinfo_for_stmt (use_stmt);
4214                   stmt_vec_info new_phi_vinfo;
4215                   tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
4216                   basic_block bb = gimple_bb (use_stmt);
4217                   gimple use;
4218
4219                   /* Check that USE_STMT is really double reduction phi
4220                      node.  */
4221                   if (gimple_code (use_stmt) != GIMPLE_PHI
4222                       || gimple_phi_num_args (use_stmt) != 2
4223                       || !use_stmt_vinfo
4224                       || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
4225                           != vect_double_reduction_def
4226                       || bb->loop_father != outer_loop)
4227                     continue;
4228
4229                   /* Create vector phi node for double reduction:
4230                      vs1 = phi <vs0, vs2>
4231                      vs1 was created previously in this function by a call to
4232                        vect_get_vec_def_for_operand and is stored in
4233                        vec_initial_def;
4234                      vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
4235                      vs0 is created here.  */
4236
4237                   /* Create vector phi node.  */
4238                   vect_phi = create_phi_node (vec_initial_def, bb);
4239                   new_phi_vinfo = new_stmt_vec_info (vect_phi,
4240                                     loop_vec_info_for_loop (outer_loop), NULL);
4241                   set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
4242
4243                   /* Create vs0 - initial def of the double reduction phi.  */
4244                   preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
4245                                              loop_preheader_edge (outer_loop));
4246                   init_def = get_initial_def_for_reduction (stmt,
4247                                                           preheader_arg, NULL);
4248                   vect_phi_init = vect_init_vector (use_stmt, init_def,
4249                                                     vectype, NULL);
4250
4251                   /* Update phi node arguments with vs0 and vs2.  */
4252                   add_phi_arg (vect_phi, vect_phi_init,
4253                                loop_preheader_edge (outer_loop),
4254                                UNKNOWN_LOCATION);
4255                   add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
4256                                loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
4257                   if (vect_print_dump_info (REPORT_DETAILS))
4258                     {
4259                       fprintf (vect_dump, "created double reduction phi "
4260                                           "node: ");
4261                       print_gimple_stmt (vect_dump, vect_phi, 0, TDF_SLIM);
4262                     }
4263
4264                   vect_phi_res = PHI_RESULT (vect_phi);
4265
4266                   /* Replace the use, i.e., set the correct vs1 in the regular
4267                      reduction phi node.  FORNOW, NCOPIES is always 1, so the
4268                      loop is redundant.  */
4269                   use = reduction_phi;
4270                   for (j = 0; j < ncopies; j++)
4271                     {
4272                       edge pr_edge = loop_preheader_edge (loop);
4273                       SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
4274                       use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
4275                     }
4276                 }
4277             }
4278         }
4279
4280       VEC_free (gimple, heap, phis);
4281       if (nested_in_vect_loop)
4282         {
4283           if (double_reduc)
4284             loop = outer_loop;
4285           else
4286             continue;
4287         }
4288
4289       phis = VEC_alloc (gimple, heap, 3);
4290       /* Find the loop-closed-use at the loop exit of the original scalar
4291          result.  (The reduction result is expected to have two immediate uses,
4292          one at the latch block, and one at the loop exit).  For double
4293          reductions we are looking for exit phis of the outer loop.  */
4294       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4295         {
4296           if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
4297             VEC_safe_push (gimple, heap, phis, USE_STMT (use_p));
4298           else
4299             {
4300               if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
4301                 {
4302                   tree phi_res = PHI_RESULT (USE_STMT (use_p));
4303
4304                   FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
4305                     {
4306                       if (!flow_bb_inside_loop_p (loop,
4307                                              gimple_bb (USE_STMT (phi_use_p))))
4308                         VEC_safe_push (gimple, heap, phis,
4309                                        USE_STMT (phi_use_p));
4310                     }
4311                 }
4312             }
4313         }
4314
4315       FOR_EACH_VEC_ELT (gimple, phis, i, exit_phi)
4316         {
4317           /* Replace the uses:  */
4318           orig_name = PHI_RESULT (exit_phi);
4319           scalar_result = VEC_index (tree, scalar_results, k);
4320           FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4321             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
4322               SET_USE (use_p, scalar_result);
4323         }
4324
4325       VEC_free (gimple, heap, phis);
4326     }
4327
4328   VEC_free (tree, heap, scalar_results);
4329   VEC_free (gimple, heap, new_phis);
4330 }
4331
4332
4333 /* Function vectorizable_reduction.
4334
4335    Check if STMT performs a reduction operation that can be vectorized.
4336    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4337    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4338    Return FALSE if not a vectorizable STMT, TRUE otherwise.
4339
4340    This function also handles reduction idioms (patterns) that have been
4341    recognized in advance during vect_pattern_recog.  In this case, STMT may be
4342    of this form:
4343      X = pattern_expr (arg0, arg1, ..., X)
4344    and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
4345    sequence that had been detected and replaced by the pattern-stmt (STMT).
4346
4347    In some cases of reduction patterns, the type of the reduction variable X is
4348    different than the type of the other arguments of STMT.
4349    In such cases, the vectype that is used when transforming STMT into a vector
4350    stmt is different than the vectype that is used to determine the
4351    vectorization factor, because it consists of a different number of elements
4352    than the actual number of elements that are being operated upon in parallel.
4353
4354    For example, consider an accumulation of shorts into an int accumulator.
4355    On some targets it's possible to vectorize this pattern operating on 8
4356    shorts at a time (hence, the vectype for purposes of determining the
4357    vectorization factor should be V8HI); on the other hand, the vectype that
4358    is used to create the vector form is actually V4SI (the type of the result).
4359
4360    Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
4361    indicates what is the actual level of parallelism (V8HI in the example), so
4362    that the right vectorization factor would be derived.  This vectype
4363    corresponds to the type of arguments to the reduction stmt, and should *NOT*
4364    be used to create the vectorized stmt.  The right vectype for the vectorized
4365    stmt is obtained from the type of the result X:
4366         get_vectype_for_scalar_type (TREE_TYPE (X))
4367
4368    This means that, contrary to "regular" reductions (or "regular" stmts in
4369    general), the following equation:
4370       STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
4371    does *NOT* necessarily hold for reduction patterns.  */
4372
4373 bool
4374 vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
4375                         gimple *vec_stmt, slp_tree slp_node)
4376 {
4377   tree vec_dest;
4378   tree scalar_dest;
4379   tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
4380   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4381   tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4382   tree vectype_in = NULL_TREE;
4383   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4384   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4385   enum tree_code code, orig_code, epilog_reduc_code;
4386   enum machine_mode vec_mode;
4387   int op_type;
4388   optab optab, reduc_optab;
4389   tree new_temp = NULL_TREE;
4390   tree def;
4391   gimple def_stmt;
4392   enum vect_def_type dt;
4393   gimple new_phi = NULL;
4394   tree scalar_type;
4395   bool is_simple_use;
4396   gimple orig_stmt;
4397   stmt_vec_info orig_stmt_info;
4398   tree expr = NULL_TREE;
4399   int i;
4400   int ncopies;
4401   int epilog_copies;
4402   stmt_vec_info prev_stmt_info, prev_phi_info;
4403   bool single_defuse_cycle = false;
4404   tree reduc_def = NULL_TREE;
4405   gimple new_stmt = NULL;
4406   int j;
4407   tree ops[3];
4408   bool nested_cycle = false, found_nested_cycle_def = false;
4409   gimple reduc_def_stmt = NULL;
4410   /* The default is that the reduction variable is the last in statement.  */
4411   int reduc_index = 2;
4412   bool double_reduc = false, dummy;
4413   basic_block def_bb;
4414   struct loop * def_stmt_loop, *outer_loop = NULL;
4415   tree def_arg;
4416   gimple def_arg_stmt;
4417   VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vect_defs = NULL;
4418   VEC (gimple, heap) *phis = NULL;
4419   int vec_num;
4420   tree def0, def1, tem, op0, op1 = NULL_TREE;
4421
4422   /* In case of reduction chain we switch to the first stmt in the chain, but
4423      we don't update STMT_INFO, since only the last stmt is marked as reduction
4424      and has reduction properties.  */
4425   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4426     stmt = GROUP_FIRST_ELEMENT (stmt_info);
4427
4428   if (nested_in_vect_loop_p (loop, stmt))
4429     {
4430       outer_loop = loop;
4431       loop = loop->inner;
4432       nested_cycle = true;
4433     }
4434
4435   /* 1. Is vectorizable reduction?  */
4436   /* Not supportable if the reduction variable is used in the loop, unless
4437      it's a reduction chain.  */
4438   if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
4439       && !GROUP_FIRST_ELEMENT (stmt_info))
4440     return false;
4441
4442   /* Reductions that are not used even in an enclosing outer-loop,
4443      are expected to be "live" (used out of the loop).  */
4444   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
4445       && !STMT_VINFO_LIVE_P (stmt_info))
4446     return false;
4447
4448   /* Make sure it was already recognized as a reduction computation.  */
4449   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
4450       && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle)
4451     return false;
4452
4453   /* 2. Has this been recognized as a reduction pattern?
4454
4455      Check if STMT represents a pattern that has been recognized
4456      in earlier analysis stages.  For stmts that represent a pattern,
4457      the STMT_VINFO_RELATED_STMT field records the last stmt in
4458      the original sequence that constitutes the pattern.  */
4459
4460   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4461   if (orig_stmt)
4462     {
4463       orig_stmt_info = vinfo_for_stmt (orig_stmt);
4464       gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
4465       gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
4466       gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
4467     }
4468
4469   /* 3. Check the operands of the operation.  The first operands are defined
4470         inside the loop body. The last operand is the reduction variable,
4471         which is defined by the loop-header-phi.  */
4472
4473   gcc_assert (is_gimple_assign (stmt));
4474
4475   /* Flatten RHS.  */
4476   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
4477     {
4478     case GIMPLE_SINGLE_RHS:
4479       op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
4480       if (op_type == ternary_op)
4481         {
4482           tree rhs = gimple_assign_rhs1 (stmt);
4483           ops[0] = TREE_OPERAND (rhs, 0);
4484           ops[1] = TREE_OPERAND (rhs, 1);
4485           ops[2] = TREE_OPERAND (rhs, 2);
4486           code = TREE_CODE (rhs);
4487         }
4488       else
4489         return false;
4490       break;
4491
4492     case GIMPLE_BINARY_RHS:
4493       code = gimple_assign_rhs_code (stmt);
4494       op_type = TREE_CODE_LENGTH (code);
4495       gcc_assert (op_type == binary_op);
4496       ops[0] = gimple_assign_rhs1 (stmt);
4497       ops[1] = gimple_assign_rhs2 (stmt);
4498       break;
4499
4500     case GIMPLE_TERNARY_RHS:
4501       code = gimple_assign_rhs_code (stmt);
4502       op_type = TREE_CODE_LENGTH (code);
4503       gcc_assert (op_type == ternary_op);
4504       ops[0] = gimple_assign_rhs1 (stmt);
4505       ops[1] = gimple_assign_rhs2 (stmt);
4506       ops[2] = gimple_assign_rhs3 (stmt);
4507       break;
4508
4509     case GIMPLE_UNARY_RHS:
4510       return false;
4511
4512     default:
4513       gcc_unreachable ();
4514     }
4515
4516   if (code == COND_EXPR && slp_node)
4517     return false;
4518
4519   scalar_dest = gimple_assign_lhs (stmt);
4520   scalar_type = TREE_TYPE (scalar_dest);
4521   if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
4522       && !SCALAR_FLOAT_TYPE_P (scalar_type))
4523     return false;
4524
4525   /* Do not try to vectorize bit-precision reductions.  */
4526   if ((TYPE_PRECISION (scalar_type)
4527        != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
4528     return false;
4529
4530   /* All uses but the last are expected to be defined in the loop.
4531      The last use is the reduction variable.  In case of nested cycle this
4532      assumption is not true: we use reduc_index to record the index of the
4533      reduction variable.  */
4534   for (i = 0; i < op_type-1; i++)
4535     {
4536       /* The condition of COND_EXPR is checked in vectorizable_condition().  */
4537       if (i == 0 && code == COND_EXPR)
4538         continue;
4539
4540       is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
4541                                             &def_stmt, &def, &dt, &tem);
4542       if (!vectype_in)
4543         vectype_in = tem;
4544       gcc_assert (is_simple_use);
4545
4546       if (dt != vect_internal_def
4547           && dt != vect_external_def
4548           && dt != vect_constant_def
4549           && dt != vect_induction_def
4550           && !(dt == vect_nested_cycle && nested_cycle))
4551         return false;
4552
4553       if (dt == vect_nested_cycle)
4554         {
4555           found_nested_cycle_def = true;
4556           reduc_def_stmt = def_stmt;
4557           reduc_index = i;
4558         }
4559     }
4560
4561   is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
4562                                         &def_stmt, &def, &dt, &tem);
4563   if (!vectype_in)
4564     vectype_in = tem;
4565   gcc_assert (is_simple_use);
4566   gcc_assert (dt == vect_reduction_def
4567               || dt == vect_nested_cycle
4568               || ((dt == vect_internal_def || dt == vect_external_def
4569                    || dt == vect_constant_def || dt == vect_induction_def)
4570                    && nested_cycle && found_nested_cycle_def));
4571   if (!found_nested_cycle_def)
4572     reduc_def_stmt = def_stmt;
4573
4574   gcc_assert (gimple_code (reduc_def_stmt) == GIMPLE_PHI);
4575   if (orig_stmt)
4576     gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo,
4577                                                        reduc_def_stmt,
4578                                                        !nested_cycle,
4579                                                        &dummy));
4580   else
4581     {
4582       gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
4583                                              !nested_cycle, &dummy);
4584       /* We changed STMT to be the first stmt in reduction chain, hence we
4585          check that in this case the first element in the chain is STMT.  */
4586       gcc_assert (stmt == tmp
4587                   || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
4588     }
4589
4590   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
4591     return false;
4592
4593   if (slp_node || PURE_SLP_STMT (stmt_info))
4594     ncopies = 1;
4595   else
4596     ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4597                / TYPE_VECTOR_SUBPARTS (vectype_in));
4598
4599   gcc_assert (ncopies >= 1);
4600
4601   vec_mode = TYPE_MODE (vectype_in);
4602
4603   if (code == COND_EXPR)
4604     {
4605       if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL))
4606         {
4607           if (vect_print_dump_info (REPORT_DETAILS))
4608             fprintf (vect_dump, "unsupported condition in reduction");
4609
4610             return false;
4611         }
4612     }
4613   else
4614     {
4615       /* 4. Supportable by target?  */
4616
4617       /* 4.1. check support for the operation in the loop  */
4618       optab = optab_for_tree_code (code, vectype_in, optab_default);
4619       if (!optab)
4620         {
4621           if (vect_print_dump_info (REPORT_DETAILS))
4622             fprintf (vect_dump, "no optab.");
4623
4624           return false;
4625         }
4626
4627       if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
4628         {
4629           if (vect_print_dump_info (REPORT_DETAILS))
4630             fprintf (vect_dump, "op not supported by target.");
4631
4632           if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4633               || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4634                   < vect_min_worthwhile_factor (code))
4635             return false;
4636
4637           if (vect_print_dump_info (REPORT_DETAILS))
4638             fprintf (vect_dump, "proceeding using word mode.");
4639         }
4640
4641       /* Worthwhile without SIMD support?  */
4642       if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
4643           && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4644              < vect_min_worthwhile_factor (code))
4645         {
4646           if (vect_print_dump_info (REPORT_DETAILS))
4647             fprintf (vect_dump, "not worthwhile without SIMD support.");
4648
4649           return false;
4650         }
4651     }
4652
4653   /* 4.2. Check support for the epilog operation.
4654
4655           If STMT represents a reduction pattern, then the type of the
4656           reduction variable may be different than the type of the rest
4657           of the arguments.  For example, consider the case of accumulation
4658           of shorts into an int accumulator; The original code:
4659                         S1: int_a = (int) short_a;
4660           orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;
4661
4662           was replaced with:
4663                         STMT: int_acc = widen_sum <short_a, int_acc>
4664
4665           This means that:
4666           1. The tree-code that is used to create the vector operation in the
4667              epilog code (that reduces the partial results) is not the
4668              tree-code of STMT, but is rather the tree-code of the original
4669              stmt from the pattern that STMT is replacing.  I.e, in the example
4670              above we want to use 'widen_sum' in the loop, but 'plus' in the
4671              epilog.
4672           2. The type (mode) we use to check available target support
4673              for the vector operation to be created in the *epilog*, is
4674              determined by the type of the reduction variable (in the example
4675              above we'd check this: optab_handler (plus_optab, vect_int_mode])).
4676              However the type (mode) we use to check available target support
4677              for the vector operation to be created *inside the loop*, is
4678              determined by the type of the other arguments to STMT (in the
4679              example we'd check this: optab_handler (widen_sum_optab,
4680              vect_short_mode)).
4681
4682           This is contrary to "regular" reductions, in which the types of all
4683           the arguments are the same as the type of the reduction variable.
4684           For "regular" reductions we can therefore use the same vector type
4685           (and also the same tree-code) when generating the epilog code and
4686           when generating the code inside the loop.  */
4687
4688   if (orig_stmt)
4689     {
4690       /* This is a reduction pattern: get the vectype from the type of the
4691          reduction variable, and get the tree-code from orig_stmt.  */
4692       orig_code = gimple_assign_rhs_code (orig_stmt);
4693       gcc_assert (vectype_out);
4694       vec_mode = TYPE_MODE (vectype_out);
4695     }
4696   else
4697     {
4698       /* Regular reduction: use the same vectype and tree-code as used for
4699          the vector code inside the loop can be used for the epilog code. */
4700       orig_code = code;
4701     }
4702
4703   if (nested_cycle)
4704     {
4705       def_bb = gimple_bb (reduc_def_stmt);
4706       def_stmt_loop = def_bb->loop_father;
4707       def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
4708                                        loop_preheader_edge (def_stmt_loop));
4709       if (TREE_CODE (def_arg) == SSA_NAME
4710           && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
4711           && gimple_code (def_arg_stmt) == GIMPLE_PHI
4712           && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
4713           && vinfo_for_stmt (def_arg_stmt)
4714           && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
4715               == vect_double_reduction_def)
4716         double_reduc = true;
4717     }
4718
4719   epilog_reduc_code = ERROR_MARK;
4720   if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
4721     {
4722       reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
4723                                          optab_default);
4724       if (!reduc_optab)
4725         {
4726           if (vect_print_dump_info (REPORT_DETAILS))
4727             fprintf (vect_dump, "no optab for reduction.");
4728
4729           epilog_reduc_code = ERROR_MARK;
4730         }
4731
4732       if (reduc_optab
4733           && optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
4734         {
4735           if (vect_print_dump_info (REPORT_DETAILS))
4736             fprintf (vect_dump, "reduc op not supported by target.");
4737
4738           epilog_reduc_code = ERROR_MARK;
4739         }
4740     }
4741   else
4742     {
4743       if (!nested_cycle || double_reduc)
4744         {
4745           if (vect_print_dump_info (REPORT_DETAILS))
4746             fprintf (vect_dump, "no reduc code for scalar code.");
4747
4748           return false;
4749         }
4750     }
4751
4752   if (double_reduc && ncopies > 1)
4753     {
4754       if (vect_print_dump_info (REPORT_DETAILS))
4755         fprintf (vect_dump, "multiple types in double reduction");
4756
4757       return false;
4758     }
4759
4760   /* In case of widenning multiplication by a constant, we update the type
4761      of the constant to be the type of the other operand.  We check that the
4762      constant fits the type in the pattern recognition pass.  */
4763   if (code == DOT_PROD_EXPR
4764       && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
4765     {
4766       if (TREE_CODE (ops[0]) == INTEGER_CST)
4767         ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
4768       else if (TREE_CODE (ops[1]) == INTEGER_CST)
4769         ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
4770       else
4771         {
4772           if (vect_print_dump_info (REPORT_DETAILS))
4773             fprintf (vect_dump, "invalid types in dot-prod");
4774
4775           return false;
4776         }
4777     }
4778
4779   if (!vec_stmt) /* transformation not required.  */
4780     {
4781       if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
4782         return false;
4783       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
4784       return true;
4785     }
4786
4787   /** Transform.  **/
4788
4789   if (vect_print_dump_info (REPORT_DETAILS))
4790     fprintf (vect_dump, "transform reduction.");
4791
4792   /* FORNOW: Multiple types are not supported for condition.  */
4793   if (code == COND_EXPR)
4794     gcc_assert (ncopies == 1);
4795
4796   /* Create the destination vector  */
4797   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
4798
4799   /* In case the vectorization factor (VF) is bigger than the number
4800      of elements that we can fit in a vectype (nunits), we have to generate
4801      more than one vector stmt - i.e - we need to "unroll" the
4802      vector stmt by a factor VF/nunits.  For more details see documentation
4803      in vectorizable_operation.  */
4804
4805   /* If the reduction is used in an outer loop we need to generate
4806      VF intermediate results, like so (e.g. for ncopies=2):
4807         r0 = phi (init, r0)
4808         r1 = phi (init, r1)
4809         r0 = x0 + r0;
4810         r1 = x1 + r1;
4811     (i.e. we generate VF results in 2 registers).
4812     In this case we have a separate def-use cycle for each copy, and therefore
4813     for each copy we get the vector def for the reduction variable from the
4814     respective phi node created for this copy.
4815
4816     Otherwise (the reduction is unused in the loop nest), we can combine
4817     together intermediate results, like so (e.g. for ncopies=2):
4818         r = phi (init, r)
4819         r = x0 + r;
4820         r = x1 + r;
4821    (i.e. we generate VF/2 results in a single register).
4822    In this case for each copy we get the vector def for the reduction variable
4823    from the vectorized reduction operation generated in the previous iteration.
4824   */
4825
4826   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope)
4827     {
4828       single_defuse_cycle = true;
4829       epilog_copies = 1;
4830     }
4831   else
4832     epilog_copies = ncopies;
4833
4834   prev_stmt_info = NULL;
4835   prev_phi_info = NULL;
4836   if (slp_node)
4837     {
4838       vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4839       gcc_assert (TYPE_VECTOR_SUBPARTS (vectype_out)
4840                   == TYPE_VECTOR_SUBPARTS (vectype_in));
4841     }
4842   else
4843     {
4844       vec_num = 1;
4845       vec_oprnds0 = VEC_alloc (tree, heap, 1);
4846       if (op_type == ternary_op)
4847         vec_oprnds1 = VEC_alloc (tree, heap, 1);
4848     }
4849
4850   phis = VEC_alloc (gimple, heap, vec_num);
4851   vect_defs = VEC_alloc (tree, heap, vec_num);
4852   if (!slp_node)
4853     VEC_quick_push (tree, vect_defs, NULL_TREE);
4854
4855   for (j = 0; j < ncopies; j++)
4856     {
4857       if (j == 0 || !single_defuse_cycle)
4858         {
4859           for (i = 0; i < vec_num; i++)
4860             {
4861               /* Create the reduction-phi that defines the reduction
4862                  operand.  */
4863               new_phi = create_phi_node (vec_dest, loop->header);
4864               set_vinfo_for_stmt (new_phi,
4865                                   new_stmt_vec_info (new_phi, loop_vinfo,
4866                                                      NULL));
4867                if (j == 0 || slp_node)
4868                  VEC_quick_push (gimple, phis, new_phi);
4869             }
4870         }
4871
4872       if (code == COND_EXPR)
4873         {
4874           gcc_assert (!slp_node);
4875           vectorizable_condition (stmt, gsi, vec_stmt,
4876                                   PHI_RESULT (VEC_index (gimple, phis, 0)),
4877                                   reduc_index, NULL);
4878           /* Multiple types are not supported for condition.  */
4879           break;
4880         }
4881
4882       /* Handle uses.  */
4883       if (j == 0)
4884         {
4885           op0 = ops[!reduc_index];
4886           if (op_type == ternary_op)
4887             {
4888               if (reduc_index == 0)
4889                 op1 = ops[2];
4890               else
4891                 op1 = ops[1];
4892             }
4893
4894           if (slp_node)
4895             vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4896                                slp_node, -1);
4897           else
4898             {
4899               loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
4900                                                             stmt, NULL);
4901               VEC_quick_push (tree, vec_oprnds0, loop_vec_def0);
4902               if (op_type == ternary_op)
4903                {
4904                  loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt,
4905                                                                NULL);
4906                  VEC_quick_push (tree, vec_oprnds1, loop_vec_def1);
4907                }
4908             }
4909         }
4910       else
4911         {
4912           if (!slp_node)
4913             {
4914               enum vect_def_type dt;
4915               gimple dummy_stmt;
4916               tree dummy;
4917
4918               vect_is_simple_use (ops[!reduc_index], stmt, loop_vinfo, NULL,
4919                                   &dummy_stmt, &dummy, &dt);
4920               loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
4921                                                               loop_vec_def0);
4922               VEC_replace (tree, vec_oprnds0, 0, loop_vec_def0);
4923               if (op_type == ternary_op)
4924                 {
4925                   vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &dummy_stmt,
4926                                       &dummy, &dt);
4927                   loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
4928                                                                 loop_vec_def1);
4929                   VEC_replace (tree, vec_oprnds1, 0, loop_vec_def1);
4930                 }
4931             }
4932
4933           if (single_defuse_cycle)
4934             reduc_def = gimple_assign_lhs (new_stmt);
4935
4936           STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
4937         }
4938
4939       FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, def0)
4940         {
4941           if (slp_node)
4942             reduc_def = PHI_RESULT (VEC_index (gimple, phis, i));
4943           else
4944             {
4945               if (!single_defuse_cycle || j == 0)
4946                 reduc_def = PHI_RESULT (new_phi);
4947             }
4948
4949           def1 = ((op_type == ternary_op)
4950                   ? VEC_index (tree, vec_oprnds1, i) : NULL);
4951           if (op_type == binary_op)
4952             {
4953               if (reduc_index == 0)
4954                 expr = build2 (code, vectype_out, reduc_def, def0);
4955               else
4956                 expr = build2 (code, vectype_out, def0, reduc_def);
4957             }
4958           else
4959             {
4960               if (reduc_index == 0)
4961                 expr = build3 (code, vectype_out, reduc_def, def0, def1);
4962               else
4963                 {
4964                   if (reduc_index == 1)
4965                     expr = build3 (code, vectype_out, def0, reduc_def, def1);
4966                   else
4967                     expr = build3 (code, vectype_out, def0, def1, reduc_def);
4968                 }
4969             }
4970
4971           new_stmt = gimple_build_assign (vec_dest, expr);
4972           new_temp = make_ssa_name (vec_dest, new_stmt);
4973           gimple_assign_set_lhs (new_stmt, new_temp);
4974           vect_finish_stmt_generation (stmt, new_stmt, gsi);
4975
4976           if (slp_node)
4977             {
4978               VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
4979               VEC_quick_push (tree, vect_defs, new_temp);
4980             }
4981           else
4982             VEC_replace (tree, vect_defs, 0, new_temp);
4983         }
4984
4985       if (slp_node)
4986         continue;
4987
4988       if (j == 0)
4989         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4990       else
4991         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4992
4993       prev_stmt_info = vinfo_for_stmt (new_stmt);
4994       prev_phi_info = vinfo_for_stmt (new_phi);
4995     }
4996
4997   /* Finalize the reduction-phi (set its arguments) and create the
4998      epilog reduction code.  */
4999   if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
5000     {
5001       new_temp = gimple_assign_lhs (*vec_stmt);
5002       VEC_replace (tree, vect_defs, 0, new_temp);
5003     }
5004
5005   vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
5006                                     epilog_reduc_code, phis, reduc_index,
5007                                     double_reduc, slp_node);
5008
5009   VEC_free (gimple, heap, phis);
5010   VEC_free (tree, heap, vec_oprnds0);
5011   if (vec_oprnds1)
5012     VEC_free (tree, heap, vec_oprnds1);
5013
5014   return true;
5015 }
5016
5017 /* Function vect_min_worthwhile_factor.
5018
5019    For a loop where we could vectorize the operation indicated by CODE,
5020    return the minimum vectorization factor that makes it worthwhile
5021    to use generic vectors.  */
5022 int
5023 vect_min_worthwhile_factor (enum tree_code code)
5024 {
5025   switch (code)
5026     {
5027     case PLUS_EXPR:
5028     case MINUS_EXPR:
5029     case NEGATE_EXPR:
5030       return 4;
5031
5032     case BIT_AND_EXPR:
5033     case BIT_IOR_EXPR:
5034     case BIT_XOR_EXPR:
5035     case BIT_NOT_EXPR:
5036       return 2;
5037
5038     default:
5039       return INT_MAX;
5040     }
5041 }
5042
5043
5044 /* Function vectorizable_induction
5045
5046    Check if PHI performs an induction computation that can be vectorized.
5047    If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
5048    phi to replace it, put it in VEC_STMT, and add it to the same basic block.
5049    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5050
5051 bool
5052 vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5053                         gimple *vec_stmt)
5054 {
5055   stmt_vec_info stmt_info = vinfo_for_stmt (phi);
5056   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5057   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5058   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5059   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5060   int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5061   tree vec_def;
5062
5063   gcc_assert (ncopies >= 1);
5064   /* FORNOW. These restrictions should be relaxed.  */
5065   if (nested_in_vect_loop_p (loop, phi))
5066     {
5067       imm_use_iterator imm_iter;
5068       use_operand_p use_p;
5069       gimple exit_phi;
5070       edge latch_e;
5071       tree loop_arg;
5072
5073       if (ncopies > 1)
5074         {
5075           if (vect_print_dump_info (REPORT_DETAILS))
5076             fprintf (vect_dump, "multiple types in nested loop.");
5077           return false;
5078         }
5079
5080       exit_phi = NULL;
5081       latch_e = loop_latch_edge (loop->inner);
5082       loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
5083       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
5084         {
5085           if (!flow_bb_inside_loop_p (loop->inner,
5086                                       gimple_bb (USE_STMT (use_p))))
5087             {
5088               exit_phi = USE_STMT (use_p);
5089               break;
5090             }
5091         }
5092       if (exit_phi)
5093         {
5094           stmt_vec_info exit_phi_vinfo  = vinfo_for_stmt (exit_phi);
5095           if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
5096                 && !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
5097             {
5098               if (vect_print_dump_info (REPORT_DETAILS))
5099                 fprintf (vect_dump, "inner-loop induction only used outside "
5100                          "of the outer vectorized loop.");
5101               return false;
5102             }
5103         }
5104     }
5105
5106   if (!STMT_VINFO_RELEVANT_P (stmt_info))
5107     return false;
5108
5109   /* FORNOW: SLP not supported.  */
5110   if (STMT_SLP_TYPE (stmt_info))
5111     return false;
5112
5113   gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
5114
5115   if (gimple_code (phi) != GIMPLE_PHI)
5116     return false;
5117
5118   if (!vec_stmt) /* transformation not required.  */
5119     {
5120       STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
5121       if (vect_print_dump_info (REPORT_DETAILS))
5122         fprintf (vect_dump, "=== vectorizable_induction ===");
5123       vect_model_induction_cost (stmt_info, ncopies);
5124       return true;
5125     }
5126
5127   /** Transform.  **/
5128
5129   if (vect_print_dump_info (REPORT_DETAILS))
5130     fprintf (vect_dump, "transform induction phi.");
5131
5132   vec_def = get_initial_def_for_induction (phi);
5133   *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
5134   return true;
5135 }
5136
5137 /* Function vectorizable_live_operation.
5138
5139    STMT computes a value that is used outside the loop.  Check if
5140    it can be supported.  */
5141
5142 bool
5143 vectorizable_live_operation (gimple stmt,
5144                              gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5145                              gimple *vec_stmt ATTRIBUTE_UNUSED)
5146 {
5147   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5148   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5149   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5150   int i;
5151   int op_type;
5152   tree op;
5153   tree def;
5154   gimple def_stmt;
5155   enum vect_def_type dt;
5156   enum tree_code code;
5157   enum gimple_rhs_class rhs_class;
5158
5159   gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
5160
5161   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
5162     return false;
5163
5164   if (!is_gimple_assign (stmt))
5165     return false;
5166
5167   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5168     return false;
5169
5170   /* FORNOW. CHECKME. */
5171   if (nested_in_vect_loop_p (loop, stmt))
5172     return false;
5173
5174   code = gimple_assign_rhs_code (stmt);
5175   op_type = TREE_CODE_LENGTH (code);
5176   rhs_class = get_gimple_rhs_class (code);
5177   gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
5178   gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);
5179
5180   /* FORNOW: support only if all uses are invariant.  This means
5181      that the scalar operations can remain in place, unvectorized.
5182      The original last scalar value that they compute will be used.  */
5183
5184   for (i = 0; i < op_type; i++)
5185     {
5186       if (rhs_class == GIMPLE_SINGLE_RHS)
5187         op = TREE_OPERAND (gimple_op (stmt, 1), i);
5188       else
5189         op = gimple_op (stmt, i + 1);
5190       if (op
5191           && !vect_is_simple_use (op, stmt, loop_vinfo, NULL, &def_stmt, &def,
5192                                   &dt))
5193         {
5194           if (vect_print_dump_info (REPORT_DETAILS))
5195             fprintf (vect_dump, "use not simple.");
5196           return false;
5197         }
5198
5199       if (dt != vect_external_def && dt != vect_constant_def)
5200         return false;
5201     }
5202
5203   /* No transformation is required for the cases we currently support.  */
5204   return true;
5205 }
5206
5207 /* Kill any debug uses outside LOOP of SSA names defined in STMT.  */
5208
5209 static void
5210 vect_loop_kill_debug_uses (struct loop *loop, gimple stmt)
5211 {
5212   ssa_op_iter op_iter;
5213   imm_use_iterator imm_iter;
5214   def_operand_p def_p;
5215   gimple ustmt;
5216
5217   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
5218     {
5219       FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
5220         {
5221           basic_block bb;
5222
5223           if (!is_gimple_debug (ustmt))
5224             continue;
5225
5226           bb = gimple_bb (ustmt);
5227
5228           if (!flow_bb_inside_loop_p (loop, bb))
5229             {
5230               if (gimple_debug_bind_p (ustmt))
5231                 {
5232                   if (vect_print_dump_info (REPORT_DETAILS))
5233                     fprintf (vect_dump, "killing debug use");
5234
5235                   gimple_debug_bind_reset_value (ustmt);
5236                   update_stmt (ustmt);
5237                 }
5238               else
5239                 gcc_unreachable ();
5240             }
5241         }
5242     }
5243 }
5244
5245 /* Function vect_transform_loop.
5246
5247    The analysis phase has determined that the loop is vectorizable.
5248    Vectorize the loop - created vectorized stmts to replace the scalar
5249    stmts in the loop, and update the loop exit condition.  */
5250
5251 void
5252 vect_transform_loop (loop_vec_info loop_vinfo)
5253 {
5254   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5255   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5256   int nbbs = loop->num_nodes;
5257   gimple_stmt_iterator si;
5258   int i;
5259   tree ratio = NULL;
5260   int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5261   bool grouped_store;
5262   bool slp_scheduled = false;
5263   unsigned int nunits;
5264   gimple stmt, pattern_stmt;
5265   gimple_seq pattern_def_seq = NULL;
5266   gimple_stmt_iterator pattern_def_si = gsi_none ();
5267   bool transform_pattern_stmt = false;
5268   bool check_profitability;
5269   int th;
5270
5271   if (vect_print_dump_info (REPORT_DETAILS))
5272     fprintf (vect_dump, "=== vec_transform_loop ===");
5273
5274   /* Use the more conservative vectorization threshold.  If the number
5275      of iterations is constant assume the cost check has been performed
5276      by our caller.  If the threshold makes all loops profitable that
5277      run at least the vectorization factor number of times checking
5278      is pointless, too.  */
5279   th = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
5280          * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
5281   th = MAX (th, LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo));
5282   if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1
5283       && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5284     {
5285       if (vect_print_dump_info (REPORT_COST))
5286         fprintf (vect_dump,
5287                  "Profitability threshold is %d loop iterations.", th);
5288       check_profitability = true;
5289     }
5290
5291   /* Peel the loop if there are data refs with unknown alignment.
5292      Only one data ref with unknown store is allowed.  */
5293
5294   if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
5295     {
5296       vect_do_peeling_for_alignment (loop_vinfo, th, check_profitability);
5297       check_profitability = false;
5298     }
5299
5300   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
5301       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
5302     {
5303       vect_loop_versioning (loop_vinfo, th, check_profitability);
5304       check_profitability = false;
5305     }
5306
5307   /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
5308      compile time constant), or it is a constant that doesn't divide by the
5309      vectorization factor, then an epilog loop needs to be created.
5310      We therefore duplicate the loop: the original loop will be vectorized,
5311      and will compute the first (n/VF) iterations.  The second copy of the loop
5312      will remain scalar and will compute the remaining (n%VF) iterations.
5313      (VF is the vectorization factor).  */
5314
5315   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5316        || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5317            && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
5318        || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
5319     vect_do_peeling_for_loop_bound (loop_vinfo, &ratio,
5320                                     th, check_profitability);
5321   else
5322     ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
5323                 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
5324
5325   /* 1) Make sure the loop header has exactly two entries
5326      2) Make sure we have a preheader basic block.  */
5327
5328   gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
5329
5330   split_edge (loop_preheader_edge (loop));
5331
5332   /* FORNOW: the vectorizer supports only loops which body consist
5333      of one basic block (header + empty latch). When the vectorizer will
5334      support more involved loop forms, the order by which the BBs are
5335      traversed need to be reconsidered.  */
5336
5337   for (i = 0; i < nbbs; i++)
5338     {
5339       basic_block bb = bbs[i];
5340       stmt_vec_info stmt_info;
5341       gimple phi;
5342
5343       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
5344         {
5345           phi = gsi_stmt (si);
5346           if (vect_print_dump_info (REPORT_DETAILS))
5347             {
5348               fprintf (vect_dump, "------>vectorizing phi: ");
5349               print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
5350             }
5351           stmt_info = vinfo_for_stmt (phi);
5352           if (!stmt_info)
5353             continue;
5354
5355           if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
5356             vect_loop_kill_debug_uses (loop, phi);
5357
5358           if (!STMT_VINFO_RELEVANT_P (stmt_info)
5359               && !STMT_VINFO_LIVE_P (stmt_info))
5360             continue;
5361
5362           if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
5363                 != (unsigned HOST_WIDE_INT) vectorization_factor)
5364               && vect_print_dump_info (REPORT_DETAILS))
5365             fprintf (vect_dump, "multiple-types.");
5366
5367           if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
5368             {
5369               if (vect_print_dump_info (REPORT_DETAILS))
5370                 fprintf (vect_dump, "transform phi.");
5371               vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
5372             }
5373         }
5374
5375       pattern_stmt = NULL;
5376       for (si = gsi_start_bb (bb); !gsi_end_p (si) || transform_pattern_stmt;)
5377         {
5378           bool is_store;
5379
5380           if (transform_pattern_stmt)
5381             stmt = pattern_stmt;
5382           else
5383             stmt = gsi_stmt (si);
5384
5385           if (vect_print_dump_info (REPORT_DETAILS))
5386             {
5387               fprintf (vect_dump, "------>vectorizing statement: ");
5388               print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5389             }
5390
5391           stmt_info = vinfo_for_stmt (stmt);
5392
5393           /* vector stmts created in the outer-loop during vectorization of
5394              stmts in an inner-loop may not have a stmt_info, and do not
5395              need to be vectorized.  */
5396           if (!stmt_info)
5397             {
5398               gsi_next (&si);
5399               continue;
5400             }
5401
5402           if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
5403             vect_loop_kill_debug_uses (loop, stmt);
5404
5405           if (!STMT_VINFO_RELEVANT_P (stmt_info)
5406               && !STMT_VINFO_LIVE_P (stmt_info))
5407             {
5408               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5409                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
5410                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5411                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5412                 {
5413                   stmt = pattern_stmt;
5414                   stmt_info = vinfo_for_stmt (stmt);
5415                 }
5416               else
5417                 {
5418                   gsi_next (&si);
5419                   continue;
5420                 }
5421             }
5422           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5423                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
5424                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5425                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5426             transform_pattern_stmt = true;
5427
5428           /* If pattern statement has def stmts, vectorize them too.  */
5429           if (is_pattern_stmt_p (stmt_info))
5430             {
5431               if (pattern_def_seq == NULL)
5432                 {
5433                   pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
5434                   pattern_def_si = gsi_start (pattern_def_seq);
5435                 }
5436               else if (!gsi_end_p (pattern_def_si))
5437                 gsi_next (&pattern_def_si);
5438               if (pattern_def_seq != NULL)
5439                 {
5440                   gimple pattern_def_stmt = NULL;
5441                   stmt_vec_info pattern_def_stmt_info = NULL;
5442
5443                   while (!gsi_end_p (pattern_def_si))
5444                     {
5445                       pattern_def_stmt = gsi_stmt (pattern_def_si);
5446                       pattern_def_stmt_info
5447                         = vinfo_for_stmt (pattern_def_stmt);
5448                       if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
5449                           || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
5450                         break;
5451                       gsi_next (&pattern_def_si);
5452                     }
5453
5454                   if (!gsi_end_p (pattern_def_si))
5455                     {
5456                       if (vect_print_dump_info (REPORT_DETAILS))
5457                         {
5458                           fprintf (vect_dump, "==> vectorizing pattern def"
5459                                               " stmt: ");
5460                           print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
5461                                              TDF_SLIM);
5462                         }
5463
5464                       stmt = pattern_def_stmt;
5465                       stmt_info = pattern_def_stmt_info;
5466                     }
5467                   else
5468                     {
5469                       pattern_def_si = gsi_none ();
5470                       transform_pattern_stmt = false;
5471                     }
5472                 }
5473               else
5474                 transform_pattern_stmt = false;
5475             }
5476
5477           gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5478           nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
5479                                                STMT_VINFO_VECTYPE (stmt_info));
5480           if (!STMT_SLP_TYPE (stmt_info)
5481               && nunits != (unsigned int) vectorization_factor
5482               && vect_print_dump_info (REPORT_DETAILS))
5483             /* For SLP VF is set according to unrolling factor, and not to
5484                vector size, hence for SLP this print is not valid.  */
5485             fprintf (vect_dump, "multiple-types.");
5486
5487           /* SLP. Schedule all the SLP instances when the first SLP stmt is
5488              reached.  */
5489           if (STMT_SLP_TYPE (stmt_info))
5490             {
5491               if (!slp_scheduled)
5492                 {
5493                   slp_scheduled = true;
5494
5495                   if (vect_print_dump_info (REPORT_DETAILS))
5496                     fprintf (vect_dump, "=== scheduling SLP instances ===");
5497
5498                   vect_schedule_slp (loop_vinfo, NULL);
5499                 }
5500
5501               /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
5502               if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
5503                 {
5504                   if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
5505                     {
5506                       pattern_def_seq = NULL;
5507                       gsi_next (&si);
5508                     }
5509                   continue;
5510                 }
5511             }
5512
5513           /* -------- vectorize statement ------------ */
5514           if (vect_print_dump_info (REPORT_DETAILS))
5515             fprintf (vect_dump, "transform statement.");
5516
5517           grouped_store = false;
5518           is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
5519           if (is_store)
5520             {
5521               if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5522                 {
5523                   /* Interleaving. If IS_STORE is TRUE, the vectorization of the
5524                      interleaving chain was completed - free all the stores in
5525                      the chain.  */
5526                   gsi_next (&si);
5527                   vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
5528                   continue;
5529                 }
5530               else
5531                 {
5532                   /* Free the attached stmt_vec_info and remove the stmt.  */
5533                   gimple store = gsi_stmt (si);
5534                   free_stmt_vec_info (store);
5535                   unlink_stmt_vdef (store);
5536                   gsi_remove (&si, true);
5537                   release_defs (store);
5538                   continue;
5539                 }
5540             }
5541
5542           if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
5543             {
5544               pattern_def_seq = NULL;
5545               gsi_next (&si);
5546             }
5547         }                       /* stmts in BB */
5548     }                           /* BBs in loop */
5549
5550   slpeel_make_loop_iterate_ntimes (loop, ratio);
5551
5552   /* The memory tags and pointers in vectorized statements need to
5553      have their SSA forms updated.  FIXME, why can't this be delayed
5554      until all the loops have been transformed?  */
5555   update_ssa (TODO_update_ssa);
5556
5557   if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
5558     fprintf (vect_dump, "LOOP VECTORIZED.");
5559   if (loop->inner && vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
5560     fprintf (vect_dump, "OUTER LOOP VECTORIZED.");
5561 }