gcc/tree-vect-data-refs.c

   1 /* Data References Analysis and Manipulation Utilities for Vectorization.
   2    Copyright (C) 2003-2014 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4    and Ira Rosen <irar@il.ibm.com>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "dumpfile.h"
  26 #include "tm.h"
  27 #include "tree.h"
  28 #include "stor-layout.h"
  29 #include "tm_p.h"
  30 #include "target.h"
  31 #include "basic-block.h"
  32 #include "gimple-pretty-print.h"
  33 #include "tree-ssa-alias.h"
  34 #include "internal-fn.h"
  35 #include "tree-eh.h"
  36 #include "gimple-expr.h"
  37 #include "is-a.h"
  38 #include "gimple.h"
  39 #include "gimplify.h"
  40 #include "gimple-iterator.h"
  41 #include "gimplify-me.h"
  42 #include "gimple-ssa.h"
  43 #include "tree-phinodes.h"
  44 #include "ssa-iterators.h"
  45 #include "stringpool.h"
  46 #include "tree-ssanames.h"
  47 #include "tree-ssa-loop-ivopts.h"
  48 #include "tree-ssa-loop-manip.h"
  49 #include "tree-ssa-loop.h"
  50 #include "dumpfile.h"
  51 #include "cfgloop.h"
  52 #include "tree-chrec.h"
  53 #include "tree-scalar-evolution.h"
  54 #include "tree-vectorizer.h"
  55 #include "diagnostic-core.h"
  56 #include "cgraph.h"
  57 /* Need to include rtl.h, expr.h, etc. for optabs.  */
  58 #include "expr.h"
  59 #include "optabs.h"
  60
  61 /* Return true if load- or store-lanes optab OPTAB is implemented for
  62    COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */
  63
  64 static bool
  65 vect_lanes_optab_supported_p (const char *name, convert_optab optab,
  66                               tree vectype, unsigned HOST_WIDE_INT count)
  67 {
  68   enum machine_mode mode, array_mode;
  69   bool limit_p;
  70
  71   mode = TYPE_MODE (vectype);
  72   limit_p = !targetm.array_mode_supported_p (mode, count);
  73   array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
  74                               MODE_INT, limit_p);
  75
  76   if (array_mode == BLKmode)
  77     {
  78       if (dump_enabled_p ())
  79         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  80                          "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]\n",
  81                          GET_MODE_NAME (mode), count);
  82       return false;
  83     }
  84
  85   if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
  86     {
  87       if (dump_enabled_p ())
  88         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  89                          "cannot use %s<%s><%s>\n", name,
  90                          GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
  91       return false;
  92     }
  93
  94   if (dump_enabled_p ())
  95     dump_printf_loc (MSG_NOTE, vect_location,
  96                      "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
  97                      GET_MODE_NAME (mode));
  98
  99   return true;
 100 }
 101
 102
 103 /* Return the smallest scalar part of STMT.
 104    This is used to determine the vectype of the stmt.  We generally set the
 105    vectype according to the type of the result (lhs).  For stmts whose
 106    result-type is different than the type of the arguments (e.g., demotion,
 107    promotion), vectype will be reset appropriately (later).  Note that we have
 108    to visit the smallest datatype in this function, because that determines the
 109    VF.  If the smallest datatype in the loop is present only as the rhs of a
 110    promotion operation - we'd miss it.
 111    Such a case, where a variable of this datatype does not appear in the lhs
 112    anywhere in the loop, can only occur if it's an invariant: e.g.:
 113    'int_x = (int) short_inv', which we'd expect to have been optimized away by
 114    invariant motion.  However, we cannot rely on invariant motion to always
 115    take invariants out of the loop, and so in the case of promotion we also
 116    have to check the rhs.
 117    LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
 118    types.  */
 119
 120 tree
 121 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
 122                                HOST_WIDE_INT *rhs_size_unit)
 123 {
 124   tree scalar_type = gimple_expr_type (stmt);
 125   HOST_WIDE_INT lhs, rhs;
 126
 127   lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
 128
 129   if (is_gimple_assign (stmt)
 130       && (gimple_assign_cast_p (stmt)
 131           || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
 132           || gimple_assign_rhs_code (stmt) == WIDEN_LSHIFT_EXPR
 133           || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
 134     {
 135       tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
 136
 137       rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
 138       if (rhs < lhs)
 139         scalar_type = rhs_type;
 140     }
 141
 142   *lhs_size_unit = lhs;
 143   *rhs_size_unit = rhs;
 144   return scalar_type;
 145 }
 146
 147
 148 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
 149    tested at run-time.  Return TRUE if DDR was successfully inserted.
 150    Return false if versioning is not supported.  */
 151
 152 static bool
 153 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
 154 {
 155   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 156
 157   if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
 158     return false;
 159
 160   if (dump_enabled_p ())
 161     {
 162       dump_printf_loc (MSG_NOTE, vect_location,
 163                        "mark for run-time aliasing test between ");
 164       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
 165       dump_printf (MSG_NOTE,  " and ");
 166       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
 167       dump_printf (MSG_NOTE, "\n");
 168     }
 169
 170   if (optimize_loop_nest_for_size_p (loop))
 171     {
 172       if (dump_enabled_p ())
 173         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 174                          "versioning not supported when optimizing"
 175                          " for size.\n");
 176       return false;
 177     }
 178
 179   /* FORNOW: We don't support versioning with outer-loop vectorization.  */
 180   if (loop->inner)
 181     {
 182       if (dump_enabled_p ())
 183         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 184                          "versioning not yet supported for outer-loops.\n");
 185       return false;
 186     }
 187
 188   /* FORNOW: We don't support creating runtime alias tests for non-constant
 189      step.  */
 190   if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
 191       || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
 192     {
 193       if (dump_enabled_p ())
 194         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 195                          "versioning not yet supported for non-constant "
 196                          "step\n");
 197       return false;
 198     }
 199
 200   LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
 201   return true;
 202 }
 203
 204
 205 /* Function vect_analyze_data_ref_dependence.
 206
 207    Return TRUE if there (might) exist a dependence between a memory-reference
 208    DRA and a memory-reference DRB.  When versioning for alias may check a
 209    dependence at run-time, return FALSE.  Adjust *MAX_VF according to
 210    the data dependence.  */
 211
 212 static bool
 213 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
 214                                   loop_vec_info loop_vinfo, int *max_vf)
 215 {
 216   unsigned int i;
 217   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 218   struct data_reference *dra = DDR_A (ddr);
 219   struct data_reference *drb = DDR_B (ddr);
 220   stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
 221   stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
 222   lambda_vector dist_v;
 223   unsigned int loop_depth;
 224
 225   /* In loop analysis all data references should be vectorizable.  */
 226   if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
 227       || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
 228     gcc_unreachable ();
 229
 230   /* Independent data accesses.  */
 231   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 232     return false;
 233
 234   if (dra == drb
 235       || (DR_IS_READ (dra) && DR_IS_READ (drb)))
 236     return false;
 237
 238   /* Even if we have an anti-dependence then, as the vectorized loop covers at
 239      least two scalar iterations, there is always also a true dependence.
 240      As the vectorizer does not re-order loads and stores we can ignore
 241      the anti-dependence if TBAA can disambiguate both DRs similar to the
 242      case with known negative distance anti-dependences (positive
 243      distance anti-dependences would violate TBAA constraints).  */
 244   if (((DR_IS_READ (dra) && DR_IS_WRITE (drb))
 245        || (DR_IS_WRITE (dra) && DR_IS_READ (drb)))
 246       && !alias_sets_conflict_p (get_alias_set (DR_REF (dra)),
 247                                  get_alias_set (DR_REF (drb))))
 248     return false;
 249
 250   /* Unknown data dependence.  */
 251   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 252     {
 253       /* If user asserted safelen consecutive iterations can be
 254          executed concurrently, assume independence.  */
 255       if (loop->safelen >= 2)
 256         {
 257           if (loop->safelen < *max_vf)
 258             *max_vf = loop->safelen;
 259           LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
 260           return false;
 261         }
 262
 263       if (STMT_VINFO_GATHER_P (stmtinfo_a)
 264           || STMT_VINFO_GATHER_P (stmtinfo_b))
 265         {
 266           if (dump_enabled_p ())
 267             {
 268               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 269                                "versioning for alias not supported for: "
 270                                "can't determine dependence between ");
 271               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 272                                  DR_REF (dra));
 273               dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
 274               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 275                                  DR_REF (drb));
 276               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 277             }
 278           return true;
 279         }
 280
 281       if (dump_enabled_p ())
 282         {
 283           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 284                            "versioning for alias required: "
 285                            "can't determine dependence between ");
 286           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 287                              DR_REF (dra));
 288           dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
 289           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 290                              DR_REF (drb));
 291           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 292         }
 293
 294       /* Add to list of ddrs that need to be tested at run-time.  */
 295       return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
 296     }
 297
 298   /* Known data dependence.  */
 299   if (DDR_NUM_DIST_VECTS (ddr) == 0)
 300     {
 301       /* If user asserted safelen consecutive iterations can be
 302          executed concurrently, assume independence.  */
 303       if (loop->safelen >= 2)
 304         {
 305           if (loop->safelen < *max_vf)
 306             *max_vf = loop->safelen;
 307           LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
 308           return false;
 309         }
 310
 311       if (STMT_VINFO_GATHER_P (stmtinfo_a)
 312           || STMT_VINFO_GATHER_P (stmtinfo_b))
 313         {
 314           if (dump_enabled_p ())
 315             {
 316               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 317                                "versioning for alias not supported for: "
 318                                "bad dist vector for ");
 319               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 320                                  DR_REF (dra));
 321               dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
 322               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 323                                  DR_REF (drb));
 324               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 325             }
 326           return true;
 327         }
 328
 329       if (dump_enabled_p ())
 330         {
 331           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 332                            "versioning for alias required: "
 333                            "bad dist vector for ");
 334           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
 335           dump_printf (MSG_MISSED_OPTIMIZATION,  " and ");
 336           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
 337           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 338         }
 339       /* Add to list of ddrs that need to be tested at run-time.  */
 340       return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
 341     }
 342
 343   loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
 344   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
 345     {
 346       int dist = dist_v[loop_depth];
 347
 348       if (dump_enabled_p ())
 349         dump_printf_loc (MSG_NOTE, vect_location,
 350                          "dependence distance  = %d.\n", dist);
 351
 352       if (dist == 0)
 353         {
 354           if (dump_enabled_p ())
 355             {
 356               dump_printf_loc (MSG_NOTE, vect_location,
 357                                "dependence distance == 0 between ");
 358               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
 359               dump_printf (MSG_NOTE, " and ");
 360               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
 361               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 362             }
 363
 364           /* When we perform grouped accesses and perform implicit CSE
 365              by detecting equal accesses and doing disambiguation with
 366              runtime alias tests like for
 367                 .. = a[i];
 368                 .. = a[i+1];
 369                 a[i] = ..;
 370                 a[i+1] = ..;
 371                 *p = ..;
 372                 .. = a[i];
 373                 .. = a[i+1];
 374              where we will end up loading { a[i], a[i+1] } once, make
 375              sure that inserting group loads before the first load and
 376              stores after the last store will do the right thing.  */
 377           if ((STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
 378                && GROUP_SAME_DR_STMT (stmtinfo_a))
 379               || (STMT_VINFO_GROUPED_ACCESS (stmtinfo_b)
 380                   && GROUP_SAME_DR_STMT (stmtinfo_b)))
 381             {
 382               gimple earlier_stmt;
 383               earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
 384               if (DR_IS_WRITE
 385                     (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
 386                 {
 387                   if (dump_enabled_p ())
 388                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 389                                      "READ_WRITE dependence in interleaving."
 390                                      "\n");
 391                   return true;
 392                 }
 393             }
 394
 395           continue;
 396         }
 397
 398       if (dist > 0 && DDR_REVERSED_P (ddr))
 399         {
 400           /* If DDR_REVERSED_P the order of the data-refs in DDR was
 401              reversed (to make distance vector positive), and the actual
 402              distance is negative.  */
 403           if (dump_enabled_p ())
 404             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 405                              "dependence distance negative.\n");
 406           /* Record a negative dependence distance to later limit the
 407              amount of stmt copying / unrolling we can perform.
 408              Only need to handle read-after-write dependence.  */
 409           if (DR_IS_READ (drb)
 410               && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0
 411                   || STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) > (unsigned)dist))
 412             STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) = dist;
 413           continue;
 414         }
 415
 416       if (abs (dist) >= 2
 417           && abs (dist) < *max_vf)
 418         {
 419           /* The dependence distance requires reduction of the maximal
 420              vectorization factor.  */
 421           *max_vf = abs (dist);
 422           if (dump_enabled_p ())
 423             dump_printf_loc (MSG_NOTE, vect_location,
 424                              "adjusting maximal vectorization factor to %i\n",
 425                              *max_vf);
 426         }
 427
 428       if (abs (dist) >= *max_vf)
 429         {
 430           /* Dependence distance does not create dependence, as far as
 431              vectorization is concerned, in this case.  */
 432           if (dump_enabled_p ())
 433             dump_printf_loc (MSG_NOTE, vect_location,
 434                              "dependence distance >= VF.\n");
 435           continue;
 436         }
 437
 438       if (dump_enabled_p ())
 439         {
 440           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 441                        "not vectorized, possible dependence "
 442                        "between data-refs ");
 443           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
 444           dump_printf (MSG_NOTE,  " and ");
 445           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
 446           dump_printf (MSG_NOTE,  "\n");
 447         }
 448
 449       return true;
 450     }
 451
 452   return false;
 453 }
 454
 455 /* Function vect_analyze_data_ref_dependences.
 456
 457    Examine all the data references in the loop, and make sure there do not
 458    exist any data dependences between them.  Set *MAX_VF according to
 459    the maximum vectorization factor the data dependences allow.  */
 460
 461 bool
 462 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo, int *max_vf)
 463 {
 464   unsigned int i;
 465   struct data_dependence_relation *ddr;
 466
 467   if (dump_enabled_p ())
 468     dump_printf_loc (MSG_NOTE, vect_location,
 469                      "=== vect_analyze_data_ref_dependences ===\n");
 470
 471   LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true;
 472   if (!compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
 473                                 &LOOP_VINFO_DDRS (loop_vinfo),
 474                                 LOOP_VINFO_LOOP_NEST (loop_vinfo), true))
 475     return false;
 476
 477   FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
 478     if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
 479       return false;
 480
 481   return true;
 482 }
 483
 484
 485 /* Function vect_slp_analyze_data_ref_dependence.
 486
 487    Return TRUE if there (might) exist a dependence between a memory-reference
 488    DRA and a memory-reference DRB.  When versioning for alias may check a
 489    dependence at run-time, return FALSE.  Adjust *MAX_VF according to
 490    the data dependence.  */
 491
 492 static bool
 493 vect_slp_analyze_data_ref_dependence (struct data_dependence_relation *ddr)
 494 {
 495   struct data_reference *dra = DDR_A (ddr);
 496   struct data_reference *drb = DDR_B (ddr);
 497
 498   /* We need to check dependences of statements marked as unvectorizable
 499      as well, they still can prohibit vectorization.  */
 500
 501   /* Independent data accesses.  */
 502   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 503     return false;
 504
 505   if (dra == drb)
 506     return false;
 507
 508   /* Read-read is OK.  */
 509   if (DR_IS_READ (dra) && DR_IS_READ (drb))
 510     return false;
 511
 512   /* If dra and drb are part of the same interleaving chain consider
 513      them independent.  */
 514   if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (DR_STMT (dra)))
 515       && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (dra)))
 516           == GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (drb)))))
 517     return false;
 518
 519   /* Unknown data dependence.  */
 520   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 521     {
 522       if  (dump_enabled_p ())
 523         {
 524           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 525                            "can't determine dependence between ");
 526           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
 527           dump_printf (MSG_MISSED_OPTIMIZATION,  " and ");
 528           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
 529           dump_printf (MSG_MISSED_OPTIMIZATION,  "\n");
 530         }
 531     }
 532   else if (dump_enabled_p ())
 533     {
 534       dump_printf_loc (MSG_NOTE, vect_location,
 535                        "determined dependence between ");
 536       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
 537       dump_printf (MSG_NOTE, " and ");
 538       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
 539       dump_printf (MSG_NOTE,  "\n");
 540     }
 541
 542   /* We do not vectorize basic blocks with write-write dependencies.  */
 543   if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
 544     return true;
 545
 546   /* If we have a read-write dependence check that the load is before the store.
 547      When we vectorize basic blocks, vector load can be only before
 548      corresponding scalar load, and vector store can be only after its
 549      corresponding scalar store.  So the order of the acceses is preserved in
 550      case the load is before the store.  */
 551   gimple earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
 552   if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
 553     {
 554       /* That only holds for load-store pairs taking part in vectorization.  */
 555       if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dra)))
 556           && STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (drb))))
 557         return false;
 558     }
 559
 560   return true;
 561 }
 562
 563
 564 /* Function vect_analyze_data_ref_dependences.
 565
 566    Examine all the data references in the basic-block, and make sure there
 567    do not exist any data dependences between them.  Set *MAX_VF according to
 568    the maximum vectorization factor the data dependences allow.  */
 569
 570 bool
 571 vect_slp_analyze_data_ref_dependences (bb_vec_info bb_vinfo)
 572 {
 573   struct data_dependence_relation *ddr;
 574   unsigned int i;
 575
 576   if (dump_enabled_p ())
 577     dump_printf_loc (MSG_NOTE, vect_location,
 578                      "=== vect_slp_analyze_data_ref_dependences ===\n");
 579
 580   if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo),
 581                                 &BB_VINFO_DDRS (bb_vinfo),
 582                                 vNULL, true))
 583     return false;
 584
 585   FOR_EACH_VEC_ELT (BB_VINFO_DDRS (bb_vinfo), i, ddr)
 586     if (vect_slp_analyze_data_ref_dependence (ddr))
 587       return false;
 588
 589   return true;
 590 }
 591
 592
 593 /* Function vect_compute_data_ref_alignment
 594
 595    Compute the misalignment of the data reference DR.
 596
 597    Output:
 598    1. If during the misalignment computation it is found that the data reference
 599       cannot be vectorized then false is returned.
 600    2. DR_MISALIGNMENT (DR) is defined.
 601
 602    FOR NOW: No analysis is actually performed. Misalignment is calculated
 603    only for trivial cases. TODO.  */
 604
 605 static bool
 606 vect_compute_data_ref_alignment (struct data_reference *dr)
 607 {
 608   gimple stmt = DR_STMT (dr);
 609   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 610   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
 611   struct loop *loop = NULL;
 612   tree ref = DR_REF (dr);
 613   tree vectype;
 614   tree base, base_addr;
 615   bool base_aligned;
 616   tree misalign;
 617   tree aligned_to, alignment;
 618
 619   if (dump_enabled_p ())
 620     dump_printf_loc (MSG_NOTE, vect_location,
 621                      "vect_compute_data_ref_alignment:\n");
 622
 623   if (loop_vinfo)
 624     loop = LOOP_VINFO_LOOP (loop_vinfo);
 625
 626   /* Initialize misalignment to unknown.  */
 627   SET_DR_MISALIGNMENT (dr, -1);
 628
 629   /* Strided loads perform only component accesses, misalignment information
 630      is irrelevant for them.  */
 631   if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
 632     return true;
 633
 634   misalign = DR_INIT (dr);
 635   aligned_to = DR_ALIGNED_TO (dr);
 636   base_addr = DR_BASE_ADDRESS (dr);
 637   vectype = STMT_VINFO_VECTYPE (stmt_info);
 638
 639   /* In case the dataref is in an inner-loop of the loop that is being
 640      vectorized (LOOP), we use the base and misalignment information
 641      relative to the outer-loop (LOOP).  This is ok only if the misalignment
 642      stays the same throughout the execution of the inner-loop, which is why
 643      we have to check that the stride of the dataref in the inner-loop evenly
 644      divides by the vector size.  */
 645   if (loop && nested_in_vect_loop_p (loop, stmt))
 646     {
 647       tree step = DR_STEP (dr);
 648       HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
 649
 650       if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
 651         {
 652           if (dump_enabled_p ())
 653             dump_printf_loc (MSG_NOTE, vect_location,
 654                              "inner step divides the vector-size.\n");
 655           misalign = STMT_VINFO_DR_INIT (stmt_info);
 656           aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
 657           base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
 658         }
 659       else
 660         {
 661           if (dump_enabled_p ())
 662             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 663                              "inner step doesn't divide the vector-size.\n");
 664           misalign = NULL_TREE;
 665         }
 666     }
 667
 668   /* Similarly, if we're doing basic-block vectorization, we can only use
 669      base and misalignment information relative to an innermost loop if the
 670      misalignment stays the same throughout the execution of the loop.
 671      As above, this is the case if the stride of the dataref evenly divides
 672      by the vector size.  */
 673   if (!loop)
 674     {
 675       tree step = DR_STEP (dr);
 676       HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
 677
 678       if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
 679         {
 680           if (dump_enabled_p ())
 681             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 682                              "SLP: step doesn't divide the vector-size.\n");
 683           misalign = NULL_TREE;
 684         }
 685     }
 686
 687   base = build_fold_indirect_ref (base_addr);
 688   alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
 689
 690   if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
 691       || !misalign)
 692     {
 693       if (dump_enabled_p ())
 694         {
 695           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 696                            "Unknown alignment for access: ");
 697           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, base);
 698           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 699         }
 700       return true;
 701     }
 702
 703   if ((DECL_P (base)
 704        && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
 705                                 alignment) >= 0)
 706       || (TREE_CODE (base_addr) == SSA_NAME
 707           && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
 708                                                       TREE_TYPE (base_addr)))),
 709                                    alignment) >= 0)
 710       || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
 711     base_aligned = true;
 712   else
 713     base_aligned = false;
 714
 715   if (!base_aligned)
 716     {
 717       /* Do not change the alignment of global variables here if
 718          flag_section_anchors is enabled as we already generated
 719          RTL for other functions.  Most global variables should
 720          have been aligned during the IPA increase_alignment pass.  */
 721       if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
 722           || (TREE_STATIC (base) && flag_section_anchors))
 723         {
 724           if (dump_enabled_p ())
 725             {
 726               dump_printf_loc (MSG_NOTE, vect_location,
 727                                "can't force alignment of ref: ");
 728               dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
 729               dump_printf (MSG_NOTE, "\n");
 730             }
 731           return true;
 732         }
 733
 734       /* Force the alignment of the decl.
 735          NOTE: This is the only change to the code we make during
 736          the analysis phase, before deciding to vectorize the loop.  */
 737       if (dump_enabled_p ())
 738         {
 739           dump_printf_loc (MSG_NOTE, vect_location, "force alignment of ");
 740           dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
 741           dump_printf (MSG_NOTE, "\n");
 742         }
 743
 744       ((dataref_aux *)dr->aux)->base_decl = base;
 745       ((dataref_aux *)dr->aux)->base_misaligned = true;
 746     }
 747
 748   /* If this is a backward running DR then first access in the larger
 749      vectype actually is N-1 elements before the address in the DR.
 750      Adjust misalign accordingly.  */
 751   if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
 752     {
 753       tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
 754       /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
 755          otherwise we wouldn't be here.  */
 756       offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
 757       /* PLUS because DR_STEP was negative.  */
 758       misalign = size_binop (PLUS_EXPR, misalign, offset);
 759     }
 760
 761   /* Modulo alignment.  */
 762   misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
 763
 764   if (!tree_fits_uhwi_p (misalign))
 765     {
 766       /* Negative or overflowed misalignment value.  */
 767       if (dump_enabled_p ())
 768         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 769                          "unexpected misalign value\n");
 770       return false;
 771     }
 772
 773   SET_DR_MISALIGNMENT (dr, tree_to_uhwi (misalign));
 774
 775   if (dump_enabled_p ())
 776     {
 777       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 778                        "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
 779       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
 780       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 781     }
 782
 783   return true;
 784 }
 785
 786
 787 /* Function vect_compute_data_refs_alignment
 788
 789    Compute the misalignment of data references in the loop.
 790    Return FALSE if a data reference is found that cannot be vectorized.  */
 791
 792 static bool
 793 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
 794                                   bb_vec_info bb_vinfo)
 795 {
 796   vec<data_reference_p> datarefs;
 797   struct data_reference *dr;
 798   unsigned int i;
 799
 800   if (loop_vinfo)
 801     datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
 802   else
 803     datarefs = BB_VINFO_DATAREFS (bb_vinfo);
 804
 805   FOR_EACH_VEC_ELT (datarefs, i, dr)
 806     if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
 807         && !vect_compute_data_ref_alignment (dr))
 808       {
 809         if (bb_vinfo)
 810           {
 811             /* Mark unsupported statement as unvectorizable.  */
 812             STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
 813             continue;
 814           }
 815         else
 816           return false;
 817       }
 818
 819   return true;
 820 }
 821
 822
 823 /* Function vect_update_misalignment_for_peel
 824
 825    DR - the data reference whose misalignment is to be adjusted.
 826    DR_PEEL - the data reference whose misalignment is being made
 827              zero in the vector loop by the peel.
 828    NPEEL - the number of iterations in the peel loop if the misalignment
 829            of DR_PEEL is known at compile time.  */
 830
 831 static void
 832 vect_update_misalignment_for_peel (struct data_reference *dr,
 833                                    struct data_reference *dr_peel, int npeel)
 834 {
 835   unsigned int i;
 836   vec<dr_p> same_align_drs;
 837   struct data_reference *current_dr;
 838   int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
 839   int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
 840   stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
 841   stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
 842
 843  /* For interleaved data accesses the step in the loop must be multiplied by
 844      the size of the interleaving group.  */
 845   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
 846     dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
 847   if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info))
 848     dr_peel_size *= GROUP_SIZE (peel_stmt_info);
 849
 850   /* It can be assumed that the data refs with the same alignment as dr_peel
 851      are aligned in the vector loop.  */
 852   same_align_drs
 853     = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
 854   FOR_EACH_VEC_ELT (same_align_drs, i, current_dr)
 855     {
 856       if (current_dr != dr)
 857         continue;
 858       gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
 859                   DR_MISALIGNMENT (dr_peel) / dr_peel_size);
 860       SET_DR_MISALIGNMENT (dr, 0);
 861       return;
 862     }
 863
 864   if (known_alignment_for_access_p (dr)
 865       && known_alignment_for_access_p (dr_peel))
 866     {
 867       bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
 868       int misal = DR_MISALIGNMENT (dr);
 869       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
 870       misal += negative ? -npeel * dr_size : npeel * dr_size;
 871       misal &= (TYPE_ALIGN (vectype) / BITS_PER_UNIT) - 1;
 872       SET_DR_MISALIGNMENT (dr, misal);
 873       return;
 874     }
 875
 876   if (dump_enabled_p ())
 877     dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment to -1.\n");
 878   SET_DR_MISALIGNMENT (dr, -1);
 879 }
 880
 881
 882 /* Function vect_verify_datarefs_alignment
 883
 884    Return TRUE if all data references in the loop can be
 885    handled with respect to alignment.  */
 886
 887 bool
 888 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
 889 {
 890   vec<data_reference_p> datarefs;
 891   struct data_reference *dr;
 892   enum dr_alignment_support supportable_dr_alignment;
 893   unsigned int i;
 894
 895   if (loop_vinfo)
 896     datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
 897   else
 898     datarefs = BB_VINFO_DATAREFS (bb_vinfo);
 899
 900   FOR_EACH_VEC_ELT (datarefs, i, dr)
 901     {
 902       gimple stmt = DR_STMT (dr);
 903       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 904
 905       if (!STMT_VINFO_RELEVANT_P (stmt_info))
 906         continue;
 907
 908       /* For interleaving, only the alignment of the first access matters.
 909          Skip statements marked as not vectorizable.  */
 910       if ((STMT_VINFO_GROUPED_ACCESS (stmt_info)
 911            && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
 912           || !STMT_VINFO_VECTORIZABLE (stmt_info))
 913         continue;
 914
 915       /* Strided loads perform only component accesses, alignment is
 916          irrelevant for them.  */
 917       if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
 918         continue;
 919
 920       supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
 921       if (!supportable_dr_alignment)
 922         {
 923           if (dump_enabled_p ())
 924             {
 925               if (DR_IS_READ (dr))
 926                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 927                                  "not vectorized: unsupported unaligned load.");
 928               else
 929                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 930                                  "not vectorized: unsupported unaligned "
 931                                  "store.");
 932
 933               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 934                                  DR_REF (dr));
 935               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 936             }
 937           return false;
 938         }
 939       if (supportable_dr_alignment != dr_aligned && dump_enabled_p ())
 940         dump_printf_loc (MSG_NOTE, vect_location,
 941                          "Vectorizing an unaligned access.\n");
 942     }
 943   return true;
 944 }
 945
 946 /* Given an memory reference EXP return whether its alignment is less
 947    than its size.  */
 948
 949 static bool
 950 not_size_aligned (tree exp)
 951 {
 952   if (!tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (exp))))
 953     return true;
 954
 955   return (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (exp)))
 956           > get_object_alignment (exp));
 957 }
 958
 959 /* Function vector_alignment_reachable_p
 960
 961    Return true if vector alignment for DR is reachable by peeling
 962    a few loop iterations.  Return false otherwise.  */
 963
 964 static bool
 965 vector_alignment_reachable_p (struct data_reference *dr)
 966 {
 967   gimple stmt = DR_STMT (dr);
 968   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 969   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
 970
 971   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
 972     {
 973       /* For interleaved access we peel only if number of iterations in
 974          the prolog loop ({VF - misalignment}), is a multiple of the
 975          number of the interleaved accesses.  */
 976       int elem_size, mis_in_elements;
 977       int nelements = TYPE_VECTOR_SUBPARTS (vectype);
 978
 979       /* FORNOW: handle only known alignment.  */
 980       if (!known_alignment_for_access_p (dr))
 981         return false;
 982
 983       elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
 984       mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
 985
 986       if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
 987         return false;
 988     }
 989
 990   /* If misalignment is known at the compile time then allow peeling
 991      only if natural alignment is reachable through peeling.  */
 992   if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
 993     {
 994       HOST_WIDE_INT elmsize =
 995                 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
 996       if (dump_enabled_p ())
 997         {
 998           dump_printf_loc (MSG_NOTE, vect_location,
 999                            "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1000           dump_printf (MSG_NOTE,
1001                        ". misalignment = %d.\n", DR_MISALIGNMENT (dr));
1002         }
1003       if (DR_MISALIGNMENT (dr) % elmsize)
1004         {
1005           if (dump_enabled_p ())
1006             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1007                              "data size does not divide the misalignment.\n");
1008           return false;
1009         }
1010     }
1011
1012   if (!known_alignment_for_access_p (dr))
1013     {
1014       tree type = TREE_TYPE (DR_REF (dr));
1015       bool is_packed = not_size_aligned (DR_REF (dr));
1016       if (dump_enabled_p ())
1017         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1018                          "Unknown misalignment, is_packed = %d\n",is_packed);
1019       if ((TYPE_USER_ALIGN (type) && !is_packed)
1020           || targetm.vectorize.vector_alignment_reachable (type, is_packed))
1021         return true;
1022       else
1023         return false;
1024     }
1025
1026   return true;
1027 }
1028
1029
1030 /* Calculate the cost of the memory access represented by DR.  */
1031
1032 static void
1033 vect_get_data_access_cost (struct data_reference *dr,
1034                            unsigned int *inside_cost,
1035                            unsigned int *outside_cost,
1036                            stmt_vector_for_cost *body_cost_vec)
1037 {
1038   gimple stmt = DR_STMT (dr);
1039   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1040   int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1041   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1042   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1043   int ncopies = vf / nunits;
1044
1045   if (DR_IS_READ (dr))
1046     vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost,
1047                         NULL, body_cost_vec, false);
1048   else
1049     vect_get_store_cost (dr, ncopies, inside_cost, body_cost_vec);
1050
1051   if (dump_enabled_p ())
1052     dump_printf_loc (MSG_NOTE, vect_location,
1053                      "vect_get_data_access_cost: inside_cost = %d, "
1054                      "outside_cost = %d.\n", *inside_cost, *outside_cost);
1055 }
1056
1057
1058 /* Insert DR into peeling hash table with NPEEL as key.  */
1059
1060 static void
1061 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1062                           int npeel)
1063 {
1064   struct _vect_peel_info elem, *slot;
1065   _vect_peel_info **new_slot;
1066   bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1067
1068   elem.npeel = npeel;
1069   slot = LOOP_VINFO_PEELING_HTAB (loop_vinfo).find (&elem);
1070   if (slot)
1071     slot->count++;
1072   else
1073     {
1074       slot = XNEW (struct _vect_peel_info);
1075       slot->npeel = npeel;
1076       slot->dr = dr;
1077       slot->count = 1;
1078       new_slot = LOOP_VINFO_PEELING_HTAB (loop_vinfo).find_slot (slot, INSERT);
1079       *new_slot = slot;
1080     }
1081
1082   if (!supportable_dr_alignment
1083       && unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
1084     slot->count += VECT_MAX_COST;
1085 }
1086
1087
1088 /* Traverse peeling hash table to find peeling option that aligns maximum
1089    number of data accesses.  */
1090
1091 int
1092 vect_peeling_hash_get_most_frequent (_vect_peel_info **slot,
1093                                      _vect_peel_extended_info *max)
1094 {
1095   vect_peel_info elem = *slot;
1096
1097   if (elem->count > max->peel_info.count
1098       || (elem->count == max->peel_info.count
1099           && max->peel_info.npeel > elem->npeel))
1100     {
1101       max->peel_info.npeel = elem->npeel;
1102       max->peel_info.count = elem->count;
1103       max->peel_info.dr = elem->dr;
1104     }
1105
1106   return 1;
1107 }
1108
1109
1110 /* Traverse peeling hash table and calculate cost for each peeling option.
1111    Find the one with the lowest cost.  */
1112
1113 int
1114 vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
1115                                    _vect_peel_extended_info *min)
1116 {
1117   vect_peel_info elem = *slot;
1118   int save_misalignment, dummy;
1119   unsigned int inside_cost = 0, outside_cost = 0, i;
1120   gimple stmt = DR_STMT (elem->dr);
1121   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1122   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1123   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1124   struct data_reference *dr;
1125   stmt_vector_for_cost prologue_cost_vec, body_cost_vec, epilogue_cost_vec;
1126   int single_iter_cost;
1127
1128   prologue_cost_vec.create (2);
1129   body_cost_vec.create (2);
1130   epilogue_cost_vec.create (2);
1131
1132   FOR_EACH_VEC_ELT (datarefs, i, dr)
1133     {
1134       stmt = DR_STMT (dr);
1135       stmt_info = vinfo_for_stmt (stmt);
1136       /* For interleaving, only the alignment of the first access
1137          matters.  */
1138       if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1139           && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1140         continue;
1141
1142       save_misalignment = DR_MISALIGNMENT (dr);
1143       vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1144       vect_get_data_access_cost (dr, &inside_cost, &outside_cost,
1145                                  &body_cost_vec);
1146       SET_DR_MISALIGNMENT (dr, save_misalignment);
1147     }
1148
1149   single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
1150   outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel,
1151                                                &dummy, single_iter_cost,
1152                                                &prologue_cost_vec,
1153                                                &epilogue_cost_vec);
1154
1155   /* Prologue and epilogue costs are added to the target model later.
1156      These costs depend only on the scalar iteration cost, the
1157      number of peeling iterations finally chosen, and the number of
1158      misaligned statements.  So discard the information found here.  */
1159   prologue_cost_vec.release ();
1160   epilogue_cost_vec.release ();
1161
1162   if (inside_cost < min->inside_cost
1163       || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1164     {
1165       min->inside_cost = inside_cost;
1166       min->outside_cost = outside_cost;
1167       min->body_cost_vec.release ();
1168       min->body_cost_vec = body_cost_vec;
1169       min->peel_info.dr = elem->dr;
1170       min->peel_info.npeel = elem->npeel;
1171     }
1172   else
1173     body_cost_vec.release ();
1174
1175   return 1;
1176 }
1177
1178
1179 /* Choose best peeling option by traversing peeling hash table and either
1180    choosing an option with the lowest cost (if cost model is enabled) or the
1181    option that aligns as many accesses as possible.  */
1182
1183 static struct data_reference *
1184 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1185                                        unsigned int *npeel,
1186                                        stmt_vector_for_cost *body_cost_vec)
1187 {
1188    struct _vect_peel_extended_info res;
1189
1190    res.peel_info.dr = NULL;
1191    res.body_cost_vec = stmt_vector_for_cost ();
1192
1193    if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
1194      {
1195        res.inside_cost = INT_MAX;
1196        res.outside_cost = INT_MAX;
1197        LOOP_VINFO_PEELING_HTAB (loop_vinfo)
1198            .traverse <_vect_peel_extended_info *,
1199                       vect_peeling_hash_get_lowest_cost> (&res);
1200      }
1201    else
1202      {
1203        res.peel_info.count = 0;
1204        LOOP_VINFO_PEELING_HTAB (loop_vinfo)
1205            .traverse <_vect_peel_extended_info *,
1206                       vect_peeling_hash_get_most_frequent> (&res);
1207      }
1208
1209    *npeel = res.peel_info.npeel;
1210    *body_cost_vec = res.body_cost_vec;
1211    return res.peel_info.dr;
1212 }
1213
1214
1215 /* Function vect_enhance_data_refs_alignment
1216
1217    This pass will use loop versioning and loop peeling in order to enhance
1218    the alignment of data references in the loop.
1219
1220    FOR NOW: we assume that whatever versioning/peeling takes place, only the
1221    original loop is to be vectorized.  Any other loops that are created by
1222    the transformations performed in this pass - are not supposed to be
1223    vectorized.  This restriction will be relaxed.
1224
1225    This pass will require a cost model to guide it whether to apply peeling
1226    or versioning or a combination of the two.  For example, the scheme that
1227    intel uses when given a loop with several memory accesses, is as follows:
1228    choose one memory access ('p') which alignment you want to force by doing
1229    peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
1230    other accesses are not necessarily aligned, or (2) use loop versioning to
1231    generate one loop in which all accesses are aligned, and another loop in
1232    which only 'p' is necessarily aligned.
1233
1234    ("Automatic Intra-Register Vectorization for the Intel Architecture",
1235    Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1236    Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1237
1238    Devising a cost model is the most critical aspect of this work.  It will
1239    guide us on which access to peel for, whether to use loop versioning, how
1240    many versions to create, etc.  The cost model will probably consist of
1241    generic considerations as well as target specific considerations (on
1242    powerpc for example, misaligned stores are more painful than misaligned
1243    loads).
1244
1245    Here are the general steps involved in alignment enhancements:
1246
1247      -- original loop, before alignment analysis:
1248         for (i=0; i<N; i++){
1249           x = q[i];                     # DR_MISALIGNMENT(q) = unknown
1250           p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
1251         }
1252
1253      -- After vect_compute_data_refs_alignment:
1254         for (i=0; i<N; i++){
1255           x = q[i];                     # DR_MISALIGNMENT(q) = 3
1256           p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
1257         }
1258
1259      -- Possibility 1: we do loop versioning:
1260      if (p is aligned) {
1261         for (i=0; i<N; i++){    # loop 1A
1262           x = q[i];                     # DR_MISALIGNMENT(q) = 3
1263           p[i] = y;                     # DR_MISALIGNMENT(p) = 0
1264         }
1265      }
1266      else {
1267         for (i=0; i<N; i++){    # loop 1B
1268           x = q[i];                     # DR_MISALIGNMENT(q) = 3
1269           p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
1270         }
1271      }
1272
1273      -- Possibility 2: we do loop peeling:
1274      for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
1275         x = q[i];
1276         p[i] = y;
1277      }
1278      for (i = 3; i < N; i++){   # loop 2A
1279         x = q[i];                       # DR_MISALIGNMENT(q) = 0
1280         p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
1281      }
1282
1283      -- Possibility 3: combination of loop peeling and versioning:
1284      for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
1285         x = q[i];
1286         p[i] = y;
1287      }
1288      if (p is aligned) {
1289         for (i = 3; i<N; i++){  # loop 3A
1290           x = q[i];                     # DR_MISALIGNMENT(q) = 0
1291           p[i] = y;                     # DR_MISALIGNMENT(p) = 0
1292         }
1293      }
1294      else {
1295         for (i = 3; i<N; i++){  # loop 3B
1296           x = q[i];                     # DR_MISALIGNMENT(q) = 0
1297           p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
1298         }
1299      }
1300
1301      These loops are later passed to loop_transform to be vectorized.  The
1302      vectorizer will use the alignment information to guide the transformation
1303      (whether to generate regular loads/stores, or with special handling for
1304      misalignment).  */
1305
1306 bool
1307 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1308 {
1309   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1310   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1311   enum dr_alignment_support supportable_dr_alignment;
1312   struct data_reference *dr0 = NULL, *first_store = NULL;
1313   struct data_reference *dr;
1314   unsigned int i, j;
1315   bool do_peeling = false;
1316   bool do_versioning = false;
1317   bool stat;
1318   gimple stmt;
1319   stmt_vec_info stmt_info;
1320   unsigned int npeel = 0;
1321   bool all_misalignments_unknown = true;
1322   unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1323   unsigned possible_npeel_number = 1;
1324   tree vectype;
1325   unsigned int nelements, mis, same_align_drs_max = 0;
1326   stmt_vector_for_cost body_cost_vec = stmt_vector_for_cost ();
1327
1328   if (dump_enabled_p ())
1329     dump_printf_loc (MSG_NOTE, vect_location,
1330                      "=== vect_enhance_data_refs_alignment ===\n");
1331
1332   /* While cost model enhancements are expected in the future, the high level
1333      view of the code at this time is as follows:
1334
1335      A) If there is a misaligned access then see if peeling to align
1336         this access can make all data references satisfy
1337         vect_supportable_dr_alignment.  If so, update data structures
1338         as needed and return true.
1339
1340      B) If peeling wasn't possible and there is a data reference with an
1341         unknown misalignment that does not satisfy vect_supportable_dr_alignment
1342         then see if loop versioning checks can be used to make all data
1343         references satisfy vect_supportable_dr_alignment.  If so, update
1344         data structures as needed and return true.
1345
1346      C) If neither peeling nor versioning were successful then return false if
1347         any data reference does not satisfy vect_supportable_dr_alignment.
1348
1349      D) Return true (all data references satisfy vect_supportable_dr_alignment).
1350
1351      Note, Possibility 3 above (which is peeling and versioning together) is not
1352      being done at this time.  */
1353
1354   /* (1) Peeling to force alignment.  */
1355
1356   /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1357      Considerations:
1358      + How many accesses will become aligned due to the peeling
1359      - How many accesses will become unaligned due to the peeling,
1360        and the cost of misaligned accesses.
1361      - The cost of peeling (the extra runtime checks, the increase
1362        in code size).  */
1363
1364   FOR_EACH_VEC_ELT (datarefs, i, dr)
1365     {
1366       stmt = DR_STMT (dr);
1367       stmt_info = vinfo_for_stmt (stmt);
1368
1369       if (!STMT_VINFO_RELEVANT_P (stmt_info))
1370         continue;
1371
1372       /* For interleaving, only the alignment of the first access
1373          matters.  */
1374       if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1375           && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1376         continue;
1377
1378       /* For invariant accesses there is nothing to enhance.  */
1379       if (integer_zerop (DR_STEP (dr)))
1380         continue;
1381
1382       /* Strided loads perform only component accesses, alignment is
1383          irrelevant for them.  */
1384       if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1385         continue;
1386
1387       supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1388       do_peeling = vector_alignment_reachable_p (dr);
1389       if (do_peeling)
1390         {
1391           if (known_alignment_for_access_p (dr))
1392             {
1393               unsigned int npeel_tmp;
1394               bool negative = tree_int_cst_compare (DR_STEP (dr),
1395                                                     size_zero_node) < 0;
1396
1397               /* Save info about DR in the hash table.  */
1398               if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo).is_created ())
1399                 LOOP_VINFO_PEELING_HTAB (loop_vinfo).create (1);
1400
1401               vectype = STMT_VINFO_VECTYPE (stmt_info);
1402               nelements = TYPE_VECTOR_SUBPARTS (vectype);
1403               mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1404                                                 TREE_TYPE (DR_REF (dr))));
1405               npeel_tmp = (negative
1406                            ? (mis - nelements) : (nelements - mis))
1407                   & (nelements - 1);
1408
1409               /* For multiple types, it is possible that the bigger type access
1410                  will have more than one peeling option.  E.g., a loop with two
1411                  types: one of size (vector size / 4), and the other one of
1412                  size (vector size / 8).  Vectorization factor will be 8.  If both
1413                  accesses are misaligned by 3, the first one needs one scalar
1414                  iteration to be aligned, and the second one needs 5.  But the
1415                  first one will be aligned also by peeling 5 scalar
1416                  iterations, and in that case both accesses will be aligned.
1417                  Hence, except for the immediate peeling amount, we also want
1418                  to try to add full vector size, while we don't exceed
1419                  vectorization factor.
1420                  We do this automatically for cost model, since we calculate cost
1421                  for every peeling option.  */
1422               if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
1423                 possible_npeel_number = vf /nelements;
1424
1425               /* Handle the aligned case. We may decide to align some other
1426                  access, making DR unaligned.  */
1427               if (DR_MISALIGNMENT (dr) == 0)
1428                 {
1429                   npeel_tmp = 0;
1430                   if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
1431                     possible_npeel_number++;
1432                 }
1433
1434               for (j = 0; j < possible_npeel_number; j++)
1435                 {
1436                   gcc_assert (npeel_tmp <= vf);
1437                   vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1438                   npeel_tmp += nelements;
1439                 }
1440
1441               all_misalignments_unknown = false;
1442               /* Data-ref that was chosen for the case that all the
1443                  misalignments are unknown is not relevant anymore, since we
1444                  have a data-ref with known alignment.  */
1445               dr0 = NULL;
1446             }
1447           else
1448             {
1449               /* If we don't know any misalignment values, we prefer
1450                  peeling for data-ref that has the maximum number of data-refs
1451                  with the same alignment, unless the target prefers to align
1452                  stores over load.  */
1453               if (all_misalignments_unknown)
1454                 {
1455                   unsigned same_align_drs
1456                     = STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ();
1457                   if (!dr0
1458                       || same_align_drs_max < same_align_drs)
1459                     {
1460                       same_align_drs_max = same_align_drs;
1461                       dr0 = dr;
1462                     }
1463                   /* For data-refs with the same number of related
1464                      accesses prefer the one where the misalign
1465                      computation will be invariant in the outermost loop.  */
1466                   else if (same_align_drs_max == same_align_drs)
1467                     {
1468                       struct loop *ivloop0, *ivloop;
1469                       ivloop0 = outermost_invariant_loop_for_expr
1470                           (loop, DR_BASE_ADDRESS (dr0));
1471                       ivloop = outermost_invariant_loop_for_expr
1472                           (loop, DR_BASE_ADDRESS (dr));
1473                       if ((ivloop && !ivloop0)
1474                           || (ivloop && ivloop0
1475                               && flow_loop_nested_p (ivloop, ivloop0)))
1476                         dr0 = dr;
1477                     }
1478
1479                   if (!first_store && DR_IS_WRITE (dr))
1480                     first_store = dr;
1481                 }
1482
1483               /* If there are both known and unknown misaligned accesses in the
1484                  loop, we choose peeling amount according to the known
1485                  accesses.  */
1486               if (!supportable_dr_alignment)
1487                 {
1488                   dr0 = dr;
1489                   if (!first_store && DR_IS_WRITE (dr))
1490                     first_store = dr;
1491                 }
1492             }
1493         }
1494       else
1495         {
1496           if (!aligned_access_p (dr))
1497             {
1498               if (dump_enabled_p ())
1499                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1500                                  "vector alignment may not be reachable\n");
1501               break;
1502             }
1503         }
1504     }
1505
1506   /* Check if we can possibly peel the loop.  */
1507   if (!vect_can_advance_ivs_p (loop_vinfo)
1508       || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1509     do_peeling = false;
1510
1511   if (do_peeling && all_misalignments_unknown
1512       && vect_supportable_dr_alignment (dr0, false))
1513     {
1514
1515       /* Check if the target requires to prefer stores over loads, i.e., if
1516          misaligned stores are more expensive than misaligned loads (taking
1517          drs with same alignment into account).  */
1518       if (first_store && DR_IS_READ (dr0))
1519         {
1520           unsigned int load_inside_cost = 0, load_outside_cost = 0;
1521           unsigned int store_inside_cost = 0, store_outside_cost = 0;
1522           unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1523           unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1524           stmt_vector_for_cost dummy;
1525           dummy.create (2);
1526
1527           vect_get_data_access_cost (dr0, &load_inside_cost, &load_outside_cost,
1528                                      &dummy);
1529           vect_get_data_access_cost (first_store, &store_inside_cost,
1530                                      &store_outside_cost, &dummy);
1531
1532           dummy.release ();
1533
1534           /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1535              aligning the load DR0).  */
1536           load_inside_penalty = store_inside_cost;
1537           load_outside_penalty = store_outside_cost;
1538           for (i = 0;
1539                STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
1540                           DR_STMT (first_store))).iterate (i, &dr);
1541                i++)
1542             if (DR_IS_READ (dr))
1543               {
1544                 load_inside_penalty += load_inside_cost;
1545                 load_outside_penalty += load_outside_cost;
1546               }
1547             else
1548               {
1549                 load_inside_penalty += store_inside_cost;
1550                 load_outside_penalty += store_outside_cost;
1551               }
1552
1553           /* Calculate the penalty for leaving DR0 unaligned (by
1554              aligning the FIRST_STORE).  */
1555           store_inside_penalty = load_inside_cost;
1556           store_outside_penalty = load_outside_cost;
1557           for (i = 0;
1558                STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
1559                       DR_STMT (dr0))).iterate (i, &dr);
1560                i++)
1561             if (DR_IS_READ (dr))
1562               {
1563                 store_inside_penalty += load_inside_cost;
1564                 store_outside_penalty += load_outside_cost;
1565               }
1566             else
1567               {
1568                 store_inside_penalty += store_inside_cost;
1569                 store_outside_penalty += store_outside_cost;
1570               }
1571
1572           if (load_inside_penalty > store_inside_penalty
1573               || (load_inside_penalty == store_inside_penalty
1574                   && load_outside_penalty > store_outside_penalty))
1575             dr0 = first_store;
1576         }
1577
1578       /* In case there are only loads with different unknown misalignments, use
1579          peeling only if it may help to align other accesses in the loop.  */
1580       if (!first_store
1581           && !STMT_VINFO_SAME_ALIGN_REFS (
1582                   vinfo_for_stmt (DR_STMT (dr0))).length ()
1583           && vect_supportable_dr_alignment (dr0, false)
1584               != dr_unaligned_supported)
1585         do_peeling = false;
1586     }
1587
1588   if (do_peeling && !dr0)
1589     {
1590       /* Peeling is possible, but there is no data access that is not supported
1591          unless aligned. So we try to choose the best possible peeling.  */
1592
1593       /* We should get here only if there are drs with known misalignment.  */
1594       gcc_assert (!all_misalignments_unknown);
1595
1596       /* Choose the best peeling from the hash table.  */
1597       dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel,
1598                                                    &body_cost_vec);
1599       if (!dr0 || !npeel)
1600         do_peeling = false;
1601     }
1602
1603   if (do_peeling)
1604     {
1605       stmt = DR_STMT (dr0);
1606       stmt_info = vinfo_for_stmt (stmt);
1607       vectype = STMT_VINFO_VECTYPE (stmt_info);
1608       nelements = TYPE_VECTOR_SUBPARTS (vectype);
1609
1610       if (known_alignment_for_access_p (dr0))
1611         {
1612           bool negative = tree_int_cst_compare (DR_STEP (dr0),
1613                                                 size_zero_node) < 0;
1614           if (!npeel)
1615             {
1616               /* Since it's known at compile time, compute the number of
1617                  iterations in the peeled loop (the peeling factor) for use in
1618                  updating DR_MISALIGNMENT values.  The peeling factor is the
1619                  vectorization factor minus the misalignment as an element
1620                  count.  */
1621               mis = DR_MISALIGNMENT (dr0);
1622               mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1623               npeel = ((negative ? mis - nelements : nelements - mis)
1624                        & (nelements - 1));
1625             }
1626
1627           /* For interleaved data access every iteration accesses all the
1628              members of the group, therefore we divide the number of iterations
1629              by the group size.  */
1630           stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1631           if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1632             npeel /= GROUP_SIZE (stmt_info);
1633
1634           if (dump_enabled_p ())
1635             dump_printf_loc (MSG_NOTE, vect_location,
1636                              "Try peeling by %d\n", npeel);
1637         }
1638
1639       /* Ensure that all data refs can be vectorized after the peel.  */
1640       FOR_EACH_VEC_ELT (datarefs, i, dr)
1641         {
1642           int save_misalignment;
1643
1644           if (dr == dr0)
1645             continue;
1646
1647           stmt = DR_STMT (dr);
1648           stmt_info = vinfo_for_stmt (stmt);
1649           /* For interleaving, only the alignment of the first access
1650             matters.  */
1651           if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1652               && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1653             continue;
1654
1655           /* Strided loads perform only component accesses, alignment is
1656              irrelevant for them.  */
1657           if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1658             continue;
1659
1660           save_misalignment = DR_MISALIGNMENT (dr);
1661           vect_update_misalignment_for_peel (dr, dr0, npeel);
1662           supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1663           SET_DR_MISALIGNMENT (dr, save_misalignment);
1664
1665           if (!supportable_dr_alignment)
1666             {
1667               do_peeling = false;
1668               break;
1669             }
1670         }
1671
1672       if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1673         {
1674           stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1675           if (!stat)
1676             do_peeling = false;
1677           else
1678             {
1679               body_cost_vec.release ();
1680               return stat;
1681             }
1682         }
1683
1684       if (do_peeling)
1685         {
1686           unsigned max_allowed_peel
1687             = PARAM_VALUE (PARAM_VECT_MAX_PEELING_FOR_ALIGNMENT);
1688           if (max_allowed_peel != (unsigned)-1)
1689             {
1690               unsigned max_peel = npeel;
1691               if (max_peel == 0)
1692                 {
1693                   gimple dr_stmt = DR_STMT (dr0);
1694                   stmt_vec_info vinfo = vinfo_for_stmt (dr_stmt);
1695                   tree vtype = STMT_VINFO_VECTYPE (vinfo);
1696                   max_peel = TYPE_VECTOR_SUBPARTS (vtype) - 1;
1697                 }
1698               if (max_peel > max_allowed_peel)
1699                 {
1700                   do_peeling = false;
1701                   if (dump_enabled_p ())
1702                     dump_printf_loc (MSG_NOTE, vect_location,
1703                         "Disable peeling, max peels reached: %d\n", max_peel);
1704                 }
1705             }
1706         }
1707
1708       if (do_peeling)
1709         {
1710           stmt_info_for_cost *si;
1711           void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
1712
1713           /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1714              If the misalignment of DR_i is identical to that of dr0 then set
1715              DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
1716              dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1717              by the peeling factor times the element size of DR_i (MOD the
1718              vectorization factor times the size).  Otherwise, the
1719              misalignment of DR_i must be set to unknown.  */
1720           FOR_EACH_VEC_ELT (datarefs, i, dr)
1721             if (dr != dr0)
1722               vect_update_misalignment_for_peel (dr, dr0, npeel);
1723
1724           LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1725           if (npeel)
1726             LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1727           else
1728             LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
1729               = DR_MISALIGNMENT (dr0);
1730           SET_DR_MISALIGNMENT (dr0, 0);
1731           if (dump_enabled_p ())
1732             {
1733               dump_printf_loc (MSG_NOTE, vect_location,
1734                                "Alignment of access forced using peeling.\n");
1735               dump_printf_loc (MSG_NOTE, vect_location,
1736                                "Peeling for alignment will be applied.\n");
1737             }
1738           /* We've delayed passing the inside-loop peeling costs to the
1739              target cost model until we were sure peeling would happen.
1740              Do so now.  */
1741           if (body_cost_vec.exists ())
1742             {
1743               FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1744                 {
1745                   struct _stmt_vec_info *stmt_info
1746                     = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1747                   (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1748                                         si->misalign, vect_body);
1749                 }
1750               body_cost_vec.release ();
1751             }
1752
1753           stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1754           gcc_assert (stat);
1755           return stat;
1756         }
1757     }
1758
1759   body_cost_vec.release ();
1760
1761   /* (2) Versioning to force alignment.  */
1762
1763   /* Try versioning if:
1764      1) optimize loop for speed
1765      2) there is at least one unsupported misaligned data ref with an unknown
1766         misalignment, and
1767      3) all misaligned data refs with a known misalignment are supported, and
1768      4) the number of runtime alignment checks is within reason.  */
1769
1770   do_versioning =
1771         optimize_loop_nest_for_speed_p (loop)
1772         && (!loop->inner); /* FORNOW */
1773
1774   if (do_versioning)
1775     {
1776       FOR_EACH_VEC_ELT (datarefs, i, dr)
1777         {
1778           stmt = DR_STMT (dr);
1779           stmt_info = vinfo_for_stmt (stmt);
1780
1781           /* For interleaving, only the alignment of the first access
1782              matters.  */
1783           if (aligned_access_p (dr)
1784               || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1785                   && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
1786             continue;
1787
1788           /* Strided loads perform only component accesses, alignment is
1789              irrelevant for them.  */
1790           if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1791             continue;
1792
1793           supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1794
1795           if (!supportable_dr_alignment)
1796             {
1797               gimple stmt;
1798               int mask;
1799               tree vectype;
1800
1801               if (known_alignment_for_access_p (dr)
1802                   || LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
1803                      >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1804                 {
1805                   do_versioning = false;
1806                   break;
1807                 }
1808
1809               stmt = DR_STMT (dr);
1810               vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1811               gcc_assert (vectype);
1812
1813               /* The rightmost bits of an aligned address must be zeros.
1814                  Construct the mask needed for this test.  For example,
1815                  GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1816                  mask must be 15 = 0xf. */
1817               mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1818
1819               /* FORNOW: use the same mask to test all potentially unaligned
1820                  references in the loop.  The vectorizer currently supports
1821                  a single vector size, see the reference to
1822                  GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1823                  vectorization factor is computed.  */
1824               gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1825                           || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1826               LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1827               LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (
1828                       DR_STMT (dr));
1829             }
1830         }
1831
1832       /* Versioning requires at least one misaligned data reference.  */
1833       if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1834         do_versioning = false;
1835       else if (!do_versioning)
1836         LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
1837     }
1838
1839   if (do_versioning)
1840     {
1841       vec<gimple> may_misalign_stmts
1842         = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1843       gimple stmt;
1844
1845       /* It can now be assumed that the data references in the statements
1846          in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1847          of the loop being vectorized.  */
1848       FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt)
1849         {
1850           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1851           dr = STMT_VINFO_DATA_REF (stmt_info);
1852           SET_DR_MISALIGNMENT (dr, 0);
1853           if (dump_enabled_p ())
1854             dump_printf_loc (MSG_NOTE, vect_location,
1855                              "Alignment of access forced using versioning.\n");
1856         }
1857
1858       if (dump_enabled_p ())
1859         dump_printf_loc (MSG_NOTE, vect_location,
1860                          "Versioning for alignment will be applied.\n");
1861
1862       /* Peeling and versioning can't be done together at this time.  */
1863       gcc_assert (! (do_peeling && do_versioning));
1864
1865       stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1866       gcc_assert (stat);
1867       return stat;
1868     }
1869
1870   /* This point is reached if neither peeling nor versioning is being done.  */
1871   gcc_assert (! (do_peeling || do_versioning));
1872
1873   stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1874   return stat;
1875 }
1876
1877
1878 /* Function vect_find_same_alignment_drs.
1879
1880    Update group and alignment relations according to the chosen
1881    vectorization factor.  */
1882
1883 static void
1884 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1885                               loop_vec_info loop_vinfo)
1886 {
1887   unsigned int i;
1888   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1889   int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1890   struct data_reference *dra = DDR_A (ddr);
1891   struct data_reference *drb = DDR_B (ddr);
1892   stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1893   stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1894   int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1895   int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1896   lambda_vector dist_v;
1897   unsigned int loop_depth;
1898
1899   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1900     return;
1901
1902   if (dra == drb)
1903     return;
1904
1905   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1906     return;
1907
1908   /* Loop-based vectorization and known data dependence.  */
1909   if (DDR_NUM_DIST_VECTS (ddr) == 0)
1910     return;
1911
1912   /* Data-dependence analysis reports a distance vector of zero
1913      for data-references that overlap only in the first iteration
1914      but have different sign step (see PR45764).
1915      So as a sanity check require equal DR_STEP.  */
1916   if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1917     return;
1918
1919   loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1920   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
1921     {
1922       int dist = dist_v[loop_depth];
1923
1924       if (dump_enabled_p ())
1925         dump_printf_loc (MSG_NOTE, vect_location,
1926                          "dependence distance  = %d.\n", dist);
1927
1928       /* Same loop iteration.  */
1929       if (dist == 0
1930           || (dist % vectorization_factor == 0 && dra_size == drb_size))
1931         {
1932           /* Two references with distance zero have the same alignment.  */
1933           STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a).safe_push (drb);
1934           STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b).safe_push (dra);
1935           if (dump_enabled_p ())
1936             {
1937               dump_printf_loc (MSG_NOTE, vect_location,
1938                                "accesses have the same alignment.\n");
1939               dump_printf (MSG_NOTE,
1940                            "dependence distance modulo vf == 0 between ");
1941               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
1942               dump_printf (MSG_NOTE,  " and ");
1943               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
1944               dump_printf (MSG_NOTE, "\n");
1945             }
1946         }
1947     }
1948 }
1949
1950
1951 /* Function vect_analyze_data_refs_alignment
1952
1953    Analyze the alignment of the data-references in the loop.
1954    Return FALSE if a data reference is found that cannot be vectorized.  */
1955
1956 bool
1957 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
1958                                   bb_vec_info bb_vinfo)
1959 {
1960   if (dump_enabled_p ())
1961     dump_printf_loc (MSG_NOTE, vect_location,
1962                      "=== vect_analyze_data_refs_alignment ===\n");
1963
1964   /* Mark groups of data references with same alignment using
1965      data dependence information.  */
1966   if (loop_vinfo)
1967     {
1968       vec<ddr_p> ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1969       struct data_dependence_relation *ddr;
1970       unsigned int i;
1971
1972       FOR_EACH_VEC_ELT (ddrs, i, ddr)
1973         vect_find_same_alignment_drs (ddr, loop_vinfo);
1974     }
1975
1976   if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
1977     {
1978       if (dump_enabled_p ())
1979         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1980                          "not vectorized: can't calculate alignment "
1981                          "for data ref.\n");
1982       return false;
1983     }
1984
1985   return true;
1986 }
1987
1988
1989 /* Analyze groups of accesses: check that DR belongs to a group of
1990    accesses of legal size, step, etc.  Detect gaps, single element
1991    interleaving, and other special cases. Set grouped access info.
1992    Collect groups of strided stores for further use in SLP analysis.

        DR_STEP (DR) is read with TREE_INT_CST_LOW, so it is expected to be
        an INTEGER_CST.

        Return false if the access cannot be handled.  Return true
        otherwise; for basic-block vectorization a non-consecutive access
        that is not part of a group is only marked as not vectorizable and
        true is still returned.  */
1993
1994 static bool
1995 vect_analyze_group_access (struct data_reference *dr)
1996 {
1997   tree step = DR_STEP (dr);
1998   tree scalar_type = TREE_TYPE (DR_REF (dr));
1999   HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2000   gimple stmt = DR_STMT (dr);
2001   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2002   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2003   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2004   HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
       /* LAST_ACCESSED_ELEMENT starts at 1 (the first member) and is advanced
          below by each member's distance, in elements, from its
          predecessor.  */
2005   HOST_WIDE_INT groupsize, last_accessed_element = 1;
2006   bool slp_impossible = false;
2007   struct loop *loop = NULL;
2008
2009   if (loop_vinfo)
2010     loop = LOOP_VINFO_LOOP (loop_vinfo);
2011
2012   /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
2013      size of the interleaving group (including gaps).  */
2014   groupsize = absu_hwi (dr_step) / type_size;
2015
2016   /* Not consecutive access is possible only if it is a part of interleaving.  */
2017   if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2018     {
2019       /* Check if this DR is a part of interleaving, and is a single
2020          element of the group that is accessed in the loop.  */
2021
2022       /* Gaps are supported only for loads. STEP must be a multiple of the type
2023          size.  The size of the group must be a power of 2.  */
2024       if (DR_IS_READ (dr)
2025           && (dr_step % type_size) == 0
2026           && groupsize > 0
2027           && exact_log2 (groupsize) != -1)
2028         {
               /* Make STMT the first (and only) element of its own group of
                  GROUPSIZE elements.  */
2029           GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
2030           GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2031           if (dump_enabled_p ())
2032             {
2033               dump_printf_loc (MSG_NOTE, vect_location,
2034                                "Detected single element interleaving ");
2035               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr));
2036               dump_printf (MSG_NOTE, " step ");
2037               dump_generic_expr (MSG_NOTE, TDF_SLIM, step);
2038               dump_printf (MSG_NOTE, "\n");
2039             }
2040
               /* In a loop the gaps call for LOOP_VINFO_PEELING_FOR_GAPS, which
                  is refused when the loop has an inner loop.  Note that the
                  group info set above is left in place on that failure.  */
2041           if (loop_vinfo)
2042             {
2043               if (dump_enabled_p ())
2044                 dump_printf_loc (MSG_NOTE, vect_location,
2045                                  "Data access with gaps requires scalar "
2046                                  "epilogue loop\n");
2047               if (loop->inner)
2048                 {
2049                   if (dump_enabled_p ())
2050                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2051                                      "Peeling for outer loop is not"
2052                                      " supported\n");
2053                   return false;
2054                 }
2055
2056               LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2057             }
2058
2059           return true;
2060         }
2061
2062       if (dump_enabled_p ())
2063         {
2064           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2065                            "not consecutive access ");
2066           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2067           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2068         }
2069
2070       if (bb_vinfo)
2071         {
2072           /* Mark the statement as unvectorizable.  */
2073           STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2074           return true;
2075         }
2076
2077       return false;
2078     }
2079
       /* Only the first statement of a group analyzes the whole chain; for any
          other member nothing is checked here and true is returned.  */
2080   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
2081     {
2082       /* First stmt in the interleaving chain. Check the chain.  */
2083       gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
2084       struct data_reference *data_ref = dr;
           /* COUNT is the number of distinct data-refs in the chain (members
              sharing a data-ref are not counted again); GAPS accumulates the
              number of skipped elements between consecutive members.  */
2085       unsigned int count = 1;
2086       tree prev_init = DR_INIT (data_ref);
2087       gimple prev = stmt;
2088       HOST_WIDE_INT diff, gaps = 0;
2089       unsigned HOST_WIDE_INT count_in_bytes;
2090
2091       while (next)
2092         {
2093           /* Skip same data-refs.  In case that two or more stmts share
2094              data-ref (supported only for loads), we vectorize only the first
2095              stmt, and the rest get their vectorized loads from the first
2096              one.  */
2097           if (!tree_int_cst_compare (DR_INIT (data_ref),
2098                                      DR_INIT (STMT_VINFO_DATA_REF (
2099                                                    vinfo_for_stmt (next)))))
2100             {
2101               if (DR_IS_WRITE (data_ref))
2102                 {
2103                   if (dump_enabled_p ())
2104                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2105                                      "Two store stmts share the same dr.\n");
2106                   return false;
2107                 }
2108
2109               /* For load use the same data-ref load.  */
2110               GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2111
2112               prev = next;
2113               next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2114               continue;
2115             }
2116
2117           prev = next;
2118           data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2119
2120           /* All group members have the same STEP by construction.  */
2121           gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0));
2122
2123           /* Check that the distance between two accesses is equal to the type
2124              size. Otherwise, we have gaps.  */
2125           diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2126                   - TREE_INT_CST_LOW (prev_init)) / type_size;
2127           if (diff != 1)
2128             {
2129               /* FORNOW: SLP of accesses with gaps is not supported.  */
2130               slp_impossible = true;
2131               if (DR_IS_WRITE (data_ref))
2132                 {
2133                   if (dump_enabled_p ())
2134                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2135                                      "interleaved store with gaps\n");
2136                   return false;
2137                 }
2138
2139               gaps += diff - 1;
2140             }
2141
2142           last_accessed_element += diff;
2143
2144           /* Store the gap from the previous member of the group. If there is no
2145              gap in the access, GROUP_GAP is always 1.  */
2146           GROUP_GAP (vinfo_for_stmt (next)) = diff;
2147
2148           prev_init = DR_INIT (data_ref);
2149           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2150           /* Count the number of data-refs in the chain.  */
2151           count++;
2152         }
2153
2154       /* COUNT is the number of accesses found, we multiply it by the size of
2155          the type to get COUNT_IN_BYTES.  */
2156       count_in_bytes = type_size * count;
2157
2158       /* Check that the size of the interleaving (including gaps) is not
2159          greater than STEP.  */
2160       if (dr_step != 0
2161           && absu_hwi (dr_step) < count_in_bytes + gaps * type_size)
2162         {
2163           if (dump_enabled_p ())
2164             {
2165               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2166                                "interleaving size is greater than step for ");
2167               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2168                                  DR_REF (dr));
2169               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2170             }
2171           return false;
2172         }
2173
2174       /* Check that the size of the interleaving is equal to STEP for stores,
2175          i.e., that there are no gaps.  */
2176       if (dr_step != 0
2177           && absu_hwi (dr_step) != count_in_bytes)
2178         {
2179           if (DR_IS_READ (dr))
2180             {
2181               slp_impossible = true;
2182               /* There is a gap after the last load in the group. This gap is a
2183                  difference between the groupsize and the number of elements.
2184                  When there is no gap, this difference should be 0.  */
2185               GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - count;
2186             }
2187           else
2188             {
2189               if (dump_enabled_p ())
2190                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2191                                  "interleaved store with gaps\n");
2192               return false;
2193             }
2194         }
2195
2196       /* Check that STEP is a multiple of type size.  */
2197       if (dr_step != 0
2198           && (dr_step % type_size) != 0)
2199         {
2200           if (dump_enabled_p ())
2201             {
2202               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2203                                "step is not a multiple of type size: step ");
2204               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, step);
2205               dump_printf (MSG_MISSED_OPTIMIZATION, " size ");
2206               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2207                                  TYPE_SIZE_UNIT (scalar_type));
2208               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2209             }
2210           return false;
2211         }
2212
           /* GROUPSIZE was derived from STEP and is zero for a zero STEP (a
              non-zero STEP smaller than the type size was rejected by the
              size check above); use the number of members found instead.  */
2213       if (groupsize == 0)
2214         groupsize = count;
2215
2216       GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2217       if (dump_enabled_p ())
2218         dump_printf_loc (MSG_NOTE, vect_location,
2219                          "Detected interleaving of size %d\n", (int)groupsize);
2220
2221       /* SLP: create an SLP data structure for every interleaving group of
2222          stores for further analysis in vect_analyse_slp.  */
2223       if (DR_IS_WRITE (dr) && !slp_impossible)
2224         {
2225           if (loop_vinfo)
2226             LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (stmt);
2227           if (bb_vinfo)
2228             BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt);
2229         }
2230
2231       /* There is a gap in the end of the group.  */
2232       if (groupsize - last_accessed_element > 0 && loop_vinfo)
2233         {
2234           if (dump_enabled_p ())
2235             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2236                              "Data access with gaps requires scalar "
2237                              "epilogue loop\n");
2238           if (loop->inner)
2239             {
2240               if (dump_enabled_p ())
2241                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2242                                  "Peeling for outer loop is not supported\n");
2243               return false;
2244             }
2245
2246           LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2247         }
2248     }
2249
2250   return true;
2251 }
2252
2253
2254 /* Analyze the access pattern of the data-reference DR.
2255    In case of non-consecutive accesses call vect_analyze_group_access() to
2256    analyze groups of accesses.  */
2257
2258 static bool
2259 vect_analyze_data_ref_access (struct data_reference *dr)
2260 {
2261   tree step = DR_STEP (dr);
2262   tree scalar_type = TREE_TYPE (DR_REF (dr));
2263   gimple stmt = DR_STMT (dr);
2264   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2265   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2266   struct loop *loop = NULL;
2267
2268   if (loop_vinfo)
2269     loop = LOOP_VINFO_LOOP (loop_vinfo);
2270
2271   if (loop_vinfo && !step)
2272     {
2273       if (dump_enabled_p ())
2274         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2275                          "bad data-ref access in loop\n");
2276       return false;
2277     }
2278
2279   /* Allow invariant loads in not nested loops.  */
2280   if (loop_vinfo && integer_zerop (step))
2281     {
2282       GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2283       if (nested_in_vect_loop_p (loop, stmt))
2284         {
2285           if (dump_enabled_p ())
2286             dump_printf_loc (MSG_NOTE, vect_location,
2287                              "zero step in inner loop of nest\n");
2288           return false;
2289         }
2290       return DR_IS_READ (dr);
2291     }
2292
2293   if (loop && nested_in_vect_loop_p (loop, stmt))
2294     {
2295       /* Interleaved accesses are not yet supported within outer-loop
2296         vectorization for references in the inner-loop.  */
2297       GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2298
2299       /* For the rest of the analysis we use the outer-loop step.  */
2300       step = STMT_VINFO_DR_STEP (stmt_info);
2301       if (integer_zerop (step))
2302         {
2303           if (dump_enabled_p ())
2304             dump_printf_loc (MSG_NOTE, vect_location,
2305                              "zero step in outer loop.\n");
2306           if (DR_IS_READ (dr))
2307             return true;
2308           else
2309             return false;
2310         }
2311     }
2312
2313   /* Consecutive?  */
2314   if (TREE_CODE (step) == INTEGER_CST)
2315     {
2316       HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2317       if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2318           || (dr_step < 0
2319               && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2320         {
2321           /* Mark that it is not interleaving.  */
2322           GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2323           return true;
2324         }
2325     }
2326
2327   if (loop && nested_in_vect_loop_p (loop, stmt))
2328     {
2329       if (dump_enabled_p ())
2330         dump_printf_loc (MSG_NOTE, vect_location,
2331                          "grouped access in outer loop.\n");
2332       return false;
2333     }
2334
2335   /* Assume this is a DR handled by non-constant strided load case.  */
2336   if (TREE_CODE (step) != INTEGER_CST)
2337     return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
2338
2339   /* Not consecutive access - check if it's a part of interleaving group.  */
2340   return vect_analyze_group_access (dr);
2341 }
2342
2343
2344
2345 /*  A helper function used in the comparator function to sort data
2346     references.  T1 and T2 are two data references to be compared.
2347     The function returns -1, 0, or 1.  */
2348
2349 static int
2350 compare_tree (tree t1, tree t2)
2351 {
2352   int i, cmp;
2353   enum tree_code code;
2354   char tclass;
2355
2356   if (t1 == t2)
2357     return 0;
2358   if (t1 == NULL)
2359     return -1;
2360   if (t2 == NULL)
2361     return 1;
2362
2363
2364   if (TREE_CODE (t1) != TREE_CODE (t2))
2365     return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
2366
2367   code = TREE_CODE (t1);
2368   switch (code)
2369     {
2370     /* For const values, we can just use hash values for comparisons.  */
2371     case INTEGER_CST:
2372     case REAL_CST:
2373     case FIXED_CST:
2374     case STRING_CST:
2375     case COMPLEX_CST:
2376     case VECTOR_CST:
2377       {
2378         hashval_t h1 = iterative_hash_expr (t1, 0);
2379         hashval_t h2 = iterative_hash_expr (t2, 0);
2380         if (h1 != h2)
2381           return h1 < h2 ? -1 : 1;
2382         break;
2383       }
2384
2385     case SSA_NAME:
2386       cmp = compare_tree (SSA_NAME_VAR (t1), SSA_NAME_VAR (t2));
2387       if (cmp != 0)
2388         return cmp;
2389
2390       if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
2391         return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
2392       break;
2393
2394     default:
2395       tclass = TREE_CODE_CLASS (code);
2396
2397       /* For var-decl, we could compare their UIDs.  */
2398       if (tclass == tcc_declaration)
2399         {
2400           if (DECL_UID (t1) != DECL_UID (t2))
2401             return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
2402           break;
2403         }
2404
2405       /* For expressions with operands, compare their operands recursively.  */
2406       for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
2407         {
2408           cmp = compare_tree (TREE_OPERAND (t1, i), TREE_OPERAND (t2, i));
2409           if (cmp != 0)
2410             return cmp;
2411         }
2412     }
2413
2414   return 0;
2415 }
2416
2417
2418 /* Compare two data-references DRA and DRB to group them into chunks
2419    suitable for grouping.  */
2420
2421 static int
2422 dr_group_sort_cmp (const void *dra_, const void *drb_)
2423 {
2424   data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
2425   data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
2426   int cmp;
2427
2428   /* Stabilize sort.  */
2429   if (dra == drb)
2430     return 0;
2431
2432   /* Ordering of DRs according to base.  */
2433   if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0))
2434     {
2435       cmp = compare_tree (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb));
2436       if (cmp != 0)
2437         return cmp;
2438     }
2439
2440   /* And according to DR_OFFSET.  */
2441   if (!dr_equal_offsets_p (dra, drb))
2442     {
2443       cmp = compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
2444       if (cmp != 0)
2445         return cmp;
2446     }
2447
2448   /* Put reads before writes.  */
2449   if (DR_IS_READ (dra) != DR_IS_READ (drb))
2450     return DR_IS_READ (dra) ? -1 : 1;
2451
2452   /* Then sort after access size.  */
2453   if (!operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
2454                         TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))), 0))
2455     {
2456       cmp = compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
2457                           TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
2458       if (cmp != 0)
2459         return cmp;
2460     }
2461
2462   /* And after step.  */
2463   if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
2464     {
2465       cmp = compare_tree (DR_STEP (dra), DR_STEP (drb));
2466       if (cmp != 0)
2467         return cmp;
2468     }
2469
2470   /* Then sort after DR_INIT.  In case of identical DRs sort after stmt UID.  */
2471   cmp = tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb));
2472   if (cmp == 0)
2473     return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
2474   return cmp;
2475 }
2476
2477 /* Function vect_analyze_data_ref_accesses.
2478
2479    Analyze the access pattern of all the data references in the loop.
2480
2481    FORNOW: the only access pattern that is considered vectorizable is a
2482            simple step 1 (consecutive) access.
2483
2484    FORNOW: handle only arrays and pointer accesses.  */
2485
2486 bool
2487 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2488 {
2489   unsigned int i;
2490   vec<data_reference_p> datarefs;
2491   struct data_reference *dr;
2492
2493   if (dump_enabled_p ())
2494     dump_printf_loc (MSG_NOTE, vect_location,
2495                      "=== vect_analyze_data_ref_accesses ===\n");
2496
2497   if (loop_vinfo)
2498     datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2499   else
2500     datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2501
2502   if (datarefs.is_empty ())
2503     return true;
2504
2505   /* Sort the array of datarefs to make building the interleaving chains
2506      linear.  Don't modify the original vector's order, it is needed for
2507      determining what dependencies are reversed.  */
2508   vec<data_reference_p> datarefs_copy = datarefs.copy ();
2509   qsort (datarefs_copy.address (), datarefs_copy.length (),
2510          sizeof (data_reference_p), dr_group_sort_cmp);
2511
2512   /* Build the interleaving chains.  */
2513   for (i = 0; i < datarefs_copy.length () - 1;)
2514     {
2515       data_reference_p dra = datarefs_copy[i];
2516       stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
2517       stmt_vec_info lastinfo = NULL;
2518       for (i = i + 1; i < datarefs_copy.length (); ++i)
2519         {
2520           data_reference_p drb = datarefs_copy[i];
2521           stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
2522
2523           /* ???  Imperfect sorting (non-compatible types, non-modulo
2524              accesses, same accesses) can lead to a group to be artificially
2525              split here as we don't just skip over those.  If it really
2526              matters we can push those to a worklist and re-iterate
2527              over them.  The we can just skip ahead to the next DR here.  */
2528
2529           /* Check that the data-refs have same first location (except init)
2530              and they are both either store or load (not load and store).  */
2531           if (DR_IS_READ (dra) != DR_IS_READ (drb)
2532               || !operand_equal_p (DR_BASE_ADDRESS (dra),
2533                                    DR_BASE_ADDRESS (drb), 0)
2534               || !dr_equal_offsets_p (dra, drb))
2535             break;
2536
2537           /* Check that the data-refs have the same constant size and step.  */
2538           tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
2539           tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
2540           if (!tree_fits_uhwi_p (sza)
2541               || !tree_fits_uhwi_p (szb)
2542               || !tree_int_cst_equal (sza, szb)
2543               || !tree_fits_shwi_p (DR_STEP (dra))
2544               || !tree_fits_shwi_p (DR_STEP (drb))
2545               || !tree_int_cst_equal (DR_STEP (dra), DR_STEP (drb)))
2546             break;
2547
2548           /* Do not place the same access in the interleaving chain twice.  */
2549           if (tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) == 0)
2550             break;
2551
2552           /* Check the types are compatible.
2553              ???  We don't distinguish this during sorting.  */
2554           if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
2555                                    TREE_TYPE (DR_REF (drb))))
2556             break;
2557
2558           /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb).  */
2559           HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
2560           HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
2561           gcc_assert (init_a < init_b);
2562
2563           /* If init_b == init_a + the size of the type * k, we have an
2564              interleaving, and DRA is accessed before DRB.  */
2565           HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
2566           if ((init_b - init_a) % type_size_a != 0)
2567             break;
2568
2569           /* The step (if not zero) is greater than the difference between
2570              data-refs' inits.  This splits groups into suitable sizes.  */
2571           HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
2572           if (step != 0 && step <= (init_b - init_a))
2573             break;
2574
2575           if (dump_enabled_p ())
2576             {
2577               dump_printf_loc (MSG_NOTE, vect_location,
2578                                "Detected interleaving ");
2579               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
2580               dump_printf (MSG_NOTE,  " and ");
2581               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
2582               dump_printf (MSG_NOTE, "\n");
2583             }
2584
2585           /* Link the found element into the group list.  */
2586           if (!GROUP_FIRST_ELEMENT (stmtinfo_a))
2587             {
2588               GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (dra);
2589               lastinfo = stmtinfo_a;
2590             }
2591           GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (dra);
2592           GROUP_NEXT_ELEMENT (lastinfo) = DR_STMT (drb);
2593           lastinfo = stmtinfo_b;
2594         }
2595     }
2596
2597   FOR_EACH_VEC_ELT (datarefs_copy, i, dr)
2598     if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2599         && !vect_analyze_data_ref_access (dr))
2600       {
2601         if (dump_enabled_p ())
2602           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2603                            "not vectorized: complicated access pattern.\n");
2604
2605         if (bb_vinfo)
2606           {
2607             /* Mark the statement as not vectorizable.  */
2608             STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2609             continue;
2610           }
2611         else
2612           {
2613             datarefs_copy.release ();
2614             return false;
2615           }
2616       }
2617
2618   datarefs_copy.release ();
2619   return true;
2620 }
2621
2622
2623 /* Operator == between two dr_with_seg_len objects.
2624
2625    This equality operator is used to make sure two data refs
2626    are the same one so that we will consider to combine the
2627    aliasing checks of those two pairs of data dependent data
2628    refs.  */
2629
2630 static bool
2631 operator == (const dr_with_seg_len& d1,
2632              const dr_with_seg_len& d2)
2633 {
2634   return operand_equal_p (DR_BASE_ADDRESS (d1.dr),
2635                           DR_BASE_ADDRESS (d2.dr), 0)
2636            && compare_tree (d1.offset, d2.offset) == 0
2637            && compare_tree (d1.seg_len, d2.seg_len) == 0;
2638 }
2639
2640 /* Function comp_dr_with_seg_len_pair.
2641
2642    Comparison function for sorting objects of dr_with_seg_len_pair_t
2643    so that we can combine aliasing checks in one scan.  */
2644
2645 static int
2646 comp_dr_with_seg_len_pair (const void *p1_, const void *p2_)
2647 {
2648   const dr_with_seg_len_pair_t* p1 = (const dr_with_seg_len_pair_t *) p1_;
2649   const dr_with_seg_len_pair_t* p2 = (const dr_with_seg_len_pair_t *) p2_;
2650
2651   const dr_with_seg_len &p11 = p1->first,
2652                         &p12 = p1->second,
2653                         &p21 = p2->first,
2654                         &p22 = p2->second;
2655
2656   /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
2657      if a and c have the same basic address snd step, and b and d have the same
2658      address and step.  Therefore, if any a&c or b&d don't have the same address
2659      and step, we don't care the order of those two pairs after sorting.  */
2660   int comp_res;
2661
2662   if ((comp_res = compare_tree (DR_BASE_ADDRESS (p11.dr),
2663                                 DR_BASE_ADDRESS (p21.dr))) != 0)
2664     return comp_res;
2665   if ((comp_res = compare_tree (DR_BASE_ADDRESS (p12.dr),
2666                                 DR_BASE_ADDRESS (p22.dr))) != 0)
2667     return comp_res;
2668   if ((comp_res = compare_tree (DR_STEP (p11.dr), DR_STEP (p21.dr))) != 0)
2669     return comp_res;
2670   if ((comp_res = compare_tree (DR_STEP (p12.dr), DR_STEP (p22.dr))) != 0)
2671     return comp_res;
2672   if ((comp_res = compare_tree (p11.offset, p21.offset)) != 0)
2673     return comp_res;
2674   if ((comp_res = compare_tree (p12.offset, p22.offset)) != 0)
2675     return comp_res;
2676
2677   return 0;
2678 }
2679
/* Exchange the values of A and B, going through one temporary copy.  */

template <class T> static void
swap (T& a, T& b)
{
  T old_a (a);
  a = b;
  b = old_a;
}
2687
2688 /* Function vect_vfa_segment_size.
2689
2690    Create an expression that computes the size of segment
2691    that will be accessed for a data reference.  The functions takes into
2692    account that realignment loads may access one more vector.
2693
2694    Input:
2695      DR: The data reference.
2696      LENGTH_FACTOR: segment length to consider.
2697
2698    Return an expression whose value is the size of segment which will be
2699    accessed by DR.  */
2700
2701 static tree
2702 vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
2703 {
2704   tree segment_length;
2705
2706   if (integer_zerop (DR_STEP (dr)))
2707     segment_length = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2708   else
2709     segment_length = size_binop (MULT_EXPR,
2710                                  fold_convert (sizetype, DR_STEP (dr)),
2711                                  fold_convert (sizetype, length_factor));
2712
2713   if (vect_supportable_dr_alignment (dr, false)
2714         == dr_explicit_realign_optimized)
2715     {
2716       tree vector_size = TYPE_SIZE_UNIT
2717                           (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
2718
2719       segment_length = size_binop (PLUS_EXPR, segment_length, vector_size);
2720     }
2721   return segment_length;
2722 }
2723
2724 /* Function vect_prune_runtime_alias_test_list.
2725
2726    Prune a list of ddrs to be tested at run-time by versioning for alias.
2727    Merge several alias checks into one if possible.
2728    Return FALSE if resulting list of ddrs is longer then allowed by
2729    PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE.  */
2730
2731 bool
2732 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2733 {
2734   vec<ddr_p> may_alias_ddrs =
2735     LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2736   vec<dr_with_seg_len_pair_t>& comp_alias_ddrs =
2737     LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2738   int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2739   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2740
2741   ddr_p ddr;
2742   unsigned int i;
2743   tree length_factor;
2744
2745   if (dump_enabled_p ())
2746     dump_printf_loc (MSG_NOTE, vect_location,
2747                      "=== vect_prune_runtime_alias_test_list ===\n");
2748
2749   if (may_alias_ddrs.is_empty ())
2750     return true;
2751
2752   /* Basically, for each pair of dependent data refs store_ptr_0
2753      and load_ptr_0, we create an expression:
2754
2755      ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2756      || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
2757
2758      for aliasing checks.  However, in some cases we can decrease
2759      the number of checks by combining two checks into one.  For
2760      example, suppose we have another pair of data refs store_ptr_0
2761      and load_ptr_1, and if the following condition is satisfied:
2762
2763      load_ptr_0 < load_ptr_1  &&
2764      load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
2765
2766      (this condition means, in each iteration of vectorized loop,
2767      the accessed memory of store_ptr_0 cannot be between the memory
2768      of load_ptr_0 and load_ptr_1.)
2769
2770      we then can use only the following expression to finish the
2771      alising checks between store_ptr_0 & load_ptr_0 and
2772      store_ptr_0 & load_ptr_1:
2773
2774      ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2775      || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
2776
2777      Note that we only consider that load_ptr_0 and load_ptr_1 have the
2778      same basic address.  */
2779
2780   comp_alias_ddrs.create (may_alias_ddrs.length ());
2781
2782   /* First, we collect all data ref pairs for aliasing checks.  */
2783   FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
2784     {
2785       struct data_reference *dr_a, *dr_b;
2786       gimple dr_group_first_a, dr_group_first_b;
2787       tree segment_length_a, segment_length_b;
2788       gimple stmt_a, stmt_b;
2789
2790       dr_a = DDR_A (ddr);
2791       stmt_a = DR_STMT (DDR_A (ddr));
2792       dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));
2793       if (dr_group_first_a)
2794         {
2795           stmt_a = dr_group_first_a;
2796           dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
2797         }
2798
2799       dr_b = DDR_B (ddr);
2800       stmt_b = DR_STMT (DDR_B (ddr));
2801       dr_group_first_b = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_b));
2802       if (dr_group_first_b)
2803         {
2804           stmt_b = dr_group_first_b;
2805           dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
2806         }
2807
2808       if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0))
2809         length_factor = scalar_loop_iters;
2810       else
2811         length_factor = size_int (vect_factor);
2812       segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
2813       segment_length_b = vect_vfa_segment_size (dr_b, length_factor);
2814
2815       dr_with_seg_len_pair_t dr_with_seg_len_pair
2816           (dr_with_seg_len (dr_a, segment_length_a),
2817            dr_with_seg_len (dr_b, segment_length_b));
2818
2819       if (compare_tree (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b)) > 0)
2820         swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);
2821
2822       comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
2823     }
2824
2825   /* Second, we sort the collected data ref pairs so that we can scan
2826      them once to combine all possible aliasing checks.  */
2827   comp_alias_ddrs.qsort (comp_dr_with_seg_len_pair);
2828
2829   /* Third, we scan the sorted dr pairs and check if we can combine
2830      alias checks of two neighbouring dr pairs.  */
2831   for (size_t i = 1; i < comp_alias_ddrs.length (); ++i)
2832     {
2833       /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
2834       dr_with_seg_len *dr_a1 = &comp_alias_ddrs[i-1].first,
2835                       *dr_b1 = &comp_alias_ddrs[i-1].second,
2836                       *dr_a2 = &comp_alias_ddrs[i].first,
2837                       *dr_b2 = &comp_alias_ddrs[i].second;
2838
2839       /* Remove duplicate data ref pairs.  */
2840       if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
2841         {
2842           if (dump_enabled_p ())
2843             {
2844               dump_printf_loc (MSG_NOTE, vect_location,
2845                                "found equal ranges ");
2846               dump_generic_expr (MSG_NOTE, TDF_SLIM,
2847                                  DR_REF (dr_a1->dr));
2848               dump_printf (MSG_NOTE,  ", ");
2849               dump_generic_expr (MSG_NOTE, TDF_SLIM,
2850                                  DR_REF (dr_b1->dr));
2851               dump_printf (MSG_NOTE,  " and ");
2852               dump_generic_expr (MSG_NOTE, TDF_SLIM,
2853                                  DR_REF (dr_a2->dr));
2854               dump_printf (MSG_NOTE,  ", ");
2855               dump_generic_expr (MSG_NOTE, TDF_SLIM,
2856                                  DR_REF (dr_b2->dr));
2857               dump_printf (MSG_NOTE, "\n");
2858             }
2859
2860           comp_alias_ddrs.ordered_remove (i--);
2861           continue;
2862         }
2863
2864       if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
2865         {
2866           /* We consider the case that DR_B1 and DR_B2 are same memrefs,
2867              and DR_A1 and DR_A2 are two consecutive memrefs.  */
2868           if (*dr_a1 == *dr_a2)
2869             {
2870               swap (dr_a1, dr_b1);
2871               swap (dr_a2, dr_b2);
2872             }
2873
2874           if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
2875                                 DR_BASE_ADDRESS (dr_a2->dr),
2876                                 0)
2877               || !tree_fits_shwi_p (dr_a1->offset)
2878               || !tree_fits_shwi_p (dr_a2->offset))
2879             continue;
2880
2881           HOST_WIDE_INT diff = (tree_to_shwi (dr_a2->offset)
2882                                 - tree_to_shwi (dr_a1->offset));
2883
2884
2885           /* Now we check if the following condition is satisfied:
2886
2887              DIFF - SEGMENT_LENGTH_A < SEGMENT_LENGTH_B
2888
2889              where DIFF = DR_A2->OFFSET - DR_A1->OFFSET.  However,
2890              SEGMENT_LENGTH_A or SEGMENT_LENGTH_B may not be constant so we
2891              have to make a best estimation.  We can get the minimum value
2892              of SEGMENT_LENGTH_B as a constant, represented by MIN_SEG_LEN_B,
2893              then either of the following two conditions can guarantee the
2894              one above:
2895
2896              1: DIFF <= MIN_SEG_LEN_B
2897              2: DIFF - SEGMENT_LENGTH_A < MIN_SEG_LEN_B
2898
2899              */
2900
2901           HOST_WIDE_INT  min_seg_len_b = (tree_fits_shwi_p (dr_b1->seg_len)
2902                                           ? tree_to_shwi (dr_b1->seg_len)
2903                                           : vect_factor);
2904
2905           if (diff <= min_seg_len_b
2906               || (tree_fits_shwi_p (dr_a1->seg_len)
2907                   && diff - tree_to_shwi (dr_a1->seg_len) < min_seg_len_b))
2908             {
2909               if (dump_enabled_p ())
2910                 {
2911                   dump_printf_loc (MSG_NOTE, vect_location,
2912                                    "merging ranges for ");
2913                   dump_generic_expr (MSG_NOTE, TDF_SLIM,
2914                                      DR_REF (dr_a1->dr));
2915                   dump_printf (MSG_NOTE,  ", ");
2916                   dump_generic_expr (MSG_NOTE, TDF_SLIM,
2917                                      DR_REF (dr_b1->dr));
2918                   dump_printf (MSG_NOTE,  " and ");
2919                   dump_generic_expr (MSG_NOTE, TDF_SLIM,
2920                                      DR_REF (dr_a2->dr));
2921                   dump_printf (MSG_NOTE,  ", ");
2922                   dump_generic_expr (MSG_NOTE, TDF_SLIM,
2923                                      DR_REF (dr_b2->dr));
2924                   dump_printf (MSG_NOTE, "\n");
2925                 }
2926
2927               dr_a1->seg_len = size_binop (PLUS_EXPR,
2928                                            dr_a2->seg_len, size_int (diff));
2929               comp_alias_ddrs.ordered_remove (i--);
2930             }
2931         }
2932     }
2933
2934   dump_printf_loc (MSG_NOTE, vect_location,
2935                    "improved number of alias checks from %d to %d\n",
2936                    may_alias_ddrs.length (), comp_alias_ddrs.length ());
2937   if ((int) comp_alias_ddrs.length () >
2938       PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2939     return false;
2940
2941   return true;
2942 }
2943
2944 /* Check whether a non-affine read in stmt is suitable for gather load
2945    and if so, return a builtin decl for that operation.  */
2946
2947 tree
2948 vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2949                    tree *offp, int *scalep)
2950 {
2951   HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
2952   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2953   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2954   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2955   tree offtype = NULL_TREE;
2956   tree decl, base, off;
2957   enum machine_mode pmode;
2958   int punsignedp, pvolatilep;
2959
2960   base = DR_REF (dr);
2961   /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,
2962      see if we can use the def stmt of the address.  */
2963   if (is_gimple_call (stmt)
2964       && gimple_call_internal_p (stmt)
2965       && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2966           || gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
2967       && TREE_CODE (base) == MEM_REF
2968       && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
2969       && integer_zerop (TREE_OPERAND (base, 1))
2970       && !expr_invariant_in_loop_p (loop, TREE_OPERAND (base, 0)))
2971     {
2972       gimple def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
2973       if (is_gimple_assign (def_stmt)
2974           && gimple_assign_rhs_code (def_stmt) == ADDR_EXPR)
2975         base = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
2976     }
2977
2978   /* The gather builtins need address of the form
2979      loop_invariant + vector * {1, 2, 4, 8}
2980      or
2981      loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
2982      Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
2983      of loop invariants/SSA_NAMEs defined in the loop, with casts,
2984      multiplications and additions in it.  To get a vector, we need
2985      a single SSA_NAME that will be defined in the loop and will
2986      contain everything that is not loop invariant and that can be
2987      vectorized.  The following code attempts to find such a preexistng
2988      SSA_NAME OFF and put the loop invariants into a tree BASE
2989      that can be gimplified before the loop.  */
2990   base = get_inner_reference (base, &pbitsize, &pbitpos, &off,
2991                               &pmode, &punsignedp, &pvolatilep, false);
2992   gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);
2993
2994   if (TREE_CODE (base) == MEM_REF)
2995     {
2996       if (!integer_zerop (TREE_OPERAND (base, 1)))
2997         {
2998           if (off == NULL_TREE)
2999             {
3000               offset_int moff = mem_ref_offset (base);
3001               off = wide_int_to_tree (sizetype, moff);
3002             }
3003           else
3004             off = size_binop (PLUS_EXPR, off,
3005                               fold_convert (sizetype, TREE_OPERAND (base, 1)));
3006         }
3007       base = TREE_OPERAND (base, 0);
3008     }
3009   else
3010     base = build_fold_addr_expr (base);
3011
3012   if (off == NULL_TREE)
3013     off = size_zero_node;
3014
3015   /* If base is not loop invariant, either off is 0, then we start with just
3016      the constant offset in the loop invariant BASE and continue with base
3017      as OFF, otherwise give up.
3018      We could handle that case by gimplifying the addition of base + off
3019      into some SSA_NAME and use that as off, but for now punt.  */
3020   if (!expr_invariant_in_loop_p (loop, base))
3021     {
3022       if (!integer_zerop (off))
3023         return NULL_TREE;
3024       off = base;
3025       base = size_int (pbitpos / BITS_PER_UNIT);
3026     }
3027   /* Otherwise put base + constant offset into the loop invariant BASE
3028      and continue with OFF.  */
3029   else
3030     {
3031       base = fold_convert (sizetype, base);
3032       base = size_binop (PLUS_EXPR, base, size_int (pbitpos / BITS_PER_UNIT));
3033     }
3034
3035   /* OFF at this point may be either a SSA_NAME or some tree expression
3036      from get_inner_reference.  Try to peel off loop invariants from it
3037      into BASE as long as possible.  */
3038   STRIP_NOPS (off);
3039   while (offtype == NULL_TREE)
3040     {
3041       enum tree_code code;
3042       tree op0, op1, add = NULL_TREE;
3043
3044       if (TREE_CODE (off) == SSA_NAME)
3045         {
3046           gimple def_stmt = SSA_NAME_DEF_STMT (off);
3047
3048           if (expr_invariant_in_loop_p (loop, off))
3049             return NULL_TREE;
3050
3051           if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
3052             break;
3053
3054           op0 = gimple_assign_rhs1 (def_stmt);
3055           code = gimple_assign_rhs_code (def_stmt);
3056           op1 = gimple_assign_rhs2 (def_stmt);
3057         }
3058       else
3059         {
3060           if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
3061             return NULL_TREE;
3062           code = TREE_CODE (off);
3063           extract_ops_from_tree (off, &code, &op0, &op1);
3064         }
3065       switch (code)
3066         {
3067         case POINTER_PLUS_EXPR:
3068         case PLUS_EXPR:
3069           if (expr_invariant_in_loop_p (loop, op0))
3070             {
3071               add = op0;
3072               off = op1;
3073             do_add:
3074               add = fold_convert (sizetype, add);
3075               if (scale != 1)
3076                 add = size_binop (MULT_EXPR, add, size_int (scale));
3077               base = size_binop (PLUS_EXPR, base, add);
3078               continue;
3079             }
3080           if (expr_invariant_in_loop_p (loop, op1))
3081             {
3082               add = op1;
3083               off = op0;
3084               goto do_add;
3085             }
3086           break;
3087         case MINUS_EXPR:
3088           if (expr_invariant_in_loop_p (loop, op1))
3089             {
3090               add = fold_convert (sizetype, op1);
3091               add = size_binop (MINUS_EXPR, size_zero_node, add);
3092               off = op0;
3093               goto do_add;
3094             }
3095           break;
3096         case MULT_EXPR:
3097           if (scale == 1 && tree_fits_shwi_p (op1))
3098             {
3099               scale = tree_to_shwi (op1);
3100               off = op0;
3101               continue;
3102             }
3103           break;
3104         case SSA_NAME:
3105           off = op0;
3106           continue;
3107         CASE_CONVERT:
3108           if (!POINTER_TYPE_P (TREE_TYPE (op0))
3109               && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3110             break;
3111           if (TYPE_PRECISION (TREE_TYPE (op0))
3112               == TYPE_PRECISION (TREE_TYPE (off)))
3113             {
3114               off = op0;
3115               continue;
3116             }
3117           if (TYPE_PRECISION (TREE_TYPE (op0))
3118               < TYPE_PRECISION (TREE_TYPE (off)))
3119             {
3120               off = op0;
3121               offtype = TREE_TYPE (off);
3122               STRIP_NOPS (off);
3123               continue;
3124             }
3125           break;
3126         default:
3127           break;
3128         }
3129       break;
3130     }
3131
3132   /* If at the end OFF still isn't a SSA_NAME or isn't
3133      defined in the loop, punt.  */
3134   if (TREE_CODE (off) != SSA_NAME
3135       || expr_invariant_in_loop_p (loop, off))
3136     return NULL_TREE;
3137
3138   if (offtype == NULL_TREE)
3139     offtype = TREE_TYPE (off);
3140
3141   decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
3142                                            offtype, scale);
3143   if (decl == NULL_TREE)
3144     return NULL_TREE;
3145
3146   if (basep)
3147     *basep = base;
3148   if (offp)
3149     *offp = off;
3150   if (scalep)
3151     *scalep = scale;
3152   return decl;
3153 }
3154
3155 /* Function vect_analyze_data_refs.
3156
3157   Find all the data references in the loop or basic block.
3158
3159    The general structure of the analysis of data refs in the vectorizer is as
3160    follows:
3161    1- vect_analyze_data_refs(loop/bb): call
3162       compute_data_dependences_for_loop/bb to find and analyze all data-refs
3163       in the loop/bb and their dependences.
3164    2- vect_analyze_dependences(): apply dependence testing using ddrs.
3165    3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
3166    4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
3167
3168 */
3169
3170 bool
3171 vect_analyze_data_refs (loop_vec_info loop_vinfo,
3172                         bb_vec_info bb_vinfo,
3173                         int *min_vf, unsigned *n_stmts)
3174 {
3175   struct loop *loop = NULL;
3176   basic_block bb = NULL;
3177   unsigned int i;
3178   vec<data_reference_p> datarefs;
3179   struct data_reference *dr;
3180   tree scalar_type;
3181
3182   if (dump_enabled_p ())
3183     dump_printf_loc (MSG_NOTE, vect_location,
3184                      "=== vect_analyze_data_refs ===\n");
3185
3186   if (loop_vinfo)
3187     {
3188       basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3189
3190       loop = LOOP_VINFO_LOOP (loop_vinfo);
3191       datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3192       if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo)))
3193         {
3194           if (dump_enabled_p ())
3195             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3196                              "not vectorized: loop contains function calls"
3197                              " or data references that cannot be analyzed\n");
3198           return false;
3199         }
3200
3201       for (i = 0; i < loop->num_nodes; i++)
3202         {
3203           gimple_stmt_iterator gsi;
3204
3205           for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
3206             {
3207               gimple stmt = gsi_stmt (gsi);
3208               if (is_gimple_debug (stmt))
3209                 continue;
3210               ++*n_stmts;
3211               if (!find_data_references_in_stmt (loop, stmt, &datarefs))
3212                 {
3213                   if (is_gimple_call (stmt) && loop->safelen)
3214                     {
3215                       tree fndecl = gimple_call_fndecl (stmt), op;
3216                       if (fndecl != NULL_TREE)
3217                         {
3218                           struct cgraph_node *node = cgraph_get_node (fndecl);
3219                           if (node != NULL && node->simd_clones != NULL)
3220                             {
3221                               unsigned int j, n = gimple_call_num_args (stmt);
3222                               for (j = 0; j < n; j++)
3223                                 {
3224                                   op = gimple_call_arg (stmt, j);
3225                                   if (DECL_P (op)
3226                                       || (REFERENCE_CLASS_P (op)
3227                                           && get_base_address (op)))
3228                                     break;
3229                                 }
3230                               op = gimple_call_lhs (stmt);
3231                               /* Ignore #pragma omp declare simd functions
3232                                  if they don't have data references in the
3233                                  call stmt itself.  */
3234                               if (j == n
3235                                   && !(op
3236                                        && (DECL_P (op)
3237                                            || (REFERENCE_CLASS_P (op)
3238                                                && get_base_address (op)))))
3239                                 continue;
3240                             }
3241                         }
3242                     }
3243                   LOOP_VINFO_DATAREFS (loop_vinfo) = datarefs;
3244                   if (dump_enabled_p ())
3245                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3246                                      "not vectorized: loop contains function "
3247                                      "calls or data references that cannot "
3248                                      "be analyzed\n");
3249                   return false;
3250                 }
3251             }
3252         }
3253
3254       LOOP_VINFO_DATAREFS (loop_vinfo) = datarefs;
3255     }
3256   else
3257     {
3258       gimple_stmt_iterator gsi;
3259
3260       bb = BB_VINFO_BB (bb_vinfo);
3261       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3262         {
3263           gimple stmt = gsi_stmt (gsi);
3264           if (is_gimple_debug (stmt))
3265             continue;
3266           ++*n_stmts;
3267           if (!find_data_references_in_stmt (NULL, stmt,
3268                                              &BB_VINFO_DATAREFS (bb_vinfo)))
3269             {
3270               /* Mark the rest of the basic-block as unvectorizable.  */
3271               for (; !gsi_end_p (gsi); gsi_next (&gsi))
3272                 {
3273                   stmt = gsi_stmt (gsi);
3274                   STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)) = false;
3275                 }
3276               break;
3277             }
3278         }
3279
3280       datarefs = BB_VINFO_DATAREFS (bb_vinfo);
3281     }
3282
3283   /* Go through the data-refs, check that the analysis succeeded.  Update
3284      pointer from stmt_vec_info struct to DR and vectype.  */
3285
3286   FOR_EACH_VEC_ELT (datarefs, i, dr)
3287     {
3288       gimple stmt;
3289       stmt_vec_info stmt_info;
3290       tree base, offset, init;
3291       bool gather = false;
3292       bool simd_lane_access = false;
3293       int vf;
3294
3295 again:
3296       if (!dr || !DR_REF (dr))
3297         {
3298           if (dump_enabled_p ())
3299             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3300                              "not vectorized: unhandled data-ref\n");
3301           return false;
3302         }
3303
3304       stmt = DR_STMT (dr);
3305       stmt_info = vinfo_for_stmt (stmt);
3306
3307       /* Discard clobbers from the dataref vector.  We will remove
3308          clobber stmts during vectorization.  */
3309       if (gimple_clobber_p (stmt))
3310         {
3311           free_data_ref (dr);
3312           if (i == datarefs.length () - 1)
3313             {
3314               datarefs.pop ();
3315               break;
3316             }
3317           datarefs.ordered_remove (i);
3318           dr = datarefs[i];
3319           goto again;
3320         }
3321
3322       /* Check that analysis of the data-ref succeeded.  */
3323       if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
3324           || !DR_STEP (dr))
3325         {
3326           bool maybe_gather
3327             = DR_IS_READ (dr)
3328               && !TREE_THIS_VOLATILE (DR_REF (dr))
3329               && targetm.vectorize.builtin_gather != NULL;
3330           bool maybe_simd_lane_access
3331             = loop_vinfo && loop->simduid;
3332
3333           /* If target supports vector gather loads, or if this might be
3334              a SIMD lane access, see if they can't be used.  */
3335           if (loop_vinfo
3336               && (maybe_gather || maybe_simd_lane_access)
3337               && !nested_in_vect_loop_p (loop, stmt))
3338             {
3339               struct data_reference *newdr
3340                 = create_data_ref (NULL, loop_containing_stmt (stmt),
3341                                    DR_REF (dr), stmt, true);
3342               gcc_assert (newdr != NULL && DR_REF (newdr));
3343               if (DR_BASE_ADDRESS (newdr)
3344                   && DR_OFFSET (newdr)
3345                   && DR_INIT (newdr)
3346                   && DR_STEP (newdr)
3347                   && integer_zerop (DR_STEP (newdr)))
3348                 {
3349                   if (maybe_simd_lane_access)
3350                     {
3351                       tree off = DR_OFFSET (newdr);
3352                       STRIP_NOPS (off);
3353                       if (TREE_CODE (DR_INIT (newdr)) == INTEGER_CST
3354                           && TREE_CODE (off) == MULT_EXPR
3355                           && tree_fits_uhwi_p (TREE_OPERAND (off, 1)))
3356                         {
3357                           tree step = TREE_OPERAND (off, 1);
3358                           off = TREE_OPERAND (off, 0);
3359                           STRIP_NOPS (off);
3360                           if (CONVERT_EXPR_P (off)
3361                               && TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off,
3362                                                                           0)))
3363                                  < TYPE_PRECISION (TREE_TYPE (off)))
3364                             off = TREE_OPERAND (off, 0);
3365                           if (TREE_CODE (off) == SSA_NAME)
3366                             {
3367                               gimple def = SSA_NAME_DEF_STMT (off);
3368                               tree reft = TREE_TYPE (DR_REF (newdr));
3369                               if (is_gimple_call (def)
3370                                   && gimple_call_internal_p (def)
3371                                   && (gimple_call_internal_fn (def)
3372                                       == IFN_GOMP_SIMD_LANE))
3373                                 {
3374                                   tree arg = gimple_call_arg (def, 0);
3375                                   gcc_assert (TREE_CODE (arg) == SSA_NAME);
3376                                   arg = SSA_NAME_VAR (arg);
3377                                   if (arg == loop->simduid
3378                                       /* For now.  */
3379                                       && tree_int_cst_equal
3380                                            (TYPE_SIZE_UNIT (reft),
3381                                             step))
3382                                     {
3383                                       DR_OFFSET (newdr) = ssize_int (0);
3384                                       DR_STEP (newdr) = step;
3385                                       DR_ALIGNED_TO (newdr)
3386                                         = size_int (BIGGEST_ALIGNMENT);
3387                                       dr = newdr;
3388                                       simd_lane_access = true;
3389                                     }
3390                                 }
3391                             }
3392                         }
3393                     }
3394                   if (!simd_lane_access && maybe_gather)
3395                     {
3396                       dr = newdr;
3397                       gather = true;
3398                     }
3399                 }
3400               if (!gather && !simd_lane_access)
3401                 free_data_ref (newdr);
3402             }
3403
3404           if (!gather && !simd_lane_access)
3405             {
3406               if (dump_enabled_p ())
3407                 {
3408                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3409                                    "not vectorized: data ref analysis "
3410                                    "failed ");
3411                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3412                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3413                 }
3414
3415               if (bb_vinfo)
3416                 break;
3417
3418               return false;
3419             }
3420         }
3421
3422       if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3423         {
3424           if (dump_enabled_p ())
3425             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3426                              "not vectorized: base addr of dr is a "
3427                              "constant\n");
3428
3429           if (bb_vinfo)
3430             break;
3431
3432           if (gather || simd_lane_access)
3433             free_data_ref (dr);
3434           return false;
3435         }
3436
3437       if (TREE_THIS_VOLATILE (DR_REF (dr)))
3438         {
3439           if (dump_enabled_p ())
3440             {
3441               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3442                                "not vectorized: volatile type ");
3443               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3444               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3445             }
3446
3447           if (bb_vinfo)
3448             break;
3449
3450           return false;
3451         }
3452
3453       if (stmt_can_throw_internal (stmt))
3454         {
3455           if (dump_enabled_p ())
3456             {
3457               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3458                                "not vectorized: statement can throw an "
3459                                "exception ");
3460               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3461               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3462             }
3463
3464           if (bb_vinfo)
3465             break;
3466
3467           if (gather || simd_lane_access)
3468             free_data_ref (dr);
3469           return false;
3470         }
3471
3472       if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
3473           && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
3474         {
3475           if (dump_enabled_p ())
3476             {
3477               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3478                                "not vectorized: statement is bitfield "
3479                                "access ");
3480               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3481               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3482             }
3483
3484           if (bb_vinfo)
3485             break;
3486
3487           if (gather || simd_lane_access)
3488             free_data_ref (dr);
3489           return false;
3490         }
3491
3492       base = unshare_expr (DR_BASE_ADDRESS (dr));
3493       offset = unshare_expr (DR_OFFSET (dr));
3494       init = unshare_expr (DR_INIT (dr));
3495
3496       if (is_gimple_call (stmt)
3497           && (!gimple_call_internal_p (stmt)
3498               || (gimple_call_internal_fn (stmt) != IFN_MASK_LOAD
3499                   && gimple_call_internal_fn (stmt) != IFN_MASK_STORE)))
3500         {
3501           if (dump_enabled_p ())
3502             {
3503               dump_printf_loc (MSG_MISSED_OPTIMIZATION,  vect_location,
3504                                "not vectorized: dr in a call ");
3505               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3506               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3507             }
3508
3509           if (bb_vinfo)
3510             break;
3511
3512           if (gather || simd_lane_access)
3513             free_data_ref (dr);
3514           return false;
3515         }
3516
3517       /* Update DR field in stmt_vec_info struct.  */
3518
3519       /* If the dataref is in an inner-loop of the loop that is considered for
3520          for vectorization, we also want to analyze the access relative to
3521          the outer-loop (DR contains information only relative to the
3522          inner-most enclosing loop).  We do that by building a reference to the
3523          first location accessed by the inner-loop, and analyze it relative to
3524          the outer-loop.  */
3525       if (loop && nested_in_vect_loop_p (loop, stmt))
3526         {
3527           tree outer_step, outer_base, outer_init;
3528           HOST_WIDE_INT pbitsize, pbitpos;
3529           tree poffset;
3530           enum machine_mode pmode;
3531           int punsignedp, pvolatilep;
3532           affine_iv base_iv, offset_iv;
3533           tree dinit;
3534
3535           /* Build a reference to the first location accessed by the
3536              inner-loop: *(BASE+INIT).  (The first location is actually
3537              BASE+INIT+OFFSET, but we add OFFSET separately later).  */
3538           tree inner_base = build_fold_indirect_ref
3539                                 (fold_build_pointer_plus (base, init));
3540
3541           if (dump_enabled_p ())
3542             {
3543               dump_printf_loc (MSG_NOTE, vect_location,
3544                                "analyze in outer-loop: ");
3545               dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base);
3546               dump_printf (MSG_NOTE, "\n");
3547             }
3548
3549           outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3550                           &poffset, &pmode, &punsignedp, &pvolatilep, false);
3551           gcc_assert (outer_base != NULL_TREE);
3552
3553           if (pbitpos % BITS_PER_UNIT != 0)
3554             {
3555               if (dump_enabled_p ())
3556                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3557                                  "failed: bit offset alignment.\n");
3558               return false;
3559             }
3560
3561           outer_base = build_fold_addr_expr (outer_base);
3562           if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
3563                           &base_iv, false))
3564             {
3565               if (dump_enabled_p ())
3566                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3567                                  "failed: evolution of base is not affine.\n");
3568               return false;
3569             }
3570
3571           if (offset)
3572             {
3573               if (poffset)
3574                 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
3575                                        poffset);
3576               else
3577                 poffset = offset;
3578             }
3579
3580           if (!poffset)
3581             {
3582               offset_iv.base = ssize_int (0);
3583               offset_iv.step = ssize_int (0);
3584             }
3585           else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
3586                                &offset_iv, false))
3587             {
3588               if (dump_enabled_p ())
3589                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3590                                  "evolution of offset is not affine.\n");
3591               return false;
3592             }
3593
3594           outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3595           split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3596           outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);
3597           split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3598           outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);
3599
3600           outer_step = size_binop (PLUS_EXPR,
3601                                 fold_convert (ssizetype, base_iv.step),
3602                                 fold_convert (ssizetype, offset_iv.step));
3603
3604           STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3605           /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3606           STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3607           STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3608           STMT_VINFO_DR_OFFSET (stmt_info) =
3609                                 fold_convert (ssizetype, offset_iv.base);
3610           STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3611                                 size_int (highest_pow2_factor (offset_iv.base));
3612
3613           if (dump_enabled_p ())
3614             {
3615               dump_printf_loc (MSG_NOTE, vect_location,
3616                                "\touter base_address: ");
3617               dump_generic_expr (MSG_NOTE, TDF_SLIM,
3618                                  STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3619               dump_printf (MSG_NOTE, "\n\touter offset from base address: ");
3620               dump_generic_expr (MSG_NOTE, TDF_SLIM,
3621                                  STMT_VINFO_DR_OFFSET (stmt_info));
3622               dump_printf (MSG_NOTE,
3623                            "\n\touter constant offset from base address: ");
3624               dump_generic_expr (MSG_NOTE, TDF_SLIM,
3625                                  STMT_VINFO_DR_INIT (stmt_info));
3626               dump_printf (MSG_NOTE, "\n\touter step: ");
3627               dump_generic_expr (MSG_NOTE, TDF_SLIM,
3628                                  STMT_VINFO_DR_STEP (stmt_info));
3629               dump_printf (MSG_NOTE, "\n\touter aligned to: ");
3630               dump_generic_expr (MSG_NOTE, TDF_SLIM,
3631                                  STMT_VINFO_DR_ALIGNED_TO (stmt_info));
3632               dump_printf (MSG_NOTE, "\n");
3633             }
3634         }
3635
3636       if (STMT_VINFO_DATA_REF (stmt_info))
3637         {
3638           if (dump_enabled_p ())
3639             {
3640               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3641                                "not vectorized: more than one data ref "
3642                                "in stmt: ");
3643               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3644               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3645             }
3646
3647           if (bb_vinfo)
3648             break;
3649
3650           if (gather || simd_lane_access)
3651             free_data_ref (dr);
3652           return false;
3653         }
3654
3655       STMT_VINFO_DATA_REF (stmt_info) = dr;
3656       if (simd_lane_access)
3657         {
3658           STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) = true;
3659           free_data_ref (datarefs[i]);
3660           datarefs[i] = dr;
3661         }
3662
3663       /* Set vectype for STMT.  */
3664       scalar_type = TREE_TYPE (DR_REF (dr));
3665       STMT_VINFO_VECTYPE (stmt_info)
3666         = get_vectype_for_scalar_type (scalar_type);
3667       if (!STMT_VINFO_VECTYPE (stmt_info))
3668         {
3669           if (dump_enabled_p ())
3670             {
3671               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3672                                "not vectorized: no vectype for stmt: ");
3673               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3674               dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
3675               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
3676                                  scalar_type);
3677               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3678             }
3679
3680           if (bb_vinfo)
3681             break;
3682
3683           if (gather || simd_lane_access)
3684             {
3685               STMT_VINFO_DATA_REF (stmt_info) = NULL;
3686               if (gather)
3687                 free_data_ref (dr);
3688             }
3689           return false;
3690         }
3691       else
3692         {
3693           if (dump_enabled_p ())
3694             {
3695               dump_printf_loc (MSG_NOTE, vect_location,
3696                                "got vectype for stmt: ");
3697               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3698               dump_generic_expr (MSG_NOTE, TDF_SLIM,
3699                                  STMT_VINFO_VECTYPE (stmt_info));
3700               dump_printf (MSG_NOTE, "\n");
3701             }
3702         }
3703
3704       /* Adjust the minimal vectorization factor according to the
3705          vector type.  */
3706       vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
3707       if (vf > *min_vf)
3708         *min_vf = vf;
3709
3710       if (gather)
3711         {
3712           tree off;
3713
3714           gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
3715           if (gather
3716               && get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
3717             gather = false;
3718           if (!gather)
3719             {
3720               STMT_VINFO_DATA_REF (stmt_info) = NULL;
3721               free_data_ref (dr);
3722               if (dump_enabled_p ())
3723                 {
3724                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3725                                    "not vectorized: not suitable for gather "
3726                                    "load ");
3727                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3728                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3729                 }
3730               return false;
3731             }
3732
3733           datarefs[i] = dr;
3734           STMT_VINFO_GATHER_P (stmt_info) = true;
3735         }
3736       else if (loop_vinfo
3737                && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
3738         {
3739           if (nested_in_vect_loop_p (loop, stmt)
3740               || !DR_IS_READ (dr))
3741             {
3742               if (dump_enabled_p ())
3743                 {
3744                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3745                                    "not vectorized: not suitable for strided "
3746                                    "load ");
3747                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3748                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3749                 }
3750               return false;
3751             }
3752           STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
3753         }
3754     }
3755
3756   /* If we stopped analysis at the first dataref we could not analyze
3757      when trying to vectorize a basic-block mark the rest of the datarefs
3758      as not vectorizable and truncate the vector of datarefs.  That
3759      avoids spending useless time in analyzing their dependence.  */
3760   if (i != datarefs.length ())
3761     {
3762       gcc_assert (bb_vinfo != NULL);
3763       for (unsigned j = i; j < datarefs.length (); ++j)
3764         {
3765           data_reference_p dr = datarefs[j];
3766           STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
3767           free_data_ref (dr);
3768         }
3769       datarefs.truncate (i);
3770     }
3771
3772   return true;
3773 }
3774
3775
3776 /* Function vect_get_new_vect_var.
3777
3778    Returns a name for a new variable.  The current naming scheme appends the
3779    prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
3780    the name of vectorizer generated variables, and appends that to NAME if
3781    provided.  */
3782
3783 tree
3784 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
3785 {
3786   const char *prefix;
3787   tree new_vect_var;
3788
3789   switch (var_kind)
3790   {
3791   case vect_simple_var:
3792     prefix = "vect";
3793     break;
3794   case vect_scalar_var:
3795     prefix = "stmp";
3796     break;
3797   case vect_pointer_var:
3798     prefix = "vectp";
3799     break;
3800   default:
3801     gcc_unreachable ();
3802   }
3803
3804   if (name)
3805     {
3806       char* tmp = concat (prefix, "_", name, NULL);
3807       new_vect_var = create_tmp_reg (type, tmp);
3808       free (tmp);
3809     }
3810   else
3811     new_vect_var = create_tmp_reg (type, prefix);
3812
3813   return new_vect_var;
3814 }
3815
3816
3817 /* Function vect_create_addr_base_for_vector_ref.
3818
3819    Create an expression that computes the address of the first memory location
3820    that will be accessed for a data reference.
3821
3822    Input:
3823    STMT: The statement containing the data reference.
3824    NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
3825    OFFSET: Optional. If supplied, it is be added to the initial address.
3826    LOOP:    Specify relative to which loop-nest should the address be computed.
3827             For example, when the dataref is in an inner-loop nested in an
3828             outer-loop that is now being vectorized, LOOP can be either the
3829             outer-loop, or the inner-loop.  The first memory location accessed
3830             by the following dataref ('in' points to short):
3831
3832                 for (i=0; i<N; i++)
3833                    for (j=0; j<M; j++)
3834                      s += in[i+j]
3835
3836             is as follows:
3837             if LOOP=i_loop:     &in             (relative to i_loop)
3838             if LOOP=j_loop:     &in+i*2B        (relative to j_loop)
3839
3840    Output:
3841    1. Return an SSA_NAME whose value is the address of the memory location of
3842       the first vector of the data reference.
3843    2. If new_stmt_list is not NULL_TREE after return then the caller must insert
3844       these statement(s) which define the returned SSA_NAME.
3845
3846    FORNOW: We are only handling array accesses with step 1.  */
3847
3848 tree
3849 vect_create_addr_base_for_vector_ref (gimple stmt,
3850                                       gimple_seq *new_stmt_list,
3851                                       tree offset,
3852                                       struct loop *loop)
3853 {
3854   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3855   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3856   tree data_ref_base;
3857   const char *base_name;
3858   tree addr_base;
3859   tree dest;
3860   gimple_seq seq = NULL;
3861   tree base_offset;
3862   tree init;
3863   tree vect_ptr_type;
3864   tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
3865   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3866
3867   if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
3868     {
3869       struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
3870
3871       gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
3872
3873       data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3874       base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
3875       init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
3876     }
3877   else
3878     {
3879       data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
3880       base_offset = unshare_expr (DR_OFFSET (dr));
3881       init = unshare_expr (DR_INIT (dr));
3882     }
3883
3884   if (loop_vinfo)
3885     base_name = get_name (data_ref_base);
3886   else
3887     {
3888       base_offset = ssize_int (0);
3889       init = ssize_int (0);
3890       base_name = get_name (DR_REF (dr));
3891     }
3892
3893   /* Create base_offset */
3894   base_offset = size_binop (PLUS_EXPR,
3895                             fold_convert (sizetype, base_offset),
3896                             fold_convert (sizetype, init));
3897
3898   if (offset)
3899     {
3900       offset = fold_build2 (MULT_EXPR, sizetype,
3901                             fold_convert (sizetype, offset), step);
3902       base_offset = fold_build2 (PLUS_EXPR, sizetype,
3903                                  base_offset, offset);
3904     }
3905
3906   /* base + base_offset */
3907   if (loop_vinfo)
3908     addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
3909   else
3910     {
3911       addr_base = build1 (ADDR_EXPR,
3912                           build_pointer_type (TREE_TYPE (DR_REF (dr))),
3913                           unshare_expr (DR_REF (dr)));
3914     }
3915
3916   vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
3917   addr_base = fold_convert (vect_ptr_type, addr_base);
3918   dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
3919   addr_base = force_gimple_operand (addr_base, &seq, false, dest);
3920   gimple_seq_add_seq (new_stmt_list, seq);
3921
3922   if (DR_PTR_INFO (dr)
3923       && TREE_CODE (addr_base) == SSA_NAME)
3924     {
3925       duplicate_ssa_name_ptr_info (addr_base, DR_PTR_INFO (dr));
3926       if (offset)
3927         mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr_base));
3928     }
3929
3930   if (dump_enabled_p ())
3931     {
3932       dump_printf_loc (MSG_NOTE, vect_location, "created ");
3933       dump_generic_expr (MSG_NOTE, TDF_SLIM, addr_base);
3934       dump_printf (MSG_NOTE, "\n");
3935     }
3936
3937   return addr_base;
3938 }
3939
3940
3941 /* Function vect_create_data_ref_ptr.
3942
3943    Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
3944    location accessed in the loop by STMT, along with the def-use update
3945    chain to appropriately advance the pointer through the loop iterations.
3946    Also set aliasing information for the pointer.  This pointer is used by
3947    the callers to this function to create a memory reference expression for
3948    vector load/store access.
3949
3950    Input:
3951    1. STMT: a stmt that references memory. Expected to be of the form
3952          GIMPLE_ASSIGN <name, data-ref> or
3953          GIMPLE_ASSIGN <data-ref, name>.
3954    2. AGGR_TYPE: the type of the reference, which should be either a vector
3955         or an array.
3956    3. AT_LOOP: the loop where the vector memref is to be created.
3957    4. OFFSET (optional): an offset to be added to the initial address accessed
3958         by the data-ref in STMT.
3959    5. BSI: location where the new stmts are to be placed if there is no loop
3960    6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
3961         pointing to the initial address.
3962
3963    Output:
3964    1. Declare a new ptr to vector_type, and have it point to the base of the
3965       data reference (initial addressed accessed by the data reference).
3966       For example, for vector of type V8HI, the following code is generated:
3967
3968       v8hi *ap;
3969       ap = (v8hi *)initial_address;
3970
3971       if OFFSET is not supplied:
3972          initial_address = &a[init];
3973       if OFFSET is supplied:
3974          initial_address = &a[init + OFFSET];
3975
3976       Return the initial_address in INITIAL_ADDRESS.
3977
3978    2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
3979       update the pointer in each iteration of the loop.
3980
3981       Return the increment stmt that updates the pointer in PTR_INCR.
3982
3983    3. Set INV_P to true if the access pattern of the data reference in the
3984       vectorized loop is invariant.  Set it to false otherwise.
3985
3986    4. Return the pointer.  */
3987
3988 tree
3989 vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3990                           tree offset, tree *initial_address,
3991                           gimple_stmt_iterator *gsi, gimple *ptr_incr,
3992                           bool only_init, bool *inv_p)
3993 {
3994   const char *base_name;
3995   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3996   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3997   struct loop *loop = NULL;
3998   bool nested_in_vect_loop = false;
3999   struct loop *containing_loop = NULL;
4000   tree aggr_ptr_type;
4001   tree aggr_ptr;
4002   tree new_temp;
4003   gimple vec_stmt;
4004   gimple_seq new_stmt_list = NULL;
4005   edge pe = NULL;
4006   basic_block new_bb;
4007   tree aggr_ptr_init;
4008   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4009   tree aptr;
4010   gimple_stmt_iterator incr_gsi;
4011   bool insert_after;
4012   tree indx_before_incr, indx_after_incr;
4013   gimple incr;
4014   tree step;
4015   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4016
4017   gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
4018               || TREE_CODE (aggr_type) == VECTOR_TYPE);
4019
4020   if (loop_vinfo)
4021     {
4022       loop = LOOP_VINFO_LOOP (loop_vinfo);
4023       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4024       containing_loop = (gimple_bb (stmt))->loop_father;
4025       pe = loop_preheader_edge (loop);
4026     }
4027   else
4028     {
4029       gcc_assert (bb_vinfo);
4030       only_init = true;
4031       *ptr_incr = NULL;
4032     }
4033
4034   /* Check the step (evolution) of the load in LOOP, and record
4035      whether it's invariant.  */
4036   if (nested_in_vect_loop)
4037     step = STMT_VINFO_DR_STEP (stmt_info);
4038   else
4039     step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
4040
4041   if (integer_zerop (step))
4042     *inv_p = true;
4043   else
4044     *inv_p = false;
4045
4046   /* Create an expression for the first address accessed by this load
4047      in LOOP.  */
4048   base_name = get_name (DR_BASE_ADDRESS (dr));
4049
4050   if (dump_enabled_p ())
4051     {
4052       tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
4053       dump_printf_loc (MSG_NOTE, vect_location,
4054                        "create %s-pointer variable to type: ",
4055                        get_tree_code_name (TREE_CODE (aggr_type)));
4056       dump_generic_expr (MSG_NOTE, TDF_SLIM, aggr_type);
4057       if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
4058         dump_printf (MSG_NOTE, "  vectorizing an array ref: ");
4059       else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
4060         dump_printf (MSG_NOTE, "  vectorizing a vector ref: ");
4061       else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
4062         dump_printf (MSG_NOTE, "  vectorizing a record based array ref: ");
4063       else
4064         dump_printf (MSG_NOTE, "  vectorizing a pointer ref: ");
4065       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_BASE_OBJECT (dr));
4066       dump_printf (MSG_NOTE, "\n");
4067     }
4068
4069   /* (1) Create the new aggregate-pointer variable.
4070      Vector and array types inherit the alias set of their component
4071      type by default so we need to use a ref-all pointer if the data
4072      reference does not conflict with the created aggregated data
4073      reference because it is not addressable.  */
4074   bool need_ref_all = false;
4075   if (!alias_sets_conflict_p (get_alias_set (aggr_type),
4076                               get_alias_set (DR_REF (dr))))
4077     need_ref_all = true;
4078   /* Likewise for any of the data references in the stmt group.  */
4079   else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
4080     {
4081       gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
4082       do
4083         {
4084           stmt_vec_info sinfo = vinfo_for_stmt (orig_stmt);
4085           struct data_reference *sdr = STMT_VINFO_DATA_REF (sinfo);
4086           if (!alias_sets_conflict_p (get_alias_set (aggr_type),
4087                                       get_alias_set (DR_REF (sdr))))
4088             {
4089               need_ref_all = true;
4090               break;
4091             }
4092           orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (sinfo);
4093         }
4094       while (orig_stmt);
4095     }
4096   aggr_ptr_type = build_pointer_type_for_mode (aggr_type, ptr_mode,
4097                                                need_ref_all);
4098   aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, base_name);
4099
4100
4101   /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
4102      vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
4103      def-use update cycles for the pointer: one relative to the outer-loop
4104      (LOOP), which is what steps (3) and (4) below do.  The other is relative
4105      to the inner-loop (which is the inner-most loop containing the dataref),
4106      and this is done be step (5) below.
4107
4108      When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
4109      inner-most loop, and so steps (3),(4) work the same, and step (5) is
4110      redundant.  Steps (3),(4) create the following:
4111
4112         vp0 = &base_addr;
4113         LOOP:   vp1 = phi(vp0,vp2)
4114                 ...
4115                 ...
4116                 vp2 = vp1 + step
4117                 goto LOOP
4118
4119      If there is an inner-loop nested in loop, then step (5) will also be
4120      applied, and an additional update in the inner-loop will be created:
4121
4122         vp0 = &base_addr;
4123         LOOP:   vp1 = phi(vp0,vp2)
4124                 ...
4125         inner:     vp3 = phi(vp1,vp4)
4126                    vp4 = vp3 + inner_step
4127                    if () goto inner
4128                 ...
4129                 vp2 = vp1 + step
4130                 if () goto LOOP   */
4131
4132   /* (2) Calculate the initial address of the aggregate-pointer, and set
4133      the aggregate-pointer to point to it before the loop.  */
4134
4135   /* Create: (&(base[init_val+offset]) in the loop preheader.  */
4136
4137   new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
4138                                                    offset, loop);
4139   if (new_stmt_list)
4140     {
4141       if (pe)
4142         {
4143           new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
4144           gcc_assert (!new_bb);
4145         }
4146       else
4147         gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
4148     }
4149
4150   *initial_address = new_temp;
4151
4152   /* Create: p = (aggr_type *) initial_base  */
4153   if (TREE_CODE (new_temp) != SSA_NAME
4154       || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
4155     {
4156       vec_stmt = gimple_build_assign (aggr_ptr,
4157                                       fold_convert (aggr_ptr_type, new_temp));
4158       aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
4159       /* Copy the points-to information if it exists. */
4160       if (DR_PTR_INFO (dr))
4161         duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
4162       gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
4163       if (pe)
4164         {
4165           new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
4166           gcc_assert (!new_bb);
4167         }
4168       else
4169         gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
4170     }
4171   else
4172     aggr_ptr_init = new_temp;
4173
4174   /* (3) Handle the updating of the aggregate-pointer inside the loop.
4175      This is needed when ONLY_INIT is false, and also when AT_LOOP is the
4176      inner-loop nested in LOOP (during outer-loop vectorization).  */
4177
4178   /* No update in loop is required.  */
4179   if (only_init && (!loop_vinfo || at_loop == loop))
4180     aptr = aggr_ptr_init;
4181   else
4182     {
4183       /* The step of the aggregate pointer is the type size.  */
4184       tree iv_step = TYPE_SIZE_UNIT (aggr_type);
4185       /* One exception to the above is when the scalar step of the load in
4186          LOOP is zero. In this case the step here is also zero.  */
4187       if (*inv_p)
4188         iv_step = size_zero_node;
4189       else if (tree_int_cst_sgn (step) == -1)
4190         iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
4191
4192       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4193
4194       create_iv (aggr_ptr_init,
4195                  fold_convert (aggr_ptr_type, iv_step),
4196                  aggr_ptr, loop, &incr_gsi, insert_after,
4197                  &indx_before_incr, &indx_after_incr);
4198       incr = gsi_stmt (incr_gsi);
4199       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4200
4201       /* Copy the points-to information if it exists. */
4202       if (DR_PTR_INFO (dr))
4203         {
4204           duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
4205           duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
4206         }
4207       if (ptr_incr)
4208         *ptr_incr = incr;
4209
4210       aptr = indx_before_incr;
4211     }
4212
4213   if (!nested_in_vect_loop || only_init)
4214     return aptr;
4215
4216
4217   /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
4218      nested in LOOP, if exists.  */
4219
4220   gcc_assert (nested_in_vect_loop);
4221   if (!only_init)
4222     {
4223       standard_iv_increment_position (containing_loop, &incr_gsi,
4224                                       &insert_after);
4225       create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
4226                  containing_loop, &incr_gsi, insert_after, &indx_before_incr,
4227                  &indx_after_incr);
4228       incr = gsi_stmt (incr_gsi);
4229       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4230
4231       /* Copy the points-to information if it exists. */
4232       if (DR_PTR_INFO (dr))
4233         {
4234           duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
4235           duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
4236         }
4237       if (ptr_incr)
4238         *ptr_incr = incr;
4239
4240       return indx_before_incr;
4241     }
4242   else
4243     gcc_unreachable ();
4244 }
4245
4246
4247 /* Function bump_vector_ptr
4248
4249    Increment a pointer (to a vector type) by vector-size. If requested,
4250    i.e. if PTR-INCR is given, then also connect the new increment stmt
4251    to the existing def-use update-chain of the pointer, by modifying
4252    the PTR_INCR as illustrated below:
4253
4254    The pointer def-use update-chain before this function:
4255                         DATAREF_PTR = phi (p_0, p_2)
4256                         ....
4257         PTR_INCR:       p_2 = DATAREF_PTR + step
4258
4259    The pointer def-use update-chain after this function:
4260                         DATAREF_PTR = phi (p_0, p_2)
4261                         ....
4262                         NEW_DATAREF_PTR = DATAREF_PTR + BUMP
4263                         ....
4264         PTR_INCR:       p_2 = NEW_DATAREF_PTR + step
4265
4266    Input:
4267    DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
4268                  in the loop.
4269    PTR_INCR - optional. The stmt that updates the pointer in each iteration of
4270               the loop.  The increment amount across iterations is expected
4271               to be vector_size.
4272    BSI - location where the new update stmt is to be placed.
4273    STMT - the original scalar memory-access stmt that is being vectorized.
4274    BUMP - optional. The offset by which to bump the pointer. If not given,
4275           the offset is assumed to be vector_size.
4276
4277    Output: Return NEW_DATAREF_PTR as illustrated above.
4278
4279 */
4280
4281 tree
4282 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
4283                  gimple stmt, tree bump)
4284 {
4285   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4286   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4287   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4288   tree update = TYPE_SIZE_UNIT (vectype);
4289   gimple incr_stmt;
4290   ssa_op_iter iter;
4291   use_operand_p use_p;
4292   tree new_dataref_ptr;
4293
4294   if (bump)
4295     update = bump;
4296
4297   new_dataref_ptr = copy_ssa_name (dataref_ptr, NULL);
4298   incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, new_dataref_ptr,
4299                                             dataref_ptr, update);
4300   vect_finish_stmt_generation (stmt, incr_stmt, gsi);
4301
4302   /* Copy the points-to information if it exists. */
4303   if (DR_PTR_INFO (dr))
4304     {
4305       duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
4306       mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
4307     }
4308
4309   if (!ptr_incr)
4310     return new_dataref_ptr;
4311
4312   /* Update the vector-pointer's cross-iteration increment.  */
4313   FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
4314     {
4315       tree use = USE_FROM_PTR (use_p);
4316
4317       if (use == dataref_ptr)
4318         SET_USE (use_p, new_dataref_ptr);
4319       else
4320         gcc_assert (tree_int_cst_compare (use, update) == 0);
4321     }
4322
4323   return new_dataref_ptr;
4324 }
4325
4326
4327 /* Function vect_create_destination_var.
4328
4329    Create a new temporary of type VECTYPE.  */
4330
4331 tree
4332 vect_create_destination_var (tree scalar_dest, tree vectype)
4333 {
4334   tree vec_dest;
4335   const char *name;
4336   char *new_name;
4337   tree type;
4338   enum vect_var_kind kind;
4339
4340   kind = vectype ? vect_simple_var : vect_scalar_var;
4341   type = vectype ? vectype : TREE_TYPE (scalar_dest);
4342
4343   gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
4344
4345   name = get_name (scalar_dest);
4346   if (name)
4347     asprintf (&new_name, "%s_%u", name, SSA_NAME_VERSION (scalar_dest));
4348   else
4349     asprintf (&new_name, "_%u", SSA_NAME_VERSION (scalar_dest));
4350   vec_dest = vect_get_new_vect_var (type, kind, new_name);
4351   free (new_name);
4352
4353   return vec_dest;
4354 }
4355
4356 /* Function vect_grouped_store_supported.
4357
4358    Returns TRUE if interleave high and interleave low permutations
4359    are supported, and FALSE otherwise.  */
4360
4361 bool
4362 vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
4363 {
4364   enum machine_mode mode = TYPE_MODE (vectype);
4365
4366   /* vect_permute_store_chain requires the group size to be a power of two.  */
4367   if (exact_log2 (count) == -1)
4368     {
4369       if (dump_enabled_p ())
4370         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4371                          "the size of the group of accesses"
4372                          " is not a power of 2\n");
4373       return false;
4374     }
4375
4376   /* Check that the permutation is supported.  */
4377   if (VECTOR_MODE_P (mode))
4378     {
4379       unsigned int i, nelt = GET_MODE_NUNITS (mode);
4380       unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4381       for (i = 0; i < nelt / 2; i++)
4382         {
4383           sel[i * 2] = i;
4384           sel[i * 2 + 1] = i + nelt;
4385         }
4386       if (can_vec_perm_p (mode, false, sel))
4387         {
4388           for (i = 0; i < nelt; i++)
4389             sel[i] += nelt / 2;
4390           if (can_vec_perm_p (mode, false, sel))
4391             return true;
4392         }
4393     }
4394
4395   if (dump_enabled_p ())
4396     dump_printf (MSG_MISSED_OPTIMIZATION,
4397                  "interleave op not supported by target.\n");
4398   return false;
4399 }
4400
4401
4402 /* Return TRUE if vec_store_lanes is available for COUNT vectors of
4403    type VECTYPE.  */
4404
4405 bool
4406 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4407 {
4408   return vect_lanes_optab_supported_p ("vec_store_lanes",
4409                                        vec_store_lanes_optab,
4410                                        vectype, count);
4411 }
4412
4413
4414 /* Function vect_permute_store_chain.
4415
4416    Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
4417    a power of 2, generate interleave_high/low stmts to reorder the data
4418    correctly for the stores.  Return the final references for stores in
4419    RESULT_CHAIN.
4420
4421    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4422    The input is 4 vectors each containing 8 elements.  We assign a number to
4423    each element, the input sequence is:
4424
4425    1st vec:   0  1  2  3  4  5  6  7
4426    2nd vec:   8  9 10 11 12 13 14 15
4427    3rd vec:  16 17 18 19 20 21 22 23
4428    4th vec:  24 25 26 27 28 29 30 31
4429
4430    The output sequence should be:
4431
4432    1st vec:  0  8 16 24  1  9 17 25
4433    2nd vec:  2 10 18 26  3 11 19 27
4434    3rd vec:  4 12 20 28  5 13 21 30
4435    4th vec:  6 14 22 30  7 15 23 31
4436
4437    i.e., we interleave the contents of the four vectors in their order.
4438
4439    We use interleave_high/low instructions to create such output.  The input of
4440    each interleave_high/low operation is two vectors:
4441    1st vec    2nd vec
4442    0 1 2 3    4 5 6 7
4443    the even elements of the result vector are obtained left-to-right from the
4444    high/low elements of the first vector.  The odd elements of the result are
4445    obtained left-to-right from the high/low elements of the second vector.
4446    The output of interleave_high will be:   0 4 1 5
4447    and of interleave_low:                   2 6 3 7
4448
4449
4450    The permutation is done in log LENGTH stages.  In each stage interleave_high
4451    and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
4452    where the first argument is taken from the first half of DR_CHAIN and the
4453    second argument from it's second half.
4454    In our example,
4455
4456    I1: interleave_high (1st vec, 3rd vec)
4457    I2: interleave_low (1st vec, 3rd vec)
4458    I3: interleave_high (2nd vec, 4th vec)
4459    I4: interleave_low (2nd vec, 4th vec)
4460
4461    The output for the first stage is:
4462
4463    I1:  0 16  1 17  2 18  3 19
4464    I2:  4 20  5 21  6 22  7 23
4465    I3:  8 24  9 25 10 26 11 27
4466    I4: 12 28 13 29 14 30 15 31
4467
4468    The output of the second stage, i.e. the final result is:
4469
4470    I1:  0  8 16 24  1  9 17 25
4471    I2:  2 10 18 26  3 11 19 27
4472    I3:  4 12 20 28  5 13 21 30
4473    I4:  6 14 22 30  7 15 23 31.  */
4474
4475 void
4476 vect_permute_store_chain (vec<tree> dr_chain,
4477                           unsigned int length,
4478                           gimple stmt,
4479                           gimple_stmt_iterator *gsi,
4480                           vec<tree> *result_chain)
4481 {
4482   tree vect1, vect2, high, low;
4483   gimple perm_stmt;
4484   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4485   tree perm_mask_low, perm_mask_high;
4486   unsigned int i, n;
4487   unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
4488   unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4489
4490   result_chain->quick_grow (length);
4491   memcpy (result_chain->address (), dr_chain.address (),
4492           length * sizeof (tree));
4493
4494   for (i = 0, n = nelt / 2; i < n; i++)
4495     {
4496       sel[i * 2] = i;
4497       sel[i * 2 + 1] = i + nelt;
4498     }
4499   perm_mask_high = vect_gen_perm_mask (vectype, sel);
4500   gcc_assert (perm_mask_high != NULL);
4501
4502   for (i = 0; i < nelt; i++)
4503     sel[i] += nelt / 2;
4504   perm_mask_low = vect_gen_perm_mask (vectype, sel);
4505   gcc_assert (perm_mask_low != NULL);
4506
4507   for (i = 0, n = exact_log2 (length); i < n; i++)
4508     {
4509       for (j = 0; j < length/2; j++)
4510         {
4511           vect1 = dr_chain[j];
4512           vect2 = dr_chain[j+length/2];
4513
4514           /* Create interleaving stmt:
4515              high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}>  */
4516           high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
4517           perm_stmt
4518             = gimple_build_assign_with_ops (VEC_PERM_EXPR, high,
4519                                             vect1, vect2, perm_mask_high);
4520           vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4521           (*result_chain)[2*j] = high;
4522
4523           /* Create interleaving stmt:
4524              low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
4525                                                  nelt*3/2+1, ...}>  */
4526           low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
4527           perm_stmt
4528             = gimple_build_assign_with_ops (VEC_PERM_EXPR, low,
4529                                             vect1, vect2, perm_mask_low);
4530           vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4531           (*result_chain)[2*j+1] = low;
4532         }
4533       memcpy (dr_chain.address (), result_chain->address (),
4534               length * sizeof (tree));
4535     }
4536 }
4537
4538 /* Function vect_setup_realignment
4539
4540    This function is called when vectorizing an unaligned load using
4541    the dr_explicit_realign[_optimized] scheme.
4542    This function generates the following code at the loop prolog:
4543
4544       p = initial_addr;
4545    x  msq_init = *(floor(p));   # prolog load
4546       realignment_token = call target_builtin;
4547     loop:
4548    x  msq = phi (msq_init, ---)
4549
4550    The stmts marked with x are generated only for the case of
4551    dr_explicit_realign_optimized.
4552
4553    The code above sets up a new (vector) pointer, pointing to the first
4554    location accessed by STMT, and a "floor-aligned" load using that pointer.
4555    It also generates code to compute the "realignment-token" (if the relevant
4556    target hook was defined), and creates a phi-node at the loop-header bb
4557    whose arguments are the result of the prolog-load (created by this
4558    function) and the result of a load that takes place in the loop (to be
4559    created by the caller to this function).
4560
4561    For the case of dr_explicit_realign_optimized:
4562    The caller to this function uses the phi-result (msq) to create the
4563    realignment code inside the loop, and sets up the missing phi argument,
4564    as follows:
4565     loop:
4566       msq = phi (msq_init, lsq)
4567       lsq = *(floor(p'));        # load in loop
4568       result = realign_load (msq, lsq, realignment_token);
4569
4570    For the case of dr_explicit_realign:
4571     loop:
4572       msq = *(floor(p));        # load in loop
4573       p' = p + (VS-1);
4574       lsq = *(floor(p'));       # load in loop
4575       result = realign_load (msq, lsq, realignment_token);
4576
4577    Input:
4578    STMT - (scalar) load stmt to be vectorized. This load accesses
4579           a memory location that may be unaligned.
4580    GSI - place where new code is to be inserted.
4581    ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
4582                               is used.
4583
4584    Output:
4585    REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4586                        target hook, if defined.
4587    Return value - the result of the loop-header phi node.  */
4588
4589 tree
4590 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
4591                         tree *realignment_token,
4592                         enum dr_alignment_support alignment_support_scheme,
4593                         tree init_addr,
4594                         struct loop **at_loop)
4595 {
4596   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4597   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4598   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4599   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4600   struct loop *loop = NULL;
4601   edge pe = NULL;
4602   tree scalar_dest = gimple_assign_lhs (stmt);
4603   tree vec_dest;
4604   gimple inc;
4605   tree ptr;
4606   tree data_ref;
4607   gimple new_stmt;
4608   basic_block new_bb;
4609   tree msq_init = NULL_TREE;
4610   tree new_temp;
4611   gimple phi_stmt;
4612   tree msq = NULL_TREE;
4613   gimple_seq stmts = NULL;
4614   bool inv_p;
4615   bool compute_in_loop = false;
4616   bool nested_in_vect_loop = false;
4617   struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4618   struct loop *loop_for_initial_load = NULL;
4619
4620   if (loop_vinfo)
4621     {
4622       loop = LOOP_VINFO_LOOP (loop_vinfo);
4623       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4624     }
4625
4626   gcc_assert (alignment_support_scheme == dr_explicit_realign
4627               || alignment_support_scheme == dr_explicit_realign_optimized);
4628
4629   /* We need to generate three things:
4630      1. the misalignment computation
4631      2. the extra vector load (for the optimized realignment scheme).
4632      3. the phi node for the two vectors from which the realignment is
4633       done (for the optimized realignment scheme).  */
4634
4635   /* 1. Determine where to generate the misalignment computation.
4636
4637      If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4638      calculation will be generated by this function, outside the loop (in the
4639      preheader).  Otherwise, INIT_ADDR had already been computed for us by the
4640      caller, inside the loop.
4641
4642      Background: If the misalignment remains fixed throughout the iterations of
4643      the loop, then both realignment schemes are applicable, and also the
4644      misalignment computation can be done outside LOOP.  This is because we are
4645      vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4646      are a multiple of VS (the Vector Size), and therefore the misalignment in
4647      different vectorized LOOP iterations is always the same.
4648      The problem arises only if the memory access is in an inner-loop nested
4649      inside LOOP, which is now being vectorized using outer-loop vectorization.
4650      This is the only case when the misalignment of the memory access may not
4651      remain fixed throughout the iterations of the inner-loop (as explained in
4652      detail in vect_supportable_dr_alignment).  In this case, not only is the
4653      optimized realignment scheme not applicable, but also the misalignment
4654      computation (and generation of the realignment token that is passed to
4655      REALIGN_LOAD) have to be done inside the loop.
4656
4657      In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
4658      or not, which in turn determines if the misalignment is computed inside
4659      the inner-loop, or outside LOOP.  */
4660
4661   if (init_addr != NULL_TREE || !loop_vinfo)
4662     {
4663       compute_in_loop = true;
4664       gcc_assert (alignment_support_scheme == dr_explicit_realign);
4665     }
4666
4667
4668   /* 2. Determine where to generate the extra vector load.
4669
4670      For the optimized realignment scheme, instead of generating two vector
4671      loads in each iteration, we generate a single extra vector load in the
4672      preheader of the loop, and in each iteration reuse the result of the
4673      vector load from the previous iteration.  In case the memory access is in
4674      an inner-loop nested inside LOOP, which is now being vectorized using
4675      outer-loop vectorization, we need to determine whether this initial vector
4676      load should be generated at the preheader of the inner-loop, or can be
4677      generated at the preheader of LOOP.  If the memory access has no evolution
4678      in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
4679      to be generated inside LOOP (in the preheader of the inner-loop).  */
4680
4681   if (nested_in_vect_loop)
4682     {
4683       tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
4684       bool invariant_in_outerloop =
4685             (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
4686       loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
4687     }
4688   else
4689     loop_for_initial_load = loop;
4690   if (at_loop)
4691     *at_loop = loop_for_initial_load;
4692
4693   if (loop_for_initial_load)
4694     pe = loop_preheader_edge (loop_for_initial_load);
4695
4696   /* 3. For the case of the optimized realignment, create the first vector
4697       load at the loop preheader.  */
4698
4699   if (alignment_support_scheme == dr_explicit_realign_optimized)
4700     {
4701       /* Create msq_init = *(floor(p1)) in the loop preheader  */
4702
4703       gcc_assert (!compute_in_loop);
4704       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4705       ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
4706                                       NULL_TREE, &init_addr, NULL, &inc,
4707                                       true, &inv_p);
4708       new_temp = copy_ssa_name (ptr, NULL);
4709       new_stmt = gimple_build_assign_with_ops
4710                    (BIT_AND_EXPR, new_temp, ptr,
4711                     build_int_cst (TREE_TYPE (ptr),
4712                                    -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4713       new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4714       gcc_assert (!new_bb);
4715       data_ref
4716         = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
4717                   build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
4718       new_stmt = gimple_build_assign (vec_dest, data_ref);
4719       new_temp = make_ssa_name (vec_dest, new_stmt);
4720       gimple_assign_set_lhs (new_stmt, new_temp);
4721       if (pe)
4722         {
4723           new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4724           gcc_assert (!new_bb);
4725         }
4726       else
4727          gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4728
4729       msq_init = gimple_assign_lhs (new_stmt);
4730     }
4731
4732   /* 4. Create realignment token using a target builtin, if available.
4733       It is done either inside the containing loop, or before LOOP (as
4734       determined above).  */
4735
4736   if (targetm.vectorize.builtin_mask_for_load)
4737     {
4738       tree builtin_decl;
4739
4740       /* Compute INIT_ADDR - the initial addressed accessed by this memref.  */
4741       if (!init_addr)
4742         {
4743           /* Generate the INIT_ADDR computation outside LOOP.  */
4744           init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
4745                                                         NULL_TREE, loop);
4746           if (loop)
4747             {
4748               pe = loop_preheader_edge (loop);
4749               new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4750               gcc_assert (!new_bb);
4751             }
4752           else
4753              gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
4754         }
4755
4756       builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4757       new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
4758       vec_dest =
4759         vect_create_destination_var (scalar_dest,
4760                                      gimple_call_return_type (new_stmt));
4761       new_temp = make_ssa_name (vec_dest, new_stmt);
4762       gimple_call_set_lhs (new_stmt, new_temp);
4763
4764       if (compute_in_loop)
4765         gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4766       else
4767         {
4768           /* Generate the misalignment computation outside LOOP.  */
4769           pe = loop_preheader_edge (loop);
4770           new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4771           gcc_assert (!new_bb);
4772         }
4773
4774       *realignment_token = gimple_call_lhs (new_stmt);
4775
4776       /* The result of the CALL_EXPR to this builtin is determined from
4777          the value of the parameter and no global variables are touched
4778          which makes the builtin a "const" function.  Requiring the
4779          builtin to have the "const" attribute makes it unnecessary
4780          to call mark_call_clobbered.  */
4781       gcc_assert (TREE_READONLY (builtin_decl));
4782     }
4783
4784   if (alignment_support_scheme == dr_explicit_realign)
4785     return msq;
4786
4787   gcc_assert (!compute_in_loop);
4788   gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
4789
4790
4791   /* 5. Create msq = phi <msq_init, lsq> in loop  */
4792
4793   pe = loop_preheader_edge (containing_loop);
4794   vec_dest = vect_create_destination_var (scalar_dest, vectype);
4795   msq = make_ssa_name (vec_dest, NULL);
4796   phi_stmt = create_phi_node (msq, containing_loop->header);
4797   add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
4798
4799   return msq;
4800 }
4801
4802
4803 /* Function vect_grouped_load_supported.
4804
4805    Returns TRUE if even and odd permutations are supported,
4806    and FALSE otherwise.  */
4807
4808 bool
4809 vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
4810 {
4811   enum machine_mode mode = TYPE_MODE (vectype);
4812
4813   /* vect_permute_load_chain requires the group size to be a power of two.  */
4814   if (exact_log2 (count) == -1)
4815     {
4816       if (dump_enabled_p ())
4817         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4818                          "the size of the group of accesses"
4819                          " is not a power of 2\n");
4820       return false;
4821     }
4822
4823   /* Check that the permutation is supported.  */
4824   if (VECTOR_MODE_P (mode))
4825     {
4826       unsigned int i, nelt = GET_MODE_NUNITS (mode);
4827       unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4828
4829       for (i = 0; i < nelt; i++)
4830         sel[i] = i * 2;
4831       if (can_vec_perm_p (mode, false, sel))
4832         {
4833           for (i = 0; i < nelt; i++)
4834             sel[i] = i * 2 + 1;
4835           if (can_vec_perm_p (mode, false, sel))
4836             return true;
4837         }
4838     }
4839
4840   if (dump_enabled_p ())
4841     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4842                      "extract even/odd not supported by target\n");
4843   return false;
4844 }
4845
4846 /* Return TRUE if vec_load_lanes is available for COUNT vectors of
4847    type VECTYPE.  */
4848
4849 bool
4850 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4851 {
4852   return vect_lanes_optab_supported_p ("vec_load_lanes",
4853                                        vec_load_lanes_optab,
4854                                        vectype, count);
4855 }
4856
4857 /* Function vect_permute_load_chain.
4858
4859    Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
4860    a power of 2, generate extract_even/odd stmts to reorder the input data
4861    correctly.  Return the final references for loads in RESULT_CHAIN.
4862
4863    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4864    The input is 4 vectors each containing 8 elements. We assign a number to each
4865    element, the input sequence is:
4866
4867    1st vec:   0  1  2  3  4  5  6  7
4868    2nd vec:   8  9 10 11 12 13 14 15
4869    3rd vec:  16 17 18 19 20 21 22 23
4870    4th vec:  24 25 26 27 28 29 30 31
4871
4872    The output sequence should be:
4873
4874    1st vec:  0 4  8 12 16 20 24 28
4875    2nd vec:  1 5  9 13 17 21 25 29
4876    3rd vec:  2 6 10 14 18 22 26 30
4877    4th vec:  3 7 11 15 19 23 27 31
4878
4879    i.e., the first output vector should contain the first elements of each
4880    interleaving group, etc.
4881
4882    We use extract_even/odd instructions to create such output.  The input of
4883    each extract_even/odd operation is two vectors
4884    1st vec    2nd vec
4885    0 1 2 3    4 5 6 7
4886
4887    and the output is the vector of extracted even/odd elements.  The output of
4888    extract_even will be:   0 2 4 6
4889    and of extract_odd:     1 3 5 7
4890
4891
4892    The permutation is done in log LENGTH stages.  In each stage extract_even
4893    and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
4894    their order.  In our example,
4895
4896    E1: extract_even (1st vec, 2nd vec)
4897    E2: extract_odd (1st vec, 2nd vec)
4898    E3: extract_even (3rd vec, 4th vec)
4899    E4: extract_odd (3rd vec, 4th vec)
4900
4901    The output for the first stage will be:
4902
4903    E1:  0  2  4  6  8 10 12 14
4904    E2:  1  3  5  7  9 11 13 15
4905    E3: 16 18 20 22 24 26 28 30
4906    E4: 17 19 21 23 25 27 29 31
4907
4908    In order to proceed and create the correct sequence for the next stage (or
4909    for the correct output, if the second stage is the last one, as in our
4910    example), we first put the output of extract_even operation and then the
4911    output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4912    The input for the second stage is:
4913
4914    1st vec (E1):  0  2  4  6  8 10 12 14
4915    2nd vec (E3): 16 18 20 22 24 26 28 30
4916    3rd vec (E2):  1  3  5  7  9 11 13 15
4917    4th vec (E4): 17 19 21 23 25 27 29 31
4918
4919    The output of the second stage:
4920
4921    E1: 0 4  8 12 16 20 24 28
4922    E2: 2 6 10 14 18 22 26 30
4923    E3: 1 5  9 13 17 21 25 29
4924    E4: 3 7 11 15 19 23 27 31
4925
4926    And RESULT_CHAIN after reordering:
4927
4928    1st vec (E1):  0 4  8 12 16 20 24 28
4929    2nd vec (E3):  1 5  9 13 17 21 25 29
4930    3rd vec (E2):  2 6 10 14 18 22 26 30
4931    4th vec (E4):  3 7 11 15 19 23 27 31.  */
4932
4933 static void
4934 vect_permute_load_chain (vec<tree> dr_chain,
4935                          unsigned int length,
4936                          gimple stmt,
4937                          gimple_stmt_iterator *gsi,
4938                          vec<tree> *result_chain)
4939 {
4940   tree data_ref, first_vect, second_vect;
4941   tree perm_mask_even, perm_mask_odd;
4942   gimple perm_stmt;
4943   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4944   unsigned int i, j, log_length = exact_log2 (length);
4945   unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
4946   unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4947
4948   result_chain->quick_grow (length);
4949   memcpy (result_chain->address (), dr_chain.address (),
4950           length * sizeof (tree));
4951
4952   for (i = 0; i < nelt; ++i)
4953     sel[i] = i * 2;
4954   perm_mask_even = vect_gen_perm_mask (vectype, sel);
4955   gcc_assert (perm_mask_even != NULL);
4956
4957   for (i = 0; i < nelt; ++i)
4958     sel[i] = i * 2 + 1;
4959   perm_mask_odd = vect_gen_perm_mask (vectype, sel);
4960   gcc_assert (perm_mask_odd != NULL);
4961
4962   for (i = 0; i < log_length; i++)
4963     {
4964       for (j = 0; j < length; j += 2)
4965         {
4966           first_vect = dr_chain[j];
4967           second_vect = dr_chain[j+1];
4968
4969           /* data_ref = permute_even (first_data_ref, second_data_ref);  */
4970           data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
4971           perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4972                                                     first_vect, second_vect,
4973                                                     perm_mask_even);
4974           vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4975           (*result_chain)[j/2] = data_ref;
4976
4977           /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
4978           data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
4979           perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4980                                                     first_vect, second_vect,
4981                                                     perm_mask_odd);
4982           vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4983           (*result_chain)[j/2+length/2] = data_ref;
4984         }
4985       memcpy (dr_chain.address (), result_chain->address (),
4986               length * sizeof (tree));
4987     }
4988 }
4989
4990
4991 /* Function vect_transform_grouped_load.
4992
4993    Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4994    to perform their permutation and ascribe the result vectorized statements to
4995    the scalar statements.
4996 */
4997
4998 void
4999 vect_transform_grouped_load (gimple stmt, vec<tree> dr_chain, int size,
5000                              gimple_stmt_iterator *gsi)
5001 {
5002   vec<tree> result_chain = vNULL;
5003
5004   /* DR_CHAIN contains input data-refs that are a part of the interleaving.
5005      RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
5006      vectors, that are ready for vector computation.  */
5007   result_chain.create (size);
5008   vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
5009   vect_record_grouped_load_vectors (stmt, result_chain);
5010   result_chain.release ();
5011 }
5012
5013 /* RESULT_CHAIN contains the output of a group of grouped loads that were
5014    generated as part of the vectorization of STMT.  Assign the statement
5015    for each vector to the associated scalar statement.  */
5016
5017 void
5018 vect_record_grouped_load_vectors (gimple stmt, vec<tree> result_chain)
5019 {
5020   gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
5021   gimple next_stmt, new_stmt;
5022   unsigned int i, gap_count;
5023   tree tmp_data_ref;
5024
5025   /* Put a permuted data-ref in the VECTORIZED_STMT field.
5026      Since we scan the chain starting from it's first node, their order
5027      corresponds the order of data-refs in RESULT_CHAIN.  */
5028   next_stmt = first_stmt;
5029   gap_count = 1;
5030   FOR_EACH_VEC_ELT (result_chain, i, tmp_data_ref)
5031     {
5032       if (!next_stmt)
5033         break;
5034
5035       /* Skip the gaps.  Loads created for the gaps will be removed by dead
5036        code elimination pass later.  No need to check for the first stmt in
5037        the group, since it always exists.
5038        GROUP_GAP is the number of steps in elements from the previous
5039        access (if there is no gap GROUP_GAP is 1).  We skip loads that
5040        correspond to the gaps.  */
5041       if (next_stmt != first_stmt
5042           && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
5043       {
5044         gap_count++;
5045         continue;
5046       }
5047
5048       while (next_stmt)
5049         {
5050           new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
5051           /* We assume that if VEC_STMT is not NULL, this is a case of multiple
5052              copies, and we put the new vector statement in the first available
5053              RELATED_STMT.  */
5054           if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
5055             STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
5056           else
5057             {
5058               if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
5059                 {
5060                   gimple prev_stmt =
5061                     STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
5062                   gimple rel_stmt =
5063                     STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
5064                   while (rel_stmt)
5065                     {
5066                       prev_stmt = rel_stmt;
5067                       rel_stmt =
5068                         STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
5069                     }
5070
5071                   STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
5072                     new_stmt;
5073                 }
5074             }
5075
5076           next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5077           gap_count = 1;
5078           /* If NEXT_STMT accesses the same DR as the previous statement,
5079              put the same TMP_DATA_REF as its vectorized statement; otherwise
5080              get the next data-ref from RESULT_CHAIN.  */
5081           if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
5082             break;
5083         }
5084     }
5085 }
5086
5087 /* Function vect_force_dr_alignment_p.
5088
5089    Returns whether the alignment of a DECL can be forced to be aligned
5090    on ALIGNMENT bit boundary.  */
5091
5092 bool
5093 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
5094 {
5095   if (TREE_CODE (decl) != VAR_DECL)
5096     return false;
5097
5098   /* We cannot change alignment of common or external symbols as another
5099      translation unit may contain a definition with lower alignment.
5100      The rules of common symbol linking mean that the definition
5101      will override the common symbol.  The same is true for constant
5102      pool entries which may be shared and are not properly merged
5103      by LTO.  */
5104   if (DECL_EXTERNAL (decl)
5105       || DECL_COMMON (decl)
5106       || DECL_IN_CONSTANT_POOL (decl))
5107     return false;
5108
5109   if (TREE_ASM_WRITTEN (decl))
5110     return false;
5111
5112   /* Do not override the alignment as specified by the ABI when the used
5113      attribute is set.  */
5114   if (DECL_PRESERVE_P (decl))
5115     return false;
5116
5117   /* Do not override explicit alignment set by the user when an explicit
5118      section name is also used.  This is a common idiom used by many
5119      software projects.  */
5120   if (DECL_SECTION_NAME (decl) != NULL_TREE
5121       && !DECL_HAS_IMPLICIT_SECTION_NAME_P (decl))
5122     return false;
5123
5124   if (TREE_STATIC (decl))
5125     return (alignment <= MAX_OFILE_ALIGNMENT);
5126   else
5127     return (alignment <= MAX_STACK_ALIGNMENT);
5128 }
5129
5130
5131 /* Return whether the data reference DR is supported with respect to its
5132    alignment.
5133    If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
5134    it is aligned, i.e., check if it is possible to vectorize it with different
5135    alignment.  */
5136
5137 enum dr_alignment_support
5138 vect_supportable_dr_alignment (struct data_reference *dr,
5139                                bool check_aligned_accesses)
5140 {
5141   gimple stmt = DR_STMT (dr);
5142   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5143   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5144   enum machine_mode mode = TYPE_MODE (vectype);
5145   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5146   struct loop *vect_loop = NULL;
5147   bool nested_in_vect_loop = false;
5148
5149   if (aligned_access_p (dr) && !check_aligned_accesses)
5150     return dr_aligned;
5151
5152   /* For now assume all conditional loads/stores support unaligned
5153      access without any special code.  */
5154   if (is_gimple_call (stmt)
5155       && gimple_call_internal_p (stmt)
5156       && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
5157           || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
5158     return dr_unaligned_supported;
5159
5160   if (loop_vinfo)
5161     {
5162       vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
5163       nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
5164     }
5165
5166   /* Possibly unaligned access.  */
5167
5168   /* We can choose between using the implicit realignment scheme (generating
5169      a misaligned_move stmt) and the explicit realignment scheme (generating
5170      aligned loads with a REALIGN_LOAD).  There are two variants to the
5171      explicit realignment scheme: optimized, and unoptimized.
5172      We can optimize the realignment only if the step between consecutive
5173      vector loads is equal to the vector size.  Since the vector memory
5174      accesses advance in steps of VS (Vector Size) in the vectorized loop, it
5175      is guaranteed that the misalignment amount remains the same throughout the
5176      execution of the vectorized loop.  Therefore, we can create the
5177      "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
5178      at the loop preheader.
5179
5180      However, in the case of outer-loop vectorization, when vectorizing a
5181      memory access in the inner-loop nested within the LOOP that is now being
5182      vectorized, while it is guaranteed that the misalignment of the
5183      vectorized memory access will remain the same in different outer-loop
5184      iterations, it is *not* guaranteed that is will remain the same throughout
5185      the execution of the inner-loop.  This is because the inner-loop advances
5186      with the original scalar step (and not in steps of VS).  If the inner-loop
5187      step happens to be a multiple of VS, then the misalignment remains fixed
5188      and we can use the optimized realignment scheme.  For example:
5189
5190       for (i=0; i<N; i++)
5191         for (j=0; j<M; j++)
5192           s += a[i+j];
5193
5194      When vectorizing the i-loop in the above example, the step between
5195      consecutive vector loads is 1, and so the misalignment does not remain
5196      fixed across the execution of the inner-loop, and the realignment cannot
5197      be optimized (as illustrated in the following pseudo vectorized loop):
5198
5199       for (i=0; i<N; i+=4)
5200         for (j=0; j<M; j++){
5201           vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
5202                          // when j is {0,1,2,3,4,5,6,7,...} respectively.
5203                          // (assuming that we start from an aligned address).
5204           }
5205
5206      We therefore have to use the unoptimized realignment scheme:
5207
5208       for (i=0; i<N; i+=4)
5209           for (j=k; j<M; j+=4)
5210           vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
5211                            // that the misalignment of the initial address is
5212                            // 0).
5213
5214      The loop can then be vectorized as follows:
5215
5216       for (k=0; k<4; k++){
5217         rt = get_realignment_token (&vp[k]);
5218         for (i=0; i<N; i+=4){
5219           v1 = vp[i+k];
5220           for (j=k; j<M; j+=4){
5221             v2 = vp[i+j+VS-1];
5222             va = REALIGN_LOAD <v1,v2,rt>;
5223             vs += va;
5224             v1 = v2;
5225           }
5226         }
5227     } */
5228
5229   if (DR_IS_READ (dr))
5230     {
5231       bool is_packed = false;
5232       tree type = (TREE_TYPE (DR_REF (dr)));
5233
5234       if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
5235           && (!targetm.vectorize.builtin_mask_for_load
5236               || targetm.vectorize.builtin_mask_for_load ()))
5237         {
5238           tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5239           if ((nested_in_vect_loop
5240                && (TREE_INT_CST_LOW (DR_STEP (dr))
5241                    != GET_MODE_SIZE (TYPE_MODE (vectype))))
5242               || !loop_vinfo)
5243             return dr_explicit_realign;
5244           else
5245             return dr_explicit_realign_optimized;
5246         }
5247       if (!known_alignment_for_access_p (dr))
5248         is_packed = not_size_aligned (DR_REF (dr));
5249
5250       if ((TYPE_USER_ALIGN (type) && !is_packed)
5251           || targetm.vectorize.
5252                support_vector_misalignment (mode, type,
5253                                             DR_MISALIGNMENT (dr), is_packed))
5254         /* Can't software pipeline the loads, but can at least do them.  */
5255         return dr_unaligned_supported;
5256     }
5257   else
5258     {
5259       bool is_packed = false;
5260       tree type = (TREE_TYPE (DR_REF (dr)));
5261
5262       if (!known_alignment_for_access_p (dr))
5263         is_packed = not_size_aligned (DR_REF (dr));
5264
5265      if ((TYPE_USER_ALIGN (type) && !is_packed)
5266          || targetm.vectorize.
5267               support_vector_misalignment (mode, type,
5268                                            DR_MISALIGNMENT (dr), is_packed))
5269        return dr_unaligned_supported;
5270     }
5271
5272   /* Unsupported.  */
5273   return dr_unaligned_unsupported;
5274 }