1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "target.h"
31 #include "basic-block.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "dumpfile.h"
35 #include "cfgloop.h"
36 #include "tree-chrec.h"
37 #include "tree-scalar-evolution.h"
38 #include "tree-vectorizer.h"
39 #include "diagnostic-core.h"
40
41 /* Need to include rtl.h, expr.h, etc. for optabs. */
42 #include "expr.h"
43 #include "optabs.h"
44
45 /* Return true if load- or store-lanes optab OPTAB is implemented for
46 COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
47
48 static bool
49 vect_lanes_optab_supported_p (const char *name, convert_optab optab,
50 tree vectype, unsigned HOST_WIDE_INT count)
51 {
52 enum machine_mode mode, array_mode;
53 bool limit_p;
54
55 mode = TYPE_MODE (vectype);
56 limit_p = !targetm.array_mode_supported_p (mode, count);
57 array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
58 MODE_INT, limit_p);
59
60 if (array_mode == BLKmode)
61 {
62 if (dump_enabled_p ())
63 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
64 "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
65 GET_MODE_NAME (mode), count);
66 return false;
67 }
68
69 if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
70 {
71 if (dump_enabled_p ())
72 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
73 "cannot use %s<%s><%s>", name,
74 GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
75 return false;
76 }
77
78 if (dump_enabled_p ())
79 dump_printf_loc (MSG_NOTE, vect_location,
80 "can use %s<%s><%s>", name, GET_MODE_NAME (array_mode),
81 GET_MODE_NAME (mode));
82
83 return true;
84 }
85
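/* Illustrative query (hypothetical numbers, no particular back end
   implied): for a VECTYPE of mode V4SImode (128 bits) and COUNT == 2 the
   code above looks for a 256-bit integer mode to hold the whole group of
   vectors.  If no such array mode exists, mode_for_size returns BLKmode
   and the lanes idiom is rejected; otherwise we additionally require that
   a vec_load_lanes / vec_store_lanes pattern is provided for the
   <array_mode, V4SImode> pair.  */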
86
87 /* Return the smallest scalar part of STMT.
88 This is used to determine the vectype of the stmt. We generally set the
89 vectype according to the type of the result (lhs). For stmts whose
90 result-type is different than the type of the arguments (e.g., demotion,
91 promotion), vectype will be reset appropriately (later). Note that we have
92 to visit the smallest datatype in this function, because that determines the
93 VF. If the smallest datatype in the loop is present only as the rhs of a
94 promotion operation, we'd miss it.
95 Such a case, where a variable of this datatype does not appear in the lhs
96 anywhere in the loop, can only occur if it's an invariant: e.g.:
97 'int_x = (int) short_inv', which we'd expect to have been optimized away by
98 invariant motion. However, we cannot rely on invariant motion to always
99 take invariants out of the loop, and so in the case of promotion we also
100 have to check the rhs.
101 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
102 types. */
103
104 tree
105 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
106 HOST_WIDE_INT *rhs_size_unit)
107 {
108 tree scalar_type = gimple_expr_type (stmt);
109 HOST_WIDE_INT lhs, rhs;
110
111 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
112
113 if (is_gimple_assign (stmt)
114 && (gimple_assign_cast_p (stmt)
115 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
116 || gimple_assign_rhs_code (stmt) == WIDEN_LSHIFT_EXPR
117 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
118 {
119 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
120
121 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
122 if (rhs < lhs)
123 scalar_type = rhs_type;
124 }
125
126 *lhs_size_unit = lhs;
127 *rhs_size_unit = rhs;
128 return scalar_type;
129 }
130
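/* Worked example (element sizes assume a typical target with
   sizeof (short) == 2 and sizeof (int) == 4):

     short_x = ...;
     int_y = (int) short_x;    <-- gimple_assign_cast_p

   The lhs type is 4 bytes and the rhs type is 2 bytes, so the rhs type is
   returned and *LHS_SIZE_UNIT/*RHS_SIZE_UNIT are set to 4 and 2; the
   vectorization factor is then derived from the smaller, 2-byte element.  */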
131
132 /* Find the place of the data-ref in STMT in the interleaving chain that starts
133 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
134
135 int
136 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
137 {
138 gimple next_stmt = first_stmt;
139 int result = 0;
140
141 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
142 return -1;
143
144 while (next_stmt && next_stmt != stmt)
145 {
146 result++;
147 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
148 }
149
150 if (next_stmt)
151 return result;
152 else
153 return -1;
154 }
155
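/* For example, given an interleaved group of four stores

     a[4*i]   = w;    <-- GROUP_FIRST_ELEMENT
     a[4*i+1] = x;
     a[4*i+2] = y;
     a[4*i+3] = z;

   calling this function for the store to a[4*i+2] with the first store as
   FIRST_STMT returns 2, while a statement that belongs to a different
   chain (or to no chain) returns -1.  */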
156
157 /* Check if the data references pointed to by DR_I and DR_J are the same or
158 belong to the same interleaving group.  Return FALSE if the drs are
159 different, otherwise return TRUE.  */
160
161 static bool
162 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
163 {
164 gimple stmt_i = DR_STMT (dr_i);
165 gimple stmt_j = DR_STMT (dr_j);
166
167 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
168 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
169 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j))
170 && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
171 == GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j)))))
172 return true;
173 else
174 return false;
175 }
176
177 /* If address ranges represented by DDR_I and DDR_J are equal,
178 return TRUE, otherwise return FALSE. */
179
180 static bool
181 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
182 {
183 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
184 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
185 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
186 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
187 return true;
188 else
189 return false;
190 }
191
192 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
193 tested at run-time. Return TRUE if DDR was successfully inserted.
194 Return FALSE if versioning is not supported.  */
195
196 static bool
197 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
198 {
199 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
200
201 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
202 return false;
203
204 if (dump_enabled_p ())
205 {
206 dump_printf_loc (MSG_NOTE, vect_location,
207 "mark for run-time aliasing test between ");
208 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
209 dump_printf (MSG_NOTE, " and ");
210 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
211 }
212
213 if (optimize_loop_nest_for_size_p (loop))
214 {
215 if (dump_enabled_p ())
216 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
217 "versioning not supported when optimizing for size.");
218 return false;
219 }
220
221 /* FORNOW: We don't support versioning with outer-loop vectorization. */
222 if (loop->inner)
223 {
224 if (dump_enabled_p ())
225 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
226 "versioning not yet supported for outer-loops.");
227 return false;
228 }
229
230 /* FORNOW: We don't support creating runtime alias tests for non-constant
231 step. */
232 if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
233 || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
234 {
235 if (dump_enabled_p ())
236 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
237 "versioning not yet supported for non-constant "
238 "step");
239 return false;
240 }
241
242 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
243 return true;
244 }
245
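/* Conceptually, each DDR recorded here results in one run-time overlap
   test in the versioning condition that is emitted later, roughly of the
   form

     if (addr_a + segment_len_a <= addr_b
         || addr_b + segment_len_b <= addr_a)
       ... vectorized loop ...
     else
       ... scalar loop ...

   (a sketch only; the actual condition is built by the loop-versioning
   code from the DR base addresses and segment lengths).  */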
246
247 /* Function vect_analyze_data_ref_dependence.
248
249 Return TRUE if there is (or might be) a dependence between the memory
250 references DRA and DRB.  When versioning for alias can check the
251 dependence at run-time, return FALSE instead.  Adjust *MAX_VF according
252 to the data dependence.  */
253
254 static bool
255 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
256 loop_vec_info loop_vinfo, int *max_vf)
257 {
258 unsigned int i;
259 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
260 struct data_reference *dra = DDR_A (ddr);
261 struct data_reference *drb = DDR_B (ddr);
262 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
263 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
264 lambda_vector dist_v;
265 unsigned int loop_depth;
266
267 /* In loop analysis all data references should be vectorizable. */
268 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
269 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
270 gcc_unreachable ();
271
272 /* Independent data accesses. */
273 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
274 return false;
275
276 if (dra == drb
277 || (DR_IS_READ (dra) && DR_IS_READ (drb)))
278 return false;
279
280 /* Unknown data dependence. */
281 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
282 {
283 if (dump_enabled_p ())
284 {
285 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
286 "versioning for alias required: "
287 "can't determine dependence between ");
288 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
289 DR_REF (dra));
290 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
291 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
292 DR_REF (drb));
293 }
294
295 /* Add to list of ddrs that need to be tested at run-time. */
296 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
297 }
298
299 /* Known data dependence. */
300 if (DDR_NUM_DIST_VECTS (ddr) == 0)
301 {
302 if (dump_enabled_p ())
303 {
304 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
305 "versioning for alias required: "
306 "bad dist vector for ");
307 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
308 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
309 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
310 }
311 /* Add to list of ddrs that need to be tested at run-time. */
312 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
313 }
314
315 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
316 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
317 {
318 int dist = dist_v[loop_depth];
319
320 if (dump_enabled_p ())
321 dump_printf_loc (MSG_NOTE, vect_location,
322 "dependence distance = %d.", dist);
323
324 if (dist == 0)
325 {
326 if (dump_enabled_p ())
327 {
328 dump_printf_loc (MSG_NOTE, vect_location,
329 "dependence distance == 0 between ");
330 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
331 dump_printf (MSG_NOTE, " and ");
332 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
333 }
334
335 /* For interleaving, mark that there is a read-write dependency if
336 necessary.  We checked earlier that one of the data-refs is a store.  */
337 if (DR_IS_READ (dra))
338 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
339 else
340 {
341 if (DR_IS_READ (drb))
342 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
343 }
344
345 continue;
346 }
347
348 if (dist > 0 && DDR_REVERSED_P (ddr))
349 {
350 /* If DDR_REVERSED_P is set, the order of the data-refs in DDR was
351 reversed (to make distance vector positive), and the actual
352 distance is negative. */
353 if (dump_enabled_p ())
354 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
355 "dependence distance negative.");
356 continue;
357 }
358
359 if (abs (dist) >= 2
360 && abs (dist) < *max_vf)
361 {
362 /* The dependence distance requires reduction of the maximal
363 vectorization factor. */
364 *max_vf = abs (dist);
365 if (dump_enabled_p ())
366 dump_printf_loc (MSG_NOTE, vect_location,
367 "adjusting maximal vectorization factor to %i",
368 *max_vf);
369 }
370
371 if (abs (dist) >= *max_vf)
372 {
373 /* In this case the dependence distance does not create a dependence,
374 as far as vectorization is concerned.  */
375 if (dump_enabled_p ())
376 dump_printf_loc (MSG_NOTE, vect_location,
377 "dependence distance >= VF.");
378 continue;
379 }
380
381 if (dump_enabled_p ())
382 {
383 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
384 "not vectorized, possible dependence "
385 "between data-refs ");
386 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
387 dump_printf (MSG_NOTE, " and ");
388 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
389 }
390
391 return true;
392 }
393
394 return false;
395 }
396
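/* Worked example of the distance handling above (hypothetical loop):

     for (i = 0; i < n; i++)
       a[i + 3] = a[i] + 1;

   The dependence distance is 3.  With an incoming *MAX_VF of 8 we have
   2 <= 3 < 8, so *MAX_VF is lowered to 3; the distance is then no smaller
   than the new *MAX_VF, so the dependence does not block vectorization and
   the function returns false.  A distance of 1, in contrast, would be
   reported as a possible dependence and the function would return true.  */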
397 /* Function vect_analyze_data_ref_dependences.
398
399 Examine all the data references in the loop, and make sure there do not
400 exist any data dependences between them. Set *MAX_VF according to
401 the maximum vectorization factor the data dependences allow. */
402
403 bool
404 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo, int *max_vf)
405 {
406 unsigned int i;
407 struct data_dependence_relation *ddr;
408
409 if (dump_enabled_p ())
410 dump_printf_loc (MSG_NOTE, vect_location,
411 "=== vect_analyze_data_ref_dependences ===");
412
413 if (!compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
414 &LOOP_VINFO_DDRS (loop_vinfo),
415 LOOP_VINFO_LOOP_NEST (loop_vinfo), true))
416 return false;
417
418 FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
419 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
420 return false;
421
422 return true;
423 }
424
425
426 /* Function vect_slp_analyze_data_ref_dependence.
427
428 Return TRUE if there is (or might be) a dependence between the memory
429 references DRA and DRB of DDR that would prevent basic-block (SLP)
430 vectorization; return FALSE if the accesses are independent or the
431 dependence does not prevent vectorization.  */
432
433 static bool
434 vect_slp_analyze_data_ref_dependence (struct data_dependence_relation *ddr)
435 {
436 struct data_reference *dra = DDR_A (ddr);
437 struct data_reference *drb = DDR_B (ddr);
438
439 /* We need to check dependences of statements marked as unvectorizable
440 as well, they still can prohibit vectorization. */
441
442 /* Independent data accesses. */
443 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
444 return false;
445
446 if (dra == drb)
447 return false;
448
449 /* Read-read is OK. */
450 if (DR_IS_READ (dra) && DR_IS_READ (drb))
451 return false;
452
453 /* Unknown data dependence. */
454 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
455 {
456 gimple earlier_stmt;
457
458 if (dump_enabled_p ())
459 {
460 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
461 "can't determine dependence between ");
462 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
463 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
464 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
465 }
466
467 /* We do not vectorize basic blocks with write-write dependencies. */
468 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
469 return true;
470
471 /* Check that it's not a load-after-store dependence. */
472 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
473 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
474 return true;
475
476 return false;
477 }
478
479 if (dump_enabled_p ())
480 {
481 dump_printf_loc (MSG_NOTE, vect_location,
482 "determined dependence between ");
483 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
484 dump_printf (MSG_NOTE, " and ");
485 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
486 }
487
488 /* Do not vectorize basic blocks with write-write dependences. */
489 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
490 return true;
491
492 /* Check dependence between DRA and DRB for basic block vectorization.
493 If the accesses share the same bases and offsets, we can compare their initial
494 constant offsets to decide whether they differ or not. In case of a read-
495 write dependence we check that the load is before the store to ensure that
496 vectorization will not change the order of the accesses. */
497
498 HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
499 gimple earlier_stmt;
500
501 /* Check that the data-refs have same bases and offsets. If not, we can't
502 determine if they are dependent. */
503 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
504 || !dr_equal_offsets_p (dra, drb))
505 return true;
506
507 /* Check the types. */
508 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
509 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
510
511 if (type_size_a != type_size_b
512 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
513 TREE_TYPE (DR_REF (drb))))
514 return true;
515
516 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
517 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
518
519 /* Two different locations - no dependence. */
520 if (init_a != init_b)
521 return false;
522
523 /* We have a read-write dependence. Check that the load is before the store.
524 When we vectorize basic blocks, a vector load can only appear before the
525 corresponding scalar load, and a vector store can only appear after its
526 corresponding scalar store.  So the order of the accesses is preserved if
527 the load is before the store.  */
528 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
529 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
530 return false;
531
532 return true;
533 }
534
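/* Example of the basic-block checks above, assuming the two references
   have the same base, equal offsets and compatible 4-byte types:

     a[0] = x;       <-- store, DR_INIT == 0
     y    = a[0];    <-- load,  DR_INIT == 0

   The initial offsets are equal and the earlier statement is the store,
   so this is a load-after-store dependence and the function returns true.
   If the load preceded the store, or if the references had different
   DR_INITs (e.g. a[0] and a[1]), the function would return false.  */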
535
536 /* Function vect_slp_analyze_data_ref_dependences.
537
538 Examine all the data references in the basic-block, and make sure there
539 do not exist any data dependences between them that would prevent
540 vectorization.  */
541
542 bool
543 vect_slp_analyze_data_ref_dependences (bb_vec_info bb_vinfo)
544 {
545 struct data_dependence_relation *ddr;
546 unsigned int i;
547
548 if (dump_enabled_p ())
549 dump_printf_loc (MSG_NOTE, vect_location,
550 "=== vect_slp_analyze_data_ref_dependences ===");
551
552 if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo),
553 &BB_VINFO_DDRS (bb_vinfo),
554 vNULL, true))
555 return false;
556
557 FOR_EACH_VEC_ELT (BB_VINFO_DDRS (bb_vinfo), i, ddr)
558 if (vect_slp_analyze_data_ref_dependence (ddr))
559 return false;
560
561 return true;
562 }
563
564
565 /* Function vect_compute_data_ref_alignment
566
567 Compute the misalignment of the data reference DR.
568
569 Output:
570 1. If during the misalignment computation it is found that the data reference
571 cannot be vectorized then false is returned.
572 2. DR_MISALIGNMENT (DR) is defined.
573
574 FOR NOW: No analysis is actually performed. Misalignment is calculated
575 only for trivial cases. TODO. */
576
577 static bool
578 vect_compute_data_ref_alignment (struct data_reference *dr)
579 {
580 gimple stmt = DR_STMT (dr);
581 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
582 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
583 struct loop *loop = NULL;
584 tree ref = DR_REF (dr);
585 tree vectype;
586 tree base, base_addr;
587 bool base_aligned;
588 tree misalign;
589 tree aligned_to, alignment;
590
591 if (dump_enabled_p ())
592 dump_printf_loc (MSG_NOTE, vect_location,
593 "vect_compute_data_ref_alignment:");
594
595 if (loop_vinfo)
596 loop = LOOP_VINFO_LOOP (loop_vinfo);
597
598 /* Initialize misalignment to unknown. */
599 SET_DR_MISALIGNMENT (dr, -1);
600
601 /* Strided loads perform only component accesses, misalignment information
602 is irrelevant for them. */
603 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
604 return true;
605
606 misalign = DR_INIT (dr);
607 aligned_to = DR_ALIGNED_TO (dr);
608 base_addr = DR_BASE_ADDRESS (dr);
609 vectype = STMT_VINFO_VECTYPE (stmt_info);
610
611 /* In case the dataref is in an inner-loop of the loop that is being
612 vectorized (LOOP), we use the base and misalignment information
613 relative to the outer-loop (LOOP). This is ok only if the misalignment
614 stays the same throughout the execution of the inner-loop, which is why
615 we have to check that the stride of the dataref in the inner-loop is a
616 multiple of the vector size.  */
617 if (loop && nested_in_vect_loop_p (loop, stmt))
618 {
619 tree step = DR_STEP (dr);
620 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
621
622 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
623 {
624 if (dump_enabled_p ())
625 dump_printf_loc (MSG_NOTE, vect_location,
626 "inner step divides the vector-size.");
627 misalign = STMT_VINFO_DR_INIT (stmt_info);
628 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
629 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
630 }
631 else
632 {
633 if (dump_enabled_p ())
634 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
635 "inner step doesn't divide the vector-size.");
636 misalign = NULL_TREE;
637 }
638 }
639
640 /* Similarly, if we're doing basic-block vectorization, we can only use
641 base and misalignment information relative to an innermost loop if the
642 misalignment stays the same throughout the execution of the loop.
643 As above, this is the case if the stride of the dataref is a multiple
644 of the vector size.  */
645 if (!loop)
646 {
647 tree step = DR_STEP (dr);
648 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
649
650 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
651 {
652 if (dump_enabled_p ())
653 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
654 "SLP: step doesn't divide the vector-size.");
655 misalign = NULL_TREE;
656 }
657 }
658
659 base = build_fold_indirect_ref (base_addr);
660 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
661
662 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
663 || !misalign)
664 {
665 if (dump_enabled_p ())
666 {
667 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
668 "Unknown alignment for access: ");
669 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, base);
670 }
671 return true;
672 }
673
674 if ((DECL_P (base)
675 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
676 alignment) >= 0)
677 || (TREE_CODE (base_addr) == SSA_NAME
678 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
679 TREE_TYPE (base_addr)))),
680 alignment) >= 0)
681 || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
682 base_aligned = true;
683 else
684 base_aligned = false;
685
686 if (!base_aligned)
687 {
688 /* Do not change the alignment of global variables here if
689 flag_section_anchors is enabled as we already generated
690 RTL for other functions. Most global variables should
691 have been aligned during the IPA increase_alignment pass. */
692 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
693 || (TREE_STATIC (base) && flag_section_anchors))
694 {
695 if (dump_enabled_p ())
696 {
697 dump_printf_loc (MSG_NOTE, vect_location,
698 "can't force alignment of ref: ");
699 dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
700 }
701 return true;
702 }
703
704 /* Force the alignment of the decl.
705 NOTE: This is the only change to the code we make during
706 the analysis phase, before deciding to vectorize the loop. */
707 if (dump_enabled_p ())
708 {
709 dump_printf_loc (MSG_NOTE, vect_location, "force alignment of ");
710 dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
711 }
712
713 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
714 DECL_USER_ALIGN (base) = 1;
715 }
716
717 /* At this point we assume that the base is aligned. */
718 gcc_assert (base_aligned
719 || (TREE_CODE (base) == VAR_DECL
720 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
721
722 /* If this is a backward running DR then the first access in the larger
723 vectype actually is N-1 elements before the address in the DR.
724 Adjust misalign accordingly. */
725 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
726 {
727 tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
728 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
729 otherwise we wouldn't be here. */
730 offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
731 /* PLUS because DR_STEP was negative. */
732 misalign = size_binop (PLUS_EXPR, misalign, offset);
733 }
734
735 /* Modulo alignment. */
736 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
737
738 if (!host_integerp (misalign, 1))
739 {
740 /* Negative or overflowed misalignment value. */
741 if (dump_enabled_p ())
742 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
743 "unexpected misalign value");
744 return false;
745 }
746
747 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
748
749 if (dump_enabled_p ())
750 {
751 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
752 "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
753 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
754 }
755
756 return true;
757 }
758
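/* Worked example (hypothetical: 16-byte V4SI vectors and 'int a[N]' with
   'a' known to be 16-byte aligned):

     ... = a[i + 1];

   Here DR_BASE_ADDRESS is &a, DR_INIT is 4 and the required alignment is
   16 bytes, so the recorded misalignment is 4 % 16 == 4.  If instead the
   base were not known to be sufficiently aligned, DR_MISALIGNMENT would
   stay at -1 (unknown) unless the alignment of the declaration could be
   forced as above.  */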
759
760 /* Function vect_compute_data_refs_alignment
761
762 Compute the misalignment of data references in the loop.
763 Return FALSE if a data reference is found that cannot be vectorized. */
764
765 static bool
766 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
767 bb_vec_info bb_vinfo)
768 {
769 vec<data_reference_p> datarefs;
770 struct data_reference *dr;
771 unsigned int i;
772
773 if (loop_vinfo)
774 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
775 else
776 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
777
778 FOR_EACH_VEC_ELT (datarefs, i, dr)
779 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
780 && !vect_compute_data_ref_alignment (dr))
781 {
782 if (bb_vinfo)
783 {
784 /* Mark unsupported statement as unvectorizable. */
785 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
786 continue;
787 }
788 else
789 return false;
790 }
791
792 return true;
793 }
794
795
796 /* Function vect_update_misalignment_for_peel
797
798 DR - the data reference whose misalignment is to be adjusted.
799 DR_PEEL - the data reference whose misalignment is being made
800 zero in the vector loop by the peel.
801 NPEEL - the number of iterations in the peel loop if the misalignment
802 of DR_PEEL is known at compile time. */
803
804 static void
805 vect_update_misalignment_for_peel (struct data_reference *dr,
806 struct data_reference *dr_peel, int npeel)
807 {
808 unsigned int i;
809 vec<dr_p> same_align_drs;
810 struct data_reference *current_dr;
811 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
812 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
813 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
814 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
815
816 /* For interleaved data accesses the step in the loop must be multiplied by
817 the size of the interleaving group. */
818 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
819 dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
820 if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info))
821 dr_peel_size *= GROUP_SIZE (peel_stmt_info);
822
823 /* It can be assumed that the data refs with the same alignment as dr_peel
824 are aligned in the vector loop. */
825 same_align_drs
826 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
827 FOR_EACH_VEC_ELT (same_align_drs, i, current_dr)
828 {
829 if (current_dr != dr)
830 continue;
831 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
832 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
833 SET_DR_MISALIGNMENT (dr, 0);
834 return;
835 }
836
837 if (known_alignment_for_access_p (dr)
838 && known_alignment_for_access_p (dr_peel))
839 {
840 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
841 int misal = DR_MISALIGNMENT (dr);
842 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
843 misal += negative ? -npeel * dr_size : npeel * dr_size;
844 misal &= (TYPE_ALIGN (vectype) / BITS_PER_UNIT) - 1;
845 SET_DR_MISALIGNMENT (dr, misal);
846 return;
847 }
848
849 if (dump_enabled_p ())
850 dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment to -1.");
851 SET_DR_MISALIGNMENT (dr, -1);
852 }
853
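/* Numeric example of the known-alignment case (hypothetical values:
   4-byte elements, 16-byte vector alignment, positive step):

     DR_MISALIGNMENT (dr) = 8,  NPEEL = 2

   misal = 8 + 2 * 4 = 16, and masking with (16 - 1) gives 0, i.e. DR
   becomes aligned by the peel.  With NPEEL = 1 the result would be 12,
   i.e. DR would remain misaligned.  */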
854
855 /* Function vect_verify_datarefs_alignment
856
857 Return TRUE if all data references in the loop can be
858 handled with respect to alignment. */
859
860 bool
861 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
862 {
863 vec<data_reference_p> datarefs;
864 struct data_reference *dr;
865 enum dr_alignment_support supportable_dr_alignment;
866 unsigned int i;
867
868 if (loop_vinfo)
869 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
870 else
871 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
872
873 FOR_EACH_VEC_ELT (datarefs, i, dr)
874 {
875 gimple stmt = DR_STMT (dr);
876 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
877
878 if (!STMT_VINFO_RELEVANT_P (stmt_info))
879 continue;
880
881 /* For interleaving, only the alignment of the first access matters.
882 Skip statements marked as not vectorizable. */
883 if ((STMT_VINFO_GROUPED_ACCESS (stmt_info)
884 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
885 || !STMT_VINFO_VECTORIZABLE (stmt_info))
886 continue;
887
888 /* Strided loads perform only component accesses, alignment is
889 irrelevant for them. */
890 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
891 continue;
892
893 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
894 if (!supportable_dr_alignment)
895 {
896 if (dump_enabled_p ())
897 {
898 if (DR_IS_READ (dr))
899 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
900 "not vectorized: unsupported unaligned load.");
901 else
902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
903 "not vectorized: unsupported unaligned "
904 "store.");
905
906 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
907 DR_REF (dr));
908 }
909 return false;
910 }
911 if (supportable_dr_alignment != dr_aligned && dump_enabled_p ())
912 dump_printf_loc (MSG_NOTE, vect_location,
913 "Vectorizing an unaligned access.");
914 }
915 return true;
916 }
917
918 /* Given a memory reference EXP, return whether its alignment is less
919 than its size. */
920
921 static bool
922 not_size_aligned (tree exp)
923 {
924 if (!host_integerp (TYPE_SIZE (TREE_TYPE (exp)), 1))
925 return true;
926
927 return (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (exp)))
928 > get_object_alignment (exp));
929 }
930
931 /* Function vector_alignment_reachable_p
932
933 Return true if vector alignment for DR is reachable by peeling
934 a few loop iterations. Return false otherwise. */
935
936 static bool
937 vector_alignment_reachable_p (struct data_reference *dr)
938 {
939 gimple stmt = DR_STMT (dr);
940 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
941 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
942
943 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
944 {
945 /* For interleaved accesses we peel only if the number of iterations in
946 the prolog loop ({VF - misalignment}) is a multiple of the
947 number of interleaved accesses.  */
948 int elem_size, mis_in_elements;
949 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
950
951 /* FORNOW: handle only known alignment. */
952 if (!known_alignment_for_access_p (dr))
953 return false;
954
955 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
956 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
957
958 if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
959 return false;
960 }
961
962 /* If the misalignment is known at compile time then allow peeling
963 only if natural alignment is reachable through peeling. */
964 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
965 {
966 HOST_WIDE_INT elmsize =
967 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
968 if (dump_enabled_p ())
969 {
970 dump_printf_loc (MSG_NOTE, vect_location,
971 "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
972 dump_printf (MSG_NOTE,
973 ". misalignment = %d. ", DR_MISALIGNMENT (dr));
974 }
975 if (DR_MISALIGNMENT (dr) % elmsize)
976 {
977 if (dump_enabled_p ())
978 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
979 "data size does not divide the misalignment.\n");
980 return false;
981 }
982 }
983
984 if (!known_alignment_for_access_p (dr))
985 {
986 tree type = TREE_TYPE (DR_REF (dr));
987 bool is_packed = not_size_aligned (DR_REF (dr));
988 if (dump_enabled_p ())
989 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
990 "Unknown misalignment, is_packed = %d",is_packed);
991 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
992 return true;
993 else
994 return false;
995 }
996
997 return true;
998 }
999
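/* For instance (hypothetical values): with 4-byte elements and a known
   misalignment of 6 bytes, 6 % 4 != 0, so no number of peeled scalar
   iterations can make the access naturally aligned and the function
   returns false.  A known misalignment of 8 bytes with 16-byte vectors,
   in contrast, can be removed by peeling two scalar iterations.  */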
1000
1001 /* Calculate the cost of the memory access represented by DR. */
1002
1003 static void
1004 vect_get_data_access_cost (struct data_reference *dr,
1005 unsigned int *inside_cost,
1006 unsigned int *outside_cost,
1007 stmt_vector_for_cost *body_cost_vec)
1008 {
1009 gimple stmt = DR_STMT (dr);
1010 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1011 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1012 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1013 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1014 int ncopies = vf / nunits;
1015
1016 if (DR_IS_READ (dr))
1017 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost,
1018 NULL, body_cost_vec, false);
1019 else
1020 vect_get_store_cost (dr, ncopies, inside_cost, body_cost_vec);
1021
1022 if (dump_enabled_p ())
1023 dump_printf_loc (MSG_NOTE, vect_location,
1024 "vect_get_data_access_cost: inside_cost = %d, "
1025 "outside_cost = %d.", *inside_cost, *outside_cost);
1026 }
1027
1028
1029 static hashval_t
1030 vect_peeling_hash (const void *elem)
1031 {
1032 const struct _vect_peel_info *peel_info;
1033
1034 peel_info = (const struct _vect_peel_info *) elem;
1035 return (hashval_t) peel_info->npeel;
1036 }
1037
1038
1039 static int
1040 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1041 {
1042 const struct _vect_peel_info *a, *b;
1043
1044 a = (const struct _vect_peel_info *) elem1;
1045 b = (const struct _vect_peel_info *) elem2;
1046 return (a->npeel == b->npeel);
1047 }
1048
1049
1050 /* Insert DR into peeling hash table with NPEEL as key. */
1051
1052 static void
1053 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1054 int npeel)
1055 {
1056 struct _vect_peel_info elem, *slot;
1057 void **new_slot;
1058 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1059
1060 elem.npeel = npeel;
1061 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1062 &elem);
1063 if (slot)
1064 slot->count++;
1065 else
1066 {
1067 slot = XNEW (struct _vect_peel_info);
1068 slot->npeel = npeel;
1069 slot->dr = dr;
1070 slot->count = 1;
1071 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1072 INSERT);
1073 *new_slot = slot;
1074 }
1075
1076 if (!supportable_dr_alignment && !flag_vect_cost_model)
1077 slot->count += VECT_MAX_COST;
1078 }
1079
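/* The table is keyed by the peeling amount.  For example, if two
   data-refs would become aligned by peeling 3 scalar iterations and one
   by peeling 5, the table ends up holding {npeel = 3, count = 2} and
   {npeel = 5, count = 1}.  Misaligned refs whose alignment is not
   supportable get their count boosted by VECT_MAX_COST, so that aligning
   them is strongly preferred when the cost model is disabled.  */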
1080
1081 /* Traverse peeling hash table to find peeling option that aligns maximum
1082 number of data accesses. */
1083
1084 static int
1085 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1086 {
1087 vect_peel_info elem = (vect_peel_info) *slot;
1088 vect_peel_extended_info max = (vect_peel_extended_info) data;
1089
1090 if (elem->count > max->peel_info.count
1091 || (elem->count == max->peel_info.count
1092 && max->peel_info.npeel > elem->npeel))
1093 {
1094 max->peel_info.npeel = elem->npeel;
1095 max->peel_info.count = elem->count;
1096 max->peel_info.dr = elem->dr;
1097 }
1098
1099 return 1;
1100 }
1101
1102
1103 /* Traverse peeling hash table and calculate cost for each peeling option.
1104 Find the one with the lowest cost. */
1105
1106 static int
1107 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1108 {
1109 vect_peel_info elem = (vect_peel_info) *slot;
1110 vect_peel_extended_info min = (vect_peel_extended_info) data;
1111 int save_misalignment, dummy;
1112 unsigned int inside_cost = 0, outside_cost = 0, i;
1113 gimple stmt = DR_STMT (elem->dr);
1114 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1115 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1116 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1117 struct data_reference *dr;
1118 stmt_vector_for_cost prologue_cost_vec, body_cost_vec, epilogue_cost_vec;
1119 int single_iter_cost;
1120
1121 prologue_cost_vec.create (2);
1122 body_cost_vec.create (2);
1123 epilogue_cost_vec.create (2);
1124
1125 FOR_EACH_VEC_ELT (datarefs, i, dr)
1126 {
1127 stmt = DR_STMT (dr);
1128 stmt_info = vinfo_for_stmt (stmt);
1129 /* For interleaving, only the alignment of the first access
1130 matters. */
1131 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1132 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1133 continue;
1134
1135 save_misalignment = DR_MISALIGNMENT (dr);
1136 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1137 vect_get_data_access_cost (dr, &inside_cost, &outside_cost,
1138 &body_cost_vec);
1139 SET_DR_MISALIGNMENT (dr, save_misalignment);
1140 }
1141
1142 single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
1143 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel,
1144 &dummy, single_iter_cost,
1145 &prologue_cost_vec,
1146 &epilogue_cost_vec);
1147
1148 /* Prologue and epilogue costs are added to the target model later.
1149 These costs depend only on the scalar iteration cost, the
1150 number of peeling iterations finally chosen, and the number of
1151 misaligned statements. So discard the information found here. */
1152 prologue_cost_vec.release ();
1153 epilogue_cost_vec.release ();
1154
1155 if (inside_cost < min->inside_cost
1156 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1157 {
1158 min->inside_cost = inside_cost;
1159 min->outside_cost = outside_cost;
1160 min->body_cost_vec.release ();
1161 min->body_cost_vec = body_cost_vec;
1162 min->peel_info.dr = elem->dr;
1163 min->peel_info.npeel = elem->npeel;
1164 }
1165 else
1166 body_cost_vec.release ();
1167
1168 return 1;
1169 }
1170
1171
1172 /* Choose best peeling option by traversing peeling hash table and either
1173 choosing an option with the lowest cost (if cost model is enabled) or the
1174 option that aligns as many accesses as possible. */
1175
1176 static struct data_reference *
1177 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1178 unsigned int *npeel,
1179 stmt_vector_for_cost *body_cost_vec)
1180 {
1181 struct _vect_peel_extended_info res;
1182
1183 res.peel_info.dr = NULL;
1184 res.body_cost_vec = stmt_vector_for_cost();
1185
1186 if (flag_vect_cost_model)
1187 {
1188 res.inside_cost = INT_MAX;
1189 res.outside_cost = INT_MAX;
1190 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1191 vect_peeling_hash_get_lowest_cost, &res);
1192 }
1193 else
1194 {
1195 res.peel_info.count = 0;
1196 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1197 vect_peeling_hash_get_most_frequent, &res);
1198 }
1199
1200 *npeel = res.peel_info.npeel;
1201 *body_cost_vec = res.body_cost_vec;
1202 return res.peel_info.dr;
1203 }
1204
1205
1206 /* Function vect_enhance_data_refs_alignment
1207
1208 This pass will use loop versioning and loop peeling in order to enhance
1209 the alignment of data references in the loop.
1210
1211 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1212 original loop is to be vectorized. Any other loops that are created by
1213 the transformations performed in this pass are not supposed to be
1214 vectorized. This restriction will be relaxed.
1215
1216 This pass will require a cost model to guide it in deciding whether to apply
1217 peeling or versioning or a combination of the two.  For example, the scheme
1218 that Intel uses when given a loop with several memory accesses is as follows:
1219 choose one memory access ('p') whose alignment you want to force by doing
1220 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1221 other accesses are not necessarily aligned, or (2) use loop versioning to
1222 generate one loop in which all accesses are aligned, and another loop in
1223 which only 'p' is necessarily aligned.
1224
1225 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1226 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1227 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1228
1229 Devising a cost model is the most critical aspect of this work. It will
1230 guide us on which access to peel for, whether to use loop versioning, how
1231 many versions to create, etc. The cost model will probably consist of
1232 generic considerations as well as target specific considerations (on
1233 powerpc for example, misaligned stores are more painful than misaligned
1234 loads).
1235
1236 Here are the general steps involved in alignment enhancements:
1237
1238 -- original loop, before alignment analysis:
1239 for (i=0; i<N; i++){
1240 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1241 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1242 }
1243
1244 -- After vect_compute_data_refs_alignment:
1245 for (i=0; i<N; i++){
1246 x = q[i]; # DR_MISALIGNMENT(q) = 3
1247 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1248 }
1249
1250 -- Possibility 1: we do loop versioning:
1251 if (p is aligned) {
1252 for (i=0; i<N; i++){ # loop 1A
1253 x = q[i]; # DR_MISALIGNMENT(q) = 3
1254 p[i] = y; # DR_MISALIGNMENT(p) = 0
1255 }
1256 }
1257 else {
1258 for (i=0; i<N; i++){ # loop 1B
1259 x = q[i]; # DR_MISALIGNMENT(q) = 3
1260 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1261 }
1262 }
1263
1264 -- Possibility 2: we do loop peeling:
1265 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1266 x = q[i];
1267 p[i] = y;
1268 }
1269 for (i = 3; i < N; i++){ # loop 2A
1270 x = q[i]; # DR_MISALIGNMENT(q) = 0
1271 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1272 }
1273
1274 -- Possibility 3: combination of loop peeling and versioning:
1275 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1276 x = q[i];
1277 p[i] = y;
1278 }
1279 if (p is aligned) {
1280 for (i = 3; i<N; i++){ # loop 3A
1281 x = q[i]; # DR_MISALIGNMENT(q) = 0
1282 p[i] = y; # DR_MISALIGNMENT(p) = 0
1283 }
1284 }
1285 else {
1286 for (i = 3; i<N; i++){ # loop 3B
1287 x = q[i]; # DR_MISALIGNMENT(q) = 0
1288 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1289 }
1290 }
1291
1292 These loops are later passed to loop_transform to be vectorized. The
1293 vectorizer will use the alignment information to guide the transformation
1294 (whether to generate regular loads/stores, or with special handling for
1295 misalignment). */
1296
1297 bool
1298 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1299 {
1300 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1301 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1302 enum dr_alignment_support supportable_dr_alignment;
1303 struct data_reference *dr0 = NULL, *first_store = NULL;
1304 struct data_reference *dr;
1305 unsigned int i, j;
1306 bool do_peeling = false;
1307 bool do_versioning = false;
1308 bool stat;
1309 gimple stmt;
1310 stmt_vec_info stmt_info;
1311 int vect_versioning_for_alias_required;
1312 unsigned int npeel = 0;
1313 bool all_misalignments_unknown = true;
1314 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1315 unsigned possible_npeel_number = 1;
1316 tree vectype;
1317 unsigned int nelements, mis, same_align_drs_max = 0;
1318 stmt_vector_for_cost body_cost_vec = stmt_vector_for_cost();
1319
1320 if (dump_enabled_p ())
1321 dump_printf_loc (MSG_NOTE, vect_location,
1322 "=== vect_enhance_data_refs_alignment ===");
1323
1324 /* While cost model enhancements are expected in the future, the high level
1325 view of the code at this time is as follows:
1326
1327 A) If there is a misaligned access then see if peeling to align
1328 this access can make all data references satisfy
1329 vect_supportable_dr_alignment. If so, update data structures
1330 as needed and return true.
1331
1332 B) If peeling wasn't possible and there is a data reference with an
1333 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1334 then see if loop versioning checks can be used to make all data
1335 references satisfy vect_supportable_dr_alignment. If so, update
1336 data structures as needed and return true.
1337
1338 C) If neither peeling nor versioning were successful then return false if
1339 any data reference does not satisfy vect_supportable_dr_alignment.
1340
1341 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1342
1343 Note, Possibility 3 above (which is peeling and versioning together) is not
1344 being done at this time. */
1345
1346 /* (1) Peeling to force alignment. */
1347
1348 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1349 Considerations:
1350 + How many accesses will become aligned due to the peeling
1351 - How many accesses will become unaligned due to the peeling,
1352 and the cost of misaligned accesses.
1353 - The cost of peeling (the extra runtime checks, the increase
1354 in code size). */
1355
1356 FOR_EACH_VEC_ELT (datarefs, i, dr)
1357 {
1358 stmt = DR_STMT (dr);
1359 stmt_info = vinfo_for_stmt (stmt);
1360
1361 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1362 continue;
1363
1364 /* For interleaving, only the alignment of the first access
1365 matters. */
1366 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1367 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1368 continue;
1369
1370 /* For invariant accesses there is nothing to enhance. */
1371 if (integer_zerop (DR_STEP (dr)))
1372 continue;
1373
1374 /* Strided loads perform only component accesses, alignment is
1375 irrelevant for them. */
1376 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1377 continue;
1378
1379 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1380 do_peeling = vector_alignment_reachable_p (dr);
1381 if (do_peeling)
1382 {
1383 if (known_alignment_for_access_p (dr))
1384 {
1385 unsigned int npeel_tmp;
1386 bool negative = tree_int_cst_compare (DR_STEP (dr),
1387 size_zero_node) < 0;
1388
1389 /* Save info about DR in the hash table. */
1390 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1391 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1392 htab_create (1, vect_peeling_hash,
1393 vect_peeling_hash_eq, free);
1394
1395 vectype = STMT_VINFO_VECTYPE (stmt_info);
1396 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1397 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1398 TREE_TYPE (DR_REF (dr))));
1399 npeel_tmp = (negative
1400 ? (mis - nelements) : (nelements - mis))
1401 & (nelements - 1);
1402
1403 /* For multiple types, it is possible that the bigger type access
1404 will have more than one peeling option.  E.g., a loop with two
1405 types: one of size (vector size / 4), and the other one of
1406 size (vector size / 8).  The vectorization factor will be 8.
1407 If both accesses are misaligned by 3, the first one needs one
1408 scalar iteration to be aligned, and the second one needs 5.
1409 But the first one will also be aligned by peeling 5 scalar
1410 iterations, and in that case both accesses will be aligned.
1411 Hence, in addition to the immediate peeling amount, we also want
1412 to try adding a full vector size, as long as we don't exceed the
1413 vectorization factor.
1414 We do this automatically for the cost model, since we calculate
1415 the cost for every peeling option.  */
1416 if (!flag_vect_cost_model)
1417 possible_npeel_number = vf / nelements;
1418
1419 /* Handle the aligned case. We may decide to align some other
1420 access, making DR unaligned. */
1421 if (DR_MISALIGNMENT (dr) == 0)
1422 {
1423 npeel_tmp = 0;
1424 if (!flag_vect_cost_model)
1425 possible_npeel_number++;
1426 }
1427
1428 for (j = 0; j < possible_npeel_number; j++)
1429 {
1430 gcc_assert (npeel_tmp <= vf);
1431 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1432 npeel_tmp += nelements;
1433 }
1434
1435 all_misalignments_unknown = false;
1436 /* Data-ref that was chosen for the case that all the
1437 misalignments are unknown is not relevant anymore, since we
1438 have a data-ref with known alignment. */
1439 dr0 = NULL;
1440 }
1441 else
1442 {
1443 /* If we don't know all the misalignment values, we prefer
1444 peeling for data-ref that has maximum number of data-refs
1445 with the same alignment, unless the target prefers to align
1446 stores over loads.  */
1447 if (all_misalignments_unknown)
1448 {
1449 if (same_align_drs_max
1450 < STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ()
1451 || !dr0)
1452 {
1453 same_align_drs_max
1454 = STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ();
1455 dr0 = dr;
1456 }
1457
1458 if (!first_store && DR_IS_WRITE (dr))
1459 first_store = dr;
1460 }
1461
1462 /* If there are both known and unknown misaligned accesses in the
1463 loop, we choose peeling amount according to the known
1464 accesses. */
1465
1466
1467 if (!supportable_dr_alignment)
1468 {
1469 dr0 = dr;
1470 if (!first_store && DR_IS_WRITE (dr))
1471 first_store = dr;
1472 }
1473 }
1474 }
1475 else
1476 {
1477 if (!aligned_access_p (dr))
1478 {
1479 if (dump_enabled_p ())
1480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1481 "vector alignment may not be reachable");
1482 break;
1483 }
1484 }
1485 }
1486
1487 vect_versioning_for_alias_required
1488 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1489
1490 /* Temporarily, if versioning for alias is required, we disable peeling
1491 until we support peeling and versioning. Often peeling for alignment
1492 will require peeling for loop-bound, which in turn requires that we
1493 know how to adjust the loop ivs after the loop. */
1494 if (vect_versioning_for_alias_required
1495 || !vect_can_advance_ivs_p (loop_vinfo)
1496 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1497 do_peeling = false;
1498
1499 if (do_peeling && all_misalignments_unknown
1500 && vect_supportable_dr_alignment (dr0, false))
1501 {
1502
1503 /* Check if the target prefers stores over loads, i.e., if
1504 misaligned stores are more expensive than misaligned loads (taking
1505 drs with same alignment into account). */
1506 if (first_store && DR_IS_READ (dr0))
1507 {
1508 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1509 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1510 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1511 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1512 stmt_vector_for_cost dummy;
1513 dummy.create (2);
1514
1515 vect_get_data_access_cost (dr0, &load_inside_cost, &load_outside_cost,
1516 &dummy);
1517 vect_get_data_access_cost (first_store, &store_inside_cost,
1518 &store_outside_cost, &dummy);
1519
1520 dummy.release ();
1521
1522 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1523 aligning the load DR0). */
1524 load_inside_penalty = store_inside_cost;
1525 load_outside_penalty = store_outside_cost;
1526 for (i = 0;
1527 STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
1528 DR_STMT (first_store))).iterate (i, &dr);
1529 i++)
1530 if (DR_IS_READ (dr))
1531 {
1532 load_inside_penalty += load_inside_cost;
1533 load_outside_penalty += load_outside_cost;
1534 }
1535 else
1536 {
1537 load_inside_penalty += store_inside_cost;
1538 load_outside_penalty += store_outside_cost;
1539 }
1540
1541 /* Calculate the penalty for leaving DR0 unaligned (by
1542 aligning the FIRST_STORE). */
1543 store_inside_penalty = load_inside_cost;
1544 store_outside_penalty = load_outside_cost;
1545 for (i = 0;
1546 STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
1547 DR_STMT (dr0))).iterate (i, &dr);
1548 i++)
1549 if (DR_IS_READ (dr))
1550 {
1551 store_inside_penalty += load_inside_cost;
1552 store_outside_penalty += load_outside_cost;
1553 }
1554 else
1555 {
1556 store_inside_penalty += store_inside_cost;
1557 store_outside_penalty += store_outside_cost;
1558 }
1559
1560 if (load_inside_penalty > store_inside_penalty
1561 || (load_inside_penalty == store_inside_penalty
1562 && load_outside_penalty > store_outside_penalty))
1563 dr0 = first_store;
1564 }
1565
1566 /* In case there are only loads with different unknown misalignments, use
1567 peeling only if it may help to align other accesses in the loop. */
1568 if (!first_store
1569 && !STMT_VINFO_SAME_ALIGN_REFS (
1570 vinfo_for_stmt (DR_STMT (dr0))).length ()
1571 && vect_supportable_dr_alignment (dr0, false)
1572 != dr_unaligned_supported)
1573 do_peeling = false;
1574 }
1575
1576 if (do_peeling && !dr0)
1577 {
1578 /* Peeling is possible, but no data access strictly requires it (all are
1579 supported even if unaligned).  So we try to choose the best peeling.  */
1580
1581 /* We should get here only if there are drs with known misalignment. */
1582 gcc_assert (!all_misalignments_unknown);
1583
1584 /* Choose the best peeling from the hash table. */
1585 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel,
1586 &body_cost_vec);
1587 if (!dr0 || !npeel)
1588 do_peeling = false;
1589 }
1590
1591 if (do_peeling)
1592 {
1593 stmt = DR_STMT (dr0);
1594 stmt_info = vinfo_for_stmt (stmt);
1595 vectype = STMT_VINFO_VECTYPE (stmt_info);
1596 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1597
1598 if (known_alignment_for_access_p (dr0))
1599 {
1600 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1601 size_zero_node) < 0;
1602 if (!npeel)
1603 {
1604 /* Since it's known at compile time, compute the number of
1605 iterations in the peeled loop (the peeling factor) for use in
1606 updating DR_MISALIGNMENT values. The peeling factor is the
1607 vectorization factor minus the misalignment as an element
1608 count. */
1609 mis = DR_MISALIGNMENT (dr0);
1610 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1611 npeel = ((negative ? mis - nelements : nelements - mis)
1612 & (nelements - 1));
1613 }
1614
1615 /* For interleaved data access every iteration accesses all the
1616 members of the group, therefore we divide the number of iterations
1617 by the group size. */
1618 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1619 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1620 npeel /= GROUP_SIZE (stmt_info);
1621
1622 if (dump_enabled_p ())
1623 dump_printf_loc (MSG_NOTE, vect_location,
1624 "Try peeling by %d", npeel);
1625 }
1626
1627 /* Ensure that all data refs can be vectorized after the peel. */
1628 FOR_EACH_VEC_ELT (datarefs, i, dr)
1629 {
1630 int save_misalignment;
1631
1632 if (dr == dr0)
1633 continue;
1634
1635 stmt = DR_STMT (dr);
1636 stmt_info = vinfo_for_stmt (stmt);
1637 /* For interleaving, only the alignment of the first access
1638 matters. */
1639 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1640 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1641 continue;
1642
1643 /* Strided loads perform only component accesses, alignment is
1644 irrelevant for them. */
1645 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1646 continue;
1647
1648 save_misalignment = DR_MISALIGNMENT (dr);
1649 vect_update_misalignment_for_peel (dr, dr0, npeel);
1650 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1651 SET_DR_MISALIGNMENT (dr, save_misalignment);
1652
1653 if (!supportable_dr_alignment)
1654 {
1655 do_peeling = false;
1656 break;
1657 }
1658 }
1659
1660 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1661 {
1662 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1663 if (!stat)
1664 do_peeling = false;
1665 else
1666 {
1667 body_cost_vec.release ();
1668 return stat;
1669 }
1670 }
1671
1672 if (do_peeling)
1673 {
1674 stmt_info_for_cost *si;
1675 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
1676
1677 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1678 If the misalignment of DR_i is identical to that of dr0 then set
1679 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1680 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1681 by the peeling factor times the element size of DR_i (MOD the
1682 vectorization factor times the size). Otherwise, the
1683 misalignment of DR_i must be set to unknown. */
1684 FOR_EACH_VEC_ELT (datarefs, i, dr)
1685 if (dr != dr0)
1686 vect_update_misalignment_for_peel (dr, dr0, npeel);
1687
1688 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1689 if (npeel)
1690 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1691 else
1692 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1693 SET_DR_MISALIGNMENT (dr0, 0);
1694 if (dump_enabled_p ())
1695 {
1696 dump_printf_loc (MSG_NOTE, vect_location,
1697 "Alignment of access forced using peeling.");
1698 dump_printf_loc (MSG_NOTE, vect_location,
1699 "Peeling for alignment will be applied.");
1700 }
1701 /* We've delayed passing the inside-loop peeling costs to the
1702 target cost model until we were sure peeling would happen.
1703 Do so now. */
1704 if (body_cost_vec.exists ())
1705 {
1706 FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1707 {
1708 struct _stmt_vec_info *stmt_info
1709 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1710 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1711 si->misalign, vect_body);
1712 }
1713 body_cost_vec.release ();
1714 }
1715
1716 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1717 gcc_assert (stat);
1718 return stat;
1719 }
1720 }
1721
1722 body_cost_vec.release ();
1723
1724 /* (2) Versioning to force alignment. */
1725
1726 /* Try versioning if:
1727 1) flag_tree_vect_loop_version is TRUE
1728 2) optimize loop for speed
1729 3) there is at least one unsupported misaligned data ref with an unknown
1730 misalignment, and
1731 4) all misaligned data refs with a known misalignment are supported, and
1732 5) the number of runtime alignment checks is within reason. */
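/* Illustrative sketch, not the literal code emitted here: versioning for
   alignment produces two copies of the loop guarded by a runtime test of
   the potentially misaligned addresses, roughly

     if (((addr1 | addr2 | ...) & mask) == 0)
       <vectorized loop, assuming the checked refs are aligned>
     else
       <original scalar loop>

   where MASK is the LOOP_VINFO_PTR_MASK computed below.  */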
1733
1734 do_versioning =
1735 flag_tree_vect_loop_version
1736 && optimize_loop_nest_for_speed_p (loop)
1737 && (!loop->inner); /* FORNOW */
1738
1739 if (do_versioning)
1740 {
1741 FOR_EACH_VEC_ELT (datarefs, i, dr)
1742 {
1743 stmt = DR_STMT (dr);
1744 stmt_info = vinfo_for_stmt (stmt);
1745
1746 /* For interleaving, only the alignment of the first access
1747 matters. */
1748 if (aligned_access_p (dr)
1749 || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1750 && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
1751 continue;
1752
1753 /* Strided loads perform only component accesses, alignment is
1754 irrelevant for them. */
1755 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1756 continue;
1757
1758 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1759
1760 if (!supportable_dr_alignment)
1761 {
1762 gimple stmt;
1763 int mask;
1764 tree vectype;
1765
1766 if (known_alignment_for_access_p (dr)
1767 || LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
1768 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1769 {
1770 do_versioning = false;
1771 break;
1772 }
1773
1774 stmt = DR_STMT (dr);
1775 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1776 gcc_assert (vectype);
1777
1778 /* The rightmost bits of an aligned address must be zeros.
1779 Construct the mask needed for this test. For example,
1780 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1781 mask must be 15 = 0xf. */
1782 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1783
1784 /* FORNOW: use the same mask to test all potentially unaligned
1785 references in the loop. The vectorizer currently supports
1786 a single vector size, see the reference to
1787 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1788 vectorization factor is computed. */
1789 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1790 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1791 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1792 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (
1793 DR_STMT (dr));
1794 }
1795 }
1796
1797 /* Versioning requires at least one misaligned data reference. */
1798 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1799 do_versioning = false;
1800 else if (!do_versioning)
1801 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
1802 }
1803
1804 if (do_versioning)
1805 {
1806 vec<gimple> may_misalign_stmts
1807 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1808 gimple stmt;
1809
1810 /* It can now be assumed that the data references in the statements
1811 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1812 of the loop being vectorized. */
1813 FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt)
1814 {
1815 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1816 dr = STMT_VINFO_DATA_REF (stmt_info);
1817 SET_DR_MISALIGNMENT (dr, 0);
1818 if (dump_enabled_p ())
1819 dump_printf_loc (MSG_NOTE, vect_location,
1820 "Alignment of access forced using versioning.");
1821 }
1822
1823 if (dump_enabled_p ())
1824 dump_printf_loc (MSG_NOTE, vect_location,
1825 "Versioning for alignment will be applied.");
1826
1827 /* Peeling and versioning can't be done together at this time. */
1828 gcc_assert (! (do_peeling && do_versioning));
1829
1830 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1831 gcc_assert (stat);
1832 return stat;
1833 }
1834
1835 /* This point is reached if neither peeling nor versioning is being done. */
1836 gcc_assert (! (do_peeling || do_versioning));
1837
1838 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1839 return stat;
1840 }
1841
1842
1843 /* Function vect_find_same_alignment_drs.
1844
1845 Update group and alignment relations according to the chosen
1846 vectorization factor. */
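/* For instance (illustrative): with a vectorization factor of 4 and 4-byte
   elements, the loads a[i] and a[i+4] have dependence distance 4; since
   4 % 4 == 0 and the element sizes match, the two references are recorded
   as having the same alignment.  */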
1847
1848 static void
1849 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1850 loop_vec_info loop_vinfo)
1851 {
1852 unsigned int i;
1853 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1854 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1855 struct data_reference *dra = DDR_A (ddr);
1856 struct data_reference *drb = DDR_B (ddr);
1857 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1858 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1859 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1860 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1861 lambda_vector dist_v;
1862 unsigned int loop_depth;
1863
1864 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1865 return;
1866
1867 if (dra == drb)
1868 return;
1869
1870 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1871 return;
1872
1873 /* Loop-based vectorization and known data dependence. */
1874 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1875 return;
1876
1877 /* Data-dependence analysis reports a distance vector of zero
1878 for data-references that overlap only in the first iteration
1879 but whose steps have different signs (see PR45764).
1880 So as a sanity check require equal DR_STEP. */
1881 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1882 return;
1883
1884 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1885 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
1886 {
1887 int dist = dist_v[loop_depth];
1888
1889 if (dump_enabled_p ())
1890 dump_printf_loc (MSG_NOTE, vect_location,
1891 "dependence distance = %d.", dist);
1892
1893 /* Same loop iteration. */
1894 if (dist == 0
1895 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1896 {
1897 /* Two references with distance zero have the same alignment. */
1898 STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a).safe_push (drb);
1899 STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b).safe_push (dra);
1900 if (dump_enabled_p ())
1901 {
1902 dump_printf_loc (MSG_NOTE, vect_location,
1903 "accesses have the same alignment.");
1904 dump_printf (MSG_NOTE,
1905 "dependence distance modulo vf == 0 between ");
1906 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
1907 dump_printf (MSG_NOTE, " and ");
1908 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
1909 }
1910 }
1911 }
1912 }
1913
1914
1915 /* Function vect_analyze_data_refs_alignment
1916
1917 Analyze the alignment of the data-references in the loop.
1918 Return FALSE if a data reference is found that cannot be vectorized. */
1919
1920 bool
1921 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
1922 bb_vec_info bb_vinfo)
1923 {
1924 if (dump_enabled_p ())
1925 dump_printf_loc (MSG_NOTE, vect_location,
1926 "=== vect_analyze_data_refs_alignment ===");
1927
1928 /* Mark groups of data references with same alignment using
1929 data dependence information. */
1930 if (loop_vinfo)
1931 {
1932 vec<ddr_p> ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1933 struct data_dependence_relation *ddr;
1934 unsigned int i;
1935
1936 FOR_EACH_VEC_ELT (ddrs, i, ddr)
1937 vect_find_same_alignment_drs (ddr, loop_vinfo);
1938 }
1939
1940 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
1941 {
1942 if (dump_enabled_p ())
1943 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1944 "not vectorized: can't calculate alignment "
1945 "for data ref.");
1946 return false;
1947 }
1948
1949 return true;
1950 }
1951
1952
1953 /* Analyze groups of accesses: check that DR belongs to a group of
1954 accesses of legal size, step, etc. Detect gaps, single element
1955 interleaving, and other special cases. Set grouped access info.
1956 Collect groups of strided stores for further use in SLP analysis. */
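/* For example (illustrative), in

     for (i = 0; i < n; i++)
       {
         s1 += a[2*i];
         s2 += a[2*i+1];
       }

   the two loads form one interleaving group with GROUP_SIZE == 2 and
   DR_STEP == 2 * sizeof (a[0]).  */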
1957
1958 static bool
1959 vect_analyze_group_access (struct data_reference *dr)
1960 {
1961 tree step = DR_STEP (dr);
1962 tree scalar_type = TREE_TYPE (DR_REF (dr));
1963 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1964 gimple stmt = DR_STMT (dr);
1965 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1966 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1967 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1968 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1969 HOST_WIDE_INT groupsize, last_accessed_element = 1;
1970 bool slp_impossible = false;
1971 struct loop *loop = NULL;
1972
1973 if (loop_vinfo)
1974 loop = LOOP_VINFO_LOOP (loop_vinfo);
1975
1976 /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
1977 size of the interleaving group (including gaps). */
1978 groupsize = dr_step / type_size;
1979
1980 /* A non-consecutive access is possible only if it is part of an interleaving group. */
1981 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1982 {
1983 /* Check whether this DR is part of an interleaving group, and is the only
1984 element of the group that is accessed in the loop. */
1985
1986 /* Gaps are supported only for loads. STEP must be a multiple of the type
1987 size. The size of the group must be a power of 2. */
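/* E.g. (illustrative): a loop whose only access to the array is the load
   a[4*i] has DR_STEP == 4 * sizeof (a[0]) and groupsize == 4; being a
   read with a power-of-two group size it is accepted as single element
   interleaving with gaps, which in a loop requires a scalar epilogue
   (LOOP_VINFO_PEELING_FOR_GAPS).  */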
1988 if (DR_IS_READ (dr)
1989 && (dr_step % type_size) == 0
1990 && groupsize > 0
1991 && exact_log2 (groupsize) != -1)
1992 {
1993 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
1994 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
1995 if (dump_enabled_p ())
1996 {
1997 dump_printf_loc (MSG_NOTE, vect_location,
1998 "Detected single element interleaving ");
1999 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr));
2000 dump_printf (MSG_NOTE, " step ");
2001 dump_generic_expr (MSG_NOTE, TDF_SLIM, step);
2002 }
2003
2004 if (loop_vinfo)
2005 {
2006 if (dump_enabled_p ())
2007 dump_printf_loc (MSG_NOTE, vect_location,
2008 "Data access with gaps requires scalar "
2009 "epilogue loop");
2010 if (loop->inner)
2011 {
2012 if (dump_enabled_p ())
2013 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2014 "Peeling for outer loop is not"
2015 " supported");
2016 return false;
2017 }
2018
2019 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2020 }
2021
2022 return true;
2023 }
2024
2025 if (dump_enabled_p ())
2026 {
2027 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2028 "not consecutive access ");
2029 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2030 }
2031
2032 if (bb_vinfo)
2033 {
2034 /* Mark the statement as unvectorizable. */
2035 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2036 return true;
2037 }
2038
2039 return false;
2040 }
2041
2042 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
2043 {
2044 /* First stmt in the interleaving chain. Check the chain. */
2045 gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
2046 struct data_reference *data_ref = dr;
2047 unsigned int count = 1;
2048 tree next_step;
2049 tree prev_init = DR_INIT (data_ref);
2050 gimple prev = stmt;
2051 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2052
2053 while (next)
2054 {
2055 /* Skip identical data-refs. If two or more stmts share the same
2056 data-ref (supported only for loads), we vectorize only the first
2057 stmt, and the rest get their vectorized loads from the first
2058 one. */
2059 if (!tree_int_cst_compare (DR_INIT (data_ref),
2060 DR_INIT (STMT_VINFO_DATA_REF (
2061 vinfo_for_stmt (next)))))
2062 {
2063 if (DR_IS_WRITE (data_ref))
2064 {
2065 if (dump_enabled_p ())
2066 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2067 "Two store stmts share the same dr.");
2068 return false;
2069 }
2070
2071 /* Check that there are no load-store dependences for these loads,
2072 to prevent a load-store-load sequence to the same location. */
2073 if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2074 || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2075 {
2076 if (dump_enabled_p ())
2077 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2078 "READ_WRITE dependence in interleaving.");
2079 return false;
2080 }
2081
2082 /* For this load, reuse the vectorized load of the previous stmt with the same data-ref. */
2083 GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2084
2085 prev = next;
2086 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2087 continue;
2088 }
2089
2090 prev = next;
2091
2092 /* Check that all the accesses have the same STEP. */
2093 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2094 if (tree_int_cst_compare (step, next_step))
2095 {
2096 if (dump_enabled_p ())
2097 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2098 "not consecutive access in interleaving");
2099 return false;
2100 }
2101
2102 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2103 /* Check that the distance between two accesses is equal to the type
2104 size. Otherwise, we have gaps. */
2105 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2106 - TREE_INT_CST_LOW (prev_init)) / type_size;
2107 if (diff != 1)
2108 {
2109 /* FORNOW: SLP of accesses with gaps is not supported. */
2110 slp_impossible = true;
2111 if (DR_IS_WRITE (data_ref))
2112 {
2113 if (dump_enabled_p ())
2114 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2115 "interleaved store with gaps");
2116 return false;
2117 }
2118
2119 gaps += diff - 1;
2120 }
2121
2122 last_accessed_element += diff;
2123
2124 /* Store the gap from the previous member of the group. If there is no
2125 gap in the access, GROUP_GAP is always 1. */
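/* Illustrative example: if the group members are a[3*i] and a[3*i+2],
   DIFF for the second member is 2, so one gap element is recorded
   (gaps == 1), GROUP_GAP of that member is set to 2, and SLP is marked
   impossible for the group.  */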
2126 GROUP_GAP (vinfo_for_stmt (next)) = diff;
2127
2128 prev_init = DR_INIT (data_ref);
2129 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2130 /* Count the number of data-refs in the chain. */
2131 count++;
2132 }
2133
2134 /* COUNT is the number of accesses found; multiply it by the size of
2135 the type to get COUNT_IN_BYTES. */
2136 count_in_bytes = type_size * count;
2137
2138 /* Check that the size of the interleaving (including gaps) is not
2139 greater than STEP. */
2140 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2141 {
2142 if (dump_enabled_p ())
2143 {
2144 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2145 "interleaving size is greater than step for ");
2146 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dr));
2147 }
2148 return false;
2149 }
2150
2151 /* Check that the size of the interleaving is equal to STEP for stores,
2152 i.e., that there are no gaps. */
2153 if (dr_step && dr_step != count_in_bytes)
2154 {
2155 if (DR_IS_READ (dr))
2156 {
2157 slp_impossible = true;
2158 /* There is a gap after the last load in the group. This gap is the
2159 difference between the groupsize and the number of elements.
2160 When there is no gap, this difference should be 0. */
2161 GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - count;
2162 }
2163 else
2164 {
2165 if (dump_enabled_p ())
2166 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2167 "interleaved store with gaps");
2168 return false;
2169 }
2170 }
2171
2172 /* Check that STEP is a multiple of type size. */
2173 if (dr_step && (dr_step % type_size) != 0)
2174 {
2175 if (dump_enabled_p ())
2176 {
2177 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2178 "step is not a multiple of type size: step ");
2179 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, step);
2180 dump_printf (MSG_MISSED_OPTIMIZATION, " size ");
2181 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2182 TYPE_SIZE_UNIT (scalar_type));
2183 }
2184 return false;
2185 }
2186
2187 if (groupsize == 0)
2188 groupsize = count;
2189
2190 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2191 if (dump_enabled_p ())
2192 dump_printf_loc (MSG_NOTE, vect_location,
2193 "Detected interleaving of size %d", (int)groupsize);
2194
2195 /* SLP: create an SLP data structure for every interleaving group of
2196 stores for further analysis in vect_analyze_slp. */
2197 if (DR_IS_WRITE (dr) && !slp_impossible)
2198 {
2199 if (loop_vinfo)
2200 LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (stmt);
2201 if (bb_vinfo)
2202 BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt);
2203 }
2204
2205 /* There is a gap at the end of the group. */
2206 if (groupsize - last_accessed_element > 0 && loop_vinfo)
2207 {
2208 if (dump_enabled_p ())
2209 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2210 "Data access with gaps requires scalar "
2211 "epilogue loop");
2212 if (loop->inner)
2213 {
2214 if (dump_enabled_p ())
2215 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2216 "Peeling for outer loop is not supported");
2217 return false;
2218 }
2219
2220 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2221 }
2222 }
2223
2224 return true;
2225 }
2226
2227
2228 /* Analyze the access pattern of the data-reference DR.
2229 In case of non-consecutive accesses call vect_analyze_group_access() to
2230 analyze groups of accesses. */
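/* For example (illustrative): a[i] with DR_STEP == sizeof (a[0]) is a
   consecutive access and needs no group analysis, whereas a[2*i] has a
   larger constant step and is handed to vect_analyze_group_access.  */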
2231
2232 static bool
2233 vect_analyze_data_ref_access (struct data_reference *dr)
2234 {
2235 tree step = DR_STEP (dr);
2236 tree scalar_type = TREE_TYPE (DR_REF (dr));
2237 gimple stmt = DR_STMT (dr);
2238 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2239 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2240 struct loop *loop = NULL;
2241
2242 if (loop_vinfo)
2243 loop = LOOP_VINFO_LOOP (loop_vinfo);
2244
2245 if (loop_vinfo && !step)
2246 {
2247 if (dump_enabled_p ())
2248 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2249 "bad data-ref access in loop");
2250 return false;
2251 }
2252
2253 /* Allow invariant loads in loops. */
2254 if (loop_vinfo && integer_zerop (step))
2255 {
2256 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2257 return DR_IS_READ (dr);
2258 }
2259
2260 if (loop && nested_in_vect_loop_p (loop, stmt))
2261 {
2262 /* Interleaved accesses are not yet supported within outer-loop
2263 vectorization for references in the inner-loop. */
2264 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2265
2266 /* For the rest of the analysis we use the outer-loop step. */
2267 step = STMT_VINFO_DR_STEP (stmt_info);
2268 if (integer_zerop (step))
2269 {
2270 if (dump_enabled_p ())
2271 dump_printf_loc (MSG_NOTE, vect_location,
2272 "zero step in outer loop.");
2273 if (DR_IS_READ (dr))
2274 return true;
2275 else
2276 return false;
2277 }
2278 }
2279
2280 /* Consecutive? */
2281 if (TREE_CODE (step) == INTEGER_CST)
2282 {
2283 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2284 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2285 || (dr_step < 0
2286 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2287 {
2288 /* Mark that it is not interleaving. */
2289 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2290 return true;
2291 }
2292 }
2293
2294 if (loop && nested_in_vect_loop_p (loop, stmt))
2295 {
2296 if (dump_enabled_p ())
2297 dump_printf_loc (MSG_NOTE, vect_location,
2298 "grouped access in outer loop.");
2299 return false;
2300 }
2301
2302 /* Assume this is a DR handled by the non-constant strided load case. */
2303 if (TREE_CODE (step) != INTEGER_CST)
2304 return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
2305
2306 /* Non-consecutive access - check whether it is part of an interleaving group. */
2307 return vect_analyze_group_access (dr);
2308 }
2309
2310 /* Compare two data-references DRA and DRB to sort them into chunks
2311 suitable for building interleaving groups. */
2312
2313 static int
2314 dr_group_sort_cmp (const void *dra_, const void *drb_)
2315 {
2316 data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
2317 data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
2318 hashval_t h1, h2;
2319 int cmp;
2320
2321 /* Stabilize sort. */
2322 if (dra == drb)
2323 return 0;
2324
2325 /* Ordering of DRs according to base. */
2326 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0))
2327 {
2328 h1 = iterative_hash_expr (DR_BASE_ADDRESS (dra), 0);
2329 h2 = iterative_hash_expr (DR_BASE_ADDRESS (drb), 0);
2330 if (h1 != h2)
2331 return h1 < h2 ? -1 : 1;
2332 }
2333
2334 /* And according to DR_OFFSET. */
2335 if (!dr_equal_offsets_p (dra, drb))
2336 {
2337 h1 = iterative_hash_expr (DR_OFFSET (dra), 0);
2338 h2 = iterative_hash_expr (DR_OFFSET (drb), 0);
2339 if (h1 != h2)
2340 return h1 < h2 ? -1 : 1;
2341 }
2342
2343 /* Put reads before writes. */
2344 if (DR_IS_READ (dra) != DR_IS_READ (drb))
2345 return DR_IS_READ (dra) ? -1 : 1;
2346
2347 /* Then sort by access size. */
2348 if (!operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
2349 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))), 0))
2350 {
2351 h1 = iterative_hash_expr (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))), 0);
2352 h2 = iterative_hash_expr (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))), 0);
2353 if (h1 != h2)
2354 return h1 < h2 ? -1 : 1;
2355 }
2356
2357 /* And by step. */
2358 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
2359 {
2360 h1 = iterative_hash_expr (DR_STEP (dra), 0);
2361 h2 = iterative_hash_expr (DR_STEP (drb), 0);
2362 if (h1 != h2)
2363 return h1 < h2 ? -1 : 1;
2364 }
2365
2366 /* Then sort by DR_INIT. For identical DRs, sort by stmt UID. */
2367 cmp = tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb));
2368 if (cmp == 0)
2369 return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
2370 return cmp;
2371 }
2372
2373 /* Function vect_analyze_data_ref_accesses.
2374
2375 Analyze the access pattern of all the data references in the loop.
2376
2377 FORNOW: the only access pattern that is considered vectorizable is a
2378 simple step 1 (consecutive) access.
2379
2380 FORNOW: handle only arrays and pointer accesses. */
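/* Illustrative sketch of the chain building below: after sorting, the
   data-refs for a[2*i] and a[2*i+1] (same base, offset and step) end up
   adjacent; GROUP_FIRST_ELEMENT of both stmts is set to the a[2*i] stmt
   and GROUP_NEXT_ELEMENT links it to the a[2*i+1] stmt.  */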
2381
2382 bool
2383 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2384 {
2385 unsigned int i;
2386 vec<data_reference_p> datarefs;
2387 struct data_reference *dr;
2388
2389 if (dump_enabled_p ())
2390 dump_printf_loc (MSG_NOTE, vect_location,
2391 "=== vect_analyze_data_ref_accesses ===");
2392
2393 if (loop_vinfo)
2394 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2395 else
2396 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2397
2398 if (datarefs.is_empty ())
2399 return true;
2400
2401 /* Sort the array of datarefs to make building the interleaving chains
2402 linear. */
2403 qsort (datarefs.address(), datarefs.length (),
2404 sizeof (data_reference_p), dr_group_sort_cmp);
2405
2406 /* Build the interleaving chains. */
2407 for (i = 0; i < datarefs.length () - 1;)
2408 {
2409 data_reference_p dra = datarefs[i];
2410 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
2411 stmt_vec_info lastinfo = NULL;
2412 for (i = i + 1; i < datarefs.length (); ++i)
2413 {
2414 data_reference_p drb = datarefs[i];
2415 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
2416
2417 /* ??? Imperfect sorting (non-compatible types, non-modulo
2418 accesses, same accesses) can lead to a group being artificially
2419 split here as we don't just skip over those. If it really
2420 matters we can push those to a worklist and re-iterate
2421 over them. Then we can just skip ahead to the next DR here. */
2422
2423 /* Check that the data-refs have the same first location (except init)
2424 and are both either stores or loads (not one load and one store). */
2425 if (DR_IS_READ (dra) != DR_IS_READ (drb)
2426 || !operand_equal_p (DR_BASE_ADDRESS (dra),
2427 DR_BASE_ADDRESS (drb), 0)
2428 || !dr_equal_offsets_p (dra, drb))
2429 break;
2430
2431 /* Check that the data-refs have the same constant size and step. */
2432 tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
2433 tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
2434 if (!host_integerp (sza, 1)
2435 || !host_integerp (szb, 1)
2436 || !tree_int_cst_equal (sza, szb)
2437 || !host_integerp (DR_STEP (dra), 0)
2438 || !host_integerp (DR_STEP (drb), 0)
2439 || !tree_int_cst_equal (DR_STEP (dra), DR_STEP (drb)))
2440 break;
2441
2442 /* Do not place the same access in the interleaving chain twice. */
2443 if (tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) == 0)
2444 break;
2445
2446 /* Check that the types are compatible.
2447 ??? We don't distinguish this during sorting. */
2448 if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
2449 TREE_TYPE (DR_REF (drb))))
2450 break;
2451
2452 /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). */
2453 HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
2454 HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
2455 gcc_assert (init_a < init_b);
2456
2457 /* If init_b == init_a + the size of the type * k, we have an
2458 interleaving, and DRA is accessed before DRB. */
2459 HOST_WIDE_INT type_size_a = TREE_INT_CST_LOW (sza);
2460 if ((init_b - init_a) % type_size_a != 0)
2461 break;
2462
2463 /* The step (if not zero) must be greater than the difference between the
2464 data-refs' inits; this splits groups into suitable sizes. */
2465 HOST_WIDE_INT step = TREE_INT_CST_LOW (DR_STEP (dra));
2466 if (step != 0 && step <= (init_b - init_a))
2467 break;
2468
2469 if (dump_enabled_p ())
2470 {
2471 dump_printf_loc (MSG_NOTE, vect_location,
2472 "Detected interleaving ");
2473 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
2474 dump_printf (MSG_NOTE, " and ");
2475 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
2476 }
2477
2478 /* Link the found element into the group list. */
2479 if (!GROUP_FIRST_ELEMENT (stmtinfo_a))
2480 {
2481 GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (dra);
2482 lastinfo = stmtinfo_a;
2483 }
2484 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (dra);
2485 GROUP_NEXT_ELEMENT (lastinfo) = DR_STMT (drb);
2486 lastinfo = stmtinfo_b;
2487 }
2488 }
2489
2490 FOR_EACH_VEC_ELT (datarefs, i, dr)
2491 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2492 && !vect_analyze_data_ref_access (dr))
2493 {
2494 if (dump_enabled_p ())
2495 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2496 "not vectorized: complicated access pattern.");
2497
2498 if (bb_vinfo)
2499 {
2500 /* Mark the statement as not vectorizable. */
2501 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2502 continue;
2503 }
2504 else
2505 return false;
2506 }
2507
2508 return true;
2509 }
2510
2511 /* Function vect_prune_runtime_alias_test_list.
2512
2513 Prune a list of ddrs to be tested at run-time by versioning for alias.
2514 Return FALSE if the resulting list of ddrs is longer than allowed by
2515 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2516
2517 bool
2518 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2519 {
2520 vec<ddr_p> ddrs =
2521 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2522 unsigned i, j;
2523
2524 if (dump_enabled_p ())
2525 dump_printf_loc (MSG_NOTE, vect_location,
2526 "=== vect_prune_runtime_alias_test_list ===");
2527
2528 for (i = 0; i < ddrs.length (); )
2529 {
2530 bool found;
2531 ddr_p ddr_i;
2532
2533 ddr_i = ddrs[i];
2534 found = false;
2535
2536 for (j = 0; j < i; j++)
2537 {
2538 ddr_p ddr_j = ddrs[j];
2539
2540 if (vect_vfa_range_equal (ddr_i, ddr_j))
2541 {
2542 if (dump_enabled_p ())
2543 {
2544 dump_printf_loc (MSG_NOTE, vect_location,
2545 "found equal ranges ");
2546 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr_i)));
2547 dump_printf (MSG_NOTE, ", ");
2548 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr_i)));
2549 dump_printf (MSG_NOTE, " and ");
2550 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr_j)));
2551 dump_printf (MSG_NOTE, ", ");
2552 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr_j)));
2553 }
2554 found = true;
2555 break;
2556 }
2557 }
2558
2559 if (found)
2560 {
2561 ddrs.ordered_remove (i);
2562 continue;
2563 }
2564 i++;
2565 }
2566
2567 if (ddrs.length () >
2568 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2569 {
2570 if (dump_enabled_p ())
2571 {
2572 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2573 "disable versioning for alias - max number of "
2574 "generated checks exceeded.");
2575 }
2576
2577 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).truncate (0);
2578
2579 return false;
2580 }
2581
2582 return true;
2583 }
2584
2585 /* Check whether a non-affine read in stmt is suitable for gather load
2586 and if so, return a builtin decl for that operation. */
2587
2588 tree
2589 vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2590 tree *offp, int *scalep)
2591 {
2592 HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
2593 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2594 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2595 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2596 tree offtype = NULL_TREE;
2597 tree decl, base, off;
2598 enum machine_mode pmode;
2599 int punsignedp, pvolatilep;
2600
2601 /* The gather builtins need an address of the form
2602 loop_invariant + vector * {1, 2, 4, 8}
2603 or
2604 loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
2605 Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
2606 of loop invariants/SSA_NAMEs defined in the loop, with casts,
2607 multiplications and additions in it. To get a vector, we need
2608 a single SSA_NAME that will be defined in the loop and will
2609 contain everything that is not loop invariant and that can be
2610 vectorized. The following code attempts to find such a preexisting
2611 SSA_NAME OFF and put the loop invariants into a tree BASE
2612 that can be gimplified before the loop. */
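/* For instance (illustrative): for a read like p[idx[i]] with P loop
   invariant and idx[i] varying, the intent is to end up with BASE roughly
   equal to P (plus any constant offset), OFF the SSA_NAME holding the
   (possibly sign-extended) idx[i], and SCALE equal to sizeof (*p).  */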
2613 base = get_inner_reference (DR_REF (dr), &pbitsize, &pbitpos, &off,
2614 &pmode, &punsignedp, &pvolatilep, false);
2615 gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);
2616
2617 if (TREE_CODE (base) == MEM_REF)
2618 {
2619 if (!integer_zerop (TREE_OPERAND (base, 1)))
2620 {
2621 if (off == NULL_TREE)
2622 {
2623 double_int moff = mem_ref_offset (base);
2624 off = double_int_to_tree (sizetype, moff);
2625 }
2626 else
2627 off = size_binop (PLUS_EXPR, off,
2628 fold_convert (sizetype, TREE_OPERAND (base, 1)));
2629 }
2630 base = TREE_OPERAND (base, 0);
2631 }
2632 else
2633 base = build_fold_addr_expr (base);
2634
2635 if (off == NULL_TREE)
2636 off = size_zero_node;
2637
2638 /* If BASE is not loop invariant, then if OFF is 0 we start with just
2639 the constant offset in the loop-invariant BASE and continue with BASE
2640 as OFF; otherwise give up.
2641 We could handle that case by gimplifying the addition of BASE + OFF
2642 into some SSA_NAME and using that as OFF, but for now punt. */
2643 if (!expr_invariant_in_loop_p (loop, base))
2644 {
2645 if (!integer_zerop (off))
2646 return NULL_TREE;
2647 off = base;
2648 base = size_int (pbitpos / BITS_PER_UNIT);
2649 }
2650 /* Otherwise put base + constant offset into the loop invariant BASE
2651 and continue with OFF. */
2652 else
2653 {
2654 base = fold_convert (sizetype, base);
2655 base = size_binop (PLUS_EXPR, base, size_int (pbitpos / BITS_PER_UNIT));
2656 }
2657
2658 /* OFF at this point may be either a SSA_NAME or some tree expression
2659 from get_inner_reference. Try to peel off loop invariants from it
2660 into BASE as long as possible. */
2661 STRIP_NOPS (off);
2662 while (offtype == NULL_TREE)
2663 {
2664 enum tree_code code;
2665 tree op0, op1, add = NULL_TREE;
2666
2667 if (TREE_CODE (off) == SSA_NAME)
2668 {
2669 gimple def_stmt = SSA_NAME_DEF_STMT (off);
2670
2671 if (expr_invariant_in_loop_p (loop, off))
2672 return NULL_TREE;
2673
2674 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
2675 break;
2676
2677 op0 = gimple_assign_rhs1 (def_stmt);
2678 code = gimple_assign_rhs_code (def_stmt);
2679 op1 = gimple_assign_rhs2 (def_stmt);
2680 }
2681 else
2682 {
2683 if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
2684 return NULL_TREE;
2685 code = TREE_CODE (off);
2686 extract_ops_from_tree (off, &code, &op0, &op1);
2687 }
2688 switch (code)
2689 {
2690 case POINTER_PLUS_EXPR:
2691 case PLUS_EXPR:
2692 if (expr_invariant_in_loop_p (loop, op0))
2693 {
2694 add = op0;
2695 off = op1;
2696 do_add:
2697 add = fold_convert (sizetype, add);
2698 if (scale != 1)
2699 add = size_binop (MULT_EXPR, add, size_int (scale));
2700 base = size_binop (PLUS_EXPR, base, add);
2701 continue;
2702 }
2703 if (expr_invariant_in_loop_p (loop, op1))
2704 {
2705 add = op1;
2706 off = op0;
2707 goto do_add;
2708 }
2709 break;
2710 case MINUS_EXPR:
2711 if (expr_invariant_in_loop_p (loop, op1))
2712 {
2713 add = fold_convert (sizetype, op1);
2714 add = size_binop (MINUS_EXPR, size_zero_node, add);
2715 off = op0;
2716 goto do_add;
2717 }
2718 break;
2719 case MULT_EXPR:
2720 if (scale == 1 && host_integerp (op1, 0))
2721 {
2722 scale = tree_low_cst (op1, 0);
2723 off = op0;
2724 continue;
2725 }
2726 break;
2727 case SSA_NAME:
2728 off = op0;
2729 continue;
2730 CASE_CONVERT:
2731 if (!POINTER_TYPE_P (TREE_TYPE (op0))
2732 && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2733 break;
2734 if (TYPE_PRECISION (TREE_TYPE (op0))
2735 == TYPE_PRECISION (TREE_TYPE (off)))
2736 {
2737 off = op0;
2738 continue;
2739 }
2740 if (TYPE_PRECISION (TREE_TYPE (op0))
2741 < TYPE_PRECISION (TREE_TYPE (off)))
2742 {
2743 off = op0;
2744 offtype = TREE_TYPE (off);
2745 STRIP_NOPS (off);
2746 continue;
2747 }
2748 break;
2749 default:
2750 break;
2751 }
2752 break;
2753 }
2754
2755 /* If at the end OFF still isn't an SSA_NAME or isn't
2756 defined in the loop, punt. */
2757 if (TREE_CODE (off) != SSA_NAME
2758 || expr_invariant_in_loop_p (loop, off))
2759 return NULL_TREE;
2760
2761 if (offtype == NULL_TREE)
2762 offtype = TREE_TYPE (off);
2763
2764 decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
2765 offtype, scale);
2766 if (decl == NULL_TREE)
2767 return NULL_TREE;
2768
2769 if (basep)
2770 *basep = base;
2771 if (offp)
2772 *offp = off;
2773 if (scalep)
2774 *scalep = scale;
2775 return decl;
2776 }
2777
2778 /* Check whether a non-affine load in STMT (being in the loop referred to
2779 in LOOP_VINFO) is suitable for handling as a strided load. That is the
2780 case if its address is based on a loop-invariant base and is indexed by
2781 a simple affine induction variable of the loop with a loop-invariant
2782 step.
2783
2784 This handles ARRAY_REFs (with variant index) and MEM_REFs (with variant
2785 base pointer) only. */
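/* For example (illustrative), in

     for (i = 0; i < n; i++)
       {
         s += a[k];
         k += stride;
       }

   the base A is loop invariant and the index K is a simple affine
   induction variable with loop-invariant step STRIDE, so the load
   qualifies as a strided load.  */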
2786
2787 static bool
2788 vect_check_strided_load (gimple stmt, loop_vec_info loop_vinfo)
2789 {
2790 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2791 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2792 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2793 tree base, off;
2794 affine_iv iv;
2795
2796 if (!DR_IS_READ (dr))
2797 return false;
2798
2799 base = DR_REF (dr);
2800
2801 if (TREE_CODE (base) == ARRAY_REF)
2802 {
2803 off = TREE_OPERAND (base, 1);
2804 base = TREE_OPERAND (base, 0);
2805 }
2806 else if (TREE_CODE (base) == MEM_REF)
2807 {
2808 off = TREE_OPERAND (base, 0);
2809 base = TREE_OPERAND (base, 1);
2810 }
2811 else
2812 return false;
2813
2814 if (TREE_CODE (off) != SSA_NAME)
2815 return false;
2816
2817 if (!expr_invariant_in_loop_p (loop, base)
2818 || !simple_iv (loop, loop_containing_stmt (stmt), off, &iv, true))
2819 return false;
2820
2821 return true;
2822 }
2823
2824 /* Function vect_analyze_data_refs.
2825
2826 Find all the data references in the loop or basic block.
2827
2828 The general structure of the analysis of data refs in the vectorizer is as
2829 follows:
2830 1- vect_analyze_data_refs(loop/bb): call
2831 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2832 in the loop/bb and their dependences.
2833 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2834 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2835 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2836
2837 */
2838
2839 bool
2840 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2841 bb_vec_info bb_vinfo,
2842 int *min_vf)
2843 {
2844 struct loop *loop = NULL;
2845 basic_block bb = NULL;
2846 unsigned int i;
2847 vec<data_reference_p> datarefs;
2848 struct data_reference *dr;
2849 tree scalar_type;
2850
2851 if (dump_enabled_p ())
2852 dump_printf_loc (MSG_NOTE, vect_location,
2853 "=== vect_analyze_data_refs ===\n");
2854
2855 if (loop_vinfo)
2856 {
2857 loop = LOOP_VINFO_LOOP (loop_vinfo);
2858 if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo))
2859 || find_data_references_in_loop
2860 (loop, &LOOP_VINFO_DATAREFS (loop_vinfo)))
2861 {
2862 if (dump_enabled_p ())
2863 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2864 "not vectorized: loop contains function calls"
2865 " or data references that cannot be analyzed");
2866 return false;
2867 }
2868
2869 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2870 }
2871 else
2872 {
2873 gimple_stmt_iterator gsi;
2874
2875 bb = BB_VINFO_BB (bb_vinfo);
2876 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2877 {
2878 gimple stmt = gsi_stmt (gsi);
2879 if (!find_data_references_in_stmt (NULL, stmt,
2880 &BB_VINFO_DATAREFS (bb_vinfo)))
2881 {
2882 /* Mark the rest of the basic-block as unvectorizable. */
2883 for (; !gsi_end_p (gsi); gsi_next (&gsi))
2884 {
2885 stmt = gsi_stmt (gsi);
2886 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)) = false;
2887 }
2888 break;
2889 }
2890 }
2891
2892 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2893 }
2894
2895 /* Go through the data-refs, checking that the analysis succeeded. Update
2896 the pointer from the stmt_vec_info struct to the DR and set the vectype. */
2897
2898 FOR_EACH_VEC_ELT (datarefs, i, dr)
2899 {
2900 gimple stmt;
2901 stmt_vec_info stmt_info;
2902 tree base, offset, init;
2903 bool gather = false;
2904 int vf;
2905
2906 if (!dr || !DR_REF (dr))
2907 {
2908 if (dump_enabled_p ())
2909 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2910 "not vectorized: unhandled data-ref ");
2911 return false;
2912 }
2913
2914 stmt = DR_STMT (dr);
2915 stmt_info = vinfo_for_stmt (stmt);
2916
2917 /* Check that analysis of the data-ref succeeded. */
2918 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2919 || !DR_STEP (dr))
2920 {
2921 /* If the target supports vector gather loads, see if they can
2922 be used. */
2923 if (loop_vinfo
2924 && DR_IS_READ (dr)
2925 && !TREE_THIS_VOLATILE (DR_REF (dr))
2926 && targetm.vectorize.builtin_gather != NULL
2927 && !nested_in_vect_loop_p (loop, stmt))
2928 {
2929 struct data_reference *newdr
2930 = create_data_ref (NULL, loop_containing_stmt (stmt),
2931 DR_REF (dr), stmt, true);
2932 gcc_assert (newdr != NULL && DR_REF (newdr));
2933 if (DR_BASE_ADDRESS (newdr)
2934 && DR_OFFSET (newdr)
2935 && DR_INIT (newdr)
2936 && DR_STEP (newdr)
2937 && integer_zerop (DR_STEP (newdr)))
2938 {
2939 dr = newdr;
2940 gather = true;
2941 }
2942 else
2943 free_data_ref (newdr);
2944 }
2945
2946 if (!gather)
2947 {
2948 if (dump_enabled_p ())
2949 {
2950 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2951 "not vectorized: data ref analysis "
2952 "failed ");
2953 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2954 }
2955
2956 if (bb_vinfo)
2957 break;
2958
2959 return false;
2960 }
2961 }
2962
2963 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2964 {
2965 if (dump_enabled_p ())
2966 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2967 "not vectorized: base addr of dr is a "
2968 "constant");
2969
2970 if (bb_vinfo)
2971 break;
2972
2973 if (gather)
2974 free_data_ref (dr);
2975 return false;
2976 }
2977
2978 if (TREE_THIS_VOLATILE (DR_REF (dr)))
2979 {
2980 if (dump_enabled_p ())
2981 {
2982 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2983 "not vectorized: volatile type ");
2984 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2985 }
2986
2987 if (bb_vinfo)
2988 break;
2989
2990 return false;
2991 }
2992
2993 if (stmt_can_throw_internal (stmt))
2994 {
2995 if (dump_enabled_p ())
2996 {
2997 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2998 "not vectorized: statement can throw an "
2999 "exception ");
3000 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3001 }
3002
3003 if (bb_vinfo)
3004 break;
3005
3006 if (gather)
3007 free_data_ref (dr);
3008 return false;
3009 }
3010
3011 if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
3012 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
3013 {
3014 if (dump_enabled_p ())
3015 {
3016 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3017 "not vectorized: statement is bitfield "
3018 "access ");
3019 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3020 }
3021
3022 if (bb_vinfo)
3023 break;
3024
3025 if (gather)
3026 free_data_ref (dr);
3027 return false;
3028 }
3029
3030 base = unshare_expr (DR_BASE_ADDRESS (dr));
3031 offset = unshare_expr (DR_OFFSET (dr));
3032 init = unshare_expr (DR_INIT (dr));
3033
3034 if (is_gimple_call (stmt))
3035 {
3036 if (dump_enabled_p ())
3037 {
3038 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3039 "not vectorized: dr in a call ");
3040 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3041 }
3042
3043 if (bb_vinfo)
3044 break;
3045
3046 if (gather)
3047 free_data_ref (dr);
3048 return false;
3049 }
3050
3051 /* Update DR field in stmt_vec_info struct. */
3052
3053 /* If the dataref is in an inner-loop of the loop that is considered
3054 for vectorization, we also want to analyze the access relative to
3055 the outer-loop (DR contains information only relative to the
3056 inner-most enclosing loop). We do that by building a reference to the
3057 first location accessed by the inner-loop, and analyze it relative to
3058 the outer-loop. */
3059 if (loop && nested_in_vect_loop_p (loop, stmt))
3060 {
3061 tree outer_step, outer_base, outer_init;
3062 HOST_WIDE_INT pbitsize, pbitpos;
3063 tree poffset;
3064 enum machine_mode pmode;
3065 int punsignedp, pvolatilep;
3066 affine_iv base_iv, offset_iv;
3067 tree dinit;
3068
3069 /* Build a reference to the first location accessed by the
3070 inner-loop: *(BASE+INIT). (The first location is actually
3071 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3072 tree inner_base = build_fold_indirect_ref
3073 (fold_build_pointer_plus (base, init));
3074
3075 if (dump_enabled_p ())
3076 {
3077 dump_printf_loc (MSG_NOTE, vect_location,
3078 "analyze in outer-loop: ");
3079 dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base);
3080 }
3081
3082 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3083 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3084 gcc_assert (outer_base != NULL_TREE);
3085
3086 if (pbitpos % BITS_PER_UNIT != 0)
3087 {
3088 if (dump_enabled_p ())
3089 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3090 "failed: bit offset alignment.\n");
3091 return false;
3092 }
3093
3094 outer_base = build_fold_addr_expr (outer_base);
3095 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
3096 &base_iv, false))
3097 {
3098 if (dump_enabled_p ())
3099 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3100 "failed: evolution of base is not affine.\n");
3101 return false;
3102 }
3103
3104 if (offset)
3105 {
3106 if (poffset)
3107 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
3108 poffset);
3109 else
3110 poffset = offset;
3111 }
3112
3113 if (!poffset)
3114 {
3115 offset_iv.base = ssize_int (0);
3116 offset_iv.step = ssize_int (0);
3117 }
3118 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
3119 &offset_iv, false))
3120 {
3121 if (dump_enabled_p ())
3122 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3123 "evolution of offset is not affine.\n");
3124 return false;
3125 }
3126
3127 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3128 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3129 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3130 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3131 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3132
3133 outer_step = size_binop (PLUS_EXPR,
3134 fold_convert (ssizetype, base_iv.step),
3135 fold_convert (ssizetype, offset_iv.step));
3136
3137 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3138 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3139 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3140 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3141 STMT_VINFO_DR_OFFSET (stmt_info) =
3142 fold_convert (ssizetype, offset_iv.base);
3143 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3144 size_int (highest_pow2_factor (offset_iv.base));
3145
3146 if (dump_enabled_p ())
3147 {
3148 dump_printf_loc (MSG_NOTE, vect_location,
3149 "\touter base_address: ");
3150 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3151 STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3152 dump_printf (MSG_NOTE, "\n\touter offset from base address: ");
3153 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3154 STMT_VINFO_DR_OFFSET (stmt_info));
3155 dump_printf (MSG_NOTE,
3156 "\n\touter constant offset from base address: ");
3157 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3158 STMT_VINFO_DR_INIT (stmt_info));
3159 dump_printf (MSG_NOTE, "\n\touter step: ");
3160 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3161 STMT_VINFO_DR_STEP (stmt_info));
3162 dump_printf (MSG_NOTE, "\n\touter aligned to: ");
3163 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3164 STMT_VINFO_DR_ALIGNED_TO (stmt_info));
3165 }
3166 }
3167
3168 if (STMT_VINFO_DATA_REF (stmt_info))
3169 {
3170 if (dump_enabled_p ())
3171 {
3172 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3173 "not vectorized: more than one data ref "
3174 "in stmt: ");
3175 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3176 }
3177
3178 if (bb_vinfo)
3179 break;
3180
3181 if (gather)
3182 free_data_ref (dr);
3183 return false;
3184 }
3185
3186 STMT_VINFO_DATA_REF (stmt_info) = dr;
3187
3188 /* Set vectype for STMT. */
3189 scalar_type = TREE_TYPE (DR_REF (dr));
3190 STMT_VINFO_VECTYPE (stmt_info) =
3191 get_vectype_for_scalar_type (scalar_type);
3192 if (!STMT_VINFO_VECTYPE (stmt_info))
3193 {
3194 if (dump_enabled_p ())
3195 {
3196 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3197 "not vectorized: no vectype for stmt: ");
3198 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3199 dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
3200 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
3201 scalar_type);
3202 }
3203
3204 if (bb_vinfo)
3205 break;
3206
3207 if (gather)
3208 {
3209 STMT_VINFO_DATA_REF (stmt_info) = NULL;
3210 free_data_ref (dr);
3211 }
3212 return false;
3213 }
3214
3215 /* Adjust the minimal vectorization factor according to the
3216 vector type. */
3217 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
3218 if (vf > *min_vf)
3219 *min_vf = vf;
3220
3221 if (gather)
3222 {
3223 tree off;
3224
3225 gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
3226 if (gather
3227 && get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
3228 gather = false;
3229 if (!gather)
3230 {
3231 STMT_VINFO_DATA_REF (stmt_info) = NULL;
3232 free_data_ref (dr);
3233 if (dump_enabled_p ())
3234 {
3235 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3236 "not vectorized: not suitable for gather "
3237 "load ");
3238 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3239 }
3240 return false;
3241 }
3242
3243 datarefs[i] = dr;
3244 STMT_VINFO_GATHER_P (stmt_info) = true;
3245 }
3246 else if (loop_vinfo
3247 && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
3248 {
3249 bool strided_load = false;
3250 if (!nested_in_vect_loop_p (loop, stmt))
3251 strided_load = vect_check_strided_load (stmt, loop_vinfo);
3252 if (!strided_load)
3253 {
3254 if (dump_enabled_p ())
3255 {
3256 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3257 "not vectorized: not suitable for strided "
3258 "load ");
3259 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3260 }
3261 return false;
3262 }
3263 STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
3264 }
3265 }
3266
3267 /* If, when trying to vectorize a basic-block, we stopped the analysis at
3268 the first dataref we could not analyze, mark the rest of the datarefs
3269 as not vectorizable and truncate the vector of datarefs. That
3270 avoids spending useless time analyzing their dependences. */
3271 if (i != datarefs.length ())
3272 {
3273 gcc_assert (bb_vinfo != NULL);
3274 for (unsigned j = i; j < datarefs.length (); ++j)
3275 {
3276 data_reference_p dr = datarefs[j];
3277 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
3278 free_data_ref (dr);
3279 }
3280 datarefs.truncate (i);
3281 }
3282
3283 return true;
3284 }
3285
3286
3287 /* Function vect_get_new_vect_var.
3288
3289 Returns a new temporary variable. The current naming scheme prepends the
3290 prefix "vect_", "vect_p", or "stmp_" (depending on the value of VAR_KIND)
3291 to NAME when NAME is provided; otherwise the bare prefix is used as the
3292 variable name. */
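/* E.g. (illustrative): vect_get_new_vect_var (ptr_type, vect_pointer_var,
   "a") creates a temporary named along the lines of "vect_pa" (plus the
   usual uniquifying suffix), while a NULL NAME yields just the bare
   prefix.  */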
3293
3294 tree
3295 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
3296 {
3297 const char *prefix;
3298 tree new_vect_var;
3299
3300 switch (var_kind)
3301 {
3302 case vect_simple_var:
3303 prefix = "vect_";
3304 break;
3305 case vect_scalar_var:
3306 prefix = "stmp_";
3307 break;
3308 case vect_pointer_var:
3309 prefix = "vect_p";
3310 break;
3311 default:
3312 gcc_unreachable ();
3313 }
3314
3315 if (name)
3316 {
3317 char* tmp = concat (prefix, name, NULL);
3318 new_vect_var = create_tmp_reg (type, tmp);
3319 free (tmp);
3320 }
3321 else
3322 new_vect_var = create_tmp_reg (type, prefix);
3323
3324 return new_vect_var;
3325 }
3326
3327
3328 /* Function vect_create_addr_base_for_vector_ref.
3329
3330 Create an expression that computes the address of the first memory location
3331 that will be accessed for a data reference.
3332
3333 Input:
3334 STMT: The statement containing the data reference.
3335 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
3336 OFFSET: Optional. If supplied, it is added to the initial address.
3337 LOOP: Specify relative to which loop-nest should the address be computed.
3338 For example, when the dataref is in an inner-loop nested in an
3339 outer-loop that is now being vectorized, LOOP can be either the
3340 outer-loop, or the inner-loop. The first memory location accessed
3341 by the following dataref ('in' points to short):
3342
3343 for (i=0; i<N; i++)
3344 for (j=0; j<M; j++)
3345 s += in[i+j]
3346
3347 is as follows:
3348 if LOOP=i_loop: &in (relative to i_loop)
3349 if LOOP=j_loop: &in+i*2B (relative to j_loop)
3350
3351 Output:
3352 1. Return an SSA_NAME whose value is the address of the memory location of
3353 the first vector of the data reference.
3354 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
3355 these statement(s) which define the returned SSA_NAME.
3356
3357 FORNOW: We are only handling array accesses with step 1. */
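/* Illustrative sketch of the result in the loop case: for a dataref on
   a[i] with DR_BASE_ADDRESS == &a, DR_OFFSET == 0, DR_INIT == 0 and
   OFFSET == 4, the returned SSA_NAME holds roughly
   (vectype *) (&a + 4 * sizeof (a[0])), with the computation emitted
   into NEW_STMT_LIST.  */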
3358
3359 tree
3360 vect_create_addr_base_for_vector_ref (gimple stmt,
3361 gimple_seq *new_stmt_list,
3362 tree offset,
3363 struct loop *loop)
3364 {
3365 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3366 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3367 tree data_ref_base;
3368 const char *base_name;
3369 tree addr_base;
3370 tree dest;
3371 gimple_seq seq = NULL;
3372 tree base_offset;
3373 tree init;
3374 tree vect_ptr_type;
3375 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
3376 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3377
3378 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
3379 {
3380 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
3381
3382 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
3383
3384 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3385 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
3386 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
3387 }
3388 else
3389 {
3390 data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
3391 base_offset = unshare_expr (DR_OFFSET (dr));
3392 init = unshare_expr (DR_INIT (dr));
3393 }
3394
3395 if (loop_vinfo)
3396 base_name = get_name (data_ref_base);
3397 else
3398 {
3399 base_offset = ssize_int (0);
3400 init = ssize_int (0);
3401 base_name = get_name (DR_REF (dr));
3402 }
3403
3404 /* Create base_offset */
3405 base_offset = size_binop (PLUS_EXPR,
3406 fold_convert (sizetype, base_offset),
3407 fold_convert (sizetype, init));
3408
3409 if (offset)
3410 {
3411 offset = fold_build2 (MULT_EXPR, sizetype,
3412 fold_convert (sizetype, offset), step);
3413 base_offset = fold_build2 (PLUS_EXPR, sizetype,
3414 base_offset, offset);
3415 }
3416
3417 /* base + base_offset */
3418 if (loop_vinfo)
3419 addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
3420 else
3421 {
3422 addr_base = build1 (ADDR_EXPR,
3423 build_pointer_type (TREE_TYPE (DR_REF (dr))),
3424 unshare_expr (DR_REF (dr)));
3425 }
3426
3427 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
3428 addr_base = fold_convert (vect_ptr_type, addr_base);
3429 dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
3430 addr_base = force_gimple_operand (addr_base, &seq, false, dest);
3431 gimple_seq_add_seq (new_stmt_list, seq);
3432
3433 if (DR_PTR_INFO (dr)
3434 && TREE_CODE (addr_base) == SSA_NAME)
3435 {
3436 duplicate_ssa_name_ptr_info (addr_base, DR_PTR_INFO (dr));
3437 if (offset)
3438 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr_base));
3439 }
3440
3441 if (dump_enabled_p ())
3442 {
3443 dump_printf_loc (MSG_NOTE, vect_location, "created ");
3444 dump_generic_expr (MSG_NOTE, TDF_SLIM, addr_base);
3445 }
3446
3447 return addr_base;
3448 }
3449
3450
3451 /* Function vect_create_data_ref_ptr.
3452
3453 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
3454 location accessed in the loop by STMT, along with the def-use update
3455 chain to appropriately advance the pointer through the loop iterations.
3456 Also set aliasing information for the pointer. This pointer is used by
3457 the callers to this function to create a memory reference expression for
3458 vector load/store access.
3459
3460 Input:
3461 1. STMT: a stmt that references memory. Expected to be of the form
3462 GIMPLE_ASSIGN <name, data-ref> or
3463 GIMPLE_ASSIGN <data-ref, name>.
3464 2. AGGR_TYPE: the type of the reference, which should be either a vector
3465 or an array.
3466 3. AT_LOOP: the loop where the vector memref is to be created.
3467 4. OFFSET (optional): an offset to be added to the initial address accessed
3468 by the data-ref in STMT.
3469 5. GSI: location where the new stmts are to be placed if there is no loop.
3470 6. ONLY_INIT: indicate whether ap is to be updated in the loop, or remain
3471 pointing to the initial address.
3472
3473 Output:
3474 1. Declare a new ptr to vector_type, and have it point to the base of the
3475 data reference (the initial address accessed by the data reference).
3476 For example, for vector of type V8HI, the following code is generated:
3477
3478 v8hi *ap;
3479 ap = (v8hi *)initial_address;
3480
3481 if OFFSET is not supplied:
3482 initial_address = &a[init];
3483 if OFFSET is supplied:
3484 initial_address = &a[init + OFFSET];
3485
3486 Return the initial_address in INITIAL_ADDRESS.
3487
3488 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
3489 update the pointer in each iteration of the loop.
3490
3491 Return the increment stmt that updates the pointer in PTR_INCR.
3492
3493 3. Set INV_P to true if the access pattern of the data reference in the
3494 vectorized loop is invariant. Set it to false otherwise.
3495
3496 4. Return the pointer. */
3497
3498 tree
3499 vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3500 tree offset, tree *initial_address,
3501 gimple_stmt_iterator *gsi, gimple *ptr_incr,
3502 bool only_init, bool *inv_p)
3503 {
3504 const char *base_name;
3505 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3506 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3507 struct loop *loop = NULL;
3508 bool nested_in_vect_loop = false;
3509 struct loop *containing_loop = NULL;
3510 tree aggr_ptr_type;
3511 tree aggr_ptr;
3512 tree new_temp;
3513 gimple vec_stmt;
3514 gimple_seq new_stmt_list = NULL;
3515 edge pe = NULL;
3516 basic_block new_bb;
3517 tree aggr_ptr_init;
3518 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3519 tree aptr;
3520 gimple_stmt_iterator incr_gsi;
3521 bool insert_after;
3522 bool negative;
3523 tree indx_before_incr, indx_after_incr;
3524 gimple incr;
3525 tree step;
3526 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3527
3528 gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
3529 || TREE_CODE (aggr_type) == VECTOR_TYPE);
3530
3531 if (loop_vinfo)
3532 {
3533 loop = LOOP_VINFO_LOOP (loop_vinfo);
3534 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3535 containing_loop = (gimple_bb (stmt))->loop_father;
3536 pe = loop_preheader_edge (loop);
3537 }
3538 else
3539 {
3540 gcc_assert (bb_vinfo);
3541 only_init = true;
3542 *ptr_incr = NULL;
3543 }
3544
3545 /* Check the step (evolution) of the load in LOOP, and record
3546 whether it's invariant. */
3547 if (nested_in_vect_loop)
3548 step = STMT_VINFO_DR_STEP (stmt_info);
3549 else
3550 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3551
3552 if (tree_int_cst_compare (step, size_zero_node) == 0)
3553 *inv_p = true;
3554 else
3555 *inv_p = false;
3556 negative = tree_int_cst_compare (step, size_zero_node) < 0;
3557
3558 /* Create an expression for the first address accessed by this load
3559 in LOOP. */
3560 base_name = get_name (DR_BASE_ADDRESS (dr));
3561
3562 if (dump_enabled_p ())
3563 {
3564 tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
3565 dump_printf_loc (MSG_NOTE, vect_location,
3566 "create %s-pointer variable to type: ",
3567 tree_code_name[(int) TREE_CODE (aggr_type)]);
3568 dump_generic_expr (MSG_NOTE, TDF_SLIM, aggr_type);
3569 if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
3570 dump_printf (MSG_NOTE, " vectorizing an array ref: ");
3571 else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
3572 dump_printf (MSG_NOTE, " vectorizing a record based array ref: ");
3573 else
3574 dump_printf (MSG_NOTE, " vectorizing a pointer ref: ");
3575 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_BASE_OBJECT (dr));
3576 }
3577
3578 /* (1) Create the new aggregate-pointer variable.
3579 Vector and array types inherit the alias set of their component
3580 type by default, so we need to use a ref-all pointer if the data
3581 reference does not conflict with the created aggregate data
3582 reference because it is not addressable. */
3583 bool need_ref_all = false;
3584 if (!alias_sets_conflict_p (get_alias_set (aggr_type),
3585 get_alias_set (DR_REF (dr))))
3586 need_ref_all = true;
3587 /* Likewise for any of the data references in the stmt group. */
3588 else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
3589 {
3590 gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
3591 do
3592 {
3593 stmt_vec_info sinfo = vinfo_for_stmt (orig_stmt);
3594 struct data_reference *sdr = STMT_VINFO_DATA_REF (sinfo);
3595 if (!alias_sets_conflict_p (get_alias_set (aggr_type),
3596 get_alias_set (DR_REF (sdr))))
3597 {
3598 need_ref_all = true;
3599 break;
3600 }
3601 orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (sinfo);
3602 }
3603 while (orig_stmt);
3604 }
3605 aggr_ptr_type = build_pointer_type_for_mode (aggr_type, ptr_mode,
3606 need_ref_all);
3607 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, base_name);
3608
3609
3610 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3611 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3612 def-use update cycles for the pointer: one relative to the outer-loop
3613 (LOOP), which is what steps (3) and (4) below do. The other is relative
3614 to the inner-loop (which is the inner-most loop containing the dataref),
3615 and this is done by step (5) below.
3616
3617 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3618 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3619 redundant. Steps (3),(4) create the following:
3620
3621 vp0 = &base_addr;
3622 LOOP: vp1 = phi(vp0,vp2)
3623 ...
3624 ...
3625 vp2 = vp1 + step
3626 goto LOOP
3627
3628 If there is an inner-loop nested in LOOP, then step (5) will also be
3629 applied, and an additional update in the inner-loop will be created:
3630
3631 vp0 = &base_addr;
3632 LOOP: vp1 = phi(vp0,vp2)
3633 ...
3634 inner: vp3 = phi(vp1,vp4)
3635 vp4 = vp3 + inner_step
3636 if () goto inner
3637 ...
3638 vp2 = vp1 + step
3639 if () goto LOOP */
3640
3641 /* (2) Calculate the initial address of the aggregate-pointer, and set
3642 the aggregate-pointer to point to it before the loop. */
3643
3644 /* Create: &(base[init_val+offset]) in the loop preheader. */
3645
3646 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3647 offset, loop);
3648 if (new_stmt_list)
3649 {
3650 if (pe)
3651 {
3652 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3653 gcc_assert (!new_bb);
3654 }
3655 else
3656 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
3657 }
3658
3659 *initial_address = new_temp;
3660
3661 /* Create: p = (aggr_type *) initial_base */
3662 if (TREE_CODE (new_temp) != SSA_NAME
3663 || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
3664 {
3665 vec_stmt = gimple_build_assign (aggr_ptr,
3666 fold_convert (aggr_ptr_type, new_temp));
3667 aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
3668 /* Copy the points-to information if it exists. */
3669 if (DR_PTR_INFO (dr))
3670 duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
3671 gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
3672 if (pe)
3673 {
3674 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3675 gcc_assert (!new_bb);
3676 }
3677 else
3678 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
3679 }
3680 else
3681 aggr_ptr_init = new_temp;
3682
3683 /* (3) Handle the updating of the aggregate-pointer inside the loop.
3684 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3685 inner-loop nested in LOOP (during outer-loop vectorization). */
3686
3687 /* No update in loop is required. */
3688 if (only_init && (!loop_vinfo || at_loop == loop))
3689 aptr = aggr_ptr_init;
3690 else
3691 {
3692 /* The step of the aggregate pointer is the type size. */
3693 tree step = TYPE_SIZE_UNIT (aggr_type);
3694 /* One exception to the above is when the scalar step of the load in
3695 LOOP is zero. In this case the step here is also zero. */
3696 if (*inv_p)
3697 step = size_zero_node;
3698 else if (negative)
3699 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3700
3701 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3702
3703 create_iv (aggr_ptr_init,
3704 fold_convert (aggr_ptr_type, step),
3705 aggr_ptr, loop, &incr_gsi, insert_after,
3706 &indx_before_incr, &indx_after_incr);
3707 incr = gsi_stmt (incr_gsi);
3708 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3709
3710 /* Copy the points-to information if it exists. */
3711 if (DR_PTR_INFO (dr))
3712 {
3713 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3714 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3715 }
3716 if (ptr_incr)
3717 *ptr_incr = incr;
3718
3719 aptr = indx_before_incr;
3720 }
3721
3722 if (!nested_in_vect_loop || only_init)
3723 return aptr;
3724
3725
3726 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
3727 nested in LOOP, if it exists. */
3728
3729 gcc_assert (nested_in_vect_loop);
3730 if (!only_init)
3731 {
3732 standard_iv_increment_position (containing_loop, &incr_gsi,
3733 &insert_after);
3734 create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
3735 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3736 &indx_after_incr);
3737 incr = gsi_stmt (incr_gsi);
3738 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3739
3740 /* Copy the points-to information if it exists. */
3741 if (DR_PTR_INFO (dr))
3742 {
3743 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3744 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3745 }
3746 if (ptr_incr)
3747 *ptr_incr = incr;
3748
3749 return indx_before_incr;
3750 }
3751 else
3752 gcc_unreachable ();
3753 }
3754
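/* Illustrative sketch (not GCC internals): the effect of the pointer
   created above for a simple, non-nested loop can be pictured as the
   following plain C, using the GNU vector extension.  All names here
   (vec8hi, vectorized_copy) are invented for the example; the real code
   emits the equivalent GIMPLE PHI and increment shown in the big comment
   above.

     typedef short vec8hi __attribute__ ((vector_size (16)));

     void
     vectorized_copy (short *a, short *b, int n)
     {
       vec8hi *ap = (vec8hi *) a;   // aggr_ptr_init: points to &a[init]
       vec8hi *bp = (vec8hi *) b;
       for (int i = 0; i < n / 8; i++)
         {
           *bp = *ap;               // vector access through the pointer
           ap++;                    // step == TYPE_SIZE_UNIT (aggr_type)
           bp++;
         }
     }
*/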
3755
3756 /* Function bump_vector_ptr
3757
3758 Increment a pointer (to a vector type) by vector-size. If requested,
3759 i.e. if PTR-INCR is given, then also connect the new increment stmt
3760 to the existing def-use update-chain of the pointer, by modifying
3761 the PTR_INCR as illustrated below:
3762
3763 The pointer def-use update-chain before this function:
3764 DATAREF_PTR = phi (p_0, p_2)
3765 ....
3766 PTR_INCR: p_2 = DATAREF_PTR + step
3767
3768 The pointer def-use update-chain after this function:
3769 DATAREF_PTR = phi (p_0, p_2)
3770 ....
3771 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3772 ....
3773 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3774
3775 Input:
3776 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3777 in the loop.
3778 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3779 the loop. The increment amount across iterations is expected
3780 to be vector_size.
3781 BSI - location where the new update stmt is to be placed.
3782 STMT - the original scalar memory-access stmt that is being vectorized.
3783 BUMP - optional. The offset by which to bump the pointer. If not given,
3784 the offset is assumed to be vector_size.
3785
3786 Output: Return NEW_DATAREF_PTR as illustrated above.
3787
3788 */
3789
3790 tree
3791 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3792 gimple stmt, tree bump)
3793 {
3794 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3795 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3796 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3797 tree update = TYPE_SIZE_UNIT (vectype);
3798 gimple incr_stmt;
3799 ssa_op_iter iter;
3800 use_operand_p use_p;
3801 tree new_dataref_ptr;
3802
3803 if (bump)
3804 update = bump;
3805
3806 new_dataref_ptr = copy_ssa_name (dataref_ptr, NULL);
3807 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, new_dataref_ptr,
3808 dataref_ptr, update);
3809 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3810
3811 /* Copy the points-to information if it exists. */
3812 if (DR_PTR_INFO (dr))
3813 {
3814 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3815 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
3816 }
3817
3818 if (!ptr_incr)
3819 return new_dataref_ptr;
3820
3821 /* Update the vector-pointer's cross-iteration increment. */
3822 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3823 {
3824 tree use = USE_FROM_PTR (use_p);
3825
3826 if (use == dataref_ptr)
3827 SET_USE (use_p, new_dataref_ptr);
3828 else
3829 gcc_assert (tree_int_cst_compare (use, update) == 0);
3830 }
3831
3832 return new_dataref_ptr;
3833 }
3834
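/* A hand-written analogue (not produced by this file) of why a bump pairs
   with the cross-iteration increment: after bump_vector_ptr, the increment
   PTR_INCR is re-expressed on top of the bumped name, so the pointer
   advances by BUMP plus the original step per iteration.  For a group
   needing two vectors per iteration with BUMP == step == vector size,
   that corresponds to the plain C below (vec4si and store_group_of_two
   are invented names).

     typedef int vec4si __attribute__ ((vector_size (16)));

     void
     store_group_of_two (vec4si *p, vec4si v0, vec4si v1, int iters)
     {
       for (int i = 0; i < iters; i++)
         {
           *p = v0;
           p = p + 1;   // NEW_DATAREF_PTR = DATAREF_PTR + BUMP
           *p = v1;
           p = p + 1;   // PTR_INCR now steps from the bumped pointer
         }
     }
*/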
3835
3836 /* Function vect_create_destination_var.
3837
3838 Create a new temporary of type VECTYPE. */
3839
3840 tree
3841 vect_create_destination_var (tree scalar_dest, tree vectype)
3842 {
3843 tree vec_dest;
3844 const char *new_name;
3845 tree type;
3846 enum vect_var_kind kind;
3847
3848 kind = vectype ? vect_simple_var : vect_scalar_var;
3849 type = vectype ? vectype : TREE_TYPE (scalar_dest);
3850
3851 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3852
3853 new_name = get_name (scalar_dest);
3854 if (!new_name)
3855 new_name = "var_";
3856 vec_dest = vect_get_new_vect_var (type, kind, new_name);
3857
3858 return vec_dest;
3859 }
3860
3861 /* Function vect_grouped_store_supported.
3862
3863 Returns TRUE if interleave high and interleave low permutations
3864 are supported, and FALSE otherwise. */
3865
3866 bool
3867 vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
3868 {
3869 enum machine_mode mode = TYPE_MODE (vectype);
3870
3871 /* vect_permute_store_chain requires the group size to be a power of two. */
3872 if (exact_log2 (count) == -1)
3873 {
3874 if (dump_enabled_p ())
3875 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3876 "the size of the group of accesses"
3877 " is not a power of 2");
3878 return false;
3879 }
3880
3881 /* Check that the permutation is supported. */
3882 if (VECTOR_MODE_P (mode))
3883 {
3884 unsigned int i, nelt = GET_MODE_NUNITS (mode);
3885 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
3886 for (i = 0; i < nelt / 2; i++)
3887 {
3888 sel[i * 2] = i;
3889 sel[i * 2 + 1] = i + nelt;
3890 }
3891 if (can_vec_perm_p (mode, false, sel))
3892 {
3893 for (i = 0; i < nelt; i++)
3894 sel[i] += nelt / 2;
3895 if (can_vec_perm_p (mode, false, sel))
3896 return true;
3897 }
3898 }
3899
3900 if (dump_enabled_p ())
3901 dump_printf (MSG_MISSED_OPTIMIZATION,
3902 "interleave op not supported by target.");
3903 return false;
3904 }
3905
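/* For reference, a standalone sketch (plain C, no GCC APIs) of the two
   selectors whose support is queried above, for an 8-element vector.
   Compiling and running it prints the interleave-high mask followed by
   the interleave-low mask.

     #include <stdio.h>

     int
     main (void)
     {
       unsigned nelt = 8, sel[8];

       for (unsigned i = 0; i < nelt / 2; i++)   // interleave-high selector
         {
           sel[i * 2] = i;
           sel[i * 2 + 1] = i + nelt;
         }
       for (unsigned i = 0; i < nelt; i++)
         printf ("%u ", sel[i]);                 // 0 8 1 9 2 10 3 11
       printf ("\n");

       for (unsigned i = 0; i < nelt; i++)       // shift to the low selector
         sel[i] += nelt / 2;
       for (unsigned i = 0; i < nelt; i++)
         printf ("%u ", sel[i]);                 // 4 12 5 13 6 14 7 15
       printf ("\n");
       return 0;
     }
*/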
3906
3907 /* Return TRUE if vec_store_lanes is available for COUNT vectors of
3908 type VECTYPE. */
3909
3910 bool
3911 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
3912 {
3913 return vect_lanes_optab_supported_p ("vec_store_lanes",
3914 vec_store_lanes_optab,
3915 vectype, count);
3916 }
3917
3918
3919 /* Function vect_permute_store_chain.
3920
3921 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3922 a power of 2, generate interleave_high/low stmts to reorder the data
3923 correctly for the stores. Return the final references for stores in
3924 RESULT_CHAIN.
3925
3926 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3927 The input is 4 vectors each containing 8 elements. We assign a number to
3928 each element; the input sequence is:
3929
3930 1st vec: 0 1 2 3 4 5 6 7
3931 2nd vec: 8 9 10 11 12 13 14 15
3932 3rd vec: 16 17 18 19 20 21 22 23
3933 4th vec: 24 25 26 27 28 29 30 31
3934
3935 The output sequence should be:
3936
3937 1st vec: 0 8 16 24 1 9 17 25
3938 2nd vec: 2 10 18 26 3 11 19 27
3939 3rd vec: 4 12 20 28 5 13 21 29
3940 4th vec: 6 14 22 30 7 15 23 31
3941
3942 i.e., we interleave the contents of the four vectors in their order.
3943
3944 We use interleave_high/low instructions to create such output. The input of
3945 each interleave_high/low operation is two vectors:
3946 1st vec 2nd vec
3947 0 1 2 3 4 5 6 7
3948 the even elements of the result vector are obtained left-to-right from the
3949 high/low elements of the first vector. The odd elements of the result are
3950 obtained left-to-right from the high/low elements of the second vector.
3951 The output of interleave_high will be: 0 4 1 5
3952 and of interleave_low: 2 6 3 7
3953
3954
3955 The permutation is done in log LENGTH stages. In each stage interleave_high
3956 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3957 where the first argument is taken from the first half of DR_CHAIN and the
3958 second argument from its second half.
3959 In our example,
3960
3961 I1: interleave_high (1st vec, 3rd vec)
3962 I2: interleave_low (1st vec, 3rd vec)
3963 I3: interleave_high (2nd vec, 4th vec)
3964 I4: interleave_low (2nd vec, 4th vec)
3965
3966 The output for the first stage is:
3967
3968 I1: 0 16 1 17 2 18 3 19
3969 I2: 4 20 5 21 6 22 7 23
3970 I3: 8 24 9 25 10 26 11 27
3971 I4: 12 28 13 29 14 30 15 31
3972
3973 The output of the second stage, i.e. the final result is:
3974
3975 I1: 0 8 16 24 1 9 17 25
3976 I2: 2 10 18 26 3 11 19 27
3977 I3: 4 12 20 28 5 13 21 29
3978 I4: 6 14 22 30 7 15 23 31. */
3979
3980 void
3981 vect_permute_store_chain (vec<tree> dr_chain,
3982 unsigned int length,
3983 gimple stmt,
3984 gimple_stmt_iterator *gsi,
3985 vec<tree> *result_chain)
3986 {
3987 tree vect1, vect2, high, low;
3988 gimple perm_stmt;
3989 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3990 tree perm_mask_low, perm_mask_high;
3991 unsigned int i, n;
3992 unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
3993 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
3994
3995 result_chain->quick_grow (length);
3996 memcpy (result_chain->address (), dr_chain.address (),
3997 length * sizeof (tree));
3998
3999 for (i = 0, n = nelt / 2; i < n; i++)
4000 {
4001 sel[i * 2] = i;
4002 sel[i * 2 + 1] = i + nelt;
4003 }
4004 perm_mask_high = vect_gen_perm_mask (vectype, sel);
4005 gcc_assert (perm_mask_high != NULL);
4006
4007 for (i = 0; i < nelt; i++)
4008 sel[i] += nelt / 2;
4009 perm_mask_low = vect_gen_perm_mask (vectype, sel);
4010 gcc_assert (perm_mask_low != NULL);
4011
4012 for (i = 0, n = exact_log2 (length); i < n; i++)
4013 {
4014 for (j = 0; j < length/2; j++)
4015 {
4016 vect1 = dr_chain[j];
4017 vect2 = dr_chain[j+length/2];
4018
4019 /* Create interleaving stmt:
4020 high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}> */
4021 high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
4022 perm_stmt
4023 = gimple_build_assign_with_ops (VEC_PERM_EXPR, high,
4024 vect1, vect2, perm_mask_high);
4025 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4026 (*result_chain)[2*j] = high;
4027
4028 /* Create interleaving stmt:
4029 low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
4030 nelt*3/2+1, ...}> */
4031 low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
4032 perm_stmt
4033 = gimple_build_assign_with_ops (VEC_PERM_EXPR, low,
4034 vect1, vect2, perm_mask_low);
4035 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4036 (*result_chain)[2*j+1] = low;
4037 }
4038 memcpy (dr_chain.address (), result_chain->address (),
4039 length * sizeof (tree));
4040 }
4041 }
4042
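/* A self-contained simulation (plain C, invented helper names, not part of
   the vectorizer) of the interleave stages implemented above.  Running it
   reproduces the 4x8 example from the comment, printing the four output
   vectors 0 8 16 24 ... through 6 14 22 30 7 15 23 31.

     #include <stdio.h>
     #include <string.h>

     #define NELT 8
     #define LEN  4

     // One VEC_PERM_EXPR over the concatenation of A and B.
     static void
     vec_perm (const int *a, const int *b, const unsigned *sel, int *out)
     {
       for (unsigned i = 0; i < NELT; i++)
         out[i] = sel[i] < NELT ? a[sel[i]] : b[sel[i] - NELT];
     }

     int
     main (void)
     {
       int chain[LEN][NELT], result[LEN][NELT];
       unsigned sel_hi[NELT], sel_lo[NELT];

       for (int v = 0; v < LEN; v++)            // input vectors 0..31
         for (int e = 0; e < NELT; e++)
           chain[v][e] = v * NELT + e;

       for (unsigned i = 0; i < NELT / 2; i++)  // interleave-high selector
         {
           sel_hi[i * 2] = i;
           sel_hi[i * 2 + 1] = i + NELT;
         }
       for (unsigned i = 0; i < NELT; i++)      // interleave-low selector
         sel_lo[i] = sel_hi[i] + NELT / 2;

       for (int stage = 0; stage < 2; stage++)  // log2 (LEN) stages
         {
           for (int j = 0; j < LEN / 2; j++)
             {
               vec_perm (chain[j], chain[j + LEN / 2], sel_hi, result[2 * j]);
               vec_perm (chain[j], chain[j + LEN / 2], sel_lo,
                         result[2 * j + 1]);
             }
           memcpy (chain, result, sizeof chain);
         }

       for (int v = 0; v < LEN; v++, printf ("\n"))
         for (int e = 0; e < NELT; e++)
           printf ("%2d ", chain[v][e]);
       return 0;
     }
*/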
4043 /* Function vect_setup_realignment
4044
4045 This function is called when vectorizing an unaligned load using
4046 the dr_explicit_realign[_optimized] scheme.
4047 This function generates the following code at the loop prolog:
4048
4049 p = initial_addr;
4050 x msq_init = *(floor(p)); # prolog load
4051 realignment_token = call target_builtin;
4052 loop:
4053 x msq = phi (msq_init, ---)
4054
4055 The stmts marked with x are generated only for the case of
4056 dr_explicit_realign_optimized.
4057
4058 The code above sets up a new (vector) pointer, pointing to the first
4059 location accessed by STMT, and a "floor-aligned" load using that pointer.
4060 It also generates code to compute the "realignment-token" (if the relevant
4061 target hook was defined), and creates a phi-node at the loop-header bb
4062 whose arguments are the result of the prolog-load (created by this
4063 function) and the result of a load that takes place in the loop (to be
4064 created by the caller to this function).
4065
4066 For the case of dr_explicit_realign_optimized:
4067 The caller to this function uses the phi-result (msq) to create the
4068 realignment code inside the loop, and sets up the missing phi argument,
4069 as follows:
4070 loop:
4071 msq = phi (msq_init, lsq)
4072 lsq = *(floor(p')); # load in loop
4073 result = realign_load (msq, lsq, realignment_token);
4074
4075 For the case of dr_explicit_realign:
4076 loop:
4077 msq = *(floor(p)); # load in loop
4078 p' = p + (VS-1);
4079 lsq = *(floor(p')); # load in loop
4080 result = realign_load (msq, lsq, realignment_token);
4081
4082 Input:
4083 STMT - (scalar) load stmt to be vectorized. This load accesses
4084 a memory location that may be unaligned.
4085 BSI - place where new code is to be inserted.
4086 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
4087 is used.
4088
4089 Output:
4090 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4091 target hook, if defined.
4092 Return value - the result of the loop-header phi node. */
4093
4094 tree
4095 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
4096 tree *realignment_token,
4097 enum dr_alignment_support alignment_support_scheme,
4098 tree init_addr,
4099 struct loop **at_loop)
4100 {
4101 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4102 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4103 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4104 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4105 struct loop *loop = NULL;
4106 edge pe = NULL;
4107 tree scalar_dest = gimple_assign_lhs (stmt);
4108 tree vec_dest;
4109 gimple inc;
4110 tree ptr;
4111 tree data_ref;
4112 gimple new_stmt;
4113 basic_block new_bb;
4114 tree msq_init = NULL_TREE;
4115 tree new_temp;
4116 gimple phi_stmt;
4117 tree msq = NULL_TREE;
4118 gimple_seq stmts = NULL;
4119 bool inv_p;
4120 bool compute_in_loop = false;
4121 bool nested_in_vect_loop = false;
4122 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4123 struct loop *loop_for_initial_load = NULL;
4124
4125 if (loop_vinfo)
4126 {
4127 loop = LOOP_VINFO_LOOP (loop_vinfo);
4128 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4129 }
4130
4131 gcc_assert (alignment_support_scheme == dr_explicit_realign
4132 || alignment_support_scheme == dr_explicit_realign_optimized);
4133
4134 /* We need to generate three things:
4135 1. the misalignment computation
4136 2. the extra vector load (for the optimized realignment scheme).
4137 3. the phi node for the two vectors from which the realignment is
4138 done (for the optimized realignment scheme). */
4139
4140 /* 1. Determine where to generate the misalignment computation.
4141
4142 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4143 calculation will be generated by this function, outside the loop (in the
4144 preheader). Otherwise, INIT_ADDR has already been computed for us by the
4145 caller, inside the loop.
4146
4147 Background: If the misalignment remains fixed throughout the iterations of
4148 the loop, then both realignment schemes are applicable, and also the
4149 misalignment computation can be done outside LOOP. This is because we are
4150 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4151 are a multiple of VS (the Vector Size), and therefore the misalignment in
4152 different vectorized LOOP iterations is always the same.
4153 The problem arises only if the memory access is in an inner-loop nested
4154 inside LOOP, which is now being vectorized using outer-loop vectorization.
4155 This is the only case when the misalignment of the memory access may not
4156 remain fixed throughout the iterations of the inner-loop (as explained in
4157 detail in vect_supportable_dr_alignment). In this case, not only is the
4158 optimized realignment scheme not applicable, but also the misalignment
4159 computation (and generation of the realignment token that is passed to
4160 REALIGN_LOAD) have to be done inside the loop.
4161
4162 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
4163 or not, which in turn determines if the misalignment is computed inside
4164 the inner-loop, or outside LOOP. */
4165
4166 if (init_addr != NULL_TREE || !loop_vinfo)
4167 {
4168 compute_in_loop = true;
4169 gcc_assert (alignment_support_scheme == dr_explicit_realign);
4170 }
4171
4172
4173 /* 2. Determine where to generate the extra vector load.
4174
4175 For the optimized realignment scheme, instead of generating two vector
4176 loads in each iteration, we generate a single extra vector load in the
4177 preheader of the loop, and in each iteration reuse the result of the
4178 vector load from the previous iteration. In case the memory access is in
4179 an inner-loop nested inside LOOP, which is now being vectorized using
4180 outer-loop vectorization, we need to determine whether this initial vector
4181 load should be generated at the preheader of the inner-loop, or can be
4182 generated at the preheader of LOOP. If the memory access has no evolution
4183 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
4184 to be generated inside LOOP (in the preheader of the inner-loop). */
4185
4186 if (nested_in_vect_loop)
4187 {
4188 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
4189 bool invariant_in_outerloop =
4190 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
4191 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
4192 }
4193 else
4194 loop_for_initial_load = loop;
4195 if (at_loop)
4196 *at_loop = loop_for_initial_load;
4197
4198 if (loop_for_initial_load)
4199 pe = loop_preheader_edge (loop_for_initial_load);
4200
4201 /* 3. For the case of the optimized realignment, create the first vector
4202 load at the loop preheader. */
4203
4204 if (alignment_support_scheme == dr_explicit_realign_optimized)
4205 {
4206 /* Create msq_init = *(floor(p1)) in the loop preheader */
4207
4208 gcc_assert (!compute_in_loop);
4209 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4210 ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
4211 NULL_TREE, &init_addr, NULL, &inc,
4212 true, &inv_p);
4213 new_temp = copy_ssa_name (ptr, NULL);
4214 new_stmt = gimple_build_assign_with_ops
4215 (BIT_AND_EXPR, new_temp, ptr,
4216 build_int_cst (TREE_TYPE (ptr),
4217 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4218 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4219 gcc_assert (!new_bb);
4220 data_ref
4221 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
4222 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
4223 new_stmt = gimple_build_assign (vec_dest, data_ref);
4224 new_temp = make_ssa_name (vec_dest, new_stmt);
4225 gimple_assign_set_lhs (new_stmt, new_temp);
4226 if (pe)
4227 {
4228 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4229 gcc_assert (!new_bb);
4230 }
4231 else
4232 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4233
4234 msq_init = gimple_assign_lhs (new_stmt);
4235 }
4236
4237 /* 4. Create realignment token using a target builtin, if available.
4238 It is done either inside the containing loop, or before LOOP (as
4239 determined above). */
4240
4241 if (targetm.vectorize.builtin_mask_for_load)
4242 {
4243 tree builtin_decl;
4244
4245 /* Compute INIT_ADDR - the initial address accessed by this memref. */
4246 if (!init_addr)
4247 {
4248 /* Generate the INIT_ADDR computation outside LOOP. */
4249 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
4250 NULL_TREE, loop);
4251 if (loop)
4252 {
4253 pe = loop_preheader_edge (loop);
4254 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4255 gcc_assert (!new_bb);
4256 }
4257 else
4258 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
4259 }
4260
4261 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4262 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
4263 vec_dest =
4264 vect_create_destination_var (scalar_dest,
4265 gimple_call_return_type (new_stmt));
4266 new_temp = make_ssa_name (vec_dest, new_stmt);
4267 gimple_call_set_lhs (new_stmt, new_temp);
4268
4269 if (compute_in_loop)
4270 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4271 else
4272 {
4273 /* Generate the misalignment computation outside LOOP. */
4274 pe = loop_preheader_edge (loop);
4275 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4276 gcc_assert (!new_bb);
4277 }
4278
4279 *realignment_token = gimple_call_lhs (new_stmt);
4280
4281 /* The result of the CALL_EXPR to this builtin is determined from
4282 the value of the parameter and no global variables are touched
4283 which makes the builtin a "const" function. Requiring the
4284 builtin to have the "const" attribute makes it unnecessary
4285 to call mark_call_clobbered. */
4286 gcc_assert (TREE_READONLY (builtin_decl));
4287 }
4288
4289 if (alignment_support_scheme == dr_explicit_realign)
4290 return msq;
4291
4292 gcc_assert (!compute_in_loop);
4293 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
4294
4295
4296 /* 5. Create msq = phi <msq_init, lsq> in loop */
4297
4298 pe = loop_preheader_edge (containing_loop);
4299 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4300 msq = make_ssa_name (vec_dest, NULL);
4301 phi_stmt = create_phi_node (msq, containing_loop->header);
4302 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
4303
4304 return msq;
4305 }
4306
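/* A target-independent model (plain C, invented names) of what the
   REALIGN_LOAD produced by the callers of this function computes: the
   unaligned vector is assembled from two aligned loads, MSQ and LSQ,
   according to the misalignment.  Real targets do this with a byte
   shuffle driven by the mask from builtin_mask_for_load; the element-wise
   selection below is only a conceptual sketch.

     #include <stdio.h>

     #define VS 4                         // elements per vector

     static void
     realign_load (const int *msq, const int *lsq, int token, int *out)
     {
       // Take VS consecutive elements starting at offset TOKEN from the
       // concatenation of the two aligned vectors.
       for (int i = 0; i < VS; i++)
         out[i] = token + i < VS ? msq[token + i] : lsq[token + i - VS];
     }

     int
     main (void)
     {
       int mem[12], msq[VS], lsq[VS], va[VS];
       int token = 1;                     // misalignment, in elements

       for (int i = 0; i < 12; i++)
         mem[i] = i;

       for (int i = 0; i < VS; i++)
         {
           msq[i] = mem[i];               // msq = *(floor (p))
           lsq[i] = mem[VS + i];          // lsq = *(floor (p + VS - 1))
         }
       realign_load (msq, lsq, token, va);

       for (int i = 0; i < VS; i++)
         printf ("%d ", va[i]);           // prints 1 2 3 4, i.e. p[0..3]
       printf ("\n");
       return 0;
     }
*/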
4307
4308 /* Function vect_grouped_load_supported.
4309
4310 Returns TRUE if even and odd permutations are supported,
4311 and FALSE otherwise. */
4312
4313 bool
4314 vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
4315 {
4316 enum machine_mode mode = TYPE_MODE (vectype);
4317
4318 /* vect_permute_load_chain requires the group size to be a power of two. */
4319 if (exact_log2 (count) == -1)
4320 {
4321 if (dump_enabled_p ())
4322 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4323 "the size of the group of accesses"
4324 " is not a power of 2");
4325 return false;
4326 }
4327
4328 /* Check that the permutation is supported. */
4329 if (VECTOR_MODE_P (mode))
4330 {
4331 unsigned int i, nelt = GET_MODE_NUNITS (mode);
4332 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4333
4334 for (i = 0; i < nelt; i++)
4335 sel[i] = i * 2;
4336 if (can_vec_perm_p (mode, false, sel))
4337 {
4338 for (i = 0; i < nelt; i++)
4339 sel[i] = i * 2 + 1;
4340 if (can_vec_perm_p (mode, false, sel))
4341 return true;
4342 }
4343 }
4344
4345 if (dump_enabled_p ())
4346 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4347 "extract even/odd not supported by target");
4348 return false;
4349 }
4350
4351 /* Return TRUE if vec_load_lanes is available for COUNT vectors of
4352 type VECTYPE. */
4353
4354 bool
4355 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4356 {
4357 return vect_lanes_optab_supported_p ("vec_load_lanes",
4358 vec_load_lanes_optab,
4359 vectype, count);
4360 }
4361
4362 /* Function vect_permute_load_chain.
4363
4364 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
4365 a power of 2, generate extract_even/odd stmts to reorder the input data
4366 correctly. Return the final references for loads in RESULT_CHAIN.
4367
4368 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4369 The input is 4 vectors each containing 8 elements. We assign a number to each
4370 element; the input sequence is:
4371
4372 1st vec: 0 1 2 3 4 5 6 7
4373 2nd vec: 8 9 10 11 12 13 14 15
4374 3rd vec: 16 17 18 19 20 21 22 23
4375 4th vec: 24 25 26 27 28 29 30 31
4376
4377 The output sequence should be:
4378
4379 1st vec: 0 4 8 12 16 20 24 28
4380 2nd vec: 1 5 9 13 17 21 25 29
4381 3rd vec: 2 6 10 14 18 22 26 30
4382 4th vec: 3 7 11 15 19 23 27 31
4383
4384 i.e., the first output vector should contain the first elements of each
4385 interleaving group, etc.
4386
4387 We use extract_even/odd instructions to create such output. The input of
4388 each extract_even/odd operation is two vectors
4389 1st vec 2nd vec
4390 0 1 2 3 4 5 6 7
4391
4392 and the output is the vector of extracted even/odd elements. The output of
4393 extract_even will be: 0 2 4 6
4394 and of extract_odd: 1 3 5 7
4395
4396
4397 The permutation is done in log LENGTH stages. In each stage extract_even
4398 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
4399 their order. In our example,
4400
4401 E1: extract_even (1st vec, 2nd vec)
4402 E2: extract_odd (1st vec, 2nd vec)
4403 E3: extract_even (3rd vec, 4th vec)
4404 E4: extract_odd (3rd vec, 4th vec)
4405
4406 The output for the first stage will be:
4407
4408 E1: 0 2 4 6 8 10 12 14
4409 E2: 1 3 5 7 9 11 13 15
4410 E3: 16 18 20 22 24 26 28 30
4411 E4: 17 19 21 23 25 27 29 31
4412
4413 In order to proceed and create the correct sequence for the next stage (or
4414 for the correct output, if the second stage is the last one, as in our
4415 example), we first put the output of the extract_even operation and then the
4416 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4417 The input for the second stage is:
4418
4419 1st vec (E1): 0 2 4 6 8 10 12 14
4420 2nd vec (E3): 16 18 20 22 24 26 28 30
4421 3rd vec (E2): 1 3 5 7 9 11 13 15
4422 4th vec (E4): 17 19 21 23 25 27 29 31
4423
4424 The output of the second stage:
4425
4426 E1: 0 4 8 12 16 20 24 28
4427 E2: 2 6 10 14 18 22 26 30
4428 E3: 1 5 9 13 17 21 25 29
4429 E4: 3 7 11 15 19 23 27 31
4430
4431 And RESULT_CHAIN after reordering:
4432
4433 1st vec (E1): 0 4 8 12 16 20 24 28
4434 2nd vec (E3): 1 5 9 13 17 21 25 29
4435 3rd vec (E2): 2 6 10 14 18 22 26 30
4436 4th vec (E4): 3 7 11 15 19 23 27 31. */
4437
4438 static void
4439 vect_permute_load_chain (vec<tree> dr_chain,
4440 unsigned int length,
4441 gimple stmt,
4442 gimple_stmt_iterator *gsi,
4443 vec<tree> *result_chain)
4444 {
4445 tree data_ref, first_vect, second_vect;
4446 tree perm_mask_even, perm_mask_odd;
4447 gimple perm_stmt;
4448 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4449 unsigned int i, j, log_length = exact_log2 (length);
4450 unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
4451 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4452
4453 result_chain->quick_grow (length);
4454 memcpy (result_chain->address (), dr_chain.address (),
4455 length * sizeof (tree));
4456
4457 for (i = 0; i < nelt; ++i)
4458 sel[i] = i * 2;
4459 perm_mask_even = vect_gen_perm_mask (vectype, sel);
4460 gcc_assert (perm_mask_even != NULL);
4461
4462 for (i = 0; i < nelt; ++i)
4463 sel[i] = i * 2 + 1;
4464 perm_mask_odd = vect_gen_perm_mask (vectype, sel);
4465 gcc_assert (perm_mask_odd != NULL);
4466
4467 for (i = 0; i < log_length; i++)
4468 {
4469 for (j = 0; j < length; j += 2)
4470 {
4471 first_vect = dr_chain[j];
4472 second_vect = dr_chain[j+1];
4473
4474 /* data_ref = permute_even (first_data_ref, second_data_ref); */
4475 data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
4476 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4477 first_vect, second_vect,
4478 perm_mask_even);
4479 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4480 (*result_chain)[j/2] = data_ref;
4481
4482 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
4483 data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
4484 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4485 first_vect, second_vect,
4486 perm_mask_odd);
4487 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4488 (*result_chain)[j/2+length/2] = data_ref;
4489 }
4490 memcpy (dr_chain.address (), result_chain->address (),
4491 length * sizeof (tree));
4492 }
4493 }
4494
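/* A matching standalone simulation (plain C, invented helper names) of the
   extract-even/odd stages implemented above.  It follows the RESULT_CHAIN
   placement used by the code (even parts in the first half, odd parts in
   the second half) and reproduces the final 4x8 result from the comment:
   0 4 8 ... 28 / 1 5 ... 29 / 2 6 ... 30 / 3 7 ... 31.

     #include <stdio.h>
     #include <string.h>

     #define NELT 8
     #define LEN  4

     // One VEC_PERM_EXPR over the concatenation of A and B.
     static void
     vec_perm (const int *a, const int *b, const unsigned *sel, int *out)
     {
       for (unsigned i = 0; i < NELT; i++)
         out[i] = sel[i] < NELT ? a[sel[i]] : b[sel[i] - NELT];
     }

     int
     main (void)
     {
       int chain[LEN][NELT], result[LEN][NELT];
       unsigned sel_even[NELT], sel_odd[NELT];

       for (int v = 0; v < LEN; v++)           // interleaved input 0..31
         for (int e = 0; e < NELT; e++)
           chain[v][e] = v * NELT + e;

       for (unsigned i = 0; i < NELT; i++)
         {
           sel_even[i] = i * 2;                // 0 2 4 ... 14
           sel_odd[i] = i * 2 + 1;             // 1 3 5 ... 15
         }

       for (int stage = 0; stage < 2; stage++) // log2 (LEN) stages
         {
           for (int j = 0; j < LEN; j += 2)
             {
               vec_perm (chain[j], chain[j + 1], sel_even, result[j / 2]);
               vec_perm (chain[j], chain[j + 1], sel_odd,
                         result[j / 2 + LEN / 2]);
             }
           memcpy (chain, result, sizeof chain);
         }

       for (int v = 0; v < LEN; v++, printf ("\n"))
         for (int e = 0; e < NELT; e++)
           printf ("%2d ", chain[v][e]);
       return 0;
     }
*/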
4495
4496 /* Function vect_transform_grouped_load.
4497
4498 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4499 to perform their permutation and ascribe the result vectorized statements to
4500 the scalar statements.
4501 */
4502
4503 void
4504 vect_transform_grouped_load (gimple stmt, vec<tree> dr_chain, int size,
4505 gimple_stmt_iterator *gsi)
4506 {
4507 vec<tree> result_chain = vNULL;
4508
4509 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4510 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
4511 vectors, that are ready for vector computation. */
4512 result_chain.create (size);
4513 vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
4514 vect_record_grouped_load_vectors (stmt, result_chain);
4515 result_chain.release ();
4516 }
4517
4518 /* RESULT_CHAIN contains the output of a group of grouped loads that were
4519 generated as part of the vectorization of STMT. Assign the statement
4520 for each vector to the associated scalar statement. */
4521
4522 void
4523 vect_record_grouped_load_vectors (gimple stmt, vec<tree> result_chain)
4524 {
4525 gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
4526 gimple next_stmt, new_stmt;
4527 unsigned int i, gap_count;
4528 tree tmp_data_ref;
4529
4530 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4531 Since we scan the chain starting from its first node, their order
4532 corresponds to the order of data-refs in RESULT_CHAIN. */
4533 next_stmt = first_stmt;
4534 gap_count = 1;
4535 FOR_EACH_VEC_ELT (result_chain, i, tmp_data_ref)
4536 {
4537 if (!next_stmt)
4538 break;
4539
4540 /* Skip the gaps. Loads created for the gaps will be removed by dead
4541 code elimination pass later. No need to check for the first stmt in
4542 the group, since it always exists.
4543 GROUP_GAP is the number of steps in elements from the previous
4544 access (if there is no gap GROUP_GAP is 1). We skip loads that
4545 correspond to the gaps. */
4546 if (next_stmt != first_stmt
4547 && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
4548 {
4549 gap_count++;
4550 continue;
4551 }
4552
4553 while (next_stmt)
4554 {
4555 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4556 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4557 copies, and we put the new vector statement in the first available
4558 RELATED_STMT. */
4559 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4560 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4561 else
4562 {
4563 if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4564 {
4565 gimple prev_stmt =
4566 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4567 gimple rel_stmt =
4568 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4569 while (rel_stmt)
4570 {
4571 prev_stmt = rel_stmt;
4572 rel_stmt =
4573 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4574 }
4575
4576 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4577 new_stmt;
4578 }
4579 }
4580
4581 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4582 gap_count = 1;
4583 /* If NEXT_STMT accesses the same DR as the previous statement,
4584 put the same TMP_DATA_REF as its vectorized statement; otherwise
4585 get the next data-ref from RESULT_CHAIN. */
4586 if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4587 break;
4588 }
4589 }
4590 }
4591
4592 /* Function vect_can_force_dr_alignment_p.
4593
4594 Returns whether the alignment of a DECL can be forced to be aligned
4595 on ALIGNMENT bit boundary. */
4596
4597 bool
4598 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4599 {
4600 if (TREE_CODE (decl) != VAR_DECL)
4601 return false;
4602
4603 /* We cannot change alignment of common or external symbols as another
4604 translation unit may contain a definition with lower alignment.
4605 The rules of common symbol linking mean that the definition
4606 will override the common symbol. The same is true for constant
4607 pool entries which may be shared and are not properly merged
4608 by LTO. */
4609 if (DECL_EXTERNAL (decl)
4610 || DECL_COMMON (decl)
4611 || DECL_IN_CONSTANT_POOL (decl))
4612 return false;
4613
4614 if (TREE_ASM_WRITTEN (decl))
4615 return false;
4616
4617 /* Do not override the alignment as specified by the ABI when the used
4618 attribute is set. */
4619 if (DECL_PRESERVE_P (decl))
4620 return false;
4621
4622 /* Do not override explicit alignment set by the user when an explicit
4623 section name is also used. This is a common idiom used by many
4624 software projects. */
4625 if (DECL_SECTION_NAME (decl) != NULL_TREE
4626 && !DECL_HAS_IMPLICIT_SECTION_NAME_P (decl))
4627 return false;
4628
4629 if (TREE_STATIC (decl))
4630 return (alignment <= MAX_OFILE_ALIGNMENT);
4631 else
4632 return (alignment <= MAX_STACK_ALIGNMENT);
4633 }
4634
4635
4636 /* Return whether the data reference DR is supported with respect to its
4637 alignment.
4638 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
4639 if it is aligned, i.e., check if it is possible to vectorize it with different
4640 alignment. */
4641
4642 enum dr_alignment_support
4643 vect_supportable_dr_alignment (struct data_reference *dr,
4644 bool check_aligned_accesses)
4645 {
4646 gimple stmt = DR_STMT (dr);
4647 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4648 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4649 enum machine_mode mode = TYPE_MODE (vectype);
4650 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4651 struct loop *vect_loop = NULL;
4652 bool nested_in_vect_loop = false;
4653
4654 if (aligned_access_p (dr) && !check_aligned_accesses)
4655 return dr_aligned;
4656
4657 if (loop_vinfo)
4658 {
4659 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4660 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4661 }
4662
4663 /* Possibly unaligned access. */
4664
4665 /* We can choose between using the implicit realignment scheme (generating
4666 a misaligned_move stmt) and the explicit realignment scheme (generating
4667 aligned loads with a REALIGN_LOAD). There are two variants to the
4668 explicit realignment scheme: optimized, and unoptimized.
4669 We can optimize the realignment only if the step between consecutive
4670 vector loads is equal to the vector size. Since the vector memory
4671 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4672 is guaranteed that the misalignment amount remains the same throughout the
4673 execution of the vectorized loop. Therefore, we can create the
4674 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4675 at the loop preheader.
4676
4677 However, in the case of outer-loop vectorization, when vectorizing a
4678 memory access in the inner-loop nested within the LOOP that is now being
4679 vectorized, while it is guaranteed that the misalignment of the
4680 vectorized memory access will remain the same in different outer-loop
4681 iterations, it is *not* guaranteed that it will remain the same throughout
4682 the execution of the inner-loop. This is because the inner-loop advances
4683 with the original scalar step (and not in steps of VS). If the inner-loop
4684 step happens to be a multiple of VS, then the misalignment remains fixed
4685 and we can use the optimized realignment scheme. For example:
4686
4687 for (i=0; i<N; i++)
4688 for (j=0; j<M; j++)
4689 s += a[i+j];
4690
4691 When vectorizing the i-loop in the above example, the step between
4692 consecutive vector loads is 1, and so the misalignment does not remain
4693 fixed across the execution of the inner-loop, and the realignment cannot
4694 be optimized (as illustrated in the following pseudo vectorized loop):
4695
4696 for (i=0; i<N; i+=4)
4697 for (j=0; j<M; j++){
4698 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4699 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4700 // (assuming that we start from an aligned address).
4701 }
4702
4703 We therefore have to use the unoptimized realignment scheme:
4704
4705 for (i=0; i<N; i+=4)
4706 for (j=k; j<M; j+=4)
4707 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4708 // that the misalignment of the initial address is
4709 // 0).
4710
4711 The loop can then be vectorized as follows:
4712
4713 for (k=0; k<4; k++){
4714 rt = get_realignment_token (&vp[k]);
4715 for (i=0; i<N; i+=4){
4716 v1 = vp[i+k];
4717 for (j=k; j<M; j+=4){
4718 v2 = vp[i+j+VS-1];
4719 va = REALIGN_LOAD <v1,v2,rt>;
4720 vs += va;
4721 v1 = v2;
4722 }
4723 }
4724 } */
4725
4726 if (DR_IS_READ (dr))
4727 {
4728 bool is_packed = false;
4729 tree type = (TREE_TYPE (DR_REF (dr)));
4730
4731 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4732 && (!targetm.vectorize.builtin_mask_for_load
4733 || targetm.vectorize.builtin_mask_for_load ()))
4734 {
4735 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4736 if ((nested_in_vect_loop
4737 && (TREE_INT_CST_LOW (DR_STEP (dr))
4738 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4739 || !loop_vinfo)
4740 return dr_explicit_realign;
4741 else
4742 return dr_explicit_realign_optimized;
4743 }
4744 if (!known_alignment_for_access_p (dr))
4745 is_packed = not_size_aligned (DR_REF (dr));
4746
4747 if (targetm.vectorize.
4748 support_vector_misalignment (mode, type,
4749 DR_MISALIGNMENT (dr), is_packed))
4750 /* Can't software pipeline the loads, but can at least do them. */
4751 return dr_unaligned_supported;
4752 }
4753 else
4754 {
4755 bool is_packed = false;
4756 tree type = (TREE_TYPE (DR_REF (dr)));
4757
4758 if (!known_alignment_for_access_p (dr))
4759 is_packed = not_size_aligned (DR_REF (dr));
4760
4761 if (targetm.vectorize.
4762 support_vector_misalignment (mode, type,
4763 DR_MISALIGNMENT (dr), is_packed))
4764 return dr_unaligned_supported;
4765 }
4766
4767 /* Unsupported. */
4768 return dr_unaligned_unsupported;
4769 }
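
/* A small worked check (plain C, not vectorizer code) of the claim in the
   comment above that the misalignment of &vp[i+j] cycles with j during
   outer-loop vectorization: for 4-element vectors and an aligned starting
   address, the misalignment in elements is simply j modulo 4.

     #include <stdio.h>

     int
     main (void)
     {
       int vs = 4;                                   // elements per vector (VS)
       for (int j = 0; j < 8; j++)                   // inner-loop iterations
         printf ("j=%d misalign=%d\n", j, j % vs);   // 0 1 2 3 0 1 2 3
       return 0;
     }
*/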