gcc/tree-vect-data-refs.c
1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "stor-layout.h"
29 #include "tm_p.h"
30 #include "target.h"
31 #include "basic-block.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-ssa-alias.h"
34 #include "internal-fn.h"
35 #include "tree-eh.h"
36 #include "gimple-expr.h"
37 #include "is-a.h"
38 #include "gimple.h"
39 #include "gimplify.h"
40 #include "gimple-iterator.h"
41 #include "gimplify-me.h"
42 #include "gimple-ssa.h"
43 #include "tree-phinodes.h"
44 #include "ssa-iterators.h"
45 #include "stringpool.h"
46 #include "tree-ssanames.h"
47 #include "tree-ssa-loop-ivopts.h"
48 #include "tree-ssa-loop-manip.h"
49 #include "tree-ssa-loop.h"
51 #include "cfgloop.h"
52 #include "tree-chrec.h"
53 #include "tree-scalar-evolution.h"
54 #include "tree-vectorizer.h"
55 #include "diagnostic-core.h"
56 #include "cgraph.h"
57 /* Need to include rtl.h, expr.h, etc. for optabs. */
58 #include "expr.h"
59 #include "optabs.h"
60 #include "builtins.h"
61
62 /* Return true if load- or store-lanes optab OPTAB is implemented for
63 COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
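/* Example (illustrative values only): for VECTYPE of mode V4SImode
   (128 bits) and COUNT == 2, the code below asks mode_for_size for a
   256-bit MODE_INT mode and, if one exists, checks that the target
   registers a handler for the load/store-lanes optab on that
   <array mode, V4SImode> pair; a BLKmode result or a missing handler
   means the lanes optab cannot be used for this group.  */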
64
65 static bool
66 vect_lanes_optab_supported_p (const char *name, convert_optab optab,
67 tree vectype, unsigned HOST_WIDE_INT count)
68 {
69 enum machine_mode mode, array_mode;
70 bool limit_p;
71
72 mode = TYPE_MODE (vectype);
73 limit_p = !targetm.array_mode_supported_p (mode, count);
74 array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
75 MODE_INT, limit_p);
76
77 if (array_mode == BLKmode)
78 {
79 if (dump_enabled_p ())
80 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
81 "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]\n",
82 GET_MODE_NAME (mode), count);
83 return false;
84 }
85
86 if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
87 {
88 if (dump_enabled_p ())
89 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
90 "cannot use %s<%s><%s>\n", name,
91 GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
92 return false;
93 }
94
95 if (dump_enabled_p ())
96 dump_printf_loc (MSG_NOTE, vect_location,
97 "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
98 GET_MODE_NAME (mode));
99
100 return true;
101 }
102
103
104 /* Return the smallest scalar part of STMT.
105 This is used to determine the vectype of the stmt. We generally set the
106 vectype according to the type of the result (lhs). For stmts whose
107 result-type is different from the type of the arguments (e.g., demotion,
108 promotion), vectype will be reset appropriately (later). Note that we have
109 to visit the smallest datatype in this function, because that determines the
110 VF. If the smallest datatype in the loop were present only as the rhs of a
111 promotion operation, we would otherwise miss it.
112 Such a case, where a variable of this datatype does not appear in the lhs
113 anywhere in the loop, can only occur if it's an invariant: e.g.:
114 'int_x = (int) short_inv', which we'd expect to have been optimized away by
115 invariant motion. However, we cannot rely on invariant motion to always
116 take invariants out of the loop, and so in the case of promotion we also
117 have to check the rhs.
118 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
119 types. */
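/* Example: for 'int_x = (int) short_y' the result type is 4 bytes and the
   rhs type is 2 bytes, so *LHS_SIZE_UNIT is 4, *RHS_SIZE_UNIT is 2 and the
   2-byte type is returned, letting the caller base the VF on the smaller
   element.  */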
120
121 tree
122 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
123 HOST_WIDE_INT *rhs_size_unit)
124 {
125 tree scalar_type = gimple_expr_type (stmt);
126 HOST_WIDE_INT lhs, rhs;
127
128 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
129
130 if (is_gimple_assign (stmt)
131 && (gimple_assign_cast_p (stmt)
132 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
133 || gimple_assign_rhs_code (stmt) == WIDEN_LSHIFT_EXPR
134 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
135 {
136 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
137
138 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
139 if (rhs < lhs)
140 scalar_type = rhs_type;
141 }
142
143 *lhs_size_unit = lhs;
144 *rhs_size_unit = rhs;
145 return scalar_type;
146 }
147
148
149 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
150 tested at run-time. Return TRUE if DDR was successfully inserted.
151 Return false if versioning is not supported. */
152
153 static bool
154 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
155 {
156 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
157
158 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
159 return false;
160
161 if (dump_enabled_p ())
162 {
163 dump_printf_loc (MSG_NOTE, vect_location,
164 "mark for run-time aliasing test between ");
165 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
166 dump_printf (MSG_NOTE, " and ");
167 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
168 dump_printf (MSG_NOTE, "\n");
169 }
170
171 if (optimize_loop_nest_for_size_p (loop))
172 {
173 if (dump_enabled_p ())
174 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
175 "versioning not supported when optimizing"
176 " for size.\n");
177 return false;
178 }
179
180 /* FORNOW: We don't support versioning with outer-loop vectorization. */
181 if (loop->inner)
182 {
183 if (dump_enabled_p ())
184 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
185 "versioning not yet supported for outer-loops.\n");
186 return false;
187 }
188
189 /* FORNOW: We don't support creating runtime alias tests for non-constant
190 step. */
191 if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
192 || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
193 {
194 if (dump_enabled_p ())
195 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
196 "versioning not yet supported for non-constant "
197 "step\n");
198 return false;
199 }
200
201 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
202 return true;
203 }
204
205
206 /* Function vect_analyze_data_ref_dependence.
207
208 Return TRUE if there (might) exist a dependence between a memory-reference
209 DRA and a memory-reference DRB. When the dependence can instead be
210 checked by a run-time alias test (versioning for alias), return FALSE.
211 Adjust *MAX_VF according to the data dependence. */
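/* Example (assuming the distance vector was not recorded as reversed):
   a true dependence with distance 3, e.g. between a store to a[i+3] and a
   load from a[i], makes the code below clamp *MAX_VF to 3 because
   abs (dist) >= 2 && abs (dist) < *MAX_VF; a distance of 0 is handled
   specially for grouped accesses, and a distance >= *MAX_VF does not
   constrain vectorization.  */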
212
213 static bool
214 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
215 loop_vec_info loop_vinfo, int *max_vf)
216 {
217 unsigned int i;
218 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
219 struct data_reference *dra = DDR_A (ddr);
220 struct data_reference *drb = DDR_B (ddr);
221 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
222 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
223 lambda_vector dist_v;
224 unsigned int loop_depth;
225
226 /* In loop analysis all data references should be vectorizable. */
227 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
228 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
229 gcc_unreachable ();
230
231 /* Independent data accesses. */
232 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
233 return false;
234
235 if (dra == drb
236 || (DR_IS_READ (dra) && DR_IS_READ (drb)))
237 return false;
238
239 /* Even if we have an anti-dependence then, as the vectorized loop covers at
240 least two scalar iterations, there is always also a true dependence.
241 As the vectorizer does not re-order loads and stores we can ignore
242 the anti-dependence if TBAA can disambiguate both DRs similar to the
243 case with known negative distance anti-dependences (positive
244 distance anti-dependences would violate TBAA constraints). */
245 if (((DR_IS_READ (dra) && DR_IS_WRITE (drb))
246 || (DR_IS_WRITE (dra) && DR_IS_READ (drb)))
247 && !alias_sets_conflict_p (get_alias_set (DR_REF (dra)),
248 get_alias_set (DR_REF (drb))))
249 return false;
250
251 /* Unknown data dependence. */
252 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
253 {
254 /* If user asserted safelen consecutive iterations can be
255 executed concurrently, assume independence. */
256 if (loop->safelen >= 2)
257 {
258 if (loop->safelen < *max_vf)
259 *max_vf = loop->safelen;
260 LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
261 return false;
262 }
263
264 if (STMT_VINFO_GATHER_P (stmtinfo_a)
265 || STMT_VINFO_GATHER_P (stmtinfo_b))
266 {
267 if (dump_enabled_p ())
268 {
269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
270 "versioning for alias not supported for: "
271 "can't determine dependence between ");
272 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
273 DR_REF (dra));
274 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
275 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
276 DR_REF (drb));
277 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
278 }
279 return true;
280 }
281
282 if (dump_enabled_p ())
283 {
284 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
285 "versioning for alias required: "
286 "can't determine dependence between ");
287 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
288 DR_REF (dra));
289 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
290 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
291 DR_REF (drb));
292 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
293 }
294
295 /* Add to list of ddrs that need to be tested at run-time. */
296 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
297 }
298
299 /* Known data dependence. */
300 if (DDR_NUM_DIST_VECTS (ddr) == 0)
301 {
302 /* If user asserted safelen consecutive iterations can be
303 executed concurrently, assume independence. */
304 if (loop->safelen >= 2)
305 {
306 if (loop->safelen < *max_vf)
307 *max_vf = loop->safelen;
308 LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
309 return false;
310 }
311
312 if (STMT_VINFO_GATHER_P (stmtinfo_a)
313 || STMT_VINFO_GATHER_P (stmtinfo_b))
314 {
315 if (dump_enabled_p ())
316 {
317 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
318 "versioning for alias not supported for: "
319 "bad dist vector for ");
320 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
321 DR_REF (dra));
322 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
323 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
324 DR_REF (drb));
325 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
326 }
327 return true;
328 }
329
330 if (dump_enabled_p ())
331 {
332 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
333 "versioning for alias required: "
334 "bad dist vector for ");
335 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
336 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
337 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
338 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
339 }
340 /* Add to list of ddrs that need to be tested at run-time. */
341 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
342 }
343
344 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
345 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
346 {
347 int dist = dist_v[loop_depth];
348
349 if (dump_enabled_p ())
350 dump_printf_loc (MSG_NOTE, vect_location,
351 "dependence distance = %d.\n", dist);
352
353 if (dist == 0)
354 {
355 if (dump_enabled_p ())
356 {
357 dump_printf_loc (MSG_NOTE, vect_location,
358 "dependence distance == 0 between ");
359 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
360 dump_printf (MSG_NOTE, " and ");
361 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
362 dump_printf (MSG_NOTE, "\n");
363 }
364
365 /* When we perform grouped accesses and perform implicit CSE
366 by detecting equal accesses and doing disambiguation with
367 runtime alias tests like for
368 .. = a[i];
369 .. = a[i+1];
370 a[i] = ..;
371 a[i+1] = ..;
372 *p = ..;
373 .. = a[i];
374 .. = a[i+1];
375 where we will end up loading { a[i], a[i+1] } once, make
376 sure that inserting group loads before the first load and
377 stores after the last store will do the right thing. */
378 if ((STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
379 && GROUP_SAME_DR_STMT (stmtinfo_a))
380 || (STMT_VINFO_GROUPED_ACCESS (stmtinfo_b)
381 && GROUP_SAME_DR_STMT (stmtinfo_b)))
382 {
383 gimple earlier_stmt;
384 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
385 if (DR_IS_WRITE
386 (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
387 {
388 if (dump_enabled_p ())
389 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
390 "READ_WRITE dependence in interleaving."
391 "\n");
392 return true;
393 }
394 }
395
396 continue;
397 }
398
399 if (dist > 0 && DDR_REVERSED_P (ddr))
400 {
401 /* If DDR_REVERSED_P the order of the data-refs in DDR was
402 reversed (to make distance vector positive), and the actual
403 distance is negative. */
404 if (dump_enabled_p ())
405 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
406 "dependence distance negative.\n");
407 /* Record a negative dependence distance to later limit the
408 amount of stmt copying / unrolling we can perform.
409 Only need to handle read-after-write dependence. */
410 if (DR_IS_READ (drb)
411 && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0
412 || STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) > (unsigned)dist))
413 STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) = dist;
414 continue;
415 }
416
417 if (abs (dist) >= 2
418 && abs (dist) < *max_vf)
419 {
420 /* The dependence distance requires reduction of the maximal
421 vectorization factor. */
422 *max_vf = abs (dist);
423 if (dump_enabled_p ())
424 dump_printf_loc (MSG_NOTE, vect_location,
425 "adjusting maximal vectorization factor to %i\n",
426 *max_vf);
427 }
428
429 if (abs (dist) >= *max_vf)
430 {
431 /* Dependence distance does not create dependence, as far as
432 vectorization is concerned, in this case. */
433 if (dump_enabled_p ())
434 dump_printf_loc (MSG_NOTE, vect_location,
435 "dependence distance >= VF.\n");
436 continue;
437 }
438
439 if (dump_enabled_p ())
440 {
441 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
442 "not vectorized, possible dependence "
443 "between data-refs ");
444 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
445 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
446 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
447 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
448 }
449
450 return true;
451 }
452
453 return false;
454 }
455
456 /* Function vect_analyze_data_ref_dependences.
457
458 Examine all the data references in the loop, and make sure there do not
459 exist any data dependences between them. Set *MAX_VF according to
460 the maximum vectorization factor the data dependences allow. */
461
462 bool
463 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo, int *max_vf)
464 {
465 unsigned int i;
466 struct data_dependence_relation *ddr;
467
468 if (dump_enabled_p ())
469 dump_printf_loc (MSG_NOTE, vect_location,
470 "=== vect_analyze_data_ref_dependences ===\n");
471
472 LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true;
473 if (!compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
474 &LOOP_VINFO_DDRS (loop_vinfo),
475 LOOP_VINFO_LOOP_NEST (loop_vinfo), true))
476 return false;
477
478 FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
479 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
480 return false;
481
482 return true;
483 }
484
485
486 /* Function vect_slp_analyze_data_ref_dependence.
487
488 Return TRUE if there (might) exist a dependence between a memory-reference
489 DRA and a memory-reference DRB in the basic block under SLP analysis that
490 would prevent vectorization. Unlike the loop variant above, there is no
491 run-time alias versioning and no vectorization factor to adjust here. */
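/* Example: in a basic block containing

     x = a[0];
     a[0] = y;

   the earlier statement is the load, so the pair does not block SLP
   (provided both statements take part in vectorization); with the store
   first, the order could not be preserved and TRUE is returned.  */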
492
493 static bool
494 vect_slp_analyze_data_ref_dependence (struct data_dependence_relation *ddr)
495 {
496 struct data_reference *dra = DDR_A (ddr);
497 struct data_reference *drb = DDR_B (ddr);
498
499 /* We need to check dependences of statements marked as unvectorizable
500 as well; they can still prohibit vectorization. */
501
502 /* Independent data accesses. */
503 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
504 return false;
505
506 if (dra == drb)
507 return false;
508
509 /* Read-read is OK. */
510 if (DR_IS_READ (dra) && DR_IS_READ (drb))
511 return false;
512
513 /* If dra and drb are part of the same interleaving chain consider
514 them independent. */
515 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (DR_STMT (dra)))
516 && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (dra)))
517 == GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (drb)))))
518 return false;
519
520 /* Unknown data dependence. */
521 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
522 {
523 if (dump_enabled_p ())
524 {
525 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
526 "can't determine dependence between ");
527 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
528 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
529 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
530 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
531 }
532 }
533 else if (dump_enabled_p ())
534 {
535 dump_printf_loc (MSG_NOTE, vect_location,
536 "determined dependence between ");
537 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
538 dump_printf (MSG_NOTE, " and ");
539 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
540 dump_printf (MSG_NOTE, "\n");
541 }
542
543 /* We do not vectorize basic blocks with write-write dependencies. */
544 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
545 return true;
546
547 /* If we have a read-write dependence, check that the load comes before the
548 store. When we vectorize basic blocks, a vector load can only be placed
549 before the corresponding scalar load, and a vector store only after its
550 corresponding scalar store, so the order of the accesses is preserved
551 when the load precedes the store. */
552 gimple earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
553 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
554 {
555 /* That only holds for load-store pairs taking part in vectorization. */
556 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dra)))
557 && STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (drb))))
558 return false;
559 }
560
561 return true;
562 }
563
564
565 /* Function vect_slp_analyze_data_ref_dependences.
566
567 Examine all the data references in the basic-block, and make sure there
568 do not exist any data dependences between them that would prevent
569 SLP vectorization. */
570
571 bool
572 vect_slp_analyze_data_ref_dependences (bb_vec_info bb_vinfo)
573 {
574 struct data_dependence_relation *ddr;
575 unsigned int i;
576
577 if (dump_enabled_p ())
578 dump_printf_loc (MSG_NOTE, vect_location,
579 "=== vect_slp_analyze_data_ref_dependences ===\n");
580
581 if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo),
582 &BB_VINFO_DDRS (bb_vinfo),
583 vNULL, true))
584 return false;
585
586 FOR_EACH_VEC_ELT (BB_VINFO_DDRS (bb_vinfo), i, ddr)
587 if (vect_slp_analyze_data_ref_dependence (ddr))
588 return false;
589
590 return true;
591 }
592
593
594 /* Function vect_compute_data_ref_alignment
595
596 Compute the misalignment of the data reference DR.
597
598 Output:
599 1. If during the misalignment computation it is found that the data reference
600 cannot be vectorized then false is returned.
601 2. DR_MISALIGNMENT (DR) is defined.
602
603 FOR NOW: No analysis is actually performed. Misalignment is calculated
604 only for trivial cases. TODO. */
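/* Example (assuming a 16-byte aligned base and a vector type requiring
   16-byte alignment): for a V4SF access with DR_INIT == 4 the code below
   computes 4 % 16 == 4, i.e. the access starts one float past a vector
   boundary, and records that value via SET_DR_MISALIGNMENT.  */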
605
606 static bool
607 vect_compute_data_ref_alignment (struct data_reference *dr)
608 {
609 gimple stmt = DR_STMT (dr);
610 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
611 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
612 struct loop *loop = NULL;
613 tree ref = DR_REF (dr);
614 tree vectype;
615 tree base, base_addr;
616 bool base_aligned;
617 tree misalign;
618 tree aligned_to, alignment;
619
620 if (dump_enabled_p ())
621 dump_printf_loc (MSG_NOTE, vect_location,
622 "vect_compute_data_ref_alignment:\n");
623
624 if (loop_vinfo)
625 loop = LOOP_VINFO_LOOP (loop_vinfo);
626
627 /* Initialize misalignment to unknown. */
628 SET_DR_MISALIGNMENT (dr, -1);
629
630 /* Strided loads perform only component accesses, misalignment information
631 is irrelevant for them. */
632 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
633 return true;
634
635 misalign = DR_INIT (dr);
636 aligned_to = DR_ALIGNED_TO (dr);
637 base_addr = DR_BASE_ADDRESS (dr);
638 vectype = STMT_VINFO_VECTYPE (stmt_info);
639
640 /* In case the dataref is in an inner-loop of the loop that is being
641 vectorized (LOOP), we use the base and misalignment information
642 relative to the outer-loop (LOOP). This is ok only if the misalignment
643 stays the same throughout the execution of the inner-loop, which is why
644 we have to check that the stride of the dataref in the inner-loop is a
645 multiple of the vector size. */
646 if (loop && nested_in_vect_loop_p (loop, stmt))
647 {
648 tree step = DR_STEP (dr);
649 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
650
651 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
652 {
653 if (dump_enabled_p ())
654 dump_printf_loc (MSG_NOTE, vect_location,
655 "inner step divides the vector-size.\n");
656 misalign = STMT_VINFO_DR_INIT (stmt_info);
657 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
658 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
659 }
660 else
661 {
662 if (dump_enabled_p ())
663 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
664 "inner step doesn't divide the vector-size.\n");
665 misalign = NULL_TREE;
666 }
667 }
668
669 /* Similarly, if we're doing basic-block vectorization, we can only use
670 base and misalignment information relative to an innermost loop if the
671 misalignment stays the same throughout the execution of the loop.
672 As above, this is the case if the stride of the dataref is a multiple of
673 the vector size. */
674 if (!loop)
675 {
676 tree step = DR_STEP (dr);
677 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
678
679 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
680 {
681 if (dump_enabled_p ())
682 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
683 "SLP: step doesn't divide the vector-size.\n");
684 misalign = NULL_TREE;
685 }
686 }
687
688 base = build_fold_indirect_ref (base_addr);
689 alignment = ssize_int (TYPE_ALIGN (vectype) / BITS_PER_UNIT);
690
691 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
692 || !misalign)
693 {
694 if (dump_enabled_p ())
695 {
696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
697 "Unknown alignment for access: ");
698 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, base);
699 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
700 }
701 return true;
702 }
703
704 if ((DECL_P (base)
705 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
706 alignment) >= 0)
707 || (TREE_CODE (base_addr) == SSA_NAME
708 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
709 TREE_TYPE (base_addr)))),
710 alignment) >= 0)
711 || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
712 base_aligned = true;
713 else
714 base_aligned = false;
715
716 if (!base_aligned)
717 {
718 /* Do not change the alignment of global variables here if
719 flag_section_anchors is enabled as we already generated
720 RTL for other functions. Most global variables should
721 have been aligned during the IPA increase_alignment pass. */
722 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
723 || (TREE_STATIC (base) && flag_section_anchors))
724 {
725 if (dump_enabled_p ())
726 {
727 dump_printf_loc (MSG_NOTE, vect_location,
728 "can't force alignment of ref: ");
729 dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
730 dump_printf (MSG_NOTE, "\n");
731 }
732 return true;
733 }
734
735 /* Force the alignment of the decl.
736 NOTE: This is the only change to the code we make during
737 the analysis phase, before deciding to vectorize the loop. */
738 if (dump_enabled_p ())
739 {
740 dump_printf_loc (MSG_NOTE, vect_location, "force alignment of ");
741 dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
742 dump_printf (MSG_NOTE, "\n");
743 }
744
745 ((dataref_aux *)dr->aux)->base_decl = base;
746 ((dataref_aux *)dr->aux)->base_misaligned = true;
747 }
748
749 /* If this is a backward running DR then first access in the larger
750 vectype actually is N-1 elements before the address in the DR.
751 Adjust misalign accordingly. */
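/* Worked example (V4SI, DR_STEP == -4, DR_INIT == 0, 16-byte vector
   alignment): OFFSET is 3 * -4 == -12, MISALIGN becomes -12, and the
   FLOOR_MOD below yields 4, i.e. the lowest-addressed element of the
   vector access sits 4 bytes past a 16-byte boundary (assuming an
   aligned base).  */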
752 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
753 {
754 tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
755 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
756 otherwise we wouldn't be here. */
757 offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
758 /* PLUS because DR_STEP was negative. */
759 misalign = size_binop (PLUS_EXPR, misalign, offset);
760 }
761
762 /* Modulo alignment. */
763 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
764
765 if (!tree_fits_uhwi_p (misalign))
766 {
767 /* Negative or overflowed misalignment value. */
768 if (dump_enabled_p ())
769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
770 "unexpected misalign value\n");
771 return false;
772 }
773
774 SET_DR_MISALIGNMENT (dr, tree_to_uhwi (misalign));
775
776 if (dump_enabled_p ())
777 {
778 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
779 "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
780 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
781 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
782 }
783
784 return true;
785 }
786
787
788 /* Function vect_compute_data_refs_alignment
789
790 Compute the misalignment of data references in the loop.
791 Return FALSE if a data reference is found that cannot be vectorized. */
792
793 static bool
794 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
795 bb_vec_info bb_vinfo)
796 {
797 vec<data_reference_p> datarefs;
798 struct data_reference *dr;
799 unsigned int i;
800
801 if (loop_vinfo)
802 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
803 else
804 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
805
806 FOR_EACH_VEC_ELT (datarefs, i, dr)
807 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
808 && !vect_compute_data_ref_alignment (dr))
809 {
810 if (bb_vinfo)
811 {
812 /* Mark unsupported statement as unvectorizable. */
813 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
814 continue;
815 }
816 else
817 return false;
818 }
819
820 return true;
821 }
822
823
824 /* Function vect_update_misalignment_for_peel
825
826 DR - the data reference whose misalignment is to be adjusted.
827 DR_PEEL - the data reference whose misalignment is being made
828 zero in the vector loop by the peel.
829 NPEEL - the number of iterations in the peel loop if the misalignment
830 of DR_PEEL is known at compile time. */
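/* Worked example (illustrative numbers): if DR_PEEL is a V4SF access with
   misalignment 4 and NPEEL == 3, then a second V4SF data-ref with element
   size 4 and misalignment 8 becomes (8 + 3 * 4) & 15 == 4 after the peel;
   when either misalignment is unknown the result is simply reset to -1
   below.  */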
831
832 static void
833 vect_update_misalignment_for_peel (struct data_reference *dr,
834 struct data_reference *dr_peel, int npeel)
835 {
836 unsigned int i;
837 vec<dr_p> same_align_drs;
838 struct data_reference *current_dr;
839 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
840 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
841 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
842 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
843
844 /* For interleaved data accesses the step in the loop must be multiplied by
845 the size of the interleaving group. */
846 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
847 dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
848 if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info))
849 dr_peel_size *= GROUP_SIZE (peel_stmt_info);
850
851 /* It can be assumed that the data refs with the same alignment as dr_peel
852 are aligned in the vector loop. */
853 same_align_drs
854 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
855 FOR_EACH_VEC_ELT (same_align_drs, i, current_dr)
856 {
857 if (current_dr != dr)
858 continue;
859 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
860 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
861 SET_DR_MISALIGNMENT (dr, 0);
862 return;
863 }
864
865 if (known_alignment_for_access_p (dr)
866 && known_alignment_for_access_p (dr_peel))
867 {
868 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
869 int misal = DR_MISALIGNMENT (dr);
870 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
871 misal += negative ? -npeel * dr_size : npeel * dr_size;
872 misal &= (TYPE_ALIGN (vectype) / BITS_PER_UNIT) - 1;
873 SET_DR_MISALIGNMENT (dr, misal);
874 return;
875 }
876
877 if (dump_enabled_p ())
878 dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment to -1.\n");
879 SET_DR_MISALIGNMENT (dr, -1);
880 }
881
882
883 /* Function vect_verify_datarefs_alignment
884
885 Return TRUE if all data references in the loop can be
886 handled with respect to alignment. */
887
888 bool
889 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
890 {
891 vec<data_reference_p> datarefs;
892 struct data_reference *dr;
893 enum dr_alignment_support supportable_dr_alignment;
894 unsigned int i;
895
896 if (loop_vinfo)
897 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
898 else
899 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
900
901 FOR_EACH_VEC_ELT (datarefs, i, dr)
902 {
903 gimple stmt = DR_STMT (dr);
904 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
905
906 if (!STMT_VINFO_RELEVANT_P (stmt_info))
907 continue;
908
909 /* For interleaving, only the alignment of the first access matters.
910 Skip statements marked as not vectorizable. */
911 if ((STMT_VINFO_GROUPED_ACCESS (stmt_info)
912 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
913 || !STMT_VINFO_VECTORIZABLE (stmt_info))
914 continue;
915
916 /* Strided loads perform only component accesses, alignment is
917 irrelevant for them. */
918 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
919 continue;
920
921 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
922 if (!supportable_dr_alignment)
923 {
924 if (dump_enabled_p ())
925 {
926 if (DR_IS_READ (dr))
927 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
928 "not vectorized: unsupported unaligned load.");
929 else
930 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
931 "not vectorized: unsupported unaligned "
932 "store.");
933
934 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
935 DR_REF (dr));
936 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
937 }
938 return false;
939 }
940 if (supportable_dr_alignment != dr_aligned && dump_enabled_p ())
941 dump_printf_loc (MSG_NOTE, vect_location,
942 "Vectorizing an unaligned access.\n");
943 }
944 return true;
945 }
946
947 /* Given a memory reference EXP, return whether its alignment is less
948 than its size. */
949
950 static bool
951 not_size_aligned (tree exp)
952 {
953 if (!tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (exp))))
954 return true;
955
956 return (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (exp)))
957 > get_object_alignment (exp));
958 }
959
960 /* Function vector_alignment_reachable_p
961
962 Return true if vector alignment for DR is reachable by peeling
963 a few loop iterations. Return false otherwise. */
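/* Example: a V4SI access with a known misalignment of 6 bytes can never be
   brought to vector alignment by peeling whole 4-byte iterations
   (6 % 4 != 0), so the check below rejects it; a misalignment of 8 bytes
   is reachable, and for a unit-stride access two peeled iterations would
   align it.  */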
964
965 static bool
966 vector_alignment_reachable_p (struct data_reference *dr)
967 {
968 gimple stmt = DR_STMT (dr);
969 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
970 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
971
972 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
973 {
974 /* For interleaved accesses we peel only if the number of iterations in
975 the prolog loop (VF - misalignment, counted in elements) is a multiple
976 of the interleaving group size. */
977 int elem_size, mis_in_elements;
978 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
979
980 /* FORNOW: handle only known alignment. */
981 if (!known_alignment_for_access_p (dr))
982 return false;
983
984 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
985 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
986
987 if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
988 return false;
989 }
990
991 /* If the misalignment is known at compile time then allow peeling
992 only if natural alignment is reachable through peeling. */
993 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
994 {
995 HOST_WIDE_INT elmsize =
996 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
997 if (dump_enabled_p ())
998 {
999 dump_printf_loc (MSG_NOTE, vect_location,
1000 "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1001 dump_printf (MSG_NOTE,
1002 ". misalignment = %d.\n", DR_MISALIGNMENT (dr));
1003 }
1004 if (DR_MISALIGNMENT (dr) % elmsize)
1005 {
1006 if (dump_enabled_p ())
1007 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1008 "data size does not divide the misalignment.\n");
1009 return false;
1010 }
1011 }
1012
1013 if (!known_alignment_for_access_p (dr))
1014 {
1015 tree type = TREE_TYPE (DR_REF (dr));
1016 bool is_packed = not_size_aligned (DR_REF (dr));
1017 if (dump_enabled_p ())
1018 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1019 "Unknown misalignment, is_packed = %d\n",is_packed);
1020 if ((TYPE_USER_ALIGN (type) && !is_packed)
1021 || targetm.vectorize.vector_alignment_reachable (type, is_packed))
1022 return true;
1023 else
1024 return false;
1025 }
1026
1027 return true;
1028 }
1029
1030
1031 /* Calculate the cost of the memory access represented by DR. */
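/* Example: with a vectorization factor of 8 and a data-ref whose vector
   type holds 4 elements, NCOPIES below is 2, so the cost of two vector
   loads (or stores) per vector iteration is accumulated.  */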
1032
1033 static void
1034 vect_get_data_access_cost (struct data_reference *dr,
1035 unsigned int *inside_cost,
1036 unsigned int *outside_cost,
1037 stmt_vector_for_cost *body_cost_vec)
1038 {
1039 gimple stmt = DR_STMT (dr);
1040 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1041 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1042 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1043 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1044 int ncopies = vf / nunits;
1045
1046 if (DR_IS_READ (dr))
1047 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost,
1048 NULL, body_cost_vec, false);
1049 else
1050 vect_get_store_cost (dr, ncopies, inside_cost, body_cost_vec);
1051
1052 if (dump_enabled_p ())
1053 dump_printf_loc (MSG_NOTE, vect_location,
1054 "vect_get_data_access_cost: inside_cost = %d, "
1055 "outside_cost = %d.\n", *inside_cost, *outside_cost);
1056 }
1057
1058
1059 /* Insert DR into peeling hash table with NPEEL as key. */
1060
1061 static void
1062 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1063 int npeel)
1064 {
1065 struct _vect_peel_info elem, *slot;
1066 _vect_peel_info **new_slot;
1067 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1068
1069 elem.npeel = npeel;
1070 slot = LOOP_VINFO_PEELING_HTAB (loop_vinfo).find (&elem);
1071 if (slot)
1072 slot->count++;
1073 else
1074 {
1075 slot = XNEW (struct _vect_peel_info);
1076 slot->npeel = npeel;
1077 slot->dr = dr;
1078 slot->count = 1;
1079 new_slot = LOOP_VINFO_PEELING_HTAB (loop_vinfo).find_slot (slot, INSERT);
1080 *new_slot = slot;
1081 }
1082
1083 if (!supportable_dr_alignment
1084 && unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
1085 slot->count += VECT_MAX_COST;
1086 }
1087
1088
1089 /* Traverse peeling hash table to find peeling option that aligns maximum
1090 number of data accesses. */
1091
1092 int
1093 vect_peeling_hash_get_most_frequent (_vect_peel_info **slot,
1094 _vect_peel_extended_info *max)
1095 {
1096 vect_peel_info elem = *slot;
1097
1098 if (elem->count > max->peel_info.count
1099 || (elem->count == max->peel_info.count
1100 && max->peel_info.npeel > elem->npeel))
1101 {
1102 max->peel_info.npeel = elem->npeel;
1103 max->peel_info.count = elem->count;
1104 max->peel_info.dr = elem->dr;
1105 }
1106
1107 return 1;
1108 }
1109
1110
1111 /* Traverse peeling hash table and calculate cost for each peeling option.
1112 Find the one with the lowest cost. */
1113
1114 int
1115 vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
1116 _vect_peel_extended_info *min)
1117 {
1118 vect_peel_info elem = *slot;
1119 int save_misalignment, dummy;
1120 unsigned int inside_cost = 0, outside_cost = 0, i;
1121 gimple stmt = DR_STMT (elem->dr);
1122 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1123 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1124 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1125 struct data_reference *dr;
1126 stmt_vector_for_cost prologue_cost_vec, body_cost_vec, epilogue_cost_vec;
1127 int single_iter_cost;
1128
1129 prologue_cost_vec.create (2);
1130 body_cost_vec.create (2);
1131 epilogue_cost_vec.create (2);
1132
1133 FOR_EACH_VEC_ELT (datarefs, i, dr)
1134 {
1135 stmt = DR_STMT (dr);
1136 stmt_info = vinfo_for_stmt (stmt);
1137 /* For interleaving, only the alignment of the first access
1138 matters. */
1139 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1140 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1141 continue;
1142
1143 save_misalignment = DR_MISALIGNMENT (dr);
1144 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1145 vect_get_data_access_cost (dr, &inside_cost, &outside_cost,
1146 &body_cost_vec);
1147 SET_DR_MISALIGNMENT (dr, save_misalignment);
1148 }
1149
1150 single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
1151 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel,
1152 &dummy, single_iter_cost,
1153 &prologue_cost_vec,
1154 &epilogue_cost_vec);
1155
1156 /* Prologue and epilogue costs are added to the target model later.
1157 These costs depend only on the scalar iteration cost, the
1158 number of peeling iterations finally chosen, and the number of
1159 misaligned statements. So discard the information found here. */
1160 prologue_cost_vec.release ();
1161 epilogue_cost_vec.release ();
1162
1163 if (inside_cost < min->inside_cost
1164 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1165 {
1166 min->inside_cost = inside_cost;
1167 min->outside_cost = outside_cost;
1168 min->body_cost_vec.release ();
1169 min->body_cost_vec = body_cost_vec;
1170 min->peel_info.dr = elem->dr;
1171 min->peel_info.npeel = elem->npeel;
1172 }
1173 else
1174 body_cost_vec.release ();
1175
1176 return 1;
1177 }
1178
1179
1180 /* Choose best peeling option by traversing peeling hash table and either
1181 choosing an option with the lowest cost (if cost model is enabled) or the
1182 option that aligns as many accesses as possible. */
1183
1184 static struct data_reference *
1185 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1186 unsigned int *npeel,
1187 stmt_vector_for_cost *body_cost_vec)
1188 {
1189 struct _vect_peel_extended_info res;
1190
1191 res.peel_info.dr = NULL;
1192 res.body_cost_vec = stmt_vector_for_cost ();
1193
1194 if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
1195 {
1196 res.inside_cost = INT_MAX;
1197 res.outside_cost = INT_MAX;
1198 LOOP_VINFO_PEELING_HTAB (loop_vinfo)
1199 .traverse <_vect_peel_extended_info *,
1200 vect_peeling_hash_get_lowest_cost> (&res);
1201 }
1202 else
1203 {
1204 res.peel_info.count = 0;
1205 LOOP_VINFO_PEELING_HTAB (loop_vinfo)
1206 .traverse <_vect_peel_extended_info *,
1207 vect_peeling_hash_get_most_frequent> (&res);
1208 }
1209
1210 *npeel = res.peel_info.npeel;
1211 *body_cost_vec = res.body_cost_vec;
1212 return res.peel_info.dr;
1213 }
1214
1215
1216 /* Function vect_enhance_data_refs_alignment
1217
1218 This pass will use loop versioning and loop peeling in order to enhance
1219 the alignment of data references in the loop.
1220
1221 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1222 original loop is to be vectorized. Any other loops that are created by
1223 the transformations performed in this pass - are not supposed to be
1224 vectorized. This restriction will be relaxed.
1225
1226 This pass will require a cost model to guide it whether to apply peeling
1227 or versioning or a combination of the two. For example, the scheme that
1228 Intel uses when given a loop with several memory accesses is as follows:
1229 choose one memory access ('p') whose alignment you want to force by doing
1230 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1231 other accesses are not necessarily aligned, or (2) use loop versioning to
1232 generate one loop in which all accesses are aligned, and another loop in
1233 which only 'p' is necessarily aligned.
1234
1235 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1236 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1237 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1238
1239 Devising a cost model is the most critical aspect of this work. It will
1240 guide us on which access to peel for, whether to use loop versioning, how
1241 many versions to create, etc. The cost model will probably consist of
1242 generic considerations as well as target specific considerations (on
1243 powerpc for example, misaligned stores are more painful than misaligned
1244 loads).
1245
1246 Here are the general steps involved in alignment enhancements:
1247
1248 -- original loop, before alignment analysis:
1249 for (i=0; i<N; i++){
1250 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1251 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1252 }
1253
1254 -- After vect_compute_data_refs_alignment:
1255 for (i=0; i<N; i++){
1256 x = q[i]; # DR_MISALIGNMENT(q) = 3
1257 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1258 }
1259
1260 -- Possibility 1: we do loop versioning:
1261 if (p is aligned) {
1262 for (i=0; i<N; i++){ # loop 1A
1263 x = q[i]; # DR_MISALIGNMENT(q) = 3
1264 p[i] = y; # DR_MISALIGNMENT(p) = 0
1265 }
1266 }
1267 else {
1268 for (i=0; i<N; i++){ # loop 1B
1269 x = q[i]; # DR_MISALIGNMENT(q) = 3
1270 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1271 }
1272 }
1273
1274 -- Possibility 2: we do loop peeling:
1275 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1276 x = q[i];
1277 p[i] = y;
1278 }
1279 for (i = 3; i < N; i++){ # loop 2A
1280 x = q[i]; # DR_MISALIGNMENT(q) = 0
1281 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1282 }
1283
1284 -- Possibility 3: combination of loop peeling and versioning:
1285 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1286 x = q[i];
1287 p[i] = y;
1288 }
1289 if (p is aligned) {
1290 for (i = 3; i<N; i++){ # loop 3A
1291 x = q[i]; # DR_MISALIGNMENT(q) = 0
1292 p[i] = y; # DR_MISALIGNMENT(p) = 0
1293 }
1294 }
1295 else {
1296 for (i = 3; i<N; i++){ # loop 3B
1297 x = q[i]; # DR_MISALIGNMENT(q) = 0
1298 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1299 }
1300 }
1301
1302 These loops are later passed to loop_transform to be vectorized. The
1303 vectorizer will use the alignment information to guide the transformation
1304 (whether to generate regular loads/stores, or with special handling for
1305 misalignment). */
1306
1307 bool
1308 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1309 {
1310 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1311 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1312 enum dr_alignment_support supportable_dr_alignment;
1313 struct data_reference *dr0 = NULL, *first_store = NULL;
1314 struct data_reference *dr;
1315 unsigned int i, j;
1316 bool do_peeling = false;
1317 bool do_versioning = false;
1318 bool stat;
1319 gimple stmt;
1320 stmt_vec_info stmt_info;
1321 unsigned int npeel = 0;
1322 bool all_misalignments_unknown = true;
1323 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1324 unsigned possible_npeel_number = 1;
1325 tree vectype;
1326 unsigned int nelements, mis, same_align_drs_max = 0;
1327 stmt_vector_for_cost body_cost_vec = stmt_vector_for_cost ();
1328
1329 if (dump_enabled_p ())
1330 dump_printf_loc (MSG_NOTE, vect_location,
1331 "=== vect_enhance_data_refs_alignment ===\n");
1332
1333 /* While cost model enhancements are expected in the future, the high level
1334 view of the code at this time is as follows:
1335
1336 A) If there is a misaligned access then see if peeling to align
1337 this access can make all data references satisfy
1338 vect_supportable_dr_alignment. If so, update data structures
1339 as needed and return true.
1340
1341 B) If peeling wasn't possible and there is a data reference with an
1342 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1343 then see if loop versioning checks can be used to make all data
1344 references satisfy vect_supportable_dr_alignment. If so, update
1345 data structures as needed and return true.
1346
1347 C) If neither peeling nor versioning were successful then return false if
1348 any data reference does not satisfy vect_supportable_dr_alignment.
1349
1350 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1351
1352 Note, Possibility 3 above (which is peeling and versioning together) is not
1353 being done at this time. */
1354
1355 /* (1) Peeling to force alignment. */
1356
1357 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1358 Considerations:
1359 + How many accesses will become aligned due to the peeling
1360 - How many accesses will become unaligned due to the peeling,
1361 and the cost of misaligned accesses.
1362 - The cost of peeling (the extra runtime checks, the increase
1363 in code size). */
1364
1365 FOR_EACH_VEC_ELT (datarefs, i, dr)
1366 {
1367 stmt = DR_STMT (dr);
1368 stmt_info = vinfo_for_stmt (stmt);
1369
1370 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1371 continue;
1372
1373 /* For interleaving, only the alignment of the first access
1374 matters. */
1375 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1376 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1377 continue;
1378
1379 /* For invariant accesses there is nothing to enhance. */
1380 if (integer_zerop (DR_STEP (dr)))
1381 continue;
1382
1383 /* Strided loads perform only component accesses, alignment is
1384 irrelevant for them. */
1385 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1386 continue;
1387
1388 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1389 do_peeling = vector_alignment_reachable_p (dr);
1390 if (do_peeling)
1391 {
1392 if (known_alignment_for_access_p (dr))
1393 {
1394 unsigned int npeel_tmp;
1395 bool negative = tree_int_cst_compare (DR_STEP (dr),
1396 size_zero_node) < 0;
1397
1398 /* Save info about DR in the hash table. */
1399 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo).is_created ())
1400 LOOP_VINFO_PEELING_HTAB (loop_vinfo).create (1);
1401
1402 vectype = STMT_VINFO_VECTYPE (stmt_info);
1403 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1404 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1405 TREE_TYPE (DR_REF (dr))));
1406 npeel_tmp = (negative
1407 ? (mis - nelements) : (nelements - mis))
1408 & (nelements - 1);
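   /* Illustrative arithmetic: for a V4SI access misaligned by two
      elements and a forward (non-negative) step, npeel_tmp above is
      (4 - 2) & 3 == 2; with a backward step the (mis - nelements)
      form wraps to the same value 2 in this example.  */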
1409
1410 /* For multiple types, it is possible that the bigger type access
1411 will have more than one peeling option. E.g., a loop with two
1412 types: one of size (vector size / 4), and the other one of
1413 size (vector size / 8). The vectorization factor will be 8. If both
1414 accesses are misaligned by 3, the first one needs one scalar
1415 iteration to be aligned, and the second one needs 5. But the
1416 first one will also be aligned by peeling 5 scalar
1417 iterations, and in that case both accesses will be aligned.
1418 Hence, besides the immediate peeling amount, we also want
1419 to try adding a full vector size, as long as we don't exceed
1420 the vectorization factor.
1421 We do this automatically for the cost model, since we calculate
1422 the cost for every peeling option. */
1423 if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
1424 possible_npeel_number = vf / nelements;
1425
1426 /* Handle the aligned case. We may decide to align some other
1427 access, making DR unaligned. */
1428 if (DR_MISALIGNMENT (dr) == 0)
1429 {
1430 npeel_tmp = 0;
1431 if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
1432 possible_npeel_number++;
1433 }
1434
1435 for (j = 0; j < possible_npeel_number; j++)
1436 {
1437 gcc_assert (npeel_tmp <= vf);
1438 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1439 npeel_tmp += nelements;
1440 }
1441
1442 all_misalignments_unknown = false;
1443 /* Data-ref that was chosen for the case that all the
1444 misalignments are unknown is not relevant anymore, since we
1445 have a data-ref with known alignment. */
1446 dr0 = NULL;
1447 }
1448 else
1449 {
1450 /* If we don't know any misalignment values, we prefer
1451 peeling for the data-ref that has the maximum number of data-refs
1452 with the same alignment, unless the target prefers to align
1453 stores over loads. */
1454 if (all_misalignments_unknown)
1455 {
1456 unsigned same_align_drs
1457 = STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ();
1458 if (!dr0
1459 || same_align_drs_max < same_align_drs)
1460 {
1461 same_align_drs_max = same_align_drs;
1462 dr0 = dr;
1463 }
1464 /* For data-refs with the same number of related
1465 accesses prefer the one where the misalign
1466 computation will be invariant in the outermost loop. */
1467 else if (same_align_drs_max == same_align_drs)
1468 {
1469 struct loop *ivloop0, *ivloop;
1470 ivloop0 = outermost_invariant_loop_for_expr
1471 (loop, DR_BASE_ADDRESS (dr0));
1472 ivloop = outermost_invariant_loop_for_expr
1473 (loop, DR_BASE_ADDRESS (dr));
1474 if ((ivloop && !ivloop0)
1475 || (ivloop && ivloop0
1476 && flow_loop_nested_p (ivloop, ivloop0)))
1477 dr0 = dr;
1478 }
1479
1480 if (!first_store && DR_IS_WRITE (dr))
1481 first_store = dr;
1482 }
1483
1484 /* If there are both known and unknown misaligned accesses in the
1485 loop, we choose peeling amount according to the known
1486 accesses. */
1487 if (!supportable_dr_alignment)
1488 {
1489 dr0 = dr;
1490 if (!first_store && DR_IS_WRITE (dr))
1491 first_store = dr;
1492 }
1493 }
1494 }
1495 else
1496 {
1497 if (!aligned_access_p (dr))
1498 {
1499 if (dump_enabled_p ())
1500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1501 "vector alignment may not be reachable\n");
1502 break;
1503 }
1504 }
1505 }
1506
1507 /* Check if we can possibly peel the loop. */
1508 if (!vect_can_advance_ivs_p (loop_vinfo)
1509 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1510 do_peeling = false;
1511
1512 if (do_peeling && all_misalignments_unknown
1513 && vect_supportable_dr_alignment (dr0, false))
1514 {
1515
1516 /* Check if the target prefers aligning stores over loads, i.e., if
1517 misaligned stores are more expensive than misaligned loads (taking
1518 drs with same alignment into account). */
1519 if (first_store && DR_IS_READ (dr0))
1520 {
1521 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1522 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1523 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1524 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1525 stmt_vector_for_cost dummy;
1526 dummy.create (2);
1527
1528 vect_get_data_access_cost (dr0, &load_inside_cost, &load_outside_cost,
1529 &dummy);
1530 vect_get_data_access_cost (first_store, &store_inside_cost,
1531 &store_outside_cost, &dummy);
1532
1533 dummy.release ();
1534
1535 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1536 aligning the load DR0). */
1537 load_inside_penalty = store_inside_cost;
1538 load_outside_penalty = store_outside_cost;
1539 for (i = 0;
1540 STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
1541 DR_STMT (first_store))).iterate (i, &dr);
1542 i++)
1543 if (DR_IS_READ (dr))
1544 {
1545 load_inside_penalty += load_inside_cost;
1546 load_outside_penalty += load_outside_cost;
1547 }
1548 else
1549 {
1550 load_inside_penalty += store_inside_cost;
1551 load_outside_penalty += store_outside_cost;
1552 }
1553
1554 /* Calculate the penalty for leaving DR0 unaligned (by
1555 aligning the FIRST_STORE). */
1556 store_inside_penalty = load_inside_cost;
1557 store_outside_penalty = load_outside_cost;
1558 for (i = 0;
1559 STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
1560 DR_STMT (dr0))).iterate (i, &dr);
1561 i++)
1562 if (DR_IS_READ (dr))
1563 {
1564 store_inside_penalty += load_inside_cost;
1565 store_outside_penalty += load_outside_cost;
1566 }
1567 else
1568 {
1569 store_inside_penalty += store_inside_cost;
1570 store_outside_penalty += store_outside_cost;
1571 }
1572
1573 if (load_inside_penalty > store_inside_penalty
1574 || (load_inside_penalty == store_inside_penalty
1575 && load_outside_penalty > store_outside_penalty))
1576 dr0 = first_store;
1577 }
1578
1579 /* In case there are only loads with different unknown misalignments, use
1580 peeling only if it may help to align other accesses in the loop. */
1581 if (!first_store
1582 && !STMT_VINFO_SAME_ALIGN_REFS (
1583 vinfo_for_stmt (DR_STMT (dr0))).length ()
1584 && vect_supportable_dr_alignment (dr0, false)
1585 != dr_unaligned_supported)
1586 do_peeling = false;
1587 }
1588
1589 if (do_peeling && !dr0)
1590 {
1591 /* Peeling is possible, but there is no data access that is not supported
1592 unless aligned. So we try to choose the best possible peeling. */
1593
1594 /* We should get here only if there are drs with known misalignment. */
1595 gcc_assert (!all_misalignments_unknown);
1596
1597 /* Choose the best peeling from the hash table. */
1598 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel,
1599 &body_cost_vec);
1600 if (!dr0 || !npeel)
1601 do_peeling = false;
1602 }
1603
1604 if (do_peeling)
1605 {
1606 stmt = DR_STMT (dr0);
1607 stmt_info = vinfo_for_stmt (stmt);
1608 vectype = STMT_VINFO_VECTYPE (stmt_info);
1609 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1610
1611 if (known_alignment_for_access_p (dr0))
1612 {
1613 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1614 size_zero_node) < 0;
1615 if (!npeel)
1616 {
1617 /* Since it's known at compile time, compute the number of
1618 iterations in the peeled loop (the peeling factor) for use in
1619 updating DR_MISALIGNMENT values. The peeling factor is the
1620 vectorization factor minus the misalignment as an element
1621 count. */
1622 mis = DR_MISALIGNMENT (dr0);
1623 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1624 npeel = ((negative ? mis - nelements : nelements - mis)
1625 & (nelements - 1));
1626 }
1627
1628 /* For interleaved data access every iteration accesses all the
1629 members of the group, therefore we divide the number of iterations
1630 by the group size. */
1631 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1632 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1633 npeel /= GROUP_SIZE (stmt_info);
1634
1635 if (dump_enabled_p ())
1636 dump_printf_loc (MSG_NOTE, vect_location,
1637 "Try peeling by %d\n", npeel);
1638 }
1639
1640 /* Ensure that all data refs can be vectorized after the peel. */
1641 FOR_EACH_VEC_ELT (datarefs, i, dr)
1642 {
1643 int save_misalignment;
1644
1645 if (dr == dr0)
1646 continue;
1647
1648 stmt = DR_STMT (dr);
1649 stmt_info = vinfo_for_stmt (stmt);
1650 /* For interleaving, only the alignment of the first access
1651 matters. */
1652 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1653 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1654 continue;
1655
1656 /* Strided loads perform only component accesses, alignment is
1657 irrelevant for them. */
1658 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1659 continue;
1660
1661 save_misalignment = DR_MISALIGNMENT (dr);
1662 vect_update_misalignment_for_peel (dr, dr0, npeel);
1663 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1664 SET_DR_MISALIGNMENT (dr, save_misalignment);
1665
1666 if (!supportable_dr_alignment)
1667 {
1668 do_peeling = false;
1669 break;
1670 }
1671 }
1672
1673 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1674 {
1675 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1676 if (!stat)
1677 do_peeling = false;
1678 else
1679 {
1680 body_cost_vec.release ();
1681 return stat;
1682 }
1683 }
1684
1685 if (do_peeling)
1686 {
1687 unsigned max_allowed_peel
1688 = PARAM_VALUE (PARAM_VECT_MAX_PEELING_FOR_ALIGNMENT);
1689 if (max_allowed_peel != (unsigned)-1)
1690 {
1691 unsigned max_peel = npeel;
1692 if (max_peel == 0)
1693 {
1694 gimple dr_stmt = DR_STMT (dr0);
1695 stmt_vec_info vinfo = vinfo_for_stmt (dr_stmt);
1696 tree vtype = STMT_VINFO_VECTYPE (vinfo);
1697 max_peel = TYPE_VECTOR_SUBPARTS (vtype) - 1;
1698 }
1699 if (max_peel > max_allowed_peel)
1700 {
1701 do_peeling = false;
1702 if (dump_enabled_p ())
1703 dump_printf_loc (MSG_NOTE, vect_location,
1704 "Disable peeling, max peels reached: %d\n", max_peel);
1705 }
1706 }
1707 }
1708
1709 if (do_peeling)
1710 {
1711 stmt_info_for_cost *si;
1712 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
1713
1714 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1715 If the misalignment of DR_i is identical to that of dr0 then set
1716 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1717 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1718 by the peeling factor times the element size of DR_i (MOD the
1719 vectorization factor times the size). Otherwise, the
1720 misalignment of DR_i must be set to unknown. */
1721 FOR_EACH_VEC_ELT (datarefs, i, dr)
1722 if (dr != dr0)
1723 vect_update_misalignment_for_peel (dr, dr0, npeel);
1724
1725 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1726 if (npeel)
1727 LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1728 else
1729 LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
1730 = DR_MISALIGNMENT (dr0);
1731 SET_DR_MISALIGNMENT (dr0, 0);
1732 if (dump_enabled_p ())
1733 {
1734 dump_printf_loc (MSG_NOTE, vect_location,
1735 "Alignment of access forced using peeling.\n");
1736 dump_printf_loc (MSG_NOTE, vect_location,
1737 "Peeling for alignment will be applied.\n");
1738 }
1739 /* We've delayed passing the inside-loop peeling costs to the
1740 target cost model until we were sure peeling would happen.
1741 Do so now. */
1742 if (body_cost_vec.exists ())
1743 {
1744 FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1745 {
1746 struct _stmt_vec_info *stmt_info
1747 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1748 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1749 si->misalign, vect_body);
1750 }
1751 body_cost_vec.release ();
1752 }
1753
1754 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1755 gcc_assert (stat);
1756 return stat;
1757 }
1758 }
1759
1760 body_cost_vec.release ();
1761
1762 /* (2) Versioning to force alignment. */
1763
1764 /* Try versioning if:
1765 1) optimize loop for speed
1766 2) there is at least one unsupported misaligned data ref with an unknown
1767 misalignment, and
1768 3) all misaligned data refs with a known misalignment are supported, and
1769 4) the number of runtime alignment checks is within reason. */
1770
1771 do_versioning =
1772 optimize_loop_nest_for_speed_p (loop)
1773 && (!loop->inner); /* FORNOW */
1774
1775 if (do_versioning)
1776 {
1777 FOR_EACH_VEC_ELT (datarefs, i, dr)
1778 {
1779 stmt = DR_STMT (dr);
1780 stmt_info = vinfo_for_stmt (stmt);
1781
1782 /* For interleaving, only the alignment of the first access
1783 matters. */
1784 if (aligned_access_p (dr)
1785 || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1786 && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
1787 continue;
1788
1789 /* Strided loads perform only component accesses, alignment is
1790 irrelevant for them. */
1791 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1792 continue;
1793
1794 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1795
1796 if (!supportable_dr_alignment)
1797 {
1798 gimple stmt;
1799 int mask;
1800 tree vectype;
1801
1802 if (known_alignment_for_access_p (dr)
1803 || LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
1804 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1805 {
1806 do_versioning = false;
1807 break;
1808 }
1809
1810 stmt = DR_STMT (dr);
1811 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1812 gcc_assert (vectype);
1813
1814 /* The rightmost bits of an aligned address must be zeros.
1815 Construct the mask needed for this test. For example,
1816 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1817 mask must be 15 = 0xf. */
1818 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1819
1820 /* FORNOW: use the same mask to test all potentially unaligned
1821 references in the loop. The vectorizer currently supports
1822 a single vector size, see the reference to
1823 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1824 vectorization factor is computed. */
1825 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1826 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1827 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1828 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (
1829 DR_STMT (dr));
1830 }
1831 }
1832
1833 /* Versioning requires at least one misaligned data reference. */
1834 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1835 do_versioning = false;
1836 else if (!do_versioning)
1837 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
1838 }
1839
1840 if (do_versioning)
1841 {
1842 vec<gimple> may_misalign_stmts
1843 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1844 gimple stmt;
1845
1846 /* It can now be assumed that the data references in the statements
1847 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1848 of the loop being vectorized. */
1849 FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt)
1850 {
1851 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1852 dr = STMT_VINFO_DATA_REF (stmt_info);
1853 SET_DR_MISALIGNMENT (dr, 0);
1854 if (dump_enabled_p ())
1855 dump_printf_loc (MSG_NOTE, vect_location,
1856 "Alignment of access forced using versioning.\n");
1857 }
1858
1859 if (dump_enabled_p ())
1860 dump_printf_loc (MSG_NOTE, vect_location,
1861 "Versioning for alignment will be applied.\n");
1862
1863 /* Peeling and versioning can't be done together at this time. */
1864 gcc_assert (! (do_peeling && do_versioning));
1865
1866 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1867 gcc_assert (stat);
1868 return stat;
1869 }
1870
1871 /* This point is reached if neither peeling nor versioning is being done. */
1872 gcc_assert (! (do_peeling || do_versioning));
1873
1874 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1875 return stat;
1876 }
1877
1878
1879 /* Function vect_find_same_alignment_drs.
1880
1881 Update group and alignment relations according to the chosen
1882 vectorization factor. */
1883
1884 static void
1885 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1886 loop_vec_info loop_vinfo)
1887 {
1888 unsigned int i;
1889 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1890 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1891 struct data_reference *dra = DDR_A (ddr);
1892 struct data_reference *drb = DDR_B (ddr);
1893 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1894 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1895 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1896 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1897 lambda_vector dist_v;
1898 unsigned int loop_depth;
1899
1900 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1901 return;
1902
1903 if (dra == drb)
1904 return;
1905
1906 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1907 return;
1908
1909 /* Loop-based vectorization and known data dependence. */
1910 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1911 return;
1912
1913 /* Data-dependence analysis reports a distance vector of zero
1914 for data-references that overlap only in the first iteration
1915 but have steps of different sign (see PR45764).
1916 So as a sanity check require equal DR_STEP. */
1917 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1918 return;
1919
1920 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1921 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
1922 {
1923 int dist = dist_v[loop_depth];
1924
1925 if (dump_enabled_p ())
1926 dump_printf_loc (MSG_NOTE, vect_location,
1927 "dependence distance = %d.\n", dist);
1928
1929 /* Same loop iteration. */
1930 if (dist == 0
1931 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1932 {
1933 /* Two references with distance zero have the same alignment. */
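/* E.g., with a vectorization factor of 4, a dependence distance of 8
   between two 4-byte accesses also implies equal alignment, since the
   addresses then differ by a multiple of the vector size.  */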
1934 STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a).safe_push (drb);
1935 STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b).safe_push (dra);
1936 if (dump_enabled_p ())
1937 {
1938 dump_printf_loc (MSG_NOTE, vect_location,
1939 "accesses have the same alignment.\n");
1940 dump_printf (MSG_NOTE,
1941 "dependence distance modulo vf == 0 between ");
1942 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
1943 dump_printf (MSG_NOTE, " and ");
1944 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
1945 dump_printf (MSG_NOTE, "\n");
1946 }
1947 }
1948 }
1949 }
1950
1951
1952 /* Function vect_analyze_data_refs_alignment
1953
1954 Analyze the alignment of the data-references in the loop.
1955 Return FALSE if a data reference is found that cannot be vectorized. */
1956
1957 bool
1958 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
1959 bb_vec_info bb_vinfo)
1960 {
1961 if (dump_enabled_p ())
1962 dump_printf_loc (MSG_NOTE, vect_location,
1963 "=== vect_analyze_data_refs_alignment ===\n");
1964
1965 /* Mark groups of data references with same alignment using
1966 data dependence information. */
1967 if (loop_vinfo)
1968 {
1969 vec<ddr_p> ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1970 struct data_dependence_relation *ddr;
1971 unsigned int i;
1972
1973 FOR_EACH_VEC_ELT (ddrs, i, ddr)
1974 vect_find_same_alignment_drs (ddr, loop_vinfo);
1975 }
1976
1977 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
1978 {
1979 if (dump_enabled_p ())
1980 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1981 "not vectorized: can't calculate alignment "
1982 "for data ref.\n");
1983 return false;
1984 }
1985
1986 return true;
1987 }
1988
1989
1990 /* Analyze groups of accesses: check that DR belongs to a group of
1991 accesses of legal size, step, etc. Detect gaps, single element
1992 interleaving, and other special cases. Set grouped access info.
1993 Collect groups of strided stores for further use in SLP analysis. */
1994
1995 static bool
1996 vect_analyze_group_access (struct data_reference *dr)
1997 {
1998 tree step = DR_STEP (dr);
1999 tree scalar_type = TREE_TYPE (DR_REF (dr));
2000 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2001 gimple stmt = DR_STMT (dr);
2002 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2003 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2004 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2005 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2006 HOST_WIDE_INT groupsize, last_accessed_element = 1;
2007 bool slp_impossible = false;
2008 struct loop *loop = NULL;
2009
2010 if (loop_vinfo)
2011 loop = LOOP_VINFO_LOOP (loop_vinfo);
2012
2013 /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
2014 size of the interleaving group (including gaps). */
2015 groupsize = absu_hwi (dr_step) / type_size;
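/* E.g., a step of 32 bytes over 4-byte elements yields GROUPSIZE = 8,
   counting any gaps within the group.  */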
2016
2017 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2018 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2019 {
2020 /* Check if this DR is a part of interleaving, and is a single
2021 element of the group that is accessed in the loop. */
2022
2023 /* Gaps are supported only for loads. STEP must be a multiple of the type
2024 size. The size of the group must be a power of 2. */
2025 if (DR_IS_READ (dr)
2026 && (dr_step % type_size) == 0
2027 && groupsize > 0
2028 && exact_log2 (groupsize) != -1)
2029 {
2030 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
2031 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2032 if (dump_enabled_p ())
2033 {
2034 dump_printf_loc (MSG_NOTE, vect_location,
2035 "Detected single element interleaving ");
2036 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr));
2037 dump_printf (MSG_NOTE, " step ");
2038 dump_generic_expr (MSG_NOTE, TDF_SLIM, step);
2039 dump_printf (MSG_NOTE, "\n");
2040 }
2041
2042 if (loop_vinfo)
2043 {
2044 if (dump_enabled_p ())
2045 dump_printf_loc (MSG_NOTE, vect_location,
2046 "Data access with gaps requires scalar "
2047 "epilogue loop\n");
2048 if (loop->inner)
2049 {
2050 if (dump_enabled_p ())
2051 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2052 "Peeling for outer loop is not"
2053 " supported\n");
2054 return false;
2055 }
2056
2057 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2058 }
2059
2060 return true;
2061 }
2062
2063 if (dump_enabled_p ())
2064 {
2065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2066 "not consecutive access ");
2067 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2068 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2069 }
2070
2071 if (bb_vinfo)
2072 {
2073 /* Mark the statement as unvectorizable. */
2074 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2075 return true;
2076 }
2077
2078 return false;
2079 }
2080
2081 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
2082 {
2083 /* First stmt in the interleaving chain. Check the chain. */
2084 gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
2085 struct data_reference *data_ref = dr;
2086 unsigned int count = 1;
2087 tree prev_init = DR_INIT (data_ref);
2088 gimple prev = stmt;
2089 HOST_WIDE_INT diff, gaps = 0;
2090 unsigned HOST_WIDE_INT count_in_bytes;
2091
2092 while (next)
2093 {
2094 /* Skip same data-refs. If two or more stmts share a
2095 data-ref (supported only for loads), we vectorize only the first
2096 stmt, and the rest get their vectorized loads from the first
2097 one. */
2098 if (!tree_int_cst_compare (DR_INIT (data_ref),
2099 DR_INIT (STMT_VINFO_DATA_REF (
2100 vinfo_for_stmt (next)))))
2101 {
2102 if (DR_IS_WRITE (data_ref))
2103 {
2104 if (dump_enabled_p ())
2105 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2106 "Two store stmts share the same dr.\n");
2107 return false;
2108 }
2109
2110 /* For load use the same data-ref load. */
2111 GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2112
2113 prev = next;
2114 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2115 continue;
2116 }
2117
2118 prev = next;
2119 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2120
2121 /* All group members have the same STEP by construction. */
2122 gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0));
2123
2124 /* Check that the distance between two accesses is equal to the type
2125 size. Otherwise, we have gaps. */
2126 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2127 - TREE_INT_CST_LOW (prev_init)) / type_size;
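/* E.g., DR_INITs of 0 and 8 with a 4-byte type give DIFF = 2, i.e. a
   gap of one element between the two group members.  */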
2128 if (diff != 1)
2129 {
2130 /* FORNOW: SLP of accesses with gaps is not supported. */
2131 slp_impossible = true;
2132 if (DR_IS_WRITE (data_ref))
2133 {
2134 if (dump_enabled_p ())
2135 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2136 "interleaved store with gaps\n");
2137 return false;
2138 }
2139
2140 gaps += diff - 1;
2141 }
2142
2143 last_accessed_element += diff;
2144
2145 /* Store the gap from the previous member of the group. If there is no
2146 gap in the access, GROUP_GAP is always 1. */
2147 GROUP_GAP (vinfo_for_stmt (next)) = diff;
2148
2149 prev_init = DR_INIT (data_ref);
2150 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2151 /* Count the number of data-refs in the chain. */
2152 count++;
2153 }
2154
2155 /* COUNT is the number of accesses found; we multiply it by the size of
2156 the type to get COUNT_IN_BYTES. */
2157 count_in_bytes = type_size * count;
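/* E.g., three 4-byte members give COUNT_IN_BYTES = 12; with one
   one-element gap the group spans 16 bytes, so a step of at least
   16 bytes is required by the check below.  */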
2158
2159 /* Check that the size of the interleaving (including gaps) is not
2160 greater than STEP. */
2161 if (dr_step != 0
2162 && absu_hwi (dr_step) < count_in_bytes + gaps * type_size)
2163 {
2164 if (dump_enabled_p ())
2165 {
2166 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2167 "interleaving size is greater than step for ");
2168 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2169 DR_REF (dr));
2170 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2171 }
2172 return false;
2173 }
2174
2175 /* Check that the size of the interleaving is equal to STEP for stores,
2176 i.e., that there are no gaps. */
2177 if (dr_step != 0
2178 && absu_hwi (dr_step) != count_in_bytes)
2179 {
2180 if (DR_IS_READ (dr))
2181 {
2182 slp_impossible = true;
2183 /* There is a gap after the last load in the group. This gap is a
2184 difference between the groupsize and the number of elements.
2185 When there is no gap, this difference should be 0. */
2186 GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - count;
2187 }
2188 else
2189 {
2190 if (dump_enabled_p ())
2191 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2192 "interleaved store with gaps\n");
2193 return false;
2194 }
2195 }
2196
2197 /* Check that STEP is a multiple of type size. */
2198 if (dr_step != 0
2199 && (dr_step % type_size) != 0)
2200 {
2201 if (dump_enabled_p ())
2202 {
2203 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2204 "step is not a multiple of type size: step ");
2205 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, step);
2206 dump_printf (MSG_MISSED_OPTIMIZATION, " size ");
2207 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2208 TYPE_SIZE_UNIT (scalar_type));
2209 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2210 }
2211 return false;
2212 }
2213
2214 if (groupsize == 0)
2215 groupsize = count;
2216
2217 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2218 if (dump_enabled_p ())
2219 dump_printf_loc (MSG_NOTE, vect_location,
2220 "Detected interleaving of size %d\n", (int)groupsize);
2221
2222 /* SLP: create an SLP data structure for every interleaving group of
2223 stores for further analysis in vect_analyse_slp. */
2224 if (DR_IS_WRITE (dr) && !slp_impossible)
2225 {
2226 if (loop_vinfo)
2227 LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (stmt);
2228 if (bb_vinfo)
2229 BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt);
2230 }
2231
2232 /* There is a gap in the end of the group. */
2233 if (groupsize - last_accessed_element > 0 && loop_vinfo)
2234 {
2235 if (dump_enabled_p ())
2236 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2237 "Data access with gaps requires scalar "
2238 "epilogue loop\n");
2239 if (loop->inner)
2240 {
2241 if (dump_enabled_p ())
2242 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2243 "Peeling for outer loop is not supported\n");
2244 return false;
2245 }
2246
2247 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2248 }
2249 }
2250
2251 return true;
2252 }
2253
2254
2255 /* Analyze the access pattern of the data-reference DR.
2256 In case of non-consecutive accesses call vect_analyze_group_access() to
2257 analyze groups of accesses. */
2258
2259 static bool
2260 vect_analyze_data_ref_access (struct data_reference *dr)
2261 {
2262 tree step = DR_STEP (dr);
2263 tree scalar_type = TREE_TYPE (DR_REF (dr));
2264 gimple stmt = DR_STMT (dr);
2265 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2266 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2267 struct loop *loop = NULL;
2268
2269 if (loop_vinfo)
2270 loop = LOOP_VINFO_LOOP (loop_vinfo);
2271
2272 if (loop_vinfo && !step)
2273 {
2274 if (dump_enabled_p ())
2275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2276 "bad data-ref access in loop\n");
2277 return false;
2278 }
2279
2280 /* Allow invariant loads in loops that are not nested. */
2281 if (loop_vinfo && integer_zerop (step))
2282 {
2283 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2284 if (nested_in_vect_loop_p (loop, stmt))
2285 {
2286 if (dump_enabled_p ())
2287 dump_printf_loc (MSG_NOTE, vect_location,
2288 "zero step in inner loop of nest\n");
2289 return false;
2290 }
2291 return DR_IS_READ (dr);
2292 }
2293
2294 if (loop && nested_in_vect_loop_p (loop, stmt))
2295 {
2296 /* Interleaved accesses are not yet supported within outer-loop
2297 vectorization for references in the inner-loop. */
2298 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2299
2300 /* For the rest of the analysis we use the outer-loop step. */
2301 step = STMT_VINFO_DR_STEP (stmt_info);
2302 if (integer_zerop (step))
2303 {
2304 if (dump_enabled_p ())
2305 dump_printf_loc (MSG_NOTE, vect_location,
2306 "zero step in outer loop.\n");
2307 if (DR_IS_READ (dr))
2308 return true;
2309 else
2310 return false;
2311 }
2312 }
2313
2314 /* Consecutive? */
2315 if (TREE_CODE (step) == INTEGER_CST)
2316 {
2317 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
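/* A step equal to the scalar type size (or its negation for a negative
   step) means the access is consecutive, e.g. 4-byte ints accessed
   with step 4 or -4.  */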
2318 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2319 || (dr_step < 0
2320 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2321 {
2322 /* Mark that it is not interleaving. */
2323 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2324 return true;
2325 }
2326 }
2327
2328 if (loop && nested_in_vect_loop_p (loop, stmt))
2329 {
2330 if (dump_enabled_p ())
2331 dump_printf_loc (MSG_NOTE, vect_location,
2332 "grouped access in outer loop.\n");
2333 return false;
2334 }
2335
2336 /* Assume this is a DR handled by non-constant strided load case. */
2337 if (TREE_CODE (step) != INTEGER_CST)
2338 return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
2339
2340 /* Not a consecutive access - check if it's part of an interleaving group. */
2341 return vect_analyze_group_access (dr);
2342 }
2343
2344
2345
2346 /* A helper function used in the comparator function to sort data
2347 references. T1 and T2 are the two trees to be compared.
2348 The function returns -1, 0, or 1. */
2349
2350 static int
2351 compare_tree (tree t1, tree t2)
2352 {
2353 int i, cmp;
2354 enum tree_code code;
2355 char tclass;
2356
2357 if (t1 == t2)
2358 return 0;
2359 if (t1 == NULL)
2360 return -1;
2361 if (t2 == NULL)
2362 return 1;
2363
2364
2365 if (TREE_CODE (t1) != TREE_CODE (t2))
2366 return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
2367
2368 code = TREE_CODE (t1);
2369 switch (code)
2370 {
2371 /* For const values, we can just use hash values for comparisons. */
2372 case INTEGER_CST:
2373 case REAL_CST:
2374 case FIXED_CST:
2375 case STRING_CST:
2376 case COMPLEX_CST:
2377 case VECTOR_CST:
2378 {
2379 hashval_t h1 = iterative_hash_expr (t1, 0);
2380 hashval_t h2 = iterative_hash_expr (t2, 0);
2381 if (h1 != h2)
2382 return h1 < h2 ? -1 : 1;
2383 break;
2384 }
2385
2386 case SSA_NAME:
2387 cmp = compare_tree (SSA_NAME_VAR (t1), SSA_NAME_VAR (t2));
2388 if (cmp != 0)
2389 return cmp;
2390
2391 if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
2392 return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
2393 break;
2394
2395 default:
2396 tclass = TREE_CODE_CLASS (code);
2397
2398 /* For var-decls, we can compare their UIDs. */
2399 if (tclass == tcc_declaration)
2400 {
2401 if (DECL_UID (t1) != DECL_UID (t2))
2402 return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
2403 break;
2404 }
2405
2406 /* For expressions with operands, compare their operands recursively. */
2407 for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
2408 {
2409 cmp = compare_tree (TREE_OPERAND (t1, i), TREE_OPERAND (t2, i));
2410 if (cmp != 0)
2411 return cmp;
2412 }
2413 }
2414
2415 return 0;
2416 }
2417
2418
2419 /* Compare two data-references DRA and DRB to sort them into chunks
2420 suitable for grouping. */
2421
2422 static int
2423 dr_group_sort_cmp (const void *dra_, const void *drb_)
2424 {
2425 data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
2426 data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
2427 int cmp;
2428
2429 /* Stabilize sort. */
2430 if (dra == drb)
2431 return 0;
2432
2433 /* Ordering of DRs according to base. */
2434 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0))
2435 {
2436 cmp = compare_tree (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb));
2437 if (cmp != 0)
2438 return cmp;
2439 }
2440
2441 /* And according to DR_OFFSET. */
2442 if (!dr_equal_offsets_p (dra, drb))
2443 {
2444 cmp = compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
2445 if (cmp != 0)
2446 return cmp;
2447 }
2448
2449 /* Put reads before writes. */
2450 if (DR_IS_READ (dra) != DR_IS_READ (drb))
2451 return DR_IS_READ (dra) ? -1 : 1;
2452
2453 /* Then sort after access size. */
2454 if (!operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
2455 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))), 0))
2456 {
2457 cmp = compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
2458 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
2459 if (cmp != 0)
2460 return cmp;
2461 }
2462
2463 /* And after step. */
2464 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
2465 {
2466 cmp = compare_tree (DR_STEP (dra), DR_STEP (drb));
2467 if (cmp != 0)
2468 return cmp;
2469 }
2470
2471 /* Then sort after DR_INIT. In case of identical DRs sort after stmt UID. */
2472 cmp = tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb));
2473 if (cmp == 0)
2474 return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
2475 return cmp;
2476 }
2477
2478 /* Function vect_analyze_data_ref_accesses.
2479
2480 Analyze the access pattern of all the data references in the loop.
2481
2482 FORNOW: the only access pattern that is considered vectorizable is a
2483 simple step 1 (consecutive) access.
2484
2485 FORNOW: handle only arrays and pointer accesses. */
2486
2487 bool
2488 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2489 {
2490 unsigned int i;
2491 vec<data_reference_p> datarefs;
2492 struct data_reference *dr;
2493
2494 if (dump_enabled_p ())
2495 dump_printf_loc (MSG_NOTE, vect_location,
2496 "=== vect_analyze_data_ref_accesses ===\n");
2497
2498 if (loop_vinfo)
2499 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2500 else
2501 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2502
2503 if (datarefs.is_empty ())
2504 return true;
2505
2506 /* Sort the array of datarefs to make building the interleaving chains
2507 linear. Don't modify the original vector's order, it is needed for
2508 determining what dependencies are reversed. */
2509 vec<data_reference_p> datarefs_copy = datarefs.copy ();
2510 qsort (datarefs_copy.address (), datarefs_copy.length (),
2511 sizeof (data_reference_p), dr_group_sort_cmp);
2512
2513 /* Build the interleaving chains. */
2514 for (i = 0; i < datarefs_copy.length () - 1;)
2515 {
2516 data_reference_p dra = datarefs_copy[i];
2517 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
2518 stmt_vec_info lastinfo = NULL;
2519 for (i = i + 1; i < datarefs_copy.length (); ++i)
2520 {
2521 data_reference_p drb = datarefs_copy[i];
2522 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
2523
2524 /* ??? Imperfect sorting (non-compatible types, non-modulo
2525 accesses, same accesses) can lead to a group being artificially
2526 split here as we don't just skip over those. If it really
2527 matters we can push those to a worklist and re-iterate
2528 over them. Then we can just skip ahead to the next DR here. */
2529
2530 /* Check that the data-refs have the same first location (except init)
2531 and that they are both stores or both loads (not one load and one store). */
2532 if (DR_IS_READ (dra) != DR_IS_READ (drb)
2533 || !operand_equal_p (DR_BASE_ADDRESS (dra),
2534 DR_BASE_ADDRESS (drb), 0)
2535 || !dr_equal_offsets_p (dra, drb))
2536 break;
2537
2538 /* Check that the data-refs have the same constant size and step. */
2539 tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
2540 tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
2541 if (!tree_fits_uhwi_p (sza)
2542 || !tree_fits_uhwi_p (szb)
2543 || !tree_int_cst_equal (sza, szb)
2544 || !tree_fits_shwi_p (DR_STEP (dra))
2545 || !tree_fits_shwi_p (DR_STEP (drb))
2546 || !tree_int_cst_equal (DR_STEP (dra), DR_STEP (drb)))
2547 break;
2548
2549 /* Do not place the same access in the interleaving chain twice. */
2550 if (tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) == 0)
2551 break;
2552
2553 /* Check the types are compatible.
2554 ??? We don't distinguish this during sorting. */
2555 if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
2556 TREE_TYPE (DR_REF (drb))))
2557 break;
2558
2559 /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). */
2560 HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
2561 HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
2562 gcc_assert (init_a < init_b);
2563
2564 /* If init_b == init_a + the size of the type * k, we have an
2565 interleaving, and DRA is accessed before DRB. */
2566 HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
2567 if ((init_b - init_a) % type_size_a != 0)
2568 break;
2569
2570 /* Require the step (if not zero) to be greater than the difference
2571 between the data-refs' inits. This splits groups into suitable sizes. */
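/* E.g., with a step of 16 bytes, inits of 0 and 16 fall into different
   groups (the second access belongs to the next iteration), while
   inits of 0 and 8 may still interleave.  */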
2572 HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
2573 if (step != 0 && step <= (init_b - init_a))
2574 break;
2575
2576 if (dump_enabled_p ())
2577 {
2578 dump_printf_loc (MSG_NOTE, vect_location,
2579 "Detected interleaving ");
2580 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
2581 dump_printf (MSG_NOTE, " and ");
2582 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
2583 dump_printf (MSG_NOTE, "\n");
2584 }
2585
2586 /* Link the found element into the group list. */
2587 if (!GROUP_FIRST_ELEMENT (stmtinfo_a))
2588 {
2589 GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (dra);
2590 lastinfo = stmtinfo_a;
2591 }
2592 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (dra);
2593 GROUP_NEXT_ELEMENT (lastinfo) = DR_STMT (drb);
2594 lastinfo = stmtinfo_b;
2595 }
2596 }
2597
2598 FOR_EACH_VEC_ELT (datarefs_copy, i, dr)
2599 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2600 && !vect_analyze_data_ref_access (dr))
2601 {
2602 if (dump_enabled_p ())
2603 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2604 "not vectorized: complicated access pattern.\n");
2605
2606 if (bb_vinfo)
2607 {
2608 /* Mark the statement as not vectorizable. */
2609 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2610 continue;
2611 }
2612 else
2613 {
2614 datarefs_copy.release ();
2615 return false;
2616 }
2617 }
2618
2619 datarefs_copy.release ();
2620 return true;
2621 }
2622
2623
2624 /* Operator == between two dr_with_seg_len objects.
2625
2626 This equality operator is used to check whether two data refs
2627 are the same, so that we can consider combining the
2628 aliasing checks of those two pairs of data-dependent data
2629 refs. */
2630
2631 static bool
2632 operator == (const dr_with_seg_len& d1,
2633 const dr_with_seg_len& d2)
2634 {
2635 return operand_equal_p (DR_BASE_ADDRESS (d1.dr),
2636 DR_BASE_ADDRESS (d2.dr), 0)
2637 && compare_tree (d1.offset, d2.offset) == 0
2638 && compare_tree (d1.seg_len, d2.seg_len) == 0;
2639 }
2640
2641 /* Function comp_dr_with_seg_len_pair.
2642
2643 Comparison function for sorting objects of dr_with_seg_len_pair_t
2644 so that we can combine aliasing checks in one scan. */
2645
2646 static int
2647 comp_dr_with_seg_len_pair (const void *p1_, const void *p2_)
2648 {
2649 const dr_with_seg_len_pair_t* p1 = (const dr_with_seg_len_pair_t *) p1_;
2650 const dr_with_seg_len_pair_t* p2 = (const dr_with_seg_len_pair_t *) p2_;
2651
2652 const dr_with_seg_len &p11 = p1->first,
2653 &p12 = p1->second,
2654 &p21 = p2->first,
2655 &p22 = p2->second;
2656
2657 /* For DR pairs (a, b) and (c, d), we only consider merging the alias checks
2658 if a and c have the same base address and step, and b and d have the same
2659 address and step. Therefore, if either a&c or b&d don't have the same address
2660 and step, we don't care about the order of those two pairs after sorting. */
2661 int comp_res;
2662
2663 if ((comp_res = compare_tree (DR_BASE_ADDRESS (p11.dr),
2664 DR_BASE_ADDRESS (p21.dr))) != 0)
2665 return comp_res;
2666 if ((comp_res = compare_tree (DR_BASE_ADDRESS (p12.dr),
2667 DR_BASE_ADDRESS (p22.dr))) != 0)
2668 return comp_res;
2669 if ((comp_res = compare_tree (DR_STEP (p11.dr), DR_STEP (p21.dr))) != 0)
2670 return comp_res;
2671 if ((comp_res = compare_tree (DR_STEP (p12.dr), DR_STEP (p22.dr))) != 0)
2672 return comp_res;
2673 if ((comp_res = compare_tree (p11.offset, p21.offset)) != 0)
2674 return comp_res;
2675 if ((comp_res = compare_tree (p12.offset, p22.offset)) != 0)
2676 return comp_res;
2677
2678 return 0;
2679 }
2680
2681 template <class T> static void
2682 swap (T& a, T& b)
2683 {
2684 T c (a);
2685 a = b;
2686 b = c;
2687 }
2688
2689 /* Function vect_vfa_segment_size.
2690
2691 Create an expression that computes the size of segment
2692 that will be accessed for a data reference. The function takes into
2693 account that realignment loads may access one more vector.
2694
2695 Input:
2696 DR: The data reference.
2697 LENGTH_FACTOR: segment length to consider.
2698
2699 Return an expression whose value is the size of segment which will be
2700 accessed by DR. */
2701
2702 static tree
2703 vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
2704 {
2705 tree segment_length;
2706
2707 if (integer_zerop (DR_STEP (dr)))
2708 segment_length = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2709 else
2710 segment_length = size_binop (MULT_EXPR,
2711 fold_convert (sizetype, DR_STEP (dr)),
2712 fold_convert (sizetype, length_factor));
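/* E.g., a 4-byte step with LENGTH_FACTOR equal to a vectorization
   factor of 4 gives a 16-byte segment; one extra vector size is added
   below for optimized realignment loads.  */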
2713
2714 if (vect_supportable_dr_alignment (dr, false)
2715 == dr_explicit_realign_optimized)
2716 {
2717 tree vector_size = TYPE_SIZE_UNIT
2718 (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
2719
2720 segment_length = size_binop (PLUS_EXPR, segment_length, vector_size);
2721 }
2722 return segment_length;
2723 }
2724
2725 /* Function vect_prune_runtime_alias_test_list.
2726
2727 Prune a list of ddrs to be tested at run-time by versioning for alias.
2728 Merge several alias checks into one if possible.
2729 Return FALSE if the resulting list of ddrs is longer than allowed by
2730 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2731
2732 bool
2733 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2734 {
2735 vec<ddr_p> may_alias_ddrs =
2736 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2737 vec<dr_with_seg_len_pair_t>& comp_alias_ddrs =
2738 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2739 int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2740 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2741
2742 ddr_p ddr;
2743 unsigned int i;
2744 tree length_factor;
2745
2746 if (dump_enabled_p ())
2747 dump_printf_loc (MSG_NOTE, vect_location,
2748 "=== vect_prune_runtime_alias_test_list ===\n");
2749
2750 if (may_alias_ddrs.is_empty ())
2751 return true;
2752
2753 /* Basically, for each pair of dependent data refs store_ptr_0
2754 and load_ptr_0, we create an expression:
2755
2756 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2757 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
2758
2759 for aliasing checks. However, in some cases we can decrease
2760 the number of checks by combining two checks into one. For
2761 example, suppose we have another pair of data refs store_ptr_0
2762 and load_ptr_1, and if the following condition is satisfied:
2763
2764 load_ptr_0 < load_ptr_1 &&
2765 load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
2766
2767 (this condition means that, in each iteration of the vectorized loop,
2768 the accessed memory of store_ptr_0 cannot be between the memory
2769 of load_ptr_0 and load_ptr_1.)
2770
2771 we can then use only the following expression to finish the
2772 aliasing checks between store_ptr_0 & load_ptr_0 and
2773 store_ptr_0 & load_ptr_1:
2774
2775 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2776 || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
2777
2778 Note that we only consider the case where load_ptr_0 and load_ptr_1
2779 have the same base address. */
2780
2781 comp_alias_ddrs.create (may_alias_ddrs.length ());
2782
2783 /* First, we collect all data ref pairs for aliasing checks. */
2784 FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
2785 {
2786 struct data_reference *dr_a, *dr_b;
2787 gimple dr_group_first_a, dr_group_first_b;
2788 tree segment_length_a, segment_length_b;
2789 gimple stmt_a, stmt_b;
2790
2791 dr_a = DDR_A (ddr);
2792 stmt_a = DR_STMT (DDR_A (ddr));
2793 dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));
2794 if (dr_group_first_a)
2795 {
2796 stmt_a = dr_group_first_a;
2797 dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
2798 }
2799
2800 dr_b = DDR_B (ddr);
2801 stmt_b = DR_STMT (DDR_B (ddr));
2802 dr_group_first_b = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_b));
2803 if (dr_group_first_b)
2804 {
2805 stmt_b = dr_group_first_b;
2806 dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
2807 }
2808
2809 if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0))
2810 length_factor = scalar_loop_iters;
2811 else
2812 length_factor = size_int (vect_factor);
2813 segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
2814 segment_length_b = vect_vfa_segment_size (dr_b, length_factor);
2815
2816 dr_with_seg_len_pair_t dr_with_seg_len_pair
2817 (dr_with_seg_len (dr_a, segment_length_a),
2818 dr_with_seg_len (dr_b, segment_length_b));
2819
2820 if (compare_tree (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b)) > 0)
2821 swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);
2822
2823 comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
2824 }
2825
2826 /* Second, we sort the collected data ref pairs so that we can scan
2827 them once to combine all possible aliasing checks. */
2828 comp_alias_ddrs.qsort (comp_dr_with_seg_len_pair);
2829
2830 /* Third, we scan the sorted dr pairs and check if we can combine
2831 alias checks of two neighbouring dr pairs. */
2832 for (size_t i = 1; i < comp_alias_ddrs.length (); ++i)
2833 {
2834 /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
2835 dr_with_seg_len *dr_a1 = &comp_alias_ddrs[i-1].first,
2836 *dr_b1 = &comp_alias_ddrs[i-1].second,
2837 *dr_a2 = &comp_alias_ddrs[i].first,
2838 *dr_b2 = &comp_alias_ddrs[i].second;
2839
2840 /* Remove duplicate data ref pairs. */
2841 if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
2842 {
2843 if (dump_enabled_p ())
2844 {
2845 dump_printf_loc (MSG_NOTE, vect_location,
2846 "found equal ranges ");
2847 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2848 DR_REF (dr_a1->dr));
2849 dump_printf (MSG_NOTE, ", ");
2850 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2851 DR_REF (dr_b1->dr));
2852 dump_printf (MSG_NOTE, " and ");
2853 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2854 DR_REF (dr_a2->dr));
2855 dump_printf (MSG_NOTE, ", ");
2856 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2857 DR_REF (dr_b2->dr));
2858 dump_printf (MSG_NOTE, "\n");
2859 }
2860
2861 comp_alias_ddrs.ordered_remove (i--);
2862 continue;
2863 }
2864
2865 if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
2866 {
2867 /* We consider the case that DR_B1 and DR_B2 are the same memref,
2868 and DR_A1 and DR_A2 are two consecutive memrefs. */
2869 if (*dr_a1 == *dr_a2)
2870 {
2871 swap (dr_a1, dr_b1);
2872 swap (dr_a2, dr_b2);
2873 }
2874
2875 if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
2876 DR_BASE_ADDRESS (dr_a2->dr),
2877 0)
2878 || !tree_fits_shwi_p (dr_a1->offset)
2879 || !tree_fits_shwi_p (dr_a2->offset))
2880 continue;
2881
2882 HOST_WIDE_INT diff = (tree_to_shwi (dr_a2->offset)
2883 - tree_to_shwi (dr_a1->offset));
2884
2885
2886 /* Now we check if the following condition is satisfied:
2887
2888 DIFF - SEGMENT_LENGTH_A < SEGMENT_LENGTH_B
2889
2890 where DIFF = DR_A2->OFFSET - DR_A1->OFFSET. However,
2891 SEGMENT_LENGTH_A or SEGMENT_LENGTH_B may not be constant so we
2892 have to make a best estimate. We can get the minimum value
2893 of SEGMENT_LENGTH_B as a constant, represented by MIN_SEG_LEN_B,
2894 then either of the following two conditions can guarantee the
2895 one above:
2896
2897 1: DIFF <= MIN_SEG_LEN_B
2898 2: DIFF - SEGMENT_LENGTH_A < MIN_SEG_LEN_B
2899
2900 */
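/* E.g., with DIFF = 16, MIN_SEG_LEN_B = 8 and SEGMENT_LENGTH_A = 16,
   condition 2 holds (16 - 16 = 0 < 8), so the two checks can be
   merged below.  */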
2901
2902 HOST_WIDE_INT min_seg_len_b = (tree_fits_shwi_p (dr_b1->seg_len)
2903 ? tree_to_shwi (dr_b1->seg_len)
2904 : vect_factor);
2905
2906 if (diff <= min_seg_len_b
2907 || (tree_fits_shwi_p (dr_a1->seg_len)
2908 && diff - tree_to_shwi (dr_a1->seg_len) < min_seg_len_b))
2909 {
2910 if (dump_enabled_p ())
2911 {
2912 dump_printf_loc (MSG_NOTE, vect_location,
2913 "merging ranges for ");
2914 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2915 DR_REF (dr_a1->dr));
2916 dump_printf (MSG_NOTE, ", ");
2917 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2918 DR_REF (dr_b1->dr));
2919 dump_printf (MSG_NOTE, " and ");
2920 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2921 DR_REF (dr_a2->dr));
2922 dump_printf (MSG_NOTE, ", ");
2923 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2924 DR_REF (dr_b2->dr));
2925 dump_printf (MSG_NOTE, "\n");
2926 }
2927
2928 dr_a1->seg_len = size_binop (PLUS_EXPR,
2929 dr_a2->seg_len, size_int (diff));
2930 comp_alias_ddrs.ordered_remove (i--);
2931 }
2932 }
2933 }
2934
2935 dump_printf_loc (MSG_NOTE, vect_location,
2936 "improved number of alias checks from %d to %d\n",
2937 may_alias_ddrs.length (), comp_alias_ddrs.length ());
2938 if ((int) comp_alias_ddrs.length () >
2939 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2940 return false;
2941
2942 return true;
2943 }
2944
2945 /* Check whether a non-affine read in stmt is suitable for gather load
2946 and if so, return a builtin decl for that operation. */
2947
2948 tree
2949 vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2950 tree *offp, int *scalep)
2951 {
2952 HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
2953 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2954 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2955 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2956 tree offtype = NULL_TREE;
2957 tree decl, base, off;
2958 enum machine_mode pmode;
2959 int punsignedp, pvolatilep;
2960
2961 base = DR_REF (dr);
2962 /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF;
2963 see if we can use the def stmt of the address. */
2964 if (is_gimple_call (stmt)
2965 && gimple_call_internal_p (stmt)
2966 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2967 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
2968 && TREE_CODE (base) == MEM_REF
2969 && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
2970 && integer_zerop (TREE_OPERAND (base, 1))
2971 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (base, 0)))
2972 {
2973 gimple def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
2974 if (is_gimple_assign (def_stmt)
2975 && gimple_assign_rhs_code (def_stmt) == ADDR_EXPR)
2976 base = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
2977 }
2978
2979 /* The gather builtins need an address of the form
2980 loop_invariant + vector * {1, 2, 4, 8}
2981 or
2982 loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
2983 Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
2984 of loop invariants/SSA_NAMEs defined in the loop, with casts,
2985 multiplications and additions in it. To get a vector, we need
2986 a single SSA_NAME that will be defined in the loop and will
2987 contain everything that is not loop invariant and that can be
2988 vectorized. The following code attempts to find such a preexisting
2989 SSA_NAME OFF and put the loop invariants into a tree BASE
2990 that can be gimplified before the loop. */
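/* E.g., for a gather-style read such as p[idx[i]] with loop-invariant P,
   the loop-variant index is what should end up in the SSA_NAME OFF
   (with the element size as the scale), while P plus any constant
   offset goes into BASE.  */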
2991 base = get_inner_reference (base, &pbitsize, &pbitpos, &off,
2992 &pmode, &punsignedp, &pvolatilep, false);
2993 gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);
2994
2995 if (TREE_CODE (base) == MEM_REF)
2996 {
2997 if (!integer_zerop (TREE_OPERAND (base, 1)))
2998 {
2999 if (off == NULL_TREE)
3000 {
3001 offset_int moff = mem_ref_offset (base);
3002 off = wide_int_to_tree (sizetype, moff);
3003 }
3004 else
3005 off = size_binop (PLUS_EXPR, off,
3006 fold_convert (sizetype, TREE_OPERAND (base, 1)));
3007 }
3008 base = TREE_OPERAND (base, 0);
3009 }
3010 else
3011 base = build_fold_addr_expr (base);
3012
3013 if (off == NULL_TREE)
3014 off = size_zero_node;
3015
3016 /* If base is not loop invariant, then either OFF is 0 and we start with
3017 just the constant offset in the loop invariant BASE and continue with
3018 base as OFF, or we give up.
3019 We could handle that case by gimplifying the addition of base + off
3020 into some SSA_NAME and use that as off, but for now punt. */
3021 if (!expr_invariant_in_loop_p (loop, base))
3022 {
3023 if (!integer_zerop (off))
3024 return NULL_TREE;
3025 off = base;
3026 base = size_int (pbitpos / BITS_PER_UNIT);
3027 }
3028 /* Otherwise put base + constant offset into the loop invariant BASE
3029 and continue with OFF. */
3030 else
3031 {
3032 base = fold_convert (sizetype, base);
3033 base = size_binop (PLUS_EXPR, base, size_int (pbitpos / BITS_PER_UNIT));
3034 }
3035
3036 /* OFF at this point may be either a SSA_NAME or some tree expression
3037 from get_inner_reference. Try to peel off loop invariants from it
3038 into BASE as long as possible. */
3039 STRIP_NOPS (off);
3040 while (offtype == NULL_TREE)
3041 {
3042 enum tree_code code;
3043 tree op0, op1, add = NULL_TREE;
3044
3045 if (TREE_CODE (off) == SSA_NAME)
3046 {
3047 gimple def_stmt = SSA_NAME_DEF_STMT (off);
3048
3049 if (expr_invariant_in_loop_p (loop, off))
3050 return NULL_TREE;
3051
3052 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
3053 break;
3054
3055 op0 = gimple_assign_rhs1 (def_stmt);
3056 code = gimple_assign_rhs_code (def_stmt);
3057 op1 = gimple_assign_rhs2 (def_stmt);
3058 }
3059 else
3060 {
3061 if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
3062 return NULL_TREE;
3063 code = TREE_CODE (off);
3064 extract_ops_from_tree (off, &code, &op0, &op1);
3065 }
3066 switch (code)
3067 {
3068 case POINTER_PLUS_EXPR:
3069 case PLUS_EXPR:
3070 if (expr_invariant_in_loop_p (loop, op0))
3071 {
3072 add = op0;
3073 off = op1;
3074 do_add:
3075 add = fold_convert (sizetype, add);
3076 if (scale != 1)
3077 add = size_binop (MULT_EXPR, add, size_int (scale));
3078 base = size_binop (PLUS_EXPR, base, add);
3079 continue;
3080 }
3081 if (expr_invariant_in_loop_p (loop, op1))
3082 {
3083 add = op1;
3084 off = op0;
3085 goto do_add;
3086 }
3087 break;
3088 case MINUS_EXPR:
3089 if (expr_invariant_in_loop_p (loop, op1))
3090 {
3091 add = fold_convert (sizetype, op1);
3092 add = size_binop (MINUS_EXPR, size_zero_node, add);
3093 off = op0;
3094 goto do_add;
3095 }
3096 break;
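/* A multiplication by a compile-time constant supplies the gather
   scale, e.g. OFF = idx * 4 is peeled to OFF = idx with SCALE = 4.  */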
3097 case MULT_EXPR:
3098 if (scale == 1 && tree_fits_shwi_p (op1))
3099 {
3100 scale = tree_to_shwi (op1);
3101 off = op0;
3102 continue;
3103 }
3104 break;
3105 case SSA_NAME:
3106 off = op0;
3107 continue;
3108 CASE_CONVERT:
3109 if (!POINTER_TYPE_P (TREE_TYPE (op0))
3110 && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3111 break;
3112 if (TYPE_PRECISION (TREE_TYPE (op0))
3113 == TYPE_PRECISION (TREE_TYPE (off)))
3114 {
3115 off = op0;
3116 continue;
3117 }
3118 if (TYPE_PRECISION (TREE_TYPE (op0))
3119 < TYPE_PRECISION (TREE_TYPE (off)))
3120 {
3121 off = op0;
3122 offtype = TREE_TYPE (off);
3123 STRIP_NOPS (off);
3124 continue;
3125 }
3126 break;
3127 default:
3128 break;
3129 }
3130 break;
3131 }
3132
3133 /* If at the end OFF still isn't a SSA_NAME or isn't
3134 defined in the loop, punt. */
3135 if (TREE_CODE (off) != SSA_NAME
3136 || expr_invariant_in_loop_p (loop, off))
3137 return NULL_TREE;
3138
3139 if (offtype == NULL_TREE)
3140 offtype = TREE_TYPE (off);
3141
3142 decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
3143 offtype, scale);
3144 if (decl == NULL_TREE)
3145 return NULL_TREE;
3146
3147 if (basep)
3148 *basep = base;
3149 if (offp)
3150 *offp = off;
3151 if (scalep)
3152 *scalep = scale;
3153 return decl;
3154 }
3155
3156 /* Function vect_analyze_data_refs.
3157
3158 Find all the data references in the loop or basic block.
3159
3160 The general structure of the analysis of data refs in the vectorizer is as
3161 follows:
3162 1- vect_analyze_data_refs(loop/bb): call
3163 compute_data_dependences_for_loop/bb to find and analyze all data-refs
3164 in the loop/bb and their dependences.
3165 2- vect_analyze_dependences(): apply dependence testing using ddrs.
3166 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
3167 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
3168
3169 */
3170
3171 bool
3172 vect_analyze_data_refs (loop_vec_info loop_vinfo,
3173 bb_vec_info bb_vinfo,
3174 int *min_vf, unsigned *n_stmts)
3175 {
3176 struct loop *loop = NULL;
3177 basic_block bb = NULL;
3178 unsigned int i;
3179 vec<data_reference_p> datarefs;
3180 struct data_reference *dr;
3181 tree scalar_type;
3182
3183 if (dump_enabled_p ())
3184 dump_printf_loc (MSG_NOTE, vect_location,
3185 "=== vect_analyze_data_refs ===\n");
3186
3187 if (loop_vinfo)
3188 {
3189 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3190
3191 loop = LOOP_VINFO_LOOP (loop_vinfo);
3192 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3193 if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo)))
3194 {
3195 if (dump_enabled_p ())
3196 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3197 "not vectorized: loop contains function calls"
3198 " or data references that cannot be analyzed\n");
3199 return false;
3200 }
3201
3202 for (i = 0; i < loop->num_nodes; i++)
3203 {
3204 gimple_stmt_iterator gsi;
3205
3206 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
3207 {
3208 gimple stmt = gsi_stmt (gsi);
3209 if (is_gimple_debug (stmt))
3210 continue;
3211 ++*n_stmts;
3212 if (!find_data_references_in_stmt (loop, stmt, &datarefs))
3213 {
3214 if (is_gimple_call (stmt) && loop->safelen)
3215 {
3216 tree fndecl = gimple_call_fndecl (stmt), op;
3217 if (fndecl != NULL_TREE)
3218 {
3219 struct cgraph_node *node = cgraph_get_node (fndecl);
3220 if (node != NULL && node->simd_clones != NULL)
3221 {
3222 unsigned int j, n = gimple_call_num_args (stmt);
3223 for (j = 0; j < n; j++)
3224 {
3225 op = gimple_call_arg (stmt, j);
3226 if (DECL_P (op)
3227 || (REFERENCE_CLASS_P (op)
3228 && get_base_address (op)))
3229 break;
3230 }
3231 op = gimple_call_lhs (stmt);
3232 /* Ignore #pragma omp declare simd functions
3233 if they don't have data references in the
3234 call stmt itself. */
3235 if (j == n
3236 && !(op
3237 && (DECL_P (op)
3238 || (REFERENCE_CLASS_P (op)
3239 && get_base_address (op)))))
3240 continue;
3241 }
3242 }
3243 }
3244 LOOP_VINFO_DATAREFS (loop_vinfo) = datarefs;
3245 if (dump_enabled_p ())
3246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3247 "not vectorized: loop contains function "
3248 "calls or data references that cannot "
3249 "be analyzed\n");
3250 return false;
3251 }
3252 }
3253 }
3254
3255 LOOP_VINFO_DATAREFS (loop_vinfo) = datarefs;
3256 }
3257 else
3258 {
3259 gimple_stmt_iterator gsi;
3260
3261 bb = BB_VINFO_BB (bb_vinfo);
3262 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3263 {
3264 gimple stmt = gsi_stmt (gsi);
3265 if (is_gimple_debug (stmt))
3266 continue;
3267 ++*n_stmts;
3268 if (!find_data_references_in_stmt (NULL, stmt,
3269 &BB_VINFO_DATAREFS (bb_vinfo)))
3270 {
3271 /* Mark the rest of the basic-block as unvectorizable. */
3272 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3273 {
3274 stmt = gsi_stmt (gsi);
3275 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)) = false;
3276 }
3277 break;
3278 }
3279 }
3280
3281 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
3282 }
3283
3284 /* Go through the data-refs and check that the analysis succeeded. Update
3285 pointer from stmt_vec_info struct to DR and vectype. */
3286
3287 FOR_EACH_VEC_ELT (datarefs, i, dr)
3288 {
3289 gimple stmt;
3290 stmt_vec_info stmt_info;
3291 tree base, offset, init;
3292 bool gather = false;
3293 bool simd_lane_access = false;
3294 int vf;
3295
3296 again:
3297 if (!dr || !DR_REF (dr))
3298 {
3299 if (dump_enabled_p ())
3300 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3301 "not vectorized: unhandled data-ref\n");
3302 return false;
3303 }
3304
3305 stmt = DR_STMT (dr);
3306 stmt_info = vinfo_for_stmt (stmt);
3307
3308 /* Discard clobbers from the dataref vector. We will remove
3309 clobber stmts during vectorization. */
3310 if (gimple_clobber_p (stmt))
3311 {
3312 free_data_ref (dr);
3313 if (i == datarefs.length () - 1)
3314 {
3315 datarefs.pop ();
3316 break;
3317 }
3318 datarefs.ordered_remove (i);
3319 dr = datarefs[i];
3320 goto again;
3321 }
3322
3323 /* Check that analysis of the data-ref succeeded. */
3324 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
3325 || !DR_STEP (dr))
3326 {
3327 bool maybe_gather
3328 = DR_IS_READ (dr)
3329 && !TREE_THIS_VOLATILE (DR_REF (dr))
3330 && targetm.vectorize.builtin_gather != NULL;
3331 bool maybe_simd_lane_access
3332 = loop_vinfo && loop->simduid;
3333
3334 /* If the target supports vector gather loads, or if this might be
3335 a SIMD lane access, see if they can be used. */
3336 if (loop_vinfo
3337 && (maybe_gather || maybe_simd_lane_access)
3338 && !nested_in_vect_loop_p (loop, stmt))
3339 {
3340 struct data_reference *newdr
3341 = create_data_ref (NULL, loop_containing_stmt (stmt),
3342 DR_REF (dr), stmt, true);
3343 gcc_assert (newdr != NULL && DR_REF (newdr));
3344 if (DR_BASE_ADDRESS (newdr)
3345 && DR_OFFSET (newdr)
3346 && DR_INIT (newdr)
3347 && DR_STEP (newdr)
3348 && integer_zerop (DR_STEP (newdr)))
3349 {
3350 if (maybe_simd_lane_access)
3351 {
3352 tree off = DR_OFFSET (newdr);
3353 STRIP_NOPS (off);
3354 if (TREE_CODE (DR_INIT (newdr)) == INTEGER_CST
3355 && TREE_CODE (off) == MULT_EXPR
3356 && tree_fits_uhwi_p (TREE_OPERAND (off, 1)))
3357 {
3358 tree step = TREE_OPERAND (off, 1);
3359 off = TREE_OPERAND (off, 0);
3360 STRIP_NOPS (off);
3361 if (CONVERT_EXPR_P (off)
3362 && TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off,
3363 0)))
3364 < TYPE_PRECISION (TREE_TYPE (off)))
3365 off = TREE_OPERAND (off, 0);
3366 if (TREE_CODE (off) == SSA_NAME)
3367 {
3368 gimple def = SSA_NAME_DEF_STMT (off);
3369 tree reft = TREE_TYPE (DR_REF (newdr));
3370 if (is_gimple_call (def)
3371 && gimple_call_internal_p (def)
3372 && (gimple_call_internal_fn (def)
3373 == IFN_GOMP_SIMD_LANE))
3374 {
3375 tree arg = gimple_call_arg (def, 0);
3376 gcc_assert (TREE_CODE (arg) == SSA_NAME);
3377 arg = SSA_NAME_VAR (arg);
3378 if (arg == loop->simduid
3379 /* For now. */
3380 && tree_int_cst_equal
3381 (TYPE_SIZE_UNIT (reft),
3382 step))
3383 {
3384 DR_OFFSET (newdr) = ssize_int (0);
3385 DR_STEP (newdr) = step;
3386 DR_ALIGNED_TO (newdr)
3387 = size_int (BIGGEST_ALIGNMENT);
3388 dr = newdr;
3389 simd_lane_access = true;
3390 }
3391 }
3392 }
3393 }
3394 }
3395 if (!simd_lane_access && maybe_gather)
3396 {
3397 dr = newdr;
3398 gather = true;
3399 }
3400 }
3401 if (!gather && !simd_lane_access)
3402 free_data_ref (newdr);
3403 }
3404
3405 if (!gather && !simd_lane_access)
3406 {
3407 if (dump_enabled_p ())
3408 {
3409 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3410 "not vectorized: data ref analysis "
3411 "failed ");
3412 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3413 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3414 }
3415
3416 if (bb_vinfo)
3417 break;
3418
3419 return false;
3420 }
3421 }
3422
3423 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3424 {
3425 if (dump_enabled_p ())
3426 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3427 "not vectorized: base addr of dr is a "
3428 "constant\n");
3429
3430 if (bb_vinfo)
3431 break;
3432
3433 if (gather || simd_lane_access)
3434 free_data_ref (dr);
3435 return false;
3436 }
3437
3438 if (TREE_THIS_VOLATILE (DR_REF (dr)))
3439 {
3440 if (dump_enabled_p ())
3441 {
3442 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3443 "not vectorized: volatile type ");
3444 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3445 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3446 }
3447
3448 if (bb_vinfo)
3449 break;
3450
3451 return false;
3452 }
3453
3454 if (stmt_can_throw_internal (stmt))
3455 {
3456 if (dump_enabled_p ())
3457 {
3458 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3459 "not vectorized: statement can throw an "
3460 "exception ");
3461 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3462 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3463 }
3464
3465 if (bb_vinfo)
3466 break;
3467
3468 if (gather || simd_lane_access)
3469 free_data_ref (dr);
3470 return false;
3471 }
3472
3473 if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
3474 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
3475 {
3476 if (dump_enabled_p ())
3477 {
3478 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3479 "not vectorized: statement is bitfield "
3480 "access ");
3481 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3482 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3483 }
3484
3485 if (bb_vinfo)
3486 break;
3487
3488 if (gather || simd_lane_access)
3489 free_data_ref (dr);
3490 return false;
3491 }
3492
3493 base = unshare_expr (DR_BASE_ADDRESS (dr));
3494 offset = unshare_expr (DR_OFFSET (dr));
3495 init = unshare_expr (DR_INIT (dr));
3496
3497 if (is_gimple_call (stmt)
3498 && (!gimple_call_internal_p (stmt)
3499 || (gimple_call_internal_fn (stmt) != IFN_MASK_LOAD
3500 && gimple_call_internal_fn (stmt) != IFN_MASK_STORE)))
3501 {
3502 if (dump_enabled_p ())
3503 {
3504 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3505 "not vectorized: dr in a call ");
3506 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3507 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3508 }
3509
3510 if (bb_vinfo)
3511 break;
3512
3513 if (gather || simd_lane_access)
3514 free_data_ref (dr);
3515 return false;
3516 }
3517
3518 /* Update DR field in stmt_vec_info struct. */
3519
3520 /* If the dataref is in an inner-loop of the loop that is considered
3521 for vectorization, we also want to analyze the access relative to
3522 the outer-loop (DR contains information only relative to the
3523 inner-most enclosing loop). We do that by building a reference to the
3524 first location accessed by the inner-loop, and analyzing it relative to
3525 the outer-loop. */
3526 if (loop && nested_in_vect_loop_p (loop, stmt))
3527 {
3528 tree outer_step, outer_base, outer_init;
3529 HOST_WIDE_INT pbitsize, pbitpos;
3530 tree poffset;
3531 enum machine_mode pmode;
3532 int punsignedp, pvolatilep;
3533 affine_iv base_iv, offset_iv;
3534 tree dinit;
3535
3536 /* Build a reference to the first location accessed by the
3537 inner-loop: *(BASE+INIT). (The first location is actually
3538 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3539 tree inner_base = build_fold_indirect_ref
3540 (fold_build_pointer_plus (base, init));
3541
3542 if (dump_enabled_p ())
3543 {
3544 dump_printf_loc (MSG_NOTE, vect_location,
3545 "analyze in outer-loop: ");
3546 dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base);
3547 dump_printf (MSG_NOTE, "\n");
3548 }
3549
3550 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3551 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3552 gcc_assert (outer_base != NULL_TREE);
3553
3554 if (pbitpos % BITS_PER_UNIT != 0)
3555 {
3556 if (dump_enabled_p ())
3557 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3558 "failed: bit offset alignment.\n");
3559 return false;
3560 }
3561
3562 outer_base = build_fold_addr_expr (outer_base);
3563 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
3564 &base_iv, false))
3565 {
3566 if (dump_enabled_p ())
3567 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3568 "failed: evolution of base is not affine.\n");
3569 return false;
3570 }
3571
3572 if (offset)
3573 {
3574 if (poffset)
3575 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
3576 poffset);
3577 else
3578 poffset = offset;
3579 }
3580
3581 if (!poffset)
3582 {
3583 offset_iv.base = ssize_int (0);
3584 offset_iv.step = ssize_int (0);
3585 }
3586 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
3587 &offset_iv, false))
3588 {
3589 if (dump_enabled_p ())
3590 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3591 "evolution of offset is not affine.\n");
3592 return false;
3593 }
3594
3595 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3596 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3597 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3598 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3599 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3600
3601 outer_step = size_binop (PLUS_EXPR,
3602 fold_convert (ssizetype, base_iv.step),
3603 fold_convert (ssizetype, offset_iv.step));
3604
3605 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3606 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3607 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3608 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3609 STMT_VINFO_DR_OFFSET (stmt_info) =
3610 fold_convert (ssizetype, offset_iv.base);
3611 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3612 size_int (highest_pow2_factor (offset_iv.base));
3613
3614 if (dump_enabled_p ())
3615 {
3616 dump_printf_loc (MSG_NOTE, vect_location,
3617 "\touter base_address: ");
3618 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3619 STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3620 dump_printf (MSG_NOTE, "\n\touter offset from base address: ");
3621 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3622 STMT_VINFO_DR_OFFSET (stmt_info));
3623 dump_printf (MSG_NOTE,
3624 "\n\touter constant offset from base address: ");
3625 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3626 STMT_VINFO_DR_INIT (stmt_info));
3627 dump_printf (MSG_NOTE, "\n\touter step: ");
3628 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3629 STMT_VINFO_DR_STEP (stmt_info));
3630 dump_printf (MSG_NOTE, "\n\touter aligned to: ");
3631 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3632 STMT_VINFO_DR_ALIGNED_TO (stmt_info));
3633 dump_printf (MSG_NOTE, "\n");
3634 }
3635 }
3636
3637 if (STMT_VINFO_DATA_REF (stmt_info))
3638 {
3639 if (dump_enabled_p ())
3640 {
3641 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3642 "not vectorized: more than one data ref "
3643 "in stmt: ");
3644 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3645 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3646 }
3647
3648 if (bb_vinfo)
3649 break;
3650
3651 if (gather || simd_lane_access)
3652 free_data_ref (dr);
3653 return false;
3654 }
3655
3656 STMT_VINFO_DATA_REF (stmt_info) = dr;
3657 if (simd_lane_access)
3658 {
3659 STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) = true;
3660 free_data_ref (datarefs[i]);
3661 datarefs[i] = dr;
3662 }
3663
3664 /* Set vectype for STMT. */
3665 scalar_type = TREE_TYPE (DR_REF (dr));
3666 STMT_VINFO_VECTYPE (stmt_info)
3667 = get_vectype_for_scalar_type (scalar_type);
3668 if (!STMT_VINFO_VECTYPE (stmt_info))
3669 {
3670 if (dump_enabled_p ())
3671 {
3672 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3673 "not vectorized: no vectype for stmt: ");
3674 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3675 dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
3676 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
3677 scalar_type);
3678 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3679 }
3680
3681 if (bb_vinfo)
3682 break;
3683
3684 if (gather || simd_lane_access)
3685 {
3686 STMT_VINFO_DATA_REF (stmt_info) = NULL;
3687 if (gather)
3688 free_data_ref (dr);
3689 }
3690 return false;
3691 }
3692 else
3693 {
3694 if (dump_enabled_p ())
3695 {
3696 dump_printf_loc (MSG_NOTE, vect_location,
3697 "got vectype for stmt: ");
3698 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3699 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3700 STMT_VINFO_VECTYPE (stmt_info));
3701 dump_printf (MSG_NOTE, "\n");
3702 }
3703 }
3704
3705 /* Adjust the minimal vectorization factor according to the
3706 vector type. */
3707 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
3708 if (vf > *min_vf)
3709 *min_vf = vf;
3710
3711 if (gather)
3712 {
3713 tree off;
3714
3715 gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
3716 if (gather
3717 && get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
3718 gather = false;
3719 if (!gather)
3720 {
3721 STMT_VINFO_DATA_REF (stmt_info) = NULL;
3722 free_data_ref (dr);
3723 if (dump_enabled_p ())
3724 {
3725 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3726 "not vectorized: not suitable for gather "
3727 "load ");
3728 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3729 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3730 }
3731 return false;
3732 }
3733
3734 datarefs[i] = dr;
3735 STMT_VINFO_GATHER_P (stmt_info) = true;
3736 }
3737 else if (loop_vinfo
3738 && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
3739 {
3740 if (nested_in_vect_loop_p (loop, stmt)
3741 || !DR_IS_READ (dr))
3742 {
3743 if (dump_enabled_p ())
3744 {
3745 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3746 "not vectorized: not suitable for strided "
3747 "load ");
3748 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3749 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3750 }
3751 return false;
3752 }
3753 STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
3754 }
3755 }
3756
3757 /* If we stopped analysis at the first dataref we could not analyze
3758 when trying to vectorize a basic-block, mark the rest of the datarefs
3759 as not vectorizable and truncate the vector of datarefs. That
3760 avoids wasting time analyzing their dependences. */
3761 if (i != datarefs.length ())
3762 {
3763 gcc_assert (bb_vinfo != NULL);
3764 for (unsigned j = i; j < datarefs.length (); ++j)
3765 {
3766 data_reference_p dr = datarefs[j];
3767 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
3768 free_data_ref (dr);
3769 }
3770 datarefs.truncate (i);
3771 }
3772
3773 return true;
3774 }
3775
3776
3777 /* Function vect_get_new_vect_var.
3778
3779 Returns a new temporary variable. The current naming scheme uses the
3780 prefix "vect", "stmp" or "vectp" (depending on the value of VAR_KIND)
3781 for vectorizer generated variables, and appends NAME to that prefix
3782 if provided. */
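
/* For illustration only: the prefix chosen for each VAR_KIND is the one
   in the switch below, so a request such as

     vect_get_new_vect_var (ptr_type, vect_pointer_var, "in")

   creates a temporary whose name starts with "vectp_in", while passing a
   NULL name yields one named just after the prefix (e.g. "vect").  */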
3783
3784 tree
3785 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
3786 {
3787 const char *prefix;
3788 tree new_vect_var;
3789
3790 switch (var_kind)
3791 {
3792 case vect_simple_var:
3793 prefix = "vect";
3794 break;
3795 case vect_scalar_var:
3796 prefix = "stmp";
3797 break;
3798 case vect_pointer_var:
3799 prefix = "vectp";
3800 break;
3801 default:
3802 gcc_unreachable ();
3803 }
3804
3805 if (name)
3806 {
3807 char* tmp = concat (prefix, "_", name, NULL);
3808 new_vect_var = create_tmp_reg (type, tmp);
3809 free (tmp);
3810 }
3811 else
3812 new_vect_var = create_tmp_reg (type, prefix);
3813
3814 return new_vect_var;
3815 }
3816
3817
3818 /* Function vect_create_addr_base_for_vector_ref.
3819
3820 Create an expression that computes the address of the first memory location
3821 that will be accessed for a data reference.
3822
3823 Input:
3824 STMT: The statement containing the data reference.
3825 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
3826 OFFSET: Optional. If supplied, it is added to the initial address.
3827 LOOP: Specify relative to which loop-nest should the address be computed.
3828 For example, when the dataref is in an inner-loop nested in an
3829 outer-loop that is now being vectorized, LOOP can be either the
3830 outer-loop, or the inner-loop. The first memory location accessed
3831 by the following dataref ('in' points to short):
3832
3833 for (i=0; i<N; i++)
3834 for (j=0; j<M; j++)
3835 s += in[i+j]
3836
3837 is as follows:
3838 if LOOP=i_loop: &in (relative to i_loop)
3839 if LOOP=j_loop: &in+i*2B (relative to j_loop)
3840
3841 Output:
3842 1. Return an SSA_NAME whose value is the address of the memory location of
3843 the first vector of the data reference.
3844 2. If new_stmt_list is not NULL_TREE after return, the caller must insert
3845 these statement(s), which define the returned SSA_NAME.
3846
3847 FORNOW: We are only handling array accesses with step 1. */
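
/* For illustration only (names and SSA versions are made up, and the
   exact statements depend on the data reference): for an access a[i]
   over 4-byte elements, with DR_OFFSET and DR_INIT both zero and an
   OFFSET argument supplied, the statements appended to NEW_STMT_LIST
   look roughly like

     _1 = OFFSET * 4;
     vectp_a_2 = &a + _1;

   where vectp_a_2 has the vector pointer type and is the returned
   SSA_NAME.  */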
3848
3849 tree
3850 vect_create_addr_base_for_vector_ref (gimple stmt,
3851 gimple_seq *new_stmt_list,
3852 tree offset,
3853 struct loop *loop)
3854 {
3855 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3856 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3857 tree data_ref_base;
3858 const char *base_name;
3859 tree addr_base;
3860 tree dest;
3861 gimple_seq seq = NULL;
3862 tree base_offset;
3863 tree init;
3864 tree vect_ptr_type;
3865 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
3866 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3867
3868 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
3869 {
3870 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
3871
3872 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
3873
3874 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3875 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
3876 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
3877 }
3878 else
3879 {
3880 data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
3881 base_offset = unshare_expr (DR_OFFSET (dr));
3882 init = unshare_expr (DR_INIT (dr));
3883 }
3884
3885 if (loop_vinfo)
3886 base_name = get_name (data_ref_base);
3887 else
3888 {
3889 base_offset = ssize_int (0);
3890 init = ssize_int (0);
3891 base_name = get_name (DR_REF (dr));
3892 }
3893
3894 /* Create base_offset */
3895 base_offset = size_binop (PLUS_EXPR,
3896 fold_convert (sizetype, base_offset),
3897 fold_convert (sizetype, init));
3898
3899 if (offset)
3900 {
3901 offset = fold_build2 (MULT_EXPR, sizetype,
3902 fold_convert (sizetype, offset), step);
3903 base_offset = fold_build2 (PLUS_EXPR, sizetype,
3904 base_offset, offset);
3905 }
3906
3907 /* base + base_offset */
3908 if (loop_vinfo)
3909 addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
3910 else
3911 {
3912 addr_base = build1 (ADDR_EXPR,
3913 build_pointer_type (TREE_TYPE (DR_REF (dr))),
3914 unshare_expr (DR_REF (dr)));
3915 }
3916
3917 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
3918 addr_base = fold_convert (vect_ptr_type, addr_base);
3919 dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
3920 addr_base = force_gimple_operand (addr_base, &seq, false, dest);
3921 gimple_seq_add_seq (new_stmt_list, seq);
3922
3923 if (DR_PTR_INFO (dr)
3924 && TREE_CODE (addr_base) == SSA_NAME)
3925 {
3926 duplicate_ssa_name_ptr_info (addr_base, DR_PTR_INFO (dr));
3927 if (offset)
3928 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr_base));
3929 }
3930
3931 if (dump_enabled_p ())
3932 {
3933 dump_printf_loc (MSG_NOTE, vect_location, "created ");
3934 dump_generic_expr (MSG_NOTE, TDF_SLIM, addr_base);
3935 dump_printf (MSG_NOTE, "\n");
3936 }
3937
3938 return addr_base;
3939 }
3940
3941
3942 /* Function vect_create_data_ref_ptr.
3943
3944 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
3945 location accessed in the loop by STMT, along with the def-use update
3946 chain to appropriately advance the pointer through the loop iterations.
3947 Also set aliasing information for the pointer. This pointer is used by
3948 the callers to this function to create a memory reference expression for
3949 vector load/store access.
3950
3951 Input:
3952 1. STMT: a stmt that references memory. Expected to be of the form
3953 GIMPLE_ASSIGN <name, data-ref> or
3954 GIMPLE_ASSIGN <data-ref, name>.
3955 2. AGGR_TYPE: the type of the reference, which should be either a vector
3956 or an array.
3957 3. AT_LOOP: the loop where the vector memref is to be created.
3958 4. OFFSET (optional): an offset to be added to the initial address accessed
3959 by the data-ref in STMT.
3960 5. BSI: location where the new stmts are to be placed if there is no loop.
3961 6. ONLY_INIT: indicates whether ap is to be updated in the loop, or remain
3962 pointing to the initial address.
3963
3964 Output:
3965 1. Declare a new ptr to vector_type, and have it point to the base of the
3966 data reference (initial address accessed by the data reference).
3967 For example, for vector of type V8HI, the following code is generated:
3968
3969 v8hi *ap;
3970 ap = (v8hi *)initial_address;
3971
3972 if OFFSET is not supplied:
3973 initial_address = &a[init];
3974 if OFFSET is supplied:
3975 initial_address = &a[init + OFFSET];
3976
3977 Return the initial_address in INITIAL_ADDRESS.
3978
3979 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
3980 update the pointer in each iteration of the loop.
3981
3982 Return the increment stmt that updates the pointer in PTR_INCR.
3983
3984 3. Set INV_P to true if the access pattern of the data reference in the
3985 vectorized loop is invariant. Set it to false otherwise.
3986
3987 4. Return the pointer. */
3988
3989 tree
3990 vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3991 tree offset, tree *initial_address,
3992 gimple_stmt_iterator *gsi, gimple *ptr_incr,
3993 bool only_init, bool *inv_p)
3994 {
3995 const char *base_name;
3996 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3997 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3998 struct loop *loop = NULL;
3999 bool nested_in_vect_loop = false;
4000 struct loop *containing_loop = NULL;
4001 tree aggr_ptr_type;
4002 tree aggr_ptr;
4003 tree new_temp;
4004 gimple vec_stmt;
4005 gimple_seq new_stmt_list = NULL;
4006 edge pe = NULL;
4007 basic_block new_bb;
4008 tree aggr_ptr_init;
4009 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4010 tree aptr;
4011 gimple_stmt_iterator incr_gsi;
4012 bool insert_after;
4013 tree indx_before_incr, indx_after_incr;
4014 gimple incr;
4015 tree step;
4016 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4017
4018 gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
4019 || TREE_CODE (aggr_type) == VECTOR_TYPE);
4020
4021 if (loop_vinfo)
4022 {
4023 loop = LOOP_VINFO_LOOP (loop_vinfo);
4024 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4025 containing_loop = (gimple_bb (stmt))->loop_father;
4026 pe = loop_preheader_edge (loop);
4027 }
4028 else
4029 {
4030 gcc_assert (bb_vinfo);
4031 only_init = true;
4032 *ptr_incr = NULL;
4033 }
4034
4035 /* Check the step (evolution) of the load in LOOP, and record
4036 whether it's invariant. */
4037 if (nested_in_vect_loop)
4038 step = STMT_VINFO_DR_STEP (stmt_info);
4039 else
4040 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
4041
4042 if (integer_zerop (step))
4043 *inv_p = true;
4044 else
4045 *inv_p = false;
4046
4047 /* Create an expression for the first address accessed by this load
4048 in LOOP. */
4049 base_name = get_name (DR_BASE_ADDRESS (dr));
4050
4051 if (dump_enabled_p ())
4052 {
4053 tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
4054 dump_printf_loc (MSG_NOTE, vect_location,
4055 "create %s-pointer variable to type: ",
4056 get_tree_code_name (TREE_CODE (aggr_type)));
4057 dump_generic_expr (MSG_NOTE, TDF_SLIM, aggr_type);
4058 if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
4059 dump_printf (MSG_NOTE, " vectorizing an array ref: ");
4060 else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
4061 dump_printf (MSG_NOTE, " vectorizing a vector ref: ");
4062 else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
4063 dump_printf (MSG_NOTE, " vectorizing a record based array ref: ");
4064 else
4065 dump_printf (MSG_NOTE, " vectorizing a pointer ref: ");
4066 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_BASE_OBJECT (dr));
4067 dump_printf (MSG_NOTE, "\n");
4068 }
4069
4070 /* (1) Create the new aggregate-pointer variable.
4071 Vector and array types inherit the alias set of their component
4072 type by default so we need to use a ref-all pointer if the data
4073 reference does not conflict with the created aggregated data
4074 reference because it is not addressable. */
4075 bool need_ref_all = false;
4076 if (!alias_sets_conflict_p (get_alias_set (aggr_type),
4077 get_alias_set (DR_REF (dr))))
4078 need_ref_all = true;
4079 /* Likewise for any of the data references in the stmt group. */
4080 else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
4081 {
4082 gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
4083 do
4084 {
4085 stmt_vec_info sinfo = vinfo_for_stmt (orig_stmt);
4086 struct data_reference *sdr = STMT_VINFO_DATA_REF (sinfo);
4087 if (!alias_sets_conflict_p (get_alias_set (aggr_type),
4088 get_alias_set (DR_REF (sdr))))
4089 {
4090 need_ref_all = true;
4091 break;
4092 }
4093 orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (sinfo);
4094 }
4095 while (orig_stmt);
4096 }
4097 aggr_ptr_type = build_pointer_type_for_mode (aggr_type, ptr_mode,
4098 need_ref_all);
4099 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, base_name);
4100
4101
4102 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
4103 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
4104 def-use update cycles for the pointer: one relative to the outer-loop
4105 (LOOP), which is what steps (3) and (4) below do. The other is relative
4106 to the inner-loop (which is the inner-most loop containing the dataref),
4107 and this is done by step (5) below.
4108
4109 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
4110 inner-most loop, and so steps (3),(4) work the same, and step (5) is
4111 redundant. Steps (3),(4) create the following:
4112
4113 vp0 = &base_addr;
4114 LOOP: vp1 = phi(vp0,vp2)
4115 ...
4116 ...
4117 vp2 = vp1 + step
4118 goto LOOP
4119
4120 If there is an inner-loop nested in LOOP, then step (5) will also be
4121 applied, and an additional update in the inner-loop will be created:
4122
4123 vp0 = &base_addr;
4124 LOOP: vp1 = phi(vp0,vp2)
4125 ...
4126 inner: vp3 = phi(vp1,vp4)
4127 vp4 = vp3 + inner_step
4128 if () goto inner
4129 ...
4130 vp2 = vp1 + step
4131 if () goto LOOP */
4132
4133 /* (2) Calculate the initial address of the aggregate-pointer, and set
4134 the aggregate-pointer to point to it before the loop. */
4135
4136 /* Create: &(base[init_val+offset]) in the loop preheader. */
4137
4138 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
4139 offset, loop);
4140 if (new_stmt_list)
4141 {
4142 if (pe)
4143 {
4144 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
4145 gcc_assert (!new_bb);
4146 }
4147 else
4148 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
4149 }
4150
4151 *initial_address = new_temp;
4152
4153 /* Create: p = (aggr_type *) initial_base */
4154 if (TREE_CODE (new_temp) != SSA_NAME
4155 || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
4156 {
4157 vec_stmt = gimple_build_assign (aggr_ptr,
4158 fold_convert (aggr_ptr_type, new_temp));
4159 aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
4160 /* Copy the points-to information if it exists. */
4161 if (DR_PTR_INFO (dr))
4162 duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
4163 gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
4164 if (pe)
4165 {
4166 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
4167 gcc_assert (!new_bb);
4168 }
4169 else
4170 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
4171 }
4172 else
4173 aggr_ptr_init = new_temp;
4174
4175 /* (3) Handle the updating of the aggregate-pointer inside the loop.
4176 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
4177 inner-loop nested in LOOP (during outer-loop vectorization). */
4178
4179 /* No update in loop is required. */
4180 if (only_init && (!loop_vinfo || at_loop == loop))
4181 aptr = aggr_ptr_init;
4182 else
4183 {
4184 /* The step of the aggregate pointer is the type size. */
4185 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
4186 /* One exception to the above is when the scalar step of the load in
4187 LOOP is zero. In this case the step here is also zero. */
4188 if (*inv_p)
4189 iv_step = size_zero_node;
4190 else if (tree_int_cst_sgn (step) == -1)
4191 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
4192
4193 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4194
4195 create_iv (aggr_ptr_init,
4196 fold_convert (aggr_ptr_type, iv_step),
4197 aggr_ptr, loop, &incr_gsi, insert_after,
4198 &indx_before_incr, &indx_after_incr);
4199 incr = gsi_stmt (incr_gsi);
4200 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4201
4202 /* Copy the points-to information if it exists. */
4203 if (DR_PTR_INFO (dr))
4204 {
4205 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
4206 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
4207 }
4208 if (ptr_incr)
4209 *ptr_incr = incr;
4210
4211 aptr = indx_before_incr;
4212 }
4213
4214 if (!nested_in_vect_loop || only_init)
4215 return aptr;
4216
4217
4218 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
4219 nested in LOOP, if it exists. */
4220
4221 gcc_assert (nested_in_vect_loop);
4222 if (!only_init)
4223 {
4224 standard_iv_increment_position (containing_loop, &incr_gsi,
4225 &insert_after);
4226 create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
4227 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
4228 &indx_after_incr);
4229 incr = gsi_stmt (incr_gsi);
4230 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4231
4232 /* Copy the points-to information if it exists. */
4233 if (DR_PTR_INFO (dr))
4234 {
4235 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
4236 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
4237 }
4238 if (ptr_incr)
4239 *ptr_incr = incr;
4240
4241 return indx_before_incr;
4242 }
4243 else
4244 gcc_unreachable ();
4245 }
4246
4247
4248 /* Function bump_vector_ptr
4249
4250 Increment a pointer (to a vector type) by vector-size. If requested,
4251 i.e. if PTR_INCR is given, then also connect the new increment stmt
4252 to the existing def-use update-chain of the pointer, by modifying
4253 the PTR_INCR as illustrated below:
4254
4255 The pointer def-use update-chain before this function:
4256 DATAREF_PTR = phi (p_0, p_2)
4257 ....
4258 PTR_INCR: p_2 = DATAREF_PTR + step
4259
4260 The pointer def-use update-chain after this function:
4261 DATAREF_PTR = phi (p_0, p_2)
4262 ....
4263 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
4264 ....
4265 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
4266
4267 Input:
4268 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
4269 in the loop.
4270 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
4271 the loop. The increment amount across iterations is expected
4272 to be vector_size.
4273 BSI - location where the new update stmt is to be placed.
4274 STMT - the original scalar memory-access stmt that is being vectorized.
4275 BUMP - optional. The offset by which to bump the pointer. If not given,
4276 the offset is assumed to be vector_size.
4277
4278 Output: Return NEW_DATAREF_PTR as illustrated above.
4279
4280 */
4281
4282 tree
4283 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
4284 gimple stmt, tree bump)
4285 {
4286 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4287 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4288 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4289 tree update = TYPE_SIZE_UNIT (vectype);
4290 gimple incr_stmt;
4291 ssa_op_iter iter;
4292 use_operand_p use_p;
4293 tree new_dataref_ptr;
4294
4295 if (bump)
4296 update = bump;
4297
4298 new_dataref_ptr = copy_ssa_name (dataref_ptr, NULL);
4299 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, new_dataref_ptr,
4300 dataref_ptr, update);
4301 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
4302
4303 /* Copy the points-to information if it exists. */
4304 if (DR_PTR_INFO (dr))
4305 {
4306 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
4307 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
4308 }
4309
4310 if (!ptr_incr)
4311 return new_dataref_ptr;
4312
4313 /* Update the vector-pointer's cross-iteration increment. */
4314 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
4315 {
4316 tree use = USE_FROM_PTR (use_p);
4317
4318 if (use == dataref_ptr)
4319 SET_USE (use_p, new_dataref_ptr);
4320 else
4321 gcc_assert (tree_int_cst_compare (use, update) == 0);
4322 }
4323
4324 return new_dataref_ptr;
4325 }
4326
4327
4328 /* Function vect_create_destination_var.
4329
4330 Create a new temporary of type VECTYPE. */
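
/* For illustration only (the SSA version is made up): for a scalar
   destination a_5 whose base name is "a" and a non-NULL VECTYPE, the
   name passed down is "a_5", so the new temporary's name starts with
   "vect_a_5"; with a NULL VECTYPE the prefix is "stmp" instead.  */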
4331
4332 tree
4333 vect_create_destination_var (tree scalar_dest, tree vectype)
4334 {
4335 tree vec_dest;
4336 const char *name;
4337 char *new_name;
4338 tree type;
4339 enum vect_var_kind kind;
4340
4341 kind = vectype ? vect_simple_var : vect_scalar_var;
4342 type = vectype ? vectype : TREE_TYPE (scalar_dest);
4343
4344 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
4345
4346 name = get_name (scalar_dest);
4347 if (name)
4348 asprintf (&new_name, "%s_%u", name, SSA_NAME_VERSION (scalar_dest));
4349 else
4350 asprintf (&new_name, "_%u", SSA_NAME_VERSION (scalar_dest));
4351 vec_dest = vect_get_new_vect_var (type, kind, new_name);
4352 free (new_name);
4353
4354 return vec_dest;
4355 }
4356
4357 /* Function vect_grouped_store_supported.
4358
4359 Returns TRUE if interleave high and interleave low permutations
4360 are supported, and FALSE otherwise. */
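
/* For illustration only: with 8 elements per vector (e.g. V8HI), the two
   permutations checked below select, as VEC_PERM_EXPR indices into the
   concatenation of the two input vectors,

     interleave high: { 0, 8, 1, 9, 2, 10, 3, 11 }
     interleave low:  { 4, 12, 5, 13, 6, 14, 7, 15 }

   matching the masks that vect_permute_store_chain generates.  */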
4361
4362 bool
4363 vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
4364 {
4365 enum machine_mode mode = TYPE_MODE (vectype);
4366
4367 /* vect_permute_store_chain requires the group size to be a power of two. */
4368 if (exact_log2 (count) == -1)
4369 {
4370 if (dump_enabled_p ())
4371 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4372 "the size of the group of accesses"
4373 " is not a power of 2\n");
4374 return false;
4375 }
4376
4377 /* Check that the permutation is supported. */
4378 if (VECTOR_MODE_P (mode))
4379 {
4380 unsigned int i, nelt = GET_MODE_NUNITS (mode);
4381 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4382 for (i = 0; i < nelt / 2; i++)
4383 {
4384 sel[i * 2] = i;
4385 sel[i * 2 + 1] = i + nelt;
4386 }
4387 if (can_vec_perm_p (mode, false, sel))
4388 {
4389 for (i = 0; i < nelt; i++)
4390 sel[i] += nelt / 2;
4391 if (can_vec_perm_p (mode, false, sel))
4392 return true;
4393 }
4394 }
4395
4396 if (dump_enabled_p ())
4397 dump_printf (MSG_MISSED_OPTIMIZATION,
4398 "interleave op not supported by target.\n");
4399 return false;
4400 }
4401
4402
4403 /* Return TRUE if vec_store_lanes is available for COUNT vectors of
4404 type VECTYPE. */
4405
4406 bool
4407 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4408 {
4409 return vect_lanes_optab_supported_p ("vec_store_lanes",
4410 vec_store_lanes_optab,
4411 vectype, count);
4412 }
4413
4414
4415 /* Function vect_permute_store_chain.
4416
4417 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
4418 a power of 2, generate interleave_high/low stmts to reorder the data
4419 correctly for the stores. Return the final references for stores in
4420 RESULT_CHAIN.
4421
4422 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4423 The input is 4 vectors each containing 8 elements. We assign a number to
4424 each element, the input sequence is:
4425
4426 1st vec: 0 1 2 3 4 5 6 7
4427 2nd vec: 8 9 10 11 12 13 14 15
4428 3rd vec: 16 17 18 19 20 21 22 23
4429 4th vec: 24 25 26 27 28 29 30 31
4430
4431 The output sequence should be:
4432
4433 1st vec: 0 8 16 24 1 9 17 25
4434 2nd vec: 2 10 18 26 3 11 19 27
4435 3rd vec: 4 12 20 28 5 13 21 29
4436 4th vec: 6 14 22 30 7 15 23 31
4437
4438 i.e., we interleave the contents of the four vectors in their order.
4439
4440 We use interleave_high/low instructions to create such output. The input of
4441 each interleave_high/low operation is two vectors:
4442 1st vec 2nd vec
4443 0 1 2 3 4 5 6 7
4444 the even elements of the result vector are obtained left-to-right from the
4445 high/low elements of the first vector. The odd elements of the result are
4446 obtained left-to-right from the high/low elements of the second vector.
4447 The output of interleave_high will be: 0 4 1 5
4448 and of interleave_low: 2 6 3 7
4449
4450
4451 The permutation is done in log LENGTH stages. In each stage interleave_high
4452 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
4453 where the first argument is taken from the first half of DR_CHAIN and the
4454 second argument from its second half.
4455 In our example,
4456
4457 I1: interleave_high (1st vec, 3rd vec)
4458 I2: interleave_low (1st vec, 3rd vec)
4459 I3: interleave_high (2nd vec, 4th vec)
4460 I4: interleave_low (2nd vec, 4th vec)
4461
4462 The output for the first stage is:
4463
4464 I1: 0 16 1 17 2 18 3 19
4465 I2: 4 20 5 21 6 22 7 23
4466 I3: 8 24 9 25 10 26 11 27
4467 I4: 12 28 13 29 14 30 15 31
4468
4469 The output of the second stage, i.e. the final result is:
4470
4471 I1: 0 8 16 24 1 9 17 25
4472 I2: 2 10 18 26 3 11 19 27
4473 I3: 4 12 20 28 5 13 21 29
4474 I4: 6 14 22 30 7 15 23 31. */
4475
4476 void
4477 vect_permute_store_chain (vec<tree> dr_chain,
4478 unsigned int length,
4479 gimple stmt,
4480 gimple_stmt_iterator *gsi,
4481 vec<tree> *result_chain)
4482 {
4483 tree vect1, vect2, high, low;
4484 gimple perm_stmt;
4485 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4486 tree perm_mask_low, perm_mask_high;
4487 unsigned int i, n;
4488 unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
4489 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4490
4491 result_chain->quick_grow (length);
4492 memcpy (result_chain->address (), dr_chain.address (),
4493 length * sizeof (tree));
4494
4495 for (i = 0, n = nelt / 2; i < n; i++)
4496 {
4497 sel[i * 2] = i;
4498 sel[i * 2 + 1] = i + nelt;
4499 }
4500 perm_mask_high = vect_gen_perm_mask (vectype, sel);
4501 gcc_assert (perm_mask_high != NULL);
4502
4503 for (i = 0; i < nelt; i++)
4504 sel[i] += nelt / 2;
4505 perm_mask_low = vect_gen_perm_mask (vectype, sel);
4506 gcc_assert (perm_mask_low != NULL);
4507
4508 for (i = 0, n = exact_log2 (length); i < n; i++)
4509 {
4510 for (j = 0; j < length/2; j++)
4511 {
4512 vect1 = dr_chain[j];
4513 vect2 = dr_chain[j+length/2];
4514
4515 /* Create interleaving stmt:
4516 high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}> */
4517 high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
4518 perm_stmt
4519 = gimple_build_assign_with_ops (VEC_PERM_EXPR, high,
4520 vect1, vect2, perm_mask_high);
4521 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4522 (*result_chain)[2*j] = high;
4523
4524 /* Create interleaving stmt:
4525 low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
4526 nelt*3/2+1, ...}> */
4527 low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
4528 perm_stmt
4529 = gimple_build_assign_with_ops (VEC_PERM_EXPR, low,
4530 vect1, vect2, perm_mask_low);
4531 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4532 (*result_chain)[2*j+1] = low;
4533 }
4534 memcpy (dr_chain.address (), result_chain->address (),
4535 length * sizeof (tree));
4536 }
4537 }
4538
4539 /* Function vect_setup_realignment
4540
4541 This function is called when vectorizing an unaligned load using
4542 the dr_explicit_realign[_optimized] scheme.
4543 This function generates the following code at the loop prolog:
4544
4545 p = initial_addr;
4546 x msq_init = *(floor(p)); # prolog load
4547 realignment_token = call target_builtin;
4548 loop:
4549 x msq = phi (msq_init, ---)
4550
4551 The stmts marked with x are generated only for the case of
4552 dr_explicit_realign_optimized.
4553
4554 The code above sets up a new (vector) pointer, pointing to the first
4555 location accessed by STMT, and a "floor-aligned" load using that pointer.
4556 It also generates code to compute the "realignment-token" (if the relevant
4557 target hook was defined), and creates a phi-node at the loop-header bb
4558 whose arguments are the result of the prolog-load (created by this
4559 function) and the result of a load that takes place in the loop (to be
4560 created by the caller to this function).
4561
4562 For the case of dr_explicit_realign_optimized:
4563 The caller to this function uses the phi-result (msq) to create the
4564 realignment code inside the loop, and sets up the missing phi argument,
4565 as follows:
4566 loop:
4567 msq = phi (msq_init, lsq)
4568 lsq = *(floor(p')); # load in loop
4569 result = realign_load (msq, lsq, realignment_token);
4570
4571 For the case of dr_explicit_realign:
4572 loop:
4573 msq = *(floor(p)); # load in loop
4574 p' = p + (VS-1);
4575 lsq = *(floor(p')); # load in loop
4576 result = realign_load (msq, lsq, realignment_token);
4577
4578 Input:
4579 STMT - (scalar) load stmt to be vectorized. This load accesses
4580 a memory location that may be unaligned.
4581 BSI - place where new code is to be inserted.
4582 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
4583 is used.
4584
4585 Output:
4586 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4587 target hook, if defined.
4588 Return value - the result of the loop-header phi node. */
4589
4590 tree
4591 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
4592 tree *realignment_token,
4593 enum dr_alignment_support alignment_support_scheme,
4594 tree init_addr,
4595 struct loop **at_loop)
4596 {
4597 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4598 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4599 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4600 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4601 struct loop *loop = NULL;
4602 edge pe = NULL;
4603 tree scalar_dest = gimple_assign_lhs (stmt);
4604 tree vec_dest;
4605 gimple inc;
4606 tree ptr;
4607 tree data_ref;
4608 gimple new_stmt;
4609 basic_block new_bb;
4610 tree msq_init = NULL_TREE;
4611 tree new_temp;
4612 gimple phi_stmt;
4613 tree msq = NULL_TREE;
4614 gimple_seq stmts = NULL;
4615 bool inv_p;
4616 bool compute_in_loop = false;
4617 bool nested_in_vect_loop = false;
4618 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4619 struct loop *loop_for_initial_load = NULL;
4620
4621 if (loop_vinfo)
4622 {
4623 loop = LOOP_VINFO_LOOP (loop_vinfo);
4624 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4625 }
4626
4627 gcc_assert (alignment_support_scheme == dr_explicit_realign
4628 || alignment_support_scheme == dr_explicit_realign_optimized);
4629
4630 /* We need to generate three things:
4631 1. the misalignment computation
4632 2. the extra vector load (for the optimized realignment scheme).
4633 3. the phi node for the two vectors from which the realignment is
4634 done (for the optimized realignment scheme). */
4635
4636 /* 1. Determine where to generate the misalignment computation.
4637
4638 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4639 calculation will be generated by this function, outside the loop (in the
4640 preheader). Otherwise, INIT_ADDR had already been computed for us by the
4641 caller, inside the loop.
4642
4643 Background: If the misalignment remains fixed throughout the iterations of
4644 the loop, then both realignment schemes are applicable, and also the
4645 misalignment computation can be done outside LOOP. This is because we are
4646 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4647 are a multiple of VS (the Vector Size), and therefore the misalignment in
4648 different vectorized LOOP iterations is always the same.
4649 The problem arises only if the memory access is in an inner-loop nested
4650 inside LOOP, which is now being vectorized using outer-loop vectorization.
4651 This is the only case when the misalignment of the memory access may not
4652 remain fixed throughout the iterations of the inner-loop (as explained in
4653 detail in vect_supportable_dr_alignment). In this case, not only is the
4654 optimized realignment scheme not applicable, but also the misalignment
4655 computation (and generation of the realignment token that is passed to
4656 REALIGN_LOAD) has to be done inside the loop.
4657
4658 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
4659 or not, which in turn determines if the misalignment is computed inside
4660 the inner-loop, or outside LOOP. */
4661
4662 if (init_addr != NULL_TREE || !loop_vinfo)
4663 {
4664 compute_in_loop = true;
4665 gcc_assert (alignment_support_scheme == dr_explicit_realign);
4666 }
4667
4668
4669 /* 2. Determine where to generate the extra vector load.
4670
4671 For the optimized realignment scheme, instead of generating two vector
4672 loads in each iteration, we generate a single extra vector load in the
4673 preheader of the loop, and in each iteration reuse the result of the
4674 vector load from the previous iteration. In case the memory access is in
4675 an inner-loop nested inside LOOP, which is now being vectorized using
4676 outer-loop vectorization, we need to determine whether this initial vector
4677 load should be generated at the preheader of the inner-loop, or can be
4678 generated at the preheader of LOOP. If the memory access has no evolution
4679 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
4680 to be generated inside LOOP (in the preheader of the inner-loop). */
4681
4682 if (nested_in_vect_loop)
4683 {
4684 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
4685 bool invariant_in_outerloop =
4686 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
4687 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
4688 }
4689 else
4690 loop_for_initial_load = loop;
4691 if (at_loop)
4692 *at_loop = loop_for_initial_load;
4693
4694 if (loop_for_initial_load)
4695 pe = loop_preheader_edge (loop_for_initial_load);
4696
4697 /* 3. For the case of the optimized realignment, create the first vector
4698 load at the loop preheader. */
4699
4700 if (alignment_support_scheme == dr_explicit_realign_optimized)
4701 {
4702 /* Create msq_init = *(floor(p1)) in the loop preheader */
4703
4704 gcc_assert (!compute_in_loop);
4705 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4706 ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
4707 NULL_TREE, &init_addr, NULL, &inc,
4708 true, &inv_p);
4709 new_temp = copy_ssa_name (ptr, NULL);
4710 new_stmt = gimple_build_assign_with_ops
4711 (BIT_AND_EXPR, new_temp, ptr,
4712 build_int_cst (TREE_TYPE (ptr),
4713 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4714 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4715 gcc_assert (!new_bb);
4716 data_ref
4717 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
4718 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
4719 new_stmt = gimple_build_assign (vec_dest, data_ref);
4720 new_temp = make_ssa_name (vec_dest, new_stmt);
4721 gimple_assign_set_lhs (new_stmt, new_temp);
4722 if (pe)
4723 {
4724 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4725 gcc_assert (!new_bb);
4726 }
4727 else
4728 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4729
4730 msq_init = gimple_assign_lhs (new_stmt);
4731 }
4732
4733 /* 4. Create realignment token using a target builtin, if available.
4734 It is done either inside the containing loop, or before LOOP (as
4735 determined above). */
4736
4737 if (targetm.vectorize.builtin_mask_for_load)
4738 {
4739 tree builtin_decl;
4740
4741 /* Compute INIT_ADDR - the initial address accessed by this memref. */
4742 if (!init_addr)
4743 {
4744 /* Generate the INIT_ADDR computation outside LOOP. */
4745 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
4746 NULL_TREE, loop);
4747 if (loop)
4748 {
4749 pe = loop_preheader_edge (loop);
4750 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4751 gcc_assert (!new_bb);
4752 }
4753 else
4754 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
4755 }
4756
4757 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4758 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
4759 vec_dest =
4760 vect_create_destination_var (scalar_dest,
4761 gimple_call_return_type (new_stmt));
4762 new_temp = make_ssa_name (vec_dest, new_stmt);
4763 gimple_call_set_lhs (new_stmt, new_temp);
4764
4765 if (compute_in_loop)
4766 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4767 else
4768 {
4769 /* Generate the misalignment computation outside LOOP. */
4770 pe = loop_preheader_edge (loop);
4771 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4772 gcc_assert (!new_bb);
4773 }
4774
4775 *realignment_token = gimple_call_lhs (new_stmt);
4776
4777 /* The result of the CALL_EXPR to this builtin is determined from
4778 the value of the parameter and no global variables are touched,
4779 which makes the builtin a "const" function. Requiring the
4780 builtin to have the "const" attribute makes it unnecessary
4781 to call mark_call_clobbered. */
4782 gcc_assert (TREE_READONLY (builtin_decl));
4783 }
4784
4785 if (alignment_support_scheme == dr_explicit_realign)
4786 return msq;
4787
4788 gcc_assert (!compute_in_loop);
4789 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
4790
4791
4792 /* 5. Create msq = phi <msq_init, lsq> in loop */
4793
4794 pe = loop_preheader_edge (containing_loop);
4795 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4796 msq = make_ssa_name (vec_dest, NULL);
4797 phi_stmt = create_phi_node (msq, containing_loop->header);
4798 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
4799
4800 return msq;
4801 }
4802
4803
4804 /* Function vect_grouped_load_supported.
4805
4806 Returns TRUE if even and odd permutations are supported,
4807 and FALSE otherwise. */
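
/* For illustration only: with 8 elements per vector (e.g. V8HI) and a
   power-of-2 group size, the two permutations checked below are the
   extract-even and extract-odd selections

     even: { 0, 2, 4, 6, 8, 10, 12, 14 }
     odd:  { 1, 3, 5, 7, 9, 11, 13, 15 }

   For a group size of 3, a different pair of shuffle masks is checked,
   mirroring what vect_permute_load_chain generates for that case.  */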
4808
4809 bool
4810 vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
4811 {
4812 enum machine_mode mode = TYPE_MODE (vectype);
4813
4814 /* vect_permute_load_chain requires the group size to be equal to 3 or
4815 be a power of two. */
4816 if (count != 3 && exact_log2 (count) == -1)
4817 {
4818 if (dump_enabled_p ())
4819 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4820 "the size of the group of accesses"
4821 " is not a power of 2 or not equal to 3\n");
4822 return false;
4823 }
4824
4825 /* Check that the permutation is supported. */
4826 if (VECTOR_MODE_P (mode))
4827 {
4828 unsigned int i, j, nelt = GET_MODE_NUNITS (mode);
4829 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4830
4831 if (count == 3)
4832 {
4833 unsigned int k;
4834 for (k = 0; k < 3; k++)
4835 {
4836 for (i = 0; i < nelt; i++)
4837 if (3 * i + k < 2 * nelt)
4838 sel[i] = 3 * i + k;
4839 else
4840 sel[i] = 0;
4841 if (!can_vec_perm_p (mode, false, sel))
4842 {
4843 if (dump_enabled_p ())
4844 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4845 "shuffle of 3 loads is not supported by"
4846 " target\n");
4847 return false;
4848 }
4849 for (i = 0, j = 0; i < nelt; i++)
4850 if (3 * i + k < 2 * nelt)
4851 sel[i] = i;
4852 else
4853 sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
4854 if (!can_vec_perm_p (mode, false, sel))
4855 {
4856 if (dump_enabled_p ())
4857 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4858 "shuffle of 3 loads is not supported by"
4859 " target\n");
4860 return false;
4861 }
4862 }
4863 return true;
4864 }
4865 else
4866 {
4867 /* If length is not equal to 3 then only a power of 2 is supported. */
4868 gcc_assert (exact_log2 (count) != -1);
4869 for (i = 0; i < nelt; i++)
4870 sel[i] = i * 2;
4871 if (can_vec_perm_p (mode, false, sel))
4872 {
4873 for (i = 0; i < nelt; i++)
4874 sel[i] = i * 2 + 1;
4875 if (can_vec_perm_p (mode, false, sel))
4876 return true;
4877 }
4878 }
4879 }
4880
4881 if (dump_enabled_p ())
4882 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4883 "extract even/odd not supported by target\n");
4884 return false;
4885 }
4886
4887 /* Return TRUE if vec_load_lanes is available for COUNT vectors of
4888 type VECTYPE. */
4889
4890 bool
4891 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4892 {
4893 return vect_lanes_optab_supported_p ("vec_load_lanes",
4894 vec_load_lanes_optab,
4895 vectype, count);
4896 }
4897
4898 /* Function vect_permute_load_chain.
4899
4900 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
4901 a power of 2 or equal to 3, generate extract_even/odd stmts to reorder
4902 the input data correctly. Return the final references for loads in
4903 RESULT_CHAIN.
4904
4905 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4906 The input is 4 vectors each containing 8 elements. We assign a number to each
4907 element, the input sequence is:
4908
4909 1st vec: 0 1 2 3 4 5 6 7
4910 2nd vec: 8 9 10 11 12 13 14 15
4911 3rd vec: 16 17 18 19 20 21 22 23
4912 4th vec: 24 25 26 27 28 29 30 31
4913
4914 The output sequence should be:
4915
4916 1st vec: 0 4 8 12 16 20 24 28
4917 2nd vec: 1 5 9 13 17 21 25 29
4918 3rd vec: 2 6 10 14 18 22 26 30
4919 4th vec: 3 7 11 15 19 23 27 31
4920
4921 i.e., the first output vector should contain the first elements of each
4922 interleaving group, etc.
4923
4924 We use extract_even/odd instructions to create such output. The input of
4925 each extract_even/odd operation is two vectors
4926 1st vec 2nd vec
4927 0 1 2 3 4 5 6 7
4928
4929 and the output is the vector of extracted even/odd elements. The output of
4930 extract_even will be: 0 2 4 6
4931 and of extract_odd: 1 3 5 7
4932
4933
4934 The permutation is done in log LENGTH stages. In each stage extract_even
4935 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
4936 their order. In our example,
4937
4938 E1: extract_even (1st vec, 2nd vec)
4939 E2: extract_odd (1st vec, 2nd vec)
4940 E3: extract_even (3rd vec, 4th vec)
4941 E4: extract_odd (3rd vec, 4th vec)
4942
4943 The output for the first stage will be:
4944
4945 E1: 0 2 4 6 8 10 12 14
4946 E2: 1 3 5 7 9 11 13 15
4947 E3: 16 18 20 22 24 26 28 30
4948 E4: 17 19 21 23 25 27 29 31
4949
4950 In order to proceed and create the correct sequence for the next stage (or
4951 for the correct output, if the second stage is the last one, as in our
4952 example), we first put the output of the extract_even operation and then the
4953 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4954 The input for the second stage is:
4955
4956 1st vec (E1): 0 2 4 6 8 10 12 14
4957 2nd vec (E3): 16 18 20 22 24 26 28 30
4958 3rd vec (E2): 1 3 5 7 9 11 13 15
4959 4th vec (E4): 17 19 21 23 25 27 29 31
4960
4961 The output of the second stage:
4962
4963 E1: 0 4 8 12 16 20 24 28
4964 E2: 2 6 10 14 18 22 26 30
4965 E3: 1 5 9 13 17 21 25 29
4966 E4: 3 7 11 15 19 23 27 31
4967
4968 And RESULT_CHAIN after reordering:
4969
4970 1st vec (E1): 0 4 8 12 16 20 24 28
4971 2nd vec (E3): 1 5 9 13 17 21 25 29
4972 3rd vec (E2): 2 6 10 14 18 22 26 30
4973 4th vec (E4): 3 7 11 15 19 23 27 31. */
4974
4975 static void
4976 vect_permute_load_chain (vec<tree> dr_chain,
4977 unsigned int length,
4978 gimple stmt,
4979 gimple_stmt_iterator *gsi,
4980 vec<tree> *result_chain)
4981 {
4982 tree data_ref, first_vect, second_vect;
4983 tree perm_mask_even, perm_mask_odd;
4984 tree perm3_mask_low, perm3_mask_high;
4985 gimple perm_stmt;
4986 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4987 unsigned int i, j, log_length = exact_log2 (length);
4988 unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
4989 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4990
4991 result_chain->quick_grow (length);
4992 memcpy (result_chain->address (), dr_chain.address (),
4993 length * sizeof (tree));
4994
4995 if (length == 3)
4996 {
4997 unsigned int k;
4998
4999 for (k = 0; k < 3; k++)
5000 {
5001 for (i = 0; i < nelt; i++)
5002 if (3 * i + k < 2 * nelt)
5003 sel[i] = 3 * i + k;
5004 else
5005 sel[i] = 0;
5006 perm3_mask_low = vect_gen_perm_mask (vectype, sel);
5007 gcc_assert (perm3_mask_low != NULL);
5008
5009 for (i = 0, j = 0; i < nelt; i++)
5010 if (3 * i + k < 2 * nelt)
5011 sel[i] = i;
5012 else
5013 sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
5014
5015 perm3_mask_high = vect_gen_perm_mask (vectype, sel);
5016 gcc_assert (perm3_mask_high != NULL);
5017
5018 first_vect = dr_chain[0];
5019 second_vect = dr_chain[1];
5020
5021 /* Create interleaving stmt (low part of):
5022 low = VEC_PERM_EXPR <first_vect, second_vect, {k, 3 + k, 6 + k,
5023 ...}> */
5024 data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
5025 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
5026 first_vect, second_vect,
5027 perm3_mask_low);
5028 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5029
5030 /* Create interleaving stmt (high part of):
5031 high = VEC_PERM_EXPR <first_vect, second_vect, {k, 3 + k, 6 + k,
5032 ...}> */
5033 first_vect = data_ref;
5034 second_vect = dr_chain[2];
5035 data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
5036 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
5037 first_vect, second_vect,
5038 perm3_mask_high);
5039 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5040 (*result_chain)[k] = data_ref;
5041 }
5042 }
5043 else
5044 {
5045 /* If length is not equal to 3 then only a power of 2 is supported. */
5046 gcc_assert (exact_log2 (length) != -1);
5047
5048 for (i = 0; i < nelt; ++i)
5049 sel[i] = i * 2;
5050 perm_mask_even = vect_gen_perm_mask (vectype, sel);
5051 gcc_assert (perm_mask_even != NULL);
5052
5053 for (i = 0; i < nelt; ++i)
5054 sel[i] = i * 2 + 1;
5055 perm_mask_odd = vect_gen_perm_mask (vectype, sel);
5056 gcc_assert (perm_mask_odd != NULL);
5057
5058 for (i = 0; i < log_length; i++)
5059 {
5060 for (j = 0; j < length; j += 2)
5061 {
5062 first_vect = dr_chain[j];
5063 second_vect = dr_chain[j+1];
5064
5065 /* data_ref = permute_even (first_data_ref, second_data_ref); */
5066 data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
5067 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
5068 first_vect, second_vect,
5069 perm_mask_even);
5070 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5071 (*result_chain)[j/2] = data_ref;
5072
5073 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
5074 data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
5075 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
5076 first_vect, second_vect,
5077 perm_mask_odd);
5078 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5079 (*result_chain)[j/2+length/2] = data_ref;
5080 }
5081 memcpy (dr_chain.address (), result_chain->address (),
5082 length * sizeof (tree));
5083 }
5084 }
5085 }
5086
5087 /* Function vect_transform_grouped_load.
5088
5089 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
5090 to perform their permutation and ascribe the resulting vectorized statements to
5091 the scalar statements.
5092 */
5093
5094 void
5095 vect_transform_grouped_load (gimple stmt, vec<tree> dr_chain, int size,
5096 gimple_stmt_iterator *gsi)
5097 {
5098 vec<tree> result_chain = vNULL;
5099
5100 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
5101 RESULT_CHAIN is the output of vect_permute_load_chain; it contains permuted
5102 vectors that are ready for vector computation. */
5103 result_chain.create (size);
5104 vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
5105 vect_record_grouped_load_vectors (stmt, result_chain);
5106 result_chain.release ();
5107 }
5108
5109 /* RESULT_CHAIN contains the output of a group of grouped loads that were
5110 generated as part of the vectorization of STMT. Assign the statement
5111 for each vector to the associated scalar statement. */
5112
5113 void
5114 vect_record_grouped_load_vectors (gimple stmt, vec<tree> result_chain)
5115 {
5116 gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
5117 gimple next_stmt, new_stmt;
5118 unsigned int i, gap_count;
5119 tree tmp_data_ref;
5120
5121 /* Put a permuted data-ref in the VECTORIZED_STMT field.
5122 Since we scan the chain starting from its first node, their order
5123 corresponds to the order of data-refs in RESULT_CHAIN. */
5124 next_stmt = first_stmt;
5125 gap_count = 1;
5126 FOR_EACH_VEC_ELT (result_chain, i, tmp_data_ref)
5127 {
5128 if (!next_stmt)
5129 break;
5130
5131 /* Skip the gaps. Loads created for the gaps will be removed by the dead
5132 code elimination pass later. No need to check for the first stmt in
5133 the group, since it always exists.
5134 GROUP_GAP is the number of steps in elements from the previous
5135 access (if there is no gap GROUP_GAP is 1). We skip loads that
5136 correspond to the gaps. */
5137 if (next_stmt != first_stmt
5138 && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
5139 {
5140 gap_count++;
5141 continue;
5142 }
5143
5144 while (next_stmt)
5145 {
5146 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
5147 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
5148 copies, and we put the new vector statement in the first available
5149 RELATED_STMT. */
5150 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
5151 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
5152 else
5153 {
5154 if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
5155 {
5156 gimple prev_stmt =
5157 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
5158 gimple rel_stmt =
5159 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
5160 while (rel_stmt)
5161 {
5162 prev_stmt = rel_stmt;
5163 rel_stmt =
5164 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
5165 }
5166
5167 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
5168 new_stmt;
5169 }
5170 }
5171
5172 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5173 gap_count = 1;
5174 /* If NEXT_STMT accesses the same DR as the previous statement,
5175 put the same TMP_DATA_REF as its vectorized statement; otherwise
5176 get the next data-ref from RESULT_CHAIN. */
5177 if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
5178 break;
5179 }
5180 }
5181 }
5182
5183 /* Function vect_can_force_dr_alignment_p.
5184 
5185 Returns whether the alignment of a DECL can be forced to an
5186 ALIGNMENT-bit boundary. */
5187
5188 bool
5189 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
5190 {
5191 if (TREE_CODE (decl) != VAR_DECL)
5192 return false;
5193
5194 /* We cannot change the alignment of common or external symbols, as another
5195 translation unit may contain a definition with lower alignment.
5196 The rules of common symbol linking mean that the definition
5197 will override the common symbol. The same is true for constant
5198 pool entries, which may be shared and are not properly merged
5199 by LTO. */
5200 if (DECL_EXTERNAL (decl)
5201 || DECL_COMMON (decl)
5202 || DECL_IN_CONSTANT_POOL (decl))
5203 return false;
5204
5205 if (TREE_ASM_WRITTEN (decl))
5206 return false;
5207
5208 /* Do not override the alignment as specified by the ABI when the used
5209 attribute is set. */
5210 if (DECL_PRESERVE_P (decl))
5211 return false;
5212
5213 /* Do not override explicit alignment set by the user when an explicit
5214 section name is also used. This is a common idiom used by many
5215 software projects. */
5216 if (TREE_STATIC (decl)
5217 && DECL_SECTION_NAME (decl) != NULL_TREE
5218 && !symtab_get_node (decl)->implicit_section)
5219 return false;
5220
5221 if (TREE_STATIC (decl))
5222 return (alignment <= MAX_OFILE_ALIGNMENT);
5223 else
5224 return (alignment <= MAX_STACK_ALIGNMENT);
5225 }
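/* Illustrative sketch (a hypothetical caller, shown only to make the
   contract concrete): a pass that wants DECL aligned for a vector type
   VECTYPE would check the predicate before touching the decl, e.g.

       if (vect_can_force_dr_alignment_p (decl, TYPE_ALIGN (vectype)))
         {
           DECL_ALIGN (decl) = TYPE_ALIGN (vectype);
           DECL_USER_ALIGN (decl) = 1;
         }

   The predicate only answers whether forcing the alignment is safe; it
   does not modify DECL itself.  */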
5226
5227
5228 /* Return whether the data reference DR is supported with respect to its
5229 alignment.
5230 If CHECK_ALIGNED_ACCESSES is TRUE, check whether the access is supported
5231 even if it is aligned, i.e., check whether it would be possible to
5232 vectorize it with a different alignment. */
5233
5234 enum dr_alignment_support
5235 vect_supportable_dr_alignment (struct data_reference *dr,
5236 bool check_aligned_accesses)
5237 {
5238 gimple stmt = DR_STMT (dr);
5239 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5240 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5241 enum machine_mode mode = TYPE_MODE (vectype);
5242 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5243 struct loop *vect_loop = NULL;
5244 bool nested_in_vect_loop = false;
5245
5246 if (aligned_access_p (dr) && !check_aligned_accesses)
5247 return dr_aligned;
5248
5249 /* For now assume all conditional loads/stores support unaligned
5250 access without any special code. */
5251 if (is_gimple_call (stmt)
5252 && gimple_call_internal_p (stmt)
5253 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
5254 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
5255 return dr_unaligned_supported;
5256
5257 if (loop_vinfo)
5258 {
5259 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
5260 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
5261 }
5262
5263 /* Possibly unaligned access. */
5264
5265 /* We can choose between using the implicit realignment scheme (generating
5266 a misaligned_move stmt) and the explicit realignment scheme (generating
5267 aligned loads with a REALIGN_LOAD). There are two variants to the
5268 explicit realignment scheme: optimized, and unoptimized.
5269 We can optimize the realignment only if the step between consecutive
5270 vector loads is equal to the vector size. Since the vector memory
5271 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
5272 is guaranteed that the misalignment amount remains the same throughout the
5273 execution of the vectorized loop. Therefore, we can create the
5274 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
5275 at the loop preheader.
5276
5277 However, in the case of outer-loop vectorization, when vectorizing a
5278 memory access in the inner-loop nested within the LOOP that is now being
5279 vectorized, while it is guaranteed that the misalignment of the
5280 vectorized memory access will remain the same in different outer-loop
5281 iterations, it is *not* guaranteed that it will remain the same throughout
5282 the execution of the inner-loop. This is because the inner-loop advances
5283 with the original scalar step (and not in steps of VS). If the inner-loop
5284 step happens to be a multiple of VS, then the misalignment remains fixed
5285 and we can use the optimized realignment scheme. For example:
5286
5287 for (i=0; i<N; i++)
5288 for (j=0; j<M; j++)
5289 s += a[i+j];
5290
5291 When vectorizing the i-loop in the above example, the step between
5292 consecutive vector loads is 1, and so the misalignment does not remain
5293 fixed across the execution of the inner-loop, and the realignment cannot
5294 be optimized (as illustrated in the following pseudo vectorized loop):
5295
5296 for (i=0; i<N; i+=4)
5297 for (j=0; j<M; j++){
5298 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
5299 // when j is {0,1,2,3,4,5,6,7,...} respectively.
5300 // (assuming that we start from an aligned address).
5301 }
5302
5303 We therefore have to use the unoptimized realignment scheme:
5304
5305 for (i=0; i<N; i+=4)
5306 for (j=k; j<M; j+=4)
5307 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
5308 // that the misalignment of the initial address is
5309 // 0).
5310
5311 The loop can then be vectorized as follows:
5312
5313 for (k=0; k<4; k++){
5314 rt = get_realignment_token (&vp[k]);
5315 for (i=0; i<N; i+=4){
5316 v1 = vp[i+k];
5317 for (j=k; j<M; j+=4){
5318 v2 = vp[i+j+VS-1];
5319 va = REALIGN_LOAD <v1,v2,rt>;
5320 vs += va;
5321 v1 = v2;
5322 }
5323 }
5324 } */
5325
5326 if (DR_IS_READ (dr))
5327 {
5328 bool is_packed = false;
5329 tree type = (TREE_TYPE (DR_REF (dr)));
5330
5331 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
5332 && (!targetm.vectorize.builtin_mask_for_load
5333 || targetm.vectorize.builtin_mask_for_load ()))
5334 {
5335 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5336 if ((nested_in_vect_loop
5337 && (TREE_INT_CST_LOW (DR_STEP (dr))
5338 != GET_MODE_SIZE (TYPE_MODE (vectype))))
5339 || !loop_vinfo)
5340 return dr_explicit_realign;
5341 else
5342 return dr_explicit_realign_optimized;
5343 }
5344 if (!known_alignment_for_access_p (dr))
5345 is_packed = not_size_aligned (DR_REF (dr));
5346
5347 if ((TYPE_USER_ALIGN (type) && !is_packed)
5348 || targetm.vectorize.
5349 support_vector_misalignment (mode, type,
5350 DR_MISALIGNMENT (dr), is_packed))
5351 /* Can't software pipeline the loads, but can at least do them. */
5352 return dr_unaligned_supported;
5353 }
5354 else
5355 {
5356 bool is_packed = false;
5357 tree type = (TREE_TYPE (DR_REF (dr)));
5358
5359 if (!known_alignment_for_access_p (dr))
5360 is_packed = not_size_aligned (DR_REF (dr));
5361
5362 if ((TYPE_USER_ALIGN (type) && !is_packed)
5363 || targetm.vectorize.
5364 support_vector_misalignment (mode, type,
5365 DR_MISALIGNMENT (dr), is_packed))
5366 return dr_unaligned_supported;
5367 }
5368
5369 /* Unsupported. */
5370 return dr_unaligned_unsupported;
5371 }
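/* Illustrative sketch (a hypothetical caller, written as the kind of pseudo
   code used in the comments above): the returned value is typically treated
   as a three-way decision,

       switch (vect_supportable_dr_alignment (dr, false))
         {
         case dr_aligned:
         case dr_unaligned_supported:
           emit a plain, possibly misaligned, vector access;
           break;
         case dr_explicit_realign:
         case dr_explicit_realign_optimized:
           emit aligned loads plus a REALIGN_LOAD with a realignment token;
           break;
         case dr_unaligned_unsupported:
           do not vectorize this access;
           break;
         }

   with the two explicit-realignment cases differing only in whether the
   realignment token can be computed once in the loop preheader.  */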