gcc/tree-vect-data-refs.c
1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software
3 Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "cfgloop.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "toplev.h"
41
42
43 /* Return the smallest scalar part of STMT.
44 This is used to determine the vectype of the stmt. We generally set the
45 vectype according to the type of the result (lhs). For stmts whose
46 result-type is different from the type of the arguments (e.g., demotion,
47 promotion), vectype will be reset appropriately (later). Note that we have
48 to visit the smallest datatype in this function, because that determines the
49 VF. If the smallest datatype in the loop is present only as the rhs of a
50 promotion operation - we'd miss it.
51 Such a case, where a variable of this datatype does not appear in the lhs
52 anywhere in the loop, can only occur if it's an invariant: e.g.:
53 'int_x = (int) short_inv', which we'd expect to have been optimized away by
54 invariant motion. However, we cannot rely on invariant motion to always take
55 invariants out of the loop, and so in the case of promotion we also have to
56 check the rhs.
57 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
58 types. */
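/* Illustrative example (not from the sources): for a widening statement
   such as 'int_x = (int) short_y', the lhs type has size 4 bytes and the
   rhs type 2 bytes, so this function returns the 2-byte type and sets
   *LHS_SIZE_UNIT = 4, *RHS_SIZE_UNIT = 2; the VF is then derived from
   the 2-byte elements.  */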
59
60 tree
61 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
62 HOST_WIDE_INT *rhs_size_unit)
63 {
64 tree scalar_type = gimple_expr_type (stmt);
65 HOST_WIDE_INT lhs, rhs;
66
67 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
68
69 if (is_gimple_assign (stmt)
70 && (gimple_assign_cast_p (stmt)
71 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
72 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
73 {
74 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
75
76 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
77 if (rhs < lhs)
78 scalar_type = rhs_type;
79 }
80
81 *lhs_size_unit = lhs;
82 *rhs_size_unit = rhs;
83 return scalar_type;
84 }
85
86
87 /* Find the place of the data-ref in STMT in the interleaving chain that starts
88 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
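/* For instance (illustrative): in a chain FIRST_STMT -> S1 -> S2, the
   result is 0 for FIRST_STMT itself, 1 for S1 and 2 for S2, provided
   STMT's DR_GROUP_FIRST_DR really is FIRST_STMT.  */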
89
90 int
91 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
92 {
93 gimple next_stmt = first_stmt;
94 int result = 0;
95
96 if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
97 return -1;
98
99 while (next_stmt && next_stmt != stmt)
100 {
101 result++;
102 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
103 }
104
105 if (next_stmt)
106 return result;
107 else
108 return -1;
109 }
110
111
112 /* Function vect_insert_into_interleaving_chain.
113
114 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
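/* Illustrative example: if DRB's chain currently holds data-refs with
   DR_INIT values 0 and 8, inserting a DRA whose DR_INIT is 4 links it
   between them, keeping the chain sorted by increasing init: 0, 4, 8.  */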
115
116 static void
117 vect_insert_into_interleaving_chain (struct data_reference *dra,
118 struct data_reference *drb)
119 {
120 gimple prev, next;
121 tree next_init;
122 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
123 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
124
125 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
126 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
127 while (next)
128 {
129 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
130 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
131 {
132 /* Insert here. */
133 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
134 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
135 return;
136 }
137 prev = next;
138 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
139 }
140
141 /* We got to the end of the list. Insert here. */
142 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
143 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
144 }
145
146
147 /* Function vect_update_interleaving_chain.
148
149 For two data-refs DRA and DRB that are part of a chain of interleaved data
150 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
151
152 There are four possible cases:
153 1. New stmts - both DRA and DRB are not a part of any chain:
154 FIRST_DR = DRB
155 NEXT_DR (DRB) = DRA
156 2. DRB is a part of a chain and DRA is not:
157 no need to update FIRST_DR
158 no need to insert DRB
159 insert DRA according to init
160 3. DRA is a part of a chain and DRB is not:
161 if (init of FIRST_DR > init of DRB)
162 FIRST_DR = DRB
163 NEXT(FIRST_DR) = previous FIRST_DR
164 else
165 insert DRB according to its init
166 4. both DRA and DRB are in some interleaving chains:
167 choose the chain with the smallest init of FIRST_DR
168 insert the nodes of the second chain into the first one. */
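/* Illustrative example of case 4: if DRA's chain has inits {0, 8} and
   DRB's chain has inits {4, 12}, the chain whose first init is smaller
   (DRA's, starting at 0) absorbs the other, giving a single chain sorted
   by init: 0, 4, 8, 12, with every stmt's FIRST_DR updated accordingly.  */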
169
170 static void
171 vect_update_interleaving_chain (struct data_reference *drb,
172 struct data_reference *dra)
173 {
174 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
175 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
176 tree next_init, init_dra_chain, init_drb_chain;
177 gimple first_a, first_b;
178 tree node_init;
179 gimple node, prev, next, first_stmt;
180
181 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
182 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
183 {
184 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
185 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
186 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
187 return;
188 }
189
190 /* 2. DRB is a part of a chain and DRA is not. */
191 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
192 {
193 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
194 /* Insert DRA into the chain of DRB. */
195 vect_insert_into_interleaving_chain (dra, drb);
196 return;
197 }
198
199 /* 3. DRA is a part of a chain and DRB is not. */
200 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
201 {
202 gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
203 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
204 old_first_stmt)));
205 gimple tmp;
206
207 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
208 {
209 /* DRB's init is smaller than the init of the stmt previously marked
210 as the first stmt of the interleaving chain of DRA. Therefore, we
211 update FIRST_STMT and put DRB in the head of the list. */
212 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
213 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
214
215 /* Update all the stmts in the list to point to the new FIRST_STMT. */
216 tmp = old_first_stmt;
217 while (tmp)
218 {
219 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
220 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
221 }
222 }
223 else
224 {
225 /* Insert DRB in the list of DRA. */
226 vect_insert_into_interleaving_chain (drb, dra);
227 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
228 }
229 return;
230 }
231
232 /* 4. both DRA and DRB are in some interleaving chains. */
233 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
234 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
235 if (first_a == first_b)
236 return;
237 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
238 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
239
240 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
241 {
242 /* Insert the nodes of DRA chain into the DRB chain.
243 After inserting a node, continue from this node of the DRB chain (don't
244 start from the beginning). */
245 node = DR_GROUP_FIRST_DR (stmtinfo_a);
246 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
247 first_stmt = first_b;
248 }
249 else
250 {
251 /* Insert the nodes of DRB chain into the DRA chain.
252 After inserting a node, continue from this node of the DRA chain (don't
253 start from the beginning). */
254 node = DR_GROUP_FIRST_DR (stmtinfo_b);
255 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
256 first_stmt = first_a;
257 }
258
259 while (node)
260 {
261 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
262 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
263 while (next)
264 {
265 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
266 if (tree_int_cst_compare (next_init, node_init) > 0)
267 {
268 /* Insert here. */
269 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
270 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
271 prev = node;
272 break;
273 }
274 prev = next;
275 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
276 }
277 if (!next)
278 {
279 /* We got to the end of the list. Insert here. */
280 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
281 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
282 prev = node;
283 }
284 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
285 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
286 }
287 }
288
289
290 /* Function vect_equal_offsets.
291
292 Check if OFFSET1 and OFFSET2 are identical expressions. */
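/* For instance (illustrative): two distinct PLUS_EXPR trees both
   representing 'i * 4 + 8' compare equal here as long as the recursion
   bottoms out at pointer-identical leaves (the same SSA name 'i' and
   shared integer constants).  */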
293
294 static bool
295 vect_equal_offsets (tree offset1, tree offset2)
296 {
297 bool res0, res1;
298
299 STRIP_NOPS (offset1);
300 STRIP_NOPS (offset2);
301
302 if (offset1 == offset2)
303 return true;
304
305 if (TREE_CODE (offset1) != TREE_CODE (offset2)
306 || !BINARY_CLASS_P (offset1)
307 || !BINARY_CLASS_P (offset2))
308 return false;
309
310 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
311 TREE_OPERAND (offset2, 0));
312 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
313 TREE_OPERAND (offset2, 1));
314
315 return (res0 && res1);
316 }
317
318
319 /* Function vect_check_interleaving.
320
321 Check if DRA and DRB are a part of interleaving. In case they are, insert
322 DRA and DRB in an interleaving chain. */
323
324 static bool
325 vect_check_interleaving (struct data_reference *dra,
326 struct data_reference *drb)
327 {
328 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
329
330 /* Check that the data-refs have the same first location (except init) and
331 that they are both either stores or loads (not one load and one store). */
332 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
333 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
334 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
335 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
336 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
337 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
338 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
339 || DR_IS_READ (dra) != DR_IS_READ (drb))
340 return false;
341
342 /* Check:
343 1. data-refs are of the same type
344 2. their steps are equal
345 3. the step (if greater than zero) is greater than the difference between
346 data-refs' inits. */
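  /* Worked example (illustrative): for two 4-byte stores a[2*i] and
     a[2*i+1] in the same loop, both drs share base and offset, DR_STEP
     is 8, the inits are 0 and 4, and (4 - 0) % 4 == 0 with 4 <= 8, so
     the two refs are recorded as an interleaving group.  */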
347 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
348 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
349
350 if (type_size_a != type_size_b
351 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
352 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
353 TREE_TYPE (DR_REF (drb))))
354 return false;
355
356 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
357 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
358 step = TREE_INT_CST_LOW (DR_STEP (dra));
359
360 if (init_a > init_b)
361 {
362 /* If init_a == init_b + the size of the type * k, we have an interleaving,
363 and DRB is accessed before DRA. */
364 diff_mod_size = (init_a - init_b) % type_size_a;
365
366 if (step && (init_a - init_b) > step)
367 return false;
368
369 if (diff_mod_size == 0)
370 {
371 vect_update_interleaving_chain (drb, dra);
372 if (vect_print_dump_info (REPORT_DR_DETAILS))
373 {
374 fprintf (vect_dump, "Detected interleaving ");
375 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
376 fprintf (vect_dump, " and ");
377 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
378 }
379 return true;
380 }
381 }
382 else
383 {
384 /* If init_b == init_a + the size of the type * k, we have an
385 interleaving, and DRA is accessed before DRB. */
386 diff_mod_size = (init_b - init_a) % type_size_a;
387
388 if (step && (init_b - init_a) > step)
389 return false;
390
391 if (diff_mod_size == 0)
392 {
393 vect_update_interleaving_chain (dra, drb);
394 if (vect_print_dump_info (REPORT_DR_DETAILS))
395 {
396 fprintf (vect_dump, "Detected interleaving ");
397 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
398 fprintf (vect_dump, " and ");
399 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
400 }
401 return true;
402 }
403 }
404
405 return false;
406 }
407
408 /* Check if the data references pointed to by DR_I and DR_J are the same or
409 belong to the same interleaving group. Return FALSE if the drs are
410 different, otherwise return TRUE. */
411
412 static bool
413 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
414 {
415 gimple stmt_i = DR_STMT (dr_i);
416 gimple stmt_j = DR_STMT (dr_j);
417
418 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
419 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
420 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
421 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
422 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
423 return true;
424 else
425 return false;
426 }
427
428 /* If address ranges represented by DDR_I and DDR_J are equal,
429 return TRUE, otherwise return FALSE. */
430
431 static bool
432 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
433 {
434 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
435 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
436 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
437 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
438 return true;
439 else
440 return false;
441 }
442
443 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
444 tested at run-time. Return TRUE if DDR was successfully inserted.
445 Return false if versioning is not supported. */
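/* Conceptually (a sketch, not the exact code the vectorizer later emits),
   each ddr recorded here leads to a run-time guard of the form
     if (&a[first] + len_a <= &b[first] || &b[first] + len_b <= &a[first])
       ... vectorized loop ...
     else
       ... scalar loop ...
   i.e. the loop is versioned on the two address ranges not overlapping;
   the names a, b, len_a and len_b are purely illustrative.  */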
446
447 static bool
448 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
449 {
450 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
451
452 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
453 return false;
454
455 if (vect_print_dump_info (REPORT_DR_DETAILS))
456 {
457 fprintf (vect_dump, "mark for run-time aliasing test between ");
458 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
459 fprintf (vect_dump, " and ");
460 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
461 }
462
463 if (optimize_loop_nest_for_size_p (loop))
464 {
465 if (vect_print_dump_info (REPORT_DR_DETAILS))
466 fprintf (vect_dump, "versioning not supported when optimizing for size.");
467 return false;
468 }
469
470 /* FORNOW: We don't support versioning with outer-loop vectorization. */
471 if (loop->inner)
472 {
473 if (vect_print_dump_info (REPORT_DR_DETAILS))
474 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
475 return false;
476 }
477
478 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
479 return true;
480 }
481
482
483 /* Function vect_analyze_data_ref_dependence.
484
485 Return TRUE if there (might) exist a dependence between a memory-reference
486 DRA and a memory-reference DRB. When the dependence can instead be
487 checked at run-time by versioning for alias, return FALSE. */
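/* For instance (illustrative), with a vectorization factor of 4:
   a write a[i] together with a read a[i-1] (distance 1) blocks
   vectorization, a read a[i-4] (distance 4, a multiple of VF) is fine
   and the two refs are even recorded as having the same alignment, and
   a read a[i-5] (distance 5 >= VF) cannot overlap within one vector
   iteration, so it is ignored.  */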
488
489 static bool
490 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
491 loop_vec_info loop_vinfo)
492 {
493 unsigned int i;
494 struct loop *loop = NULL;
495 int vectorization_factor = 0;
496 struct data_reference *dra = DDR_A (ddr);
497 struct data_reference *drb = DDR_B (ddr);
498 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
499 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
500 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
501 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
502 lambda_vector dist_v;
503 unsigned int loop_depth;
504
505 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
506 {
507 /* Independent data accesses. */
508 vect_check_interleaving (dra, drb);
509 return false;
510 }
511
512 if (loop_vinfo)
513 {
514 loop = LOOP_VINFO_LOOP (loop_vinfo);
515 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
516 }
517
518 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
519 return false;
520
521 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
522 {
523 if (loop_vinfo)
524 {
525 if (vect_print_dump_info (REPORT_DR_DETAILS))
526 {
527 fprintf (vect_dump, "versioning for alias required: "
528 "can't determine dependence between ");
529 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
530 fprintf (vect_dump, " and ");
531 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
532 }
533
534 /* Add to list of ddrs that need to be tested at run-time. */
535 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
536 }
537
538 /* When vectorizing a basic block, an unknown dependence can still mean
539 a strided access. */
540 if (vect_check_interleaving (dra, drb))
541 return false;
542
543 if (vect_print_dump_info (REPORT_DR_DETAILS))
544 {
545 fprintf (vect_dump, "can't determine dependence between ");
546 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
547 fprintf (vect_dump, " and ");
548 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
549 }
550
551 return true;
552 }
553
554 /* Versioning for alias is not yet supported for basic block SLP, and
555 dependence distance is not applicable; hence, in case of a known data
556 dependence, basic block vectorization is impossible for now. */
557 if (!loop_vinfo)
558 {
559 if (dra != drb && vect_check_interleaving (dra, drb))
560 return false;
561
562 if (vect_print_dump_info (REPORT_DR_DETAILS))
563 {
564 fprintf (vect_dump, "determined dependence between ");
565 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
566 fprintf (vect_dump, " and ");
567 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
568 }
569
570 return true;
571 }
572
573 /* Loop-based vectorization and known data dependence. */
574 if (DDR_NUM_DIST_VECTS (ddr) == 0)
575 {
576 if (vect_print_dump_info (REPORT_DR_DETAILS))
577 {
578 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
579 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
580 fprintf (vect_dump, " and ");
581 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
582 }
583 /* Add to list of ddrs that need to be tested at run-time. */
584 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
585 }
586
587 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
588 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
589 {
590 int dist = dist_v[loop_depth];
591
592 if (vect_print_dump_info (REPORT_DR_DETAILS))
593 fprintf (vect_dump, "dependence distance = %d.", dist);
594
595 /* Same loop iteration. */
596 if (dist % vectorization_factor == 0 && dra_size == drb_size)
597 {
598 /* Two references with distance zero have the same alignment. */
599 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
600 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
601 if (vect_print_dump_info (REPORT_ALIGNMENT))
602 fprintf (vect_dump, "accesses have the same alignment.");
603 if (vect_print_dump_info (REPORT_DR_DETAILS))
604 {
605 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
606 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
607 fprintf (vect_dump, " and ");
608 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
609 }
610
611 /* For interleaving, mark that there is a read-write dependency if
612 necessary. We have already checked that one of the data-refs is a store. */
613 if (DR_IS_READ (dra))
614 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
615 else
616 {
617 if (DR_IS_READ (drb))
618 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
619 }
620
621 continue;
622 }
623
624 if (abs (dist) >= vectorization_factor
625 || (dist > 0 && DDR_REVERSED_P (ddr)))
626 {
627 /* Dependence distance does not create dependence, as far as
628 vectorization is concerned, in this case. If DDR_REVERSED_P the
629 order of the data-refs in DDR was reversed (to make distance
630 vector positive), and the actual distance is negative. */
631 if (vect_print_dump_info (REPORT_DR_DETAILS))
632 fprintf (vect_dump, "dependence distance >= VF or negative.");
633 continue;
634 }
635
636 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
637 {
638 fprintf (vect_dump, "not vectorized, possible dependence "
639 "between data-refs ");
640 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
641 fprintf (vect_dump, " and ");
642 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
643 }
644
645 return true;
646 }
647
648 return false;
649 }
650
651 /* Function vect_analyze_data_ref_dependences.
652
653 Examine all the data references in the loop, and make sure there do not
654 exist any data dependences between them. */
655
656 bool
657 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
658 bb_vec_info bb_vinfo)
659 {
660 unsigned int i;
661 VEC (ddr_p, heap) *ddrs = NULL;
662 struct data_dependence_relation *ddr;
663
664 if (vect_print_dump_info (REPORT_DETAILS))
665 fprintf (vect_dump, "=== vect_analyze_dependences ===");
666
667 if (loop_vinfo)
668 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
669 else
670 ddrs = BB_VINFO_DDRS (bb_vinfo);
671
672 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
673 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
674 return false;
675
676 return true;
677 }
678
679
680 /* Function vect_compute_data_ref_alignment
681
682 Compute the misalignment of the data reference DR.
683
684 Output:
685 1. If during the misalignment computation it is found that the data reference
686 cannot be vectorized then false is returned.
687 2. DR_MISALIGNMENT (DR) is defined.
688
689 FOR NOW: No analysis is actually performed. Misalignment is calculated
690 only for trivial cases. TODO. */
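/* Worked example (illustrative): for a 16-byte vector type, a base known
   to be at least 16-byte aligned, and DR_INIT = 20, the code below
   computes 20 mod 16 = 4 and records a misalignment of 4 bytes for DR.  */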
691
692 static bool
693 vect_compute_data_ref_alignment (struct data_reference *dr)
694 {
695 gimple stmt = DR_STMT (dr);
696 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
697 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
698 struct loop *loop = NULL;
699 tree ref = DR_REF (dr);
700 tree vectype;
701 tree base, base_addr;
702 bool base_aligned;
703 tree misalign;
704 tree aligned_to, alignment;
705
706 if (vect_print_dump_info (REPORT_DETAILS))
707 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
708
709 if (loop_vinfo)
710 loop = LOOP_VINFO_LOOP (loop_vinfo);
711
712 /* Initialize misalignment to unknown. */
713 SET_DR_MISALIGNMENT (dr, -1);
714
715 misalign = DR_INIT (dr);
716 aligned_to = DR_ALIGNED_TO (dr);
717 base_addr = DR_BASE_ADDRESS (dr);
718 vectype = STMT_VINFO_VECTYPE (stmt_info);
719
720 /* In case the dataref is in an inner-loop of the loop that is being
721 vectorized (LOOP), we use the base and misalignment information
722 relative to the outer-loop (LOOP). This is ok only if the misalignment
723 stays the same throughout the execution of the inner-loop, which is why
724 we have to check that the stride of the dataref in the inner-loop is a
725 multiple of the vector size. */
726 if (loop && nested_in_vect_loop_p (loop, stmt))
727 {
728 tree step = DR_STEP (dr);
729 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
730
731 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
732 {
733 if (vect_print_dump_info (REPORT_ALIGNMENT))
734 fprintf (vect_dump, "inner step divides the vector-size.");
735 misalign = STMT_VINFO_DR_INIT (stmt_info);
736 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
737 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
738 }
739 else
740 {
741 if (vect_print_dump_info (REPORT_ALIGNMENT))
742 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
743 misalign = NULL_TREE;
744 }
745 }
746
747 base = build_fold_indirect_ref (base_addr);
748 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
749
750 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
751 || !misalign)
752 {
753 if (vect_print_dump_info (REPORT_ALIGNMENT))
754 {
755 fprintf (vect_dump, "Unknown alignment for access: ");
756 print_generic_expr (vect_dump, base, TDF_SLIM);
757 }
758 return true;
759 }
760
761 if ((DECL_P (base)
762 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
763 alignment) >= 0)
764 || (TREE_CODE (base_addr) == SSA_NAME
765 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
766 TREE_TYPE (base_addr)))),
767 alignment) >= 0))
768 base_aligned = true;
769 else
770 base_aligned = false;
771
772 if (!base_aligned)
773 {
774 /* Do not change the alignment of global variables if
775 flag_section_anchors is enabled. */
776 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
777 || (TREE_STATIC (base) && flag_section_anchors))
778 {
779 if (vect_print_dump_info (REPORT_DETAILS))
780 {
781 fprintf (vect_dump, "can't force alignment of ref: ");
782 print_generic_expr (vect_dump, ref, TDF_SLIM);
783 }
784 return true;
785 }
786
787 /* Force the alignment of the decl.
788 NOTE: This is the only change to the code we make during
789 the analysis phase, before deciding to vectorize the loop. */
790 if (vect_print_dump_info (REPORT_DETAILS))
791 fprintf (vect_dump, "force alignment");
792 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
793 DECL_USER_ALIGN (base) = 1;
794 }
795
796 /* At this point we assume that the base is aligned. */
797 gcc_assert (base_aligned
798 || (TREE_CODE (base) == VAR_DECL
799 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
800
801 /* Modulo alignment. */
802 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
803
804 if (!host_integerp (misalign, 1))
805 {
806 /* Negative or overflowed misalignment value. */
807 if (vect_print_dump_info (REPORT_DETAILS))
808 fprintf (vect_dump, "unexpected misalign value");
809 return false;
810 }
811
812 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
813
814 if (vect_print_dump_info (REPORT_DETAILS))
815 {
816 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
817 print_generic_expr (vect_dump, ref, TDF_SLIM);
818 }
819
820 return true;
821 }
822
823
824 /* Function vect_compute_data_refs_alignment
825
826 Compute the misalignment of data references in the loop.
827 Return FALSE if a data reference is found that cannot be vectorized. */
828
829 static bool
830 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
831 bb_vec_info bb_vinfo)
832 {
833 VEC (data_reference_p, heap) *datarefs;
834 struct data_reference *dr;
835 unsigned int i;
836
837 if (loop_vinfo)
838 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
839 else
840 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
841
842 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
843 if (!vect_compute_data_ref_alignment (dr))
844 return false;
845
846 return true;
847 }
848
849
850 /* Function vect_update_misalignment_for_peel
851
852 DR - the data reference whose misalignment is to be adjusted.
853 DR_PEEL - the data reference whose misalignment is being made
854 zero in the vector loop by the peel.
855 NPEEL - the number of iterations in the peel loop if the misalignment
856 of DR_PEEL is known at compile time. */
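/* Worked example (illustrative): with 16-byte vectors of 4-byte elements,
   a DR whose misalignment is 4 bytes, and NPEEL = 3 peeled iterations,
   the new misalignment is (4 + 3 * 4) % 16 = 0, i.e. the peel that aligns
   DR_PEEL happens to align this DR as well.  */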
857
858 static void
859 vect_update_misalignment_for_peel (struct data_reference *dr,
860 struct data_reference *dr_peel, int npeel)
861 {
862 unsigned int i;
863 VEC(dr_p,heap) *same_align_drs;
864 struct data_reference *current_dr;
865 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
866 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
867 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
868 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
869
870 /* For interleaved data accesses the step in the loop must be multiplied by
871 the size of the interleaving group. */
872 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
873 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
874 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
875 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
876
877 /* It can be assumed that the data refs with the same alignment as dr_peel
878 are aligned in the vector loop. */
879 same_align_drs
880 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
881 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
882 {
883 if (current_dr != dr)
884 continue;
885 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
886 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
887 SET_DR_MISALIGNMENT (dr, 0);
888 return;
889 }
890
891 if (known_alignment_for_access_p (dr)
892 && known_alignment_for_access_p (dr_peel))
893 {
894 int misal = DR_MISALIGNMENT (dr);
895 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
896 misal += npeel * dr_size;
897 misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
898 SET_DR_MISALIGNMENT (dr, misal);
899 return;
900 }
901
902 if (vect_print_dump_info (REPORT_DETAILS))
903 fprintf (vect_dump, "Setting misalignment to -1.");
904 SET_DR_MISALIGNMENT (dr, -1);
905 }
906
907
908 /* Function vect_verify_datarefs_alignment
909
910 Return TRUE if all data references in the loop can be
911 handled with respect to alignment. */
912
913 bool
914 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
915 {
916 VEC (data_reference_p, heap) *datarefs;
917 struct data_reference *dr;
918 enum dr_alignment_support supportable_dr_alignment;
919 unsigned int i;
920
921 if (loop_vinfo)
922 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
923 else
924 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
925
926 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
927 {
928 gimple stmt = DR_STMT (dr);
929 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
930
931 /* For interleaving, only the alignment of the first access matters. */
932 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
933 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
934 continue;
935
936 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
937 if (!supportable_dr_alignment)
938 {
939 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
940 {
941 if (DR_IS_READ (dr))
942 fprintf (vect_dump,
943 "not vectorized: unsupported unaligned load.");
944 else
945 fprintf (vect_dump,
946 "not vectorized: unsupported unaligned store.");
947 }
948 return false;
949 }
950 if (supportable_dr_alignment != dr_aligned
951 && vect_print_dump_info (REPORT_ALIGNMENT))
952 fprintf (vect_dump, "Vectorizing an unaligned access.");
953 }
954 return true;
955 }
956
957
958 /* Function vector_alignment_reachable_p
959
960 Return true if vector alignment for DR is reachable by peeling
961 a few loop iterations. Return false otherwise. */
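/* For instance (illustrative): with 16-byte vectors of 4-byte ints, a
   misalignment of 8 bytes can be fixed by peeling two iterations, whereas
   a misalignment of 2 bytes can never be fixed by whole-element peeling
   (2 is not a multiple of the 4-byte element size), so the function
   returns false for the latter.  */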
962
963 static bool
964 vector_alignment_reachable_p (struct data_reference *dr)
965 {
966 gimple stmt = DR_STMT (dr);
967 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
968 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
969
970 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
971 {
972 /* For interleaved accesses we peel only if the number of iterations in
973 the prolog loop ({VF - misalignment}) is a multiple of the
974 number of interleaved accesses. */
975 int elem_size, mis_in_elements;
976 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
977
978 /* FORNOW: handle only known alignment. */
979 if (!known_alignment_for_access_p (dr))
980 return false;
981
982 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
983 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
984
985 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
986 return false;
987 }
988
989 /* If the misalignment is known at compile time then allow peeling
990 only if natural alignment is reachable through peeling. */
991 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
992 {
993 HOST_WIDE_INT elmsize =
994 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
995 if (vect_print_dump_info (REPORT_DETAILS))
996 {
997 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
998 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
999 }
1000 if (DR_MISALIGNMENT (dr) % elmsize)
1001 {
1002 if (vect_print_dump_info (REPORT_DETAILS))
1003 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1004 return false;
1005 }
1006 }
1007
1008 if (!known_alignment_for_access_p (dr))
1009 {
1010 tree type = (TREE_TYPE (DR_REF (dr)));
1011 tree ba = DR_BASE_OBJECT (dr);
1012 bool is_packed = false;
1013
1014 if (ba)
1015 is_packed = contains_packed_reference (ba);
1016
1017 if (vect_print_dump_info (REPORT_DETAILS))
1018 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1019 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1020 return true;
1021 else
1022 return false;
1023 }
1024
1025 return true;
1026 }
1027
1028 /* Function vect_enhance_data_refs_alignment
1029
1030 This pass will use loop versioning and loop peeling in order to enhance
1031 the alignment of data references in the loop.
1032
1033 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1034 original loop is to be vectorized; any other loops that are created by
1035 the transformations performed in this pass are not supposed to be
1036 vectorized. This restriction will be relaxed.
1037
1038 This pass will require a cost model to guide it whether to apply peeling
1039 or versioning or a combination of the two. For example, the scheme that
1040 Intel uses when given a loop with several memory accesses is as follows:
1041 choose one memory access ('p') whose alignment you want to force by doing
1042 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1043 other accesses are not necessarily aligned, or (2) use loop versioning to
1044 generate one loop in which all accesses are aligned, and another loop in
1045 which only 'p' is necessarily aligned.
1046
1047 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1048 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1049 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1050
1051 Devising a cost model is the most critical aspect of this work. It will
1052 guide us on which access to peel for, whether to use loop versioning, how
1053 many versions to create, etc. The cost model will probably consist of
1054 generic considerations as well as target specific considerations (on
1055 powerpc for example, misaligned stores are more painful than misaligned
1056 loads).
1057
1058 Here are the general steps involved in alignment enhancements:
1059
1060 -- original loop, before alignment analysis:
1061 for (i=0; i<N; i++){
1062 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1063 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1064 }
1065
1066 -- After vect_compute_data_refs_alignment:
1067 for (i=0; i<N; i++){
1068 x = q[i]; # DR_MISALIGNMENT(q) = 3
1069 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1070 }
1071
1072 -- Possibility 1: we do loop versioning:
1073 if (p is aligned) {
1074 for (i=0; i<N; i++){ # loop 1A
1075 x = q[i]; # DR_MISALIGNMENT(q) = 3
1076 p[i] = y; # DR_MISALIGNMENT(p) = 0
1077 }
1078 }
1079 else {
1080 for (i=0; i<N; i++){ # loop 1B
1081 x = q[i]; # DR_MISALIGNMENT(q) = 3
1082 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1083 }
1084 }
1085
1086 -- Possibility 2: we do loop peeling:
1087 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1088 x = q[i];
1089 p[i] = y;
1090 }
1091 for (i = 3; i < N; i++){ # loop 2A
1092 x = q[i]; # DR_MISALIGNMENT(q) = 0
1093 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1094 }
1095
1096 -- Possibility 3: combination of loop peeling and versioning:
1097 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1098 x = q[i];
1099 p[i] = y;
1100 }
1101 if (p is aligned) {
1102 for (i = 3; i<N; i++){ # loop 3A
1103 x = q[i]; # DR_MISALIGNMENT(q) = 0
1104 p[i] = y; # DR_MISALIGNMENT(p) = 0
1105 }
1106 }
1107 else {
1108 for (i = 3; i<N; i++){ # loop 3B
1109 x = q[i]; # DR_MISALIGNMENT(q) = 0
1110 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1111 }
1112 }
1113
1114 These loops are later passed to loop_transform to be vectorized. The
1115 vectorizer will use the alignment information to guide the transformation
1116 (whether to generate regular loads/stores, or with special handling for
1117 misalignment). */
1118
1119 bool
1120 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1121 {
1122 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1123 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1124 enum dr_alignment_support supportable_dr_alignment;
1125 struct data_reference *dr0 = NULL;
1126 struct data_reference *dr;
1127 unsigned int i;
1128 bool do_peeling = false;
1129 bool do_versioning = false;
1130 bool stat;
1131 gimple stmt;
1132 stmt_vec_info stmt_info;
1133 int vect_versioning_for_alias_required;
1134
1135 if (vect_print_dump_info (REPORT_DETAILS))
1136 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1137
1138 /* While cost model enhancements are expected in the future, the high level
1139 view of the code at this time is as follows:
1140
1141 A) If there is a misaligned write then see if peeling to align this write
1142 can make all data references satisfy vect_supportable_dr_alignment.
1143 If so, update data structures as needed and return true. Note that
1144 at this time vect_supportable_dr_alignment is known to return false
1145 for a misaligned write.
1146
1147 B) If peeling wasn't possible and there is a data reference with an
1148 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1149 then see if loop versioning checks can be used to make all data
1150 references satisfy vect_supportable_dr_alignment. If so, update
1151 data structures as needed and return true.
1152
1153 C) If neither peeling nor versioning were successful then return false if
1154 any data reference does not satisfy vect_supportable_dr_alignment.
1155
1156 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1157
1158 Note, Possibility 3 above (which is peeling and versioning together) is not
1159 being done at this time. */
1160
1161 /* (1) Peeling to force alignment. */
1162
1163 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1164 Considerations:
1165 + How many accesses will become aligned due to the peeling
1166 - How many accesses will become unaligned due to the peeling,
1167 and the cost of misaligned accesses.
1168 - The cost of peeling (the extra runtime checks, the increase
1169 in code size).
1170
1171 The scheme we use FORNOW: peel to force the alignment of the first
1172 misaligned store in the loop.
1173 Rationale: misaligned stores are not yet supported.
1174
1175 TODO: Use a cost model. */
1176
1177 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1178 {
1179 stmt = DR_STMT (dr);
1180 stmt_info = vinfo_for_stmt (stmt);
1181
1182 /* For interleaving, only the alignment of the first access
1183 matters. */
1184 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1185 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1186 continue;
1187
1188 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1189 {
1190 do_peeling = vector_alignment_reachable_p (dr);
1191 if (do_peeling)
1192 dr0 = dr;
1193 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1194 fprintf (vect_dump, "vector alignment may not be reachable");
1195 break;
1196 }
1197 }
1198
1199 vect_versioning_for_alias_required =
1200 (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)) > 0);
1201
1202 /* Temporarily, if versioning for alias is required, we disable peeling
1203 until we support peeling and versioning. Often peeling for alignment
1204 will require peeling for loop-bound, which in turn requires that we
1205 know how to adjust the loop ivs after the loop. */
1206 if (vect_versioning_for_alias_required
1207 || !vect_can_advance_ivs_p (loop_vinfo)
1208 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1209 do_peeling = false;
1210
1211 if (do_peeling)
1212 {
1213 int mis;
1214 int npeel = 0;
1215 gimple stmt = DR_STMT (dr0);
1216 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1217 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1218 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1219
1220 if (known_alignment_for_access_p (dr0))
1221 {
1222 /* Since it's known at compile time, compute the number of iterations
1223 in the peeled loop (the peeling factor) for use in updating
1224 DR_MISALIGNMENT values. The peeling factor is the vectorization
1225 factor minus the misalignment as an element count. */
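          /* Worked example (illustrative, non-interleaved access): with
             16-byte vectors of 4-byte elements (NELEMENTS = 4) and
             DR_MISALIGNMENT (dr0) = 8 bytes, MIS becomes 8 / 4 = 2
             elements and NPEEL = 4 - 2 = 2 peeled scalar iterations.  */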
1226 mis = DR_MISALIGNMENT (dr0);
1227 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1228 npeel = nelements - mis;
1229
1230 /* For interleaved data access every iteration accesses all the
1231 members of the group, therefore we divide the number of iterations
1232 by the group size. */
1233 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1234 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1235 npeel /= DR_GROUP_SIZE (stmt_info);
1236
1237 if (vect_print_dump_info (REPORT_DETAILS))
1238 fprintf (vect_dump, "Try peeling by %d", npeel);
1239 }
1240
1241 /* Ensure that all data refs can be vectorized after the peel. */
1242 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1243 {
1244 int save_misalignment;
1245
1246 if (dr == dr0)
1247 continue;
1248
1249 stmt = DR_STMT (dr);
1250 stmt_info = vinfo_for_stmt (stmt);
1251 /* For interleaving, only the alignment of the first access
1252 matters. */
1253 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1254 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1255 continue;
1256
1257 save_misalignment = DR_MISALIGNMENT (dr);
1258 vect_update_misalignment_for_peel (dr, dr0, npeel);
1259 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1260 SET_DR_MISALIGNMENT (dr, save_misalignment);
1261
1262 if (!supportable_dr_alignment)
1263 {
1264 do_peeling = false;
1265 break;
1266 }
1267 }
1268
1269 if (do_peeling)
1270 {
1271 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1272 If the misalignment of DR_i is identical to that of dr0 then set
1273 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1274 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1275 by the peeling factor times the element size of DR_i (MOD the
1276 vectorization factor times the size). Otherwise, the
1277 misalignment of DR_i must be set to unknown. */
1278 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1279 if (dr != dr0)
1280 vect_update_misalignment_for_peel (dr, dr0, npeel);
1281
1282 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1283 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1284 SET_DR_MISALIGNMENT (dr0, 0);
1285 if (vect_print_dump_info (REPORT_ALIGNMENT))
1286 fprintf (vect_dump, "Alignment of access forced using peeling.");
1287
1288 if (vect_print_dump_info (REPORT_DETAILS))
1289 fprintf (vect_dump, "Peeling for alignment will be applied.");
1290
1291 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1292 gcc_assert (stat);
1293 return stat;
1294 }
1295 }
1296
1297
1298 /* (2) Versioning to force alignment. */
1299
1300 /* Try versioning if:
1301 1) flag_tree_vect_loop_version is TRUE
1302 2) optimize loop for speed
1303 3) there is at least one unsupported misaligned data ref with an unknown
1304 misalignment, and
1305 4) all misaligned data refs with a known misalignment are supported, and
1306 5) the number of runtime alignment checks is within reason. */
1307
1308 do_versioning =
1309 flag_tree_vect_loop_version
1310 && optimize_loop_nest_for_speed_p (loop)
1311 && (!loop->inner); /* FORNOW */
1312
1313 if (do_versioning)
1314 {
1315 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1316 {
1317 stmt = DR_STMT (dr);
1318 stmt_info = vinfo_for_stmt (stmt);
1319
1320 /* For interleaving, only the alignment of the first access
1321 matters. */
1322 if (aligned_access_p (dr)
1323 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1324 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1325 continue;
1326
1327 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1328
1329 if (!supportable_dr_alignment)
1330 {
1331 gimple stmt;
1332 int mask;
1333 tree vectype;
1334
1335 if (known_alignment_for_access_p (dr)
1336 || VEC_length (gimple,
1337 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1338 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1339 {
1340 do_versioning = false;
1341 break;
1342 }
1343
1344 stmt = DR_STMT (dr);
1345 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1346 gcc_assert (vectype);
1347
1348 /* The rightmost bits of an aligned address must be zeros.
1349 Construct the mask needed for this test. For example,
1350 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1351 mask must be 15 = 0xf. */
1352 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1353
1354 /* FORNOW: use the same mask to test all potentially unaligned
1355 references in the loop. The vectorizer currently supports
1356 a single vector size, see the reference to
1357 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1358 vectorization factor is computed. */
1359 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1360 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1361 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1362 VEC_safe_push (gimple, heap,
1363 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1364 DR_STMT (dr));
1365 }
1366 }
1367
1368 /* Versioning requires at least one misaligned data reference. */
1369 if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
1370 do_versioning = false;
1371 else if (!do_versioning)
1372 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1373 }
1374
1375 if (do_versioning)
1376 {
1377 VEC(gimple,heap) *may_misalign_stmts
1378 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1379 gimple stmt;
1380
1381 /* It can now be assumed that the data references in the statements
1382 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1383 of the loop being vectorized. */
1384 for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, stmt); i++)
1385 {
1386 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1387 dr = STMT_VINFO_DATA_REF (stmt_info);
1388 SET_DR_MISALIGNMENT (dr, 0);
1389 if (vect_print_dump_info (REPORT_ALIGNMENT))
1390 fprintf (vect_dump, "Alignment of access forced using versioning.");
1391 }
1392
1393 if (vect_print_dump_info (REPORT_DETAILS))
1394 fprintf (vect_dump, "Versioning for alignment will be applied.");
1395
1396 /* Peeling and versioning can't be done together at this time. */
1397 gcc_assert (! (do_peeling && do_versioning));
1398
1399 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1400 gcc_assert (stat);
1401 return stat;
1402 }
1403
1404 /* This point is reached if neither peeling nor versioning is being done. */
1405 gcc_assert (! (do_peeling || do_versioning));
1406
1407 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1408 return stat;
1409 }
1410
1411
1412 /* Function vect_analyze_data_refs_alignment
1413
1414 Analyze the alignment of the data-references in the loop.
1415 Return FALSE if a data reference is found that cannot be vectorized. */
1416
1417 bool
1418 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
1419 bb_vec_info bb_vinfo)
1420 {
1421 if (vect_print_dump_info (REPORT_DETAILS))
1422 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1423
1424 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
1425 {
1426 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1427 fprintf (vect_dump,
1428 "not vectorized: can't calculate alignment for data ref.");
1429 return false;
1430 }
1431
1432 return true;
1433 }
1434
1435
1436 /* Analyze groups of strided accesses: check that DR belongs to a group of
1437 strided accesses of legal size, step, etc. Detect gaps, single element
1438 interleaving, and other special cases. Set strided access info.
1439 Collect groups of strided stores for further use in SLP analysis. */
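/* Illustrative example: for a loop that reads both fields of a struct of
   two 4-byte ints, 'x = p[i].a; y = p[i].b;', each dr has DR_STEP 8 and
   type size 4, so STRIDE below is 8 / 4 = 2 and the two loads form an
   interleaving group of size 2.  */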
1440
1441 static bool
1442 vect_analyze_group_access (struct data_reference *dr)
1443 {
1444 tree step = DR_STEP (dr);
1445 tree scalar_type = TREE_TYPE (DR_REF (dr));
1446 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1447 gimple stmt = DR_STMT (dr);
1448 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1449 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1450 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1451 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1452 HOST_WIDE_INT stride;
1453 bool slp_impossible = false;
1454
1455 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1456 interleaving group (including gaps). */
1457 stride = dr_step / type_size;
1458
1459 /* A non-consecutive access is possible only if it is part of an interleaving group. */
1460 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1461 {
1462 /* Check if this DR is part of an interleaving group, and is a single
1463 element of the group that is accessed in the loop. */
1464
1465 /* Gaps are supported only for loads. STEP must be a multiple of the type
1466 size. The size of the group must be a power of 2. */
1467 if (DR_IS_READ (dr)
1468 && (dr_step % type_size) == 0
1469 && stride > 0
1470 && exact_log2 (stride) != -1)
1471 {
1472 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1473 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1474 if (vect_print_dump_info (REPORT_DR_DETAILS))
1475 {
1476 fprintf (vect_dump, "Detected single element interleaving ");
1477 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1478 fprintf (vect_dump, " step ");
1479 print_generic_expr (vect_dump, step, TDF_SLIM);
1480 }
1481 return true;
1482 }
1483 if (vect_print_dump_info (REPORT_DETAILS))
1484 fprintf (vect_dump, "not consecutive access");
1485 return false;
1486 }
1487
1488 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1489 {
1490 /* First stmt in the interleaving chain. Check the chain. */
1491 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1492 struct data_reference *data_ref = dr;
1493 unsigned int count = 1;
1494 tree next_step;
1495 tree prev_init = DR_INIT (data_ref);
1496 gimple prev = stmt;
1497 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
1498
1499 while (next)
1500 {
1501 /* Skip same data-refs. In case two or more stmts share a data-ref
1502 (supported only for loads), we vectorize only the first stmt, and
1503 the rest get their vectorized loads from the first one. */
1504 if (!tree_int_cst_compare (DR_INIT (data_ref),
1505 DR_INIT (STMT_VINFO_DATA_REF (
1506 vinfo_for_stmt (next)))))
1507 {
1508 if (!DR_IS_READ (data_ref))
1509 {
1510 if (vect_print_dump_info (REPORT_DETAILS))
1511 fprintf (vect_dump, "Two store stmts share the same dr.");
1512 return false;
1513 }
1514
1515 /* Check that there are no load-store dependencies for these loads,
1516 to prevent a load-store-load sequence to the same location. */
1517 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
1518 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
1519 {
1520 if (vect_print_dump_info (REPORT_DETAILS))
1521 fprintf (vect_dump,
1522 "READ_WRITE dependence in interleaving.");
1523 return false;
1524 }
1525
1526 /* For load use the same data-ref load. */
1527 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1528
1529 prev = next;
1530 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1531 continue;
1532 }
1533 prev = next;
1534
1535 /* Check that all the accesses have the same STEP. */
1536 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1537 if (tree_int_cst_compare (step, next_step))
1538 {
1539 if (vect_print_dump_info (REPORT_DETAILS))
1540 fprintf (vect_dump, "not consecutive access in interleaving");
1541 return false;
1542 }
1543
1544 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1545 /* Check that the distance between two accesses is equal to the type
1546 size. Otherwise, we have gaps. */
1547 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1548 - TREE_INT_CST_LOW (prev_init)) / type_size;
1549 if (diff != 1)
1550 {
1551 /* FORNOW: SLP of accesses with gaps is not supported. */
1552 slp_impossible = true;
1553 if (!DR_IS_READ (data_ref))
1554 {
1555 if (vect_print_dump_info (REPORT_DETAILS))
1556 fprintf (vect_dump, "interleaved store with gaps");
1557 return false;
1558 }
1559
1560 gaps += diff - 1;
1561 }
1562
1563 /* Store the gap from the previous member of the group. If there is no
1564 gap in the access, DR_GROUP_GAP is always 1. */
1565 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1566
1567 prev_init = DR_INIT (data_ref);
1568 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1569 /* Count the number of data-refs in the chain. */
1570 count++;
1571 }
1572
1573 /* COUNT is the number of accesses found; we multiply it by the size of
1574 the type to get COUNT_IN_BYTES. */
1575 count_in_bytes = type_size * count;
1576
1577 /* Check that the size of the interleaving (including gaps) is not
1578 greater than STEP. */
1579 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
1580 {
1581 if (vect_print_dump_info (REPORT_DETAILS))
1582 {
1583 fprintf (vect_dump, "interleaving size is greater than step for ");
1584 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1585 }
1586 return false;
1587 }
1588
1589 /* Check that the size of the interleaving is equal to STEP for stores,
1590 i.e., that there are no gaps. */
1591 if (dr_step && dr_step != count_in_bytes)
1592 {
1593 if (DR_IS_READ (dr))
1594 {
1595 slp_impossible = true;
1596 /* There is a gap after the last load in the group. This gap is the
1597 difference between the stride and the number of elements. When
1598 there is no gap, this difference should be 0. */
1599 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
1600 }
1601 else
1602 {
1603 if (vect_print_dump_info (REPORT_DETAILS))
1604 fprintf (vect_dump, "interleaved store with gaps");
1605 return false;
1606 }
1607 }
1608
1609 /* Check that STEP is a multiple of type size. */
1610 if (dr_step && (dr_step % type_size) != 0)
1611 {
1612 if (vect_print_dump_info (REPORT_DETAILS))
1613 {
1614 fprintf (vect_dump, "step is not a multiple of type size: step ");
1615 print_generic_expr (vect_dump, step, TDF_SLIM);
1616 fprintf (vect_dump, " size ");
1617 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1618 TDF_SLIM);
1619 }
1620 return false;
1621 }
1622
1623 /* FORNOW: we handle only interleaving that is a power of 2.
1624 We don't fail here if it may be still possible to vectorize the
1625 group using SLP. If not, the size of the group will be checked in
1626 vect_analyze_operations, and the vectorization will fail. */
1627 if (exact_log2 (stride) == -1)
1628 {
1629 if (vect_print_dump_info (REPORT_DETAILS))
1630 fprintf (vect_dump, "interleaving is not a power of 2");
1631
1632 if (slp_impossible)
1633 return false;
1634 }
1635
1636 if (stride == 0)
1637 stride = count;
1638
1639 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1640 if (vect_print_dump_info (REPORT_DETAILS))
1641 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
1642
1643 /* SLP: create an SLP data structure for every interleaving group of
1644 stores for further analysis in vect_analyze_slp. */
1645 if (!DR_IS_READ (dr) && !slp_impossible)
1646 {
1647 if (loop_vinfo)
1648 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
1649 stmt);
1650 if (bb_vinfo)
1651 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
1652 stmt);
1653 }
1654 }
1655
1656 return true;
1657 }
1658
1659
1660 /* Analyze the access pattern of the data-reference DR.
1661 In case of non-consecutive accesses call vect_analyze_group_access() to
1662 analyze groups of strided accesses. */
1663
1664 static bool
1665 vect_analyze_data_ref_access (struct data_reference *dr)
1666 {
1667 tree step = DR_STEP (dr);
1668 tree scalar_type = TREE_TYPE (DR_REF (dr));
1669 gimple stmt = DR_STMT (dr);
1670 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1671 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1672 struct loop *loop = NULL;
1673 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1674
1675 if (loop_vinfo)
1676 loop = LOOP_VINFO_LOOP (loop_vinfo);
1677
1678 if (loop_vinfo && !step)
1679 {
1680 if (vect_print_dump_info (REPORT_DETAILS))
1681 fprintf (vect_dump, "bad data-ref access in loop");
1682 return false;
1683 }
1684
1685 /* Don't allow invariant accesses in loops. */
1686 if (loop_vinfo && dr_step == 0)
1687 return false;
1688
1689 if (loop && nested_in_vect_loop_p (loop, stmt))
1690 {
1691 /* Interleaved accesses are not yet supported within outer-loop
1692 vectorization for references in the inner-loop. */
1693 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1694
1695 /* For the rest of the analysis we use the outer-loop step. */
1696 step = STMT_VINFO_DR_STEP (stmt_info);
1697 dr_step = TREE_INT_CST_LOW (step);
1698
1699 if (dr_step == 0)
1700 {
1701 if (vect_print_dump_info (REPORT_ALIGNMENT))
1702 fprintf (vect_dump, "zero step in outer loop.");
1703 if (DR_IS_READ (dr))
1704 return true;
1705 else
1706 return false;
1707 }
1708 }
1709
1710 /* Consecutive? */
1711 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1712 {
1713 /* Mark that it is not interleaving. */
1714 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1715 return true;
1716 }
1717
1718 if (loop && nested_in_vect_loop_p (loop, stmt))
1719 {
1720 if (vect_print_dump_info (REPORT_ALIGNMENT))
1721 fprintf (vect_dump, "strided access in outer loop.");
1722 return false;
1723 }
1724
1725 /* Not a consecutive access - check whether it is part of an interleaving group. */
1726 return vect_analyze_group_access (dr);
1727 }
1728
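/* Illustrative example (not taken from the original sources): in

     for (i = 0; i < n; i++)
       a[i] = b[2*i];                   ('a' and 'b' point to int)

   the store to a[i] has DR_STEP == 4 == sizeof (int) and is classified as
   consecutive above, while the load from b[2*i] has DR_STEP == 8 and is
   handed to vect_analyze_group_access as a potential interleaving group.  */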
1729
1730 /* Function vect_analyze_data_ref_accesses.
1731
1732 Analyze the access pattern of all the data references in the loop or basic block.
1733
1734 FORNOW: the only access pattern that is considered vectorizable is a
1735 simple step 1 (consecutive) access.
1736
1737 FORNOW: handle only arrays and pointer accesses. */
1738
1739 bool
1740 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1741 {
1742 unsigned int i;
1743 VEC (data_reference_p, heap) *datarefs;
1744 struct data_reference *dr;
1745
1746 if (vect_print_dump_info (REPORT_DETAILS))
1747 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1748
1749 if (loop_vinfo)
1750 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1751 else
1752 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1753
1754 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1755 if (!vect_analyze_data_ref_access (dr))
1756 {
1757 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1758 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1759 return false;
1760 }
1761
1762 return true;
1763 }
1764
1765 /* Function vect_prune_runtime_alias_test_list.
1766
1767 Prune a list of ddrs to be tested at run-time by versioning for alias.
1768 Return FALSE if the resulting list of ddrs is longer than allowed by
1769 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
1770
1771 bool
1772 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
1773 {
1774 VEC (ddr_p, heap) * ddrs =
1775 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
1776 unsigned i, j;
1777
1778 if (vect_print_dump_info (REPORT_DETAILS))
1779 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
1780
1781 for (i = 0; i < VEC_length (ddr_p, ddrs); )
1782 {
1783 bool found;
1784 ddr_p ddr_i;
1785
1786 ddr_i = VEC_index (ddr_p, ddrs, i);
1787 found = false;
1788
1789 for (j = 0; j < i; j++)
1790 {
1791 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
1792
1793 if (vect_vfa_range_equal (ddr_i, ddr_j))
1794 {
1795 if (vect_print_dump_info (REPORT_DR_DETAILS))
1796 {
1797 fprintf (vect_dump, "found equal ranges ");
1798 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
1799 fprintf (vect_dump, ", ");
1800 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
1801 fprintf (vect_dump, " and ");
1802 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
1803 fprintf (vect_dump, ", ");
1804 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
1805 }
1806 found = true;
1807 break;
1808 }
1809 }
1810
1811 if (found)
1812 {
1813 VEC_ordered_remove (ddr_p, ddrs, i);
1814 continue;
1815 }
1816 i++;
1817 }
1818
1819 if (VEC_length (ddr_p, ddrs) >
1820 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
1821 {
1822 if (vect_print_dump_info (REPORT_DR_DETAILS))
1823 {
1824 fprintf (vect_dump,
1825 "disable versioning for alias - max number of generated "
1826 "checks exceeded.");
1827 }
1828
1829 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
1830
1831 return false;
1832 }
1833
1834 return true;
1835 }
1836
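/* Illustrative example (not taken from the original sources): if 'a' and
   'b' are pointer arguments in

     for (i = 0; i < n; i++)
       {
         a[2*i]     = b[2*i];
         a[2*i + 1] = b[2*i + 1];
       }

   the interleaved store and load groups give rise to several may-alias ddrs
   between 'a' and 'b' that all cover the same address ranges, so the pruning
   above keeps only one of them for the run-time alias check.  */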
1837
1838 /* Function vect_analyze_data_refs.
1839
1840 Find all the data references in the loop or basic block.
1841
1842 The general structure of the analysis of data refs in the vectorizer is as
1843 follows:
1844 1- vect_analyze_data_refs(loop/bb): call
1845 compute_data_dependences_for_loop/bb to find and analyze all data-refs
1846 in the loop/bb and their dependences.
1847 2- vect_analyze_data_ref_dependences(): apply dependence testing using ddrs.
1848 3- vect_analyze_data_refs_alignment(): check that ref_stmt.alignment is ok.
1849 4- vect_analyze_data_ref_accesses(): check that ref_stmt.step is ok.
1850
1851 */
1852
1853 bool
1854 vect_analyze_data_refs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1855 {
1856 struct loop *loop = NULL;
1857 basic_block bb = NULL;
1858 unsigned int i;
1859 VEC (data_reference_p, heap) *datarefs;
1860 struct data_reference *dr;
1861 tree scalar_type;
1862
1863 if (vect_print_dump_info (REPORT_DETAILS))
1864 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
1865
1866 if (loop_vinfo)
1867 {
1868 loop = LOOP_VINFO_LOOP (loop_vinfo);
1869 compute_data_dependences_for_loop (loop, true,
1870 &LOOP_VINFO_DATAREFS (loop_vinfo),
1871 &LOOP_VINFO_DDRS (loop_vinfo));
1872 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1873 }
1874 else
1875 {
1876 bb = BB_VINFO_BB (bb_vinfo);
1877 compute_data_dependences_for_bb (bb, true,
1878 &BB_VINFO_DATAREFS (bb_vinfo),
1879 &BB_VINFO_DDRS (bb_vinfo));
1880 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1881 }
1882
1883 /* Go through the data-refs, check that the analysis succeeded, and update
1884 the pointer from the stmt_vec_info struct to the DR and the vectype. */
1885
1886 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1887 {
1888 gimple stmt;
1889 stmt_vec_info stmt_info;
1890 basic_block bb;
1891 tree base, offset, init;
1892
1893 if (!dr || !DR_REF (dr))
1894 {
1895 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1896 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1897 return false;
1898 }
1899
1900 stmt = DR_STMT (dr);
1901 stmt_info = vinfo_for_stmt (stmt);
1902
1903 /* Check that analysis of the data-ref succeeded. */
1904 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1905 || !DR_STEP (dr))
1906 {
1907 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1908 {
1909 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1910 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1911 }
1912 return false;
1913 }
1914
1915 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
1916 {
1917 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1918 fprintf (vect_dump, "not vectorized: base addr of dr is a "
1919 "constant");
1920 return false;
1921 }
1922
1923 base = unshare_expr (DR_BASE_ADDRESS (dr));
1924 offset = unshare_expr (DR_OFFSET (dr));
1925 init = unshare_expr (DR_INIT (dr));
1926
1927 /* Update DR field in stmt_vec_info struct. */
1928 bb = gimple_bb (stmt);
1929
1930 /* If the dataref is in an inner-loop of the loop that is considered for
1931 vectorization, we also want to analyze the access relative to
1932 the outer-loop (DR contains information only relative to the
1933 inner-most enclosing loop). We do that by building a reference to the
1934 first location accessed by the inner-loop, and analyze it relative to
1935 the outer-loop. */
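/* Illustrative example (not taken from the original sources): when
   vectorizing the i-loop of

     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         s += in[i + j];                ('in' points to short)

   DR describes in[i + j] relative to the j-loop, whereas the
   STMT_VINFO_DR_* fields computed below describe the first location
   touched by the inner-loop, &in[i], as it evolves in the i-loop:
   base address &in, outer step 2 bytes.  */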
1936 if (loop && nested_in_vect_loop_p (loop, stmt))
1937 {
1938 tree outer_step, outer_base, outer_init;
1939 HOST_WIDE_INT pbitsize, pbitpos;
1940 tree poffset;
1941 enum machine_mode pmode;
1942 int punsignedp, pvolatilep;
1943 affine_iv base_iv, offset_iv;
1944 tree dinit;
1945
1946 /* Build a reference to the first location accessed by the
1947 inner-loop: *(BASE+INIT). (The first location is actually
1948 BASE+INIT+OFFSET, but we add OFFSET separately later). */
1949 tree inner_base = build_fold_indirect_ref
1950 (fold_build2 (POINTER_PLUS_EXPR,
1951 TREE_TYPE (base), base,
1952 fold_convert (sizetype, init)));
1953
1954 if (vect_print_dump_info (REPORT_DETAILS))
1955 {
1956 fprintf (vect_dump, "analyze in outer-loop: ");
1957 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
1958 }
1959
1960 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
1961 &poffset, &pmode, &punsignedp, &pvolatilep, false);
1962 gcc_assert (outer_base != NULL_TREE);
1963
1964 if (pbitpos % BITS_PER_UNIT != 0)
1965 {
1966 if (vect_print_dump_info (REPORT_DETAILS))
1967 fprintf (vect_dump, "failed: bit offset alignment.\n");
1968 return false;
1969 }
1970
1971 outer_base = build_fold_addr_expr (outer_base);
1972 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
1973 &base_iv, false))
1974 {
1975 if (vect_print_dump_info (REPORT_DETAILS))
1976 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
1977 return false;
1978 }
1979
1980 if (offset)
1981 {
1982 if (poffset)
1983 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
1984 poffset);
1985 else
1986 poffset = offset;
1987 }
1988
1989 if (!poffset)
1990 {
1991 offset_iv.base = ssize_int (0);
1992 offset_iv.step = ssize_int (0);
1993 }
1994 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
1995 &offset_iv, false))
1996 {
1997 if (vect_print_dump_info (REPORT_DETAILS))
1998 fprintf (vect_dump, "evolution of offset is not affine.\n");
1999 return false;
2000 }
2001
2002 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2003 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2004 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2005 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2006 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2007
2008 outer_step = size_binop (PLUS_EXPR,
2009 fold_convert (ssizetype, base_iv.step),
2010 fold_convert (ssizetype, offset_iv.step));
2011
2012 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2013 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2014 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2015 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2016 STMT_VINFO_DR_OFFSET (stmt_info) =
2017 fold_convert (ssizetype, offset_iv.base);
2018 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2019 size_int (highest_pow2_factor (offset_iv.base));
2020
2021 if (vect_print_dump_info (REPORT_DETAILS))
2022 {
2023 fprintf (vect_dump, "\touter base_address: ");
2024 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2025 fprintf (vect_dump, "\n\touter offset from base address: ");
2026 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2027 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2028 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2029 fprintf (vect_dump, "\n\touter step: ");
2030 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2031 fprintf (vect_dump, "\n\touter aligned to: ");
2032 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2033 }
2034 }
2035
2036 if (STMT_VINFO_DATA_REF (stmt_info))
2037 {
2038 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2039 {
2040 fprintf (vect_dump,
2041 "not vectorized: more than one data ref in stmt: ");
2042 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2043 }
2044 return false;
2045 }
2046
2047 STMT_VINFO_DATA_REF (stmt_info) = dr;
2048
2049 /* Set vectype for STMT. */
2050 scalar_type = TREE_TYPE (DR_REF (dr));
2051 STMT_VINFO_VECTYPE (stmt_info) =
2052 get_vectype_for_scalar_type (scalar_type);
2053 if (!STMT_VINFO_VECTYPE (stmt_info))
2054 {
2055 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2056 {
2057 fprintf (vect_dump,
2058 "not vectorized: no vectype for stmt: ");
2059 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2060 fprintf (vect_dump, " scalar_type: ");
2061 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2062 }
2063 return false;
2064 }
2065 }
2066
2067 return true;
2068 }
2069
2070
2071 /* Function vect_get_new_vect_var.
2072
2073 Returns a name for a new variable. The current naming scheme prepends the
2074 prefix "vect_", "stmp_" or "vect_p" (depending on the value of VAR_KIND)
2075 to NAME (if provided) to form the name of a vectorizer generated variable;
2076 otherwise the prefix alone is used. */
2077
2078 tree
2079 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2080 {
2081 const char *prefix;
2082 tree new_vect_var;
2083
2084 switch (var_kind)
2085 {
2086 case vect_simple_var:
2087 prefix = "vect_";
2088 break;
2089 case vect_scalar_var:
2090 prefix = "stmp_";
2091 break;
2092 case vect_pointer_var:
2093 prefix = "vect_p";
2094 break;
2095 default:
2096 gcc_unreachable ();
2097 }
2098
2099 if (name)
2100 {
2101 char* tmp = concat (prefix, name, NULL);
2102 new_vect_var = create_tmp_var (type, tmp);
2103 free (tmp);
2104 }
2105 else
2106 new_vect_var = create_tmp_var (type, prefix);
2107
2108 /* Mark vector typed variable as a gimple register variable. */
2109 if (TREE_CODE (type) == VECTOR_TYPE)
2110 DECL_GIMPLE_REG_P (new_vect_var) = true;
2111
2112 return new_vect_var;
2113 }
2114
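/* For illustration (not part of the original sources): given the code
   above, vect_get_new_vect_var (ptr_type, vect_pointer_var, "in") creates
   a temporary whose name is based on "vect_pin", while
   vect_get_new_vect_var (vectype, vect_simple_var, NULL) creates one whose
   name is based on the bare prefix "vect_".  */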
2115
2116 /* Function vect_create_addr_base_for_vector_ref.
2117
2118 Create an expression that computes the address of the first memory location
2119 that will be accessed for a data reference.
2120
2121 Input:
2122 STMT: The statement containing the data reference.
2123 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2124 OFFSET: Optional. If supplied, it is added to the initial address.
2125 LOOP: Specifies relative to which loop-nest the address should be computed.
2126 For example, when the dataref is in an inner-loop nested in an
2127 outer-loop that is now being vectorized, LOOP can be either the
2128 outer-loop, or the inner-loop. The first memory location accessed
2129 by the following dataref ('in' points to short):
2130
2131 for (i=0; i<N; i++)
2132 for (j=0; j<M; j++)
2133 s += in[i+j]
2134
2135 is as follows:
2136 if LOOP=i_loop: &in (relative to i_loop)
2137 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2138
2139 Output:
2140 1. Return an SSA_NAME whose value is the address of the memory location of
2141 the first vector of the data reference.
2142 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2143 these statement(s) which define the returned SSA_NAME.
2144
2145 FORNOW: We are only handling array accesses with step 1. */
2146
2147 tree
2148 vect_create_addr_base_for_vector_ref (gimple stmt,
2149 gimple_seq *new_stmt_list,
2150 tree offset,
2151 struct loop *loop)
2152 {
2153 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2154 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2155 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2156 tree base_name;
2157 tree data_ref_base_var;
2158 tree vec_stmt;
2159 tree addr_base, addr_expr;
2160 tree dest;
2161 gimple_seq seq = NULL;
2162 tree base_offset = unshare_expr (DR_OFFSET (dr));
2163 tree init = unshare_expr (DR_INIT (dr));
2164 tree vect_ptr_type;
2165 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2166 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2167
2168 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2169 {
2170 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2171
2172 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2173
2174 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2175 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2176 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2177 }
2178
2179 if (loop_vinfo)
2180 base_name = build_fold_indirect_ref (data_ref_base);
2181 else
2182 {
2183 base_offset = ssize_int (0);
2184 init = ssize_int (0);
2185 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2186 }
2187
2188 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2189 add_referenced_var (data_ref_base_var);
2190 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2191 data_ref_base_var);
2192 gimple_seq_add_seq (new_stmt_list, seq);
2193
2194 /* Create base_offset */
2195 base_offset = size_binop (PLUS_EXPR,
2196 fold_convert (sizetype, base_offset),
2197 fold_convert (sizetype, init));
2198 dest = create_tmp_var (sizetype, "base_off");
2199 add_referenced_var (dest);
2200 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2201 gimple_seq_add_seq (new_stmt_list, seq);
2202
2203 if (offset)
2204 {
2205 tree tmp = create_tmp_var (sizetype, "offset");
2206
2207 add_referenced_var (tmp);
2208 offset = fold_build2 (MULT_EXPR, sizetype,
2209 fold_convert (sizetype, offset), step);
2210 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2211 base_offset, offset);
2212 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2213 gimple_seq_add_seq (new_stmt_list, seq);
2214 }
2215
2216 /* base + base_offset */
2217 if (loop_vinfo)
2218 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2219 data_ref_base, base_offset);
2220 else
2221 {
2222 if (TREE_CODE (DR_REF (dr)) == INDIRECT_REF)
2223 addr_base = unshare_expr (TREE_OPERAND (DR_REF (dr), 0));
2224 else
2225 addr_base = build1 (ADDR_EXPR,
2226 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2227 unshare_expr (DR_REF (dr)));
2228 }
2229
2230 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2231
2232 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2233 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2234 get_name (base_name));
2235 add_referenced_var (addr_expr);
2236 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2237 gimple_seq_add_seq (new_stmt_list, seq);
2238
2239 if (vect_print_dump_info (REPORT_DETAILS))
2240 {
2241 fprintf (vect_dump, "created ");
2242 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2243 }
2244
2245 return vec_stmt;
2246 }
2247
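/* Illustrative sketch (SSA names invented) of what the function above
   appends to NEW_STMT_LIST for a loop access 'a[i]' of type int, vectype
   V4SI and no OFFSET:

     batmp.1    = &a;                          (DR_BASE_ADDRESS)
     base_off.2 = 0;                           (DR_OFFSET + DR_INIT)
     vect_pa.3  = (v4si *) (batmp.1 + base_off.2);

   vect_pa.3 is the SSA_NAME returned to the caller, which typically
   inserts these statements on the loop-preheader edge.  */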
2248
2249 /* Function vect_create_data_ref_ptr.
2250
2251 Create a new pointer to vector type (vp), that points to the first location
2252 accessed in the loop by STMT, along with the def-use update chain to
2253 appropriately advance the pointer through the loop iterations. Also set
2254 aliasing information for the pointer. This vector pointer is used by the
2255 callers to this function to create a memory reference expression for vector
2256 load/store access.
2257
2258 Input:
2259 1. STMT: a stmt that references memory. Expected to be of the form
2260 GIMPLE_ASSIGN <name, data-ref> or
2261 GIMPLE_ASSIGN <data-ref, name>.
2262 2. AT_LOOP: the loop where the vector memref is to be created.
2263 3. OFFSET (optional): an offset to be added to the initial address accessed
2264 by the data-ref in STMT.
2265 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2266 pointing to the initial address.
2267 5. TYPE: if not NULL indicates the required type of the data-ref.
2268
2269 Output:
2270 1. Declare a new ptr to vector_type, and have it point to the base of the
2271 data reference (the initial address accessed by the data reference).
2272 For example, for vector of type V8HI, the following code is generated:
2273
2274 v8hi *vp;
2275 vp = (v8hi *)initial_address;
2276
2277 if OFFSET is not supplied:
2278 initial_address = &a[init];
2279 if OFFSET is supplied:
2280 initial_address = &a[init + OFFSET];
2281
2282 Return the initial_address in INITIAL_ADDRESS.
2283
2284 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2285 update the pointer in each iteration of the loop.
2286
2287 Return the increment stmt that updates the pointer in PTR_INCR.
2288
2289 3. Set INV_P to true if the access pattern of the data reference in the
2290 vectorized loop is invariant. Set it to false otherwise.
2291
2292 4. Return the pointer. */
2293
2294 tree
2295 vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
2296 tree offset, tree *initial_address, gimple *ptr_incr,
2297 bool only_init, bool *inv_p)
2298 {
2299 tree base_name;
2300 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2301 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2302 struct loop *loop = NULL;
2303 bool nested_in_vect_loop = false;
2304 struct loop *containing_loop = NULL;
2305 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2306 tree vect_ptr_type;
2307 tree vect_ptr;
2308 tree new_temp;
2309 gimple vec_stmt;
2310 gimple_seq new_stmt_list = NULL;
2311 edge pe = NULL;
2312 basic_block new_bb;
2313 tree vect_ptr_init;
2314 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2315 tree vptr;
2316 gimple_stmt_iterator incr_gsi;
2317 bool insert_after;
2318 tree indx_before_incr, indx_after_incr;
2319 gimple incr;
2320 tree step;
2321 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2322 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2323
2324 if (loop_vinfo)
2325 {
2326 loop = LOOP_VINFO_LOOP (loop_vinfo);
2327 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2328 containing_loop = (gimple_bb (stmt))->loop_father;
2329 pe = loop_preheader_edge (loop);
2330 }
2331 else
2332 {
2333 gcc_assert (bb_vinfo);
2334 only_init = true;
2335 *ptr_incr = NULL;
2336 }
2337
2338 /* Check the step (evolution) of the load in LOOP, and record
2339 whether it's invariant. */
2340 if (nested_in_vect_loop)
2341 step = STMT_VINFO_DR_STEP (stmt_info);
2342 else
2343 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
2344
2345 if (tree_int_cst_compare (step, size_zero_node) == 0)
2346 *inv_p = true;
2347 else
2348 *inv_p = false;
2349
2350 /* Create an expression for the first address accessed by this load
2351 in LOOP. */
2352 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
2353
2354 if (vect_print_dump_info (REPORT_DETAILS))
2355 {
2356 tree data_ref_base = base_name;
2357 fprintf (vect_dump, "create vector-pointer variable to type: ");
2358 print_generic_expr (vect_dump, vectype, TDF_SLIM);
2359 if (TREE_CODE (data_ref_base) == VAR_DECL)
2360 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
2361 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
2362 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
2363 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2364 fprintf (vect_dump, " vectorizing a record based array ref: ");
2365 else if (TREE_CODE (data_ref_base) == SSA_NAME)
2366 fprintf (vect_dump, " vectorizing a pointer ref: ");
2367 print_generic_expr (vect_dump, base_name, TDF_SLIM);
2368 }
2369
2370 /** (1) Create the new vector-pointer variable: **/
2371 vect_ptr_type = build_pointer_type (vectype);
2372 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2373 get_name (base_name));
2374 /* If any of the data-references in the stmt group does not conflict
2375 with the created vector data-reference use a ref-all pointer instead. */
2376 if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
2377 {
2378 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
2379 do
2380 {
2381 tree lhs = gimple_assign_lhs (orig_stmt);
2382 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2383 get_alias_set (lhs)))
2384 {
2385 vect_ptr_type = build_pointer_type_for_mode (vectype,
2386 ptr_mode, true);
2387 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2388 get_name (base_name));
2389 break;
2390 }
2391
2392 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
2393 }
2394 while (orig_stmt);
2395 }
2396
2397 add_referenced_var (vect_ptr);
2398
2399 /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
2400 vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
2401 def-use update cycles for the pointer: One relative to the outer-loop
2402 (LOOP), which is what steps (3) and (4) below do. The other is relative
2403 to the inner-loop (which is the inner-most loop containing the dataref),
2404 and this is done by step (5) below.
2405
2406 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
2407 inner-most loop, and so steps (3),(4) work the same, and step (5) is
2408 redundant. Steps (3),(4) create the following:
2409
2410 vp0 = &base_addr;
2411 LOOP: vp1 = phi(vp0,vp2)
2412 ...
2413 ...
2414 vp2 = vp1 + step
2415 goto LOOP
2416
2417 If there is an inner-loop nested in loop, then step (5) will also be
2418 applied, and an additional update in the inner-loop will be created:
2419
2420 vp0 = &base_addr;
2421 LOOP: vp1 = phi(vp0,vp2)
2422 ...
2423 inner: vp3 = phi(vp1,vp4)
2424 vp4 = vp3 + inner_step
2425 if () goto inner
2426 ...
2427 vp2 = vp1 + step
2428 if () goto LOOP */
2429
2430 /** (3) Calculate the initial address of the vector-pointer, and set
2431 the vector-pointer to point to it before the loop: **/
2432
2433 /* Create: &(base[init_val+offset]) in the loop preheader. */
2434
2435 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
2436 offset, loop);
2437 if (new_stmt_list)
2438 {
2439 if (pe)
2440 {
2441 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
2442 gcc_assert (!new_bb);
2443 }
2444 else
2445 gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT);
2446 }
2447
2448 *initial_address = new_temp;
2449
2450 /* Create: p = (vectype *) initial_base */
2451 vec_stmt = gimple_build_assign (vect_ptr,
2452 fold_convert (vect_ptr_type, new_temp));
2453 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
2454 gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
2455 if (pe)
2456 {
2457 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
2458 gcc_assert (!new_bb);
2459 }
2460 else
2461 gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
2462
2463 /** (4) Handle the updating of the vector-pointer inside the loop.
2464 This is needed when ONLY_INIT is false, and also when AT_LOOP
2465 is the inner-loop nested in LOOP (during outer-loop vectorization).
2466 **/
2467
2468 /* No update in loop is required. */
2469 if (only_init && (!loop_vinfo || at_loop == loop))
2470 {
2471 /* Copy the points-to information if it exists. */
2472 if (DR_PTR_INFO (dr))
2473 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
2474 vptr = vect_ptr_init;
2475 }
2476 else
2477 {
2478 /* The step of the vector pointer is the Vector Size. */
2479 tree step = TYPE_SIZE_UNIT (vectype);
2480 /* One exception to the above is when the scalar step of the load in
2481 LOOP is zero. In this case the step here is also zero. */
2482 if (*inv_p)
2483 step = size_zero_node;
2484
2485 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
2486
2487 create_iv (vect_ptr_init,
2488 fold_convert (vect_ptr_type, step),
2489 vect_ptr, loop, &incr_gsi, insert_after,
2490 &indx_before_incr, &indx_after_incr);
2491 incr = gsi_stmt (incr_gsi);
2492 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
2493
2494 /* Copy the points-to information if it exists. */
2495 if (DR_PTR_INFO (dr))
2496 {
2497 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2498 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2499 }
2500 if (ptr_incr)
2501 *ptr_incr = incr;
2502
2503 vptr = indx_before_incr;
2504 }
2505
2506 if (!nested_in_vect_loop || only_init)
2507 return vptr;
2508
2509
2510 /** (5) Handle the updating of the vector-pointer inside the inner-loop
2511 nested in LOOP, if it exists: **/
2512
2513 gcc_assert (nested_in_vect_loop);
2514 if (!only_init)
2515 {
2516 standard_iv_increment_position (containing_loop, &incr_gsi,
2517 &insert_after);
2518 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
2519 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
2520 &indx_after_incr);
2521 incr = gsi_stmt (incr_gsi);
2522 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
2523
2524 /* Copy the points-to information if it exists. */
2525 if (DR_PTR_INFO (dr))
2526 {
2527 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2528 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2529 }
2530 if (ptr_incr)
2531 *ptr_incr = incr;
2532
2533 return indx_before_incr;
2534 }
2535 else
2536 gcc_unreachable ();
2537 }
2538
2539
2540 /* Function bump_vector_ptr
2541
2542 Increment a pointer (to a vector type) by vector-size. If requested,
2543 i.e. if PTR_INCR is given, then also connect the new increment stmt
2544 to the existing def-use update-chain of the pointer, by modifying
2545 the PTR_INCR as illustrated below:
2546
2547 The pointer def-use update-chain before this function:
2548 DATAREF_PTR = phi (p_0, p_2)
2549 ....
2550 PTR_INCR: p_2 = DATAREF_PTR + step
2551
2552 The pointer def-use update-chain after this function:
2553 DATAREF_PTR = phi (p_0, p_2)
2554 ....
2555 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
2556 ....
2557 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
2558
2559 Input:
2560 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
2561 in the loop.
2562 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
2563 the loop. The increment amount across iterations is expected
2564 to be vector_size.
2565 GSI - location where the new update stmt is to be placed.
2566 STMT - the original scalar memory-access stmt that is being vectorized.
2567 BUMP - optional. The offset by which to bump the pointer. If not given,
2568 the offset is assumed to be vector_size.
2569
2570 Output: Return NEW_DATAREF_PTR as illustrated above.
2571
2572 */
2573
2574 tree
2575 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
2576 gimple stmt, tree bump)
2577 {
2578 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2579 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2580 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2581 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
2582 tree update = TYPE_SIZE_UNIT (vectype);
2583 gimple incr_stmt;
2584 ssa_op_iter iter;
2585 use_operand_p use_p;
2586 tree new_dataref_ptr;
2587
2588 if (bump)
2589 update = bump;
2590
2591 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
2592 dataref_ptr, update);
2593 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
2594 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
2595 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
2596
2597 /* Copy the points-to information if it exists. */
2598 if (DR_PTR_INFO (dr))
2599 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
2600
2601 if (!ptr_incr)
2602 return new_dataref_ptr;
2603
2604 /* Update the vector-pointer's cross-iteration increment. */
2605 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
2606 {
2607 tree use = USE_FROM_PTR (use_p);
2608
2609 if (use == dataref_ptr)
2610 SET_USE (use_p, new_dataref_ptr);
2611 else
2612 gcc_assert (tree_int_cst_compare (use, update) == 0);
2613 }
2614
2615 return new_dataref_ptr;
2616 }
2617
2618
2619 /* Function vect_create_destination_var.
2620
2621 Create a new temporary of type VECTYPE. */
2622
2623 tree
2624 vect_create_destination_var (tree scalar_dest, tree vectype)
2625 {
2626 tree vec_dest;
2627 const char *new_name;
2628 tree type;
2629 enum vect_var_kind kind;
2630
2631 kind = vectype ? vect_simple_var : vect_scalar_var;
2632 type = vectype ? vectype : TREE_TYPE (scalar_dest);
2633
2634 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2635
2636 new_name = get_name (scalar_dest);
2637 if (!new_name)
2638 new_name = "var_";
2639 vec_dest = vect_get_new_vect_var (type, kind, new_name);
2640 add_referenced_var (vec_dest);
2641
2642 return vec_dest;
2643 }
2644
2645 /* Function vect_strided_store_supported.
2646
2647 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
2648 and FALSE otherwise. */
2649
2650 bool
2651 vect_strided_store_supported (tree vectype)
2652 {
2653 optab interleave_high_optab, interleave_low_optab;
2654 int mode;
2655
2656 mode = (int) TYPE_MODE (vectype);
2657
2658 /* Check that the operation is supported. */
2659 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
2660 vectype, optab_default);
2661 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
2662 vectype, optab_default);
2663 if (!interleave_high_optab || !interleave_low_optab)
2664 {
2665 if (vect_print_dump_info (REPORT_DETAILS))
2666 fprintf (vect_dump, "no optab for interleave.");
2667 return false;
2668 }
2669
2670 if (optab_handler (interleave_high_optab, mode)->insn_code
2671 == CODE_FOR_nothing
2672 || optab_handler (interleave_low_optab, mode)->insn_code
2673 == CODE_FOR_nothing)
2674 {
2675 if (vect_print_dump_info (REPORT_DETAILS))
2676 fprintf (vect_dump, "interleave op not supported by target.");
2677 return false;
2678 }
2679
2680 return true;
2681 }
2682
2683
2684 /* Function vect_permute_store_chain.
2685
2686 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
2687 a power of 2, generate interleave_high/low stmts to reorder the data
2688 correctly for the stores. Return the final references for stores in
2689 RESULT_CHAIN.
2690
2691 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2692 The input is 4 vectors each containing 8 elements. We assign a number to each
2693 element; the input sequence is:
2694
2695 1st vec: 0 1 2 3 4 5 6 7
2696 2nd vec: 8 9 10 11 12 13 14 15
2697 3rd vec: 16 17 18 19 20 21 22 23
2698 4th vec: 24 25 26 27 28 29 30 31
2699
2700 The output sequence should be:
2701
2702 1st vec: 0 8 16 24 1 9 17 25
2703 2nd vec: 2 10 18 26 3 11 19 27
2704 3rd vec: 4 12 20 28 5 13 21 29
2705 4th vec: 6 14 22 30 7 15 23 31
2706
2707 i.e., we interleave the contents of the four vectors in their order.
2708
2709 We use interleave_high/low instructions to create such output. The input of
2710 each interleave_high/low operation is two vectors:
2711 1st vec 2nd vec
2712 0 1 2 3 4 5 6 7
2713 the even elements of the result vector are obtained left-to-right from the
2714 high/low elements of the first vector. The odd elements of the result are
2715 obtained left-to-right from the high/low elements of the second vector.
2716 The output of interleave_high will be: 0 4 1 5
2717 and of interleave_low: 2 6 3 7
2718
2719
2720 The permutation is done in log LENGTH stages. In each stage interleave_high
2721 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
2722 where the first argument is taken from the first half of DR_CHAIN and the
2723 second argument from its second half.
2724 In our example,
2725
2726 I1: interleave_high (1st vec, 3rd vec)
2727 I2: interleave_low (1st vec, 3rd vec)
2728 I3: interleave_high (2nd vec, 4th vec)
2729 I4: interleave_low (2nd vec, 4th vec)
2730
2731 The output for the first stage is:
2732
2733 I1: 0 16 1 17 2 18 3 19
2734 I2: 4 20 5 21 6 22 7 23
2735 I3: 8 24 9 25 10 26 11 27
2736 I4: 12 28 13 29 14 30 15 31
2737
2738 The output of the second stage, i.e. the final result is:
2739
2740 I1: 0 8 16 24 1 9 17 25
2741 I2: 2 10 18 26 3 11 19 27
2742 I3: 4 12 20 28 5 13 21 29
2743 I4: 6 14 22 30 7 15 23 31. */
2744
2745 bool
2746 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
2747 unsigned int length,
2748 gimple stmt,
2749 gimple_stmt_iterator *gsi,
2750 VEC(tree,heap) **result_chain)
2751 {
2752 tree perm_dest, vect1, vect2, high, low;
2753 gimple perm_stmt;
2754 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2755 tree scalar_dest;
2756 int i;
2757 unsigned int j;
2758 enum tree_code high_code, low_code;
2759
2760 scalar_dest = gimple_assign_lhs (stmt);
2761
2762 /* Check that the operation is supported. */
2763 if (!vect_strided_store_supported (vectype))
2764 return false;
2765
2766 *result_chain = VEC_copy (tree, heap, dr_chain);
2767
2768 for (i = 0; i < exact_log2 (length); i++)
2769 {
2770 for (j = 0; j < length/2; j++)
2771 {
2772 vect1 = VEC_index (tree, dr_chain, j);
2773 vect2 = VEC_index (tree, dr_chain, j+length/2);
2774
2775 /* Create interleaving stmt:
2776 in the case of big endian:
2777 high = interleave_high (vect1, vect2)
2778 and in the case of little endian:
2779 high = interleave_low (vect1, vect2). */
2780 perm_dest = create_tmp_var (vectype, "vect_inter_high");
2781 DECL_GIMPLE_REG_P (perm_dest) = 1;
2782 add_referenced_var (perm_dest);
2783 if (BYTES_BIG_ENDIAN)
2784 {
2785 high_code = VEC_INTERLEAVE_HIGH_EXPR;
2786 low_code = VEC_INTERLEAVE_LOW_EXPR;
2787 }
2788 else
2789 {
2790 low_code = VEC_INTERLEAVE_HIGH_EXPR;
2791 high_code = VEC_INTERLEAVE_LOW_EXPR;
2792 }
2793 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
2794 vect1, vect2);
2795 high = make_ssa_name (perm_dest, perm_stmt);
2796 gimple_assign_set_lhs (perm_stmt, high);
2797 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2798 VEC_replace (tree, *result_chain, 2*j, high);
2799
2800 /* Create interleaving stmt:
2801 in the case of big endian:
2802 low = interleave_low (vect1, vect2)
2803 and in the case of little endian:
2804 low = interleave_high (vect1, vect2). */
2805 perm_dest = create_tmp_var (vectype, "vect_inter_low");
2806 DECL_GIMPLE_REG_P (perm_dest) = 1;
2807 add_referenced_var (perm_dest);
2808 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
2809 vect1, vect2);
2810 low = make_ssa_name (perm_dest, perm_stmt);
2811 gimple_assign_set_lhs (perm_stmt, low);
2812 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2813 VEC_replace (tree, *result_chain, 2*j+1, low);
2814 }
2815 dr_chain = VEC_copy (tree, heap, *result_chain);
2816 }
2817 return true;
2818 }
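
/* Illustrative example (not taken from the original sources): the
   permutation above serves interleaved stores such as

     for (i = 0; i < n; i++)
       {
         out[2*i]     = a[i];
         out[2*i + 1] = b[i];
       }

   where DR_CHAIN holds the vector of 'a' values followed by the vector of
   'b' values, and RESULT_CHAIN receives them interleaved in memory order,
   ready to be written with two consecutive vector stores to 'out'.  */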
2819
2820 /* Function vect_setup_realignment
2821
2822 This function is called when vectorizing an unaligned load using
2823 the dr_explicit_realign[_optimized] scheme.
2824 This function generates the following code at the loop prolog:
2825
2826 p = initial_addr;
2827 x msq_init = *(floor(p)); # prolog load
2828 realignment_token = call target_builtin;
2829 loop:
2830 x msq = phi (msq_init, ---)
2831
2832 The stmts marked with x are generated only for the case of
2833 dr_explicit_realign_optimized.
2834
2835 The code above sets up a new (vector) pointer, pointing to the first
2836 location accessed by STMT, and a "floor-aligned" load using that pointer.
2837 It also generates code to compute the "realignment-token" (if the relevant
2838 target hook was defined), and creates a phi-node at the loop-header bb
2839 whose arguments are the result of the prolog-load (created by this
2840 function) and the result of a load that takes place in the loop (to be
2841 created by the caller to this function).
2842
2843 For the case of dr_explicit_realign_optimized:
2844 The caller to this function uses the phi-result (msq) to create the
2845 realignment code inside the loop, and sets up the missing phi argument,
2846 as follows:
2847 loop:
2848 msq = phi (msq_init, lsq)
2849 lsq = *(floor(p')); # load in loop
2850 result = realign_load (msq, lsq, realignment_token);
2851
2852 For the case of dr_explicit_realign:
2853 loop:
2854 msq = *(floor(p)); # load in loop
2855 p' = p + (VS-1);
2856 lsq = *(floor(p')); # load in loop
2857 result = realign_load (msq, lsq, realignment_token);
2858
2859 Input:
2860 STMT - (scalar) load stmt to be vectorized. This load accesses
2861 a memory location that may be unaligned.
2862 BSI - place where new code is to be inserted.
2863 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
2864 is used.
2865
2866 Output:
2867 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
2868 target hook, if defined.
2869 Return value - the result of the loop-header phi node. */
2870
2871 tree
2872 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
2873 tree *realignment_token,
2874 enum dr_alignment_support alignment_support_scheme,
2875 tree init_addr,
2876 struct loop **at_loop)
2877 {
2878 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2879 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2880 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2881 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2882 edge pe;
2883 tree scalar_dest = gimple_assign_lhs (stmt);
2884 tree vec_dest;
2885 gimple inc;
2886 tree ptr;
2887 tree data_ref;
2888 gimple new_stmt;
2889 basic_block new_bb;
2890 tree msq_init = NULL_TREE;
2891 tree new_temp;
2892 gimple phi_stmt;
2893 tree msq = NULL_TREE;
2894 gimple_seq stmts = NULL;
2895 bool inv_p;
2896 bool compute_in_loop = false;
2897 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2898 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
2899 struct loop *loop_for_initial_load;
2900
2901 gcc_assert (alignment_support_scheme == dr_explicit_realign
2902 || alignment_support_scheme == dr_explicit_realign_optimized);
2903
2904 /* We need to generate three things:
2905 1. the misalignment computation
2906 2. the extra vector load (for the optimized realignment scheme).
2907 3. the phi node for the two vectors from which the realignment is
2908 done (for the optimized realignment scheme).
2909 */
2910
2911 /* 1. Determine where to generate the misalignment computation.
2912
2913 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
2914 calculation will be generated by this function, outside the loop (in the
2915 preheader). Otherwise, INIT_ADDR had already been computed for us by the
2916 caller, inside the loop.
2917
2918 Background: If the misalignment remains fixed throughout the iterations of
2919 the loop, then both realignment schemes are applicable, and also the
2920 misalignment computation can be done outside LOOP. This is because we are
2921 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
2922 are a multiple of VS (the Vector Size), and therefore the misalignment in
2923 different vectorized LOOP iterations is always the same.
2924 The problem arises only if the memory access is in an inner-loop nested
2925 inside LOOP, which is now being vectorized using outer-loop vectorization.
2926 This is the only case when the misalignment of the memory access may not
2927 remain fixed throughout the iterations of the inner-loop (as explained in
2928 detail in vect_supportable_dr_alignment). In this case, not only is the
2929 optimized realignment scheme not applicable, but also the misalignment
2930 computation (and generation of the realignment token that is passed to
2931 REALIGN_LOAD) have to be done inside the loop.
2932
2933 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
2934 or not, which in turn determines if the misalignment is computed inside
2935 the inner-loop, or outside LOOP. */
2936
2937 if (init_addr != NULL_TREE)
2938 {
2939 compute_in_loop = true;
2940 gcc_assert (alignment_support_scheme == dr_explicit_realign);
2941 }
2942
2943
2944 /* 2. Determine where to generate the extra vector load.
2945
2946 For the optimized realignment scheme, instead of generating two vector
2947 loads in each iteration, we generate a single extra vector load in the
2948 preheader of the loop, and in each iteration reuse the result of the
2949 vector load from the previous iteration. In case the memory access is in
2950 an inner-loop nested inside LOOP, which is now being vectorized using
2951 outer-loop vectorization, we need to determine whether this initial vector
2952 load should be generated at the preheader of the inner-loop, or can be
2953 generated at the preheader of LOOP. If the memory access has no evolution
2954 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
2955 to be generated inside LOOP (in the preheader of the inner-loop). */
2956
2957 if (nested_in_vect_loop)
2958 {
2959 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
2960 bool invariant_in_outerloop =
2961 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
2962 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
2963 }
2964 else
2965 loop_for_initial_load = loop;
2966 if (at_loop)
2967 *at_loop = loop_for_initial_load;
2968
2969 /* 3. For the case of the optimized realignment, create the first vector
2970 load at the loop preheader. */
2971
2972 if (alignment_support_scheme == dr_explicit_realign_optimized)
2973 {
2974 /* Create msq_init = *(floor(p1)) in the loop preheader */
2975
2976 gcc_assert (!compute_in_loop);
2977 pe = loop_preheader_edge (loop_for_initial_load);
2978 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2979 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
2980 &init_addr, &inc, true, &inv_p);
2981 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
2982 new_stmt = gimple_build_assign (vec_dest, data_ref);
2983 new_temp = make_ssa_name (vec_dest, new_stmt);
2984 gimple_assign_set_lhs (new_stmt, new_temp);
2985 mark_symbols_for_renaming (new_stmt);
2986 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
2987 gcc_assert (!new_bb);
2988 msq_init = gimple_assign_lhs (new_stmt);
2989 }
2990
2991 /* 4. Create realignment token using a target builtin, if available.
2992 It is done either inside the containing loop, or before LOOP (as
2993 determined above). */
2994
2995 if (targetm.vectorize.builtin_mask_for_load)
2996 {
2997 tree builtin_decl;
2998
2999 /* Compute INIT_ADDR - the initial address accessed by this memref. */
3000 if (compute_in_loop)
3001 gcc_assert (init_addr); /* already computed by the caller. */
3002 else
3003 {
3004 /* Generate the INIT_ADDR computation outside LOOP. */
3005 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3006 NULL_TREE, loop);
3007 pe = loop_preheader_edge (loop);
3008 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3009 gcc_assert (!new_bb);
3010 }
3011
3012 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3013 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3014 vec_dest =
3015 vect_create_destination_var (scalar_dest,
3016 gimple_call_return_type (new_stmt));
3017 new_temp = make_ssa_name (vec_dest, new_stmt);
3018 gimple_call_set_lhs (new_stmt, new_temp);
3019
3020 if (compute_in_loop)
3021 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3022 else
3023 {
3024 /* Generate the misalignment computation outside LOOP. */
3025 pe = loop_preheader_edge (loop);
3026 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3027 gcc_assert (!new_bb);
3028 }
3029
3030 *realignment_token = gimple_call_lhs (new_stmt);
3031
3032 /* The result of the CALL_EXPR to this builtin is determined from
3033 the value of the parameter and no global variables are touched
3034 which makes the builtin a "const" function. Requiring the
3035 builtin to have the "const" attribute makes it unnecessary
3036 to call mark_call_clobbered. */
3037 gcc_assert (TREE_READONLY (builtin_decl));
3038 }
3039
3040 if (alignment_support_scheme == dr_explicit_realign)
3041 return msq;
3042
3043 gcc_assert (!compute_in_loop);
3044 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3045
3046
3047 /* 5. Create msq = phi <msq_init, lsq> in loop */
3048
3049 pe = loop_preheader_edge (containing_loop);
3050 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3051 msq = make_ssa_name (vec_dest, NULL);
3052 phi_stmt = create_phi_node (msq, containing_loop->header);
3053 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3054 add_phi_arg (phi_stmt, msq_init, pe);
3055
3056 return msq;
3057 }
3058
3059
3060 /* Function vect_strided_load_supported.
3061
3062 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3063 and FALSE otherwise. */
3064
3065 bool
3066 vect_strided_load_supported (tree vectype)
3067 {
3068 optab perm_even_optab, perm_odd_optab;
3069 int mode;
3070
3071 mode = (int) TYPE_MODE (vectype);
3072
3073 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3074 optab_default);
3075 if (!perm_even_optab)
3076 {
3077 if (vect_print_dump_info (REPORT_DETAILS))
3078 fprintf (vect_dump, "no optab for perm_even.");
3079 return false;
3080 }
3081
3082 if (optab_handler (perm_even_optab, mode)->insn_code == CODE_FOR_nothing)
3083 {
3084 if (vect_print_dump_info (REPORT_DETAILS))
3085 fprintf (vect_dump, "perm_even op not supported by target.");
3086 return false;
3087 }
3088
3089 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3090 optab_default);
3091 if (!perm_odd_optab)
3092 {
3093 if (vect_print_dump_info (REPORT_DETAILS))
3094 fprintf (vect_dump, "no optab for perm_odd.");
3095 return false;
3096 }
3097
3098 if (optab_handler (perm_odd_optab, mode)->insn_code == CODE_FOR_nothing)
3099 {
3100 if (vect_print_dump_info (REPORT_DETAILS))
3101 fprintf (vect_dump, "perm_odd op not supported by target.");
3102 return false;
3103 }
3104 return true;
3105 }
3106
3107
3108 /* Function vect_permute_load_chain.
3109
3110 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3111 a power of 2, generate extract_even/odd stmts to reorder the input data
3112 correctly. Return the final references for loads in RESULT_CHAIN.
3113
3114 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3115 The input is 4 vectors each containing 8 elements. We assign a number to each
3116 element; the input sequence is:
3117
3118 1st vec: 0 1 2 3 4 5 6 7
3119 2nd vec: 8 9 10 11 12 13 14 15
3120 3rd vec: 16 17 18 19 20 21 22 23
3121 4th vec: 24 25 26 27 28 29 30 31
3122
3123 The output sequence should be:
3124
3125 1st vec: 0 4 8 12 16 20 24 28
3126 2nd vec: 1 5 9 13 17 21 25 29
3127 3rd vec: 2 6 10 14 18 22 26 30
3128 4th vec: 3 7 11 15 19 23 27 31
3129
3130 i.e., the first output vector should contain the first elements of each
3131 interleaving group, etc.
3132
3133 We use extract_even/odd instructions to create such output. The input of each
3134 extract_even/odd operation is two vectors
3135 1st vec 2nd vec
3136 0 1 2 3 4 5 6 7
3137
3138 and the output is the vector of extracted even/odd elements. The output of
3139 extract_even will be: 0 2 4 6
3140 and of extract_odd: 1 3 5 7
3141
3142
3143 The permutation is done in log LENGTH stages. In each stage extract_even and
3144 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
3145 order. In our example,
3146
3147 E1: extract_even (1st vec, 2nd vec)
3148 E2: extract_odd (1st vec, 2nd vec)
3149 E3: extract_even (3rd vec, 4th vec)
3150 E4: extract_odd (3rd vec, 4th vec)
3151
3152 The output for the first stage will be:
3153
3154 E1: 0 2 4 6 8 10 12 14
3155 E2: 1 3 5 7 9 11 13 15
3156 E3: 16 18 20 22 24 26 28 30
3157 E4: 17 19 21 23 25 27 29 31
3158
3159 In order to proceed and create the correct sequence for the next stage (or
3160 for the correct output, if the second stage is the last one, as in our
3161 example), we first put the output of extract_even operation and then the
3162 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3163 The input for the second stage is:
3164
3165 1st vec (E1): 0 2 4 6 8 10 12 14
3166 2nd vec (E3): 16 18 20 22 24 26 28 30
3167 3rd vec (E2): 1 3 5 7 9 11 13 15
3168 4th vec (E4): 17 19 21 23 25 27 29 31
3169
3170 The output of the second stage:
3171
3172 E1: 0 4 8 12 16 20 24 28
3173 E2: 2 6 10 14 18 22 26 30
3174 E3: 1 5 9 13 17 21 25 29
3175 E4: 3 7 11 15 19 23 27 31
3176
3177 And RESULT_CHAIN after reordering:
3178
3179 1st vec (E1): 0 4 8 12 16 20 24 28
3180 2nd vec (E3): 1 5 9 13 17 21 25 29
3181 3rd vec (E2): 2 6 10 14 18 22 26 30
3182 4th vec (E4): 3 7 11 15 19 23 27 31. */
3183
3184 bool
3185 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3186 unsigned int length,
3187 gimple stmt,
3188 gimple_stmt_iterator *gsi,
3189 VEC(tree,heap) **result_chain)
3190 {
3191 tree perm_dest, data_ref, first_vect, second_vect;
3192 gimple perm_stmt;
3193 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3194 int i;
3195 unsigned int j;
3196
3197 /* Check that the operation is supported. */
3198 if (!vect_strided_load_supported (vectype))
3199 return false;
3200
3201 *result_chain = VEC_copy (tree, heap, dr_chain);
3202 for (i = 0; i < exact_log2 (length); i++)
3203 {
3204 for (j = 0; j < length; j +=2)
3205 {
3206 first_vect = VEC_index (tree, dr_chain, j);
3207 second_vect = VEC_index (tree, dr_chain, j+1);
3208
3209 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3210 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3211 DECL_GIMPLE_REG_P (perm_dest) = 1;
3212 add_referenced_var (perm_dest);
3213
3214 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
3215 perm_dest, first_vect,
3216 second_vect);
3217
3218 data_ref = make_ssa_name (perm_dest, perm_stmt);
3219 gimple_assign_set_lhs (perm_stmt, data_ref);
3220 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3221 mark_symbols_for_renaming (perm_stmt);
3222
3223 VEC_replace (tree, *result_chain, j/2, data_ref);
3224
3225 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3226 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3227 DECL_GIMPLE_REG_P (perm_dest) = 1;
3228 add_referenced_var (perm_dest);
3229
3230 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
3231 perm_dest, first_vect,
3232 second_vect);
3233 data_ref = make_ssa_name (perm_dest, perm_stmt);
3234 gimple_assign_set_lhs (perm_stmt, data_ref);
3235 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3236 mark_symbols_for_renaming (perm_stmt);
3237
3238 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3239 }
3240 dr_chain = VEC_copy (tree, heap, *result_chain);
3241 }
3242 return true;
3243 }
3244
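/* Illustrative example (not taken from the original sources): the
   permutation above serves interleaved loads such as

     for (i = 0; i < n; i++)
       c[i] = in[2*i] - in[2*i + 1];

   where DR_CHAIN holds two consecutive vector loads from 'in', and
   RESULT_CHAIN receives the vector of even elements (in[0], in[2], ...)
   followed by the vector of odd elements (in[1], in[3], ...).  */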
3245
3246 /* Function vect_transform_strided_load.
3247
3248 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3249 to perform their permutation and ascribe the result vectorized statements to
3250 the scalar statements.
3251 */
3252
3253 bool
3254 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
3255 gimple_stmt_iterator *gsi)
3256 {
3257 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3258 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3259 gimple next_stmt, new_stmt;
3260 VEC(tree,heap) *result_chain = NULL;
3261 unsigned int i, gap_count;
3262 tree tmp_data_ref;
3263
3264 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3265 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
3266 vectors, that are ready for vector computation. */
3267 result_chain = VEC_alloc (tree, heap, size);
3268 /* Permute. */
3269 if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
3270 return false;
3271
3272 /* Put a permuted data-ref in the VECTORIZED_STMT field.
3273 Since we scan the chain starting from its first node, their order
3274 corresponds to the order of data-refs in RESULT_CHAIN.
3275 next_stmt = first_stmt;
3276 gap_count = 1;
3277 for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
3278 {
3279 if (!next_stmt)
3280 break;
3281
3282 /* Skip the gaps. Loads created for the gaps will be removed by the dead
3283 code elimination pass later. No need to check for the first stmt in
3284 the group, since it always exists.
3285 DR_GROUP_GAP is the number of steps in elements from the previous
3286 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
3287 correspond to the gaps.
3288 */
3289 if (next_stmt != first_stmt
3290 && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
3291 {
3292 gap_count++;
3293 continue;
3294 }
3295
3296 while (next_stmt)
3297 {
3298 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
3299 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
3300 copies, and we put the new vector statement in the first available
3301 RELATED_STMT. */
3302 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
3303 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
3304 else
3305 {
3306 if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3307 {
3308 gimple prev_stmt =
3309 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
3310 gimple rel_stmt =
3311 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
3312 while (rel_stmt)
3313 {
3314 prev_stmt = rel_stmt;
3315 rel_stmt =
3316 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
3317 }
3318
3319 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
3320 new_stmt;
3321 }
3322 }
3323
3324 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3325 gap_count = 1;
3326 /* If NEXT_STMT accesses the same DR as the previous statement,
3327 put the same TMP_DATA_REF as its vectorized statement; otherwise
3328 get the next data-ref from RESULT_CHAIN. */
3329 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3330 break;
3331 }
3332 }
3333
3334 VEC_free (tree, heap, result_chain);
3335 return true;
3336 }
3337
3338 /* Function vect_can_force_dr_alignment_p.
3339
3340    Returns whether the alignment of a DECL can be forced so that it is
3341    aligned on an ALIGNMENT-bit boundary.  */
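/* For example (a hypothetical case, not from the sources): a file-scope
   "static float a[256];" can normally be forced to the alignment required
   for vector accesses, since its definition is emitted here and
   MAX_OFILE_ALIGNMENT is typically large enough, whereas an "extern"
   declaration cannot, because its storage (and hence its alignment) is
   defined elsewhere.  */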
3342
3343 bool
3344 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
3345 {
3346 if (TREE_CODE (decl) != VAR_DECL)
3347 return false;
3348
3349 if (DECL_EXTERNAL (decl))
3350 return false;
3351
3352 if (TREE_ASM_WRITTEN (decl))
3353 return false;
3354
3355 if (TREE_STATIC (decl))
3356 return (alignment <= MAX_OFILE_ALIGNMENT);
3357 else
3358 return (alignment <= MAX_STACK_ALIGNMENT);
3359 }
3360
3361 /* Function vect_supportable_dr_alignment.
3362
3363 Return whether the data reference DR is supported with respect to its
3364 alignment. */
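/* The possible results are dr_aligned, dr_explicit_realign_optimized,
   dr_explicit_realign, dr_unaligned_supported and dr_unaligned_unsupported,
   all members of enum dr_alignment_support; the code below decides which of
   them applies.  */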
3365
3366 enum dr_alignment_support
3367 vect_supportable_dr_alignment (struct data_reference *dr)
3368 {
3369 gimple stmt = DR_STMT (dr);
3370 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3371 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3372 enum machine_mode mode = TYPE_MODE (vectype);
3373 bool invariant_in_outerloop = false;
3374 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3375 struct loop *vect_loop = NULL;
3376 bool nested_in_vect_loop = false;
3377
3378 if (aligned_access_p (dr))
3379 return dr_aligned;
3380
3381 if (!loop_vinfo)
3382 /* FORNOW: Misaligned accesses are supported only in loops. */
3383 return dr_unaligned_unsupported;
3384
3385 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
3386 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
3387
3388 if (nested_in_vect_loop)
3389 {
3390 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3391 invariant_in_outerloop =
3392 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3393 }
3394
3395 /* Possibly unaligned access. */
3396
3397 /* We can choose between using the implicit realignment scheme (generating
3398 a misaligned_move stmt) and the explicit realignment scheme (generating
3399    aligned loads with a REALIGN_LOAD).  There are two variants of the explicit
3400    realignment scheme: optimized and unoptimized.
3401 We can optimize the realignment only if the step between consecutive
3402 vector loads is equal to the vector size. Since the vector memory
3403 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
3404 is guaranteed that the misalignment amount remains the same throughout the
3405 execution of the vectorized loop. Therefore, we can create the
3406 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
3407 at the loop preheader.
3408
3409 However, in the case of outer-loop vectorization, when vectorizing a
3410 memory access in the inner-loop nested within the LOOP that is now being
3411 vectorized, while it is guaranteed that the misalignment of the
3412 vectorized memory access will remain the same in different outer-loop
3413    iterations, it is *not* guaranteed that it will remain the same throughout
3414 the execution of the inner-loop. This is because the inner-loop advances
3415 with the original scalar step (and not in steps of VS). If the inner-loop
3416 step happens to be a multiple of VS, then the misalignment remains fixed
3417 and we can use the optimized realignment scheme. For example:
3418
3419 for (i=0; i<N; i++)
3420 for (j=0; j<M; j++)
3421 s += a[i+j];
3422
3423 When vectorizing the i-loop in the above example, the step between
3424 consecutive vector loads is 1, and so the misalignment does not remain
3425 fixed across the execution of the inner-loop, and the realignment cannot
3426 be optimized (as illustrated in the following pseudo vectorized loop):
3427
3428 for (i=0; i<N; i+=4)
3429 for (j=0; j<M; j++){
3430 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
3431 // when j is {0,1,2,3,4,5,6,7,...} respectively.
3432 // (assuming that we start from an aligned address).
3433 }
3434
3435 We therefore have to use the unoptimized realignment scheme:
3436
3437 for (i=0; i<N; i+=4)
3438 for (j=k; j<M; j+=4)
3439 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
3440 // that the misalignment of the initial address is
3441 // 0).
3442
3443 The loop can then be vectorized as follows:
3444
3445 for (k=0; k<4; k++){
3446 rt = get_realignment_token (&vp[k]);
3447 for (i=0; i<N; i+=4){
3448 v1 = vp[i+k];
3449 for (j=k; j<M; j+=4){
3450 v2 = vp[i+j+VS-1];
3451 va = REALIGN_LOAD <v1,v2,rt>;
3452 vs += va;
3453 v1 = v2;
3454 }
3455 }
3456 } */
3457
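  /* Note that only loads are given a misaligned fallback below: a misaligned
     store that was not classified as dr_aligned above ends up as
     dr_unaligned_unsupported.  */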
3458 if (DR_IS_READ (dr))
3459 {
3460 if (optab_handler (vec_realign_load_optab, mode)->insn_code !=
3461 CODE_FOR_nothing
3462 && (!targetm.vectorize.builtin_mask_for_load
3463 || targetm.vectorize.builtin_mask_for_load ()))
3464 {
3465 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3466 if (nested_in_vect_loop
3467 && (TREE_INT_CST_LOW (DR_STEP (dr))
3468 != GET_MODE_SIZE (TYPE_MODE (vectype))))
3469 return dr_explicit_realign;
3470 else
3471 return dr_explicit_realign_optimized;
3472 }
3473
3474 if (optab_handler (movmisalign_optab, mode)->insn_code !=
3475 CODE_FOR_nothing)
3476 /* Can't software pipeline the loads, but can at least do them. */
3477 return dr_unaligned_supported;
3478 }
3479
3480 /* Unsupported. */
3481 return dr_unaligned_unsupported;
3482 }