1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software
3 Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "cfgloop.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "toplev.h"
41
42
43 /* Return the smallest scalar part of STMT.
44 This is used to determine the vectype of the stmt. We generally set the
45 vectype according to the type of the result (lhs). For stmts whose
46 result-type is different than the type of the arguments (e.g., demotion,
47 promotion), vectype will be reset appropriately (later). Note that we have
48 to visit the smallest datatype in this function, because that determines the
49 VF. If the smallest datatype in the loop is present only as the rhs of a
50 promotion operation, we would miss it.
51 Such a case, where a variable of this datatype does not appear in the lhs
52 anywhere in the loop, can only occur if it's an invariant: e.g.:
53 'int_x = (int) short_inv', which we'd expect to have been optimized away by
54 invariant motion. However, we cannot rely on invariant motion to always take
55 invariants out of the loop, and so in the case of promotion we also have to
56 check the rhs.
57 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
58 types. */
59
60 tree
61 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
62 HOST_WIDE_INT *rhs_size_unit)
63 {
64 tree scalar_type = gimple_expr_type (stmt);
65 HOST_WIDE_INT lhs, rhs;
66
67 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
68
69 if (is_gimple_assign (stmt)
70 && (gimple_assign_cast_p (stmt)
71 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
72 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
73 {
74 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
75
76 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
77 if (rhs < lhs)
78 scalar_type = rhs_type;
79 }
80
81 *lhs_size_unit = lhs;
82 *rhs_size_unit = rhs;
83 return scalar_type;
84 }
85
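/* For example, in a loop computing

     int_res[i] = short_a[i] * short_b[i];

   (a widening multiplication), the result type is 'int' (4 bytes) but the
   smallest scalar type feeding the computation is 'short' (2 bytes).
   Returning 'short' here lets the vectorization factor be derived from the
   narrower type, so enough shorts are packed into a vector to produce the
   widened products.  */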
86
87 /* Find the place of the data-ref in STMT in the interleaving chain that starts
88 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
89
90 int
91 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
92 {
93 gimple next_stmt = first_stmt;
94 int result = 0;
95
96 if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
97 return -1;
98
99 while (next_stmt && next_stmt != stmt)
100 {
101 result++;
102 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
103 }
104
105 if (next_stmt)
106 return result;
107 else
108 return -1;
109 }
110
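/* For example, if the chain rooted at FIRST_STMT consists of the loads
   b[4*i], b[4*i+1] and b[4*i+2] (ordered by their inits), the stmt that
   loads b[4*i+2] is at place 2, while a stmt whose data-ref belongs to a
   different chain yields -1.  */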
111
112 /* Function vect_insert_into_interleaving_chain.
113
114 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
115
116 static void
117 vect_insert_into_interleaving_chain (struct data_reference *dra,
118 struct data_reference *drb)
119 {
120 gimple prev, next;
121 tree next_init;
122 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
123 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
124
125 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
126 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
127 while (next)
128 {
129 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
130 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
131 {
132 /* Insert here. */
133 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
134 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
135 return;
136 }
137 prev = next;
138 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
139 }
140
141 /* We got to the end of the list. Insert here. */
142 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
143 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
144 }
145
146
147 /* Function vect_update_interleaving_chain.
148
149 For two data-refs DRA and DRB that are part of a chain of interleaved data
150 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
151
152 There are four possible cases:
153 1. New stmts - neither DRA nor DRB is yet a part of any chain:
154 FIRST_DR = DRB
155 NEXT_DR (DRB) = DRA
156 2. DRB is a part of a chain and DRA is not:
157 no need to update FIRST_DR
158 no need to insert DRB
159 insert DRA according to init
160 3. DRA is a part of a chain and DRB is not:
161 if (init of FIRST_DR > init of DRB)
162 FIRST_DR = DRB
163 NEXT(FIRST_DR) = previous FIRST_DR
164 else
165 insert DRB according to its init
166 4. Both DRA and DRB are already in interleaving chains:
167 choose the chain with the smallest init of FIRST_DR
168 insert the nodes of the second chain into the first one. */
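/* As an illustration of case 4: if DRA belongs to the chain
   {a[4*i+1], a[4*i+3]} and DRB to the chain {a[4*i], a[4*i+2]}, the chain
   of DRB has the smaller FIRST_DR init, so the nodes of DRA's chain are
   inserted into DRB's chain according to their inits, yielding the single
   chain {a[4*i], a[4*i+1], a[4*i+2], a[4*i+3]}.  */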
169
170 static void
171 vect_update_interleaving_chain (struct data_reference *drb,
172 struct data_reference *dra)
173 {
174 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
175 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
176 tree next_init, init_dra_chain, init_drb_chain;
177 gimple first_a, first_b;
178 tree node_init;
179 gimple node, prev, next, first_stmt;
180
181 /* 1. New stmts - neither DRA nor DRB is yet a part of any chain. */
182 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
183 {
184 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
185 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
186 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
187 return;
188 }
189
190 /* 2. DRB is a part of a chain and DRA is not. */
191 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
192 {
193 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
194 /* Insert DRA into the chain of DRB. */
195 vect_insert_into_interleaving_chain (dra, drb);
196 return;
197 }
198
199 /* 3. DRA is a part of a chain and DRB is not. */
200 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
201 {
202 gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
203 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
204 old_first_stmt)));
205 gimple tmp;
206
207 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
208 {
209 /* DRB's init is smaller than the init of the stmt previously marked
210 as the first stmt of the interleaving chain of DRA. Therefore, we
211 update FIRST_STMT and put DRB in the head of the list. */
212 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
213 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
214
215 /* Update all the stmts in the list to point to the new FIRST_STMT. */
216 tmp = old_first_stmt;
217 while (tmp)
218 {
219 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
220 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
221 }
222 }
223 else
224 {
225 /* Insert DRB in the list of DRA. */
226 vect_insert_into_interleaving_chain (drb, dra);
227 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
228 }
229 return;
230 }
231
232 /* 4. Both DRA and DRB are already in interleaving chains. */
233 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
234 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
235 if (first_a == first_b)
236 return;
237 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
238 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
239
240 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
241 {
242 /* Insert the nodes of the DRA chain into the DRB chain.
243 After inserting a node, continue from this node of the DRB chain (don't
244 start from the beginning). */
245 node = DR_GROUP_FIRST_DR (stmtinfo_a);
246 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
247 first_stmt = first_b;
248 }
249 else
250 {
251 /* Insert the nodes of the DRB chain into the DRA chain.
252 After inserting a node, continue from this node of the DRA chain (don't
253 start from the beginning). */
254 node = DR_GROUP_FIRST_DR (stmtinfo_b);
255 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
256 first_stmt = first_a;
257 }
258
259 while (node)
260 {
261 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
262 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
263 while (next)
264 {
265 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
266 if (tree_int_cst_compare (next_init, node_init) > 0)
267 {
268 /* Insert here. */
269 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
270 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
271 prev = node;
272 break;
273 }
274 prev = next;
275 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
276 }
277 if (!next)
278 {
279 /* We got to the end of the list. Insert here. */
280 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
281 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
282 prev = node;
283 }
284 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
285 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
286 }
287 }
288
289
290 /* Function vect_equal_offsets.
291
292 Check if OFFSET1 and OFFSET2 are identical expressions. */
293
294 static bool
295 vect_equal_offsets (tree offset1, tree offset2)
296 {
297 bool res0, res1;
298
299 STRIP_NOPS (offset1);
300 STRIP_NOPS (offset2);
301
302 if (offset1 == offset2)
303 return true;
304
305 if (TREE_CODE (offset1) != TREE_CODE (offset2)
306 || !BINARY_CLASS_P (offset1)
307 || !BINARY_CLASS_P (offset2))
308 return false;
309
310 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
311 TREE_OPERAND (offset2, 0));
312 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
313 TREE_OPERAND (offset2, 1));
314
315 return (res0 && res1);
316 }
317
318
319 /* Function vect_check_interleaving.
320
321 Check if DRA and DRB form an interleaving. If they do, insert
322 DRA and DRB into an interleaving chain. */
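/* For example, the accesses a[4*i] and a[4*i+2] (4-byte elements) have the
   same base and the same 16-byte step, and their inits differ by 8 bytes.
   The difference is a multiple of the element size and does not exceed the
   step, so the two data-refs are recorded as one interleaving chain, with
   a[4*i] accessed first.  */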
323
324 static bool
325 vect_check_interleaving (struct data_reference *dra,
326 struct data_reference *drb)
327 {
328 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
329
330 /* Check that the data-refs have the same first location (except init) and
331 that both are either loads or stores (not one load and one store). */
332 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
333 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
334 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
335 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
336 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
337 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
338 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
339 || DR_IS_READ (dra) != DR_IS_READ (drb))
340 return false;
341
342 /* Check:
343 1. data-refs are of the same type
344 2. their steps are equal
345 3. the step (if greater than zero) is greater than the difference between
346 data-refs' inits. */
347 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
348 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
349
350 if (type_size_a != type_size_b
351 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
352 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
353 TREE_TYPE (DR_REF (drb))))
354 return false;
355
356 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
357 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
358 step = TREE_INT_CST_LOW (DR_STEP (dra));
359
360 if (init_a > init_b)
361 {
362 /* If init_a == init_b + the size of the type * k, we have an interleaving,
363 and DRB is accessed before DRA. */
364 diff_mod_size = (init_a - init_b) % type_size_a;
365
366 if ((init_a - init_b) > step)
367 return false;
368
369 if (diff_mod_size == 0)
370 {
371 vect_update_interleaving_chain (drb, dra);
372 if (vect_print_dump_info (REPORT_DR_DETAILS))
373 {
374 fprintf (vect_dump, "Detected interleaving ");
375 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
376 fprintf (vect_dump, " and ");
377 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
378 }
379 return true;
380 }
381 }
382 else
383 {
384 /* If init_b == init_a + the size of the type * k, we have an
385 interleaving, and DRA is accessed before DRB. */
386 diff_mod_size = (init_b - init_a) % type_size_a;
387
388 if ((init_b - init_a) > step)
389 return false;
390
391 if (diff_mod_size == 0)
392 {
393 vect_update_interleaving_chain (dra, drb);
394 if (vect_print_dump_info (REPORT_DR_DETAILS))
395 {
396 fprintf (vect_dump, "Detected interleaving ");
397 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
398 fprintf (vect_dump, " and ");
399 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
400 }
401 return true;
402 }
403 }
404
405 return false;
406 }
407
408 /* Check if the data references pointed to by DR_I and DR_J are the same or
409 belong to the same interleaving group. Return FALSE if the drs are
410 different, otherwise return TRUE. */
411
412 static bool
413 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
414 {
415 gimple stmt_i = DR_STMT (dr_i);
416 gimple stmt_j = DR_STMT (dr_j);
417
418 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
419 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
420 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
421 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
422 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
423 return true;
424 else
425 return false;
426 }
427
428 /* If address ranges represented by DDR_I and DDR_J are equal,
429 return TRUE, otherwise return FALSE. */
430
431 static bool
432 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
433 {
434 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
435 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
436 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
437 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
438 return true;
439 else
440 return false;
441 }
442
443 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
444 tested at run-time. Return TRUE if DDR was successfully inserted.
445 Return false if versioning is not supported. */
446
447 static bool
448 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
449 {
450 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
451
452 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
453 return false;
454
455 if (vect_print_dump_info (REPORT_DR_DETAILS))
456 {
457 fprintf (vect_dump, "mark for run-time aliasing test between ");
458 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
459 fprintf (vect_dump, " and ");
460 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
461 }
462
463 if (optimize_loop_nest_for_size_p (loop))
464 {
465 if (vect_print_dump_info (REPORT_DR_DETAILS))
466 fprintf (vect_dump, "versioning not supported when optimizing for size.");
467 return false;
468 }
469
470 /* FORNOW: We don't support versioning with outer-loop vectorization. */
471 if (loop->inner)
472 {
473 if (vect_print_dump_info (REPORT_DR_DETAILS))
474 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
475 return false;
476 }
477
478 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
479 return true;
480 }
481
482 /* Function vect_analyze_data_ref_dependence.
483
484 Return TRUE if there (might) exist a dependence between a memory-reference
485 DRA and a memory-reference DRB. If the dependence can instead be checked
486 at run-time by versioning for alias, return FALSE. */
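/* For example, with a vectorization factor of 4, the loop

     for (i = 8; i < n; i++)
       a[i] = a[i-8] + 1;

   has a dependence distance of 8 between the read a[i-8] and the write
   a[i].  Since the distance is at least VF, the accesses of a single
   vector iteration cannot overlap, and the dependence does not prevent
   vectorization.  A distance of 3, in contrast, would be smaller than VF
   and not a multiple of it, so the loop would be rejected.  */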
487
488 static bool
489 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
490 loop_vec_info loop_vinfo)
491 {
492 unsigned int i;
493 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
494 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
495 struct data_reference *dra = DDR_A (ddr);
496 struct data_reference *drb = DDR_B (ddr);
497 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
498 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
499 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
500 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
501 lambda_vector dist_v;
502 unsigned int loop_depth;
503
504 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
505 {
506 /* Independent data accesses. */
507 vect_check_interleaving (dra, drb);
508 return false;
509 }
510
511 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
512 return false;
513
514 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
515 {
516 if (vect_print_dump_info (REPORT_DR_DETAILS))
517 {
518 fprintf (vect_dump,
519 "versioning for alias required: can't determine dependence between ");
520 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
521 fprintf (vect_dump, " and ");
522 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
523 }
524 /* Add to list of ddrs that need to be tested at run-time. */
525 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
526 }
527
528 if (DDR_NUM_DIST_VECTS (ddr) == 0)
529 {
530 if (vect_print_dump_info (REPORT_DR_DETAILS))
531 {
532 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
533 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
534 fprintf (vect_dump, " and ");
535 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
536 }
537 /* Add to list of ddrs that need to be tested at run-time. */
538 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
539 }
540
541 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
542 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
543 {
544 int dist = dist_v[loop_depth];
545
546 if (vect_print_dump_info (REPORT_DR_DETAILS))
547 fprintf (vect_dump, "dependence distance = %d.", dist);
548
549 /* Same loop iteration. */
550 if (dist % vectorization_factor == 0 && dra_size == drb_size)
551 {
552 /* Two references with distance zero have the same alignment. */
553 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
554 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
555 if (vect_print_dump_info (REPORT_ALIGNMENT))
556 fprintf (vect_dump, "accesses have the same alignment.");
557 if (vect_print_dump_info (REPORT_DR_DETAILS))
558 {
559 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
560 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
561 fprintf (vect_dump, " and ");
562 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
563 }
564
565 /* For interleaving, mark that there is a read-write dependency if
566 necessary. We checked above that one of the data-refs is a store. */
567 if (DR_IS_READ (dra))
568 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
569 else
570 {
571 if (DR_IS_READ (drb))
572 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
573 }
574
575 continue;
576 }
577
578 if (abs (dist) >= vectorization_factor
579 || (dist > 0 && DDR_REVERSED_P (ddr)))
580 {
581 /* Dependence distance does not create dependence, as far as
582 vectorization is concerned, in this case. If DDR_REVERSED_P the
583 order of the data-refs in DDR was reversed (to make distance
584 vector positive), and the actual distance is negative. */
585 if (vect_print_dump_info (REPORT_DR_DETAILS))
586 fprintf (vect_dump, "dependence distance >= VF or negative.");
587 continue;
588 }
589
590 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
591 {
592 fprintf (vect_dump,
593 "not vectorized, possible dependence "
594 "between data-refs ");
595 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
596 fprintf (vect_dump, " and ");
597 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
598 }
599
600 return true;
601 }
602
603 return false;
604 }
605
606 /* Function vect_analyze_data_ref_dependences.
607
608 Examine all the data references in the loop, and make sure there do not
609 exist any data dependences between them. */
610
611 bool
612 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
613 {
614 unsigned int i;
615 VEC (ddr_p, heap) * ddrs = LOOP_VINFO_DDRS (loop_vinfo);
616 struct data_dependence_relation *ddr;
617
618 if (vect_print_dump_info (REPORT_DETAILS))
619 fprintf (vect_dump, "=== vect_analyze_dependences ===");
620
621 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
622 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
623 return false;
624
625 return true;
626 }
627
628
629 /* Function vect_compute_data_ref_alignment
630
631 Compute the misalignment of the data reference DR.
632
633 Output:
634 1. If during the misalignment computation it is found that the data reference
635 cannot be vectorized then false is returned.
636 2. DR_MISALIGNMENT (DR) is defined.
637
638 FOR NOW: No analysis is actually performed. Misalignment is calculated
639 only for trivial cases. TODO. */
640
641 static bool
642 vect_compute_data_ref_alignment (struct data_reference *dr)
643 {
644 gimple stmt = DR_STMT (dr);
645 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
646 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
647 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
648 tree ref = DR_REF (dr);
649 tree vectype;
650 tree base, base_addr;
651 bool base_aligned;
652 tree misalign;
653 tree aligned_to, alignment;
654
655 if (vect_print_dump_info (REPORT_DETAILS))
656 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
657
658 /* Initialize misalignment to unknown. */
659 SET_DR_MISALIGNMENT (dr, -1);
660
661 misalign = DR_INIT (dr);
662 aligned_to = DR_ALIGNED_TO (dr);
663 base_addr = DR_BASE_ADDRESS (dr);
664 vectype = STMT_VINFO_VECTYPE (stmt_info);
665
666 /* In case the dataref is in an inner-loop of the loop that is being
667 vectorized (LOOP), we use the base and misalignment information
668 relative to the outer-loop (LOOP). This is ok only if the misalignment
669 stays the same throughout the execution of the inner-loop, which is why
670 we have to check that the stride of the dataref in the inner-loop is a
671 multiple of the vector size. */
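/* For example, when vectorizing the outer loop of

     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
         a[j*8 + i] = ...;

   the inner-loop step of the access is 8 * sizeof (a[0]); with 4-byte
   elements that is 32 bytes, a multiple of the 16-byte V4SI vector size,
   so the access keeps the same misalignment in every inner-loop iteration
   and the outer-loop base/misalignment information can be used.  */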
672 if (nested_in_vect_loop_p (loop, stmt))
673 {
674 tree step = DR_STEP (dr);
675 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
676
677 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
678 {
679 if (vect_print_dump_info (REPORT_ALIGNMENT))
680 fprintf (vect_dump, "inner step divides the vector-size.");
681 misalign = STMT_VINFO_DR_INIT (stmt_info);
682 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
683 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
684 }
685 else
686 {
687 if (vect_print_dump_info (REPORT_ALIGNMENT))
688 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
689 misalign = NULL_TREE;
690 }
691 }
692
693 base = build_fold_indirect_ref (base_addr);
694 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
695
696 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
697 || !misalign)
698 {
699 if (vect_print_dump_info (REPORT_ALIGNMENT))
700 {
701 fprintf (vect_dump, "Unknown alignment for access: ");
702 print_generic_expr (vect_dump, base, TDF_SLIM);
703 }
704 return true;
705 }
706
707 if ((DECL_P (base)
708 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
709 alignment) >= 0)
710 || (TREE_CODE (base_addr) == SSA_NAME
711 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
712 TREE_TYPE (base_addr)))),
713 alignment) >= 0))
714 base_aligned = true;
715 else
716 base_aligned = false;
717
718 if (!base_aligned)
719 {
720 /* Do not change the alignment of global variables if
721 flag_section_anchors is enabled. */
722 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
723 || (TREE_STATIC (base) && flag_section_anchors))
724 {
725 if (vect_print_dump_info (REPORT_DETAILS))
726 {
727 fprintf (vect_dump, "can't force alignment of ref: ");
728 print_generic_expr (vect_dump, ref, TDF_SLIM);
729 }
730 return true;
731 }
732
733 /* Force the alignment of the decl.
734 NOTE: This is the only change to the code we make during
735 the analysis phase, before deciding to vectorize the loop. */
736 if (vect_print_dump_info (REPORT_DETAILS))
737 fprintf (vect_dump, "force alignment");
738 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
739 DECL_USER_ALIGN (base) = 1;
740 }
741
742 /* At this point we assume that the base is aligned. */
743 gcc_assert (base_aligned
744 || (TREE_CODE (base) == VAR_DECL
745 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
746
747 /* Modulo alignment. */
748 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
749
750 if (!host_integerp (misalign, 1))
751 {
752 /* Negative or overflowed misalignment value. */
753 if (vect_print_dump_info (REPORT_DETAILS))
754 fprintf (vect_dump, "unexpected misalign value");
755 return false;
756 }
757
758 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
759
760 if (vect_print_dump_info (REPORT_DETAILS))
761 {
762 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
763 print_generic_expr (vect_dump, ref, TDF_SLIM);
764 }
765
766 return true;
767 }
768
769
770 /* Function vect_compute_data_refs_alignment
771
772 Compute the misalignment of data references in the loop.
773 Return FALSE if a data reference is found that cannot be vectorized. */
774
775 static bool
776 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
777 {
778 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
779 struct data_reference *dr;
780 unsigned int i;
781
782 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
783 if (!vect_compute_data_ref_alignment (dr))
784 return false;
785
786 return true;
787 }
788
789
790 /* Function vect_update_misalignment_for_peel
791
792 DR - the data reference whose misalignment is to be adjusted.
793 DR_PEEL - the data reference whose misalignment is being made
794 zero in the vector loop by the peel.
795 NPEEL - the number of iterations in the peel loop if the misalignment
796 of DR_PEEL is known at compile time. */
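/* For example, if NPEEL is 2 and DR accesses 4-byte elements with a 16-byte
   vector size, a data-ref whose misalignment was 12 bytes before peeling
   becomes misaligned by (12 + 2*4) % 16 == 4 bytes afterwards.  Data-refs
   recorded as having the same alignment as DR_PEEL simply become aligned
   (misalignment 0), and a misalignment that is not known at compile time
   stays unknown (-1).  */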
797
798 static void
799 vect_update_misalignment_for_peel (struct data_reference *dr,
800 struct data_reference *dr_peel, int npeel)
801 {
802 unsigned int i;
803 VEC(dr_p,heap) *same_align_drs;
804 struct data_reference *current_dr;
805 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
806 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
807 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
808 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
809
810 /* For interleaved data accesses the step in the loop must be multiplied by
811 the size of the interleaving group. */
812 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
813 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
814 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
815 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
816
817 /* It can be assumed that the data refs with the same alignment as dr_peel
818 are aligned in the vector loop. */
819 same_align_drs
820 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
821 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
822 {
823 if (current_dr != dr)
824 continue;
825 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
826 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
827 SET_DR_MISALIGNMENT (dr, 0);
828 return;
829 }
830
831 if (known_alignment_for_access_p (dr)
832 && known_alignment_for_access_p (dr_peel))
833 {
834 int misal = DR_MISALIGNMENT (dr);
835 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
836 misal += npeel * dr_size;
837 misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
838 SET_DR_MISALIGNMENT (dr, misal);
839 return;
840 }
841
842 if (vect_print_dump_info (REPORT_DETAILS))
843 fprintf (vect_dump, "Setting misalignment to -1.");
844 SET_DR_MISALIGNMENT (dr, -1);
845 }
846
847
848 /* Function vect_verify_datarefs_alignment
849
850 Return TRUE if all data references in the loop can be
851 handled with respect to alignment. */
852
853 static bool
854 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
855 {
856 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
857 struct data_reference *dr;
858 enum dr_alignment_support supportable_dr_alignment;
859 unsigned int i;
860
861 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
862 {
863 gimple stmt = DR_STMT (dr);
864 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
865
866 /* For interleaving, only the alignment of the first access matters. */
867 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
868 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
869 continue;
870
871 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
872 if (!supportable_dr_alignment)
873 {
874 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
875 {
876 if (DR_IS_READ (dr))
877 fprintf (vect_dump,
878 "not vectorized: unsupported unaligned load.");
879 else
880 fprintf (vect_dump,
881 "not vectorized: unsupported unaligned store.");
882 }
883 return false;
884 }
885 if (supportable_dr_alignment != dr_aligned
886 && vect_print_dump_info (REPORT_ALIGNMENT))
887 fprintf (vect_dump, "Vectorizing an unaligned access.");
888 }
889 return true;
890 }
891
892
893 /* Function vector_alignment_reachable_p
894
895 Return true if vector alignment for DR is reachable by peeling
896 a few loop iterations. Return false otherwise. */
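/* For example, with a V4SI vector type (4-byte elements) an access that is
   misaligned by 8 bytes can be brought to alignment by peeling 2 scalar
   iterations, whereas a misalignment of 2 bytes can never be removed by
   peeling whole 4-byte elements, so peeling is not attempted.  */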
897
898 static bool
899 vector_alignment_reachable_p (struct data_reference *dr)
900 {
901 gimple stmt = DR_STMT (dr);
902 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
903 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
904
905 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
906 {
907 /* For interleaved accesses we peel only if the number of iterations in
908 the prolog loop (VF - misalignment, in elements) is a multiple of the
909 number of interleaved accesses. */
910 int elem_size, mis_in_elements;
911 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
912
913 /* FORNOW: handle only known alignment. */
914 if (!known_alignment_for_access_p (dr))
915 return false;
916
917 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
918 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
919
920 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
921 return false;
922 }
923
924 /* If the misalignment is known at compile time then allow peeling
925 only if natural alignment is reachable through peeling. */
926 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
927 {
928 HOST_WIDE_INT elmsize =
929 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
930 if (vect_print_dump_info (REPORT_DETAILS))
931 {
932 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
933 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
934 }
935 if (DR_MISALIGNMENT (dr) % elmsize)
936 {
937 if (vect_print_dump_info (REPORT_DETAILS))
938 fprintf (vect_dump, "data size does not divide the misalignment.\n");
939 return false;
940 }
941 }
942
943 if (!known_alignment_for_access_p (dr))
944 {
945 tree type = (TREE_TYPE (DR_REF (dr)));
946 tree ba = DR_BASE_OBJECT (dr);
947 bool is_packed = false;
948
949 if (ba)
950 is_packed = contains_packed_reference (ba);
951
952 if (vect_print_dump_info (REPORT_DETAILS))
953 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
954 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
955 return true;
956 else
957 return false;
958 }
959
960 return true;
961 }
962
963 /* Function vect_enhance_data_refs_alignment
964
965 This pass will use loop versioning and loop peeling in order to enhance
966 the alignment of data references in the loop.
967
968 FOR NOW: we assume that whatever versioning/peeling takes place, only the
969 original loop is to be vectorized; any other loops that are created by
970 the transformations performed in this pass are not supposed to be
971 vectorized. This restriction will be relaxed.
972
973 This pass will require a cost model to guide it whether to apply peeling
974 or versioning or a combination of the two. For example, the scheme that
975 Intel uses when given a loop with several memory accesses is as follows:
976 choose one memory access ('p') whose alignment you want to force by doing
977 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
978 other accesses are not necessarily aligned, or (2) use loop versioning to
979 generate one loop in which all accesses are aligned, and another loop in
980 which only 'p' is necessarily aligned.
981
982 ("Automatic Intra-Register Vectorization for the Intel Architecture",
983 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
984 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
985
986 Devising a cost model is the most critical aspect of this work. It will
987 guide us on which access to peel for, whether to use loop versioning, how
988 many versions to create, etc. The cost model will probably consist of
989 generic considerations as well as target specific considerations (on
990 powerpc for example, misaligned stores are more painful than misaligned
991 loads).
992
993 Here are the general steps involved in alignment enhancements:
994
995 -- original loop, before alignment analysis:
996 for (i=0; i<N; i++){
997 x = q[i]; # DR_MISALIGNMENT(q) = unknown
998 p[i] = y; # DR_MISALIGNMENT(p) = unknown
999 }
1000
1001 -- After vect_compute_data_refs_alignment:
1002 for (i=0; i<N; i++){
1003 x = q[i]; # DR_MISALIGNMENT(q) = 3
1004 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1005 }
1006
1007 -- Possibility 1: we do loop versioning:
1008 if (p is aligned) {
1009 for (i=0; i<N; i++){ # loop 1A
1010 x = q[i]; # DR_MISALIGNMENT(q) = 3
1011 p[i] = y; # DR_MISALIGNMENT(p) = 0
1012 }
1013 }
1014 else {
1015 for (i=0; i<N; i++){ # loop 1B
1016 x = q[i]; # DR_MISALIGNMENT(q) = 3
1017 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1018 }
1019 }
1020
1021 -- Possibility 2: we do loop peeling:
1022 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1023 x = q[i];
1024 p[i] = y;
1025 }
1026 for (i = 3; i < N; i++){ # loop 2A
1027 x = q[i]; # DR_MISALIGNMENT(q) = 0
1028 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1029 }
1030
1031 -- Possibility 3: combination of loop peeling and versioning:
1032 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1033 x = q[i];
1034 p[i] = y;
1035 }
1036 if (p is aligned) {
1037 for (i = 3; i<N; i++){ # loop 3A
1038 x = q[i]; # DR_MISALIGNMENT(q) = 0
1039 p[i] = y; # DR_MISALIGNMENT(p) = 0
1040 }
1041 }
1042 else {
1043 for (i = 3; i<N; i++){ # loop 3B
1044 x = q[i]; # DR_MISALIGNMENT(q) = 0
1045 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1046 }
1047 }
1048
1049 These loops are later passed to loop_transform to be vectorized. The
1050 vectorizer will use the alignment information to guide the transformation
1051 (whether to generate regular loads/stores, or with special handling for
1052 misalignment). */
1053
1054 bool
1055 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1056 {
1057 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1058 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1059 enum dr_alignment_support supportable_dr_alignment;
1060 struct data_reference *dr0 = NULL;
1061 struct data_reference *dr;
1062 unsigned int i;
1063 bool do_peeling = false;
1064 bool do_versioning = false;
1065 bool stat;
1066 gimple stmt;
1067 stmt_vec_info stmt_info;
1068 int vect_versioning_for_alias_required;
1069
1070 if (vect_print_dump_info (REPORT_DETAILS))
1071 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1072
1073 /* While cost model enhancements are expected in the future, the high level
1074 view of the code at this time is as follows:
1075
1076 A) If there is a misaligned write then see if peeling to align this write
1077 can make all data references satisfy vect_supportable_dr_alignment.
1078 If so, update data structures as needed and return true. Note that
1079 at this time vect_supportable_dr_alignment is known to return false
1080 for a misaligned write.
1081
1082 B) If peeling wasn't possible and there is a data reference with an
1083 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1084 then see if loop versioning checks can be used to make all data
1085 references satisfy vect_supportable_dr_alignment. If so, update
1086 data structures as needed and return true.
1087
1088 C) If neither peeling nor versioning were successful then return false if
1089 any data reference does not satisfy vect_supportable_dr_alignment.
1090
1091 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1092
1093 Note, Possibility 3 above (which is peeling and versioning together) is not
1094 being done at this time. */
1095
1096 /* (1) Peeling to force alignment. */
1097
1098 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1099 Considerations:
1100 + How many accesses will become aligned due to the peeling
1101 - How many accesses will become unaligned due to the peeling,
1102 and the cost of misaligned accesses.
1103 - The cost of peeling (the extra runtime checks, the increase
1104 in code size).
1105
1106 The scheme we use FORNOW: peel to force the alignment of the first
1107 misaligned store in the loop.
1108 Rationale: misaligned stores are not yet supported.
1109
1110 TODO: Use a cost model. */
1111
1112 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1113 {
1114 stmt = DR_STMT (dr);
1115 stmt_info = vinfo_for_stmt (stmt);
1116
1117 /* For interleaving, only the alignment of the first access
1118 matters. */
1119 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1120 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1121 continue;
1122
1123 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1124 {
1125 do_peeling = vector_alignment_reachable_p (dr);
1126 if (do_peeling)
1127 dr0 = dr;
1128 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1129 fprintf (vect_dump, "vector alignment may not be reachable");
1130 break;
1131 }
1132 }
1133
1134 vect_versioning_for_alias_required =
1135 (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)) > 0);
1136
1137 /* Temporarily, if versioning for alias is required, we disable peeling
1138 until we support peeling and versioning. Often peeling for alignment
1139 will require peeling for loop-bound, which in turn requires that we
1140 know how to adjust the loop ivs after the loop. */
1141 if (vect_versioning_for_alias_required
1142 || !vect_can_advance_ivs_p (loop_vinfo)
1143 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1144 do_peeling = false;
1145
1146 if (do_peeling)
1147 {
1148 int mis;
1149 int npeel = 0;
1150 gimple stmt = DR_STMT (dr0);
1151 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1152 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1153 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1154
1155 if (known_alignment_for_access_p (dr0))
1156 {
1157 /* Since it's known at compile time, compute the number of iterations
1158 in the peeled loop (the peeling factor) for use in updating
1159 DR_MISALIGNMENT values. The peeling factor is the vectorization
1160 factor minus the misalignment as an element count. */
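/* E.g. for 4-byte elements, a 16-byte vector (4 elements) and a
   misalignment of 8 bytes, mis is 2 elements and npeel is 4 - 2 == 2
   scalar iterations.  */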
1161 mis = DR_MISALIGNMENT (dr0);
1162 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1163 npeel = nelements - mis;
1164
1165 /* For interleaved data access every iteration accesses all the
1166 members of the group, therefore we divide the number of iterations
1167 by the group size. */
1168 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1169 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1170 npeel /= DR_GROUP_SIZE (stmt_info);
1171
1172 if (vect_print_dump_info (REPORT_DETAILS))
1173 fprintf (vect_dump, "Try peeling by %d", npeel);
1174 }
1175
1176 /* Ensure that all data refs can be vectorized after the peel. */
1177 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1178 {
1179 int save_misalignment;
1180
1181 if (dr == dr0)
1182 continue;
1183
1184 stmt = DR_STMT (dr);
1185 stmt_info = vinfo_for_stmt (stmt);
1186 /* For interleaving, only the alignment of the first access
1187 matters. */
1188 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1189 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1190 continue;
1191
1192 save_misalignment = DR_MISALIGNMENT (dr);
1193 vect_update_misalignment_for_peel (dr, dr0, npeel);
1194 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1195 SET_DR_MISALIGNMENT (dr, save_misalignment);
1196
1197 if (!supportable_dr_alignment)
1198 {
1199 do_peeling = false;
1200 break;
1201 }
1202 }
1203
1204 if (do_peeling)
1205 {
1206 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1207 If the misalignment of DR_i is identical to that of dr0 then set
1208 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1209 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1210 by the peeling factor times the element size of DR_i (MOD the
1211 vectorization factor times the size). Otherwise, the
1212 misalignment of DR_i must be set to unknown. */
1213 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1214 if (dr != dr0)
1215 vect_update_misalignment_for_peel (dr, dr0, npeel);
1216
1217 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1218 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1219 SET_DR_MISALIGNMENT (dr0, 0);
1220 if (vect_print_dump_info (REPORT_ALIGNMENT))
1221 fprintf (vect_dump, "Alignment of access forced using peeling.");
1222
1223 if (vect_print_dump_info (REPORT_DETAILS))
1224 fprintf (vect_dump, "Peeling for alignment will be applied.");
1225
1226 stat = vect_verify_datarefs_alignment (loop_vinfo);
1227 gcc_assert (stat);
1228 return stat;
1229 }
1230 }
1231
1232
1233 /* (2) Versioning to force alignment. */
1234
1235 /* Try versioning if:
1236 1) flag_tree_vect_loop_version is TRUE
1237 2) optimize loop for speed
1238 3) there is at least one unsupported misaligned data ref with an unknown
1239 misalignment, and
1240 4) all misaligned data refs with a known misalignment are supported, and
1241 5) the number of runtime alignment checks is within reason. */
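/* The run-time guard that versioning eventually generates tests that the
   low bits of every potentially misaligned address are zero, roughly

     if (((addr_1 | addr_2 | ...) & mask) == 0)
       { vectorized loop }
     else
       { scalar loop }

   where MASK is the vector size minus one (see the mask computation
   below).  */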
1242
1243 do_versioning =
1244 flag_tree_vect_loop_version
1245 && optimize_loop_nest_for_speed_p (loop)
1246 && (!loop->inner); /* FORNOW */
1247
1248 if (do_versioning)
1249 {
1250 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1251 {
1252 stmt = DR_STMT (dr);
1253 stmt_info = vinfo_for_stmt (stmt);
1254
1255 /* For interleaving, only the alignment of the first access
1256 matters. */
1257 if (aligned_access_p (dr)
1258 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1259 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1260 continue;
1261
1262 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1263
1264 if (!supportable_dr_alignment)
1265 {
1266 gimple stmt;
1267 int mask;
1268 tree vectype;
1269
1270 if (known_alignment_for_access_p (dr)
1271 || VEC_length (gimple,
1272 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1273 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1274 {
1275 do_versioning = false;
1276 break;
1277 }
1278
1279 stmt = DR_STMT (dr);
1280 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1281 gcc_assert (vectype);
1282
1283 /* The rightmost bits of an aligned address must be zeros.
1284 Construct the mask needed for this test. For example,
1285 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1286 mask must be 15 = 0xf. */
1287 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1288
1289 /* FORNOW: use the same mask to test all potentially unaligned
1290 references in the loop. The vectorizer currently supports
1291 a single vector size, see the reference to
1292 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1293 vectorization factor is computed. */
1294 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1295 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1296 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1297 VEC_safe_push (gimple, heap,
1298 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1299 DR_STMT (dr));
1300 }
1301 }
1302
1303 /* Versioning requires at least one misaligned data reference. */
1304 if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
1305 do_versioning = false;
1306 else if (!do_versioning)
1307 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1308 }
1309
1310 if (do_versioning)
1311 {
1312 VEC(gimple,heap) *may_misalign_stmts
1313 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1314 gimple stmt;
1315
1316 /* It can now be assumed that the data references in the statements
1317 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1318 of the loop being vectorized. */
1319 for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, stmt); i++)
1320 {
1321 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1322 dr = STMT_VINFO_DATA_REF (stmt_info);
1323 SET_DR_MISALIGNMENT (dr, 0);
1324 if (vect_print_dump_info (REPORT_ALIGNMENT))
1325 fprintf (vect_dump, "Alignment of access forced using versioning.");
1326 }
1327
1328 if (vect_print_dump_info (REPORT_DETAILS))
1329 fprintf (vect_dump, "Versioning for alignment will be applied.");
1330
1331 /* Peeling and versioning can't be done together at this time. */
1332 gcc_assert (! (do_peeling && do_versioning));
1333
1334 stat = vect_verify_datarefs_alignment (loop_vinfo);
1335 gcc_assert (stat);
1336 return stat;
1337 }
1338
1339 /* This point is reached if neither peeling nor versioning is being done. */
1340 gcc_assert (! (do_peeling || do_versioning));
1341
1342 stat = vect_verify_datarefs_alignment (loop_vinfo);
1343 return stat;
1344 }
1345
1346
1347 /* Function vect_analyze_data_refs_alignment
1348
1349 Analyze the alignment of the data-references in the loop.
1350 Return FALSE if a data reference is found that cannot be vectorized. */
1351
1352 bool
1353 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1354 {
1355 if (vect_print_dump_info (REPORT_DETAILS))
1356 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1357
1358 if (!vect_compute_data_refs_alignment (loop_vinfo))
1359 {
1360 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1361 fprintf (vect_dump,
1362 "not vectorized: can't calculate alignment for data ref.");
1363 return false;
1364 }
1365
1366 return true;
1367 }
1368
1369
1370 /* Analyze groups of strided accesses: check that DR belongs to a group of
1371 strided accesses of legal size, step, etc. Detect gaps, single element
1372 interleaving, and other special cases. Set strided access info.
1373 Collect groups of strided stores for further use in SLP analysis. */
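/* For example, in

     for (i = 0; i < n; i++)
       {
         c[i] = a[2*i] + a[2*i+1];
         x += b[4*i];
       }

   the two loads of 'a' form an interleaving group of size 2 with no gaps,
   while the load of 'b' is a single-element interleaving access: its step
   covers 4 elements of which only the first is read, so the detected
   group size is 4.  */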
1374
1375 static bool
1376 vect_analyze_group_access (struct data_reference *dr)
1377 {
1378 tree step = DR_STEP (dr);
1379 tree scalar_type = TREE_TYPE (DR_REF (dr));
1380 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1381 gimple stmt = DR_STMT (dr);
1382 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1383 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1384 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1385 HOST_WIDE_INT stride;
1386 bool slp_impossible = false;
1387
1388 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1389 interleaving group (including gaps). */
1390 stride = dr_step / type_size;
1391
1392 /* A non-consecutive access is possible only if it is a part of an interleaving group. */
1393 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1394 {
1395 /* Check if this DR is a part of an interleaving group, and is a single
1396 element of the group that is accessed in the loop. */
1397
1398 /* Gaps are supported only for loads. STEP must be a multiple of the type
1399 size. The size of the group must be a power of 2. */
1400 if (DR_IS_READ (dr)
1401 && (dr_step % type_size) == 0
1402 && stride > 0
1403 && exact_log2 (stride) != -1)
1404 {
1405 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1406 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1407 if (vect_print_dump_info (REPORT_DR_DETAILS))
1408 {
1409 fprintf (vect_dump, "Detected single element interleaving %d ",
1410 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
1411 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1412 fprintf (vect_dump, " step ");
1413 print_generic_expr (vect_dump, step, TDF_SLIM);
1414 }
1415 return true;
1416 }
1417 if (vect_print_dump_info (REPORT_DETAILS))
1418 fprintf (vect_dump, "not consecutive access");
1419 return false;
1420 }
1421
1422 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1423 {
1424 /* First stmt in the interleaving chain. Check the chain. */
1425 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1426 struct data_reference *data_ref = dr;
1427 unsigned int count = 1;
1428 tree next_step;
1429 tree prev_init = DR_INIT (data_ref);
1430 gimple prev = stmt;
1431 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
1432
1433 while (next)
1434 {
1435 /* Skip same data-refs. If two or more stmts share a data-ref
1436 (supported only for loads), we vectorize only the first stmt, and
1437 the rest get their vectorized loads from the first one. */
1438 if (!tree_int_cst_compare (DR_INIT (data_ref),
1439 DR_INIT (STMT_VINFO_DATA_REF (
1440 vinfo_for_stmt (next)))))
1441 {
1442 if (!DR_IS_READ (data_ref))
1443 {
1444 if (vect_print_dump_info (REPORT_DETAILS))
1445 fprintf (vect_dump, "Two store stmts share the same dr.");
1446 return false;
1447 }
1448
1449 /* Check that there are no load-store dependencies for these loads,
1450 to prevent a load-store-load sequence to the same location. */
1451 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
1452 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
1453 {
1454 if (vect_print_dump_info (REPORT_DETAILS))
1455 fprintf (vect_dump,
1456 "READ_WRITE dependence in interleaving.");
1457 return false;
1458 }
1459
1460 /* For load use the same data-ref load. */
1461 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1462
1463 prev = next;
1464 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1465 continue;
1466 }
1467 prev = next;
1468
1469 /* Check that all the accesses have the same STEP. */
1470 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1471 if (tree_int_cst_compare (step, next_step))
1472 {
1473 if (vect_print_dump_info (REPORT_DETAILS))
1474 fprintf (vect_dump, "not consecutive access in interleaving");
1475 return false;
1476 }
1477
1478 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1479 /* Check that the distance between two accesses is equal to the type
1480 size. Otherwise, we have gaps. */
1481 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1482 - TREE_INT_CST_LOW (prev_init)) / type_size;
1483 if (diff != 1)
1484 {
1485 /* FORNOW: SLP of accesses with gaps is not supported. */
1486 slp_impossible = true;
1487 if (!DR_IS_READ (data_ref))
1488 {
1489 if (vect_print_dump_info (REPORT_DETAILS))
1490 fprintf (vect_dump, "interleaved store with gaps");
1491 return false;
1492 }
1493
1494 gaps += diff - 1;
1495 }
1496
1497 /* Store the gap from the previous member of the group. If there is no
1498 gap in the access, DR_GROUP_GAP is always 1. */
1499 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1500
1501 prev_init = DR_INIT (data_ref);
1502 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1503 /* Count the number of data-refs in the chain. */
1504 count++;
1505 }
1506
1507 /* COUNT is the number of accesses found; we multiply it by the size of
1508 the type to get COUNT_IN_BYTES. */
1509 count_in_bytes = type_size * count;
1510
1511 /* Check that the size of the interleaving (including gaps) is not greater
1512 than STEP. */
1513 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
1514 {
1515 if (vect_print_dump_info (REPORT_DETAILS))
1516 {
1517 fprintf (vect_dump, "interleaving size is greater than step for ");
1518 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1519 }
1520 return false;
1521 }
1522
1523 /* Check that the size of the interleaving is equal to STEP for stores,
1524 i.e., that there are no gaps. */
1525 if (dr_step != count_in_bytes)
1526 {
1527 if (DR_IS_READ (dr))
1528 {
1529 slp_impossible = true;
1530 /* There is a gap after the last load in the group. This gap is the
1531 difference between the stride and the number of elements. When
1532 there is no gap, this difference should be 0. */
1533 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
1534 }
1535 else
1536 {
1537 if (vect_print_dump_info (REPORT_DETAILS))
1538 fprintf (vect_dump, "interleaved store with gaps");
1539 return false;
1540 }
1541 }
1542
1543 /* Check that STEP is a multiple of type size. */
1544 if ((dr_step % type_size) != 0)
1545 {
1546 if (vect_print_dump_info (REPORT_DETAILS))
1547 {
1548 fprintf (vect_dump, "step is not a multiple of type size: step ");
1549 print_generic_expr (vect_dump, step, TDF_SLIM);
1550 fprintf (vect_dump, " size ");
1551 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1552 TDF_SLIM);
1553 }
1554 return false;
1555 }
1556
1557 /* FORNOW: we handle only interleaving that is a power of 2.
1558 We don't fail here if it may still be possible to vectorize the
1559 group using SLP. If not, the size of the group will be checked in
1560 vect_analyze_operations, and the vectorization will fail. */
1561 if (exact_log2 (stride) == -1)
1562 {
1563 if (vect_print_dump_info (REPORT_DETAILS))
1564 fprintf (vect_dump, "interleaving is not a power of 2");
1565
1566 if (slp_impossible)
1567 return false;
1568 }
1569 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1570 if (vect_print_dump_info (REPORT_DETAILS))
1571 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
1572
1573 /* SLP: create an SLP data structure for every interleaving group of
1574 stores for further analysis in vect_analyze_slp. */
1575 if (!DR_IS_READ (dr) && !slp_impossible)
1576 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt);
1577 }
1578
1579 return true;
1580 }
1581
1582
1583 /* Analyze the access pattern of the data-reference DR.
1584 In case of non-consecutive accesses call vect_analyze_group_access() to
1585 analyze groups of strided accesses. */
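/* For example, a[i] is a consecutive access (its step equals the type
   size), a[2*i] is a strided access that is handed to
   vect_analyze_group_access, and an invariant access (step zero) is not
   allowed.  */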
1586
1587 static bool
1588 vect_analyze_data_ref_access (struct data_reference *dr)
1589 {
1590 tree step = DR_STEP (dr);
1591 tree scalar_type = TREE_TYPE (DR_REF (dr));
1592 gimple stmt = DR_STMT (dr);
1593 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1594 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1595 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1596 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1597
1598 if (!step)
1599 {
1600 if (vect_print_dump_info (REPORT_DETAILS))
1601 fprintf (vect_dump, "bad data-ref access");
1602 return false;
1603 }
1604
1605 /* Don't allow invariant accesses. */
1606 if (dr_step == 0)
1607 return false;
1608
1609 if (nested_in_vect_loop_p (loop, stmt))
1610 {
1611 /* Interleaved accesses are not yet supported within outer-loop
1612 vectorization for references in the inner-loop. */
1613 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1614
1615 /* For the rest of the analysis we use the outer-loop step. */
1616 step = STMT_VINFO_DR_STEP (stmt_info);
1617 dr_step = TREE_INT_CST_LOW (step);
1618
1619 if (dr_step == 0)
1620 {
1621 if (vect_print_dump_info (REPORT_ALIGNMENT))
1622 fprintf (vect_dump, "zero step in outer loop.");
1623 if (DR_IS_READ (dr))
1624 return true;
1625 else
1626 return false;
1627 }
1628 }
1629
1630 /* Consecutive? */
1631 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1632 {
1633 /* Mark that it is not interleaving. */
1634 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1635 return true;
1636 }
1637
1638 if (nested_in_vect_loop_p (loop, stmt))
1639 {
1640 if (vect_print_dump_info (REPORT_ALIGNMENT))
1641 fprintf (vect_dump, "strided access in outer loop.");
1642 return false;
1643 }
1644
1645 /* Not consecutive access - check if it's a part of interleaving group. */
1646 return vect_analyze_group_access (dr);
1647 }
1648
1649
1650 /* Function vect_analyze_data_ref_accesses.
1651
1652 Analyze the access pattern of all the data references in the loop.
1653
1654 FORNOW: the only access pattern that is considered vectorizable is a
1655 simple step 1 (consecutive) access.
1656
1657 FORNOW: handle only arrays and pointer accesses. */
1658
1659 bool
1660 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1661 {
1662 unsigned int i;
1663 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1664 struct data_reference *dr;
1665
1666 if (vect_print_dump_info (REPORT_DETAILS))
1667 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1668
1669 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1670 if (!vect_analyze_data_ref_access (dr))
1671 {
1672 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1673 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1674 return false;
1675 }
1676
1677 return true;
1678 }
1679
1680 /* Function vect_prune_runtime_alias_test_list.
1681
1682 Prune a list of ddrs to be tested at run-time by versioning for alias.
1683 Return FALSE if the resulting list of ddrs is longer than allowed by
1684 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
1685
1686 bool
1687 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
1688 {
1689 VEC (ddr_p, heap) *ddrs =
1690 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
1691 unsigned i, j;
1692
1693 if (vect_print_dump_info (REPORT_DETAILS))
1694 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
1695
1696 for (i = 0; i < VEC_length (ddr_p, ddrs); )
1697 {
1698 bool found;
1699 ddr_p ddr_i;
1700
1701 ddr_i = VEC_index (ddr_p, ddrs, i);
1702 found = false;
1703
1704 for (j = 0; j < i; j++)
1705 {
1706 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
1707
1708 if (vect_vfa_range_equal (ddr_i, ddr_j))
1709 {
1710 if (vect_print_dump_info (REPORT_DR_DETAILS))
1711 {
1712 fprintf (vect_dump, "found equal ranges ");
1713 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
1714 fprintf (vect_dump, ", ");
1715 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
1716 fprintf (vect_dump, " and ");
1717 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
1718 fprintf (vect_dump, ", ");
1719 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
1720 }
1721 found = true;
1722 break;
1723 }
1724 }
1725
1726 if (found)
1727 {
1728 VEC_ordered_remove (ddr_p, ddrs, i);
1729 continue;
1730 }
1731 i++;
1732 }
1733
1734 if (VEC_length (ddr_p, ddrs) >
1735 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
1736 {
1737 if (vect_print_dump_info (REPORT_DR_DETAILS))
1738 {
1739 fprintf (vect_dump,
1740 "disable versioning for alias - max number of generated "
1741 "checks exceeded.");
1742 }
1743
1744 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
1745
1746 return false;
1747 }
1748
1749 return true;
1750 }
1751
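/* Illustrative note: two ddrs whose ranges are equal according to
   vect_vfa_range_equal would produce the same run-time alias check, so
   only one of them is kept; if more distinct checks than
   PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS would still be needed,
   versioning for alias is given up altogether.  */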
1752
1753 /* Function vect_analyze_data_refs.
1754
1755 Find all the data references in the loop.
1756
1757 The general structure of the analysis of data refs in the vectorizer is as
1758 follows:
1759 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1760 find and analyze all data-refs in the loop and their dependences.
1761 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1762 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1763 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1764
1765 */
1766
1767 bool
1768 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1769 {
1770 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1771 unsigned int i;
1772 VEC (data_reference_p, heap) *datarefs;
1773 struct data_reference *dr;
1774 tree scalar_type;
1775
1776 if (vect_print_dump_info (REPORT_DETAILS))
1777 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
1778
1779 compute_data_dependences_for_loop (loop, true,
1780 &LOOP_VINFO_DATAREFS (loop_vinfo),
1781 &LOOP_VINFO_DDRS (loop_vinfo));
1782
1783 /* Go through the data-refs and check that the analysis succeeded. Update
1784 the stmt_vec_info's pointer to the DR, and set the vectype. */
1785 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1786
1787 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1788 {
1789 gimple stmt;
1790 stmt_vec_info stmt_info;
1791 basic_block bb;
1792 tree base, offset, init;
1793
1794 if (!dr || !DR_REF (dr))
1795 {
1796 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1797 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1798 return false;
1799 }
1800
1801 stmt = DR_STMT (dr);
1802 stmt_info = vinfo_for_stmt (stmt);
1803
1804 /* Check that analysis of the data-ref succeeded. */
1805 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1806 || !DR_STEP (dr))
1807 {
1808 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1809 {
1810 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1811 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1812 }
1813 return false;
1814 }
1815
1816 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
1817 {
1818 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1819 fprintf (vect_dump, "not vectorized: base addr of dr is a "
1820 "constant");
1821 return false;
1822 }
1823
1824 base = unshare_expr (DR_BASE_ADDRESS (dr));
1825 offset = unshare_expr (DR_OFFSET (dr));
1826 init = unshare_expr (DR_INIT (dr));
1827
1828 /* Update DR field in stmt_vec_info struct. */
1829 bb = gimple_bb (stmt);
1830
1831 /* If the dataref is in an inner-loop of the loop that is considered for
1832 vectorization, we also want to analyze the access relative to
1833 the outer-loop (DR contains information only relative to the
1834 inner-most enclosing loop). We do that by building a reference to the
1835 first location accessed by the inner-loop, and analyze it relative to
1836 the outer-loop. */
1837 if (nested_in_vect_loop_p (loop, stmt))
1838 {
1839 tree outer_step, outer_base, outer_init;
1840 HOST_WIDE_INT pbitsize, pbitpos;
1841 tree poffset;
1842 enum machine_mode pmode;
1843 int punsignedp, pvolatilep;
1844 affine_iv base_iv, offset_iv;
1845 tree dinit;
1846
1847 /* Build a reference to the first location accessed by the
1848 inner-loop: *(BASE+INIT). (The first location is actually
1849 BASE+INIT+OFFSET, but we add OFFSET separately later). */
1850 tree inner_base = build_fold_indirect_ref
1851 (fold_build2 (POINTER_PLUS_EXPR,
1852 TREE_TYPE (base), base,
1853 fold_convert (sizetype, init)));
1854
1855 if (vect_print_dump_info (REPORT_DETAILS))
1856 {
1857 fprintf (vect_dump, "analyze in outer-loop: ");
1858 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
1859 }
1860
1861 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
1862 &poffset, &pmode, &punsignedp, &pvolatilep, false);
1863 gcc_assert (outer_base != NULL_TREE);
1864
1865 if (pbitpos % BITS_PER_UNIT != 0)
1866 {
1867 if (vect_print_dump_info (REPORT_DETAILS))
1868 fprintf (vect_dump, "failed: bit offset alignment.\n");
1869 return false;
1870 }
1871
1872 outer_base = build_fold_addr_expr (outer_base);
1873 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
1874 &base_iv, false))
1875 {
1876 if (vect_print_dump_info (REPORT_DETAILS))
1877 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
1878 return false;
1879 }
1880
1881 if (offset)
1882 {
1883 if (poffset)
1884 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
1885 poffset);
1886 else
1887 poffset = offset;
1888 }
1889
1890 if (!poffset)
1891 {
1892 offset_iv.base = ssize_int (0);
1893 offset_iv.step = ssize_int (0);
1894 }
1895 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
1896 &offset_iv, false))
1897 {
1898 if (vect_print_dump_info (REPORT_DETAILS))
1899 fprintf (vect_dump, "evolution of offset is not affine.\n");
1900 return false;
1901 }
1902
1903 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
1904 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
1905 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
1906 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
1907 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
1908
1909 outer_step = size_binop (PLUS_EXPR,
1910 fold_convert (ssizetype, base_iv.step),
1911 fold_convert (ssizetype, offset_iv.step));
1912
1913 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
1914 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
1915 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
1916 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
1917 STMT_VINFO_DR_OFFSET (stmt_info) =
1918 fold_convert (ssizetype, offset_iv.base);
1919 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
1920 size_int (highest_pow2_factor (offset_iv.base));
1921
1922 if (vect_print_dump_info (REPORT_DETAILS))
1923 {
1924 fprintf (vect_dump, "\touter base_address: ");
1925 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
1926 fprintf (vect_dump, "\n\touter offset from base address: ");
1927 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
1928 fprintf (vect_dump, "\n\touter constant offset from base address: ");
1929 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
1930 fprintf (vect_dump, "\n\touter step: ");
1931 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
1932 fprintf (vect_dump, "\n\touter aligned to: ");
1933 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
1934 }
1935 }
1936
1937 if (STMT_VINFO_DATA_REF (stmt_info))
1938 {
1939 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1940 {
1941 fprintf (vect_dump,
1942 "not vectorized: more than one data ref in stmt: ");
1943 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1944 }
1945 return false;
1946 }
1947
1948 STMT_VINFO_DATA_REF (stmt_info) = dr;
1949
1950 /* Set vectype for STMT. */
1951 scalar_type = TREE_TYPE (DR_REF (dr));
1952 STMT_VINFO_VECTYPE (stmt_info) =
1953 get_vectype_for_scalar_type (scalar_type);
1954 if (!STMT_VINFO_VECTYPE (stmt_info))
1955 {
1956 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1957 {
1958 fprintf (vect_dump,
1959 "not vectorized: no vectype for stmt: ");
1960 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1961 fprintf (vect_dump, " scalar_type: ");
1962 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
1963 }
1964 return false;
1965 }
1966 }
1967
1968 return true;
1969 }
1970
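/* Illustrative note: after this function succeeds, every load/store
   stmt S in the loop has STMT_VINFO_DATA_REF (vinfo_for_stmt (S))
   pointing to its DR, and STMT_VINFO_VECTYPE set from the scalar type
   of DR_REF (dr); e.g. an access of type 'float' would get a V4SF
   vectype on a target with 16-byte vector registers.  */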
1971
1972 /* Function vect_get_new_vect_var.
1973
1974 Returns a new temporary variable of type TYPE. Its name is formed by
1975 prepending the prefix "vect_", "stmp_" or "vect_p" (depending on the
1976 value of VAR_KIND) to NAME, if NAME is provided; otherwise the prefix
1977 alone is used. */
1978
1979 tree
1980 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1981 {
1982 const char *prefix;
1983 tree new_vect_var;
1984
1985 switch (var_kind)
1986 {
1987 case vect_simple_var:
1988 prefix = "vect_";
1989 break;
1990 case vect_scalar_var:
1991 prefix = "stmp_";
1992 break;
1993 case vect_pointer_var:
1994 prefix = "vect_p";
1995 break;
1996 default:
1997 gcc_unreachable ();
1998 }
1999
2000 if (name)
2001 {
2002 char* tmp = concat (prefix, name, NULL);
2003 new_vect_var = create_tmp_var (type, tmp);
2004 free (tmp);
2005 }
2006 else
2007 new_vect_var = create_tmp_var (type, prefix);
2008
2009 /* Mark vector typed variable as a gimple register variable. */
2010 if (TREE_CODE (type) == VECTOR_TYPE)
2011 DECL_GIMPLE_REG_P (new_vect_var) = true;
2012
2013 return new_vect_var;
2014 }
2015
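/* Usage sketch (illustrative): a call such as

     vect_get_new_vect_var (ptr_type, vect_pointer_var, "a")

   creates a temporary whose name starts with "vect_pa" (create_tmp_var
   appends a unique suffix), while passing NAME == NULL yields a name
   starting with just the prefix "vect_p".  */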
2016
2017 /* Function vect_create_addr_base_for_vector_ref.
2018
2019 Create an expression that computes the address of the first memory location
2020 that will be accessed for a data reference.
2021
2022 Input:
2023 STMT: The statement containing the data reference.
2024 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2025 OFFSET: Optional. If supplied, it is added to the initial address.
2026 LOOP: The loop-nest relative to which the address should be computed.
2027 For example, when the dataref is in an inner-loop nested in an
2028 outer-loop that is now being vectorized, LOOP can be either the
2029 outer-loop, or the inner-loop. The first memory location accessed
2030 by the following dataref ('in' points to short):
2031
2032 for (i=0; i<N; i++)
2033 for (j=0; j<M; j++)
2034 s += in[i+j]
2035
2036 is as follows:
2037 if LOOP=i_loop: &in (relative to i_loop)
2038 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2039
2040 Output:
2041 1. Return an SSA_NAME whose value is the address of the memory location of
2042 the first vector of the data reference.
2043 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2044 these statement(s) which define the returned SSA_NAME.
2045
2046 FORNOW: We are only handling array accesses with step 1. */
2047
2048 tree
2049 vect_create_addr_base_for_vector_ref (gimple stmt,
2050 gimple_seq *new_stmt_list,
2051 tree offset,
2052 struct loop *loop)
2053 {
2054 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2055 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2056 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
2057 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2058 tree base_name;
2059 tree data_ref_base_var;
2060 tree vec_stmt;
2061 tree addr_base, addr_expr;
2062 tree dest;
2063 gimple_seq seq = NULL;
2064 tree base_offset = unshare_expr (DR_OFFSET (dr));
2065 tree init = unshare_expr (DR_INIT (dr));
2066 tree vect_ptr_type;
2067 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2068
2069 gcc_assert (loop);
2070 if (loop != containing_loop)
2071 {
2072 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2073 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2074
2075 gcc_assert (nested_in_vect_loop_p (loop, stmt));
2076
2077 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2078 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2079 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2080 }
2081
2082 /* Create data_ref_base */
2083 base_name = build_fold_indirect_ref (data_ref_base);
2084 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2085 add_referenced_var (data_ref_base_var);
2086 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2087 data_ref_base_var);
2088 gimple_seq_add_seq (new_stmt_list, seq);
2089
2090 /* Create base_offset */
2091 base_offset = size_binop (PLUS_EXPR,
2092 fold_convert (sizetype, base_offset),
2093 fold_convert (sizetype, init));
2094 dest = create_tmp_var (sizetype, "base_off");
2095 add_referenced_var (dest);
2096 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2097 gimple_seq_add_seq (new_stmt_list, seq);
2098
2099 if (offset)
2100 {
2101 tree tmp = create_tmp_var (sizetype, "offset");
2102
2103 add_referenced_var (tmp);
2104 offset = fold_build2 (MULT_EXPR, sizetype,
2105 fold_convert (sizetype, offset), step);
2106 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2107 base_offset, offset);
2108 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2109 gimple_seq_add_seq (new_stmt_list, seq);
2110 }
2111
2112 /* base + base_offset */
2113 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2114 data_ref_base, base_offset);
2115
2116 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2117
2118 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2119 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2120 get_name (base_name));
2121
2122 add_referenced_var (addr_expr);
2123 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2124 gimple_seq_add_seq (new_stmt_list, seq);
2125
2126 if (vect_print_dump_info (REPORT_DETAILS))
2127 {
2128 fprintf (vect_dump, "created ");
2129 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2130 }
2131
2132 return vec_stmt;
2133 }
2134
2135
2136 /* Function vect_create_data_ref_ptr.
2137
2138 Create a new pointer to vector type (vp), that points to the first location
2139 accessed in the loop by STMT, along with the def-use update chain to
2140 appropriately advance the pointer through the loop iterations. Also set
2141 aliasing information for the pointer. This vector pointer is used by the
2142 callers to this function to create a memory reference expression for vector
2143 load/store access.
2144
2145 Input:
2146 1. STMT: a stmt that references memory. Expected to be of the form
2147 GIMPLE_ASSIGN <name, data-ref> or
2148 GIMPLE_ASSIGN <data-ref, name>.
2149 2. AT_LOOP: the loop where the vector memref is to be created.
2150 3. OFFSET (optional): an offset to be added to the initial address accessed
2151 by the data-ref in STMT.
2152 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2153 pointing to the initial address.
2154 5. TYPE: if not NULL indicates the required type of the data-ref.
2155
2156 Output:
2157 1. Declare a new ptr to vector_type, and have it point to the base of the
2158 data reference (the initial address accessed by the data reference).
2159 For example, for vector of type V8HI, the following code is generated:
2160
2161 v8hi *vp;
2162 vp = (v8hi *)initial_address;
2163
2164 if OFFSET is not supplied:
2165 initial_address = &a[init];
2166 if OFFSET is supplied:
2167 initial_address = &a[init + OFFSET];
2168
2169 Return the initial_address in INITIAL_ADDRESS.
2170
2171 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2172 update the pointer in each iteration of the loop.
2173
2174 Return the increment stmt that updates the pointer in PTR_INCR.
2175
2176 3. Set INV_P to true if the access pattern of the data reference in the
2177 vectorized loop is invariant. Set it to false otherwise.
2178
2179 4. Return the pointer. */
2180
2181 tree
2182 vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
2183 tree offset, tree *initial_address, gimple *ptr_incr,
2184 bool only_init, bool *inv_p)
2185 {
2186 tree base_name;
2187 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2188 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2189 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2190 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2191 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
2192 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2193 tree vect_ptr_type;
2194 tree vect_ptr;
2195 tree new_temp;
2196 gimple vec_stmt;
2197 gimple_seq new_stmt_list = NULL;
2198 edge pe;
2199 basic_block new_bb;
2200 tree vect_ptr_init;
2201 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2202 tree vptr;
2203 gimple_stmt_iterator incr_gsi;
2204 bool insert_after;
2205 tree indx_before_incr, indx_after_incr;
2206 gimple incr;
2207 tree step;
2208
2209 /* Check the step (evolution) of the load in LOOP, and record
2210 whether it's invariant. */
2211 if (nested_in_vect_loop)
2212 step = STMT_VINFO_DR_STEP (stmt_info);
2213 else
2214 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
2215
2216 if (tree_int_cst_compare (step, size_zero_node) == 0)
2217 *inv_p = true;
2218 else
2219 *inv_p = false;
2220
2221 /* Create an expression for the first address accessed by this load
2222 in LOOP. */
2223 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
2224
2225 if (vect_print_dump_info (REPORT_DETAILS))
2226 {
2227 tree data_ref_base = base_name;
2228 fprintf (vect_dump, "create vector-pointer variable to type: ");
2229 print_generic_expr (vect_dump, vectype, TDF_SLIM);
2230 if (TREE_CODE (data_ref_base) == VAR_DECL)
2231 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
2232 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
2233 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
2234 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2235 fprintf (vect_dump, " vectorizing a record based array ref: ");
2236 else if (TREE_CODE (data_ref_base) == SSA_NAME)
2237 fprintf (vect_dump, " vectorizing a pointer ref: ");
2238 print_generic_expr (vect_dump, base_name, TDF_SLIM);
2239 }
2240
2241 /** (1) Create the new vector-pointer variable: **/
2242 vect_ptr_type = build_pointer_type (vectype);
2243 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2244 get_name (base_name));
2245 /* If any of the data-references in the stmt group does not conflict
2246 with the created vector data-reference, use a ref-all pointer instead. */
2247 if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
2248 {
2249 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
2250 do
2251 {
2252 tree lhs = gimple_assign_lhs (orig_stmt);
2253 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2254 get_alias_set (lhs)))
2255 {
2256 vect_ptr_type = build_pointer_type_for_mode (vectype,
2257 ptr_mode, true);
2258 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2259 get_name (base_name));
2260 break;
2261 }
2262
2263 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
2264 }
2265 while (orig_stmt);
2266 }
2267
2268 add_referenced_var (vect_ptr);
2269
2270 /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
2271 vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
2272 def-use update cycles for the pointer: One relative to the outer-loop
2273 (LOOP), which is what steps (3) and (4) below do. The other is relative
2274 to the inner-loop (which is the inner-most loop containing the dataref),
2275 and this is done by step (5) below.
2276
2277 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
2278 inner-most loop, and so steps (3),(4) work the same, and step (5) is
2279 redundant. Steps (3),(4) create the following:
2280
2281 vp0 = &base_addr;
2282 LOOP: vp1 = phi(vp0,vp2)
2283 ...
2284 ...
2285 vp2 = vp1 + step
2286 goto LOOP
2287
2288 If there is an inner-loop nested in loop, then step (5) will also be
2289 applied, and an additional update in the inner-loop will be created:
2290
2291 vp0 = &base_addr;
2292 LOOP: vp1 = phi(vp0,vp2)
2293 ...
2294 inner: vp3 = phi(vp1,vp4)
2295 vp4 = vp3 + inner_step
2296 if () goto inner
2297 ...
2298 vp2 = vp1 + step
2299 if () goto LOOP */
2300
2301 /** (3) Calculate the initial address of the vector-pointer, and set
2302 the vector-pointer to point to it before the loop: **/
2303
2304 /* Create: &(base[init_val+offset]) in the loop preheader. */
2305
2306 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
2307 offset, loop);
2308 pe = loop_preheader_edge (loop);
2309 if (new_stmt_list)
2310 {
2311 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
2312 gcc_assert (!new_bb);
2313 }
2314
2315 *initial_address = new_temp;
2316
2317 /* Create: p = (vectype *) initial_base */
2318 vec_stmt = gimple_build_assign (vect_ptr,
2319 fold_convert (vect_ptr_type, new_temp));
2320 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
2321 gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
2322 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
2323 gcc_assert (!new_bb);
2324
2325
2326 /** (4) Handle the updating of the vector-pointer inside the loop.
2327 This is needed when ONLY_INIT is false, and also when AT_LOOP
2328 is the inner-loop nested in LOOP (during outer-loop vectorization).
2329 **/
2330
2331 if (only_init && at_loop == loop) /* No update in loop is required. */
2332 {
2333 /* Copy the points-to information if it exists. */
2334 if (DR_PTR_INFO (dr))
2335 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
2336 vptr = vect_ptr_init;
2337 }
2338 else
2339 {
2340 /* The step of the vector pointer is the Vector Size. */
2341 tree step = TYPE_SIZE_UNIT (vectype);
2342 /* One exception to the above is when the scalar step of the load in
2343 LOOP is zero. In this case the step here is also zero. */
2344 if (*inv_p)
2345 step = size_zero_node;
2346
2347 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
2348
2349 create_iv (vect_ptr_init,
2350 fold_convert (vect_ptr_type, step),
2351 vect_ptr, loop, &incr_gsi, insert_after,
2352 &indx_before_incr, &indx_after_incr);
2353 incr = gsi_stmt (incr_gsi);
2354 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
2355
2356 /* Copy the points-to information if it exists. */
2357 if (DR_PTR_INFO (dr))
2358 {
2359 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2360 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2361 }
2362 merge_alias_info (vect_ptr_init, indx_before_incr);
2363 merge_alias_info (vect_ptr_init, indx_after_incr);
2364 if (ptr_incr)
2365 *ptr_incr = incr;
2366
2367 vptr = indx_before_incr;
2368 }
2369
2370 if (!nested_in_vect_loop || only_init)
2371 return vptr;
2372
2373
2374 /** (5) Handle the updating of the vector-pointer inside the inner-loop
2375 nested in LOOP, if it exists: **/
2376
2377 gcc_assert (nested_in_vect_loop);
2378 if (!only_init)
2379 {
2380 standard_iv_increment_position (containing_loop, &incr_gsi,
2381 &insert_after);
2382 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
2383 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
2384 &indx_after_incr);
2385 incr = gsi_stmt (incr_gsi);
2386 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
2387
2388 /* Copy the points-to information if it exists. */
2389 if (DR_PTR_INFO (dr))
2390 {
2391 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2392 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2393 }
2394 merge_alias_info (vect_ptr_init, indx_before_incr);
2395 merge_alias_info (vect_ptr_init, indx_after_incr);
2396 if (ptr_incr)
2397 *ptr_incr = incr;
2398
2399 return indx_before_incr;
2400 }
2401 else
2402 gcc_unreachable ();
2403 }
2404
2405
2406 /* Function bump_vector_ptr
2407
2408 Increment a pointer (to a vector type) by vector-size. If requested,
2409 i.e. if PTR-INCR is given, then also connect the new increment stmt
2410 to the existing def-use update-chain of the pointer, by modifying
2411 the PTR_INCR as illustrated below:
2412
2413 The pointer def-use update-chain before this function:
2414 DATAREF_PTR = phi (p_0, p_2)
2415 ....
2416 PTR_INCR: p_2 = DATAREF_PTR + step
2417
2418 The pointer def-use update-chain after this function:
2419 DATAREF_PTR = phi (p_0, p_2)
2420 ....
2421 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
2422 ....
2423 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
2424
2425 Input:
2426 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
2427 in the loop.
2428 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
2429 the loop. The increment amount across iterations is expected
2430 to be vector_size.
2431 BSI - location where the new update stmt is to be placed.
2432 STMT - the original scalar memory-access stmt that is being vectorized.
2433 BUMP - optional. The offset by which to bump the pointer. If not given,
2434 the offset is assumed to be vector_size.
2435
2436 Output: Return NEW_DATAREF_PTR as illustrated above.
2437
2438 */
2439
2440 tree
2441 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
2442 gimple stmt, tree bump)
2443 {
2444 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2445 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2446 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2447 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
2448 tree update = TYPE_SIZE_UNIT (vectype);
2449 gimple incr_stmt;
2450 ssa_op_iter iter;
2451 use_operand_p use_p;
2452 tree new_dataref_ptr;
2453
2454 if (bump)
2455 update = bump;
2456
2457 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
2458 dataref_ptr, update);
2459 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
2460 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
2461 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
2462
2463 /* Copy the points-to information if it exists. */
2464 if (DR_PTR_INFO (dr))
2465 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
2466 merge_alias_info (new_dataref_ptr, dataref_ptr);
2467
2468 if (!ptr_incr)
2469 return new_dataref_ptr;
2470
2471 /* Update the vector-pointer's cross-iteration increment. */
2472 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
2473 {
2474 tree use = USE_FROM_PTR (use_p);
2475
2476 if (use == dataref_ptr)
2477 SET_USE (use_p, new_dataref_ptr);
2478 else
2479 gcc_assert (tree_int_cst_compare (use, update) == 0);
2480 }
2481
2482 return new_dataref_ptr;
2483 }
2484
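/* Minimal usage sketch (hypothetical names): a caller that has already
   created a vector pointer ptr1 and its increment PTR_INCR, and needs
   the address of the next vector within the same iteration, can do

     ptr2 = bump_vector_ptr (ptr1, ptr_incr, gsi, stmt, NULL_TREE);

   which emits 'ptr2 = ptr1 + sizeof (vectype)' at GSI and redirects
   PTR_INCR to increment ptr2 instead of ptr1.  */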
2485
2486 /* Function vect_create_destination_var.
2487
2488 Create a new temporary of type VECTYPE. */
2489
2490 tree
2491 vect_create_destination_var (tree scalar_dest, tree vectype)
2492 {
2493 tree vec_dest;
2494 const char *new_name;
2495 tree type;
2496 enum vect_var_kind kind;
2497
2498 kind = vectype ? vect_simple_var : vect_scalar_var;
2499 type = vectype ? vectype : TREE_TYPE (scalar_dest);
2500
2501 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2502
2503 new_name = get_name (scalar_dest);
2504 if (!new_name)
2505 new_name = "var_";
2506 vec_dest = vect_get_new_vect_var (type, kind, new_name);
2507 add_referenced_var (vec_dest);
2508
2509 return vec_dest;
2510 }
2511
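/* For example (illustrative): for a SCALAR_DEST SSA_NAME printed as
   'sum_5' and a V4SF VECTYPE, this creates a V4SF temporary whose name
   starts with "vect_sum"; with VECTYPE == NULL_TREE the scalar type of
   SCALAR_DEST is used and the name starts with "stmp_sum".  */
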
2512 /* Function vect_strided_store_supported.
2513
2514 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
2515 and FALSE otherwise. */
2516
2517 bool
2518 vect_strided_store_supported (tree vectype)
2519 {
2520 optab interleave_high_optab, interleave_low_optab;
2521 int mode;
2522
2523 mode = (int) TYPE_MODE (vectype);
2524
2525 /* Check that the operation is supported. */
2526 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
2527 vectype, optab_default);
2528 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
2529 vectype, optab_default);
2530 if (!interleave_high_optab || !interleave_low_optab)
2531 {
2532 if (vect_print_dump_info (REPORT_DETAILS))
2533 fprintf (vect_dump, "no optab for interleave.");
2534 return false;
2535 }
2536
2537 if (optab_handler (interleave_high_optab, mode)->insn_code
2538 == CODE_FOR_nothing
2539 || optab_handler (interleave_low_optab, mode)->insn_code
2540 == CODE_FOR_nothing)
2541 {
2542 if (vect_print_dump_info (REPORT_DETAILS))
2543 fprintf (vect_dump, "interleave op not supported by target.");
2544 return false;
2545 }
2546
2547 return true;
2548 }
2549
2550
2551 /* Function vect_permute_store_chain.
2552
2553 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
2554 a power of 2, generate interleave_high/low stmts to reorder the data
2555 correctly for the stores. Return the final references for stores in
2556 RESULT_CHAIN.
2557
2558 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2559 The input is 4 vectors each containing 8 elements. We assign a number to each
2560 element, the input sequence is:
2561
2562 1st vec: 0 1 2 3 4 5 6 7
2563 2nd vec: 8 9 10 11 12 13 14 15
2564 3rd vec: 16 17 18 19 20 21 22 23
2565 4th vec: 24 25 26 27 28 29 30 31
2566
2567 The output sequence should be:
2568
2569 1st vec: 0 8 16 24 1 9 17 25
2570 2nd vec: 2 10 18 26 3 11 19 27
2571 3rd vec: 4 12 20 28 5 13 21 29
2572 4th vec: 6 14 22 30 7 15 23 31
2573
2574 i.e., we interleave the contents of the four vectors in their order.
2575
2576 We use interleave_high/low instructions to create such output. The input of
2577 each interleave_high/low operation is two vectors:
2578 1st vec 2nd vec
2579 0 1 2 3 4 5 6 7
2580 the even elements of the result vector are obtained left-to-right from the
2581 high/low elements of the first vector. The odd elements of the result are
2582 obtained left-to-right from the high/low elements of the second vector.
2583 The output of interleave_high will be: 0 4 1 5
2584 and of interleave_low: 2 6 3 7
2585
2586
2587 The permutation is done in log LENGTH stages. In each stage interleave_high
2588 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
2589 where the first argument is taken from the first half of DR_CHAIN and the
2590 second argument from its second half.
2591 In our example,
2592
2593 I1: interleave_high (1st vec, 3rd vec)
2594 I2: interleave_low (1st vec, 3rd vec)
2595 I3: interleave_high (2nd vec, 4th vec)
2596 I4: interleave_low (2nd vec, 4th vec)
2597
2598 The output for the first stage is:
2599
2600 I1: 0 16 1 17 2 18 3 19
2601 I2: 4 20 5 21 6 22 7 23
2602 I3: 8 24 9 25 10 26 11 27
2603 I4: 12 28 13 29 14 30 15 31
2604
2605 The output of the second stage, i.e. the final result is:
2606
2607 I1: 0 8 16 24 1 9 17 25
2608 I2: 2 10 18 26 3 11 19 27
2609 I3: 4 12 20 28 5 13 21 29
2610 I4: 6 14 22 30 7 15 23 31. */
2611
2612 bool
2613 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
2614 unsigned int length,
2615 gimple stmt,
2616 gimple_stmt_iterator *gsi,
2617 VEC(tree,heap) **result_chain)
2618 {
2619 tree perm_dest, vect1, vect2, high, low;
2620 gimple perm_stmt;
2621 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2622 tree scalar_dest;
2623 int i;
2624 unsigned int j;
2625 enum tree_code high_code, low_code;
2626
2627 scalar_dest = gimple_assign_lhs (stmt);
2628
2629 /* Check that the operation is supported. */
2630 if (!vect_strided_store_supported (vectype))
2631 return false;
2632
2633 *result_chain = VEC_copy (tree, heap, dr_chain);
2634
2635 for (i = 0; i < exact_log2 (length); i++)
2636 {
2637 for (j = 0; j < length/2; j++)
2638 {
2639 vect1 = VEC_index (tree, dr_chain, j);
2640 vect2 = VEC_index (tree, dr_chain, j+length/2);
2641
2642 /* Create interleaving stmt:
2643 in the case of big endian:
2644 high = interleave_high (vect1, vect2)
2645 and in the case of little endian:
2646 high = interleave_low (vect1, vect2). */
2647 perm_dest = create_tmp_var (vectype, "vect_inter_high");
2648 DECL_GIMPLE_REG_P (perm_dest) = 1;
2649 add_referenced_var (perm_dest);
2650 if (BYTES_BIG_ENDIAN)
2651 {
2652 high_code = VEC_INTERLEAVE_HIGH_EXPR;
2653 low_code = VEC_INTERLEAVE_LOW_EXPR;
2654 }
2655 else
2656 {
2657 low_code = VEC_INTERLEAVE_HIGH_EXPR;
2658 high_code = VEC_INTERLEAVE_LOW_EXPR;
2659 }
2660 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
2661 vect1, vect2);
2662 high = make_ssa_name (perm_dest, perm_stmt);
2663 gimple_assign_set_lhs (perm_stmt, high);
2664 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2665 VEC_replace (tree, *result_chain, 2*j, high);
2666
2667 /* Create interleaving stmt:
2668 in the case of big endian:
2669 low = interleave_low (vect1, vect2)
2670 and in the case of little endian:
2671 low = interleave_high (vect1, vect2). */
2672 perm_dest = create_tmp_var (vectype, "vect_inter_low");
2673 DECL_GIMPLE_REG_P (perm_dest) = 1;
2674 add_referenced_var (perm_dest);
2675 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
2676 vect1, vect2);
2677 low = make_ssa_name (perm_dest, perm_stmt);
2678 gimple_assign_set_lhs (perm_stmt, low);
2679 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2680 VEC_replace (tree, *result_chain, 2*j+1, low);
2681 }
2682 dr_chain = VEC_copy (tree, heap, *result_chain);
2683 }
2684 return true;
2685 }
2686
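/* A minimal sketch of the transformation above for LENGTH == 2
   (hypothetical vectors V0 and V1): there is a single stage
   (exact_log2 (2) == 1) and a single pair (j == 0), producing

     result_chain[0] = VEC_INTERLEAVE_HIGH_EXPR <V0, V1>
     result_chain[1] = VEC_INTERLEAVE_LOW_EXPR  <V0, V1>

   on big-endian targets, with the two tree codes swapped on
   little-endian targets, as selected by the BYTES_BIG_ENDIAN test
   above.  */
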
2687 /* Function vect_setup_realignment
2688
2689 This function is called when vectorizing an unaligned load using
2690 the dr_explicit_realign[_optimized] scheme.
2691 This function generates the following code at the loop prolog:
2692
2693 p = initial_addr;
2694 x msq_init = *(floor(p)); # prolog load
2695 realignment_token = call target_builtin;
2696 loop:
2697 x msq = phi (msq_init, ---)
2698
2699 The stmts marked with x are generated only for the case of
2700 dr_explicit_realign_optimized.
2701
2702 The code above sets up a new (vector) pointer, pointing to the first
2703 location accessed by STMT, and a "floor-aligned" load using that pointer.
2704 It also generates code to compute the "realignment-token" (if the relevant
2705 target hook was defined), and creates a phi-node at the loop-header bb
2706 whose arguments are the result of the prolog-load (created by this
2707 function) and the result of a load that takes place in the loop (to be
2708 created by the caller to this function).
2709
2710 For the case of dr_explicit_realign_optimized:
2711 The caller to this function uses the phi-result (msq) to create the
2712 realignment code inside the loop, and sets up the missing phi argument,
2713 as follows:
2714 loop:
2715 msq = phi (msq_init, lsq)
2716 lsq = *(floor(p')); # load in loop
2717 result = realign_load (msq, lsq, realignment_token);
2718
2719 For the case of dr_explicit_realign:
2720 loop:
2721 msq = *(floor(p)); # load in loop
2722 p' = p + (VS-1);
2723 lsq = *(floor(p')); # load in loop
2724 result = realign_load (msq, lsq, realignment_token);
2725
2726 Input:
2727 STMT - (scalar) load stmt to be vectorized. This load accesses
2728 a memory location that may be unaligned.
2729 BSI - place where new code is to be inserted.
2730 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
2731 is used.
2732
2733 Output:
2734 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
2735 target hook, if defined.
2736 Return value - the result of the loop-header phi node. */
2737
2738 tree
2739 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
2740 tree *realignment_token,
2741 enum dr_alignment_support alignment_support_scheme,
2742 tree init_addr,
2743 struct loop **at_loop)
2744 {
2745 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2746 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2747 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2748 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2749 edge pe;
2750 tree scalar_dest = gimple_assign_lhs (stmt);
2751 tree vec_dest;
2752 gimple inc;
2753 tree ptr;
2754 tree data_ref;
2755 gimple new_stmt;
2756 basic_block new_bb;
2757 tree msq_init = NULL_TREE;
2758 tree new_temp;
2759 gimple phi_stmt;
2760 tree msq = NULL_TREE;
2761 gimple_seq stmts = NULL;
2762 bool inv_p;
2763 bool compute_in_loop = false;
2764 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2765 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
2766 struct loop *loop_for_initial_load;
2767
2768 gcc_assert (alignment_support_scheme == dr_explicit_realign
2769 || alignment_support_scheme == dr_explicit_realign_optimized);
2770
2771 /* We need to generate three things:
2772 1. the misalignment computation
2773 2. the extra vector load (for the optimized realignment scheme).
2774 3. the phi node for the two vectors from which the realignment is
2775 done (for the optimized realignment scheme).
2776 */
2777
2778 /* 1. Determine where to generate the misalignment computation.
2779
2780 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
2781 calculation will be generated by this function, outside the loop (in the
2782 preheader). Otherwise, INIT_ADDR has already been computed for us by the
2783 caller, inside the loop.
2784
2785 Background: If the misalignment remains fixed throughout the iterations of
2786 the loop, then both realignment schemes are applicable, and also the
2787 misalignment computation can be done outside LOOP. This is because we are
2788 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
2789 are a multiple of VS (the Vector Size), and therefore the misalignment in
2790 different vectorized LOOP iterations is always the same.
2791 The problem arises only if the memory access is in an inner-loop nested
2792 inside LOOP, which is now being vectorized using outer-loop vectorization.
2793 This is the only case when the misalignment of the memory access may not
2794 remain fixed throughout the iterations of the inner-loop (as explained in
2795 detail in vect_supportable_dr_alignment). In this case, not only is the
2796 optimized realignment scheme not applicable, but also the misalignment
2797 computation (and generation of the realignment token that is passed to
2798 REALIGN_LOAD) have to be done inside the loop.
2799
2800 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
2801 or not, which in turn determines if the misalignment is computed inside
2802 the inner-loop, or outside LOOP. */
2803
2804 if (init_addr != NULL_TREE)
2805 {
2806 compute_in_loop = true;
2807 gcc_assert (alignment_support_scheme == dr_explicit_realign);
2808 }
2809
2810
2811 /* 2. Determine where to generate the extra vector load.
2812
2813 For the optimized realignment scheme, instead of generating two vector
2814 loads in each iteration, we generate a single extra vector load in the
2815 preheader of the loop, and in each iteration reuse the result of the
2816 vector load from the previous iteration. In case the memory access is in
2817 an inner-loop nested inside LOOP, which is now being vectorized using
2818 outer-loop vectorization, we need to determine whether this initial vector
2819 load should be generated at the preheader of the inner-loop, or can be
2820 generated at the preheader of LOOP. If the memory access has no evolution
2821 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
2822 to be generated inside LOOP (in the preheader of the inner-loop). */
2823
2824 if (nested_in_vect_loop)
2825 {
2826 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
2827 bool invariant_in_outerloop =
2828 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
2829 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
2830 }
2831 else
2832 loop_for_initial_load = loop;
2833 if (at_loop)
2834 *at_loop = loop_for_initial_load;
2835
2836 /* 3. For the case of the optimized realignment, create the first vector
2837 load at the loop preheader. */
2838
2839 if (alignment_support_scheme == dr_explicit_realign_optimized)
2840 {
2841 /* Create msq_init = *(floor(p1)) in the loop preheader */
2842
2843 gcc_assert (!compute_in_loop);
2844 pe = loop_preheader_edge (loop_for_initial_load);
2845 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2846 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
2847 &init_addr, &inc, true, &inv_p);
2848 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
2849 new_stmt = gimple_build_assign (vec_dest, data_ref);
2850 new_temp = make_ssa_name (vec_dest, new_stmt);
2851 gimple_assign_set_lhs (new_stmt, new_temp);
2852 mark_symbols_for_renaming (new_stmt);
2853 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
2854 gcc_assert (!new_bb);
2855 msq_init = gimple_assign_lhs (new_stmt);
2856 }
2857
2858 /* 4. Create realignment token using a target builtin, if available.
2859 It is done either inside the containing loop, or before LOOP (as
2860 determined above). */
2861
2862 if (targetm.vectorize.builtin_mask_for_load)
2863 {
2864 tree builtin_decl;
2865
2866 /* Compute INIT_ADDR - the initial address accessed by this memref. */
2867 if (compute_in_loop)
2868 gcc_assert (init_addr); /* already computed by the caller. */
2869 else
2870 {
2871 /* Generate the INIT_ADDR computation outside LOOP. */
2872 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
2873 NULL_TREE, loop);
2874 pe = loop_preheader_edge (loop);
2875 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2876 gcc_assert (!new_bb);
2877 }
2878
2879 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2880 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
2881 vec_dest =
2882 vect_create_destination_var (scalar_dest,
2883 gimple_call_return_type (new_stmt));
2884 new_temp = make_ssa_name (vec_dest, new_stmt);
2885 gimple_call_set_lhs (new_stmt, new_temp);
2886
2887 if (compute_in_loop)
2888 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2889 else
2890 {
2891 /* Generate the misalignment computation outside LOOP. */
2892 pe = loop_preheader_edge (loop);
2893 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
2894 gcc_assert (!new_bb);
2895 }
2896
2897 *realignment_token = gimple_call_lhs (new_stmt);
2898
2899 /* The result of the CALL_EXPR to this builtin is determined from
2900 the value of the parameter and no global variables are touched
2901 which makes the builtin a "const" function. Requiring the
2902 builtin to have the "const" attribute makes it unnecessary
2903 to call mark_call_clobbered. */
2904 gcc_assert (TREE_READONLY (builtin_decl));
2905 }
2906
2907 if (alignment_support_scheme == dr_explicit_realign)
2908 return msq;
2909
2910 gcc_assert (!compute_in_loop);
2911 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
2912
2913
2914 /* 5. Create msq = phi <msq_init, lsq> in loop */
2915
2916 pe = loop_preheader_edge (containing_loop);
2917 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2918 msq = make_ssa_name (vec_dest, NULL);
2919 phi_stmt = create_phi_node (msq, containing_loop->header);
2920 SSA_NAME_DEF_STMT (msq) = phi_stmt;
2921 add_phi_arg (phi_stmt, msq_init, pe);
2922
2923 return msq;
2924 }
2925
2926
2927 /* Function vect_strided_load_supported.
2928
2929 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
2930 and FALSE otherwise. */
2931
2932 bool
2933 vect_strided_load_supported (tree vectype)
2934 {
2935 optab perm_even_optab, perm_odd_optab;
2936 int mode;
2937
2938 mode = (int) TYPE_MODE (vectype);
2939
2940 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
2941 optab_default);
2942 if (!perm_even_optab)
2943 {
2944 if (vect_print_dump_info (REPORT_DETAILS))
2945 fprintf (vect_dump, "no optab for perm_even.");
2946 return false;
2947 }
2948
2949 if (optab_handler (perm_even_optab, mode)->insn_code == CODE_FOR_nothing)
2950 {
2951 if (vect_print_dump_info (REPORT_DETAILS))
2952 fprintf (vect_dump, "perm_even op not supported by target.");
2953 return false;
2954 }
2955
2956 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
2957 optab_default);
2958 if (!perm_odd_optab)
2959 {
2960 if (vect_print_dump_info (REPORT_DETAILS))
2961 fprintf (vect_dump, "no optab for perm_odd.");
2962 return false;
2963 }
2964
2965 if (optab_handler (perm_odd_optab, mode)->insn_code == CODE_FOR_nothing)
2966 {
2967 if (vect_print_dump_info (REPORT_DETAILS))
2968 fprintf (vect_dump, "perm_odd op not supported by target.");
2969 return false;
2970 }
2971 return true;
2972 }
2973
2974
2975 /* Function vect_permute_load_chain.
2976
2977 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
2978 a power of 2, generate extract_even/odd stmts to reorder the input data
2979 correctly. Return the final references for loads in RESULT_CHAIN.
2980
2981 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2982 The input is 4 vectors each containing 8 elements. We assign a number to each
2983 element, the input sequence is:
2984
2985 1st vec: 0 1 2 3 4 5 6 7
2986 2nd vec: 8 9 10 11 12 13 14 15
2987 3rd vec: 16 17 18 19 20 21 22 23
2988 4th vec: 24 25 26 27 28 29 30 31
2989
2990 The output sequence should be:
2991
2992 1st vec: 0 4 8 12 16 20 24 28
2993 2nd vec: 1 5 9 13 17 21 25 29
2994 3rd vec: 2 6 10 14 18 22 26 30
2995 4th vec: 3 7 11 15 19 23 27 31
2996
2997 i.e., the first output vector should contain the first elements of each
2998 interleaving group, etc.
2999
3000 We use extract_even/odd instructions to create such output. The input of each
3001 extract_even/odd operation is two vectors
3002 1st vec 2nd vec
3003 0 1 2 3 4 5 6 7
3004
3005 and the output is the vector of extracted even/odd elements. The output of
3006 extract_even will be: 0 2 4 6
3007 and of extract_odd: 1 3 5 7
3008
3009
3010 The permutation is done in log LENGTH stages. In each stage extract_even and
3011 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
3012 order. In our example,
3013
3014 E1: extract_even (1st vec, 2nd vec)
3015 E2: extract_odd (1st vec, 2nd vec)
3016 E3: extract_even (3rd vec, 4th vec)
3017 E4: extract_odd (3rd vec, 4th vec)
3018
3019 The output for the first stage will be:
3020
3021 E1: 0 2 4 6 8 10 12 14
3022 E2: 1 3 5 7 9 11 13 15
3023 E3: 16 18 20 22 24 26 28 30
3024 E4: 17 19 21 23 25 27 29 31
3025
3026 In order to proceed and create the correct sequence for the next stage (or
3027 for the correct output, if the second stage is the last one, as in our
3028 example), we first put the output of extract_even operation and then the
3029 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3030 The input for the second stage is:
3031
3032 1st vec (E1): 0 2 4 6 8 10 12 14
3033 2nd vec (E3): 16 18 20 22 24 26 28 30
3034 3rd vec (E2): 1 3 5 7 9 11 13 15
3035 4th vec (E4): 17 19 21 23 25 27 29 31
3036
3037 The output of the second stage:
3038
3039 E1: 0 4 8 12 16 20 24 28
3040 E2: 2 6 10 14 18 22 26 30
3041 E3: 1 5 9 13 17 21 25 29
3042 E4: 3 7 11 15 19 23 27 31
3043
3044 And RESULT_CHAIN after reordering:
3045
3046 1st vec (E1): 0 4 8 12 16 20 24 28
3047 2nd vec (E3): 1 5 9 13 17 21 25 29
3048 3rd vec (E2): 2 6 10 14 18 22 26 30
3049 4th vec (E4): 3 7 11 15 19 23 27 31. */
3050
3051 bool
3052 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3053 unsigned int length,
3054 gimple stmt,
3055 gimple_stmt_iterator *gsi,
3056 VEC(tree,heap) **result_chain)
3057 {
3058 tree perm_dest, data_ref, first_vect, second_vect;
3059 gimple perm_stmt;
3060 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3061 int i;
3062 unsigned int j;
3063
3064 /* Check that the operation is supported. */
3065 if (!vect_strided_load_supported (vectype))
3066 return false;
3067
3068 *result_chain = VEC_copy (tree, heap, dr_chain);
3069 for (i = 0; i < exact_log2 (length); i++)
3070 {
3071 for (j = 0; j < length; j += 2)
3072 {
3073 first_vect = VEC_index (tree, dr_chain, j);
3074 second_vect = VEC_index (tree, dr_chain, j+1);
3075
3076 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3077 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3078 DECL_GIMPLE_REG_P (perm_dest) = 1;
3079 add_referenced_var (perm_dest);
3080
3081 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
3082 perm_dest, first_vect,
3083 second_vect);
3084
3085 data_ref = make_ssa_name (perm_dest, perm_stmt);
3086 gimple_assign_set_lhs (perm_stmt, data_ref);
3087 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3088 mark_symbols_for_renaming (perm_stmt);
3089
3090 VEC_replace (tree, *result_chain, j/2, data_ref);
3091
3092 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3093 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3094 DECL_GIMPLE_REG_P (perm_dest) = 1;
3095 add_referenced_var (perm_dest);
3096
3097 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
3098 perm_dest, first_vect,
3099 second_vect);
3100 data_ref = make_ssa_name (perm_dest, perm_stmt);
3101 gimple_assign_set_lhs (perm_stmt, data_ref);
3102 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3103 mark_symbols_for_renaming (perm_stmt);
3104
3105 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3106 }
3107 dr_chain = VEC_copy (tree, heap, *result_chain);
3108 }
3109 return true;
3110 }
3111
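/* A minimal sketch for LENGTH == 2 (hypothetical vectors V0 and V1):
   a single stage and a single pair produce

     result_chain[0] = VEC_EXTRACT_EVEN_EXPR <V0, V1>
     result_chain[1] = VEC_EXTRACT_ODD_EXPR  <V0, V1>

   which de-interleaves two loaded vectors belonging to an interleaving
   group of size 2.  */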
3112
3113 /* Function vect_transform_strided_load.
3114
3115 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3116 to perform their permutation, and attach the resulting vectorized
3117 statements to the scalar statements.
3118 */
3119
3120 bool
3121 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
3122 gimple_stmt_iterator *gsi)
3123 {
3124 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3125 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3126 gimple next_stmt, new_stmt;
3127 VEC(tree,heap) *result_chain = NULL;
3128 unsigned int i, gap_count;
3129 tree tmp_data_ref;
3130
3131 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3132 RESULT_CHAIN is the output of vect_permute_load_chain; it contains the
3133 permuted vectors, which are ready for vector computation. */
3134 result_chain = VEC_alloc (tree, heap, size);
3135 /* Permute. */
3136 if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
3137 return false;
3138
3139 /* Put a permuted data-ref in the VECTORIZED_STMT field.
3140 Since we scan the chain starting from its first node, their order
3141 corresponds to the order of data-refs in RESULT_CHAIN. */
3142 next_stmt = first_stmt;
3143 gap_count = 1;
3144 for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
3145 {
3146 if (!next_stmt)
3147 break;
3148
3149 /* Skip the gaps. Loads created for the gaps will be removed later by
3150 the dead code elimination pass. No need to check for the first stmt in
3151 the group, since it always exists.
3152 DR_GROUP_GAP is the number of steps in elements from the previous
3153 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
3154 correspond to the gaps.
3155 */
3156 if (next_stmt != first_stmt
3157 && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
3158 {
3159 gap_count++;
3160 continue;
3161 }
3162
3163 while (next_stmt)
3164 {
3165 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
3166 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
3167 copies, and we put the new vector statement in the first available
3168 RELATED_STMT. */
3169 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
3170 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
3171 else
3172 {
3173 if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3174 {
3175 gimple prev_stmt =
3176 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
3177 gimple rel_stmt =
3178 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
3179 while (rel_stmt)
3180 {
3181 prev_stmt = rel_stmt;
3182 rel_stmt =
3183 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
3184 }
3185
3186 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
3187 new_stmt;
3188 }
3189 }
3190
3191 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3192 gap_count = 1;
3193 /* If NEXT_STMT accesses the same DR as the previous statement,
3194 put the same TMP_DATA_REF as its vectorized statement; otherwise
3195 get the next data-ref from RESULT_CHAIN. */
3196 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3197 break;
3198 }
3199 }
3200
3201 VEC_free (tree, heap, result_chain);
3202 return true;
3203 }
3204
3205 /* Function vect_can_force_dr_alignment_p.
3206
3207 Returns whether the alignment of DECL can be forced to an
3208 ALIGNMENT-bit boundary. */
3209
3210 bool
3211 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
3212 {
3213 if (TREE_CODE (decl) != VAR_DECL)
3214 return false;
3215
3216 if (DECL_EXTERNAL (decl))
3217 return false;
3218
3219 if (TREE_ASM_WRITTEN (decl))
3220 return false;
3221
3222 if (TREE_STATIC (decl))
3223 return (alignment <= MAX_OFILE_ALIGNMENT);
3224 else
3225 return (alignment <= MAX_STACK_ALIGNMENT);
3226 }
3227
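/* For example (illustrative): a file-scope 'static float a[256]' is a
   VAR_DECL that is TREE_STATIC, not DECL_EXTERNAL and (before final
   assembly output) not TREE_ASM_WRITTEN, so its alignment can be forced
   up to MAX_OFILE_ALIGNMENT; an automatic variable is instead limited
   by MAX_STACK_ALIGNMENT.  */
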
3228 /* Function vect_supportable_dr_alignment
3229
3230 Return whether the data reference DR is supported with respect to its
3231 alignment. */
3232
3233 enum dr_alignment_support
3234 vect_supportable_dr_alignment (struct data_reference *dr)
3235 {
3236 gimple stmt = DR_STMT (dr);
3237 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3238 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3239 enum machine_mode mode = TYPE_MODE (vectype);
3240 struct loop *vect_loop = LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info));
3241 bool nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
3242 bool invariant_in_outerloop = false;
3243
3244 if (aligned_access_p (dr))
3245 return dr_aligned;
3246
3247 if (nested_in_vect_loop)
3248 {
3249 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3250 invariant_in_outerloop =
3251 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3252 }
3253
3254 /* Possibly unaligned access. */
3255
3256 /* We can choose between using the implicit realignment scheme (generating
3257 a misaligned_move stmt) and the explicit realignment scheme (generating
3258 aligned loads with a REALIGN_LOAD). There are two variants to the explicit
3259 realignment scheme: optimized, and unoptimized.
3260 We can optimize the realignment only if the step between consecutive
3261 vector loads is equal to the vector size. Since the vector memory
3262 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
3263 is guaranteed that the misalignment amount remains the same throughout the
3264 execution of the vectorized loop. Therefore, we can create the
3265 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
3266 at the loop preheader.
3267
3268 However, in the case of outer-loop vectorization, when vectorizing a
3269 memory access in the inner-loop nested within the LOOP that is now being
3270 vectorized, while it is guaranteed that the misalignment of the
3271 vectorized memory access will remain the same in different outer-loop
3272 iterations, it is *not* guaranteed that it will remain the same throughout
3273 the execution of the inner-loop. This is because the inner-loop advances
3274 with the original scalar step (and not in steps of VS). If the inner-loop
3275 step happens to be a multiple of VS, then the misalignment remains fixed
3276 and we can use the optimized realignment scheme. For example:
3277
3278 for (i=0; i<N; i++)
3279 for (j=0; j<M; j++)
3280 s += a[i+j];
3281
3282 When vectorizing the i-loop in the above example, the step between
3283 consecutive vector loads is 1, and so the misalignment does not remain
3284 fixed across the execution of the inner-loop, and the realignment cannot
3285 be optimized (as illustrated in the following pseudo vectorized loop):
3286
3287 for (i=0; i<N; i+=4)
3288 for (j=0; j<M; j++){
3289 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
3290 // when j is {0,1,2,3,4,5,6,7,...} respectively.
3291 // (assuming that we start from an aligned address).
3292 }
3293
3294 We therefore have to use the unoptimized realignment scheme:
3295
3296 for (i=0; i<N; i+=4)
3297 for (j=k; j<M; j+=4)
3298 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
3299 // that the misalignment of the initial address is
3300 // 0).
3301
3302 The loop can then be vectorized as follows:
3303
3304 for (k=0; k<4; k++){
3305 rt = get_realignment_token (&vp[k]);
3306 for (i=0; i<N; i+=4){
3307 v1 = vp[i+k];
3308 for (j=k; j<M; j+=4){
3309 v2 = vp[i+j+VS-1];
3310 va = REALIGN_LOAD <v1,v2,rt>;
3311 vs += va;
3312 v1 = v2;
3313 }
3314 }
3315 } */
3316
3317 if (DR_IS_READ (dr))
3318 {
3319 if (optab_handler (vec_realign_load_optab, mode)->insn_code !=
3320 CODE_FOR_nothing
3321 && (!targetm.vectorize.builtin_mask_for_load
3322 || targetm.vectorize.builtin_mask_for_load ()))
3323 {
3324 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3325 if (nested_in_vect_loop
3326 && (TREE_INT_CST_LOW (DR_STEP (dr))
3327 != GET_MODE_SIZE (TYPE_MODE (vectype))))
3328 return dr_explicit_realign;
3329 else
3330 return dr_explicit_realign_optimized;
3331 }
3332
3333 if (optab_handler (movmisalign_optab, mode)->insn_code !=
3334 CODE_FOR_nothing)
3335 /* Can't software pipeline the loads, but can at least do them. */
3336 return dr_unaligned_supported;
3337 }
3338
3339 /* Unsupported. */
3340 return dr_unaligned_unsupported;
3341 }