1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "target.h"
31 #include "basic-block.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "dumpfile.h"
35 #include "cfgloop.h"
36 #include "tree-chrec.h"
37 #include "tree-scalar-evolution.h"
38 #include "tree-vectorizer.h"
39 #include "diagnostic-core.h"
40
41 /* Need to include rtl.h, expr.h, etc. for optabs. */
42 #include "expr.h"
43 #include "optabs.h"
44
45 /* Return true if load- or store-lanes optab OPTAB is implemented for
46 COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
47
48 static bool
49 vect_lanes_optab_supported_p (const char *name, convert_optab optab,
50 tree vectype, unsigned HOST_WIDE_INT count)
51 {
52 enum machine_mode mode, array_mode;
53 bool limit_p;
54
55 mode = TYPE_MODE (vectype);
56 limit_p = !targetm.array_mode_supported_p (mode, count);
57 array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
58 MODE_INT, limit_p);
59
60 if (array_mode == BLKmode)
61 {
62 if (vect_print_dump_info (REPORT_DETAILS))
63 fprintf (vect_dump, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
64 GET_MODE_NAME (mode), count);
65 return false;
66 }
67
68 if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
69 {
70 if (vect_print_dump_info (REPORT_DETAILS))
71 fprintf (vect_dump, "cannot use %s<%s><%s>",
72 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
73 return false;
74 }
75
76 if (vect_print_dump_info (REPORT_DETAILS))
77 fprintf (vect_dump, "can use %s<%s><%s>",
78 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
79
80 return true;
81 }
82
83
84 /* Return the smallest scalar part of STMT.
85 This is used to determine the vectype of the stmt. We generally set the
86 vectype according to the type of the result (lhs). For stmts whose
 87   result-type is different from the type of the arguments (e.g., demotion,
88 promotion), vectype will be reset appropriately (later). Note that we have
89 to visit the smallest datatype in this function, because that determines the
 90   VF.  If the smallest datatype in the loop is present only as the rhs of a
 91   promotion operation, we would miss it.
92 Such a case, where a variable of this datatype does not appear in the lhs
93 anywhere in the loop, can only occur if it's an invariant: e.g.:
94 'int_x = (int) short_inv', which we'd expect to have been optimized away by
95 invariant motion. However, we cannot rely on invariant motion to always
96 take invariants out of the loop, and so in the case of promotion we also
97 have to check the rhs.
98 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
99 types. */
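/* Illustrative example (not part of the original sources): in a loop body
   containing only

     short_x = ...;
     int_y = (int) short_x;

   the smallest scalar type (short) occurs only on the rhs of the promotion,
   yet it is the one that must determine the vectorization factor.  */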
100
101 tree
102 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
103 HOST_WIDE_INT *rhs_size_unit)
104 {
105 tree scalar_type = gimple_expr_type (stmt);
106 HOST_WIDE_INT lhs, rhs;
107
108 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
109
110 if (is_gimple_assign (stmt)
111 && (gimple_assign_cast_p (stmt)
112 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
113 || gimple_assign_rhs_code (stmt) == WIDEN_LSHIFT_EXPR
114 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
115 {
116 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
117
118 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
119 if (rhs < lhs)
120 scalar_type = rhs_type;
121 }
122
123 *lhs_size_unit = lhs;
124 *rhs_size_unit = rhs;
125 return scalar_type;
126 }
127
128
129 /* Find the place of the data-ref in STMT in the interleaving chain that starts
130 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
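/* For instance (illustration only): if the chain rooted at FIRST_STMT is
   S1 -> S2 -> S3 and STMT is S3, the returned place is 2; if STMT belongs
   to a different chain, -1 is returned.  */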
131
132 int
133 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
134 {
135 gimple next_stmt = first_stmt;
136 int result = 0;
137
138 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
139 return -1;
140
141 while (next_stmt && next_stmt != stmt)
142 {
143 result++;
144 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
145 }
146
147 if (next_stmt)
148 return result;
149 else
150 return -1;
151 }
152
153
154 /* Function vect_insert_into_interleaving_chain.
155
156 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
157
158 static void
159 vect_insert_into_interleaving_chain (struct data_reference *dra,
160 struct data_reference *drb)
161 {
162 gimple prev, next;
163 tree next_init;
164 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
165 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
166
167 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
168 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
169 while (next)
170 {
171 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
172 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
173 {
174 /* Insert here. */
175 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
176 GROUP_NEXT_ELEMENT (stmtinfo_a) = next;
177 return;
178 }
179 prev = next;
180 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
181 }
182
183 /* We got to the end of the list. Insert here. */
184 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
185 GROUP_NEXT_ELEMENT (stmtinfo_a) = NULL;
186 }
187
188
189 /* Function vect_update_interleaving_chain.
190
 191   For two data-refs DRA and DRB that are part of a chain of interleaved data
192 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
193
194 There are four possible cases:
195 1. New stmts - both DRA and DRB are not a part of any chain:
196 FIRST_DR = DRB
197 NEXT_DR (DRB) = DRA
198 2. DRB is a part of a chain and DRA is not:
199 no need to update FIRST_DR
200 no need to insert DRB
201 insert DRA according to init
202 3. DRA is a part of a chain and DRB is not:
203 if (init of FIRST_DR > init of DRB)
204 FIRST_DR = DRB
205 NEXT(FIRST_DR) = previous FIRST_DR
206 else
207 insert DRB according to its init
208 4. both DRA and DRB are in some interleaving chains:
209 choose the chain with the smallest init of FIRST_DR
210 insert the nodes of the second chain into the first one. */
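/* A hypothetical illustration (not from the original sources): if the
   stores to a[0] and a[2] have already been chained and a store to a[1]
   is then analyzed, the new data-ref is inserted between them according
   to its init, giving the chain a[0] -> a[1] -> a[2].  */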
211
212 static void
213 vect_update_interleaving_chain (struct data_reference *drb,
214 struct data_reference *dra)
215 {
216 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
217 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
218 tree next_init, init_dra_chain, init_drb_chain;
219 gimple first_a, first_b;
220 tree node_init;
221 gimple node, prev, next, first_stmt;
222
223 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
224 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
225 {
226 GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (drb);
227 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
228 GROUP_NEXT_ELEMENT (stmtinfo_b) = DR_STMT (dra);
229 return;
230 }
231
232 /* 2. DRB is a part of a chain and DRA is not. */
233 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && GROUP_FIRST_ELEMENT (stmtinfo_b))
234 {
235 GROUP_FIRST_ELEMENT (stmtinfo_a) = GROUP_FIRST_ELEMENT (stmtinfo_b);
236 /* Insert DRA into the chain of DRB. */
237 vect_insert_into_interleaving_chain (dra, drb);
238 return;
239 }
240
241 /* 3. DRA is a part of a chain and DRB is not. */
242 if (GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
243 {
244 gimple old_first_stmt = GROUP_FIRST_ELEMENT (stmtinfo_a);
245 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
246 old_first_stmt)));
247 gimple tmp;
248
249 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
250 {
251 /* DRB's init is smaller than the init of the stmt previously marked
252 as the first stmt of the interleaving chain of DRA. Therefore, we
253 update FIRST_STMT and put DRB in the head of the list. */
254 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
255 GROUP_NEXT_ELEMENT (stmtinfo_b) = old_first_stmt;
256
257 /* Update all the stmts in the list to point to the new FIRST_STMT. */
258 tmp = old_first_stmt;
259 while (tmp)
260 {
261 GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) = DR_STMT (drb);
262 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (tmp));
263 }
264 }
265 else
266 {
267 /* Insert DRB in the list of DRA. */
268 vect_insert_into_interleaving_chain (drb, dra);
269 GROUP_FIRST_ELEMENT (stmtinfo_b) = GROUP_FIRST_ELEMENT (stmtinfo_a);
270 }
271 return;
272 }
273
274 /* 4. both DRA and DRB are in some interleaving chains. */
275 first_a = GROUP_FIRST_ELEMENT (stmtinfo_a);
276 first_b = GROUP_FIRST_ELEMENT (stmtinfo_b);
277 if (first_a == first_b)
278 return;
279 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
280 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
281
282 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
283 {
284 /* Insert the nodes of DRA chain into the DRB chain.
 285        After inserting a node, continue from this node of the DRB chain (don't
 286        start from the beginning).  */
287 node = GROUP_FIRST_ELEMENT (stmtinfo_a);
288 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
289 first_stmt = first_b;
290 }
291 else
292 {
293 /* Insert the nodes of DRB chain into the DRA chain.
 294        After inserting a node, continue from this node of the DRA chain (don't
 295        start from the beginning).  */
296 node = GROUP_FIRST_ELEMENT (stmtinfo_b);
297 prev = GROUP_FIRST_ELEMENT (stmtinfo_a);
298 first_stmt = first_a;
299 }
300
301 while (node)
302 {
303 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
304 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
305 while (next)
306 {
307 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
308 if (tree_int_cst_compare (next_init, node_init) > 0)
309 {
310 /* Insert here. */
311 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
312 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = next;
313 prev = node;
314 break;
315 }
316 prev = next;
317 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
318 }
319 if (!next)
320 {
321 /* We got to the end of the list. Insert here. */
322 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
323 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = NULL;
324 prev = node;
325 }
326 GROUP_FIRST_ELEMENT (vinfo_for_stmt (node)) = first_stmt;
327 node = GROUP_NEXT_ELEMENT (vinfo_for_stmt (node));
328 }
329 }
330
331 /* Check dependence between DRA and DRB for basic block vectorization.
 332   If the accesses share the same bases and offsets, we can compare their initial
333 constant offsets to decide whether they differ or not. In case of a read-
334 write dependence we check that the load is before the store to ensure that
335 vectorization will not change the order of the accesses. */
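/* Example (illustration only): in the basic block

     x = a[0];   <- load
     a[0] = y;   <- store to the same location

   the load comes first, so vectorization preserves the order and the pair
   is not treated as dependent; with the statements swapped it would be.  */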
336
337 static bool
338 vect_drs_dependent_in_basic_block (struct data_reference *dra,
339 struct data_reference *drb)
340 {
341 HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
342 gimple earlier_stmt;
343
344 /* We only call this function for pairs of loads and stores, but we verify
345 it here. */
346 if (DR_IS_READ (dra) == DR_IS_READ (drb))
347 {
348 if (DR_IS_READ (dra))
349 return false;
350 else
351 return true;
352 }
353
354 /* Check that the data-refs have same bases and offsets. If not, we can't
355 determine if they are dependent. */
356 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
357 || !dr_equal_offsets_p (dra, drb))
358 return true;
359
360 /* Check the types. */
361 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
362 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
363
364 if (type_size_a != type_size_b
365 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
366 TREE_TYPE (DR_REF (drb))))
367 return true;
368
369 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
370 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
371
372 /* Two different locations - no dependence. */
373 if (init_a != init_b)
374 return false;
375
376 /* We have a read-write dependence. Check that the load is before the store.
 377      When we vectorize basic blocks, a vector load can only be placed before
 378      the corresponding scalar load, and a vector store can only be placed
 379      after its corresponding scalar store.  So the order of the accesses is
 380      preserved in case the load is before the store.  */
381 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
382 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
383 return false;
384
385 return true;
386 }
387
388
389 /* Function vect_check_interleaving.
390
391 Check if DRA and DRB are a part of interleaving. In case they are, insert
392 DRA and DRB in an interleaving chain. */
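/* Sketch of the idea (illustration only): for

     for (i = 0; i < n; i++)
       {
         a[2*i] = ...;      <- init 0, step 2 * sizeof (*a)
         a[2*i+1] = ...;    <- init sizeof (*a), same step
       }

   the two stores share base and step, and their inits differ by exactly
   one element size and by less than the step, so they are recorded as one
   interleaving (grouped) access.  */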
393
394 static bool
395 vect_check_interleaving (struct data_reference *dra,
396 struct data_reference *drb)
397 {
398 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
399
 400   /* Check that the data-refs have the same first location (except init) and
 401      that they are both either stores or loads (not a load and a store).  */
402 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
403 || !dr_equal_offsets_p (dra, drb)
404 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
405 || DR_IS_READ (dra) != DR_IS_READ (drb))
406 return false;
407
408 /* Check:
409 1. data-refs are of the same type
410 2. their steps are equal
411 3. the step (if greater than zero) is greater than the difference between
412 data-refs' inits. */
413 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
414 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
415
416 if (type_size_a != type_size_b
417 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
418 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
419 TREE_TYPE (DR_REF (drb))))
420 return false;
421
422 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
423 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
424 step = TREE_INT_CST_LOW (DR_STEP (dra));
425
426 if (init_a > init_b)
427 {
428 /* If init_a == init_b + the size of the type * k, we have an interleaving,
429 and DRB is accessed before DRA. */
430 diff_mod_size = (init_a - init_b) % type_size_a;
431
432 if (step && (init_a - init_b) > step)
433 return false;
434
435 if (diff_mod_size == 0)
436 {
437 vect_update_interleaving_chain (drb, dra);
438 if (vect_print_dump_info (REPORT_DR_DETAILS))
439 {
440 fprintf (vect_dump, "Detected interleaving ");
441 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
442 fprintf (vect_dump, " and ");
443 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
444 }
445 return true;
446 }
447 }
448 else
449 {
450 /* If init_b == init_a + the size of the type * k, we have an
451 interleaving, and DRA is accessed before DRB. */
452 diff_mod_size = (init_b - init_a) % type_size_a;
453
454 if (step && (init_b - init_a) > step)
455 return false;
456
457 if (diff_mod_size == 0)
458 {
459 vect_update_interleaving_chain (dra, drb);
460 if (vect_print_dump_info (REPORT_DR_DETAILS))
461 {
462 fprintf (vect_dump, "Detected interleaving ");
463 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
464 fprintf (vect_dump, " and ");
465 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
466 }
467 return true;
468 }
469 }
470
471 return false;
472 }
473
 474 /* Check if the data references pointed to by DR_I and DR_J are the same or
 475    belong to the same interleaving group.  Return FALSE if the drs are
 476    different, otherwise return TRUE.  */
477
478 static bool
479 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
480 {
481 gimple stmt_i = DR_STMT (dr_i);
482 gimple stmt_j = DR_STMT (dr_j);
483
484 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
485 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
486 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j))
487 && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
488 == GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j)))))
489 return true;
490 else
491 return false;
492 }
493
494 /* If address ranges represented by DDR_I and DDR_J are equal,
495 return TRUE, otherwise return FALSE. */
496
497 static bool
498 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
499 {
500 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
501 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
502 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
503 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
504 return true;
505 else
506 return false;
507 }
508
509 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
 510    tested at run-time.  Return TRUE if DDR was successfully inserted.
 511    Return FALSE if versioning is not supported.  */
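/* Conceptually (an illustration, not the code generated here), a DDR that
   is marked in this way is later guarded by a versioning test of the form

     if (a + n <= b || b + n <= a)
       ... vectorized loop ...
     else
       ... original scalar loop ...

   emitted by the loop-versioning machinery.  */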
512
513 static bool
514 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
515 {
516 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
517
518 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
519 return false;
520
521 if (vect_print_dump_info (REPORT_DR_DETAILS))
522 {
523 fprintf (vect_dump, "mark for run-time aliasing test between ");
524 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
525 fprintf (vect_dump, " and ");
526 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
527 }
528
529 if (optimize_loop_nest_for_size_p (loop))
530 {
531 if (vect_print_dump_info (REPORT_DR_DETAILS))
532 fprintf (vect_dump, "versioning not supported when optimizing for size.");
533 return false;
534 }
535
536 /* FORNOW: We don't support versioning with outer-loop vectorization. */
537 if (loop->inner)
538 {
539 if (vect_print_dump_info (REPORT_DR_DETAILS))
540 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
541 return false;
542 }
543
544 /* FORNOW: We don't support creating runtime alias tests for non-constant
545 step. */
546 if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
547 || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
548 {
549 if (vect_print_dump_info (REPORT_DR_DETAILS))
550 fprintf (vect_dump, "versioning not yet supported for non-constant "
551 "step");
552 return false;
553 }
554
555 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
556 return true;
557 }
558
559
560 /* Function vect_analyze_data_ref_dependence.
561
562 Return TRUE if there (might) exist a dependence between a memory-reference
 563    DRA and a memory-reference DRB.  If versioning for alias can check the
 564    dependence at run-time, return FALSE.  Adjust *MAX_VF according to
565 the data dependence. */
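/* For instance (illustration only): in

     for (i = 0; i < n; i++)
       a[i + 3] = a[i] + 1;

   the dependence distance is 3, so *MAX_VF is reduced to 3 and at most
   three elements may be processed per vector iteration.  */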
566
567 static bool
568 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
569 loop_vec_info loop_vinfo, int *max_vf)
570 {
571 unsigned int i;
572 struct loop *loop = NULL;
573 struct data_reference *dra = DDR_A (ddr);
574 struct data_reference *drb = DDR_B (ddr);
575 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
576 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
577 lambda_vector dist_v;
578 unsigned int loop_depth;
579
580 /* Don't bother to analyze statements marked as unvectorizable. */
581 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
582 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
583 return false;
584
585 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
586 {
587 /* Independent data accesses. */
588 vect_check_interleaving (dra, drb);
589 return false;
590 }
591
592 if (loop_vinfo)
593 loop = LOOP_VINFO_LOOP (loop_vinfo);
594
595 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
596 return false;
597
598 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
599 {
600 gimple earlier_stmt;
601
602 if (loop_vinfo)
603 {
604 if (vect_print_dump_info (REPORT_DR_DETAILS))
605 {
606 fprintf (vect_dump, "versioning for alias required: "
607 "can't determine dependence between ");
608 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
609 fprintf (vect_dump, " and ");
610 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
611 }
612
613 /* Add to list of ddrs that need to be tested at run-time. */
614 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
615 }
616
 617       /* When vectorizing a basic block, an unknown dependence can still mean
 618          a grouped access.  */
619 if (vect_check_interleaving (dra, drb))
620 return false;
621
622 /* Read-read is OK (we need this check here, after checking for
623 interleaving). */
624 if (DR_IS_READ (dra) && DR_IS_READ (drb))
625 return false;
626
627 if (vect_print_dump_info (REPORT_DR_DETAILS))
628 {
629 fprintf (vect_dump, "can't determine dependence between ");
630 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
631 fprintf (vect_dump, " and ");
632 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
633 }
634
635 /* We do not vectorize basic blocks with write-write dependencies. */
636 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
637 return true;
638
639 /* Check that it's not a load-after-store dependence. */
640 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
641 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
642 return true;
643
644 return false;
645 }
646
 647   /* Versioning for alias is not yet supported for basic block SLP, and the
 648      dependence distance is inapplicable; hence, in case of a known data
 649      dependence, basic block vectorization is impossible for now.  */
650 if (!loop_vinfo)
651 {
652 if (dra != drb && vect_check_interleaving (dra, drb))
653 return false;
654
655 if (vect_print_dump_info (REPORT_DR_DETAILS))
656 {
657 fprintf (vect_dump, "determined dependence between ");
658 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
659 fprintf (vect_dump, " and ");
660 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
661 }
662
 663       /* Do not vectorize basic blocks with write-write dependences.  */
664 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
665 return true;
666
667 /* Check if this dependence is allowed in basic block vectorization. */
668 return vect_drs_dependent_in_basic_block (dra, drb);
669 }
670
671 /* Loop-based vectorization and known data dependence. */
672 if (DDR_NUM_DIST_VECTS (ddr) == 0)
673 {
674 if (vect_print_dump_info (REPORT_DR_DETAILS))
675 {
676 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
677 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
678 fprintf (vect_dump, " and ");
679 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
680 }
681 /* Add to list of ddrs that need to be tested at run-time. */
682 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
683 }
684
685 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
686 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
687 {
688 int dist = dist_v[loop_depth];
689
690 if (vect_print_dump_info (REPORT_DR_DETAILS))
691 fprintf (vect_dump, "dependence distance = %d.", dist);
692
693 if (dist == 0)
694 {
695 if (vect_print_dump_info (REPORT_DR_DETAILS))
696 {
697 fprintf (vect_dump, "dependence distance == 0 between ");
698 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
699 fprintf (vect_dump, " and ");
700 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
701 }
702
 703           /* For interleaving, mark that there is a read-write dependency if
 704              necessary.  We checked earlier that one of the data-refs is a store.  */
705 if (DR_IS_READ (dra))
706 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
707 else
708 {
709 if (DR_IS_READ (drb))
710 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
711 }
712
713 continue;
714 }
715
716 if (dist > 0 && DDR_REVERSED_P (ddr))
717 {
718 /* If DDR_REVERSED_P the order of the data-refs in DDR was
719 reversed (to make distance vector positive), and the actual
720 distance is negative. */
721 if (vect_print_dump_info (REPORT_DR_DETAILS))
722 fprintf (vect_dump, "dependence distance negative.");
723 continue;
724 }
725
726 if (abs (dist) >= 2
727 && abs (dist) < *max_vf)
728 {
729 /* The dependence distance requires reduction of the maximal
730 vectorization factor. */
731 *max_vf = abs (dist);
732 if (vect_print_dump_info (REPORT_DR_DETAILS))
733 fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
734 *max_vf);
735 }
736
737 if (abs (dist) >= *max_vf)
738 {
739 /* Dependence distance does not create dependence, as far as
740 vectorization is concerned, in this case. */
741 if (vect_print_dump_info (REPORT_DR_DETAILS))
742 fprintf (vect_dump, "dependence distance >= VF.");
743 continue;
744 }
745
746 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
747 {
748 fprintf (vect_dump, "not vectorized, possible dependence "
749 "between data-refs ");
750 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
751 fprintf (vect_dump, " and ");
752 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
753 }
754
755 return true;
756 }
757
758 return false;
759 }
760
761 /* Function vect_analyze_data_ref_dependences.
762
763 Examine all the data references in the loop, and make sure there do not
764 exist any data dependences between them. Set *MAX_VF according to
765 the maximum vectorization factor the data dependences allow. */
766
767 bool
768 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
769 bb_vec_info bb_vinfo, int *max_vf)
770 {
771 unsigned int i;
772 VEC (ddr_p, heap) *ddrs = NULL;
773 struct data_dependence_relation *ddr;
774
775 if (vect_print_dump_info (REPORT_DETAILS))
 776     fprintf (vect_dump, "=== vect_analyze_data_ref_dependences ===");
777
778 if (loop_vinfo)
779 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
780 else
781 ddrs = BB_VINFO_DDRS (bb_vinfo);
782
783 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
784 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
785 return false;
786
787 return true;
788 }
789
790
791 /* Function vect_compute_data_ref_alignment
792
793 Compute the misalignment of the data reference DR.
794
795 Output:
796 1. If during the misalignment computation it is found that the data reference
797 cannot be vectorized then false is returned.
798 2. DR_MISALIGNMENT (DR) is defined.
799
800 FOR NOW: No analysis is actually performed. Misalignment is calculated
801 only for trivial cases. TODO. */
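/* Example (illustration only, assuming a 16-byte V4SI vectype): for an
   access whose DR_BASE_ADDRESS is known to be 16-byte aligned and whose
   DR_INIT is 4, the computed DR_MISALIGNMENT is 4 bytes; if the base
   alignment cannot be determined or forced, the misalignment stays -1
   (unknown).  */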
802
803 static bool
804 vect_compute_data_ref_alignment (struct data_reference *dr)
805 {
806 gimple stmt = DR_STMT (dr);
807 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
808 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
809 struct loop *loop = NULL;
810 tree ref = DR_REF (dr);
811 tree vectype;
812 tree base, base_addr;
813 bool base_aligned;
814 tree misalign;
815 tree aligned_to, alignment;
816
817 if (vect_print_dump_info (REPORT_DETAILS))
818 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
819
820 if (loop_vinfo)
821 loop = LOOP_VINFO_LOOP (loop_vinfo);
822
823 /* Initialize misalignment to unknown. */
824 SET_DR_MISALIGNMENT (dr, -1);
825
826 /* Strided loads perform only component accesses, misalignment information
827 is irrelevant for them. */
828 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
829 return true;
830
831 misalign = DR_INIT (dr);
832 aligned_to = DR_ALIGNED_TO (dr);
833 base_addr = DR_BASE_ADDRESS (dr);
834 vectype = STMT_VINFO_VECTYPE (stmt_info);
835
836 /* In case the dataref is in an inner-loop of the loop that is being
837 vectorized (LOOP), we use the base and misalignment information
838 relative to the outer-loop (LOOP). This is ok only if the misalignment
839 stays the same throughout the execution of the inner-loop, which is why
 840      we have to check that the stride of the dataref in the inner-loop is
 841      evenly divisible by the vector size.  */
842 if (loop && nested_in_vect_loop_p (loop, stmt))
843 {
844 tree step = DR_STEP (dr);
845 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
846
847 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
848 {
849 if (vect_print_dump_info (REPORT_ALIGNMENT))
850 fprintf (vect_dump, "inner step divides the vector-size.");
851 misalign = STMT_VINFO_DR_INIT (stmt_info);
852 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
853 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
854 }
855 else
856 {
857 if (vect_print_dump_info (REPORT_ALIGNMENT))
858 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
859 misalign = NULL_TREE;
860 }
861 }
862
863 /* Similarly, if we're doing basic-block vectorization, we can only use
864 base and misalignment information relative to an innermost loop if the
865 misalignment stays the same throughout the execution of the loop.
 866      As above, this is the case if the stride of the dataref is evenly
 867      divisible by the vector size.  */
868 if (!loop)
869 {
870 tree step = DR_STEP (dr);
871 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
872
873 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
874 {
875 if (vect_print_dump_info (REPORT_ALIGNMENT))
876 fprintf (vect_dump, "SLP: step doesn't divide the vector-size.");
877 misalign = NULL_TREE;
878 }
879 }
880
881 base = build_fold_indirect_ref (base_addr);
 882   alignment = ssize_int (TYPE_ALIGN (vectype) / BITS_PER_UNIT);
883
884 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
885 || !misalign)
886 {
887 if (vect_print_dump_info (REPORT_ALIGNMENT))
888 {
889 fprintf (vect_dump, "Unknown alignment for access: ");
890 print_generic_expr (vect_dump, base, TDF_SLIM);
891 }
892 return true;
893 }
894
895 if ((DECL_P (base)
896 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
897 alignment) >= 0)
898 || (TREE_CODE (base_addr) == SSA_NAME
899 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
900 TREE_TYPE (base_addr)))),
901 alignment) >= 0)
902 || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
903 base_aligned = true;
904 else
905 base_aligned = false;
906
907 if (!base_aligned)
908 {
909 /* Do not change the alignment of global variables here if
910 flag_section_anchors is enabled as we already generated
911 RTL for other functions. Most global variables should
912 have been aligned during the IPA increase_alignment pass. */
913 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
914 || (TREE_STATIC (base) && flag_section_anchors))
915 {
916 if (vect_print_dump_info (REPORT_DETAILS))
917 {
918 fprintf (vect_dump, "can't force alignment of ref: ");
919 print_generic_expr (vect_dump, ref, TDF_SLIM);
920 }
921 return true;
922 }
923
924 /* Force the alignment of the decl.
925 NOTE: This is the only change to the code we make during
926 the analysis phase, before deciding to vectorize the loop. */
927 if (vect_print_dump_info (REPORT_DETAILS))
928 {
929 fprintf (vect_dump, "force alignment of ");
930 print_generic_expr (vect_dump, ref, TDF_SLIM);
931 }
932
933 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
934 DECL_USER_ALIGN (base) = 1;
935 }
936
937 /* At this point we assume that the base is aligned. */
938 gcc_assert (base_aligned
939 || (TREE_CODE (base) == VAR_DECL
940 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
941
 942   /* If this is a backward running DR then the first access in the larger
 943      vectype actually is N-1 elements before the address in the DR.
944 Adjust misalign accordingly. */
945 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
946 {
947 tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
948 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
949 otherwise we wouldn't be here. */
950 offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
951 /* PLUS because DR_STEP was negative. */
952 misalign = size_binop (PLUS_EXPR, misalign, offset);
953 }
954
955 /* Modulo alignment. */
956 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
957
958 if (!host_integerp (misalign, 1))
959 {
960 /* Negative or overflowed misalignment value. */
961 if (vect_print_dump_info (REPORT_DETAILS))
962 fprintf (vect_dump, "unexpected misalign value");
963 return false;
964 }
965
966 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
967
968 if (vect_print_dump_info (REPORT_DETAILS))
969 {
970 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
971 print_generic_expr (vect_dump, ref, TDF_SLIM);
972 }
973
974 return true;
975 }
976
977
978 /* Function vect_compute_data_refs_alignment
979
980 Compute the misalignment of data references in the loop.
981 Return FALSE if a data reference is found that cannot be vectorized. */
982
983 static bool
984 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
985 bb_vec_info bb_vinfo)
986 {
987 VEC (data_reference_p, heap) *datarefs;
988 struct data_reference *dr;
989 unsigned int i;
990
991 if (loop_vinfo)
992 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
993 else
994 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
995
996 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
997 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
998 && !vect_compute_data_ref_alignment (dr))
999 {
1000 if (bb_vinfo)
1001 {
1002 /* Mark unsupported statement as unvectorizable. */
1003 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
1004 continue;
1005 }
1006 else
1007 return false;
1008 }
1009
1010 return true;
1011 }
1012
1013
1014 /* Function vect_update_misalignment_for_peel
1015
1016 DR - the data reference whose misalignment is to be adjusted.
1017 DR_PEEL - the data reference whose misalignment is being made
1018 zero in the vector loop by the peel.
1019 NPEEL - the number of iterations in the peel loop if the misalignment
1020 of DR_PEEL is known at compile time. */
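/* Worked example (illustration only, 4-byte elements, 16-byte vectors):
   with NPEEL = 3 every data-ref is advanced by 3 * 4 = 12 bytes, so a DR
   that was misaligned by 4 becomes aligned ((4 + 12) & 15 == 0), while a
   DR that was aligned becomes misaligned by 12.  */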
1021
1022 static void
1023 vect_update_misalignment_for_peel (struct data_reference *dr,
1024 struct data_reference *dr_peel, int npeel)
1025 {
1026 unsigned int i;
1027 VEC(dr_p,heap) *same_align_drs;
1028 struct data_reference *current_dr;
1029 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1030 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1031 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1032 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1033
1034 /* For interleaved data accesses the step in the loop must be multiplied by
1035 the size of the interleaving group. */
1036 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1037 dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
1038 if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info))
1039 dr_peel_size *= GROUP_SIZE (peel_stmt_info);
1040
1041 /* It can be assumed that the data refs with the same alignment as dr_peel
1042 are aligned in the vector loop. */
1043 same_align_drs
1044 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1045 FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
1046 {
1047 if (current_dr != dr)
1048 continue;
1049 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1050 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1051 SET_DR_MISALIGNMENT (dr, 0);
1052 return;
1053 }
1054
1055 if (known_alignment_for_access_p (dr)
1056 && known_alignment_for_access_p (dr_peel))
1057 {
1058 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1059 int misal = DR_MISALIGNMENT (dr);
1060 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1061 misal += negative ? -npeel * dr_size : npeel * dr_size;
1062 misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1063 SET_DR_MISALIGNMENT (dr, misal);
1064 return;
1065 }
1066
1067 if (vect_print_dump_info (REPORT_DETAILS))
1068 fprintf (vect_dump, "Setting misalignment to -1.");
1069 SET_DR_MISALIGNMENT (dr, -1);
1070 }
1071
1072
1073 /* Function vect_verify_datarefs_alignment
1074
1075 Return TRUE if all data references in the loop can be
1076 handled with respect to alignment. */
1077
1078 bool
1079 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1080 {
1081 VEC (data_reference_p, heap) *datarefs;
1082 struct data_reference *dr;
1083 enum dr_alignment_support supportable_dr_alignment;
1084 unsigned int i;
1085
1086 if (loop_vinfo)
1087 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1088 else
1089 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1090
1091 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1092 {
1093 gimple stmt = DR_STMT (dr);
1094 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1095
1096 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1097 continue;
1098
1099 /* For interleaving, only the alignment of the first access matters.
1100 Skip statements marked as not vectorizable. */
1101 if ((STMT_VINFO_GROUPED_ACCESS (stmt_info)
1102 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1103 || !STMT_VINFO_VECTORIZABLE (stmt_info))
1104 continue;
1105
1106 /* Strided loads perform only component accesses, alignment is
1107 irrelevant for them. */
1108 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1109 continue;
1110
1111 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1112 if (!supportable_dr_alignment)
1113 {
1114 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1115 {
1116 if (DR_IS_READ (dr))
1117 fprintf (vect_dump,
1118 "not vectorized: unsupported unaligned load.");
1119 else
1120 fprintf (vect_dump,
1121 "not vectorized: unsupported unaligned store.");
1122
1123 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1124 }
1125 return false;
1126 }
1127 if (supportable_dr_alignment != dr_aligned
1128 && vect_print_dump_info (REPORT_ALIGNMENT))
1129 fprintf (vect_dump, "Vectorizing an unaligned access.");
1130 }
1131 return true;
1132 }
1133
 1134 /* Given a memory reference EXP, return whether its alignment is less
1135 than its size. */
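/* Example (illustration only): a 4-byte integer member of a packed
   structure may be guaranteed only 1-byte alignment, in which case its
   size exceeds its known alignment and the function returns true.  */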
1136
1137 static bool
1138 not_size_aligned (tree exp)
1139 {
1140 if (!host_integerp (TYPE_SIZE (TREE_TYPE (exp)), 1))
1141 return true;
1142
1143 return (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (exp)))
1144 > get_object_alignment (exp));
1145 }
1146
1147 /* Function vector_alignment_reachable_p
1148
1149 Return true if vector alignment for DR is reachable by peeling
1150 a few loop iterations. Return false otherwise. */
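/* Example (illustration only, V4SI vectors): an access known to be
   misaligned by 8 bytes becomes aligned after peeling two scalar
   iterations, whereas an access misaligned by 2 bytes never does, because
   the 4-byte element size does not divide the misalignment.  */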
1151
1152 static bool
1153 vector_alignment_reachable_p (struct data_reference *dr)
1154 {
1155 gimple stmt = DR_STMT (dr);
1156 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1157 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1158
1159 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1160 {
 1161       /* For interleaved access we peel only if the number of iterations in
 1162          the prologue loop (VF - misalignment) is a multiple of the
 1163          number of interleaved accesses.  */
1164 int elem_size, mis_in_elements;
1165 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1166
1167 /* FORNOW: handle only known alignment. */
1168 if (!known_alignment_for_access_p (dr))
1169 return false;
1170
1171 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1172 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1173
1174 if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
1175 return false;
1176 }
1177
 1178   /* If the misalignment is known at compile time then allow peeling
1179 only if natural alignment is reachable through peeling. */
1180 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1181 {
1182 HOST_WIDE_INT elmsize =
1183 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1184 if (vect_print_dump_info (REPORT_DETAILS))
1185 {
1186 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1187 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1188 }
1189 if (DR_MISALIGNMENT (dr) % elmsize)
1190 {
1191 if (vect_print_dump_info (REPORT_DETAILS))
1192 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1193 return false;
1194 }
1195 }
1196
1197 if (!known_alignment_for_access_p (dr))
1198 {
1199 tree type = TREE_TYPE (DR_REF (dr));
1200 bool is_packed = not_size_aligned (DR_REF (dr));
1201 if (vect_print_dump_info (REPORT_DETAILS))
1202 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1203 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1204 return true;
1205 else
1206 return false;
1207 }
1208
1209 return true;
1210 }
1211
1212
1213 /* Calculate the cost of the memory access represented by DR. */
1214
1215 static void
1216 vect_get_data_access_cost (struct data_reference *dr,
1217 unsigned int *inside_cost,
1218 unsigned int *outside_cost,
1219 stmt_vector_for_cost *body_cost_vec)
1220 {
1221 gimple stmt = DR_STMT (dr);
1222 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1223 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1224 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1225 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1226 int ncopies = vf / nunits;
1227
1228 if (DR_IS_READ (dr))
1229 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost,
1230 NULL, body_cost_vec, false);
1231 else
1232 vect_get_store_cost (dr, ncopies, inside_cost, body_cost_vec);
1233
1234 if (vect_print_dump_info (REPORT_COST))
1235 fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
1236 "outside_cost = %d.", *inside_cost, *outside_cost);
1237 }
1238
1239
1240 static hashval_t
1241 vect_peeling_hash (const void *elem)
1242 {
1243 const struct _vect_peel_info *peel_info;
1244
1245 peel_info = (const struct _vect_peel_info *) elem;
1246 return (hashval_t) peel_info->npeel;
1247 }
1248
1249
1250 static int
1251 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1252 {
1253 const struct _vect_peel_info *a, *b;
1254
1255 a = (const struct _vect_peel_info *) elem1;
1256 b = (const struct _vect_peel_info *) elem2;
1257 return (a->npeel == b->npeel);
1258 }
1259
1260
1261 /* Insert DR into peeling hash table with NPEEL as key. */
1262
1263 static void
1264 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1265 int npeel)
1266 {
1267 struct _vect_peel_info elem, *slot;
1268 void **new_slot;
1269 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1270
1271 elem.npeel = npeel;
1272 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1273 &elem);
1274 if (slot)
1275 slot->count++;
1276 else
1277 {
1278 slot = XNEW (struct _vect_peel_info);
1279 slot->npeel = npeel;
1280 slot->dr = dr;
1281 slot->count = 1;
1282 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1283 INSERT);
1284 *new_slot = slot;
1285 }
1286
1287 if (!supportable_dr_alignment && !flag_vect_cost_model)
1288 slot->count += VECT_MAX_COST;
1289 }
1290
1291
 1292 /* Traverse the peeling hash table to find the peeling option that aligns
 1293    the maximum number of data accesses.  */
1294
1295 static int
1296 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1297 {
1298 vect_peel_info elem = (vect_peel_info) *slot;
1299 vect_peel_extended_info max = (vect_peel_extended_info) data;
1300
1301 if (elem->count > max->peel_info.count
1302 || (elem->count == max->peel_info.count
1303 && max->peel_info.npeel > elem->npeel))
1304 {
1305 max->peel_info.npeel = elem->npeel;
1306 max->peel_info.count = elem->count;
1307 max->peel_info.dr = elem->dr;
1308 }
1309
1310 return 1;
1311 }
1312
1313
 1314 /* Traverse the peeling hash table and calculate the cost for each peeling
 1315    option.  Find the one with the lowest cost.  */
1316
1317 static int
1318 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1319 {
1320 vect_peel_info elem = (vect_peel_info) *slot;
1321 vect_peel_extended_info min = (vect_peel_extended_info) data;
1322 int save_misalignment, dummy;
1323 unsigned int inside_cost = 0, outside_cost = 0, i;
1324 gimple stmt = DR_STMT (elem->dr);
1325 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1326 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1327 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1328 struct data_reference *dr;
1329 stmt_vector_for_cost prologue_cost_vec, body_cost_vec, epilogue_cost_vec;
1330 int single_iter_cost;
1331
1332 prologue_cost_vec = VEC_alloc (stmt_info_for_cost, heap, 2);
1333 body_cost_vec = VEC_alloc (stmt_info_for_cost, heap, 2);
1334 epilogue_cost_vec = VEC_alloc (stmt_info_for_cost, heap, 2);
1335
1336 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1337 {
1338 stmt = DR_STMT (dr);
1339 stmt_info = vinfo_for_stmt (stmt);
1340 /* For interleaving, only the alignment of the first access
1341 matters. */
1342 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1343 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1344 continue;
1345
1346 save_misalignment = DR_MISALIGNMENT (dr);
1347 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1348 vect_get_data_access_cost (dr, &inside_cost, &outside_cost,
1349 &body_cost_vec);
1350 SET_DR_MISALIGNMENT (dr, save_misalignment);
1351 }
1352
1353 single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
1354 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel,
1355 &dummy, single_iter_cost,
1356 &prologue_cost_vec,
1357 &epilogue_cost_vec);
1358
1359 /* Prologue and epilogue costs are added to the target model later.
1360 These costs depend only on the scalar iteration cost, the
1361 number of peeling iterations finally chosen, and the number of
1362 misaligned statements. So discard the information found here. */
1363 VEC_free (stmt_info_for_cost, heap, prologue_cost_vec);
1364 VEC_free (stmt_info_for_cost, heap, epilogue_cost_vec);
1365
1366 if (inside_cost < min->inside_cost
1367 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1368 {
1369 min->inside_cost = inside_cost;
1370 min->outside_cost = outside_cost;
1371 min->body_cost_vec = body_cost_vec;
1372 min->peel_info.dr = elem->dr;
1373 min->peel_info.npeel = elem->npeel;
1374 }
1375 else
1376 VEC_free (stmt_info_for_cost, heap, body_cost_vec);
1377
1378 return 1;
1379 }
1380
1381
1382 /* Choose best peeling option by traversing peeling hash table and either
1383 choosing an option with the lowest cost (if cost model is enabled) or the
1384 option that aligns as many accesses as possible. */
1385
1386 static struct data_reference *
1387 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1388 unsigned int *npeel,
1389 stmt_vector_for_cost *body_cost_vec)
1390 {
1391 struct _vect_peel_extended_info res;
1392
1393 res.peel_info.dr = NULL;
1394 res.body_cost_vec = NULL;
1395
1396 if (flag_vect_cost_model)
1397 {
1398 res.inside_cost = INT_MAX;
1399 res.outside_cost = INT_MAX;
1400 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1401 vect_peeling_hash_get_lowest_cost, &res);
1402 }
1403 else
1404 {
1405 res.peel_info.count = 0;
1406 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1407 vect_peeling_hash_get_most_frequent, &res);
1408 }
1409
1410 *npeel = res.peel_info.npeel;
1411 *body_cost_vec = res.body_cost_vec;
1412 return res.peel_info.dr;
1413 }
1414
1415
1416 /* Function vect_enhance_data_refs_alignment
1417
1418 This pass will use loop versioning and loop peeling in order to enhance
1419 the alignment of data references in the loop.
1420
1421 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1422 original loop is to be vectorized. Any other loops that are created by
 1423    the transformations performed in this pass are not supposed to be
1424 vectorized. This restriction will be relaxed.
1425
1426 This pass will require a cost model to guide it whether to apply peeling
1427 or versioning or a combination of the two. For example, the scheme that
 1428    Intel uses when given a loop with several memory accesses, is as follows:
 1429    choose one memory access ('p') whose alignment you want to force by doing
1430 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1431 other accesses are not necessarily aligned, or (2) use loop versioning to
1432 generate one loop in which all accesses are aligned, and another loop in
1433 which only 'p' is necessarily aligned.
1434
1435 ("Automatic Intra-Register Vectorization for the Intel Architecture",
 1436    Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1437 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1438
1439 Devising a cost model is the most critical aspect of this work. It will
1440 guide us on which access to peel for, whether to use loop versioning, how
1441 many versions to create, etc. The cost model will probably consist of
1442 generic considerations as well as target specific considerations (on
1443 powerpc for example, misaligned stores are more painful than misaligned
1444 loads).
1445
1446 Here are the general steps involved in alignment enhancements:
1447
1448 -- original loop, before alignment analysis:
1449 for (i=0; i<N; i++){
1450 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1451 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1452 }
1453
1454 -- After vect_compute_data_refs_alignment:
1455 for (i=0; i<N; i++){
1456 x = q[i]; # DR_MISALIGNMENT(q) = 3
1457 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1458 }
1459
1460 -- Possibility 1: we do loop versioning:
1461 if (p is aligned) {
1462 for (i=0; i<N; i++){ # loop 1A
1463 x = q[i]; # DR_MISALIGNMENT(q) = 3
1464 p[i] = y; # DR_MISALIGNMENT(p) = 0
1465 }
1466 }
1467 else {
1468 for (i=0; i<N; i++){ # loop 1B
1469 x = q[i]; # DR_MISALIGNMENT(q) = 3
1470 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1471 }
1472 }
1473
1474 -- Possibility 2: we do loop peeling:
1475 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1476 x = q[i];
1477 p[i] = y;
1478 }
1479 for (i = 3; i < N; i++){ # loop 2A
1480 x = q[i]; # DR_MISALIGNMENT(q) = 0
1481 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1482 }
1483
1484 -- Possibility 3: combination of loop peeling and versioning:
1485 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1486 x = q[i];
1487 p[i] = y;
1488 }
1489 if (p is aligned) {
1490 for (i = 3; i<N; i++){ # loop 3A
1491 x = q[i]; # DR_MISALIGNMENT(q) = 0
1492 p[i] = y; # DR_MISALIGNMENT(p) = 0
1493 }
1494 }
1495 else {
1496 for (i = 3; i<N; i++){ # loop 3B
1497 x = q[i]; # DR_MISALIGNMENT(q) = 0
1498 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1499 }
1500 }
1501
1502 These loops are later passed to loop_transform to be vectorized. The
1503 vectorizer will use the alignment information to guide the transformation
1504 (whether to generate regular loads/stores, or with special handling for
1505 misalignment). */
1506
1507 bool
1508 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1509 {
1510 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1511 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1512 enum dr_alignment_support supportable_dr_alignment;
1513 struct data_reference *dr0 = NULL, *first_store = NULL;
1514 struct data_reference *dr;
1515 unsigned int i, j;
1516 bool do_peeling = false;
1517 bool do_versioning = false;
1518 bool stat;
1519 gimple stmt;
1520 stmt_vec_info stmt_info;
1521 int vect_versioning_for_alias_required;
1522 unsigned int npeel = 0;
1523 bool all_misalignments_unknown = true;
1524 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1525 unsigned possible_npeel_number = 1;
1526 tree vectype;
1527 unsigned int nelements, mis, same_align_drs_max = 0;
1528 stmt_vector_for_cost body_cost_vec = NULL;
1529
1530 if (vect_print_dump_info (REPORT_DETAILS))
1531 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1532
1533 /* While cost model enhancements are expected in the future, the high level
1534 view of the code at this time is as follows:
1535
1536 A) If there is a misaligned access then see if peeling to align
1537 this access can make all data references satisfy
1538 vect_supportable_dr_alignment. If so, update data structures
1539 as needed and return true.
1540
1541 B) If peeling wasn't possible and there is a data reference with an
1542 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1543 then see if loop versioning checks can be used to make all data
1544 references satisfy vect_supportable_dr_alignment. If so, update
1545 data structures as needed and return true.
1546
1547 C) If neither peeling nor versioning were successful then return false if
1548 any data reference does not satisfy vect_supportable_dr_alignment.
1549
1550 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1551
1552 Note, Possibility 3 above (which is peeling and versioning together) is not
1553 being done at this time. */
1554
1555 /* (1) Peeling to force alignment. */
1556
1557 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1558 Considerations:
1559 + How many accesses will become aligned due to the peeling
1560 - How many accesses will become unaligned due to the peeling,
1561 and the cost of misaligned accesses.
1562 - The cost of peeling (the extra runtime checks, the increase
1563 in code size). */
1564
1565 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1566 {
1567 stmt = DR_STMT (dr);
1568 stmt_info = vinfo_for_stmt (stmt);
1569
1570 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1571 continue;
1572
1573 /* For interleaving, only the alignment of the first access
1574 matters. */
1575 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1576 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1577 continue;
1578
1579 /* FORNOW: Any strided load prevents peeling. The induction
1580 variable analysis will fail when the prologue loop is generated,
1581 and so we can't generate the new base for the pointer. */
1582 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1583 {
1584 if (vect_print_dump_info (REPORT_DETAILS))
1585 fprintf (vect_dump, "strided load prevents peeling");
1586 do_peeling = false;
1587 break;
1588 }
1589
1590 /* For invariant accesses there is nothing to enhance. */
1591 if (integer_zerop (DR_STEP (dr)))
1592 continue;
1593
1594 /* Strided loads perform only component accesses, alignment is
1595 irrelevant for them. */
1596 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1597 continue;
1598
1599 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1600 do_peeling = vector_alignment_reachable_p (dr);
1601 if (do_peeling)
1602 {
1603 if (known_alignment_for_access_p (dr))
1604 {
1605 unsigned int npeel_tmp;
1606 bool negative = tree_int_cst_compare (DR_STEP (dr),
1607 size_zero_node) < 0;
1608
1609 /* Save info about DR in the hash table. */
1610 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1611 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1612 htab_create (1, vect_peeling_hash,
1613 vect_peeling_hash_eq, free);
1614
1615 vectype = STMT_VINFO_VECTYPE (stmt_info);
1616 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1617 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1618 TREE_TYPE (DR_REF (dr))));
1619 npeel_tmp = (negative
1620 ? (mis - nelements) : (nelements - mis))
1621 & (nelements - 1);
1622
1623 /* For multiple types, it is possible that the bigger type access
1624 will have more than one peeling option. E.g., a loop with two
1625 types: one of size (vector size / 4), and the other one of
 1626              size (vector size / 8).  The vectorization factor will be 8.  If both
 1627              accesses are misaligned by 3, the first one needs one scalar
 1628              iteration to be aligned, and the second one needs 5.  But the
 1629              first one will also be aligned by peeling 5 scalar
 1630              iterations, and in that case both accesses will be aligned.
 1631              Hence, except for the immediate peeling amount, we also want
 1632              to try to add a full vector size, as long as we don't exceed the
 1633              vectorization factor.
 1634              We do this automatically when the cost model is used, since we
 1635              calculate the cost for every peeling option.  */
1636 if (!flag_vect_cost_model)
 1637             possible_npeel_number = vf / nelements;
1638
1639 /* Handle the aligned case. We may decide to align some other
1640 access, making DR unaligned. */
1641 if (DR_MISALIGNMENT (dr) == 0)
1642 {
1643 npeel_tmp = 0;
1644 if (!flag_vect_cost_model)
1645 possible_npeel_number++;
1646 }
1647
1648 for (j = 0; j < possible_npeel_number; j++)
1649 {
1650 gcc_assert (npeel_tmp <= vf);
1651 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1652 npeel_tmp += nelements;
1653 }
1654
1655 all_misalignments_unknown = false;
1656 /* Data-ref that was chosen for the case that all the
1657 misalignments are unknown is not relevant anymore, since we
1658 have a data-ref with known alignment. */
1659 dr0 = NULL;
1660 }
1661 else
1662 {
1663 /* If we don't know all the misalignment values, we prefer
 1664              peeling for the data-ref that has the maximum number of data-refs
 1665              with the same alignment, unless the target prefers to align
 1666              stores over loads.  */
1667 if (all_misalignments_unknown)
1668 {
1669 if (same_align_drs_max < VEC_length (dr_p,
1670 STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1671 || !dr0)
1672 {
1673 same_align_drs_max = VEC_length (dr_p,
1674 STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1675 dr0 = dr;
1676 }
1677
1678 if (!first_store && DR_IS_WRITE (dr))
1679 first_store = dr;
1680 }
1681
1682 /* If there are both known and unknown misaligned accesses in the
1683 loop, we choose peeling amount according to the known
1684 accesses. */
1685
1686
1687 if (!supportable_dr_alignment)
1688 {
1689 dr0 = dr;
1690 if (!first_store && DR_IS_WRITE (dr))
1691 first_store = dr;
1692 }
1693 }
1694 }
1695 else
1696 {
1697 if (!aligned_access_p (dr))
1698 {
1699 if (vect_print_dump_info (REPORT_DETAILS))
1700 fprintf (vect_dump, "vector alignment may not be reachable");
1701
1702 break;
1703 }
1704 }
1705 }
1706
1707 vect_versioning_for_alias_required
1708 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1709
1710 /* Temporarily, if versioning for alias is required, we disable peeling
1711 until we support peeling and versioning. Often peeling for alignment
1712 will require peeling for loop-bound, which in turn requires that we
1713 know how to adjust the loop ivs after the loop. */
1714 if (vect_versioning_for_alias_required
1715 || !vect_can_advance_ivs_p (loop_vinfo)
1716 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1717 do_peeling = false;
1718
1719 if (do_peeling && all_misalignments_unknown
1720 && vect_supportable_dr_alignment (dr0, false))
1721 {
1722
1723 /* Check whether the target prefers aligning stores over loads, i.e., if
1724 misaligned stores are more expensive than misaligned loads (taking
1725 drs with the same alignment into account). */
1726 if (first_store && DR_IS_READ (dr0))
1727 {
1728 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1729 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1730 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1731 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1732 stmt_vector_for_cost dummy = VEC_alloc (stmt_info_for_cost, heap, 2);
1733
1734 vect_get_data_access_cost (dr0, &load_inside_cost, &load_outside_cost,
1735 &dummy);
1736 vect_get_data_access_cost (first_store, &store_inside_cost,
1737 &store_outside_cost, &dummy);
1738
1739 VEC_free (stmt_info_for_cost, heap, dummy);
1740
1741 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1742 aligning the load DR0). */
1743 load_inside_penalty = store_inside_cost;
1744 load_outside_penalty = store_outside_cost;
1745 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1746 (vinfo_for_stmt (DR_STMT (first_store))),
1747 i, dr);
1748 i++)
1749 if (DR_IS_READ (dr))
1750 {
1751 load_inside_penalty += load_inside_cost;
1752 load_outside_penalty += load_outside_cost;
1753 }
1754 else
1755 {
1756 load_inside_penalty += store_inside_cost;
1757 load_outside_penalty += store_outside_cost;
1758 }
1759
1760 /* Calculate the penalty for leaving DR0 unaligned (by
1761 aligning the FIRST_STORE). */
1762 store_inside_penalty = load_inside_cost;
1763 store_outside_penalty = load_outside_cost;
1764 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1765 (vinfo_for_stmt (DR_STMT (dr0))),
1766 i, dr);
1767 i++)
1768 if (DR_IS_READ (dr))
1769 {
1770 store_inside_penalty += load_inside_cost;
1771 store_outside_penalty += load_outside_cost;
1772 }
1773 else
1774 {
1775 store_inside_penalty += store_inside_cost;
1776 store_outside_penalty += store_outside_cost;
1777 }
1778
1779 if (load_inside_penalty > store_inside_penalty
1780 || (load_inside_penalty == store_inside_penalty
1781 && load_outside_penalty > store_outside_penalty))
1782 dr0 = first_store;
1783 }
1784
1785 /* In case there are only loads with different unknown misalignments, use
1786 peeling only if it may help to align other accesses in the loop. */
1787 if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1788 (vinfo_for_stmt (DR_STMT (dr0))))
1789 && vect_supportable_dr_alignment (dr0, false)
1790 != dr_unaligned_supported)
1791 do_peeling = false;
1792 }
1793
1794 if (do_peeling && !dr0)
1795 {
1796 /* Peeling is possible, but there is no data access that is not supported
1797 unless aligned. So we try to choose the best possible peeling. */
1798
1799 /* We should get here only if there are drs with known misalignment. */
1800 gcc_assert (!all_misalignments_unknown);
1801
1802 /* Choose the best peeling from the hash table. */
1803 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel,
1804 &body_cost_vec);
1805 if (!dr0 || !npeel)
1806 do_peeling = false;
1807 }
1808
1809 if (do_peeling)
1810 {
1811 stmt = DR_STMT (dr0);
1812 stmt_info = vinfo_for_stmt (stmt);
1813 vectype = STMT_VINFO_VECTYPE (stmt_info);
1814 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1815
1816 if (known_alignment_for_access_p (dr0))
1817 {
1818 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1819 size_zero_node) < 0;
1820 if (!npeel)
1821 {
1822 /* Since it's known at compile time, compute the number of
1823 iterations in the peeled loop (the peeling factor) for use in
1824 updating DR_MISALIGNMENT values. The peeling factor is the
1825 vectorization factor minus the misalignment as an element
1826 count. */
1827 mis = DR_MISALIGNMENT (dr0);
1828 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1829 npeel = ((negative ? mis - nelements : nelements - mis)
1830 & (nelements - 1));
1831 }
1832
1833 /* For interleaved data access every iteration accesses all the
1834 members of the group, therefore we divide the number of iterations
1835 by the group size. */
1836 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1837 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1838 npeel /= GROUP_SIZE (stmt_info);
1839
1840 if (vect_print_dump_info (REPORT_DETAILS))
1841 fprintf (vect_dump, "Try peeling by %d", npeel);
1842 }
1843
1844 /* Ensure that all data refs can be vectorized after the peel. */
1845 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1846 {
1847 int save_misalignment;
1848
1849 if (dr == dr0)
1850 continue;
1851
1852 stmt = DR_STMT (dr);
1853 stmt_info = vinfo_for_stmt (stmt);
1854 /* For interleaving, only the alignment of the first access
1855 matters. */
1856 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1857 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1858 continue;
1859
1860 /* Strided loads perform only component accesses, alignment is
1861 irrelevant for them. */
1862 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1863 continue;
1864
1865 save_misalignment = DR_MISALIGNMENT (dr);
1866 vect_update_misalignment_for_peel (dr, dr0, npeel);
1867 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1868 SET_DR_MISALIGNMENT (dr, save_misalignment);
1869
1870 if (!supportable_dr_alignment)
1871 {
1872 do_peeling = false;
1873 break;
1874 }
1875 }
1876
1877 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1878 {
1879 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1880 if (!stat)
1881 do_peeling = false;
1882 else
1883 return stat;
1884 }
1885
1886 if (do_peeling)
1887 {
1888 stmt_info_for_cost *si;
1889 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
1890
1891 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1892 If the misalignment of DR_i is identical to that of dr0 then set
1893 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1894 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1895 by the peeling factor times the element size of DR_i (MOD the
1896 vectorization factor times the size). Otherwise, the
1897 misalignment of DR_i must be set to unknown. */
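/* Worked illustration with assumed values (not taken from this file):
   for 16-byte vectors, a 4-byte element type and npeel = 2, a DR_i whose
   known misalignment was 8 bytes becomes (8 + 2 * 4) % 16 = 0, i.e.
   aligned, while a DR_i with unknown misalignment stays unknown.  */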
1898 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1899 if (dr != dr0)
1900 vect_update_misalignment_for_peel (dr, dr0, npeel);
1901
1902 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1903 if (npeel)
1904 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1905 else
1906 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1907 SET_DR_MISALIGNMENT (dr0, 0);
1908 if (vect_print_dump_info (REPORT_ALIGNMENT))
1909 fprintf (vect_dump, "Alignment of access forced using peeling.");
1910
1911 if (vect_print_dump_info (REPORT_DETAILS))
1912 fprintf (vect_dump, "Peeling for alignment will be applied.");
1913
1914 /* We've delayed passing the inside-loop peeling costs to the
1915 target cost model until we were sure peeling would happen.
1916 Do so now. */
1917 if (body_cost_vec)
1918 {
1919 FOR_EACH_VEC_ELT (stmt_info_for_cost, body_cost_vec, i, si)
1920 {
1921 struct _stmt_vec_info *stmt_info
1922 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1923 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1924 si->misalign, vect_body);
1925 }
1926 VEC_free (stmt_info_for_cost, heap, body_cost_vec);
1927 }
1928
1929 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1930 gcc_assert (stat);
1931 return stat;
1932 }
1933 }
1934
1935
1936 /* (2) Versioning to force alignment. */
1937
1938 /* Try versioning if:
1939 1) flag_tree_vect_loop_version is TRUE
1940 2) optimize loop for speed
1941 3) there is at least one unsupported misaligned data ref with an unknown
1942 misalignment, and
1943 4) all misaligned data refs with a known misalignment are supported, and
1944 5) the number of runtime alignment checks is within reason. */
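/* Conceptually, the versioned loop pair looks like the sketch below.
   This is only an illustration; the actual GIMPLE guard is built
   elsewhere in the vectorizer from LOOP_VINFO_MAY_MISALIGN_STMTS and
   LOOP_VINFO_PTR_MASK, which are filled in here:

     if (((addr1 | addr2 | ...) & mask) == 0)
       vectorized loop;    // all tested pointers proved aligned
     else
       scalar loop;        // original loop as fallback
*/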
1945
1946 do_versioning =
1947 flag_tree_vect_loop_version
1948 && optimize_loop_nest_for_speed_p (loop)
1949 && (!loop->inner); /* FORNOW */
1950
1951 if (do_versioning)
1952 {
1953 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1954 {
1955 stmt = DR_STMT (dr);
1956 stmt_info = vinfo_for_stmt (stmt);
1957
1958 /* For interleaving, only the alignment of the first access
1959 matters. */
1960 if (aligned_access_p (dr)
1961 || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1962 && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
1963 continue;
1964
1965 /* Strided loads perform only component accesses, alignment is
1966 irrelevant for them. */
1967 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1968 continue;
1969
1970 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1971
1972 if (!supportable_dr_alignment)
1973 {
1974 gimple stmt;
1975 int mask;
1976 tree vectype;
1977
1978 if (known_alignment_for_access_p (dr)
1979 || VEC_length (gimple,
1980 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1981 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1982 {
1983 do_versioning = false;
1984 break;
1985 }
1986
1987 stmt = DR_STMT (dr);
1988 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1989 gcc_assert (vectype);
1990
1991 /* The rightmost bits of an aligned address must be zeros.
1992 Construct the mask needed for this test. For example,
1993 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1994 mask must be 15 = 0xf. */
1995 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1996
1997 /* FORNOW: use the same mask to test all potentially unaligned
1998 references in the loop. The vectorizer currently supports
1999 a single vector size, see the reference to
2000 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2001 vectorization factor is computed. */
2002 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2003 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2004 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2005 VEC_safe_push (gimple, heap,
2006 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
2007 DR_STMT (dr));
2008 }
2009 }
2010
2011 /* Versioning requires at least one misaligned data reference. */
2012 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2013 do_versioning = false;
2014 else if (!do_versioning)
2015 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
2016 }
2017
2018 if (do_versioning)
2019 {
2020 VEC(gimple,heap) *may_misalign_stmts
2021 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2022 gimple stmt;
2023
2024 /* It can now be assumed that the data references in the statements
2025 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2026 of the loop being vectorized. */
2027 FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
2028 {
2029 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2030 dr = STMT_VINFO_DATA_REF (stmt_info);
2031 SET_DR_MISALIGNMENT (dr, 0);
2032 if (vect_print_dump_info (REPORT_ALIGNMENT))
2033 fprintf (vect_dump, "Alignment of access forced using versioning.");
2034 }
2035
2036 if (vect_print_dump_info (REPORT_DETAILS))
2037 fprintf (vect_dump, "Versioning for alignment will be applied.");
2038
2039 /* Peeling and versioning can't be done together at this time. */
2040 gcc_assert (! (do_peeling && do_versioning));
2041
2042 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
2043 gcc_assert (stat);
2044 return stat;
2045 }
2046
2047 /* This point is reached if neither peeling nor versioning is being done. */
2048 gcc_assert (! (do_peeling || do_versioning));
2049
2050 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
2051 return stat;
2052 }
2053
2054
2055 /* Function vect_find_same_alignment_drs.
2056
2057 Update group and alignment relations according to the chosen
2058 vectorization factor. */
2059
2060 static void
2061 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
2062 loop_vec_info loop_vinfo)
2063 {
2064 unsigned int i;
2065 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2066 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2067 struct data_reference *dra = DDR_A (ddr);
2068 struct data_reference *drb = DDR_B (ddr);
2069 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
2070 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
2071 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
2072 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
2073 lambda_vector dist_v;
2074 unsigned int loop_depth;
2075
2076 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
2077 return;
2078
2079 if (dra == drb)
2080 return;
2081
2082 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2083 return;
2084
2085 /* Loop-based vectorization and known data dependence. */
2086 if (DDR_NUM_DIST_VECTS (ddr) == 0)
2087 return;
2088
2089 /* Data-dependence analysis reports a distance vector of zero
2090 for data-references that overlap only in the first iteration
2091 but have different sign step (see PR45764).
2092 So as a sanity check require equal DR_STEP. */
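/* Illustration with hypothetical references (not from this file): with a
   vectorization factor of 4, the accesses a[i] and a[i+4] have dependence
   distance 4; since 4 % 4 == 0 and the element sizes match, the loop
   below records them as having the same alignment.  */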
2093 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
2094 return;
2095
2096 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
2097 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
2098 {
2099 int dist = dist_v[loop_depth];
2100
2101 if (vect_print_dump_info (REPORT_DR_DETAILS))
2102 fprintf (vect_dump, "dependence distance = %d.", dist);
2103
2104 /* Same loop iteration. */
2105 if (dist == 0
2106 || (dist % vectorization_factor == 0 && dra_size == drb_size))
2107 {
2108 /* Two references with distance zero have the same alignment. */
2109 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
2110 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
2111 if (vect_print_dump_info (REPORT_ALIGNMENT))
2112 fprintf (vect_dump, "accesses have the same alignment.");
2113 if (vect_print_dump_info (REPORT_DR_DETAILS))
2114 {
2115 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
2116 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
2117 fprintf (vect_dump, " and ");
2118 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
2119 }
2120 }
2121 }
2122 }
2123
2124
2125 /* Function vect_analyze_data_refs_alignment
2126
2127 Analyze the alignment of the data-references in the loop.
2128 Return FALSE if a data reference is found that cannot be vectorized. */
2129
2130 bool
2131 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
2132 bb_vec_info bb_vinfo)
2133 {
2134 if (vect_print_dump_info (REPORT_DETAILS))
2135 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2136
2137 /* Mark groups of data references with same alignment using
2138 data dependence information. */
2139 if (loop_vinfo)
2140 {
2141 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2142 struct data_dependence_relation *ddr;
2143 unsigned int i;
2144
2145 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
2146 vect_find_same_alignment_drs (ddr, loop_vinfo);
2147 }
2148
2149 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
2150 {
2151 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2152 fprintf (vect_dump,
2153 "not vectorized: can't calculate alignment for data ref.");
2154 return false;
2155 }
2156
2157 return true;
2158 }
2159
2160
2161 /* Analyze groups of accesses: check that DR belongs to a group of
2162 accesses of legal size, step, etc. Detect gaps, single element
2163 interleaving, and other special cases. Set grouped access info.
2164 Collect groups of strided stores for further use in SLP analysis. */
2165
2166 static bool
2167 vect_analyze_group_access (struct data_reference *dr)
2168 {
2169 tree step = DR_STEP (dr);
2170 tree scalar_type = TREE_TYPE (DR_REF (dr));
2171 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2172 gimple stmt = DR_STMT (dr);
2173 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2174 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2175 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2176 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2177 HOST_WIDE_INT groupsize, last_accessed_element = 1;
2178 bool slp_impossible = false;
2179 struct loop *loop = NULL;
2180
2181 if (loop_vinfo)
2182 loop = LOOP_VINFO_LOOP (loop_vinfo);
2183
2184 /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
2185 size of the interleaving group (including gaps). */
2186 groupsize = dr_step / type_size;
2187
2188 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2189 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2190 {
2191 /* Check if this DR is part of an interleaving group, and is a single
2192 element of the group that is accessed in the loop. */
2193
2194 /* Gaps are supported only for loads. STEP must be a multiple of the type
2195 size. The size of the group must be a power of 2. */
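/* Illustration only (hypothetical user code, not from GCC):

     for (i = 0; i < n; i++)
       sum += a[4 * i];

   Only the first element of each group of four consecutive elements is
   read, so the step is 4 * sizeof (a[0]) and the access is detected
   below as single element interleaving with groupsize 4.  */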
2196 if (DR_IS_READ (dr)
2197 && (dr_step % type_size) == 0
2198 && groupsize > 0
2199 && exact_log2 (groupsize) != -1)
2200 {
2201 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
2202 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2203 if (vect_print_dump_info (REPORT_DR_DETAILS))
2204 {
2205 fprintf (vect_dump, "Detected single element interleaving ");
2206 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2207 fprintf (vect_dump, " step ");
2208 print_generic_expr (vect_dump, step, TDF_SLIM);
2209 }
2210
2211 if (loop_vinfo)
2212 {
2213 if (vect_print_dump_info (REPORT_DETAILS))
2214 fprintf (vect_dump, "Data access with gaps requires scalar "
2215 "epilogue loop");
2216 if (loop->inner)
2217 {
2218 if (vect_print_dump_info (REPORT_DETAILS))
2219 fprintf (vect_dump, "Peeling for outer loop is not"
2220 " supported");
2221 return false;
2222 }
2223
2224 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2225 }
2226
2227 return true;
2228 }
2229
2230 if (vect_print_dump_info (REPORT_DETAILS))
2231 {
2232 fprintf (vect_dump, "not consecutive access ");
2233 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2234 }
2235
2236 if (bb_vinfo)
2237 {
2238 /* Mark the statement as unvectorizable. */
2239 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2240 return true;
2241 }
2242
2243 return false;
2244 }
2245
2246 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
2247 {
2248 /* First stmt in the interleaving chain. Check the chain. */
2249 gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
2250 struct data_reference *data_ref = dr;
2251 unsigned int count = 1;
2252 tree next_step;
2253 tree prev_init = DR_INIT (data_ref);
2254 gimple prev = stmt;
2255 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2256
2257 while (next)
2258 {
2259 /* Skip identical data-refs. In case two or more stmts share a
2260 data-ref (supported only for loads), we vectorize only the first
2261 stmt, and the rest get their vectorized loads from the first
2262 one. */
2263 if (!tree_int_cst_compare (DR_INIT (data_ref),
2264 DR_INIT (STMT_VINFO_DATA_REF (
2265 vinfo_for_stmt (next)))))
2266 {
2267 if (DR_IS_WRITE (data_ref))
2268 {
2269 if (vect_print_dump_info (REPORT_DETAILS))
2270 fprintf (vect_dump, "Two store stmts share the same dr.");
2271 return false;
2272 }
2273
2274 /* Check that there are no load-store dependences for these loads,
2275 to prevent a load-store-load sequence to the same location. */
2276 if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2277 || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2278 {
2279 if (vect_print_dump_info (REPORT_DETAILS))
2280 fprintf (vect_dump,
2281 "READ_WRITE dependence in interleaving.");
2282 return false;
2283 }
2284
2285 /* For loads, use the same data-ref load. */
2286 GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2287
2288 prev = next;
2289 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2290 continue;
2291 }
2292
2293 prev = next;
2294
2295 /* Check that all the accesses have the same STEP. */
2296 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2297 if (tree_int_cst_compare (step, next_step))
2298 {
2299 if (vect_print_dump_info (REPORT_DETAILS))
2300 fprintf (vect_dump, "not consecutive access in interleaving");
2301 return false;
2302 }
2303
2304 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2305 /* Check that the distance between two accesses is equal to the type
2306 size. Otherwise, we have gaps. */
2307 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2308 - TREE_INT_CST_LOW (prev_init)) / type_size;
2309 if (diff != 1)
2310 {
2311 /* FORNOW: SLP of accesses with gaps is not supported. */
2312 slp_impossible = true;
2313 if (DR_IS_WRITE (data_ref))
2314 {
2315 if (vect_print_dump_info (REPORT_DETAILS))
2316 fprintf (vect_dump, "interleaved store with gaps");
2317 return false;
2318 }
2319
2320 gaps += diff - 1;
2321 }
2322
2323 last_accessed_element += diff;
2324
2325 /* Store the gap from the previous member of the group. If there is no
2326 gap in the access, GROUP_GAP is always 1. */
2327 GROUP_GAP (vinfo_for_stmt (next)) = diff;
2328
2329 prev_init = DR_INIT (data_ref);
2330 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2331 /* Count the number of data-refs in the chain. */
2332 count++;
2333 }
2334
2335 /* COUNT is the number of accesses found; we multiply it by the size of
2336 the type to get COUNT_IN_BYTES. */
2337 count_in_bytes = type_size * count;
2338
2339 /* Check that the size of the interleaving (including gaps) is not
2340 greater than STEP. */
2341 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2342 {
2343 if (vect_print_dump_info (REPORT_DETAILS))
2344 {
2345 fprintf (vect_dump, "interleaving size is greater than step for ");
2346 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2347 }
2348 return false;
2349 }
2350
2351 /* Check that the size of the interleaving is equal to STEP for stores,
2352 i.e., that there are no gaps. */
2353 if (dr_step && dr_step != count_in_bytes)
2354 {
2355 if (DR_IS_READ (dr))
2356 {
2357 slp_impossible = true;
2358 /* There is a gap after the last load in the group. This gap is the
2359 difference between the groupsize and the number of elements.
2360 When there is no gap, this difference should be 0. */
2361 GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - count;
2362 }
2363 else
2364 {
2365 if (vect_print_dump_info (REPORT_DETAILS))
2366 fprintf (vect_dump, "interleaved store with gaps");
2367 return false;
2368 }
2369 }
2370
2371 /* Check that STEP is a multiple of type size. */
2372 if (dr_step && (dr_step % type_size) != 0)
2373 {
2374 if (vect_print_dump_info (REPORT_DETAILS))
2375 {
2376 fprintf (vect_dump, "step is not a multiple of type size: step ");
2377 print_generic_expr (vect_dump, step, TDF_SLIM);
2378 fprintf (vect_dump, " size ");
2379 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2380 TDF_SLIM);
2381 }
2382 return false;
2383 }
2384
2385 if (groupsize == 0)
2386 groupsize = count;
2387
2388 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2389 if (vect_print_dump_info (REPORT_DETAILS))
2390 fprintf (vect_dump, "Detected interleaving of size %d", (int)groupsize);
2391
2392 /* SLP: create an SLP data structure for every interleaving group of
2393 stores for further analysis in vect_analyse_slp. */
2394 if (DR_IS_WRITE (dr) && !slp_impossible)
2395 {
2396 if (loop_vinfo)
2397 VEC_safe_push (gimple, heap, LOOP_VINFO_GROUPED_STORES (loop_vinfo),
2398 stmt);
2399 if (bb_vinfo)
2400 VEC_safe_push (gimple, heap, BB_VINFO_GROUPED_STORES (bb_vinfo),
2401 stmt);
2402 }
2403
2404 /* There is a gap at the end of the group. */
2405 if (groupsize - last_accessed_element > 0 && loop_vinfo)
2406 {
2407 if (vect_print_dump_info (REPORT_DETAILS))
2408 fprintf (vect_dump, "Data access with gaps requires scalar "
2409 "epilogue loop");
2410 if (loop->inner)
2411 {
2412 if (vect_print_dump_info (REPORT_DETAILS))
2413 fprintf (vect_dump, "Peeling for outer loop is not supported");
2414 return false;
2415 }
2416
2417 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2418 }
2419 }
2420
2421 return true;
2422 }
2423
2424
2425 /* Analyze the access pattern of the data-reference DR.
2426 In case of non-consecutive accesses call vect_analyze_group_access() to
2427 analyze groups of accesses. */
2428
2429 static bool
2430 vect_analyze_data_ref_access (struct data_reference *dr)
2431 {
2432 tree step = DR_STEP (dr);
2433 tree scalar_type = TREE_TYPE (DR_REF (dr));
2434 gimple stmt = DR_STMT (dr);
2435 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2436 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2437 struct loop *loop = NULL;
2438
2439 if (loop_vinfo)
2440 loop = LOOP_VINFO_LOOP (loop_vinfo);
2441
2442 if (loop_vinfo && !step)
2443 {
2444 if (vect_print_dump_info (REPORT_DETAILS))
2445 fprintf (vect_dump, "bad data-ref access in loop");
2446 return false;
2447 }
2448
2449 /* Allow invariant loads in loops. */
2450 if (loop_vinfo && integer_zerop (step))
2451 {
2452 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2453 return DR_IS_READ (dr);
2454 }
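/* Illustration only (hypothetical user code, not from GCC):

     for (i = 0; i < n; i++)
       x[i] = c[0] + y[i];

   The read of c[0] has DR_STEP zero, so it is accepted here as an
   invariant load; an invariant store would be rejected instead.  */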
2455
2456 if (loop && nested_in_vect_loop_p (loop, stmt))
2457 {
2458 /* Interleaved accesses are not yet supported within outer-loop
2459 vectorization for references in the inner-loop. */
2460 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2461
2462 /* For the rest of the analysis we use the outer-loop step. */
2463 step = STMT_VINFO_DR_STEP (stmt_info);
2464 if (integer_zerop (step))
2465 {
2466 if (vect_print_dump_info (REPORT_ALIGNMENT))
2467 fprintf (vect_dump, "zero step in outer loop.");
2468 if (DR_IS_READ (dr))
2469 return true;
2470 else
2471 return false;
2472 }
2473 }
2474
2475 /* Consecutive? */
2476 if (TREE_CODE (step) == INTEGER_CST)
2477 {
2478 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2479 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2480 || (dr_step < 0
2481 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2482 {
2483 /* Mark that it is not interleaving. */
2484 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2485 return true;
2486 }
2487 }
2488
2489 if (loop && nested_in_vect_loop_p (loop, stmt))
2490 {
2491 if (vect_print_dump_info (REPORT_ALIGNMENT))
2492 fprintf (vect_dump, "grouped access in outer loop.");
2493 return false;
2494 }
2495
2496 /* Assume this is a DR handled by the non-constant strided load case. */
2497 if (TREE_CODE (step) != INTEGER_CST)
2498 return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
2499
2500 /* Non-consecutive access - check if it's part of an interleaving group. */
2501 return vect_analyze_group_access (dr);
2502 }
2503
2504
2505 /* Function vect_analyze_data_ref_accesses.
2506
2507 Analyze the access pattern of all the data references in the loop.
2508
2509 FORNOW: the only access pattern that is considered vectorizable is a
2510 simple step 1 (consecutive) access.
2511
2512 FORNOW: handle only arrays and pointer accesses. */
2513
2514 bool
2515 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2516 {
2517 unsigned int i;
2518 VEC (data_reference_p, heap) *datarefs;
2519 struct data_reference *dr;
2520
2521 if (vect_print_dump_info (REPORT_DETAILS))
2522 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2523
2524 if (loop_vinfo)
2525 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2526 else
2527 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2528
2529 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2530 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2531 && !vect_analyze_data_ref_access (dr))
2532 {
2533 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2534 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2535
2536 if (bb_vinfo)
2537 {
2538 /* Mark the statement as not vectorizable. */
2539 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2540 continue;
2541 }
2542 else
2543 return false;
2544 }
2545
2546 return true;
2547 }
2548
2549 /* Function vect_prune_runtime_alias_test_list.
2550
2551 Prune the list of ddrs to be tested at run-time by versioning for alias.
2552 Return FALSE if the resulting list of ddrs is longer than allowed by
2553 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2554
2555 bool
2556 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2557 {
2558 VEC (ddr_p, heap) * ddrs =
2559 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2560 unsigned i, j;
2561
2562 if (vect_print_dump_info (REPORT_DETAILS))
2563 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2564
2565 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2566 {
2567 bool found;
2568 ddr_p ddr_i;
2569
2570 ddr_i = VEC_index (ddr_p, ddrs, i);
2571 found = false;
2572
2573 for (j = 0; j < i; j++)
2574 {
2575 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2576
2577 if (vect_vfa_range_equal (ddr_i, ddr_j))
2578 {
2579 if (vect_print_dump_info (REPORT_DR_DETAILS))
2580 {
2581 fprintf (vect_dump, "found equal ranges ");
2582 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2583 fprintf (vect_dump, ", ");
2584 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2585 fprintf (vect_dump, " and ");
2586 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2587 fprintf (vect_dump, ", ");
2588 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2589 }
2590 found = true;
2591 break;
2592 }
2593 }
2594
2595 if (found)
2596 {
2597 VEC_ordered_remove (ddr_p, ddrs, i);
2598 continue;
2599 }
2600 i++;
2601 }
2602
2603 if (VEC_length (ddr_p, ddrs) >
2604 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2605 {
2606 if (vect_print_dump_info (REPORT_DR_DETAILS))
2607 {
2608 fprintf (vect_dump,
2609 "disable versioning for alias - max number of generated "
2610 "checks exceeded.");
2611 }
2612
2613 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2614
2615 return false;
2616 }
2617
2618 return true;
2619 }
2620
2621 /* Check whether a non-affine read in STMT is suitable for a gather load
2622 and, if so, return a builtin decl for that operation. */
2623
2624 tree
2625 vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2626 tree *offp, int *scalep)
2627 {
2628 HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
2629 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2630 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2631 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2632 tree offtype = NULL_TREE;
2633 tree decl, base, off;
2634 enum machine_mode pmode;
2635 int punsignedp, pvolatilep;
2636
2637 /* The gather builtins need an address of the form
2638 loop_invariant + vector * {1, 2, 4, 8}
2639 or
2640 loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
2641 Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
2642 of loop invariants/SSA_NAMEs defined in the loop, with casts,
2643 multiplications and additions in it. To get a vector, we need
2644 a single SSA_NAME that will be defined in the loop and will
2645 contain everything that is not loop invariant and that can be
2646 vectorized. The following code attempts to find such a preexisting
2647 SSA_NAME OFF and put the loop invariants into a tree BASE
2648 that can be gimplified before the loop. */
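/* Illustration only (hypothetical user code, not from GCC):

     for (i = 0; i < n; i++)
       sum += data[idx[i]];

   The loop invariant address of 'data' ends up in BASE, while the
   SSA_NAME holding idx[i] (possibly sign-extended) becomes OFF, giving
   the loop_invariant + vector * scale shape described above.  */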
2649 base = get_inner_reference (DR_REF (dr), &pbitsize, &pbitpos, &off,
2650 &pmode, &punsignedp, &pvolatilep, false);
2651 gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);
2652
2653 if (TREE_CODE (base) == MEM_REF)
2654 {
2655 if (!integer_zerop (TREE_OPERAND (base, 1)))
2656 {
2657 if (off == NULL_TREE)
2658 {
2659 double_int moff = mem_ref_offset (base);
2660 off = double_int_to_tree (sizetype, moff);
2661 }
2662 else
2663 off = size_binop (PLUS_EXPR, off,
2664 fold_convert (sizetype, TREE_OPERAND (base, 1)));
2665 }
2666 base = TREE_OPERAND (base, 0);
2667 }
2668 else
2669 base = build_fold_addr_expr (base);
2670
2671 if (off == NULL_TREE)
2672 off = size_zero_node;
2673
2674 /* If BASE is not loop invariant, give up unless OFF is 0; in that case
2675 start with just the constant offset in the loop invariant BASE and
2676 continue with the old base as OFF.
2677 We could handle the non-zero OFF case by gimplifying the addition of
2678 base + off into some SSA_NAME and using that as off, but for now punt. */
2679 if (!expr_invariant_in_loop_p (loop, base))
2680 {
2681 if (!integer_zerop (off))
2682 return NULL_TREE;
2683 off = base;
2684 base = size_int (pbitpos / BITS_PER_UNIT);
2685 }
2686 /* Otherwise put base + constant offset into the loop invariant BASE
2687 and continue with OFF. */
2688 else
2689 {
2690 base = fold_convert (sizetype, base);
2691 base = size_binop (PLUS_EXPR, base, size_int (pbitpos / BITS_PER_UNIT));
2692 }
2693
2694 /* OFF at this point may be either a SSA_NAME or some tree expression
2695 from get_inner_reference. Try to peel off loop invariants from it
2696 into BASE as long as possible. */
2697 STRIP_NOPS (off);
2698 while (offtype == NULL_TREE)
2699 {
2700 enum tree_code code;
2701 tree op0, op1, add = NULL_TREE;
2702
2703 if (TREE_CODE (off) == SSA_NAME)
2704 {
2705 gimple def_stmt = SSA_NAME_DEF_STMT (off);
2706
2707 if (expr_invariant_in_loop_p (loop, off))
2708 return NULL_TREE;
2709
2710 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
2711 break;
2712
2713 op0 = gimple_assign_rhs1 (def_stmt);
2714 code = gimple_assign_rhs_code (def_stmt);
2715 op1 = gimple_assign_rhs2 (def_stmt);
2716 }
2717 else
2718 {
2719 if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
2720 return NULL_TREE;
2721 code = TREE_CODE (off);
2722 extract_ops_from_tree (off, &code, &op0, &op1);
2723 }
2724 switch (code)
2725 {
2726 case POINTER_PLUS_EXPR:
2727 case PLUS_EXPR:
2728 if (expr_invariant_in_loop_p (loop, op0))
2729 {
2730 add = op0;
2731 off = op1;
2732 do_add:
2733 add = fold_convert (sizetype, add);
2734 if (scale != 1)
2735 add = size_binop (MULT_EXPR, add, size_int (scale));
2736 base = size_binop (PLUS_EXPR, base, add);
2737 continue;
2738 }
2739 if (expr_invariant_in_loop_p (loop, op1))
2740 {
2741 add = op1;
2742 off = op0;
2743 goto do_add;
2744 }
2745 break;
2746 case MINUS_EXPR:
2747 if (expr_invariant_in_loop_p (loop, op1))
2748 {
2749 add = fold_convert (sizetype, op1);
2750 add = size_binop (MINUS_EXPR, size_zero_node, add);
2751 off = op0;
2752 goto do_add;
2753 }
2754 break;
2755 case MULT_EXPR:
2756 if (scale == 1 && host_integerp (op1, 0))
2757 {
2758 scale = tree_low_cst (op1, 0);
2759 off = op0;
2760 continue;
2761 }
2762 break;
2763 case SSA_NAME:
2764 off = op0;
2765 continue;
2766 CASE_CONVERT:
2767 if (!POINTER_TYPE_P (TREE_TYPE (op0))
2768 && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2769 break;
2770 if (TYPE_PRECISION (TREE_TYPE (op0))
2771 == TYPE_PRECISION (TREE_TYPE (off)))
2772 {
2773 off = op0;
2774 continue;
2775 }
2776 if (TYPE_PRECISION (TREE_TYPE (op0))
2777 < TYPE_PRECISION (TREE_TYPE (off)))
2778 {
2779 off = op0;
2780 offtype = TREE_TYPE (off);
2781 STRIP_NOPS (off);
2782 continue;
2783 }
2784 break;
2785 default:
2786 break;
2787 }
2788 break;
2789 }
2790
2791 /* If at the end OFF still isn't a SSA_NAME or isn't
2792 defined in the loop, punt. */
2793 if (TREE_CODE (off) != SSA_NAME
2794 || expr_invariant_in_loop_p (loop, off))
2795 return NULL_TREE;
2796
2797 if (offtype == NULL_TREE)
2798 offtype = TREE_TYPE (off);
2799
2800 decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
2801 offtype, scale);
2802 if (decl == NULL_TREE)
2803 return NULL_TREE;
2804
2805 if (basep)
2806 *basep = base;
2807 if (offp)
2808 *offp = off;
2809 if (scalep)
2810 *scalep = scale;
2811 return decl;
2812 }
2813
2814 /* Check whether a non-affine load in STMT (which is in the loop referred to
2815 in LOOP_VINFO) is suitable for handling as a strided load. That is the case
2816 if its address is a simple induction variable. If so, return the base
2817 of that induction variable in *BASEP and the (loop-invariant) step
2818 in *STEPP, each only when the corresponding pointer is non-NULL.
2819
2820 This handles ARRAY_REFs (with variant index) and MEM_REFs (with variant
2821 base pointer) only. */
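/* Illustration only (hypothetical user code, not from GCC):

     for (i = 0; i < n; i++)
       sum += a[i * stride];

   The ARRAY_REF index holding i * stride is an SSA_NAME whose evolution
   is a simple induction variable with a loop-invariant step, so *BASEP
   and *STEPP receive that IV's base and step.  */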
2822
2823 bool
2824 vect_check_strided_load (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2825 tree *stepp)
2826 {
2827 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2828 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2829 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2830 tree base, off;
2831 affine_iv iv;
2832
2833 if (!DR_IS_READ (dr))
2834 return false;
2835
2836 base = DR_REF (dr);
2837
2838 if (TREE_CODE (base) == ARRAY_REF)
2839 {
2840 off = TREE_OPERAND (base, 1);
2841 base = TREE_OPERAND (base, 0);
2842 }
2843 else if (TREE_CODE (base) == MEM_REF)
2844 {
2845 off = TREE_OPERAND (base, 0);
2846 base = TREE_OPERAND (base, 1);
2847 }
2848 else
2849 return false;
2850
2851 if (TREE_CODE (off) != SSA_NAME)
2852 return false;
2853
2854 if (!expr_invariant_in_loop_p (loop, base)
2855 || !simple_iv (loop, loop_containing_stmt (stmt), off, &iv, true))
2856 return false;
2857
2858 if (basep)
2859 *basep = iv.base;
2860 if (stepp)
2861 *stepp = iv.step;
2862 return true;
2863 }
2864
2865 /* Function vect_analyze_data_refs.
2866
2867 Find all the data references in the loop or basic block.
2868
2869 The general structure of the analysis of data refs in the vectorizer is as
2870 follows:
2871 1- vect_analyze_data_refs(loop/bb): call
2872 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2873 in the loop/bb and their dependences.
2874 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2875 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2876 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2877
2878 */
2879
2880 bool
2881 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2882 bb_vec_info bb_vinfo,
2883 int *min_vf)
2884 {
2885 struct loop *loop = NULL;
2886 basic_block bb = NULL;
2887 unsigned int i;
2888 VEC (data_reference_p, heap) *datarefs;
2889 struct data_reference *dr;
2890 tree scalar_type;
2891 bool res, stop_bb_analysis = false;
2892
2893 if (vect_print_dump_info (REPORT_DETAILS))
2894 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2895
2896 if (loop_vinfo)
2897 {
2898 loop = LOOP_VINFO_LOOP (loop_vinfo);
2899 res = compute_data_dependences_for_loop
2900 (loop, true,
2901 &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2902 &LOOP_VINFO_DATAREFS (loop_vinfo),
2903 &LOOP_VINFO_DDRS (loop_vinfo));
2904
2905 if (!res)
2906 {
2907 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2908 fprintf (vect_dump, "not vectorized: loop contains function calls"
2909 " or data references that cannot be analyzed");
2910 return false;
2911 }
2912
2913 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2914 }
2915 else
2916 {
2917 gimple_stmt_iterator gsi;
2918
2919 bb = BB_VINFO_BB (bb_vinfo);
2920 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2921 {
2922 gimple stmt = gsi_stmt (gsi);
2923 if (!find_data_references_in_stmt (NULL, stmt,
2924 &BB_VINFO_DATAREFS (bb_vinfo)))
2925 {
2926 /* Mark the rest of the basic-block as unvectorizable. */
2927 for (; !gsi_end_p (gsi); gsi_next (&gsi))
2928 {
2929 stmt = gsi_stmt (gsi);
2930 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)) = false;
2931 }
2932 break;
2933 }
2934 }
2935 if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo),
2936 &BB_VINFO_DDRS (bb_vinfo), NULL, true))
2937 {
2938 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2939 fprintf (vect_dump, "not vectorized: basic block contains function"
2940 " calls or data references that cannot be analyzed");
2941 return false;
2942 }
2943
2944 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2945 }
2946
2947 /* Go through the data-refs, check that the analysis succeeded. Update
2948 pointer from stmt_vec_info struct to DR and vectype. */
2949
2950 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2951 {
2952 gimple stmt;
2953 stmt_vec_info stmt_info;
2954 tree base, offset, init;
2955 bool gather = false;
2956 int vf;
2957
2958 if (!dr || !DR_REF (dr))
2959 {
2960 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2961 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2962
2963 return false;
2964 }
2965
2966 stmt = DR_STMT (dr);
2967 stmt_info = vinfo_for_stmt (stmt);
2968
2969 if (stop_bb_analysis)
2970 {
2971 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2972 continue;
2973 }
2974
2975 /* Check that analysis of the data-ref succeeded. */
2976 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2977 || !DR_STEP (dr))
2978 {
2979 /* If the target supports vector gather loads, see if they can
2980 be used. */
2981 if (loop_vinfo
2982 && DR_IS_READ (dr)
2983 && !TREE_THIS_VOLATILE (DR_REF (dr))
2984 && targetm.vectorize.builtin_gather != NULL
2985 && !nested_in_vect_loop_p (loop, stmt))
2986 {
2987 struct data_reference *newdr
2988 = create_data_ref (NULL, loop_containing_stmt (stmt),
2989 DR_REF (dr), stmt, true);
2990 gcc_assert (newdr != NULL && DR_REF (newdr));
2991 if (DR_BASE_ADDRESS (newdr)
2992 && DR_OFFSET (newdr)
2993 && DR_INIT (newdr)
2994 && DR_STEP (newdr)
2995 && integer_zerop (DR_STEP (newdr)))
2996 {
2997 dr = newdr;
2998 gather = true;
2999 }
3000 else
3001 free_data_ref (newdr);
3002 }
3003
3004 if (!gather)
3005 {
3006 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3007 {
3008 fprintf (vect_dump, "not vectorized: data ref analysis "
3009 "failed ");
3010 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3011 }
3012
3013 if (bb_vinfo)
3014 {
3015 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3016 stop_bb_analysis = true;
3017 continue;
3018 }
3019
3020 return false;
3021 }
3022 }
3023
3024 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3025 {
3026 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3027 fprintf (vect_dump, "not vectorized: base addr of dr is a "
3028 "constant");
3029
3030 if (bb_vinfo)
3031 {
3032 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3033 stop_bb_analysis = true;
3034 continue;
3035 }
3036
3037 if (gather)
3038 free_data_ref (dr);
3039 return false;
3040 }
3041
3042 if (TREE_THIS_VOLATILE (DR_REF (dr)))
3043 {
3044 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3045 {
3046 fprintf (vect_dump, "not vectorized: volatile type ");
3047 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3048 }
3049
3050 if (bb_vinfo)
3051 {
3052 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3053 stop_bb_analysis = true;
3054 continue;
3055 }
3056
3057 return false;
3058 }
3059
3060 if (stmt_can_throw_internal (stmt))
3061 {
3062 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3063 {
3064 fprintf (vect_dump, "not vectorized: statement can throw an "
3065 "exception ");
3066 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3067 }
3068
3069 if (bb_vinfo)
3070 {
3071 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3072 stop_bb_analysis = true;
3073 continue;
3074 }
3075
3076 if (gather)
3077 free_data_ref (dr);
3078 return false;
3079 }
3080
3081 if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
3082 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
3083 {
3084 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3085 {
3086 fprintf (vect_dump, "not vectorized: statement is bitfield "
3087 "access ");
3088 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3089 }
3090
3091 if (bb_vinfo)
3092 {
3093 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3094 stop_bb_analysis = true;
3095 continue;
3096 }
3097
3098 if (gather)
3099 free_data_ref (dr);
3100 return false;
3101 }
3102
3103 base = unshare_expr (DR_BASE_ADDRESS (dr));
3104 offset = unshare_expr (DR_OFFSET (dr));
3105 init = unshare_expr (DR_INIT (dr));
3106
3107 if (is_gimple_call (stmt))
3108 {
3109 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3110 {
3111 fprintf (vect_dump, "not vectorized: dr in a call ");
3112 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3113 }
3114
3115 if (bb_vinfo)
3116 {
3117 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3118 stop_bb_analysis = true;
3119 continue;
3120 }
3121
3122 if (gather)
3123 free_data_ref (dr);
3124 return false;
3125 }
3126
3127 /* Update DR field in stmt_vec_info struct. */
3128
3129 /* If the dataref is in an inner-loop of the loop that is considered
3130 for vectorization, we also want to analyze the access relative to
3131 the outer-loop (DR contains information only relative to the
3132 inner-most enclosing loop). We do that by building a reference to the
3133 first location accessed by the inner-loop, and analyze it relative to
3134 the outer-loop. */
3135 if (loop && nested_in_vect_loop_p (loop, stmt))
3136 {
3137 tree outer_step, outer_base, outer_init;
3138 HOST_WIDE_INT pbitsize, pbitpos;
3139 tree poffset;
3140 enum machine_mode pmode;
3141 int punsignedp, pvolatilep;
3142 affine_iv base_iv, offset_iv;
3143 tree dinit;
3144
3145 /* Build a reference to the first location accessed by the
3146 inner-loop: *(BASE+INIT). (The first location is actually
3147 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3148 tree inner_base = build_fold_indirect_ref
3149 (fold_build_pointer_plus (base, init));
3150
3151 if (vect_print_dump_info (REPORT_DETAILS))
3152 {
3153 fprintf (vect_dump, "analyze in outer-loop: ");
3154 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
3155 }
3156
3157 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3158 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3159 gcc_assert (outer_base != NULL_TREE);
3160
3161 if (pbitpos % BITS_PER_UNIT != 0)
3162 {
3163 if (vect_print_dump_info (REPORT_DETAILS))
3164 fprintf (vect_dump, "failed: bit offset alignment.\n");
3165 return false;
3166 }
3167
3168 outer_base = build_fold_addr_expr (outer_base);
3169 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
3170 &base_iv, false))
3171 {
3172 if (vect_print_dump_info (REPORT_DETAILS))
3173 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
3174 return false;
3175 }
3176
3177 if (offset)
3178 {
3179 if (poffset)
3180 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
3181 poffset);
3182 else
3183 poffset = offset;
3184 }
3185
3186 if (!poffset)
3187 {
3188 offset_iv.base = ssize_int (0);
3189 offset_iv.step = ssize_int (0);
3190 }
3191 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
3192 &offset_iv, false))
3193 {
3194 if (vect_print_dump_info (REPORT_DETAILS))
3195 fprintf (vect_dump, "evolution of offset is not affine.\n");
3196 return false;
3197 }
3198
3199 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3200 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3201 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3202 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3203 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3204
3205 outer_step = size_binop (PLUS_EXPR,
3206 fold_convert (ssizetype, base_iv.step),
3207 fold_convert (ssizetype, offset_iv.step));
3208
3209 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3210 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3211 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3212 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3213 STMT_VINFO_DR_OFFSET (stmt_info) =
3214 fold_convert (ssizetype, offset_iv.base);
3215 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3216 size_int (highest_pow2_factor (offset_iv.base));
3217
3218 if (vect_print_dump_info (REPORT_DETAILS))
3219 {
3220 fprintf (vect_dump, "\touter base_address: ");
3221 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3222 fprintf (vect_dump, "\n\touter offset from base address: ");
3223 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3224 fprintf (vect_dump, "\n\touter constant offset from base address: ");
3225 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3226 fprintf (vect_dump, "\n\touter step: ");
3227 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3228 fprintf (vect_dump, "\n\touter aligned to: ");
3229 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3230 }
3231 }
3232
3233 if (STMT_VINFO_DATA_REF (stmt_info))
3234 {
3235 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3236 {
3237 fprintf (vect_dump,
3238 "not vectorized: more than one data ref in stmt: ");
3239 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3240 }
3241
3242 if (bb_vinfo)
3243 {
3244 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3245 stop_bb_analysis = true;
3246 continue;
3247 }
3248
3249 if (gather)
3250 free_data_ref (dr);
3251 return false;
3252 }
3253
3254 STMT_VINFO_DATA_REF (stmt_info) = dr;
3255
3256 /* Set vectype for STMT. */
3257 scalar_type = TREE_TYPE (DR_REF (dr));
3258 STMT_VINFO_VECTYPE (stmt_info) =
3259 get_vectype_for_scalar_type (scalar_type);
3260 if (!STMT_VINFO_VECTYPE (stmt_info))
3261 {
3262 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3263 {
3264 fprintf (vect_dump,
3265 "not vectorized: no vectype for stmt: ");
3266 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3267 fprintf (vect_dump, " scalar_type: ");
3268 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3269 }
3270
3271 if (bb_vinfo)
3272 {
3273 /* Mark the statement as not vectorizable. */
3274 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3275 stop_bb_analysis = true;
3276 continue;
3277 }
3278
3279 if (gather)
3280 {
3281 STMT_VINFO_DATA_REF (stmt_info) = NULL;
3282 free_data_ref (dr);
3283 }
3284 return false;
3285 }
3286
3287 /* Adjust the minimal vectorization factor according to the
3288 vector type. */
3289 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
3290 if (vf > *min_vf)
3291 *min_vf = vf;
3292
3293 if (gather)
3294 {
3295 unsigned int j, k, n;
3296 struct data_reference *olddr
3297 = VEC_index (data_reference_p, datarefs, i);
3298 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
3299 struct data_dependence_relation *ddr, *newddr;
3300 bool bad = false;
3301 tree off;
3302 VEC (loop_p, heap) *nest = LOOP_VINFO_LOOP_NEST (loop_vinfo);
3303
3304 gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
3305 if (gather
3306 && get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
3307 gather = false;
3308 if (!gather)
3309 {
3310 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3311 {
3312 fprintf (vect_dump,
3313 "not vectorized: not suitable for gather load ");
3314 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3315 }
3316 return false;
3317 }
3318
3319 n = VEC_length (data_reference_p, datarefs) - 1;
3320 for (j = 0, k = i - 1; j < i; j++)
3321 {
3322 ddr = VEC_index (ddr_p, ddrs, k);
3323 gcc_assert (DDR_B (ddr) == olddr);
3324 newddr = initialize_data_dependence_relation (DDR_A (ddr), dr,
3325 nest);
3326 VEC_replace (ddr_p, ddrs, k, newddr);
3327 free_dependence_relation (ddr);
3328 if (!bad
3329 && DR_IS_WRITE (DDR_A (newddr))
3330 && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3331 bad = true;
3332 k += --n;
3333 }
3334
3335 k++;
3336 n = k + VEC_length (data_reference_p, datarefs) - i - 1;
3337 for (; k < n; k++)
3338 {
3339 ddr = VEC_index (ddr_p, ddrs, k);
3340 gcc_assert (DDR_A (ddr) == olddr);
3341 newddr = initialize_data_dependence_relation (dr, DDR_B (ddr),
3342 nest);
3343 VEC_replace (ddr_p, ddrs, k, newddr);
3344 free_dependence_relation (ddr);
3345 if (!bad
3346 && DR_IS_WRITE (DDR_B (newddr))
3347 && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3348 bad = true;
3349 }
3350
3351 k = VEC_length (ddr_p, ddrs)
3352 - VEC_length (data_reference_p, datarefs) + i;
3353 ddr = VEC_index (ddr_p, ddrs, k);
3354 gcc_assert (DDR_A (ddr) == olddr && DDR_B (ddr) == olddr);
3355 newddr = initialize_data_dependence_relation (dr, dr, nest);
3356 VEC_replace (ddr_p, ddrs, k, newddr);
3357 free_dependence_relation (ddr);
3358 VEC_replace (data_reference_p, datarefs, i, dr);
3359
3360 if (bad)
3361 {
3362 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3363 {
3364 fprintf (vect_dump,
3365 "not vectorized: data dependence conflict"
3366 " prevents gather load");
3367 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3368 }
3369 return false;
3370 }
3371
3372 STMT_VINFO_GATHER_P (stmt_info) = true;
3373 }
3374 else if (loop_vinfo
3375 && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
3376 {
3377 bool strided_load = false;
3378 if (!nested_in_vect_loop_p (loop, stmt))
3379 strided_load
3380 = vect_check_strided_load (stmt, loop_vinfo, NULL, NULL);
3381 if (!strided_load)
3382 {
3383 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3384 {
3385 fprintf (vect_dump,
3386 "not vectorized: not suitable for strided load ");
3387 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3388 }
3389 return false;
3390 }
3391 STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
3392 }
3393 }
3394
3395 return true;
3396 }
3397
3398
3399 /* Function vect_get_new_vect_var.
3400
3401 Returns a name for a new variable. The current naming scheme prepends the
3402 prefix "vect_", "stmp_" or "vect_p" (depending on the value of VAR_KIND)
3403 to NAME, if provided, to form the name of a vectorizer generated
3404 variable. */
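/* For example (illustrative only, the variable names are made up):
   vect_get_new_vect_var (ptr_type, vect_pointer_var, "x") creates a
   temporary register whose name starts with "vect_px"; any further
   suffix is whatever create_tmp_reg adds to keep the name unique.  */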
3405
3406 tree
3407 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
3408 {
3409 const char *prefix;
3410 tree new_vect_var;
3411
3412 switch (var_kind)
3413 {
3414 case vect_simple_var:
3415 prefix = "vect_";
3416 break;
3417 case vect_scalar_var:
3418 prefix = "stmp_";
3419 break;
3420 case vect_pointer_var:
3421 prefix = "vect_p";
3422 break;
3423 default:
3424 gcc_unreachable ();
3425 }
3426
3427 if (name)
3428 {
3429 char* tmp = concat (prefix, name, NULL);
3430 new_vect_var = create_tmp_reg (type, tmp);
3431 free (tmp);
3432 }
3433 else
3434 new_vect_var = create_tmp_reg (type, prefix);
3435 add_referenced_var (new_vect_var);
3436
3437 return new_vect_var;
3438 }
3439
3440
3441 /* Function vect_create_addr_base_for_vector_ref.
3442
3443 Create an expression that computes the address of the first memory location
3444 that will be accessed for a data reference.
3445
3446 Input:
3447 STMT: The statement containing the data reference.
3448 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
3449 OFFSET: Optional. If supplied, it is added to the initial address.
3450 LOOP: Specify relative to which loop-nest should the address be computed.
3451 For example, when the dataref is in an inner-loop nested in an
3452 outer-loop that is now being vectorized, LOOP can be either the
3453 outer-loop, or the inner-loop. The first memory location accessed
3454 by the following dataref ('in' points to short):
3455
3456 for (i=0; i<N; i++)
3457 for (j=0; j<M; j++)
3458 s += in[i+j]
3459
3460 is as follows:
3461 if LOOP=i_loop: &in (relative to i_loop)
3462 if LOOP=j_loop: &in+i*2B (relative to j_loop)
3463
3464 Output:
3465 1. Return an SSA_NAME whose value is the address of the memory location of
3466 the first vector of the data reference.
3467 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
3468 these statement(s) which define the returned SSA_NAME.
3469
3470 FORNOW: We are only handling array accesses with step 1. */
3471
3472 tree
3473 vect_create_addr_base_for_vector_ref (gimple stmt,
3474 gimple_seq *new_stmt_list,
3475 tree offset,
3476 struct loop *loop)
3477 {
3478 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3479 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3480 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
3481 tree base_name;
3482 tree data_ref_base_var;
3483 tree vec_stmt;
3484 tree addr_base, addr_expr;
3485 tree dest;
3486 gimple_seq seq = NULL;
3487 tree base_offset = unshare_expr (DR_OFFSET (dr));
3488 tree init = unshare_expr (DR_INIT (dr));
3489 tree vect_ptr_type;
3490 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
3491 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3492 tree base;
3493
3494 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
3495 {
3496 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
3497
3498 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
3499
3500 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3501 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
3502 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
3503 }
3504
3505 if (loop_vinfo)
3506 base_name = build_fold_indirect_ref (data_ref_base);
3507 else
3508 {
3509 base_offset = ssize_int (0);
3510 init = ssize_int (0);
3511 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
3512 }
3513
3514 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
3515 add_referenced_var (data_ref_base_var);
3516 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
3517 data_ref_base_var);
3518 gimple_seq_add_seq (new_stmt_list, seq);
3519
3520 /* Create base_offset */
3521 base_offset = size_binop (PLUS_EXPR,
3522 fold_convert (sizetype, base_offset),
3523 fold_convert (sizetype, init));
3524 dest = create_tmp_var (sizetype, "base_off");
3525 add_referenced_var (dest);
3526 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
3527 gimple_seq_add_seq (new_stmt_list, seq);
3528
3529 if (offset)
3530 {
3531 tree tmp = create_tmp_var (sizetype, "offset");
3532
3533 add_referenced_var (tmp);
3534 offset = fold_build2 (MULT_EXPR, sizetype,
3535 fold_convert (sizetype, offset), step);
3536 base_offset = fold_build2 (PLUS_EXPR, sizetype,
3537 base_offset, offset);
3538 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
3539 gimple_seq_add_seq (new_stmt_list, seq);
3540 }
3541
3542 /* base + base_offset */
3543 if (loop_vinfo)
3544 addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
3545 else
3546 {
3547 addr_base = build1 (ADDR_EXPR,
3548 build_pointer_type (TREE_TYPE (DR_REF (dr))),
3549 unshare_expr (DR_REF (dr)));
3550 }
3551
3552 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
3553 base = get_base_address (DR_REF (dr));
3554 if (base
3555 && TREE_CODE (base) == MEM_REF)
3556 vect_ptr_type
3557 = build_qualified_type (vect_ptr_type,
3558 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3559
3560 vec_stmt = fold_convert (vect_ptr_type, addr_base);
3561 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3562 get_name (base_name));
3563 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
3564 gimple_seq_add_seq (new_stmt_list, seq);
3565
3566 if (DR_PTR_INFO (dr)
3567 && TREE_CODE (vec_stmt) == SSA_NAME)
3568 {
3569 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
3570 if (offset)
3571 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (vec_stmt));
3572 }
3573
3574 if (vect_print_dump_info (REPORT_DETAILS))
3575 {
3576 fprintf (vect_dump, "created ");
3577 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
3578 }
3579
3580 return vec_stmt;
3581 }
3582
3583
3584 /* Function vect_create_data_ref_ptr.
3585
3586 Create a new pointer-to-AGGR_TYPE variable (ap) that points to the first
3587 location accessed in the loop by STMT, along with the def-use update
3588 chain to appropriately advance the pointer through the loop iterations.
3589 Also set aliasing information for the pointer. This pointer is used by
3590 the callers to this function to create a memory reference expression for
3591 vector load/store access.
3592
3593 Input:
3594 1. STMT: a stmt that references memory. Expected to be of the form
3595 GIMPLE_ASSIGN <name, data-ref> or
3596 GIMPLE_ASSIGN <data-ref, name>.
3597 2. AGGR_TYPE: the type of the reference, which should be either a vector
3598 or an array.
3599 3. AT_LOOP: the loop where the vector memref is to be created.
3600 4. OFFSET (optional): an offset to be added to the initial address accessed
3601 by the data-ref in STMT.
3602 5. GSI: location where the new stmts are to be placed if there is no loop.
3603 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
3604 pointing to the initial address.
3605
3606 Output:
3607 1. Declare a new ptr to vector_type, and have it point to the base of the
3608 data reference (initial address accessed by the data reference).
3609 For example, for vector of type V8HI, the following code is generated:
3610
3611 v8hi *ap;
3612 ap = (v8hi *)initial_address;
3613
3614 if OFFSET is not supplied:
3615 initial_address = &a[init];
3616 if OFFSET is supplied:
3617 initial_address = &a[init + OFFSET];
3618
3619 Return the initial_address in INITIAL_ADDRESS.
3620
3621 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
3622 update the pointer in each iteration of the loop.
3623
3624 Return the increment stmt that updates the pointer in PTR_INCR.
3625
3626 3. Set INV_P to true if the access pattern of the data reference in the
3627 vectorized loop is invariant. Set it to false otherwise.
3628
3629 4. Return the pointer. */
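
/* A rough sketch of how callers such as vectorizable_load/store in
   tree-vect-stmts.c use this function together with bump_vector_ptr
   below (the real callers handle many more cases; variable names here
   are only for illustration):

     dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, loop,
                                             NULL_TREE, &init_addr,
                                             gsi, &ptr_incr,
                                             false, &inv_p);
     ... emit a vector load/store through DATAREF_PTR ...
     dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                    stmt, NULL_TREE); */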
3630
3631 tree
3632 vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3633 tree offset, tree *initial_address,
3634 gimple_stmt_iterator *gsi, gimple *ptr_incr,
3635 bool only_init, bool *inv_p)
3636 {
3637 tree base_name;
3638 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3639 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3640 struct loop *loop = NULL;
3641 bool nested_in_vect_loop = false;
3642 struct loop *containing_loop = NULL;
3643 tree aggr_ptr_type;
3644 tree aggr_ptr;
3645 tree new_temp;
3646 gimple vec_stmt;
3647 gimple_seq new_stmt_list = NULL;
3648 edge pe = NULL;
3649 basic_block new_bb;
3650 tree aggr_ptr_init;
3651 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3652 tree aptr;
3653 gimple_stmt_iterator incr_gsi;
3654 bool insert_after;
3655 bool negative;
3656 tree indx_before_incr, indx_after_incr;
3657 gimple incr;
3658 tree step;
3659 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3660 tree base;
3661
3662 gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
3663 || TREE_CODE (aggr_type) == VECTOR_TYPE);
3664
3665 if (loop_vinfo)
3666 {
3667 loop = LOOP_VINFO_LOOP (loop_vinfo);
3668 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3669 containing_loop = (gimple_bb (stmt))->loop_father;
3670 pe = loop_preheader_edge (loop);
3671 }
3672 else
3673 {
3674 gcc_assert (bb_vinfo);
3675 only_init = true;
3676 *ptr_incr = NULL;
3677 }
3678
3679 /* Check the step (evolution) of the load in LOOP, and record
3680 whether it's invariant. */
3681 if (nested_in_vect_loop)
3682 step = STMT_VINFO_DR_STEP (stmt_info);
3683 else
3684 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3685
3686 if (tree_int_cst_compare (step, size_zero_node) == 0)
3687 *inv_p = true;
3688 else
3689 *inv_p = false;
3690 negative = tree_int_cst_compare (step, size_zero_node) < 0;
3691
3692 /* Create an expression for the first address accessed by this load
3693 in LOOP. */
3694 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3695
3696 if (vect_print_dump_info (REPORT_DETAILS))
3697 {
3698 tree data_ref_base = base_name;
3699 fprintf (vect_dump, "create %s-pointer variable to type: ",
3700 tree_code_name[(int) TREE_CODE (aggr_type)]);
3701 print_generic_expr (vect_dump, aggr_type, TDF_SLIM);
3702 if (TREE_CODE (data_ref_base) == VAR_DECL
3703 || TREE_CODE (data_ref_base) == ARRAY_REF)
3704 fprintf (vect_dump, " vectorizing an array ref: ");
3705 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3706 fprintf (vect_dump, " vectorizing a record based array ref: ");
3707 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3708 fprintf (vect_dump, " vectorizing a pointer ref: ");
3709 print_generic_expr (vect_dump, base_name, TDF_SLIM);
3710 }
3711
3712 /* (1) Create the new aggregate-pointer variable. */
3713 aggr_ptr_type = build_pointer_type (aggr_type);
3714 base = get_base_address (DR_REF (dr));
3715 if (base
3716 && TREE_CODE (base) == MEM_REF)
3717 aggr_ptr_type
3718 = build_qualified_type (aggr_ptr_type,
3719 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3720 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3721 get_name (base_name));
3722
3723 /* Vector and array types inherit the alias set of their component
3724 type by default, so we need to use a ref-all pointer if the data
3725 reference does not conflict with the created aggregate data
3726 reference because it is not addressable. */
3727 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3728 get_alias_set (DR_REF (dr))))
3729 {
3730 aggr_ptr_type
3731 = build_pointer_type_for_mode (aggr_type,
3732 TYPE_MODE (aggr_ptr_type), true);
3733 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3734 get_name (base_name));
3735 }
3736
3737 /* Likewise for any of the data references in the stmt group. */
3738 else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
3739 {
3740 gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
3741 do
3742 {
3743 tree lhs = gimple_assign_lhs (orig_stmt);
3744 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3745 get_alias_set (lhs)))
3746 {
3747 aggr_ptr_type
3748 = build_pointer_type_for_mode (aggr_type,
3749 TYPE_MODE (aggr_ptr_type), true);
3750 aggr_ptr
3751 = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3752 get_name (base_name));
3753 break;
3754 }
3755
3756 orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo_for_stmt (orig_stmt));
3757 }
3758 while (orig_stmt);
3759 }
3760
3761 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3762 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3763 def-use update cycles for the pointer: one relative to the outer-loop
3764 (LOOP), which is what steps (2) and (3) below do. The other is relative
3765 to the inner-loop (which is the inner-most loop containing the dataref),
3766 and this is done by step (4) below.
3767
3768 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3769 inner-most loop, and so steps (2),(3) work the same, and step (4) is
3770 redundant. Steps (2),(3) create the following:
3771
3772 vp0 = &base_addr;
3773 LOOP: vp1 = phi(vp0,vp2)
3774 ...
3775 ...
3776 vp2 = vp1 + step
3777 goto LOOP
3778
3779 If there is an inner-loop nested in LOOP, then step (4) will also be
3780 applied, and an additional update in the inner-loop will be created:
3781
3782 vp0 = &base_addr;
3783 LOOP: vp1 = phi(vp0,vp2)
3784 ...
3785 inner: vp3 = phi(vp1,vp4)
3786 vp4 = vp3 + inner_step
3787 if () goto inner
3788 ...
3789 vp2 = vp1 + step
3790 if () goto LOOP */
3791
3792 /* (2) Calculate the initial address of the aggregate-pointer, and set
3793 the aggregate-pointer to point to it before the loop. */
3794
3795 /* Create: &(base[init_val+offset]) in the loop preheader. */
3796
3797 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3798 offset, loop);
3799 if (new_stmt_list)
3800 {
3801 if (pe)
3802 {
3803 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3804 gcc_assert (!new_bb);
3805 }
3806 else
3807 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
3808 }
3809
3810 *initial_address = new_temp;
3811
3812 /* Create: p = (aggr_type *) initial_base */
3813 if (TREE_CODE (new_temp) != SSA_NAME
3814 || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
3815 {
3816 vec_stmt = gimple_build_assign (aggr_ptr,
3817 fold_convert (aggr_ptr_type, new_temp));
3818 aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
3819 /* Copy the points-to information if it exists. */
3820 if (DR_PTR_INFO (dr))
3821 duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
3822 gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
3823 if (pe)
3824 {
3825 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3826 gcc_assert (!new_bb);
3827 }
3828 else
3829 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
3830 }
3831 else
3832 aggr_ptr_init = new_temp;
3833
3834 /* (3) Handle the updating of the aggregate-pointer inside the loop.
3835 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3836 inner-loop nested in LOOP (during outer-loop vectorization). */
3837
3838 /* No update in loop is required. */
3839 if (only_init && (!loop_vinfo || at_loop == loop))
3840 aptr = aggr_ptr_init;
3841 else
3842 {
3843 /* The step of the aggregate pointer is the type size. */
3844 tree step = TYPE_SIZE_UNIT (aggr_type);
3845 /* One exception to the above is when the scalar step of the load in
3846 LOOP is zero. In this case the step here is also zero. */
3847 if (*inv_p)
3848 step = size_zero_node;
3849 else if (negative)
3850 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3851
3852 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3853
3854 create_iv (aggr_ptr_init,
3855 fold_convert (aggr_ptr_type, step),
3856 aggr_ptr, loop, &incr_gsi, insert_after,
3857 &indx_before_incr, &indx_after_incr);
3858 incr = gsi_stmt (incr_gsi);
3859 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3860
3861 /* Copy the points-to information if it exists. */
3862 if (DR_PTR_INFO (dr))
3863 {
3864 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3865 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3866 }
3867 if (ptr_incr)
3868 *ptr_incr = incr;
3869
3870 aptr = indx_before_incr;
3871 }
3872
3873 if (!nested_in_vect_loop || only_init)
3874 return aptr;
3875
3876
3877 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
3878 nested in LOOP, if it exists. */
3879
3880 gcc_assert (nested_in_vect_loop);
3881 if (!only_init)
3882 {
3883 standard_iv_increment_position (containing_loop, &incr_gsi,
3884 &insert_after);
3885 create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
3886 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3887 &indx_after_incr);
3888 incr = gsi_stmt (incr_gsi);
3889 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3890
3891 /* Copy the points-to information if it exists. */
3892 if (DR_PTR_INFO (dr))
3893 {
3894 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3895 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3896 }
3897 if (ptr_incr)
3898 *ptr_incr = incr;
3899
3900 return indx_before_incr;
3901 }
3902 else
3903 gcc_unreachable ();
3904 }
3905
3906
3907 /* Function bump_vector_ptr.
3908
3909 Increment a pointer (to a vector type) by vector-size. If requested,
3910 i.e. if PTR-INCR is given, then also connect the new increment stmt
3911 to the existing def-use update-chain of the pointer, by modifying
3912 the PTR_INCR as illustrated below:
3913
3914 The pointer def-use update-chain before this function:
3915 DATAREF_PTR = phi (p_0, p_2)
3916 ....
3917 PTR_INCR: p_2 = DATAREF_PTR + step
3918
3919 The pointer def-use update-chain after this function:
3920 DATAREF_PTR = phi (p_0, p_2)
3921 ....
3922 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3923 ....
3924 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3925
3926 Input:
3927 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3928 in the loop.
3929 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3930 the loop. The increment amount across iterations is expected
3931 to be vector_size.
3932 GSI - location where the new update stmt is to be placed.
3933 STMT - the original scalar memory-access stmt that is being vectorized.
3934 BUMP - optional. The offset by which to bump the pointer. If not given,
3935 the offset is assumed to be vector_size.
3936
3937 Output: Return NEW_DATAREF_PTR as illustrated above.
3938
3939 */
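
/* For example, for a V4SI data reference and no explicit BUMP, UPDATE is
   TYPE_SIZE_UNIT (V4SI) = 16 bytes and the stmt emitted at GSI is
   essentially
     NEW_DATAREF_PTR = DATAREF_PTR + 16;
   (a POINTER_PLUS_EXPR); the constant depends on the vector type. */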
3940
3941 tree
3942 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3943 gimple stmt, tree bump)
3944 {
3945 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3946 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3947 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3948 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3949 tree update = TYPE_SIZE_UNIT (vectype);
3950 gimple incr_stmt;
3951 ssa_op_iter iter;
3952 use_operand_p use_p;
3953 tree new_dataref_ptr;
3954
3955 if (bump)
3956 update = bump;
3957
3958 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3959 dataref_ptr, update);
3960 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3961 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3962 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3963
3964 /* Copy the points-to information if it exists. */
3965 if (DR_PTR_INFO (dr))
3966 {
3967 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3968 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
3969 }
3970
3971 if (!ptr_incr)
3972 return new_dataref_ptr;
3973
3974 /* Update the vector-pointer's cross-iteration increment. */
3975 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3976 {
3977 tree use = USE_FROM_PTR (use_p);
3978
3979 if (use == dataref_ptr)
3980 SET_USE (use_p, new_dataref_ptr);
3981 else
3982 gcc_assert (tree_int_cst_compare (use, update) == 0);
3983 }
3984
3985 return new_dataref_ptr;
3986 }
3987
3988
3989 /* Function vect_create_destination_var.
3990
3991 Create a new temporary of type VECTYPE. */
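
/* For example (names are illustrative), for a scalar destination 'x_5'
   and VECTYPE V4SI this returns a fresh "vect_x" variable of type V4SI;
   if VECTYPE is NULL, a scalar temporary "stmp_x" of the type of
   SCALAR_DEST is returned instead. */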
3992
3993 tree
3994 vect_create_destination_var (tree scalar_dest, tree vectype)
3995 {
3996 tree vec_dest;
3997 const char *new_name;
3998 tree type;
3999 enum vect_var_kind kind;
4000
4001 kind = vectype ? vect_simple_var : vect_scalar_var;
4002 type = vectype ? vectype : TREE_TYPE (scalar_dest);
4003
4004 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
4005
4006 new_name = get_name (scalar_dest);
4007 if (!new_name)
4008 new_name = "var_";
4009 vec_dest = vect_get_new_vect_var (type, kind, new_name);
4010
4011 return vec_dest;
4012 }
4013
4014 /* Function vect_grouped_store_supported.
4015
4016 Returns TRUE if interleave high and interleave low permutations
4017 are supported, and FALSE otherwise. */
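
/* For instance, for a vector mode with 8 elements per vector the two
   permutation vectors checked below are {0, 8, 1, 9, 2, 10, 3, 11}
   (the "interleave high" selector used by vect_permute_store_chain)
   and {4, 12, 5, 13, 6, 14, 7, 15} (the "interleave low" selector). */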
4018
4019 bool
4020 vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
4021 {
4022 enum machine_mode mode = TYPE_MODE (vectype);
4023
4024 /* vect_permute_store_chain requires the group size to be a power of two. */
4025 if (exact_log2 (count) == -1)
4026 {
4027 if (vect_print_dump_info (REPORT_DETAILS))
4028 fprintf (vect_dump, "the size of the group of accesses"
4029 " is not a power of 2");
4030 return false;
4031 }
4032
4033 /* Check that the permutation is supported. */
4034 if (VECTOR_MODE_P (mode))
4035 {
4036 unsigned int i, nelt = GET_MODE_NUNITS (mode);
4037 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4038 for (i = 0; i < nelt / 2; i++)
4039 {
4040 sel[i * 2] = i;
4041 sel[i * 2 + 1] = i + nelt;
4042 }
4043 if (can_vec_perm_p (mode, false, sel))
4044 {
4045 for (i = 0; i < nelt; i++)
4046 sel[i] += nelt / 2;
4047 if (can_vec_perm_p (mode, false, sel))
4048 return true;
4049 }
4050 }
4051
4052 if (vect_print_dump_info (REPORT_DETAILS))
4053 fprintf (vect_dump, "interleave op not supported by target.");
4054 return false;
4055 }
4056
4057
4058 /* Return TRUE if vec_store_lanes is available for COUNT vectors of
4059 type VECTYPE. */
4060
4061 bool
4062 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4063 {
4064 return vect_lanes_optab_supported_p ("vec_store_lanes",
4065 vec_store_lanes_optab,
4066 vectype, count);
4067 }
4068
4069
4070 /* Function vect_permute_store_chain.
4071
4072 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
4073 a power of 2, generate interleave_high/low stmts to reorder the data
4074 correctly for the stores. Return the final references for stores in
4075 RESULT_CHAIN.
4076
4077 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4078 The input is 4 vectors each containing 8 elements. We assign a number to
4079 each element, the input sequence is:
4080
4081 1st vec: 0 1 2 3 4 5 6 7
4082 2nd vec: 8 9 10 11 12 13 14 15
4083 3rd vec: 16 17 18 19 20 21 22 23
4084 4th vec: 24 25 26 27 28 29 30 31
4085
4086 The output sequence should be:
4087
4088 1st vec: 0 8 16 24 1 9 17 25
4089 2nd vec: 2 10 18 26 3 11 19 27
4090 3rd vec: 4 12 20 28 5 13 21 29
4091 4th vec: 6 14 22 30 7 15 23 31
4092
4093 i.e., we interleave the contents of the four vectors in their order.
4094
4095 We use interleave_high/low instructions to create such output. The input of
4096 each interleave_high/low operation is two vectors:
4097 1st vec 2nd vec
4098 0 1 2 3 4 5 6 7
4099 the even elements of the result vector are obtained left-to-right from the
4100 high/low elements of the first vector. The odd elements of the result are
4101 obtained left-to-right from the high/low elements of the second vector.
4102 The output of interleave_high will be: 0 4 1 5
4103 and of interleave_low: 2 6 3 7
4104
4105
4106 The permutation is done in log LENGTH stages. In each stage interleave_high
4107 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
4108 where the first argument is taken from the first half of DR_CHAIN and the
4109 second argument from its second half.
4110 In our example,
4111
4112 I1: interleave_high (1st vec, 3rd vec)
4113 I2: interleave_low (1st vec, 3rd vec)
4114 I3: interleave_high (2nd vec, 4th vec)
4115 I4: interleave_low (2nd vec, 4th vec)
4116
4117 The output for the first stage is:
4118
4119 I1: 0 16 1 17 2 18 3 19
4120 I2: 4 20 5 21 6 22 7 23
4121 I3: 8 24 9 25 10 26 11 27
4122 I4: 12 28 13 29 14 30 15 31
4123
4124 The output of the second stage, i.e. the final result is:
4125
4126 I1: 0 8 16 24 1 9 17 25
4127 I2: 2 10 18 26 3 11 19 27
4128 I3: 4 12 20 28 5 13 21 29
4129 I4: 6 14 22 30 7 15 23 31. */
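
/* In terms of VEC_PERM_EXPR, for the 8-element vectors of the example
   above the two selectors generated below are
     perm_mask_high = {0, 8, 1, 9, 2, 10, 3, 11}
     perm_mask_low  = {4, 12, 5, 13, 6, 14, 7, 15}
   so the first stmt of the first stage is essentially
     I1 = VEC_PERM_EXPR <1st vec, 3rd vec, {0, 8, 1, 9, 2, 10, 3, 11}>;
   which yields 0 16 1 17 2 18 3 19 as shown above. */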
4130
4131 void
4132 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
4133 unsigned int length,
4134 gimple stmt,
4135 gimple_stmt_iterator *gsi,
4136 VEC(tree,heap) **result_chain)
4137 {
4138 tree perm_dest, vect1, vect2, high, low;
4139 gimple perm_stmt;
4140 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4141 tree perm_mask_low, perm_mask_high;
4142 unsigned int i, n;
4143 unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
4144 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4145
4146 *result_chain = VEC_copy (tree, heap, dr_chain);
4147
4148 for (i = 0, n = nelt / 2; i < n; i++)
4149 {
4150 sel[i * 2] = i;
4151 sel[i * 2 + 1] = i + nelt;
4152 }
4153 perm_mask_high = vect_gen_perm_mask (vectype, sel);
4154 gcc_assert (perm_mask_high != NULL);
4155
4156 for (i = 0; i < nelt; i++)
4157 sel[i] += nelt / 2;
4158 perm_mask_low = vect_gen_perm_mask (vectype, sel);
4159 gcc_assert (perm_mask_low != NULL);
4160
4161 for (i = 0, n = exact_log2 (length); i < n; i++)
4162 {
4163 for (j = 0; j < length/2; j++)
4164 {
4165 vect1 = VEC_index (tree, dr_chain, j);
4166 vect2 = VEC_index (tree, dr_chain, j+length/2);
4167
4168 /* Create interleaving stmt:
4169 high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}> */
4170 perm_dest = create_tmp_reg (vectype, "vect_inter_high");
4171 add_referenced_var (perm_dest);
4172 high = make_ssa_name (perm_dest, NULL);
4173 perm_stmt
4174 = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, high,
4175 vect1, vect2, perm_mask_high);
4176 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4177 VEC_replace (tree, *result_chain, 2*j, high);
4178
4179 /* Create interleaving stmt:
4180 low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
4181 nelt*3/2+1, ...}> */
4182 perm_dest = create_tmp_reg (vectype, "vect_inter_low");
4183 add_referenced_var (perm_dest);
4184 low = make_ssa_name (perm_dest, NULL);
4185 perm_stmt
4186 = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, low,
4187 vect1, vect2, perm_mask_low);
4188 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4189 VEC_replace (tree, *result_chain, 2*j+1, low);
4190 }
4191 dr_chain = VEC_copy (tree, heap, *result_chain);
4192 }
4193 }
4194
4195 /* Function vect_setup_realignment.
4196
4197 This function is called when vectorizing an unaligned load using
4198 the dr_explicit_realign[_optimized] scheme.
4199 This function generates the following code at the loop prolog:
4200
4201 p = initial_addr;
4202 x msq_init = *(floor(p)); # prolog load
4203 realignment_token = call target_builtin;
4204 loop:
4205 x msq = phi (msq_init, ---)
4206
4207 The stmts marked with x are generated only for the case of
4208 dr_explicit_realign_optimized.
4209
4210 The code above sets up a new (vector) pointer, pointing to the first
4211 location accessed by STMT, and a "floor-aligned" load using that pointer.
4212 It also generates code to compute the "realignment-token" (if the relevant
4213 target hook was defined), and creates a phi-node at the loop-header bb
4214 whose arguments are the result of the prolog-load (created by this
4215 function) and the result of a load that takes place in the loop (to be
4216 created by the caller to this function).
4217
4218 For the case of dr_explicit_realign_optimized:
4219 The caller to this function uses the phi-result (msq) to create the
4220 realignment code inside the loop, and sets up the missing phi argument,
4221 as follows:
4222 loop:
4223 msq = phi (msq_init, lsq)
4224 lsq = *(floor(p')); # load in loop
4225 result = realign_load (msq, lsq, realignment_token);
4226
4227 For the case of dr_explicit_realign:
4228 loop:
4229 msq = *(floor(p)); # load in loop
4230 p' = p + (VS-1);
4231 lsq = *(floor(p')); # load in loop
4232 result = realign_load (msq, lsq, realignment_token);
4233
4234 Input:
4235 STMT - (scalar) load stmt to be vectorized. This load accesses
4236 a memory location that may be unaligned.
4237 GSI - place where new code is to be inserted.
4238 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
4239 is used.
4240
4241 Output:
4242 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4243 target hook, if defined.
4244 Return value - the result of the loop-header phi node. */
4245
4246 tree
4247 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
4248 tree *realignment_token,
4249 enum dr_alignment_support alignment_support_scheme,
4250 tree init_addr,
4251 struct loop **at_loop)
4252 {
4253 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4254 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4255 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4256 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4257 struct loop *loop = NULL;
4258 edge pe = NULL;
4259 tree scalar_dest = gimple_assign_lhs (stmt);
4260 tree vec_dest;
4261 gimple inc;
4262 tree ptr;
4263 tree data_ref;
4264 gimple new_stmt;
4265 basic_block new_bb;
4266 tree msq_init = NULL_TREE;
4267 tree new_temp;
4268 gimple phi_stmt;
4269 tree msq = NULL_TREE;
4270 gimple_seq stmts = NULL;
4271 bool inv_p;
4272 bool compute_in_loop = false;
4273 bool nested_in_vect_loop = false;
4274 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4275 struct loop *loop_for_initial_load = NULL;
4276
4277 if (loop_vinfo)
4278 {
4279 loop = LOOP_VINFO_LOOP (loop_vinfo);
4280 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4281 }
4282
4283 gcc_assert (alignment_support_scheme == dr_explicit_realign
4284 || alignment_support_scheme == dr_explicit_realign_optimized);
4285
4286 /* We need to generate three things:
4287 1. the misalignment computation
4288 2. the extra vector load (for the optimized realignment scheme).
4289 3. the phi node for the two vectors from which the realignment is
4290 done (for the optimized realignment scheme). */
4291
4292 /* 1. Determine where to generate the misalignment computation.
4293
4294 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4295 calculation will be generated by this function, outside the loop (in the
4296 preheader). Otherwise, INIT_ADDR has already been computed for us by the
4297 caller, inside the loop.
4298
4299 Background: If the misalignment remains fixed throughout the iterations of
4300 the loop, then both realignment schemes are applicable, and also the
4301 misalignment computation can be done outside LOOP. This is because we are
4302 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4303 are a multiple of VS (the Vector Size), and therefore the misalignment in
4304 different vectorized LOOP iterations is always the same.
4305 The problem arises only if the memory access is in an inner-loop nested
4306 inside LOOP, which is now being vectorized using outer-loop vectorization.
4307 This is the only case when the misalignment of the memory access may not
4308 remain fixed throughout the iterations of the inner-loop (as explained in
4309 detail in vect_supportable_dr_alignment). In this case, not only is the
4310 optimized realignment scheme not applicable, but also the misalignment
4311 computation (and generation of the realignment token that is passed to
4312 REALIGN_LOAD) has to be done inside the loop.
4313
4314 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
4315 or not, which in turn determines if the misalignment is computed inside
4316 the inner-loop, or outside LOOP. */
4317
4318 if (init_addr != NULL_TREE || !loop_vinfo)
4319 {
4320 compute_in_loop = true;
4321 gcc_assert (alignment_support_scheme == dr_explicit_realign);
4322 }
4323
4324
4325 /* 2. Determine where to generate the extra vector load.
4326
4327 For the optimized realignment scheme, instead of generating two vector
4328 loads in each iteration, we generate a single extra vector load in the
4329 preheader of the loop, and in each iteration reuse the result of the
4330 vector load from the previous iteration. In case the memory access is in
4331 an inner-loop nested inside LOOP, which is now being vectorized using
4332 outer-loop vectorization, we need to determine whether this initial vector
4333 load should be generated at the preheader of the inner-loop, or can be
4334 generated at the preheader of LOOP. If the memory access has no evolution
4335 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
4336 to be generated inside LOOP (in the preheader of the inner-loop). */
4337
4338 if (nested_in_vect_loop)
4339 {
4340 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
4341 bool invariant_in_outerloop =
4342 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
4343 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
4344 }
4345 else
4346 loop_for_initial_load = loop;
4347 if (at_loop)
4348 *at_loop = loop_for_initial_load;
4349
4350 if (loop_for_initial_load)
4351 pe = loop_preheader_edge (loop_for_initial_load);
4352
4353 /* 3. For the case of the optimized realignment, create the first vector
4354 load at the loop preheader. */
4355
4356 if (alignment_support_scheme == dr_explicit_realign_optimized)
4357 {
4358 /* Create msq_init = *(floor(p1)) in the loop preheader */
4359
4360 gcc_assert (!compute_in_loop);
4361 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4362 ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
4363 NULL_TREE, &init_addr, NULL, &inc,
4364 true, &inv_p);
4365 new_stmt = gimple_build_assign_with_ops
4366 (BIT_AND_EXPR, NULL_TREE, ptr,
4367 build_int_cst (TREE_TYPE (ptr),
4368 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4369 new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
4370 gimple_assign_set_lhs (new_stmt, new_temp);
4371 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4372 gcc_assert (!new_bb);
4373 data_ref
4374 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
4375 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
4376 new_stmt = gimple_build_assign (vec_dest, data_ref);
4377 new_temp = make_ssa_name (vec_dest, new_stmt);
4378 gimple_assign_set_lhs (new_stmt, new_temp);
4379 if (pe)
4380 {
4381 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4382 gcc_assert (!new_bb);
4383 }
4384 else
4385 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4386
4387 msq_init = gimple_assign_lhs (new_stmt);
4388 }
4389
4390 /* 4. Create realignment token using a target builtin, if available.
4391 It is done either inside the containing loop, or before LOOP (as
4392 determined above). */
4393
4394 if (targetm.vectorize.builtin_mask_for_load)
4395 {
4396 tree builtin_decl;
4397
4398 /* Compute INIT_ADDR - the initial address accessed by this memref. */
4399 if (!init_addr)
4400 {
4401 /* Generate the INIT_ADDR computation outside LOOP. */
4402 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
4403 NULL_TREE, loop);
4404 if (loop)
4405 {
4406 pe = loop_preheader_edge (loop);
4407 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4408 gcc_assert (!new_bb);
4409 }
4410 else
4411 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
4412 }
4413
4414 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4415 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
4416 vec_dest =
4417 vect_create_destination_var (scalar_dest,
4418 gimple_call_return_type (new_stmt));
4419 new_temp = make_ssa_name (vec_dest, new_stmt);
4420 gimple_call_set_lhs (new_stmt, new_temp);
4421
4422 if (compute_in_loop)
4423 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4424 else
4425 {
4426 /* Generate the misalignment computation outside LOOP. */
4427 pe = loop_preheader_edge (loop);
4428 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4429 gcc_assert (!new_bb);
4430 }
4431
4432 *realignment_token = gimple_call_lhs (new_stmt);
4433
4434 /* The result of the CALL_EXPR to this builtin is determined from
4435 the value of the parameter and no global variables are touched,
4436 which makes the builtin a "const" function. Requiring the
4437 builtin to have the "const" attribute makes it unnecessary
4438 to call mark_call_clobbered. */
4439 gcc_assert (TREE_READONLY (builtin_decl));
4440 }
4441
4442 if (alignment_support_scheme == dr_explicit_realign)
4443 return msq;
4444
4445 gcc_assert (!compute_in_loop);
4446 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
4447
4448
4449 /* 5. Create msq = phi <msq_init, lsq> in loop */
4450
4451 pe = loop_preheader_edge (containing_loop);
4452 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4453 msq = make_ssa_name (vec_dest, NULL);
4454 phi_stmt = create_phi_node (msq, containing_loop->header);
4455 SSA_NAME_DEF_STMT (msq) = phi_stmt;
4456 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
4457
4458 return msq;
4459 }
4460
4461
4462 /* Function vect_grouped_load_supported.
4463
4464 Returns TRUE if even and odd permutations are supported,
4465 and FALSE otherwise. */
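
/* For instance, for a vector mode with 8 elements per vector the two
   permutation vectors checked below are {0, 2, 4, 6, 8, 10, 12, 14}
   (extract even) and {1, 3, 5, 7, 9, 11, 13, 15} (extract odd), where
   indices 0..7 select from the first input vector and 8..15 from the
   second. */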
4466
4467 bool
4468 vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
4469 {
4470 enum machine_mode mode = TYPE_MODE (vectype);
4471
4472 /* vect_permute_load_chain requires the group size to be a power of two. */
4473 if (exact_log2 (count) == -1)
4474 {
4475 if (vect_print_dump_info (REPORT_DETAILS))
4476 fprintf (vect_dump, "the size of the group of accesses"
4477 " is not a power of 2");
4478 return false;
4479 }
4480
4481 /* Check that the permutation is supported. */
4482 if (VECTOR_MODE_P (mode))
4483 {
4484 unsigned int i, nelt = GET_MODE_NUNITS (mode);
4485 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4486
4487 for (i = 0; i < nelt; i++)
4488 sel[i] = i * 2;
4489 if (can_vec_perm_p (mode, false, sel))
4490 {
4491 for (i = 0; i < nelt; i++)
4492 sel[i] = i * 2 + 1;
4493 if (can_vec_perm_p (mode, false, sel))
4494 return true;
4495 }
4496 }
4497
4498 if (vect_print_dump_info (REPORT_DETAILS))
4499 fprintf (vect_dump, "extract even/odd not supported by target");
4500 return false;
4501 }
4502
4503 /* Return TRUE if vec_load_lanes is available for COUNT vectors of
4504 type VECTYPE. */
4505
4506 bool
4507 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4508 {
4509 return vect_lanes_optab_supported_p ("vec_load_lanes",
4510 vec_load_lanes_optab,
4511 vectype, count);
4512 }
4513
4514 /* Function vect_permute_load_chain.
4515
4516 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
4517 a power of 2, generate extract_even/odd stmts to reorder the input data
4518 correctly. Return the final references for loads in RESULT_CHAIN.
4519
4520 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4521 The input is 4 vectors each containing 8 elements. We assign a number to each
4522 element, the input sequence is:
4523
4524 1st vec: 0 1 2 3 4 5 6 7
4525 2nd vec: 8 9 10 11 12 13 14 15
4526 3rd vec: 16 17 18 19 20 21 22 23
4527 4th vec: 24 25 26 27 28 29 30 31
4528
4529 The output sequence should be:
4530
4531 1st vec: 0 4 8 12 16 20 24 28
4532 2nd vec: 1 5 9 13 17 21 25 29
4533 3rd vec: 2 6 10 14 18 22 26 30
4534 4th vec: 3 7 11 15 19 23 27 31
4535
4536 i.e., the first output vector should contain the first elements of each
4537 interleaving group, etc.
4538
4539 We use extract_even/odd instructions to create such output. The input of
4540 each extract_even/odd operation is two vectors
4541 1st vec 2nd vec
4542 0 1 2 3 4 5 6 7
4543
4544 and the output is the vector of extracted even/odd elements. The output of
4545 extract_even will be: 0 2 4 6
4546 and of extract_odd: 1 3 5 7
4547
4548
4549 The permutation is done in log LENGTH stages. In each stage extract_even
4550 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
4551 their order. In our example,
4552
4553 E1: extract_even (1st vec, 2nd vec)
4554 E2: extract_odd (1st vec, 2nd vec)
4555 E3: extract_even (3rd vec, 4th vec)
4556 E4: extract_odd (3rd vec, 4th vec)
4557
4558 The output for the first stage will be:
4559
4560 E1: 0 2 4 6 8 10 12 14
4561 E2: 1 3 5 7 9 11 13 15
4562 E3: 16 18 20 22 24 26 28 30
4563 E4: 17 19 21 23 25 27 29 31
4564
4565 In order to proceed and create the correct sequence for the next stage (or
4566 for the correct output, if the second stage is the last one, as in our
4567 example), we first put the output of extract_even operation and then the
4568 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4569 The input for the second stage is:
4570
4571 1st vec (E1): 0 2 4 6 8 10 12 14
4572 2nd vec (E3): 16 18 20 22 24 26 28 30
4573 3rd vec (E2): 1 3 5 7 9 11 13 15
4574 4th vec (E4): 17 19 21 23 25 27 29 31
4575
4576 The output of the second stage:
4577
4578 E1: 0 4 8 12 16 20 24 28
4579 E2: 2 6 10 14 18 22 26 30
4580 E3: 1 5 9 13 17 21 25 29
4581 E4: 3 7 11 15 19 23 27 31
4582
4583 And RESULT_CHAIN after reordering:
4584
4585 1st vec (E1): 0 4 8 12 16 20 24 28
4586 2nd vec (E3): 1 5 9 13 17 21 25 29
4587 3rd vec (E2): 2 6 10 14 18 22 26 30
4588 4th vec (E4): 3 7 11 15 19 23 27 31. */
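
/* In terms of VEC_PERM_EXPR, for the 8-element vectors of the example
   above the selectors generated below are
     perm_mask_even = {0, 2, 4, 6, 8, 10, 12, 14}
     perm_mask_odd  = {1, 3, 5, 7, 9, 11, 13, 15}
   so the first stmt of the first stage is essentially
     E1 = VEC_PERM_EXPR <1st vec, 2nd vec, {0, 2, 4, 6, 8, 10, 12, 14}>;
   which yields 0 2 4 6 8 10 12 14 as shown above. */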
4589
4590 static void
4591 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
4592 unsigned int length,
4593 gimple stmt,
4594 gimple_stmt_iterator *gsi,
4595 VEC(tree,heap) **result_chain)
4596 {
4597 tree perm_dest, data_ref, first_vect, second_vect;
4598 tree perm_mask_even, perm_mask_odd;
4599 gimple perm_stmt;
4600 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4601 unsigned int i, j, log_length = exact_log2 (length);
4602 unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
4603 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4604
4605 *result_chain = VEC_copy (tree, heap, dr_chain);
4606
4607 for (i = 0; i < nelt; ++i)
4608 sel[i] = i * 2;
4609 perm_mask_even = vect_gen_perm_mask (vectype, sel);
4610 gcc_assert (perm_mask_even != NULL);
4611
4612 for (i = 0; i < nelt; ++i)
4613 sel[i] = i * 2 + 1;
4614 perm_mask_odd = vect_gen_perm_mask (vectype, sel);
4615 gcc_assert (perm_mask_odd != NULL);
4616
4617 for (i = 0; i < log_length; i++)
4618 {
4619 for (j = 0; j < length; j += 2)
4620 {
4621 first_vect = VEC_index (tree, dr_chain, j);
4622 second_vect = VEC_index (tree, dr_chain, j+1);
4623
4624 /* data_ref = permute_even (first_data_ref, second_data_ref); */
4625 perm_dest = create_tmp_reg (vectype, "vect_perm_even");
4626 add_referenced_var (perm_dest);
4627
4628 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
4629 first_vect, second_vect,
4630 perm_mask_even);
4631
4632 data_ref = make_ssa_name (perm_dest, perm_stmt);
4633 gimple_assign_set_lhs (perm_stmt, data_ref);
4634 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4635
4636 VEC_replace (tree, *result_chain, j/2, data_ref);
4637
4638 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
4639 perm_dest = create_tmp_reg (vectype, "vect_perm_odd");
4640 add_referenced_var (perm_dest);
4641
4642 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
4643 first_vect, second_vect,
4644 perm_mask_odd);
4645
4646 data_ref = make_ssa_name (perm_dest, perm_stmt);
4647 gimple_assign_set_lhs (perm_stmt, data_ref);
4648 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4649
4650 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
4651 }
4652 dr_chain = VEC_copy (tree, heap, *result_chain);
4653 }
4654 }
4655
4656
4657 /* Function vect_transform_grouped_load.
4658
4659 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4660 to perform their permutation and ascribe the result vectorized statements to
4661 the scalar statements.
4662 */
4663
4664 void
4665 vect_transform_grouped_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
4666 gimple_stmt_iterator *gsi)
4667 {
4668 VEC(tree,heap) *result_chain = NULL;
4669
4670 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4671 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
4672 vectors, that are ready for vector computation. */
4673 result_chain = VEC_alloc (tree, heap, size);
4674 vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
4675 vect_record_grouped_load_vectors (stmt, result_chain);
4676 VEC_free (tree, heap, result_chain);
4677 }
4678
4679 /* RESULT_CHAIN contains the output of a group of grouped loads that were
4680 generated as part of the vectorization of STMT. Assign the statement
4681 for each vector to the associated scalar statement. */
4682
4683 void
4684 vect_record_grouped_load_vectors (gimple stmt, VEC(tree,heap) *result_chain)
4685 {
4686 gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
4687 gimple next_stmt, new_stmt;
4688 unsigned int i, gap_count;
4689 tree tmp_data_ref;
4690
4691 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4692 Since we scan the chain starting from its first node, their order
4693 corresponds to the order of data-refs in RESULT_CHAIN. */
4694 next_stmt = first_stmt;
4695 gap_count = 1;
4696 FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
4697 {
4698 if (!next_stmt)
4699 break;
4700
4701 /* Skip the gaps. Loads created for the gaps will be removed by the dead
4702 code elimination pass later. No need to check for the first stmt in
4703 the group, since it always exists.
4704 GROUP_GAP is the number of steps in elements from the previous
4705 access (if there is no gap GROUP_GAP is 1). We skip loads that
4706 correspond to the gaps. */
4707 if (next_stmt != first_stmt
4708 && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
4709 {
4710 gap_count++;
4711 continue;
4712 }
4713
4714 while (next_stmt)
4715 {
4716 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4717 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4718 copies, and we put the new vector statement in the first available
4719 RELATED_STMT. */
4720 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4721 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4722 else
4723 {
4724 if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4725 {
4726 gimple prev_stmt =
4727 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4728 gimple rel_stmt =
4729 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4730 while (rel_stmt)
4731 {
4732 prev_stmt = rel_stmt;
4733 rel_stmt =
4734 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4735 }
4736
4737 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4738 new_stmt;
4739 }
4740 }
4741
4742 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4743 gap_count = 1;
4744 /* If NEXT_STMT accesses the same DR as the previous statement,
4745 put the same TMP_DATA_REF as its vectorized statement; otherwise
4746 get the next data-ref from RESULT_CHAIN. */
4747 if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4748 break;
4749 }
4750 }
4751 }
4752
4753 /* Function vect_can_force_dr_alignment_p.
4754
4755 Returns whether the alignment of a DECL can be forced to be aligned
4756 on ALIGNMENT bit boundary. */
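
/* For example (illustrative only), a definition such as
     static float a[256];
   can usually be given the vector alignment (subject to
   MAX_OFILE_ALIGNMENT, or MAX_STACK_ALIGNMENT for automatic variables),
   whereas for
     extern float b[];
   or a common symbol this returns false, since another translation unit
   may provide the definition with a smaller alignment. */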
4757
4758 bool
4759 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4760 {
4761 if (TREE_CODE (decl) != VAR_DECL)
4762 return false;
4763
4764 /* We cannot change alignment of common or external symbols as another
4765 translation unit may contain a definition with lower alignment.
4766 The rules of common symbol linking mean that the definition
4767 will override the common symbol. */
4768 if (DECL_EXTERNAL (decl)
4769 || DECL_COMMON (decl))
4770 return false;
4771
4772 if (TREE_ASM_WRITTEN (decl))
4773 return false;
4774
4775 /* Do not override the alignment as specified by the ABI when the used
4776 attribute is set. */
4777 if (DECL_PRESERVE_P (decl))
4778 return false;
4779
4780 if (TREE_STATIC (decl))
4781 return (alignment <= MAX_OFILE_ALIGNMENT);
4782 else
4783 return (alignment <= MAX_STACK_ALIGNMENT);
4784 }
4785
4786
4787 /* Return whether the data reference DR is supported with respect to its
4788 alignment.
4789 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
4790 if it is aligned, i.e., check if it is possible to vectorize it with different
4791 alignment. */
4792
4793 enum dr_alignment_support
4794 vect_supportable_dr_alignment (struct data_reference *dr,
4795 bool check_aligned_accesses)
4796 {
4797 gimple stmt = DR_STMT (dr);
4798 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4799 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4800 enum machine_mode mode = TYPE_MODE (vectype);
4801 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4802 struct loop *vect_loop = NULL;
4803 bool nested_in_vect_loop = false;
4804
4805 if (aligned_access_p (dr) && !check_aligned_accesses)
4806 return dr_aligned;
4807
4808 if (loop_vinfo)
4809 {
4810 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4811 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4812 }
4813
4814 /* Possibly unaligned access. */
4815
4816 /* We can choose between using the implicit realignment scheme (generating
4817 a misaligned_move stmt) and the explicit realignment scheme (generating
4818 aligned loads with a REALIGN_LOAD). There are two variants to the
4819 explicit realignment scheme: optimized, and unoptimized.
4820 We can optimize the realignment only if the step between consecutive
4821 vector loads is equal to the vector size. Since the vector memory
4822 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4823 is guaranteed that the misalignment amount remains the same throughout the
4824 execution of the vectorized loop. Therefore, we can create the
4825 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4826 at the loop preheader.
4827
4828 However, in the case of outer-loop vectorization, when vectorizing a
4829 memory access in the inner-loop nested within the LOOP that is now being
4830 vectorized, while it is guaranteed that the misalignment of the
4831 vectorized memory access will remain the same in different outer-loop
4832 iterations, it is *not* guaranteed that it will remain the same throughout
4833 the execution of the inner-loop. This is because the inner-loop advances
4834 with the original scalar step (and not in steps of VS). If the inner-loop
4835 step happens to be a multiple of VS, then the misalignment remains fixed
4836 and we can use the optimized realignment scheme. For example:
4837
4838 for (i=0; i<N; i++)
4839 for (j=0; j<M; j++)
4840 s += a[i+j];
4841
4842 When vectorizing the i-loop in the above example, the step between
4843 consecutive vector loads is 1, and so the misalignment does not remain
4844 fixed across the execution of the inner-loop, and the realignment cannot
4845 be optimized (as illustrated in the following pseudo vectorized loop):
4846
4847 for (i=0; i<N; i+=4)
4848 for (j=0; j<M; j++){
4849 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4850 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4851 // (assuming that we start from an aligned address).
4852 }
4853
4854 We therefore have to use the unoptimized realignment scheme:
4855
4856 for (i=0; i<N; i+=4)
4857 for (j=k; j<M; j+=4)
4858 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4859 // that the misalignment of the initial address is
4860 // 0).
4861
4862 The loop can then be vectorized as follows:
4863
4864 for (k=0; k<4; k++){
4865 rt = get_realignment_token (&vp[k]);
4866 for (i=0; i<N; i+=4){
4867 v1 = vp[i+k];
4868 for (j=k; j<M; j+=4){
4869 v2 = vp[i+j+VS-1];
4870 va = REALIGN_LOAD <v1,v2,rt>;
4871 vs += va;
4872 v1 = v2;
4873 }
4874 }
4875 } */
4876
4877 if (DR_IS_READ (dr))
4878 {
4879 bool is_packed = false;
4880 tree type = (TREE_TYPE (DR_REF (dr)));
4881
4882 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4883 && (!targetm.vectorize.builtin_mask_for_load
4884 || targetm.vectorize.builtin_mask_for_load ()))
4885 {
4886 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4887 if ((nested_in_vect_loop
4888 && (TREE_INT_CST_LOW (DR_STEP (dr))
4889 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4890 || !loop_vinfo)
4891 return dr_explicit_realign;
4892 else
4893 return dr_explicit_realign_optimized;
4894 }
4895 if (!known_alignment_for_access_p (dr))
4896 is_packed = not_size_aligned (DR_REF (dr));
4897
4898 if (targetm.vectorize.
4899 support_vector_misalignment (mode, type,
4900 DR_MISALIGNMENT (dr), is_packed))
4901 /* Can't software pipeline the loads, but can at least do them. */
4902 return dr_unaligned_supported;
4903 }
4904 else
4905 {
4906 bool is_packed = false;
4907 tree type = (TREE_TYPE (DR_REF (dr)));
4908
4909 if (!known_alignment_for_access_p (dr))
4910 is_packed = not_size_aligned (DR_REF (dr));
4911
4912 if (targetm.vectorize.
4913 support_vector_misalignment (mode, type,
4914 DR_MISALIGNMENT (dr), is_packed))
4915 return dr_unaligned_supported;
4916 }
4917
4918 /* Unsupported. */
4919 return dr_unaligned_unsupported;
4920 }