/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"

/* Main analysis functions.  */
static loop_vec_info vect_analyze_loop_form (struct loop *);
static bool vect_analyze_data_refs (loop_vec_info);
static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
static void vect_analyze_scalar_cycles (loop_vec_info);
static bool vect_analyze_data_ref_accesses (loop_vec_info);
static bool vect_analyze_data_ref_dependences (loop_vec_info);
static bool vect_analyze_data_refs_alignment (loop_vec_info);
static bool vect_compute_data_refs_alignment (loop_vec_info);
static void vect_enhance_data_refs_alignment (loop_vec_info);
static bool vect_analyze_operations (loop_vec_info);
static bool vect_determine_vectorization_factor (loop_vec_info);

/* Utility functions for the analyses.  */
static bool exist_non_indexing_operands_for_use_p (tree, tree);
static void vect_mark_relevant (VEC(tree,heap) **, tree, bool, bool);
static bool vect_stmt_relevant_p (tree, loop_vec_info, bool *, bool *);
static tree vect_get_loop_niters (struct loop *, tree *);
static bool vect_analyze_data_ref_dependence
  (struct data_reference *, struct data_reference *, loop_vec_info);
static bool vect_compute_data_ref_alignment (struct data_reference *);
static bool vect_analyze_data_ref_access (struct data_reference *);
static struct data_reference * vect_analyze_pointer_ref_access
  (tree, tree, bool, tree, tree *, tree *);
static bool vect_can_advance_ivs_p (loop_vec_info);
static tree vect_get_ptr_offset (tree, tree, tree *);
static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *,
                                      tree *, tree *);
static bool vect_base_addr_differ_p (struct data_reference *,
                                     struct data_reference *drb, bool *);
static tree vect_object_analysis (tree, tree, bool, tree,
                                  struct data_reference **, tree *, tree *,
                                  tree *, bool *, tree *,
                                  struct ptr_info_def **, subvar_t *);
static tree vect_address_analysis (tree, tree, bool, tree,
                                   struct data_reference *, tree *, tree *,
                                   tree *, bool *);


/* Function vect_get_ptr_offset

   Compute the OFFSET modulo vector-type alignment of pointer REF in bits.  */

static tree
vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
                     tree vectype ATTRIBUTE_UNUSED,
                     tree *offset ATTRIBUTE_UNUSED)
{
  /* TODO: Use alignment information.  */
  return NULL_TREE;
}

/* Function vect_analyze_offset_expr

   Given an offset expression EXPR received from get_inner_reference, analyze
   it and create an expression for INITIAL_OFFSET by substituting the variables
   of EXPR with the initial_condition of the corresponding access_fn in the
   loop.  E.g.,
     for i
       for (j = 3; j < N; j++)
         a[j].b[i][j] = 0;

   For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'.  'i' cannot be
   substituted, since its access_fn in the inner loop is i.  'j' will be
   substituted with 3.  The INITIAL_OFFSET will be 'i * C_i + C`', where
   C` = 3 * C_j + C.

   Compute MISALIGN (the misalignment of the data reference initial access from
   its base) if possible.  Misalignment can be calculated only if all the
   variables can be substituted with constants, or if a variable is multiplied
   by a multiple of VECTYPE_ALIGNMENT.  In the above example, since 'i' cannot
   be substituted, MISALIGN will be NULL_TREE if C_i is not a multiple of
   VECTYPE_ALIGNMENT, and C` otherwise.  (We perform the MISALIGN modulo
   VECTYPE_ALIGNMENT computation in the caller of this function.)

   STEP is the evolution of the data reference in this loop in bytes.
   In the above example, STEP is C_j.

   Return FALSE if the analysis fails, e.g., when there is no access_fn for a
   variable; in that case all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
   are NULL_TREE.  Otherwise, return TRUE.  */

static bool
vect_analyze_offset_expr (tree expr,
                          struct loop *loop,
                          tree vectype_alignment,
                          tree *initial_offset,
                          tree *misalign,
                          tree *step)
{
  tree oprnd0;
  tree oprnd1;
  tree left_offset = ssize_int (0);
  tree right_offset = ssize_int (0);
  tree left_misalign = ssize_int (0);
  tree right_misalign = ssize_int (0);
  tree left_step = ssize_int (0);
  tree right_step = ssize_int (0);
  enum tree_code code;
  tree init, evolution;

  *step = NULL_TREE;
  *misalign = NULL_TREE;
  *initial_offset = NULL_TREE;

  /* Strip conversions that don't narrow the mode.  */
  expr = vect_strip_conversion (expr);
  if (!expr)
    return false;

  /* Stop conditions:
     1. Constant.  */
  if (TREE_CODE (expr) == INTEGER_CST)
    {
      *initial_offset = fold_convert (ssizetype, expr);
      *misalign = fold_convert (ssizetype, expr);
      *step = ssize_int (0);
      return true;
    }

  /* 2. Variable.  Try to substitute with the initial_condition of the
     corresponding access_fn in the current loop.  */
  if (SSA_VAR_P (expr))
    {
      tree access_fn = analyze_scalar_evolution (loop, expr);

      if (access_fn == chrec_dont_know)
        /* No access_fn.  */
        return false;

      init = initial_condition_in_loop_num (access_fn, loop->num);
      if (init == expr && !expr_invariant_in_loop_p (loop, init))
        /* Not enough information: the expression may not be loop invariant.
           E.g., for a[b[i]], we get a[D], where D=b[i].  EXPR is D, its
           initial_condition is D, but it depends on i - the loop's induction
           variable.  */
        return false;

      evolution = evolution_part_in_loop_num (access_fn, loop->num);
      if (evolution && TREE_CODE (evolution) != INTEGER_CST)
        /* Evolution is not constant.  */
        return false;

      if (TREE_CODE (init) == INTEGER_CST)
        *misalign = fold_convert (ssizetype, init);
      else
        /* Not constant, misalignment cannot be calculated.  */
        *misalign = NULL_TREE;

      *initial_offset = fold_convert (ssizetype, init);

      *step = evolution ? fold_convert (ssizetype, evolution) : ssize_int (0);
      return true;
    }

  /* Recursive computation.  */
  if (!BINARY_CLASS_P (expr))
    {
      /* We expect to get binary expressions (PLUS/MINUS and MULT).  */
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        {
          fprintf (vect_dump, "Not binary expression ");
          print_generic_expr (vect_dump, expr, TDF_SLIM);
        }
      return false;
    }
  oprnd0 = TREE_OPERAND (expr, 0);
  oprnd1 = TREE_OPERAND (expr, 1);

  if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset,
                                 &left_misalign, &left_step)
      || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment,
                                    &right_offset, &right_misalign,
                                    &right_step))
    return false;

  /* The type of the operation: plus, minus or mult.  */
  code = TREE_CODE (expr);
  switch (code)
    {
    case MULT_EXPR:
      if (TREE_CODE (right_offset) != INTEGER_CST)
        /* RIGHT_OFFSET might not be constant, e.g., for arrays of
           variable-sized types.
           FORNOW: we don't support such cases.  */
        return false;

      /* Strip conversions that don't narrow the mode.  */
      left_offset = vect_strip_conversion (left_offset);
      if (!left_offset)
        return false;
      /* Misalignment computation.  */
      if (SSA_VAR_P (left_offset))
        {
          /* If the left side contains variables that can't be substituted
             with constants, we check if the right side is a multiple of
             ALIGNMENT.  */
          if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset,
                                         fold_convert (ssizetype,
                                                       vectype_alignment))))
            *misalign = ssize_int (0);
          else
            /* If the remainder is not zero or the right side isn't constant,
               we can't compute misalignment.  */
            *misalign = NULL_TREE;
        }
      else
        {
          /* The left operand was successfully substituted with a constant.  */
          if (left_misalign)
            /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
               NULL_TREE.  */
            *misalign = size_binop (code, left_misalign, right_misalign);
          else
            *misalign = NULL_TREE;
        }

      /* Step calculation: multiply the step by the right operand.  */
      *step = size_binop (MULT_EXPR, left_step, right_offset);
      break;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Combine the recursive calculations for step and misalignment.  */
      *step = size_binop (code, left_step, right_step);

      if (left_misalign && right_misalign)
        *misalign = size_binop (code, left_misalign, right_misalign);
      else
        *misalign = NULL_TREE;

      break;

    default:
      gcc_unreachable ();
    }

  /* Compute offset.  */
  *initial_offset = fold_convert (ssizetype,
                                  fold (build2 (code, TREE_TYPE (left_offset),
                                                left_offset,
                                                right_offset)));
  return true;
}


/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop.  For example, when vectorizing a loop that operates on 4-byte
   elements, on a target with a vector size (VS) of 16 bytes, the VF is set to
   4, since 4 elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated
   upon are of the same size.  Therefore this function currently sets VF
   according to the size of the types operated upon, and fails if there are
   multiple sizes in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }  */

static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  tree scalar_type;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          unsigned int nunits;
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          tree vectype;

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);
          /* Skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
                fprintf (vect_dump, "skip.");
              continue;
            }

          if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (STMT_VINFO_DATA_REF (stmt_info))
            scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
          else if (TREE_CODE (stmt) == MODIFY_EXPR)
            scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
          else
            scalar_type = TREE_TYPE (stmt);

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "get vectype for scalar type: ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }

          vectype = get_vectype_for_scalar_type (scalar_type);
          if (!vectype)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                {
                  fprintf (vect_dump, "not vectorized: unsupported data-type ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }
              return false;
            }
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }
          STMT_VINFO_VECTYPE (stmt_info) = vectype;

          nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (vectorization_factor)
            {
              /* FORNOW: don't allow mixed units.
                 This restriction will be relaxed in the future.  */
              if (nunits != vectorization_factor)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                            LOOP_LOC (loop_vinfo)))
                    fprintf (vect_dump, "not vectorized: mixed data-types");
                  return false;
                }
            }
          else
            vectorization_factor = nunits;

#ifdef ENABLE_CHECKING
          gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
                      * vectorization_factor == UNITS_PER_SIMD_WORD);
#endif
        }
    }

  /* TODO: Analyze cost.  Decide if worthwhile to vectorize.  */

  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}


/* Function vect_analyze_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */

static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  bool ok;
  tree phi;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_operations ===");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        {
          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "examining phi: ");
              print_generic_expr (vect_dump, phi, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              /* FORNOW: not yet supported.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                fprintf (vect_dump, "not vectorized: value used after loop.");
              return false;
            }

          gcc_assert (!STMT_VINFO_RELEVANT_P (stmt_info));
        }

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.
             This is expected to include:
             - the COND_EXPR which is the loop exit condition
             - any LABEL_EXPRs in the loop
             - computations that are used only for array indexing or loop
               control.  */

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
                fprintf (vect_dump, "irrelevant.");
              continue;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));

              ok = (vectorizable_operation (stmt, NULL, NULL)
                    || vectorizable_assignment (stmt, NULL, NULL)
                    || vectorizable_load (stmt, NULL, NULL)
                    || vectorizable_store (stmt, NULL, NULL)
                    || vectorizable_condition (stmt, NULL, NULL));

              if (!ok)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                            LOOP_LOC (loop_vinfo)))
                    {
                      fprintf (vect_dump,
                               "not vectorized: relevant stmt not supported: ");
                      print_generic_expr (vect_dump, stmt, TDF_SLIM);
                    }
                  return false;
                }
              need_to_vectorize = true;
            }

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              ok = vectorizable_live_operation (stmt, NULL, NULL);

              if (!ok)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                            LOOP_LOC (loop_vinfo)))
                    {
                      fprintf (vect_dump,
                               "not vectorized: live stmt not supported: ");
                      print_generic_expr (vect_dump, stmt, TDF_SLIM);
                    }
                  return false;
                }
            }
        } /* stmts in bb */
    } /* bbs */

  /* TODO: Analyze cost.  Decide if worthwhile to vectorize.  */

  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump,
                 "All the computation can be taken out of the loop.");
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump,
                 "not vectorized: redundant loop. no profit to vectorize.");
      return false;
    }

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump,
             "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
             vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      return false;
    }

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
    {
      if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */
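
/* Illustrative example (an added sketch, not part of the original comment):

     a[i] = x;     <-- the use 'x' feeds the value being stored, so it has
                       a non-indexing occurrence in the statement.
     x = a[i];     <-- the use 'i' only computes the address of a[i], so it
                       has no non-indexing occurrence here.  */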

static bool
exist_non_indexing_operands_for_use_p (tree use, tree stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs.)

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
    return false;

  operand = TREE_OPERAND (stmt, 1);

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}


/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop (scalar) PHIs; classify each cycle as one of the
   following: invariant, induction, reduction, unknown.

   Some forms of scalar cycles are not yet supported.

   Example1: reduction: (unsupported yet)

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction: (unsupported yet)

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;

   Note: the following loop *is* vectorizable:

              loop3:
              for (i=0; i<N; i++)
                 a[i] = b[i];

   even though it has a def-use cycle caused by the induction variable i:

              loop: i_2 = PHI (i_0, i_1)
                    a[i_2] = ...;
                    i_1 = i_2 + 1;
                    GOTO loop;

   because the def-use cycle in loop3 is considered "not relevant" - i.e.,
   it does not need to be vectorized because it is only used for array
   indexing (see 'mark_stmts_to_be_vectorized').  The def-use cycle in
   loop2, on the other hand, is relevant (it is being written to memory).  */

static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  tree phi;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  tree dummy;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      tree reduc_stmt;

      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual PHIs.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (def)))
        {
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            fprintf (vect_dump, "virtual phi. skip.");
          continue;
        }

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */

      access_fn = analyze_scalar_evolution (loop, def);

      if (!access_fn)
        continue;

      if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      if (vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
        {
          if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "Detected induction.");
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
          continue;
        }

      /* TODO: handle invariant phis.  */

      reduc_stmt = vect_is_simple_reduction (loop, phi);
      if (reduc_stmt)
        {
          if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "Detected reduction.");
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
          STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
            vect_reduction_def;
        }
      else if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "Unknown def-use cycle pattern.");
    }

  return;
}


/* Function vect_base_addr_differ_p.

   This is the simplest data dependence test: determines whether the
   data references A and B access the same array/region.  Returns
   false when the property is not computable at compile time.
   Otherwise return true, and DIFFER_P will record the result.  This
   utility will no longer be necessary once alias_sets_conflict_p
   becomes less conservative.  */
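
/* Illustrative example (an added sketch, not from the original comment):
   given

     void f (float *__restrict__ p, float *q)
     {
       int i;
       for (i = 0; i < 16; i++)
         p[i] = q[i] + 1.0f;
     }

   the store through the restricted pointer 'p' cannot alias the load
   through 'q' within the same scope, so DIFFER_P can be set to true even
   though neither base address is known at compile time.  */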

static bool
vect_base_addr_differ_p (struct data_reference *dra,
                         struct data_reference *drb,
                         bool *differ_p)
{
  tree stmt_a = DR_STMT (dra);
  stmt_vec_info stmt_info_a = vinfo_for_stmt (stmt_a);
  tree stmt_b = DR_STMT (drb);
  stmt_vec_info stmt_info_b = vinfo_for_stmt (stmt_b);
  tree addr_a = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_a);
  tree addr_b = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_b);
  tree type_a = TREE_TYPE (addr_a);
  tree type_b = TREE_TYPE (addr_b);
  HOST_WIDE_INT alias_set_a, alias_set_b;

  gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));

  /* Both references are ADDR_EXPR, i.e., we have the objects.  */
  if (TREE_CODE (addr_a) == ADDR_EXPR && TREE_CODE (addr_b) == ADDR_EXPR)
    return array_base_name_differ_p (dra, drb, differ_p);

  alias_set_a = (TREE_CODE (addr_a) == ADDR_EXPR) ?
    get_alias_set (TREE_OPERAND (addr_a, 0)) : get_alias_set (addr_a);
  alias_set_b = (TREE_CODE (addr_b) == ADDR_EXPR) ?
    get_alias_set (TREE_OPERAND (addr_b, 0)) : get_alias_set (addr_b);

  if (!alias_sets_conflict_p (alias_set_a, alias_set_b))
    {
      *differ_p = true;
      return true;
    }

  /* An instruction writing through a restricted pointer is "independent" of
     any instruction reading or writing through a different pointer, in the
     same block/scope.  */
  else if ((TYPE_RESTRICT (type_a) && !DR_IS_READ (dra))
           || (TYPE_RESTRICT (type_b) && !DR_IS_READ (drb)))
    {
      *differ_p = true;
      return true;
    }
  return false;
}


/* Function vect_analyze_data_ref_dependence.

   Return TRUE if a dependence might exist between the memory-references
   DRA and DRB.  */
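
/* Illustrative examples (an added sketch; the numbers assume a
   vectorization factor VF of 4):

     for (i=4; i<N; i++)  a[i] = a[i-1];   <-- distance 1 < VF: reported
                                               as a dependence.
     for (i=4; i<N; i++)  a[i] = a[i-4];   <-- distance 4, 4 % VF == 0: no
                                               dependence for vectorization,
                                               and the refs share alignment.
     for (i=5; i<N; i++)  a[i] = a[i-5];   <-- distance 5 >= VF: the vector
                                               accesses cannot overlap, so
                                               no dependence either.  */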

static bool
vect_analyze_data_ref_dependence (struct data_reference *dra,
                                  struct data_reference *drb,
                                  loop_vec_info loop_vinfo)
{
  bool differ_p;
  struct data_dependence_relation *ddr;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int dist = 0;
  unsigned int loop_depth = 0;
  struct loop *loop_nest = loop;
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  if (!vect_base_addr_differ_p (dra, drb, &differ_p))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        {
          fprintf (vect_dump,
                   "not vectorized: can't determine dependence between: ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  if (differ_p)
    return false;

  ddr = initialize_data_dependence_relation (dra, drb);
  compute_affine_dependence (ddr);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        {
          fprintf (vect_dump,
                   "not vectorized: can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  /* Find loop depth.  */
  while (loop_nest)
    {
      if (loop_nest->outer && loop_nest->outer->outer)
        {
          loop_nest = loop_nest->outer;
          loop_depth++;
        }
      else
        break;
    }

  /* Compute the distance vector.  */
  compute_subscript_distance (ddr);
  build_classic_dist_vector (ddr, vect_loops_num, loop_nest->depth);

  if (!DDR_DIST_VECT (ddr))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        {
          fprintf (vect_dump, "not vectorized: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  dist = DDR_DIST_VECT (ddr)[loop_depth];

  /* The distance is a multiple of the vectorization factor, so the two
     references access the same slot within each vector iteration.  */
  if (dist % vectorization_factor == 0)
    {
      /* Such references are guaranteed to have the same alignment.  */
      VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
      VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
      if (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "accesses have the same alignment.");
      return false;
    }

  if (dist >= vectorization_factor)
    /* A dependence distance of at least the vectorization factor does not
       create a dependence, as far as vectorization is concerned.  */
    return false;

  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                            LOOP_LOC (loop_vinfo)))
    {
      fprintf (vect_dump,
               "not vectorized: possible dependence between data-refs ");
      print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
    }

  return true;
}


/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  */

static bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
{
  unsigned int i, j;
  varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);

  /* Examine store-store (output) dependences.  */

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_data_ref_dependences ===");

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "compare all store-store pairs.");

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
    {
      for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
        {
          struct data_reference *dra =
            VARRAY_GENERIC_PTR (loop_write_refs, i);
          struct data_reference *drb =
            VARRAY_GENERIC_PTR (loop_write_refs, j);
          if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
            return false;
        }
    }

  /* Examine load-store (true/anti) dependences.  */

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "compare all load-store pairs.");

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
    {
      for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
        {
          struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
          struct data_reference *drb =
            VARRAY_GENERIC_PTR (loop_write_refs, j);
          if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
            return false;
        }
    }

  return true;
}


/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized, then FALSE is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */
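
/* Illustrative example (an added sketch; the numbers are assumptions): for
   a vector type with TYPE_ALIGN of 128 bits (an alignment of 16 bytes) and
   the access a[i+1] on an array of 4-byte ints whose base &a is 16-byte
   aligned, the initial access is 4 bytes past the base, so the computation
   below yields DR_MISALIGNMENT (dr) = 4 % 16 = 4.  */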

static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, alignment;
  bool base_aligned_p;
  tree misalign;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  DR_MISALIGNMENT (dr) = -1;

  misalign = STMT_VINFO_VECT_MISALIGNMENT (stmt_info);
  base_aligned_p = STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info);
  base = build_fold_indirect_ref (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info));
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (!misalign)
    {
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if (!base_aligned_p)
    {
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
        {
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned_p
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Alignment required, in bytes:  */
  alignment = ssize_int (TYPE_ALIGN (vectype) / BITS_PER_UNIT);

  /* Modulo alignment.  */
  misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
  if (tree_int_cst_sgn (misalign) < 0)
    {
      /* Negative misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "misalign = %d bytes", DR_MISALIGNMENT (dr));

  return true;
}


/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   This pass may take place at function granularity instead of at loop
   granularity.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
  unsigned int i;

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      if (!vect_compute_data_ref_alignment (dr))
        return false;
    }

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
      if (!vect_compute_data_ref_alignment (dr))
        return false;
    }

  return true;
}


/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized; any other loops that are created by
   the transformations performed in this pass are not supposed to be
   vectorized.  This restriction will be relaxed.  */

static void
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type datarefs;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *dr0 = NULL;
  struct data_reference *dr;
  unsigned int i, j;

  /*
     This pass will require a cost model to guide it on whether to apply
     peeling or versioning or a combination of the two.  For example, the
     scheme that Intel uses when given a loop with several memory accesses
     is as follows: choose one memory access ('p') whose alignment you want
     to force by doing peeling.  Then, either (1) generate a loop in which
     'p' is aligned and all other accesses are not necessarily aligned, or
     (2) use loop versioning to generate one loop in which all accesses are
     aligned, and another loop in which only 'p' is necessarily aligned.

     ("Automatic Intra-Register Vectorization for the Intel Architecture",
     Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
     Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

     Devising a cost model is the most critical aspect of this work.  It will
     guide us on which access to peel for, whether to use loop versioning, how
     many versions to create, etc.  The cost model will probably consist of
     generic considerations as well as target specific considerations (on
     powerpc for example, misaligned stores are more painful than misaligned
     loads).

     Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){            # loop 1A
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){            # loop 1B
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     for (i = 3; i < N; i++){           # loop 2A
        x = q[i];                       # DR_MISALIGNMENT(q) = 0
        p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     if (p is aligned) {
        for (i = 3; i<N; i++){          # loop 3A
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i = 3; i<N; i++){          # loop 3B
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     These loops are later passed to loop_transform to be vectorized.  The
     vectorizer will use the alignment information to guide the transformation
     (whether to generate regular loads/stores, or with special handling for
     misalignment).  */

  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).

     The scheme we use FORNOW: peel to force the alignment of the first
     misaligned store in the loop.
     Rationale: misaligned stores are not yet supported.

     TODO: Use a better cost model.  */

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      dr0 = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      if (!aligned_access_p (dr0))
        {
          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
          LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
          break;
        }
    }

  /* (1.2) Update the alignment info according to the peeling factor.
     If the misalignment of the DR we peel for is M, then the
     peeling factor is VF - M, and the misalignment of each access DR_i
     in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
     If the misalignment of the DR we peel for is unknown, then the
     misalignment of each access DR_i in the loop is also unknown.

     TODO: - consider accesses that are known to have the same
     alignment, even if that alignment is unknown.  */
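
  /* Worked example (an added sketch; the numbers are assumptions): with
     VF = 4, 4-byte elements and UNITS_PER_SIMD_WORD = 16, if
     DR_MISALIGNMENT (dr0) is 8 bytes then mis = 8 / 4 = 2 elements and
     npeel = 4 - 2 = 2 iterations are peeled.  Another access with a known
     misalignment of 12 bytes is then updated to
     (12 + 2 * 4) % 16 = 4 bytes.  */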

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      int mis;
      int npeel = 0;

      if (known_alignment_for_access_p (dr0))
        {
          /* Since it's known at compile time, compute the number of
             iterations in the peeled loop (the peeling factor) for use in
             updating DR_MISALIGNMENT values.  The peeling factor is the
             vectorization factor minus the misalignment as an element
             count.  */
          mis = DR_MISALIGNMENT (dr0);
          mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
          npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
        }

      datarefs = loop_write_datarefs;
      for (j = 0; j < 2; j++)
        {
          for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
            {
              struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);

              if (dr == dr0)
                continue;
              if (known_alignment_for_access_p (dr)
                  && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr0))
                DR_MISALIGNMENT (dr) = 0;
              else if (known_alignment_for_access_p (dr)
                       && known_alignment_for_access_p (dr0))
                {
                  int drsize =
                    GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));

                  DR_MISALIGNMENT (dr) += npeel * drsize;
                  DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
                }
              else
                DR_MISALIGNMENT (dr) = -1;
            }
          datarefs = loop_read_datarefs;
        }

      same_align_drs =
        STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr0)));
      for (i = 0; VEC_iterate (dr_p, same_align_drs, i, dr); i++)
        {
          DR_MISALIGNMENT (dr) = 0;
        }

      DR_MISALIGNMENT (dr0) = 0;
    }
}


/* Function vect_analyze_data_refs_alignment

   Analyze the alignment of the data-references in the loop.
   FOR NOW: Until support for misaligned accesses is in place, only if all
   accesses are aligned can the loop be vectorized.  This restriction will be
   relaxed.  */

static bool
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");

  /* This pass may take place at function granularity instead of at loop
     granularity.  */

  if (!vect_compute_data_refs_alignment (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump,
                 "not vectorized: can't calculate alignment for data ref.");
      return false;
    }

  /* This pass will decide on using loop versioning and/or loop peeling in
     order to enhance the alignment of data references in the loop.  */

  vect_enhance_data_refs_alignment (loop_vinfo);

  /* Finally, check that all the data references in the loop can be
     handled with respect to their alignment.  */

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: unsupported unaligned load.");
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: unsupported unaligned store.");
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  if (LOOP_VINFO_UNALIGNED_DR (loop_vinfo)
      && vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
    fprintf (vect_dump, "Alignment of access forced using peeling.");

  return true;
}


/* Function vect_analyze_data_ref_access.

   Analyze the access pattern of the data-reference DR.  For now, a data
   access has to be consecutive to be considered vectorizable.  */
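
/* Illustrative examples (an added sketch): for 4-byte ints, the access
   a[i] has STEP == 4 == TYPE_SIZE_UNIT (int) and is accepted, while the
   strided access a[2*i] (STEP == 8) and a loop-invariant access like a[0]
   (STEP 0 or unknown) are rejected as non-consecutive.  */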

static bool
vect_analyze_data_ref_access (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree step = STMT_VINFO_VECT_STEP (stmt_info);
  tree scalar_type = TREE_TYPE (DR_REF (dr));

  if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
    {
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "not consecutive access");
      return false;
    }
  return true;
}


/* Function vect_analyze_data_ref_accesses.

   Analyze the access pattern of all the data references in the loop.

   FORNOW: the only access pattern that is considered vectorizable is a
   simple step 1 (consecutive) access.

   FORNOW: handle only arrays and pointer accesses.  */

static bool
vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
{
  unsigned int i;
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      bool ok = vect_analyze_data_ref_access (dr);
      if (!ok)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: complicated access pattern.");
          return false;
        }
    }

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
      bool ok = vect_analyze_data_ref_access (dr);
      if (!ok)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: complicated access pattern.");
          return false;
        }
    }

  return true;
}


/* Function vect_analyze_pointer_ref_access.

   Input:
   STMT - a stmt that contains a data-ref.
   MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
   ACCESS_FN - the access function of MEMREF.

   Output:
   If the data-ref access is vectorizable, return a data_reference structure
   that represents it (DR).  Otherwise - return NULL.
   STEP - the stride of MEMREF in the loop.
   INIT - the initial condition of MEMREF in the loop.  */
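
/* Illustrative example (an added sketch): for

     int *p = init;
     for (i = 0; i < N; i++)
       *p++ = x;

   the ACCESS_FN of the pointer is {init, +, 4}, so INIT is 'init' (loop
   invariant), STEP is 4, a multiple of sizeof (int), and a data_reference
   for the access is built.  A step that is not a constant multiple of the
   pointed-to type size would be rejected as non-consecutive.  */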

static struct data_reference *
vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read,
                                 tree access_fn, tree *ptr_init,
                                 tree *ptr_step)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree step, init;
  tree reftype, innertype;
  tree indx_access_fn;
  int loopnum = loop->num;
  struct data_reference *dr;

  if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: pointer access is not simple.");
      return NULL;
    }

  STRIP_NOPS (init);

  if (!expr_invariant_in_loop_p (loop, init))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump,
                 "not vectorized: initial condition is not loop invariant.");
      return NULL;
    }

  if (TREE_CODE (step) != INTEGER_CST)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump,
                 "not vectorized: non constant step for pointer access.");
      return NULL;
    }

  reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
  if (!POINTER_TYPE_P (reftype))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
      return NULL;
    }

  if (!POINTER_TYPE_P (TREE_TYPE (init)))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
      return NULL;
    }

  *ptr_step = fold_convert (ssizetype, step);
  innertype = TREE_TYPE (reftype);
  if (!COMPLETE_TYPE_P (innertype))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: pointer to incomplete type.");
      return NULL;
    }

  /* Check that STEP is a multiple of the type size.  */
  if (!integer_zerop (size_binop (TRUNC_MOD_EXPR, *ptr_step,
                                  fold_convert (ssizetype,
                                                TYPE_SIZE_UNIT (innertype)))))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: non consecutive access.");
      return NULL;
    }

  indx_access_fn =
    build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    {
      fprintf (vect_dump, "Access function of ptr indx: ");
      print_generic_expr (vect_dump, indx_access_fn, TDF_SLIM);
    }
  dr = init_data_ref (stmt, memref, NULL_TREE, indx_access_fn, is_read);
  *ptr_init = init;
  return dr;
}


/* Function vect_address_analysis

   Return the BASE of the address expression EXPR.
   Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP.

   Input:
   EXPR - the address expression that is being analyzed
   STMT - the statement that contains EXPR or its original memory reference
   IS_READ - TRUE if STMT reads from EXPR, FALSE if writes to EXPR
   VECTYPE - the type that defines the alignment (i.e., we compute
             alignment relative to TYPE_ALIGN(VECTYPE))
   DR - data_reference struct for the original memory reference

   Output:
   BASE (returned value) - the base of the data reference EXPR.
   INITIAL_OFFSET - initial offset of EXPR from BASE (an expression)
   MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if
              the computation is impossible
   STEP - evolution of EXPR in the loop
   BASE_ALIGNED - indicates if BASE is aligned

   If something unexpected is encountered (an unsupported form of data-ref),
   then NULL_TREE is returned.  */
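
/* Illustrative example (an added sketch; the exact values are assumptions):
   for EXPR '&a + 16', the PLUS_EXPR case recurses into '&a' (an ADDR_EXPR,
   handed to vect_object_analysis) and into the constant 16.  The returned
   BASE is '&a', with INITIAL_OFFSET 16 and MISALIGN 16 (later reduced
   modulo the vector-type alignment by the caller), and STEP 0 when the
   address is loop invariant.  */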

static tree
vect_address_analysis (tree expr, tree stmt, bool is_read, tree vectype,
                       struct data_reference *dr, tree *offset, tree *misalign,
                       tree *step, bool *base_aligned)
{
  tree oprnd0, oprnd1, base_address, offset_expr, base_addr0, base_addr1;
  tree address_offset = ssize_int (0), address_misalign = ssize_int (0);
  tree dummy;
  struct ptr_info_def *dummy1;
  subvar_t dummy2;

  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
      /* EXPR is of form {base +/- offset} (or {offset +/- base}).  */
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      STRIP_NOPS (oprnd0);
      STRIP_NOPS (oprnd1);

      /* Recursively try to find the base of the address contained in EXPR.
         For offset, the returned base will be NULL.  */
      base_addr0 = vect_address_analysis (oprnd0, stmt, is_read, vectype, dr,
                                          &address_offset, &address_misalign,
                                          step, base_aligned);

      base_addr1 = vect_address_analysis (oprnd1, stmt, is_read, vectype, dr,
                                          &address_offset, &address_misalign,
                                          step, base_aligned);

      /* We support cases where only one of the operands contains an
         address.  */
      if ((base_addr0 && base_addr1) || (!base_addr0 && !base_addr1))
        return NULL_TREE;

      /* To revert STRIP_NOPS.  */
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      offset_expr = base_addr0 ?
        fold_convert (ssizetype, oprnd1) : fold_convert (ssizetype, oprnd0);

      /* EXPR is of form {base +/- offset} (or {offset +/- base}).  If offset
         is a number, we can add it to the misalignment value calculated for
         base, otherwise, misalignment is NULL.  */
      if (TREE_CODE (offset_expr) == INTEGER_CST && address_misalign)
        *misalign = size_binop (TREE_CODE (expr), address_misalign,
                                offset_expr);
      else
        *misalign = NULL_TREE;

      /* Combine offset (from EXPR {base + offset}) with the offset calculated
         for base.  */
      *offset = size_binop (TREE_CODE (expr), address_offset, offset_expr);
      return base_addr0 ? base_addr0 : base_addr1;

    case ADDR_EXPR:
      base_address = vect_object_analysis (TREE_OPERAND (expr, 0), stmt,
                                           is_read, vectype, &dr, offset,
                                           misalign, step, base_aligned,
                                           &dummy, &dummy1, &dummy2);
      return base_address;

    case SSA_NAME:
      if (!POINTER_TYPE_P (TREE_TYPE (expr)))
        return NULL_TREE;

      if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
        {
          if (vect_get_ptr_offset (expr, vectype, misalign))
            *base_aligned = true;
          else
            *base_aligned = false;
        }
      else
        {
          *base_aligned = true;
          *misalign = ssize_int (0);
        }
      *offset = ssize_int (0);
      *step = ssize_int (0);
      return expr;

    default:
      return NULL_TREE;
    }
}
1703
1704
1705 /* Function vect_object_analysis
1706
1707 Return the BASE of the data reference MEMREF.
1708 Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP.
1709 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset
1710 'a.b[i] + 4B' from a (can be an expression), MISALIGN is an OFFSET
1711 instantiated with initial_conditions of access_functions of variables,
1712 modulo alignment, and STEP is the evolution of the DR_REF in this loop.
1713
1714 Function get_inner_reference is used for the above in case of ARRAY_REF and
1715 COMPONENT_REF.
1716
1717 The structure of the function is as follows:
1718 Part 1:
1719 Case 1. For handled_component_p refs
1720 1.1 call get_inner_reference
1721 1.1.1 analyze offset expr received from get_inner_reference
1722 1.2. build data-reference structure for MEMREF
1723 (fall through with BASE)
1724 Case 2. For declarations
1725 2.1 check alignment
1726 2.2 update DR_BASE_NAME if necessary for alias
1727 Case 3. For INDIRECT_REFs
1728 3.1 get the access function
1729 3.2 analyze evolution of MEMREF
1730 3.3 set data-reference structure for MEMREF
1731 3.4 call vect_address_analysis to analyze INIT of the access function
1732
1733 Part 2:
1734 Combine the results of object and address analysis to calculate
1735 INITIAL_OFFSET, STEP and misalignment info.
1736
1737 Input:
1738 MEMREF - the memory reference that is being analyzed
1739 STMT - the statement that contains MEMREF
1740 IS_READ - TRUE if STMT reads from MEMREF, FALSE if it writes to MEMREF
1741 VECTYPE - the type that defines the alignment (i.e., we compute
1742 alignment relative to TYPE_ALIGN(VECTYPE))
1743
1744 Output:
1745 BASE_ADDRESS (returned value) - the base address of the data reference MEMREF
1746 E.g., if MEMREF is a.b[k].c[i][j], the returned
1747 base is &a.
1748 DR - data_reference struct for MEMREF
1749 INITIAL_OFFSET - initial offset of MEMREF from BASE (an expression)
1750 MISALIGN - offset of MEMREF from BASE in bytes (a constant) or NULL_TREE if
1751 the computation is impossible
1752 STEP - evolution of the DR_REF in the loop
1753 BASE_ALIGNED - indicates if BASE is aligned
1754 MEMTAG - memory tag for aliasing purposes
1755 PTR_INFO - NULL or points-to aliasing info from a pointer SSA_NAME
1756 SUBVAR - Sub-variables of the variable
1757
1758 If something unexpected is encountered (an unsupported form of data-ref),
1759 then NULL_TREE is returned. */
1760
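/* Editorial illustration (a sketch, not part of the original sources):
   for a hypothetical loop

     int a[N];
     for (i = 0; i < N; i++)
       a[i] = 0;

   MEMREF is the ARRAY_REF `a[i]' (Case 1): get_inner_reference returns
   base `a' with offset expression `i * 4' (assuming 4-byte ints), which
   vect_analyze_offset_expr splits into INITIAL_OFFSET 0 (`i' starts at
   0), MISALIGN 0 and STEP 4.  The decl `a' then falls through to
   Case 2, so BASE_ADDRESS is `&a', MEMTAG is `a', and BASE_ALIGNED
   records whether DECL_ALIGN (a) >= TYPE_ALIGN (vectype).  */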
1761 static tree
1762 vect_object_analysis (tree memref, tree stmt, bool is_read,
1763 tree vectype, struct data_reference **dr,
1764 tree *offset, tree *misalign, tree *step,
1765 bool *base_aligned, tree *memtag,
1766 struct ptr_info_def **ptr_info, subvar_t *subvars)
1767 {
1768 tree base = NULL_TREE, base_address = NULL_TREE;
1769 tree object_offset = ssize_int (0), object_misalign = ssize_int (0);
1770 tree object_step = ssize_int (0), address_step = ssize_int (0);
1771 bool object_base_aligned = true, address_base_aligned = true;
1772 tree address_offset = ssize_int (0), address_misalign = ssize_int (0);
1773 HOST_WIDE_INT pbitsize, pbitpos;
1774 tree poffset, bit_pos_in_bytes;
1775 enum machine_mode pmode;
1776 int punsignedp, pvolatilep;
1777 tree ptr_step = ssize_int (0), ptr_init = NULL_TREE;
1778 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1779 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1780 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1781 struct data_reference *ptr_dr = NULL;
1782 tree access_fn, evolution_part, address_to_analyze;
1783
1784 *ptr_info = NULL;
1785
1786 /* Part 1: */
1787 /* Case 1. handled_component_p refs. */
1788 if (handled_component_p (memref))
1789 {
1790 /* 1.1 call get_inner_reference. */
1791 /* Find the base and the offset from it. */
1792 base = get_inner_reference (memref, &pbitsize, &pbitpos, &poffset,
1793 &pmode, &punsignedp, &pvolatilep, false);
1794 if (!base)
1795 return NULL_TREE;
1796
1797 /* 1.1.1 analyze offset expr received from get_inner_reference. */
1798 if (poffset
1799 && !vect_analyze_offset_expr (poffset, loop, TYPE_SIZE_UNIT (vectype),
1800 &object_offset, &object_misalign, &object_step))
1801 {
1802 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1803 {
1804 fprintf (vect_dump, "failed to compute offset or step for ");
1805 print_generic_expr (vect_dump, memref, TDF_SLIM);
1806 }
1807 return NULL_TREE;
1808 }
1809
1810 /* Add bit position to OFFSET and MISALIGN. */
1811
1812 bit_pos_in_bytes = ssize_int (pbitpos / BITS_PER_UNIT);
1813 /* Check that there is no remainder in bits. */
1814 if (pbitpos % BITS_PER_UNIT)
1815 {
1816 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1817 fprintf (vect_dump, "bit offset alignment.");
1818 return NULL_TREE;
1819 }
1820 object_offset = size_binop (PLUS_EXPR, bit_pos_in_bytes, object_offset);
1821 if (object_misalign)
1822 object_misalign = size_binop (PLUS_EXPR, object_misalign,
1823 bit_pos_in_bytes);
1824
1825 /* Create data-reference for MEMREF. TODO: handle COMPONENT_REFs. */
1826 if (!(*dr))
1827 {
1828 if (TREE_CODE (memref) == ARRAY_REF)
1829 *dr = analyze_array (stmt, memref, is_read);
1830 else
1831 /* FORNOW. */
1832 return NULL_TREE;
1833 }
1834 memref = base; /* To continue analysis of BASE. */
1835 /* fall through */
1836 }
1837
1838 /* Part 1: Case 2. Declarations. */
1839 if (DECL_P (memref))
1840 {
1841 /* We expect to get a decl only if we already have a DR. */
1842 if (!(*dr))
1843 {
1844 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1845 {
1846 fprintf (vect_dump, "unhandled decl ");
1847 print_generic_expr (vect_dump, memref, TDF_SLIM);
1848 }
1849 return NULL_TREE;
1850 }
1851
1852 /* 2.1 check the alignment. */
1853 if (DECL_ALIGN (memref) >= TYPE_ALIGN (vectype))
1854 object_base_aligned = true;
1855 else
1856 object_base_aligned = false;
1857
1858 /* 2.2 update DR_BASE_NAME if necessary. */
1859 if (!DR_BASE_NAME ((*dr)))
1860 /* For alias analysis. In case the analysis of INDIRECT_REF brought
1861 us to the object. */
1862 DR_BASE_NAME ((*dr)) = memref;
1863
1864 if (SSA_VAR_P (memref) && var_can_have_subvars (memref))
1865 *subvars = get_subvars_for_var (memref);
1866 base_address = build_fold_addr_expr (memref);
1867 *memtag = memref;
1868 }
1869
1870 /* Part 1: Case 3. INDIRECT_REFs. */
1871 else if (TREE_CODE (memref) == INDIRECT_REF)
1872 {
1873 tree ptr_ref = TREE_OPERAND (memref, 0);
1874 if (TREE_CODE (ptr_ref) == SSA_NAME)
1875 *ptr_info = SSA_NAME_PTR_INFO (ptr_ref);
1876
1877 /* 3.1 get the access function. */
1878 access_fn = analyze_scalar_evolution (loop, ptr_ref);
1879 if (!access_fn)
1880 {
1881 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1882 LOOP_LOC (loop_vinfo)))
1883 fprintf (vect_dump, "not vectorized: complicated pointer access.");
1884 return NULL_TREE;
1885 }
1886 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1887 {
1888 fprintf (vect_dump, "Access function of ptr: ");
1889 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1890 }
1891
1892 /* 3.2 analyze evolution of MEMREF. */
1893 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1894 if (evolution_part)
1895 {
1896 ptr_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read,
1897 access_fn, &ptr_init, &ptr_step);
1898 if (!(ptr_dr))
1899 return NULL_TREE;
1900
1901 object_step = size_binop (PLUS_EXPR, object_step, ptr_step);
1902 address_to_analyze = ptr_init;
1903 }
1904 else
1905 {
1906 if (!(*dr))
1907 {
1908 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1909 LOOP_LOC (loop_vinfo)))
1910 fprintf (vect_dump, "not vectorized: ptr is loop invariant.");
1911 return NULL_TREE;
1912 }
1913 /* Since a DR already exists for MEMREF, we are analyzing the init of
1914 the access function, which does not necessarily have an evolution
1915 in the loop. */
1916 address_to_analyze = initial_condition_in_loop_num (access_fn,
1917 loop->num);
1918 }
1919
1920 /* 3.3 set data-reference structure for MEMREF. */
1921 *dr = (*dr) ? *dr : ptr_dr;
1922
1923 /* 3.4 call vect_address_analysis to analyze INIT of the access
1924 function. */
1925 base_address = vect_address_analysis (address_to_analyze, stmt, is_read,
1926 vectype, *dr, &address_offset, &address_misalign,
1927 &address_step, &address_base_aligned);
1928 if (!base_address)
1929 return NULL_TREE;
1930
1931 switch (TREE_CODE (base_address))
1932 {
1933 case SSA_NAME:
1934 *memtag = get_var_ann (SSA_NAME_VAR (base_address))->type_mem_tag;
1935 if (!(*memtag) && TREE_CODE (TREE_OPERAND (memref, 0)) == SSA_NAME)
1936 *memtag = get_var_ann (
1937 SSA_NAME_VAR (TREE_OPERAND (memref, 0)))->type_mem_tag;
1938 break;
1939 case ADDR_EXPR:
1940 *memtag = TREE_OPERAND (base_address, 0);
1941 break;
1942 default:
1943 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1944 LOOP_LOC (loop_vinfo)))
1945 {
1946 fprintf (vect_dump, "not vectorized: no memtag ref: ");
1947 print_generic_expr (vect_dump, memref, TDF_SLIM);
1948 }
1949 return NULL_TREE;
1950 }
1951 }
1952
1953 if (!base_address)
1954 /* MEMREF cannot be analyzed. */
1955 return NULL_TREE;
1956
1957 if (SSA_VAR_P (*memtag) && var_can_have_subvars (*memtag))
1958 *subvars = get_subvars_for_var (*memtag);
1959
1960 /* Part 2: Combine the results of object and address analysis to calculate
1961 INITIAL_OFFSET, STEP and misalignment info. */
1962 *offset = size_binop (PLUS_EXPR, object_offset, address_offset);
1963 if (object_misalign && address_misalign)
1964 *misalign = size_binop (PLUS_EXPR, object_misalign, address_misalign);
1965 else
1966 *misalign = NULL_TREE;
1967 *step = size_binop (PLUS_EXPR, object_step, address_step);
1968 *base_aligned = object_base_aligned && address_base_aligned;
1969
1970 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1971 {
1972 fprintf (vect_dump, "Results of object analysis for: ");
1973 print_generic_expr (vect_dump, memref, TDF_SLIM);
1974 fprintf (vect_dump, "\n\tbase_address: ");
1975 print_generic_expr (vect_dump, base_address, TDF_SLIM);
1976 fprintf (vect_dump, "\n\toffset: ");
1977 print_generic_expr (vect_dump, *offset, TDF_SLIM);
1978 fprintf (vect_dump, "\n\tstep: ");
1979 print_generic_expr (vect_dump, *step, TDF_SLIM);
1980 fprintf (vect_dump, "\n\tbase aligned %d\n\tmisalign: ", *base_aligned);
1981 print_generic_expr (vect_dump, *misalign, TDF_SLIM);
1982 }
1983 return base_address;
1984 }
1985
1986
1987 /* Function vect_analyze_data_refs.
1988
1989 Find all the data references in the loop.
1990
1991 The general structure of the analysis of data refs in the vectorizer is as
1992 follows:
1993 1- vect_analyze_data_refs(loop):
1994 Find and analyze all data-refs in the loop:
1995 foreach ref
1996 base_address = vect_object_analysis(ref)
1997 1.1- vect_object_analysis(ref):
1998 Analyze ref, and build a DR (data_reference struct) for it;
1999 compute base, initial_offset, step and alignment.
2000 Call get_inner_reference for refs handled in this function.
2001 Call vect_address_analysis(addr) to analyze pointer type expressions.
2002 Set ref_stmt.base, ref_stmt.initial_offset, ref_stmt.alignment,
2003 ref_stmt.memtag, ref_stmt.ptr_info and ref_stmt.step accordingly.
2004 2- vect_analyze_data_ref_dependences(): apply dependence testing using ref_stmt.DR
2005 3- vect_analyze_data_refs_alignment(): check that ref_stmt.alignment is ok.
2006 4- vect_analyze_data_ref_accesses(): check that ref_stmt.step is ok.
2007
2008 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
2009 whose base is really an array (not a pointer) and whose alignment
2010 can be forced. This restriction will be relaxed. */
2011
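/* Editorial illustration (a sketch, not part of the original sources):
   a hypothetical copy loop

     for (i = 0; i < n; i++)
       a[i] = b[i];

   is gimplified into two statements per iteration -- a load
   `t_1 = b[i]' (carrying a VUSE) and a store `a[i] = t_1' (carrying a
   V_MAY_DEF) -- so each statement has either vuses or vdefs, never
   both, which is exactly the assumption checked below.  The load's DR
   is pushed onto LOOP_VINFO_DATAREF_READS, the store's onto
   LOOP_VINFO_DATAREF_WRITES.  */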
2012 static bool
2013 vect_analyze_data_refs (loop_vec_info loop_vinfo)
2014 {
2015 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2016 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2017 int nbbs = loop->num_nodes;
2018 block_stmt_iterator si;
2019 int j;
2020 struct data_reference *dr;
2021
2022 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2023 fprintf (vect_dump, "=== vect_analyze_data_refs ===");
2024
2025 for (j = 0; j < nbbs; j++)
2026 {
2027 basic_block bb = bbs[j];
2028 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2029 {
2030 bool is_read = false;
2031 tree stmt = bsi_stmt (si);
2032 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2033 varray_type *datarefs = NULL;
2034 tree memref = NULL;
2035 tree scalar_type, vectype;
2036 tree base, offset, misalign, step, tag;
2037 struct ptr_info_def *ptr_info;
2038 bool base_aligned;
2039 subvar_t subvars = NULL;
2040 bool no_vuse, no_vmaymust;
2041
2042 /* Assumption: a data-ref exists in STMT if and only if
2043 it has vuses/vdefs. */
2044
2045 no_vuse = ZERO_SSA_OPERANDS (stmt, SSA_OP_VUSE);
2046 no_vmaymust = ZERO_SSA_OPERANDS (stmt,
2047 SSA_OP_VMAYDEF | SSA_OP_VMUSTDEF);
2048 if (no_vuse && no_vmaymust)
2049 continue;
2050
2051 if (!no_vuse && !no_vmaymust)
2052 {
2053 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2054 {
2055 fprintf (vect_dump, "unexpected vdefs and vuses in stmt: ");
2056 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2057 }
2058 return false;
2059 }
2060
2061 if (TREE_CODE (stmt) != MODIFY_EXPR)
2062 {
2063 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2064 {
2065 fprintf (vect_dump, "unexpected vops in stmt: ");
2066 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2067 }
2068 return false;
2069 }
2070
2071 if (!no_vuse)
2072 {
2073 memref = TREE_OPERAND (stmt, 1);
2074 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
2075 is_read = true;
2076 }
2077 else /* vdefs */
2078 {
2079 memref = TREE_OPERAND (stmt, 0);
2080 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
2081 is_read = false;
2082 }
2083
2084 scalar_type = TREE_TYPE (memref);
2085 vectype = get_vectype_for_scalar_type (scalar_type);
2086 if (!vectype)
2087 {
2088 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2089 {
2090 fprintf (vect_dump, "no vectype for stmt: ");
2091 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2092 fprintf (vect_dump, " scalar_type: ");
2093 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2094 }
2095 /* It is not possible to vectorize this data reference. */
2096 return false;
2097 }
2098 /* Analyze MEMREF. If it is of a supported form, build data_reference
2099 struct for it (DR). */
2100 dr = NULL;
2101 base = vect_object_analysis (memref, stmt, is_read, vectype, &dr,
2102 &offset, &misalign, &step,
2103 &base_aligned, &tag, &ptr_info,
2104 &subvars);
2105 if (!base)
2106 {
2107 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2108 LOOP_LOC (loop_vinfo)))
2109 {
2110 fprintf (vect_dump, "not vectorized: unhandled data ref: ");
2111 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2112 }
2113 return false;
2114 }
2115 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info) = base;
2116 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
2117 STMT_VINFO_VECT_STEP (stmt_info) = step;
2118 STMT_VINFO_VECT_MISALIGNMENT (stmt_info) = misalign;
2119 STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info) = base_aligned;
2120 STMT_VINFO_MEMTAG (stmt_info) = tag;
2121 STMT_VINFO_PTR_INFO (stmt_info) = ptr_info;
2122 STMT_VINFO_SUBVARS (stmt_info) = subvars;
2123 STMT_VINFO_VECTYPE (stmt_info) = vectype;
2124 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
2125 STMT_VINFO_DATA_REF (stmt_info) = dr;
2126 }
2127 }
2128
2129 return true;
2130 }
2131
2132
2133 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
2134
2135 /* Function vect_mark_relevant.
2136
2137 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
2138
2139 static void
2140 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
2141 bool relevant_p, bool live_p)
2142 {
2143 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2144 bool save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info);
2145 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2146
2147 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2148 fprintf (vect_dump, "mark relevant %d, live %d.",relevant_p, live_p);
2149
2150 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
2151
2152 if (TREE_CODE (stmt) == PHI_NODE)
2153 /* Don't mark as relevant because it's not going to be vectorized. */
2154 return;
2155
2156 STMT_VINFO_RELEVANT_P (stmt_info) |= relevant_p;
2157
2158 if (STMT_VINFO_RELEVANT_P (stmt_info) == save_relevant_p
2159 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
2160 {
2161 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2162 fprintf (vect_dump, "already marked relevant/live.");
2163 return;
2164 }
2165
2166 VEC_safe_push (tree, heap, *worklist, stmt);
2167 }
2168
2169
2170 /* Function vect_stmt_relevant_p.
2171
2172 Return true if STMT in loop that is represented by LOOP_VINFO is
2173 "relevant for vectorization".
2174
2175 A stmt is considered "relevant for vectorization" if:
2176 - it has uses outside the loop.
2177 - it has vdefs (it alters memory).
2178 - it is a control stmt in the loop (other than the loop exit condition).
2179
2180 CHECKME: what other side effects would the vectorizer allow? */
2181
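/* Editorial illustration (a sketch, not part of the original sources):
   in a hypothetical loop

     for (i = 0; i < n; i++)
       {
         sum = sum + a[i];   (live: sum is used after the loop; not relevant)
         b[i] = 0;           (relevant: it has a vdef; not live)
       }

   a stmt whose result is consumed only inside the loop and that does
   not alter memory is neither live nor relevant here; it is reached
   later only by propagation from the stmts that use it.  */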
2182 static bool
2183 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
2184 bool *relevant_p, bool *live_p)
2185 {
2186 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2187 ssa_op_iter op_iter;
2188 imm_use_iterator imm_iter;
2189 use_operand_p use_p;
2190 def_operand_p def_p;
2191
2192 *relevant_p = false;
2193 *live_p = false;
2194
2195 /* cond stmt other than loop exit cond. */
2196 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
2197 *relevant_p = true;
2198
2199 /* changing memory. */
2200 if (TREE_CODE (stmt) != PHI_NODE)
2201 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
2202 {
2203 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2204 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
2205 *relevant_p = true;
2206 }
2207
2208 /* uses outside the loop. */
2209 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
2210 {
2211 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
2212 {
2213 basic_block bb = bb_for_stmt (USE_STMT (use_p));
2214 if (!flow_bb_inside_loop_p (loop, bb))
2215 {
2216 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2217 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
2218
2219 /* We expect all such uses to be in the loop exit phis
2220 (because the loop is in loop-closed SSA form). */
2221 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
2222 gcc_assert (bb == loop->single_exit->dest);
2223
2224 *live_p = true;
2225 }
2226 }
2227 }
2228
2229 return (*live_p || *relevant_p);
2230 }
2231
2232
2233 /* Function vect_mark_stmts_to_be_vectorized.
2234
2235 Not all stmts in the loop need to be vectorized. For example:
2236
2237 for i...
2238 for j...
2239 1. T0 = i + j
2240 2. T1 = a[T0]
2241
2242 3. j = j + 1
2243
2244 Stmts 1 and 3 do not need to be vectorized, because loop control and
2245 addressing of vectorized data-refs are handled differently.
2246
2247 This pass detects such stmts. */
2248
2249 static bool
2250 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2251 {
2252 VEC(tree,heap) *worklist;
2253 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2254 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2255 unsigned int nbbs = loop->num_nodes;
2256 block_stmt_iterator si;
2257 tree stmt, use;
2258 stmt_ann_t ann;
2259 ssa_op_iter iter;
2260 unsigned int i;
2261 stmt_vec_info stmt_vinfo;
2262 basic_block bb;
2263 tree phi;
2264 bool relevant_p, live_p;
2265 tree def, def_stmt;
2266 enum vect_def_type dt;
2267
2268 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2269 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
2270
2271 worklist = VEC_alloc (tree, heap, 64);
2272
2273 /* 1. Init worklist. */
2274
2275 bb = loop->header;
2276 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2277 {
2278 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2279 {
2280 fprintf (vect_dump, "init: phi relevant? ");
2281 print_generic_expr (vect_dump, phi, TDF_SLIM);
2282 }
2283
2284 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant_p, &live_p))
2285 vect_mark_relevant (&worklist, phi, relevant_p, live_p);
2286 }
2287
2288 for (i = 0; i < nbbs; i++)
2289 {
2290 bb = bbs[i];
2291 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2292 {
2293 stmt = bsi_stmt (si);
2294
2295 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2296 {
2297 fprintf (vect_dump, "init: stmt relevant? ");
2298 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2299 }
2300
2301 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant_p, &live_p))
2302 vect_mark_relevant (&worklist, stmt, relevant_p, live_p);
2303 }
2304 }
2305
2306
2307 /* 2. Process worklist. */
2308
2309 while (VEC_length (tree, worklist) > 0)
2310 {
2311 stmt = VEC_pop (tree, worklist);
2312
2313 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2314 {
2315 fprintf (vect_dump, "worklist: examine stmt: ");
2316 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2317 }
2318
2319 /* Examine the USEs of STMT. For each ssa-name USE that is defined
2320 in the loop, mark the stmt that defines it (DEF_STMT) as
2321 relevant/irrelevant and live/dead according to the liveness and
2322 relevance properties of STMT.
2323 */
2324
2325 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
2326
2327 ann = stmt_ann (stmt);
2328 stmt_vinfo = vinfo_for_stmt (stmt);
2329
2330 relevant_p = STMT_VINFO_RELEVANT_P (stmt_vinfo);
2331 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
2332
2333 /* Generally, the liveness and relevance properties of STMT are
2334 propagated to the DEF_STMTs of its USEs:
2335 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
2336 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- relevant_p
2337
2338 Exceptions:
2339
2340 - if USE is used only for address computations (e.g. array indexing),
2341 which does not need to be directly vectorized, then the
2342 liveness/relevance of the respective DEF_STMT is left unchanged.
2343
2344 - if STMT has been identified as defining a reduction variable, then:
2345 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
2346 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true
2347 because even though STMT is classified as live (since it defines a
2348 value that is used across loop iterations) and irrelevant (since it
2349 is not used inside the loop), it will be vectorized, and therefore
2350 the corresponding DEF_STMTs need to be marked as relevant.
2351 */
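      /* Editorial illustration (a sketch, not part of the original
         sources): for a reduction stmt `sum_1 = sum_0 + t_2', STMT is
         live (sum is used across iterations and after the loop) but not
         relevant; the flip below makes the stmts that feed t_2 be
         marked relevant, so that they are vectorized together with the
         reduction.  */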
2352
2353 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
2354 {
2355 gcc_assert (!relevant_p && live_p);
2356 relevant_p = true;
2357 live_p = false;
2358 }
2359
2360 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2361 {
2362 /* We are only interested in uses that need to be vectorized. Uses
2363 that are needed only for address computation are not considered relevant.
2364 */
2365 if (exist_non_indexing_operands_for_use_p (use, stmt))
2366 {
2367 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
2368 {
2369 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2370 LOOP_LOC (loop_vinfo)))
2371 fprintf (vect_dump,
2372 "not vectorized: unsupported use in stmt.");
2373 VEC_free (tree, heap, worklist);
2374 return false;
2375 }
2376
2377 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
2378 continue;
2379
2380 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2381 {
2382 fprintf (vect_dump, "worklist: examine use %d: ", i);
2383 print_generic_expr (vect_dump, use, TDF_SLIM);
2384 }
2385
2386 bb = bb_for_stmt (def_stmt);
2387 if (!flow_bb_inside_loop_p (loop, bb))
2388 continue;
2389
2390 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2391 {
2392 fprintf (vect_dump, "def_stmt: ");
2393 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
2394 }
2395
2396 vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p);
2397 }
2398 }
2399 } /* while worklist */
2400
2401 VEC_free (tree, heap, worklist);
2402 return true;
2403 }
2404
2405
2406 /* Function vect_can_advance_ivs_p
2407
2408 In case the number of iterations that LOOP iterates is unknown at compile
2409 time, an epilog loop will be generated, and the loop induction variables
2410 (IVs) will be "advanced" to the value they are supposed to take just before
2411 the epilog loop. Here we check that the access function of the loop IVs
2412 and the expression that represents the loop bound are simple enough.
2413 These restrictions will be relaxed in the future. */
2414
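/* Editorial illustration (a sketch, not part of the original sources):
   if the vector loop executes `niters - niters % VF' iterations, a
   simple IV `i_1 = {0, +, 1}_loop' can be advanced for the epilog by
   emitting `i_epilog = 0 + (niters - niters % VF) * 1'.  The check
   below accepts only IVs whose evolution part is such a simple,
   non-chrec step, so that this update expression can be built.  */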
2415 static bool
2416 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2417 {
2418 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2419 basic_block bb = loop->header;
2420 tree phi;
2421
2422 /* Analyze phi functions of the loop header. */
2423
2424 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2425 fprintf (vect_dump, "=== vect_can_advance_ivs_p ===");
2426
2427 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2428 {
2429 tree access_fn = NULL;
2430 tree evolution_part;
2431
2432 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2433 {
2434 fprintf (vect_dump, "Analyze phi: ");
2435 print_generic_expr (vect_dump, phi, TDF_SLIM);
2436 }
2437
2438 /* Skip virtual phi's. The data dependences that are associated with
2439 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2440
2441 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2442 {
2443 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2444 fprintf (vect_dump, "virtual phi. skip.");
2445 continue;
2446 }
2447
2448 /* Analyze the evolution function. */
2449
2450 access_fn = instantiate_parameters
2451 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
2452
2453 if (!access_fn)
2454 {
2455 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2456 fprintf (vect_dump, "No Access function.");
2457 return false;
2458 }
2459
2460 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2461 {
2462 fprintf (vect_dump, "Access function of PHI: ");
2463 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
2464 }
2465
2466 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2467
2468 if (evolution_part == NULL_TREE)
2469 {
2470 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2471 fprintf (vect_dump, "No evolution.");
2472 return false;
2473 }
2474
2475 /* FORNOW: We do not transform initial conditions of IVs
2476 whose evolution functions are polynomials of degree >= 2. */
2477
2478 if (tree_is_chrec (evolution_part))
2479 return false;
2480 }
2481
2482 return true;
2483 }
2484
2485
2486 /* Function vect_get_loop_niters.
2487
2488 Determine the number of iterations the loop executes.
2489 If an expression that represents the number of iterations
2490 can be constructed, place it in NUMBER_OF_ITERATIONS.
2491 Return the loop exit condition. */
2492
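/* Editorial illustration (a sketch, not part of the original sources):
   for `for (i = 0; i < n; i++)' the returned exit condition is the
   COND_EXPR that tests `i < n', and NUMBER_OF_ITERATIONS is `n' (or
   the corresponding constant, when `n' is known at compile time).  */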
2493 static tree
2494 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
2495 {
2496 tree niters;
2497
2498 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2499 fprintf (vect_dump, "=== get_loop_niters ===");
2500
2501 niters = number_of_iterations_in_loop (loop);
2502
2503 if (niters != NULL_TREE
2504 && niters != chrec_dont_know)
2505 {
2506 *number_of_iterations = niters;
2507
2508 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2509 {
2510 fprintf (vect_dump, "==> get_loop_niters:" );
2511 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
2512 }
2513 }
2514
2515 return get_loop_exit_condition (loop);
2516 }
2517
2518
2519 /* Function vect_analyze_loop_form.
2520
2521 Verify the following restrictions (some may be relaxed in the future):
2522 - it's an inner-most loop
2523 - number of BBs = 2 (which are the loop header and the latch)
2524 - the loop has a pre-header
2525 - the loop has a single entry and exit
2526 - the loop exit condition is simple enough, and the number of iterations
2527 can be analyzed (a countable loop). */
2528
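/* Editorial illustration (a sketch, not part of the original sources):
   an accepted loop has the do-while shape produced by the loop
   optimizers, e.g.

     i = 0;
     do { a[i] = 0; i++; } while (i < n);

   where the header holds the body and the exit test and the latch
   block is empty; a loop with a second exit (say, a `break' on a
   data-dependent condition) fails the single_exit check below.  */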
2529 static loop_vec_info
2530 vect_analyze_loop_form (struct loop *loop)
2531 {
2532 loop_vec_info loop_vinfo;
2533 tree loop_cond;
2534 tree number_of_iterations = NULL;
2535 LOC loop_loc;
2536
2537 loop_loc = find_loop_location (loop);
2538
2539 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2540 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
2541
2542 if (loop->inner)
2543 {
2544 if (vect_print_dump_info (REPORT_OUTER_LOOPS, loop_loc))
2545 fprintf (vect_dump, "not vectorized: nested loop.");
2546 return NULL;
2547 }
2548
2549 if (!loop->single_exit
2550 || loop->num_nodes != 2
2551 || EDGE_COUNT (loop->header->preds) != 2)
2552 {
2553 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2554 {
2555 if (!loop->single_exit)
2556 fprintf (vect_dump, "not vectorized: multiple exits.");
2557 else if (loop->num_nodes != 2)
2558 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
2559 else if (EDGE_COUNT (loop->header->preds) != 2)
2560 fprintf (vect_dump, "not vectorized: too many incoming edges.");
2561 }
2562
2563 return NULL;
2564 }
2565
2566 /* We assume that the loop exit condition is at the end of the loop, i.e.,
2567 that the loop is represented as a do-while (with a proper if-guard
2568 before the loop if needed), where the loop header contains all the
2569 executable statements, and the latch is empty. */
2570 if (!empty_block_p (loop->latch))
2571 {
2572 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2573 fprintf (vect_dump, "not vectorized: unexpected loop form.");
2574 return NULL;
2575 }
2576
2577 /* Make sure there exists a single-predecessor exit bb: */
2578 if (!single_pred_p (loop->single_exit->dest))
2579 {
2580 edge e = loop->single_exit;
2581 if (!(e->flags & EDGE_ABNORMAL))
2582 {
2583 split_loop_exit_edge (e);
2584 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2585 fprintf (vect_dump, "split exit edge.");
2586 }
2587 else
2588 {
2589 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2590 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
2591 return NULL;
2592 }
2593 }
2594
2595 if (empty_block_p (loop->header))
2596 {
2597 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2598 fprintf (vect_dump, "not vectorized: empty loop.");
2599 return NULL;
2600 }
2601
2602 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
2603 if (!loop_cond)
2604 {
2605 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2606 fprintf (vect_dump, "not vectorized: complicated exit condition.");
2607 return NULL;
2608 }
2609
2610 if (!number_of_iterations)
2611 {
2612 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2613 fprintf (vect_dump,
2614 "not vectorized: number of iterations cannot be computed.");
2615 return NULL;
2616 }
2617
2618 if (chrec_contains_undetermined (number_of_iterations))
2619 {
2620 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2621 fprintf (vect_dump, "Infinite number of iterations.");
2622 return NULL;
2623 }
2624
2625 loop_vinfo = new_loop_vec_info (loop);
2626 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2627
2628 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2629 {
2630 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2631 {
2632 fprintf (vect_dump, "Symbolic number of iterations is ");
2633 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
2634 }
2635 }
2636 else
2637 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
2638 {
2639 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, loop_loc))
2640 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2641 return NULL;
2642 }
2643
2644 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2645 LOOP_VINFO_LOC (loop_vinfo) = loop_loc;
2646
2647 return loop_vinfo;
2648 }
2649
2650
2651 /* Function vect_analyze_loop.
2652
2653 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2654 for it. The different analyses will record information in the
2655 loop_vec_info struct. */
2656 loop_vec_info
2657 vect_analyze_loop (struct loop *loop)
2658 {
2659 bool ok;
2660 loop_vec_info loop_vinfo;
2661
2662 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2663 fprintf (vect_dump, "===== analyze_loop_nest =====");
2664
2665 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
2666
2667 loop_vinfo = vect_analyze_loop_form (loop);
2668 if (!loop_vinfo)
2669 {
2670 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2671 fprintf (vect_dump, "bad loop form.");
2672 return NULL;
2673 }
2674
2675 /* Find all data references in the loop (which correspond to vdefs/vuses)
2676 and analyze their evolution in the loop.
2677
2678 FORNOW: Handle only simple array references whose
2679 alignment can be forced, and aligned pointer-references. */
2680
2681 ok = vect_analyze_data_refs (loop_vinfo);
2682 if (!ok)
2683 {
2684 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2685 fprintf (vect_dump, "bad data references.");
2686 destroy_loop_vec_info (loop_vinfo);
2687 return NULL;
2688 }
2689
2690 /* Classify all cross-iteration scalar data-flow cycles.
2691 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2692
2693 vect_analyze_scalar_cycles (loop_vinfo);
2694
2695 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2696
2697 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2698 if (!ok)
2699 {
2700 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2701 fprintf (vect_dump, "unexpected pattern.");
2702 destroy_loop_vec_info (loop_vinfo);
2703 return NULL;
2704 }
2705
2706 ok = vect_determine_vectorization_factor (loop_vinfo);
2707 if (!ok)
2708 {
2709 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2710 fprintf (vect_dump, "can't determine vectorization factor.");
2711 destroy_loop_vec_info (loop_vinfo);
2712 return NULL;
2713 }
2714
2715 /* Analyze data dependences between the data-refs in the loop.
2716 FORNOW: fail at the first data dependence that we encounter. */
2717
2718 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2719 if (!ok)
2720 {
2721 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2722 fprintf (vect_dump, "bad data dependence.");
2723 destroy_loop_vec_info (loop_vinfo);
2724 return NULL;
2725 }
2726
2727 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2728 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2729
2730 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2731 if (!ok)
2732 {
2733 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2734 fprintf (vect_dump, "bad data access.");
2735 destroy_loop_vec_info (loop_vinfo);
2736 return NULL;
2737 }
2738
2739 /* Analyze the alignment of the data-refs in the loop.
2740 FORNOW: Only aligned accesses are handled. */
2741
2742 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2743 if (!ok)
2744 {
2745 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2746 fprintf (vect_dump, "bad data alignment.");
2747 destroy_loop_vec_info (loop_vinfo);
2748 return NULL;
2749 }
2750
2751 /* Scan all the operations in the loop and make sure they are
2752 vectorizable. */
2753
2754 ok = vect_analyze_operations (loop_vinfo);
2755 if (!ok)
2756 {
2757 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2758 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2759 destroy_loop_vec_info (loop_vinfo);
2760 return NULL;
2761 }
2762
2763 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2764
2765 return loop_vinfo;
2766 }
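/* Editorial usage sketch (not part of the original sources): the
   vectorizer driver iterates over the loop tree, analyzes each loop,
   and transforms only those that pass all of the analyses above.
   Roughly (simplified; the real driver is vectorize_loops in
   tree-vectorizer.c):

     loop_vec_info loop_vinfo = vect_analyze_loop (loop);
     if (loop_vinfo && LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
       vect_transform_loop (loop_vinfo, loops);

   On failure, vect_analyze_loop has already destroyed the
   loop_vec_info and returned NULL.  */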