/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "errors.h"
#include "ggc.h"
#include "tree.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"

/* Main analysis functions.  */
static loop_vec_info vect_analyze_loop_form (struct loop *);
static bool vect_analyze_data_refs (loop_vec_info);
static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
static bool vect_analyze_scalar_cycles (loop_vec_info);
static bool vect_analyze_data_ref_accesses (loop_vec_info);
static bool vect_analyze_data_ref_dependences (loop_vec_info);
static bool vect_analyze_data_refs_alignment (loop_vec_info);
static bool vect_compute_data_refs_alignment (loop_vec_info);
static void vect_enhance_data_refs_alignment (loop_vec_info);
static bool vect_analyze_operations (loop_vec_info);
static bool vect_determine_vectorization_factor (loop_vec_info);

/* Utility functions for the analyses.  */
static bool exist_non_indexing_operands_for_use_p (tree, tree);
static void vect_mark_relevant (varray_type *, tree);
static bool vect_stmt_relevant_p (tree, loop_vec_info);
static tree vect_get_loop_niters (struct loop *, tree *);
static bool vect_analyze_data_ref_dependence
  (struct data_reference *, struct data_reference *, loop_vec_info);
static bool vect_compute_data_ref_alignment (struct data_reference *);
static bool vect_analyze_data_ref_access (struct data_reference *);
static struct data_reference * vect_analyze_pointer_ref_access
  (tree, tree, bool, tree, tree *, tree *);
static bool vect_can_advance_ivs_p (loop_vec_info);
static tree vect_get_ptr_offset (tree, tree, tree *);
static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *,
                                      tree *, tree *);
static bool vect_base_addr_differ_p (struct data_reference *,
                                     struct data_reference *drb, bool *);
static tree vect_object_analysis (tree, tree, bool, tree,
                                  struct data_reference **, tree *, tree *,
                                  tree *, bool *, tree *, subvar_t *);
static tree vect_address_analysis (tree, tree, bool, tree,
                                   struct data_reference *, tree *, tree *,
                                   tree *, bool *);


/* Function vect_get_ptr_offset

   Compute the OFFSET modulo vector-type alignment of pointer REF in bits.  */

static tree
vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
                     tree vectype ATTRIBUTE_UNUSED,
                     tree *offset ATTRIBUTE_UNUSED)
{
  /* TODO: Use alignment information.  */
  return NULL_TREE;
}


/* Function vect_analyze_offset_expr

   Given an offset expression EXPR received from get_inner_reference, analyze
   it and create an expression for INITIAL_OFFSET by substituting the variables
   of EXPR with initial_condition of the corresponding access_fn in the loop.
   E.g.,
     for i
       for (j = 3; j < N; j++)
         a[j].b[i][j] = 0;

   For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'.  'i' cannot be
   substituted, since its access_fn in the inner loop is i.  'j' will be
   substituted with 3.  The INITIAL_OFFSET will be 'i * C_i + C`', where
   C` = 3 * C_j + C.

   Compute MISALIGN (the misalignment of the data reference initial access from
   its base) if possible.  Misalignment can be calculated only if all the
   variables can be substituted with constants, or if a variable is multiplied
   by a multiple of VECTYPE_ALIGNMENT.  In the above example, since 'i' cannot
   be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple
   of VECTYPE_ALIGNMENT, and C` otherwise.  (The MISALIGN modulo
   VECTYPE_ALIGNMENT computation is performed in the caller of this function.)

   STEP is the evolution of the data reference in this loop in bytes.
   In the above example, STEP is C_j.

   Return FALSE if the analysis fails, e.g., there is no access_fn for a
   variable.  In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
   are NULL_TREEs.  Otherwise, return TRUE.  */
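
/* A worked example (illustrative, not from the original sources): with
   VECTYPE_ALIGNMENT == 16 and EXPR == 'j * 4 + 2', where 'j' has initial
   condition 3 and step 1, substitution gives
   INITIAL_OFFSET == 3 * 4 + 2 == 14, MISALIGN == 14 (reduced modulo 16 by
   the caller), and STEP == 1 * 4 == 4 bytes.  */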

static bool
vect_analyze_offset_expr (tree expr,
                          struct loop *loop,
                          tree vectype_alignment,
                          tree *initial_offset,
                          tree *misalign,
                          tree *step)
{
  tree oprnd0;
  tree oprnd1;
  tree left_offset = ssize_int (0);
  tree right_offset = ssize_int (0);
  tree left_misalign = ssize_int (0);
  tree right_misalign = ssize_int (0);
  tree left_step = ssize_int (0);
  tree right_step = ssize_int (0);
  enum tree_code code;
  tree init, evolution;

  *step = NULL_TREE;
  *misalign = NULL_TREE;
  *initial_offset = NULL_TREE;

  /* Strip conversions that don't narrow the mode.  */
  expr = vect_strip_conversion (expr);
  if (!expr)
    return false;

  /* Stop conditions:
     1. Constant.  */
  if (TREE_CODE (expr) == INTEGER_CST)
    {
      *initial_offset = fold_convert (ssizetype, expr);
      *misalign = fold_convert (ssizetype, expr);
      *step = ssize_int (0);
      return true;
    }

  /* 2. Variable.  Try to substitute with initial_condition of the
     corresponding access_fn in the current loop.  */
  if (SSA_VAR_P (expr))
    {
      tree access_fn = analyze_scalar_evolution (loop, expr);

      if (access_fn == chrec_dont_know)
        /* No access_fn.  */
        return false;

      init = initial_condition_in_loop_num (access_fn, loop->num);
      if (init == expr && !expr_invariant_in_loop_p (loop, init))
        /* Not enough information: may not be loop invariant.
           E.g., for a[b[i]], we get a[D], where D=b[i].  EXPR is D, its
           initial_condition is D, but it depends on i - the loop's
           induction variable.  */
        return false;

      evolution = evolution_part_in_loop_num (access_fn, loop->num);
      if (evolution && TREE_CODE (evolution) != INTEGER_CST)
        /* Evolution is not constant.  */
        return false;

      if (TREE_CODE (init) == INTEGER_CST)
        *misalign = fold_convert (ssizetype, init);
      else
        /* Not constant, misalignment cannot be calculated.  */
        *misalign = NULL_TREE;

      *initial_offset = fold_convert (ssizetype, init);

      *step = evolution ? fold_convert (ssizetype, evolution) : ssize_int (0);
      return true;
    }

  /* Recursive computation.  */
  if (!BINARY_CLASS_P (expr))
    {
      /* We expect to get binary expressions (PLUS/MINUS and MULT).  */
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        {
          fprintf (vect_dump, "Not binary expression ");
          print_generic_expr (vect_dump, expr, TDF_SLIM);
        }
      return false;
    }
  oprnd0 = TREE_OPERAND (expr, 0);
  oprnd1 = TREE_OPERAND (expr, 1);

  if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset,
                                 &left_misalign, &left_step)
      || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment,
                                    &right_offset, &right_misalign,
                                    &right_step))
    return false;

  /* The type of the operation: plus, minus or mult.  */
  code = TREE_CODE (expr);
  switch (code)
    {
    case MULT_EXPR:
      if (TREE_CODE (right_offset) != INTEGER_CST)
        /* RIGHT_OFFSET may not be constant, e.g., for arrays of
           variable-sized types.
           FORNOW: We don't support such cases.  */
        return false;

      /* Strip conversions that don't narrow the mode.  */
      left_offset = vect_strip_conversion (left_offset);
      if (!left_offset)
        return false;
      /* Misalignment computation.  */
      if (SSA_VAR_P (left_offset))
        {
          /* If the left side contains variables that can't be substituted
             with constants, we check if the right side is a multiple of
             ALIGNMENT.  */
          if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset,
                                  fold_convert (ssizetype,
                                                vectype_alignment))))
            *misalign = ssize_int (0);
          else
            /* If the remainder is not zero or the right side isn't constant,
               we can't compute misalignment.  */
            *misalign = NULL_TREE;
        }
      else
        {
          /* The left operand was successfully substituted with a
             constant.  */
          if (left_misalign)
            /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
               NULL_TREE.  */
            *misalign = size_binop (code, left_misalign, right_misalign);
          else
            *misalign = NULL_TREE;
        }

      /* Step calculation: multiply the step by the right operand.  */
      *step = size_binop (MULT_EXPR, left_step, right_offset);
      break;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Combine the recursive calculations for step and misalignment.  */
      *step = size_binop (code, left_step, right_step);

      if (left_misalign && right_misalign)
        *misalign = size_binop (code, left_misalign, right_misalign);
      else
        *misalign = NULL_TREE;

      break;

    default:
      gcc_unreachable ();
    }

  /* Compute offset.  */
  *initial_offset = fold_convert (ssizetype,
                                  fold (build2 (code, TREE_TYPE (left_offset),
                                                left_offset,
                                                right_offset)));
  return true;
}


/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop.  For example, when vectorizing a loop that operates on 4-byte
   elements, on a target with vector size (VS) of 16 bytes, the VF is set to
   4, since 4 elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated
   upon are of the same size.  Therefore this function currently sets VF
   according to the size of the types operated upon, and fails if there are
   multiple sizes in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
*/
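
/* For instance (assuming UNITS_PER_SIMD_WORD == 16): a loop over 32-bit
   ints yields nunits == 4 and hence VF == 4, while 16-bit shorts would
   yield nunits == 8; mixing the two element sizes in one loop is rejected
   below ("mixed data-types").  */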

static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  tree scalar_type;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          unsigned int nunits;
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          tree vectype;

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);
          /* Skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info))
            continue;

          if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (STMT_VINFO_DATA_REF (stmt_info))
            scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
          else if (TREE_CODE (stmt) == MODIFY_EXPR)
            scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
          else
            scalar_type = TREE_TYPE (stmt);

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "get vectype for scalar type: ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }

          vectype = get_vectype_for_scalar_type (scalar_type);
          if (!vectype)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                {
                  fprintf (vect_dump, "not vectorized: unsupported data-type ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }
              return false;
            }
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }
          STMT_VINFO_VECTYPE (stmt_info) = vectype;

          nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (vectorization_factor)
            {
              /* FORNOW: don't allow mixed units.
                 This restriction will be relaxed in the future.  */
              if (nunits != vectorization_factor)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                            LOOP_LOC (loop_vinfo)))
                    fprintf (vect_dump, "not vectorized: mixed data-types");
                  return false;
                }
            }
          else
            vectorization_factor = nunits;

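          /* With a single element size in the loop, nunits times the
             element size must span exactly one SIMD word.  */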
#ifdef ENABLE_CHECKING
          gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
                      * vectorization_factor == UNITS_PER_SIMD_WORD);
#endif
        }
    }

  /* TODO: Analyze cost.  Decide if worthwhile to vectorize.  */

  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}


/* Function vect_analyze_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */

static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  bool ok;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_operations ===");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.
             This is expected to include:
             - the COND_EXPR which is the loop exit condition
             - any LABEL_EXPRs in the loop
             - computations that are used only for array indexing or loop
               control  */

          if (!STMT_VINFO_RELEVANT_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
                fprintf (vect_dump, "irrelevant.");
              continue;
            }

#ifdef ENABLE_CHECKING
          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
            }
#endif

          ok = (vectorizable_operation (stmt, NULL, NULL)
                || vectorizable_assignment (stmt, NULL, NULL)
                || vectorizable_load (stmt, NULL, NULL)
                || vectorizable_store (stmt, NULL, NULL));

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                {
                  fprintf (vect_dump, "not vectorized: stmt not supported: ");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }
        }
    }

  /* TODO: Analyze cost.  Decide if worthwhile to vectorize.  */

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump,
             "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
             vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      return false;
    }

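  /* An epilog loop is needed whenever the iteration count is unknown at
     compile time or is not a multiple of VF; e.g., N == 10 with VF == 4
     leaves 2 iterations for the scalar epilog.  */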
  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
    {
      if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, tree stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs.)

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
    return false;

  operand = TREE_OPERAND (stmt, 1);

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}


/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
   cycles that they represent do not impede vectorization.

   FORNOW: Reduction, as in the following loop, is not supported yet:
              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];
           The cross-iteration cycle corresponding to the variable 'sum' will
           be considered too complicated and will impede vectorization.

   FORNOW: Induction, as in the following loop, is not supported yet:
              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;

           However, the following loop *is* vectorizable:
              loop3:
              for (i=0; i<N; i++)
                 a[i] = b[i];

           In both loops there exists a def-use cycle for the variable i:
              loop: i_2 = PHI (i_0, i_1)
                    a[i_2] = ...;
                    i_1 = i_2 + 1;
                    GOTO loop;

           The evolution of the above cycle is considered simple enough;
           however, we also check that the cycle does not need to be
           vectorized, i.e., we check that the variable that this cycle
           defines is only used for array indexing or in stmts that do not
           need to be vectorized.  This is not the case in loop2, but it
           *is* the case in loop3.  */

static bool
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  tree phi;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  tree dummy;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;

      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual PHIs.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
        {
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            fprintf (vect_dump, "virtual phi. skip.");
          continue;
        }

      /* Analyze the evolution function.  */

      /* FORNOW: The only scalar cross-iteration cycles that we allow are
         those of loop induction variables; this property is verified here.

         Furthermore, if that induction variable is used in an operation
         that needs to be vectorized (i.e., is not solely used to index
         arrays and check the exit condition) - we do not support its
         vectorization yet.  This property is verified in vect_is_simple_use,
         during vect_analyze_operations.  */

      access_fn = /* instantiate_parameters
                     (loop,*/
        analyze_scalar_evolution (loop, PHI_RESULT (phi));

      if (!access_fn)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: unsupported scalar cycle.");
          return false;
        }

      if (vect_print_dump_info (REPORT_DETAILS,
                                LOOP_LOC (loop_vinfo)))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: unsupported scalar cycle.");
          return false;
        }
    }

  return true;
}


/* Function vect_base_addr_differ_p.

   This is the simplest data dependence test: determines whether the
   data references A and B access the same array/region.  Returns
   false when the property is not computable at compile time.
   Otherwise return true, and DIFFER_P will record the result.  This
   utility will not be necessary once alias_sets_conflict_p becomes
   less conservative.  */

static bool
vect_base_addr_differ_p (struct data_reference *dra,
                         struct data_reference *drb,
                         bool *differ_p)
{
  tree stmt_a = DR_STMT (dra);
  stmt_vec_info stmt_info_a = vinfo_for_stmt (stmt_a);
  tree stmt_b = DR_STMT (drb);
  stmt_vec_info stmt_info_b = vinfo_for_stmt (stmt_b);
  tree addr_a = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_a);
  tree addr_b = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_b);
  tree type_a = TREE_TYPE (addr_a);
  tree type_b = TREE_TYPE (addr_b);
  HOST_WIDE_INT alias_set_a, alias_set_b;

  gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));

  /* Both references are ADDR_EXPR, i.e., we have the objects.  */
  if (TREE_CODE (addr_a) == ADDR_EXPR && TREE_CODE (addr_b) == ADDR_EXPR)
    return array_base_name_differ_p (dra, drb, differ_p);

  alias_set_a = (TREE_CODE (addr_a) == ADDR_EXPR) ?
    get_alias_set (TREE_OPERAND (addr_a, 0)) : get_alias_set (addr_a);
  alias_set_b = (TREE_CODE (addr_b) == ADDR_EXPR) ?
    get_alias_set (TREE_OPERAND (addr_b, 0)) : get_alias_set (addr_b);

  if (!alias_sets_conflict_p (alias_set_a, alias_set_b))
    {
      *differ_p = true;
      return true;
    }

  /* An instruction writing through a restricted pointer is "independent" of
     any instruction reading or writing through a different pointer, in the
     same block/scope.  */
  else if ((TYPE_RESTRICT (type_a) && !DR_IS_READ (dra))
           || (TYPE_RESTRICT (type_b) && !DR_IS_READ (drb)))
    {
      *differ_p = true;
      return true;
    }
  return false;
}


/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  */

static bool
vect_analyze_data_ref_dependence (struct data_reference *dra,
                                  struct data_reference *drb,
                                  loop_vec_info loop_vinfo)
{
  bool differ_p;
  struct data_dependence_relation *ddr;

  if (!vect_base_addr_differ_p (dra, drb, &differ_p))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        {
          fprintf (vect_dump,
                   "not vectorized: can't determine dependence between: ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  if (differ_p)
    return false;

  ddr = initialize_data_dependence_relation (dra, drb);
  compute_affine_dependence (ddr);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return false;

  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                            LOOP_LOC (loop_vinfo)))
    {
      fprintf (vect_dump,
               "not vectorized: possible dependence between data-refs ");
      print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
    }

  return true;
}


/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.

   TODO: dependences whose distance is greater than the vectorization factor
         can be ignored.  */

static bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
{
  unsigned int i, j;
  varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);

  /* Examine store-store (output) dependences.  */

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "compare all store-store pairs.");

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
    {
      for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
        {
          struct data_reference *dra =
            VARRAY_GENERIC_PTR (loop_write_refs, i);
          struct data_reference *drb =
            VARRAY_GENERIC_PTR (loop_write_refs, j);
          if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
            return false;
        }
    }

  /* Examine load-store (true/anti) dependences.  */

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "compare all load-store pairs.");

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
    {
      for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
        {
          struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
          struct data_reference *drb =
            VARRAY_GENERIC_PTR (loop_write_refs, j);
          if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
            return false;
        }
    }

  return true;
}


/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */

static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, alignment;
  bool base_aligned_p;
  tree misalign;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  DR_MISALIGNMENT (dr) = -1;

  misalign = STMT_VINFO_VECT_MISALIGNMENT (stmt_info);
  base_aligned_p = STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info);
  base = build_fold_indirect_ref (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info));
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (!misalign)
    {
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if (!base_aligned_p)
    {
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
        {
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned_p
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Alignment required, in bytes:  */
  alignment = ssize_int (TYPE_ALIGN (vectype) / BITS_PER_UNIT);

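  /* E.g. (illustrative), with a 16-byte vectype and MISALIGN == 20 bytes,
     the modulo below reduces the misalignment to 20 % 16 == 4 bytes.  */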
  /* Modulo alignment.  */
  misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
  if (tree_int_cst_sgn (misalign) < 0)
    {
      /* Negative misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "misalign = %d bytes", DR_MISALIGNMENT (dr));

  return true;
}


/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   This pass may take place at function granularity instead of at loop
   granularity.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
  unsigned int i;

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      if (!vect_compute_data_ref_alignment (dr))
        return false;
    }

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
      if (!vect_compute_data_ref_alignment (dr))
        return false;
    }

  return true;
}


/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized; any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized.  This restriction will be relaxed.  */

static void
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type datarefs;
  struct data_reference *dr0 = NULL;
  unsigned int i, j;

  /* Sigh, a hack to make targets that do not define UNITS_PER_SIMD_WORD
     bootstrap.  Copy UNITS_PER_SIMD_WORD to a local variable to avoid a
     "division by zero" error.  This error would be issued because we
     do "... % UNITS_PER_SIMD_WORD" below, and UNITS_PER_SIMD_WORD
     defaults to 0 if it is not defined by the target.  */
  int units_per_simd_word = UNITS_PER_SIMD_WORD;

  /*
     This pass will require a cost model to guide it whether to apply peeling
     or versioning or a combination of the two.  For example, the scheme that
     Intel uses when given a loop with several memory accesses is as follows:
     choose one memory access ('p') whose alignment you want to force by
     peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
     other accesses are not necessarily aligned, or (2) use loop versioning to
     generate one loop in which all accesses are aligned, and another loop in
     which only 'p' is necessarily aligned.

     ("Automatic Intra-Register Vectorization for the Intel Architecture",
     Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
     Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

     Devising a cost model is the most critical aspect of this work.  It will
     guide us on which access to peel for, whether to use loop versioning, how
     many versions to create, etc.  The cost model will probably consist of
     generic considerations as well as target specific considerations (on
     powerpc for example, misaligned stores are more painful than misaligned
     loads).

     Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){            # loop 1A
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){            # loop 1B
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     for (i = 3; i < N; i++){           # loop 2A
        x = q[i];                       # DR_MISALIGNMENT(q) = 0
        p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     if (p is aligned) {
        for (i = 3; i<N; i++){          # loop 3A
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i = 3; i<N; i++){          # loop 3B
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     These loops are later passed to loop_transform to be vectorized.  The
     vectorizer will use the alignment information to guide the transformation
     (whether to generate regular loads/stores, or with special handling for
     misalignment).  */

  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).

     The scheme we use FORNOW: peel to force the alignment of the first
     misaligned store in the loop.
     Rationale: misaligned stores are not yet supported.

     TODO: Use a better cost model.  */

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      dr0 = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      if (!aligned_access_p (dr0))
        {
          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
          LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
          break;
        }
    }

  /* (1.2) Update the alignment info according to the peeling factor.
     If the misalignment of the DR we peel for is M, then the
     peeling factor is VF - M, and the misalignment of each access DR_i
     in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
     If the misalignment of the DR we peel for is unknown, then the
     misalignment of each access DR_i in the loop is also unknown.

     TODO: - consider accesses that are known to have the same
             alignment, even if that alignment is unknown.  */

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      int mis;
      int npeel = 0;

      if (known_alignment_for_access_p (dr0))
        {
          /* Since it's known at compile time, compute the number of
             iterations in the peeled loop (the peeling factor) for use in
             updating DR_MISALIGNMENT values.  The peeling factor is the
             vectorization factor minus the misalignment as an element
             count.  */
          mis = DR_MISALIGNMENT (dr0);
          mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
          npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
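          /* E.g., with VF == 4, 4-byte elements and DR_MISALIGNMENT (dr0)
             == 8 bytes, mis becomes 2 elements and npeel == 4 - 2 == 2.  */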
        }

      datarefs = loop_write_datarefs;
      for (j = 0; j < 2; j++)
        {
          for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
            {
              struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);

              if (dr == dr0)
                continue;
              if (known_alignment_for_access_p (dr)
                  && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr0))
                DR_MISALIGNMENT (dr) = 0;
              else if (known_alignment_for_access_p (dr)
                       && known_alignment_for_access_p (dr0))
                {
                  int drsize =
                    GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));

                  DR_MISALIGNMENT (dr) += npeel * drsize;
                  DR_MISALIGNMENT (dr) %= units_per_simd_word;
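                  /* E.g. (with UNITS_PER_SIMD_WORD == 16): a 4-byte access
                     with misalignment 4 and npeel == 2 becomes
                     (4 + 2 * 4) % 16 == 12 bytes.  */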
                }
              else
                DR_MISALIGNMENT (dr) = -1;
            }
          datarefs = loop_read_datarefs;
        }

      DR_MISALIGNMENT (dr0) = 0;
      if (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "Alignment of access forced using peeling.");
    }
}


/* Function vect_analyze_data_refs_alignment

   Analyze the alignment of the data-references in the loop.
   FOR NOW: Until support for misaligned accesses is in place, only if all
   accesses are aligned can the loop be vectorized.  This restriction will be
   relaxed.  */

static bool
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");


  /* This pass may take place at function granularity instead of at loop
     granularity.  */

  if (!vect_compute_data_refs_alignment (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump,
                 "not vectorized: can't calculate alignment for data ref.");
      return false;
    }


  /* This pass will decide on using loop versioning and/or loop peeling in
     order to enhance the alignment of data references in the loop.  */

  vect_enhance_data_refs_alignment (loop_vinfo);


  /* Finally, check that all the data references in the loop can be
     handled with respect to their alignment.  */

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: unsupported unaligned load.");
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: unsupported unaligned store.");
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }

  return true;
}


/* Function vect_analyze_data_ref_access.

   Analyze the access pattern of the data-reference DR.  For now, a data
   access has to be consecutive to be considered vectorizable.  */

static bool
vect_analyze_data_ref_access (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree step = STMT_VINFO_VECT_STEP (stmt_info);
  tree scalar_type = TREE_TYPE (DR_REF (dr));

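  /* A consecutive access advances by exactly one element per iteration:
     e.g., a 4-byte int access with STEP == 4 passes, while STEP == 0
     (loop invariant) or STEP == 8 (strided) is rejected here.  */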
  if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
    {
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "not consecutive access");
      return false;
    }
  return true;
}


/* Function vect_analyze_data_ref_accesses.

   Analyze the access pattern of all the data references in the loop.

   FORNOW: the only access pattern that is considered vectorizable is a
           simple step 1 (consecutive) access.

   FORNOW: handle only arrays and pointer accesses.  */

static bool
vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
{
  unsigned int i;
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      bool ok = vect_analyze_data_ref_access (dr);
      if (!ok)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: complicated access pattern.");
          return false;
        }
    }

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
      bool ok = vect_analyze_data_ref_access (dr);
      if (!ok)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: complicated access pattern.");
          return false;
        }
    }

  return true;
}


/* Function vect_analyze_pointer_ref_access.

   Input:
   STMT - a stmt that contains a data-ref.
   MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
   ACCESS_FN - the access function of MEMREF.

   Output:
   If the data-ref access is vectorizable, return a data_reference structure
   that represents it (DR).  Otherwise - return NULL.
   STEP - the stride of MEMREF in the loop.
   INIT - the initial condition of MEMREF in the loop.  */

static struct data_reference *
vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read,
                                 tree access_fn, tree *ptr_init,
                                 tree *ptr_step)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree step, init;
  tree reftype, innertype;
  tree indx_access_fn;
  int loopnum = loop->num;
  struct data_reference *dr;

  if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: pointer access is not simple.");
      return NULL;
    }

  STRIP_NOPS (init);

  if (!expr_invariant_in_loop_p (loop, init))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump,
                 "not vectorized: initial condition is not loop invariant.");
      return NULL;
    }

  if (TREE_CODE (step) != INTEGER_CST)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump,
                 "not vectorized: non constant step for pointer access.");
      return NULL;
    }

  reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
  if (!POINTER_TYPE_P (reftype))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
      return NULL;
    }

  reftype = TREE_TYPE (init);
  if (!POINTER_TYPE_P (reftype))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
      return NULL;
    }

  *ptr_step = fold_convert (ssizetype, step);
  innertype = TREE_TYPE (reftype);
  /* Check that STEP is a multiple of type size.  */
  if (!integer_zerop (size_binop (TRUNC_MOD_EXPR, *ptr_step,
                        fold_convert (ssizetype,
                                      TYPE_SIZE_UNIT (innertype)))))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: non consecutive access.");
      return NULL;
    }

  indx_access_fn =
    build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    {
      fprintf (vect_dump, "Access function of ptr indx: ");
      print_generic_expr (vect_dump, indx_access_fn, TDF_SLIM);
    }
  dr = init_data_ref (stmt, memref, NULL_TREE, indx_access_fn, is_read);
  *ptr_init = init;
  return dr;
}


/* Function vect_address_analysis

   Return the BASE of the address expression EXPR.
   Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP.

   Input:
   EXPR - the address expression that is being analyzed
   STMT - the statement that contains EXPR or its original memory reference
   IS_READ - TRUE if STMT reads from EXPR, FALSE if writes to EXPR
   VECTYPE - the type that defines the alignment (i.e., we compute
             alignment relative to TYPE_ALIGN(VECTYPE))
   DR - data_reference struct for the original memory reference

   Output:
   BASE (returned value) - the base of the data reference EXPR.
   INITIAL_OFFSET - initial offset of EXPR from BASE (an expression)
   MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if
              the computation is impossible
   STEP - evolution of EXPR in the loop
   BASE_ALIGNED - indicates if BASE is aligned

   If something unexpected is encountered (an unsupported form of data-ref),
   then NULL_TREE is returned.  */
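
/* For example (illustrative): for EXPR == '&a + (i * 4 + 8)', the
   PLUS_EXPR case below recurses into '&a' (an ADDR_EXPR, which yields the
   base) and into 'i * 4 + 8' (which yields no base); the offsets and
   misalignments of the two operands are then combined.  */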

static tree
vect_address_analysis (tree expr, tree stmt, bool is_read, tree vectype,
                       struct data_reference *dr, tree *offset, tree *misalign,
                       tree *step, bool *base_aligned)
{
  tree oprnd0, oprnd1, base_address, offset_expr, base_addr0, base_addr1;
  tree address_offset = ssize_int (0), address_misalign = ssize_int (0);
  tree dummy;
  subvar_t dummy2;

  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
      /* EXPR is of form {base +/- offset} (or {offset +/- base}).  */
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      STRIP_NOPS (oprnd0);
      STRIP_NOPS (oprnd1);

      /* Recursively try to find the base of the address contained in EXPR.
         For offset, the returned base will be NULL.  */
      base_addr0 = vect_address_analysis (oprnd0, stmt, is_read, vectype, dr,
                                          &address_offset, &address_misalign,
                                          step, base_aligned);

      base_addr1 = vect_address_analysis (oprnd1, stmt, is_read, vectype, dr,
                                          &address_offset, &address_misalign,
                                          step, base_aligned);

      /* We support cases where only one of the operands contains an
         address.  */
      if ((base_addr0 && base_addr1) || (!base_addr0 && !base_addr1))
        return NULL_TREE;

      /* To revert STRIP_NOPS.  */
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      offset_expr = base_addr0 ?
        fold_convert (ssizetype, oprnd1) : fold_convert (ssizetype, oprnd0);

      /* EXPR is of form {base +/- offset} (or {offset +/- base}).  If offset
         is a number, we can add it to the misalignment value calculated for
         base, otherwise, misalignment is NULL.  */
      if (TREE_CODE (offset_expr) == INTEGER_CST && address_misalign)
        *misalign = size_binop (TREE_CODE (expr), address_misalign,
                                offset_expr);
      else
        *misalign = NULL_TREE;

      /* Combine offset (from EXPR {base + offset}) with the offset calculated
         for base.  */
      *offset = size_binop (TREE_CODE (expr), address_offset, offset_expr);
      return base_addr0 ? base_addr0 : base_addr1;

    case ADDR_EXPR:
      base_address = vect_object_analysis (TREE_OPERAND (expr, 0), stmt,
                                           is_read, vectype, &dr, offset,
                                           misalign, step, base_aligned,
                                           &dummy, &dummy2);
      return base_address;

    case SSA_NAME:
      if (!POINTER_TYPE_P (TREE_TYPE (expr)))
        return NULL_TREE;

      if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
        {
          if (vect_get_ptr_offset (expr, vectype, misalign))
            *base_aligned = true;
          else
            *base_aligned = false;
        }
      else
        {
          *base_aligned = true;
          *misalign = ssize_int (0);
        }
      *offset = ssize_int (0);
      *step = ssize_int (0);
      return expr;

    default:
      return NULL_TREE;
    }
}


/* Function vect_object_analysis

   Return the BASE of the data reference MEMREF.
   Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP.
   E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset
   'a.b[i] + 4B' from a (can be an expression), MISALIGN is an OFFSET
   instantiated with initial_conditions of access_functions of variables,
   modulo alignment, and STEP is the evolution of the DR_REF in this loop.

   Function get_inner_reference is used for the above in case of ARRAY_REF and
   COMPONENT_REF.

   The structure of the function is as follows:
   Part 1:
   Case 1. For handled_component_p refs
          1.1 call get_inner_reference
            1.1.1 analyze offset expr received from get_inner_reference
          1.2. build data-reference structure for MEMREF
        (fall through with BASE)
   Case 2. For declarations
          2.1 check alignment
          2.2 update DR_BASE_NAME if necessary for alias
   Case 3. For INDIRECT_REFs
          3.1 get the access function
          3.2 analyze evolution of MEMREF
          3.3 set data-reference structure for MEMREF
          3.4 call vect_address_analysis to analyze INIT of the access function

   Part 2:
   Combine the results of object and address analysis to calculate
   INITIAL_OFFSET, STEP and misalignment info.

   Input:
   MEMREF - the memory reference that is being analyzed
   STMT - the statement that contains MEMREF
   IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
   VECTYPE - the type that defines the alignment (i.e., we compute
             alignment relative to TYPE_ALIGN(VECTYPE))

   Output:
   BASE_ADDRESS (returned value) - the base address of the data reference
                                   MEMREF.  E.g., if MEMREF is a.b[k].c[i][j]
                                   the returned base is &a.
   DR - data_reference struct for MEMREF
   INITIAL_OFFSET - initial offset of MEMREF from BASE (an expression)
   MISALIGN - offset of MEMREF from BASE in bytes (a constant) or NULL_TREE if
              the computation is impossible
   STEP - evolution of the DR_REF in the loop
   BASE_ALIGNED - indicates if BASE is aligned
   MEMTAG - memory tag for aliasing purposes
   SUBVAR - sub-variables of the variable

   If something unexpected is encountered (an unsupported form of data-ref),
   then NULL_TREE is returned.  */

static tree
vect_object_analysis (tree memref, tree stmt, bool is_read,
                      tree vectype, struct data_reference **dr,
                      tree *offset, tree *misalign, tree *step,
                      bool *base_aligned, tree *memtag,
                      subvar_t *subvars)
{
  tree base = NULL_TREE, base_address = NULL_TREE;
  tree object_offset = ssize_int (0), object_misalign = ssize_int (0);
  tree object_step = ssize_int (0), address_step = ssize_int (0);
  bool object_base_aligned = true, address_base_aligned = true;
  tree address_offset = ssize_int (0), address_misalign = ssize_int (0);
  HOST_WIDE_INT pbitsize, pbitpos;
  tree poffset, bit_pos_in_bytes;
  enum machine_mode pmode;
  int punsignedp, pvolatilep;
  tree ptr_step = ssize_int (0), ptr_init = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct data_reference *ptr_dr = NULL;
  tree access_fn, evolution_part, address_to_analyze;

  /* Part 1:  */
  /* Case 1. handled_component_p refs.  */
  if (handled_component_p (memref))
    {
      /* 1.1 call get_inner_reference.  */
      /* Find the base and the offset from it.  */
      base = get_inner_reference (memref, &pbitsize, &pbitpos, &poffset,
                                  &pmode, &punsignedp, &pvolatilep, false);
      if (!base)
        return NULL_TREE;

      /* 1.1.1 analyze offset expr received from get_inner_reference.  */
      if (poffset
          && !vect_analyze_offset_expr (poffset, loop,
                                        TYPE_SIZE_UNIT (vectype),
                                        &object_offset, &object_misalign,
                                        &object_step))
        {
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "failed to compute offset or step for ");
              print_generic_expr (vect_dump, memref, TDF_SLIM);
            }
          return NULL_TREE;
        }

      /* Add bit position to OFFSET and MISALIGN.  */

      bit_pos_in_bytes = ssize_int (pbitpos / BITS_PER_UNIT);
      /* Check that there is no remainder in bits.  */
      if (pbitpos % BITS_PER_UNIT)
        {
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            fprintf (vect_dump, "bit offset alignment.");
          return NULL_TREE;
        }
      object_offset = size_binop (PLUS_EXPR, bit_pos_in_bytes, object_offset);
      if (object_misalign)
        object_misalign = size_binop (PLUS_EXPR, object_misalign,
                                      bit_pos_in_bytes);

      /* Create data-reference for MEMREF.  TODO: handle COMPONENT_REFs.  */
      if (!(*dr))
        {
          if (TREE_CODE (memref) == ARRAY_REF)
            *dr = analyze_array (stmt, memref, is_read);
          else
            /* FORNOW.  */
            return NULL_TREE;
        }
      memref = base; /* To continue analysis of BASE.  */
      /* fall through  */
    }
1682
1683 /* Part 1: Case 2. Declarations. */
1684 if (DECL_P (memref))
1685 {
1686 /* We expect to get a decl only if we already have a DR. */
1687 if (!(*dr))
1688 {
1689 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1690 {
1691 fprintf (vect_dump, "unhandled decl ");
1692 print_generic_expr (vect_dump, memref, TDF_SLIM);
1693 }
1694 return NULL_TREE;
1695 }
1696
1697 /* 2.1 check the alignment. */
1698 object_base_aligned = (DECL_ALIGN (memref) >= TYPE_ALIGN (vectype));
1702
1703 /* 2.2 update DR_BASE_NAME if necessary. */
1704 if (!DR_BASE_NAME ((*dr)))
1705 /* For alias analysis, in case the analysis of an INDIRECT_REF
1706 brought us to this object. */
1707 DR_BASE_NAME ((*dr)) = memref;
1708
1709 if (SSA_VAR_P (memref) && var_can_have_subvars (memref))
1710 *subvars = get_subvars_for_var (memref);
1711 base_address = build_fold_addr_expr (memref);
1712 *memtag = memref;
1713 }
1714
1715 /* Part 1: Case 3. INDIRECT_REFs. */
1716 else if (TREE_CODE (memref) == INDIRECT_REF)
1717 {
1718 /* 3.1 get the access function. */
1719 access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
1720 if (!access_fn)
1721 {
1722 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1723 LOOP_LOC (loop_vinfo)))
1724 fprintf (vect_dump, "not vectorized: complicated pointer access.");
1725 return NULL_TREE;
1726 }
1727 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1728 {
1729 fprintf (vect_dump, "Access function of ptr: ");
1730 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1731 }
1732
1733 /* 3.2 analyze evolution of MEMREF. */
1734 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1735 if (evolution_part)
1736 {
1737 ptr_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read,
1738 access_fn, &ptr_init, &ptr_step);
1739 if (!(ptr_dr))
1740 return NULL_TREE;
1741
1742 object_step = size_binop (PLUS_EXPR, object_step, ptr_step);
1743 address_to_analyze = ptr_init;
1744 }
1745 else
1746 {
1747 if (!(*dr))
1748 {
1749 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1750 LOOP_LOC (loop_vinfo)))
1751 fprintf (vect_dump, "not vectorized: ptr is loop invariant.");
1752 return NULL_TREE;
1753 }
1754 /* Since a DR already exists for MEMREF, we are analyzing the init of
1755 the access function, which does not necessarily have an evolution
1756 in the loop. */
1757 address_to_analyze = initial_condition_in_loop_num (access_fn,
1758 loop->num);
1759 }
1760
1761 /* 3.3 set data-reference structure for MEMREF. */
1762 *dr = (*dr) ? *dr : ptr_dr;
1763
1764 /* 3.4 call vect_address_analysis to analyze INIT of the access
1765 function. */
1766 base_address = vect_address_analysis (address_to_analyze, stmt, is_read,
1767 vectype, *dr, &address_offset, &address_misalign,
1768 &address_step, &address_base_aligned);
1769 if (!base_address)
1770 return NULL_TREE;
1771
1772 switch (TREE_CODE (base_address))
1773 {
1774 case SSA_NAME:
1775 *memtag = get_var_ann (SSA_NAME_VAR (base_address))->type_mem_tag;
1776 if (!(*memtag) && TREE_CODE (TREE_OPERAND (memref, 0)) == SSA_NAME)
1777 *memtag = get_var_ann (
1778 SSA_NAME_VAR (TREE_OPERAND (memref, 0)))->type_mem_tag;
1779 break;
1780 case ADDR_EXPR:
1781 *memtag = TREE_OPERAND (base_address, 0);
1782 break;
1783 default:
1784 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1785 LOOP_LOC (loop_vinfo)))
1786 {
1787 fprintf (vect_dump, "not vectorized: no memtag ref: ");
1788 print_generic_expr (vect_dump, memref, TDF_SLIM);
1789 }
1790 return NULL_TREE;
1791 }
1792 }
1793
1794 if (!base_address)
1795 /* MEMREF cannot be analyzed. */
1796 return NULL_TREE;
1797
1798 if (SSA_VAR_P (*memtag) && var_can_have_subvars (*memtag))
1799 *subvars = get_subvars_for_var (*memtag);
1800
1801 /* Part 2: Combine the results of object and address analysis to calculate
1802 INITIAL_OFFSET, STEP and misalignment info. */
1803 *offset = size_binop (PLUS_EXPR, object_offset, address_offset);
1804 if (object_misalign && address_misalign)
1805 *misalign = size_binop (PLUS_EXPR, object_misalign, address_misalign);
1806 else
1807 *misalign = NULL_TREE;
1808 *step = size_binop (PLUS_EXPR, object_step, address_step);
1809 *base_aligned = object_base_aligned && address_base_aligned;
1810
1811 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1812 {
1813 fprintf (vect_dump, "Results of object analysis for: ");
1814 print_generic_expr (vect_dump, memref, TDF_SLIM);
1815 fprintf (vect_dump, "\n\tbase_address: ");
1816 print_generic_expr (vect_dump, base_address, TDF_SLIM);
1817 fprintf (vect_dump, "\n\toffset: ");
1818 print_generic_expr (vect_dump, *offset, TDF_SLIM);
1819 fprintf (vect_dump, "\n\tstep: ");
1820 print_generic_expr (vect_dump, *step, TDF_SLIM);
1821 fprintf (vect_dump, "\n\tbase aligned %d\n\tmisalign: ", *base_aligned);
1822 print_generic_expr (vect_dump, *misalign, TDF_SLIM);
1823 }
1824 return base_address;
1825 }
1826
1827
1828 /* Function vect_analyze_data_refs.
1829
1830 Find all the data references in the loop.
1831
1832 The general structure of the analysis of data refs in the vectorizer is as
1833 follows:
1834 1- vect_analyze_data_refs(loop):
1835 Find and analyze all data-refs in the loop:
1836 foreach ref
1837 base_address = vect_object_analysis(ref)
1838 1.1- vect_object_analysis(ref):
1839 Analyze ref, and build a DR (data_reference struct) for it;
1840 compute base, initial_offset, step and alignment.
1841 Call get_inner_reference for refs handled in this function.
1842 Call vect_address_analysis(addr) to analyze pointer-type expressions.
1843 Set ref_stmt.base, ref_stmt.initial_offset, ref_stmt.alignment,
1844 ref_stmt.memtag and ref_stmt.step accordingly.
1845 2- vect_analyze_data_ref_dependences(): apply dependence testing using ref_stmt.DR
1846 3- vect_analyze_data_refs_alignment(): check that ref_stmt.alignment is ok.
1847 4- vect_analyze_data_ref_accesses(): check that ref_stmt.step is ok.
1848
1849 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
1850 whose base is really an array (not a pointer) and whose alignment
1851 can be forced. This restriction will be relaxed. */
1852
1853 static bool
1854 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1855 {
1856 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1857 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1858 int nbbs = loop->num_nodes;
1859 block_stmt_iterator si;
1860 int j;
1861 struct data_reference *dr;
1862
1863 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1864 fprintf (vect_dump, "=== vect_analyze_data_refs ===");
1865
1866 for (j = 0; j < nbbs; j++)
1867 {
1868 basic_block bb = bbs[j];
1869 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1870 {
1871 bool is_read = false;
1872 tree stmt = bsi_stmt (si);
1873 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1874 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1875 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1876 vuse_optype vuses = STMT_VUSE_OPS (stmt);
1877 varray_type *datarefs = NULL;
1878 int nvuses, nv_may_defs, nv_must_defs;
1879 tree memref = NULL;
1880 tree scalar_type, vectype;
1881 tree base, offset, misalign, step, tag;
1882 bool base_aligned;
1883 subvar_t subvars;
1884
1885 /* Assumption: there exists a data-ref in stmt if and only if
1886 it has vuses/vdefs. */
1887
1888 if (!vuses && !v_may_defs && !v_must_defs)
1889 continue;
1890
1891 nvuses = NUM_VUSES (vuses);
1892 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
1893 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
1894
1895 if (nvuses && (nv_may_defs || nv_must_defs))
1896 {
1897 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1898 {
1899 fprintf (vect_dump, "unexpected vdefs and vuses in stmt: ");
1900 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1901 }
1902 return false;
1903 }
1904
1905 if (TREE_CODE (stmt) != MODIFY_EXPR)
1906 {
1907 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1908 {
1909 fprintf (vect_dump, "unexpected vops in stmt: ");
1910 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1911 }
1912 return false;
1913 }
1914
1915 if (vuses)
1916 {
1917 memref = TREE_OPERAND (stmt, 1);
1918 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
1919 is_read = true;
1920 }
1921 else /* vdefs */
1922 {
1923 memref = TREE_OPERAND (stmt, 0);
1924 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1925 is_read = false;
1926 }
1927
1928 scalar_type = TREE_TYPE (memref);
1929 vectype = get_vectype_for_scalar_type (scalar_type);
1930 if (!vectype)
1931 {
1932 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1933 {
1934 fprintf (vect_dump, "no vectype for stmt: ");
1935 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1936 fprintf (vect_dump, " scalar_type: ");
1937 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
1938 }
1939 /* It is not possible to vectorize this data reference. */
1940 return false;
1941 }
1942 /* Analyze MEMREF. If it is of a supported form, build data_reference
1943 struct for it (DR). */
1944 dr = NULL;
1945 base = vect_object_analysis (memref, stmt, is_read, vectype, &dr,
1946 &offset, &misalign, &step,
1947 &base_aligned, &tag, &subvars);
1948 if (!base)
1949 {
1950 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1951 LOOP_LOC (loop_vinfo)))
1952 {
1953 fprintf (vect_dump, "not vectorized: unhandled data ref: ");
1954 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1955 }
1956 return false;
1957 }
1958 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info) = base;
1959 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
1960 STMT_VINFO_VECT_STEP (stmt_info) = step;
1961 STMT_VINFO_VECT_MISALIGNMENT (stmt_info) = misalign;
1962 STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info) = base_aligned;
1963 STMT_VINFO_MEMTAG (stmt_info) = tag;
1964 STMT_VINFO_SUBVARS (stmt_info) = subvars;
1965 STMT_VINFO_VECTYPE (stmt_info) = vectype;
1966 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
1967 STMT_VINFO_DATA_REF (stmt_info) = dr;
1968 }
1969 }
1970
1971 return true;
1972 }
1973
1974
1975 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
1976
1977 /* Function vect_mark_relevant.
1978
1979 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
1980
1981 static void
1982 vect_mark_relevant (varray_type *worklist, tree stmt)
1983 {
1984 stmt_vec_info stmt_info;
1985
1986 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1987 fprintf (vect_dump, "mark relevant.");
1988
1989 if (TREE_CODE (stmt) == PHI_NODE)
1990 {
1991 VARRAY_PUSH_TREE (*worklist, stmt);
1992 return;
1993 }
1994
1995 stmt_info = vinfo_for_stmt (stmt);
1996
1997 if (!stmt_info)
1998 {
1999 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2000 {
2001 fprintf (vect_dump, "mark relevant: no stmt info!!.");
2002 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2003 }
2004 return;
2005 }
2006
2007 if (STMT_VINFO_RELEVANT_P (stmt_info))
2008 {
2009 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2010 fprintf (vect_dump, "already marked relevant.");
2011 return;
2012 }
2013
2014 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
2015 VARRAY_PUSH_TREE (*worklist, stmt);
2016 }
2017
2018
2019 /* Function vect_stmt_relevant_p.
2020
2021 Return true if STMT, in the loop represented by LOOP_VINFO, is
2022 "relevant for vectorization".
2023
2024 A stmt is considered "relevant for vectorization" if:
2025 - it has uses outside the loop.
2026 - it has vdefs (it alters memory).
2027 - it is a control stmt in the loop (other than the exit condition).
2028
2029 CHECKME: what other side effects would the vectorizer allow? */
2030
2031 static bool
2032 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
2033 {
2034 v_may_def_optype v_may_defs;
2035 v_must_def_optype v_must_defs;
2036 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2037 int i;
2038 dataflow_t df;
2039 int num_uses;
2040
2041 /* cond stmt other than loop exit cond. */
2042 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
2043 return true;
2044
2045 /* changing memory. */
2046 if (TREE_CODE (stmt) != PHI_NODE)
2047 {
2048 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
2049 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
2050 if (v_may_defs || v_must_defs)
2051 {
2052 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2053 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
2054 return true;
2055 }
2056 }
2057
2058 /* uses outside the loop. */
2059 df = get_immediate_uses (stmt);
2060 num_uses = num_immediate_uses (df);
2061 for (i = 0; i < num_uses; i++)
2062 {
2063 tree use = immediate_use (df, i);
2064 basic_block bb = bb_for_stmt (use);
2065 if (!flow_bb_inside_loop_p (loop, bb))
2066 {
2067 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2068 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
2069 return true;
2070 }
2071 }
2072
2073 return false;
2074 }
2075
2076
2077 /* Function vect_mark_stmts_to_be_vectorized.
2078
2079 Not all stmts in the loop need to be vectorized. For example:
2080
2081 for i...
2082 for j...
2083 1. T0 = i + j
2084 2. T1 = a[T0]
2085
2086 3. j = j + 1
2087
2088 Stmts 1 and 3 do not need to be vectorized, because loop control and
2089 addressing of vectorized data-refs are handled differently.
2090
2091 This pass detects such stmts. */
2092
2093 static bool
2094 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2095 {
2096 varray_type worklist;
2097 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2098 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2099 unsigned int nbbs = loop->num_nodes;
2100 block_stmt_iterator si;
2101 tree stmt;
2102 stmt_ann_t ann;
2103 unsigned int i;
2104 int j;
2105 use_optype use_ops;
2106 stmt_vec_info stmt_info;
2107 basic_block bb;
2108 tree phi;
2109
2110 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2111 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
2112
2113 bb = loop->header;
2114 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2115 {
2116 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2117 {
2118 fprintf (vect_dump, "init: phi relevant? ");
2119 print_generic_expr (vect_dump, phi, TDF_SLIM);
2120 }
2121
2122 if (vect_stmt_relevant_p (phi, loop_vinfo))
2123 {
2124 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2125 LOOP_LOC (loop_vinfo)))
2126 fprintf (vect_dump, "unsupported reduction/induction.");
2127 return false;
2128 }
2129 }
2130
2131 VARRAY_TREE_INIT (worklist, 64, "work list");
2132
2133 /* 1. Init worklist. */
2134
2135 for (i = 0; i < nbbs; i++)
2136 {
2137 bb = bbs[i];
2138 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2139 {
2140 stmt = bsi_stmt (si);
2141
2142 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2143 {
2144 fprintf (vect_dump, "init: stmt relevant? ");
2145 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2146 }
2147
2148 stmt_info = vinfo_for_stmt (stmt);
2149 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
2150
2151 if (vect_stmt_relevant_p (stmt, loop_vinfo))
2152 vect_mark_relevant (&worklist, stmt);
2153 }
2154 }
2155
2156
2157 /* 2. Process_worklist */
2158
2159 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
2160 {
2161 stmt = VARRAY_TOP_TREE (worklist);
2162 VARRAY_POP (worklist);
2163
2164 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2165 {
2166 fprintf (vect_dump, "worklist: examine stmt: ");
2167 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2168 }
2169
2170 /* Examine the USES in this statement. Mark the statements that define
2171 this statement's uses as "relevant", unless a USE serves only as
2172 an array index. */
2173
2174 if (TREE_CODE (stmt) == PHI_NODE)
2175 {
2176 /* Follow the def-use chain inside the loop. */
2177 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
2178 {
2179 tree arg = PHI_ARG_DEF (stmt, j);
2180 tree def_stmt = NULL_TREE;
2181 basic_block bb;
2182 if (!vect_is_simple_use (arg, loop_vinfo, &def_stmt))
2183 {
2184 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2185 LOOP_LOC (loop_vinfo)))
2186 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2187 varray_clear (worklist);
2188 return false;
2189 }
2190 if (!def_stmt)
2191 continue;
2192
2193 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2194 {
2195 fprintf (vect_dump, "worklist: def_stmt: ");
2196 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
2197 }
2198
2199 bb = bb_for_stmt (def_stmt);
2200 if (flow_bb_inside_loop_p (loop, bb))
2201 vect_mark_relevant (&worklist, def_stmt);
2202 }
2203 }
2204
2205 ann = stmt_ann (stmt);
2206 use_ops = USE_OPS (ann);
2207
2208 for (i = 0; i < NUM_USES (use_ops); i++)
2209 {
2210 tree use = USE_OP (use_ops, i);
2211
2212 /* We are only interested in uses that need to be vectorized. Uses
2213 that are used for address computation are not considered relevant. */
2215 if (exist_non_indexing_operands_for_use_p (use, stmt))
2216 {
2217 tree def_stmt = NULL_TREE;
2218 basic_block bb;
2219 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt))
2220 {
2221 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2222 LOOP_LOC (loop_vinfo)))
2223 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2224 varray_clear (worklist);
2225 return false;
2226 }
2227
2228 if (!def_stmt)
2229 continue;
2230
2231 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2232 {
2233 fprintf (vect_dump, "worklist: examine use %d: ", i);
2234 print_generic_expr (vect_dump, use, TDF_SLIM);
2235 }
2236
2237 bb = bb_for_stmt (def_stmt);
2238 if (flow_bb_inside_loop_p (loop, bb))
2239 vect_mark_relevant (&worklist, def_stmt);
2240 }
2241 }
2242 } /* while worklist */
2243
2244 varray_clear (worklist);
2245 return true;
2246 }
2247
2248
2249 /* Function vect_can_advance_ivs_p
2250
2251 In case the number of iterations of LOOP is unknown at compile
2252 time, an epilog loop will be generated, and the loop induction variables
2253 (IVs) will be "advanced" to the values they are supposed to take just before
2254 the epilog loop. Here we check that the access functions of the loop IVs
2255 and the expression that represents the loop bound are simple enough.
2256 These restrictions will be relaxed in the future. */
2257
2258 static bool
2259 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2260 {
2261 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2262 basic_block bb = loop->header;
2263 tree phi;
2264
2265 /* Analyze phi functions of the loop header. */
2266
2267 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2268 {
2269 tree access_fn = NULL;
2270 tree evolution_part;
2271
2272 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2273 {
2274 fprintf (vect_dump, "Analyze phi: ");
2275 print_generic_expr (vect_dump, phi, TDF_SLIM);
2276 }
2277
2278 /* Skip virtual phis. The data dependences that are associated with
2279 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2280
2281 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2282 {
2283 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2284 fprintf (vect_dump, "virtual phi. skip.");
2285 continue;
2286 }
2287
2288 /* Analyze the evolution function. */
2289
2290 access_fn = instantiate_parameters
2291 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
2292
2293 if (!access_fn)
2294 {
2295 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2296 fprintf (vect_dump, "No Access function.");
2297 return false;
2298 }
2299
2300 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2301 {
2302 fprintf (vect_dump, "Access function of PHI: ");
2303 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
2304 }
2305
2306 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2307
2308 if (evolution_part == NULL_TREE)
2309 return false;
2310
2311 /* FORNOW: We do not transform initial conditions of IVs
2312 whose evolution functions are a polynomial of degree >= 2. */
2313
2314 if (tree_is_chrec (evolution_part))
2315 return false;
2316 }
2317
2318 return true;
2319 }
2320
2321
2322 /* Function vect_get_loop_niters.
2323
2324 Determine how many iterations the loop executes.
2325 If an expression that represents the number of iterations
2326 can be constructed, place it in NUMBER_OF_ITERATIONS.
2327 Return the loop exit condition. */
2328
2329 static tree
2330 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
2331 {
2332 tree niters;
2333
2334 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2335 fprintf (vect_dump, "=== get_loop_niters ===");
2336
2337 niters = number_of_iterations_in_loop (loop);
2338
2339 if (niters != NULL_TREE
2340 && niters != chrec_dont_know)
2341 {
2342 *number_of_iterations = niters;
2343
2344 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2345 {
2346 fprintf (vect_dump, "==> get_loop_niters:" );
2347 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
2348 }
2349 }
2350
2351 return get_loop_exit_condition (loop);
2352 }
2353
2354
2355 /* Function vect_analyze_loop_form.
2356
2357 Verify the following restrictions (some may be relaxed in the future):
2358 - it's an inner-most loop
2359 - number of BBs = 2 (which are the loop header and the latch)
2360 - the loop has a pre-header
2361 - the loop has a single entry and exit
2362 - the loop exit condition is simple enough, and the number of iterations
2363 can be analyzed (a countable loop). */
2364
2365 static loop_vec_info
2366 vect_analyze_loop_form (struct loop *loop)
2367 {
2368 loop_vec_info loop_vinfo;
2369 tree loop_cond;
2370 tree number_of_iterations = NULL;
2371 LOC loop_loc;
2372
2373 loop_loc = find_loop_location (loop);
2374
2375 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2376 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
2377
2378 if (loop->inner)
2379 {
2380 if (vect_print_dump_info (REPORT_OUTER_LOOPS, loop_loc))
2381 fprintf (vect_dump, "not vectorized: nested loop.");
2382 return NULL;
2383 }
2384
2385 if (!loop->single_exit
2386 || loop->num_nodes != 2
2387 || EDGE_COUNT (loop->header->preds) != 2)
2388 {
2389 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2390 {
2391 if (!loop->single_exit)
2392 fprintf (vect_dump, "not vectorized: multiple exits.");
2393 else if (loop->num_nodes != 2)
2394 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
2395 else if (EDGE_COUNT (loop->header->preds) != 2)
2396 fprintf (vect_dump, "not vectorized: too many incoming edges.");
2397 }
2398
2399 return NULL;
2400 }
2401
2402 /* We assume that the loop exit condition is at the end of the loop, i.e.,
2403 that the loop is represented as a do-while (with a proper if-guard
2404 before the loop if needed), where the loop header contains all the
2405 executable statements, and the latch is empty. */
2406 if (!empty_block_p (loop->latch))
2407 {
2408 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2409 fprintf (vect_dump, "not vectorized: unexpectd loop form.");
2410 return NULL;
2411 }
2412
2413 /* Make sure there exists a single-predecessor exit bb: */
2414 if (!single_pred_p (loop->single_exit->dest))
2415 {
2416 edge e = loop->single_exit;
2417 if (!(e->flags & EDGE_ABNORMAL))
2418 {
2419 loop_split_edge_with (e, NULL);
2420 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2421 fprintf (vect_dump, "split exit edge.");
2422 }
2423 else
2424 {
2425 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2426 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
2427 return NULL;
2428 }
2429 }
2430
2431 if (empty_block_p (loop->header))
2432 {
2433 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2434 fprintf (vect_dump, "not vectorized: empty loop.");
2435 return NULL;
2436 }
2437
2438 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
2439 if (!loop_cond)
2440 {
2441 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2442 fprintf (vect_dump, "not vectorized: complicated exit condition.");
2443 return NULL;
2444 }
2445
2446 if (!number_of_iterations)
2447 {
2448 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2449 fprintf (vect_dump,
2450 "not vectorized: number of iterations cannot be computed.");
2451 return NULL;
2452 }
2453
2454 if (chrec_contains_undetermined (number_of_iterations))
2455 {
2456 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2457 fprintf (vect_dump, "Infinite number of iterations.");
2458 return NULL;
2459 }
2460
2461 loop_vinfo = new_loop_vec_info (loop);
2462 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2463
2464 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2465 {
2466 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2467 {
2468 fprintf (vect_dump, "Symbolic number of iterations is ");
2469 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
2470 }
2471 }
2472 else
2473 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
2474 {
2475 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, loop_loc))
2476 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2477 return NULL;
2478 }
2479
2480 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2481 LOOP_VINFO_LOC (loop_vinfo) = loop_loc;
2482
2483 return loop_vinfo;
2484 }
2485
2486
2487 /* Function vect_analyze_loop.
2488
2489 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2490 for it. The different analyses will record information in the
2491 loop_vec_info struct. */
2492 loop_vec_info
2493 vect_analyze_loop (struct loop *loop)
2494 {
2495 bool ok;
2496 loop_vec_info loop_vinfo;
2497
2498 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2499 fprintf (vect_dump, "===== analyze_loop_nest =====");
2500
2501 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
2502
2503 loop_vinfo = vect_analyze_loop_form (loop);
2504 if (!loop_vinfo)
2505 {
2506 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2507 fprintf (vect_dump, "bad loop form.");
2508 return NULL;
2509 }
2510
2511 /* Find all data references in the loop (which correspond to vdefs/vuses)
2512 and analyze their evolution in the loop.
2513
2514 FORNOW: Handle only simple array references whose
2515 alignment can be forced, and aligned pointer-references. */
2516
2517 ok = vect_analyze_data_refs (loop_vinfo);
2518 if (!ok)
2519 {
2520 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2521 fprintf (vect_dump, "bad data references.");
2522 destroy_loop_vec_info (loop_vinfo);
2523 return NULL;
2524 }
2525
2526 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2527
2528 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2529 if (!ok)
2530 {
2531 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2532 fprintf (vect_dump, "unexpected pattern.");
2533 destroy_loop_vec_info (loop_vinfo);
2534 return NULL;
2535 }
2536
2537 /* Check that all cross-iteration scalar data-flow cycles are OK.
2538 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2539
2540 ok = vect_analyze_scalar_cycles (loop_vinfo);
2541 if (!ok)
2542 {
2543 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2544 fprintf (vect_dump, "bad scalar cycle.");
2545 destroy_loop_vec_info (loop_vinfo);
2546 return NULL;
2547 }
2548
2549 /* Analyze data dependences between the data-refs in the loop.
2550 FORNOW: fail at the first data dependence that we encounter. */
2551
2552 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2553 if (!ok)
2554 {
2555 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2556 fprintf (vect_dump, "bad data dependence.");
2557 destroy_loop_vec_info (loop_vinfo);
2558 return NULL;
2559 }
2560
2561 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2562 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2563
2564 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2565 if (!ok)
2566 {
2567 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2568 fprintf (vect_dump, "bad data access.");
2569 destroy_loop_vec_info (loop_vinfo);
2570 return NULL;
2571 }
2572
2573 ok = vect_determine_vectorization_factor (loop_vinfo);
2574 if (!ok)
2575 {
2576 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2577 fprintf (vect_dump, "can't determine vectorization factor.");
2578 destroy_loop_vec_info (loop_vinfo);
2579 return NULL;
2580 }
2581
2582 /* Analyze the alignment of the data-refs in the loop.
2583 FORNOW: Only aligned accesses are handled. */
2584
2585 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2586 if (!ok)
2587 {
2588 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2589 fprintf (vect_dump, "bad data alignment.");
2590 destroy_loop_vec_info (loop_vinfo);
2591 return NULL;
2592 }
2593
2594 /* Scan all the operations in the loop and make sure they are
2595 vectorizable. */
2596
2597 ok = vect_analyze_operations (loop_vinfo);
2598 if (!ok)
2599 {
2600 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2601 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2602 destroy_loop_vec_info (loop_vinfo);
2603 return NULL;
2604 }
2605
2606 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2607
2608 return loop_vinfo;
2609 }