/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "errors.h"
#include "ggc.h"
#include "tree.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"

/* Main analysis functions.  */
static loop_vec_info vect_analyze_loop_form (struct loop *);
static bool vect_analyze_data_refs (loop_vec_info);
static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
static bool vect_analyze_scalar_cycles (loop_vec_info);
static bool vect_analyze_data_ref_accesses (loop_vec_info);
static bool vect_analyze_data_ref_dependences (loop_vec_info);
static bool vect_analyze_data_refs_alignment (loop_vec_info);
static bool vect_compute_data_refs_alignment (loop_vec_info);
static void vect_enhance_data_refs_alignment (loop_vec_info);
static bool vect_analyze_operations (loop_vec_info);
static bool vect_determine_vectorization_factor (loop_vec_info);

/* Utility functions for the analyses.  */
static bool exist_non_indexing_operands_for_use_p (tree, tree);
static void vect_mark_relevant (varray_type *, tree);
static bool vect_stmt_relevant_p (tree, loop_vec_info);
static tree vect_get_loop_niters (struct loop *, tree *);
static bool vect_analyze_data_ref_dependence
  (struct data_reference *, struct data_reference *, loop_vec_info);
static bool vect_compute_data_ref_alignment (struct data_reference *);
static bool vect_analyze_data_ref_access (struct data_reference *);
static struct data_reference * vect_analyze_pointer_ref_access
  (tree, tree, bool, tree, tree *, tree *);
static bool vect_can_advance_ivs_p (loop_vec_info);
static tree vect_get_ptr_offset (tree, tree, tree *);
static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *,
                                      tree *, tree *);
static bool vect_base_addr_differ_p (struct data_reference *,
                                     struct data_reference *, bool *);
static tree vect_object_analysis (tree, tree, bool, tree,
                                  struct data_reference **, tree *, tree *,
                                  tree *, bool *, tree *,
                                  struct ptr_info_def **, subvar_t *);
static tree vect_address_analysis (tree, tree, bool, tree,
                                   struct data_reference *, tree *, tree *,
                                   tree *, bool *);


/* Function vect_get_ptr_offset

   Compute the OFFSET modulo vector-type alignment of pointer REF in bits.  */

static tree
vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
                     tree vectype ATTRIBUTE_UNUSED,
                     tree *offset ATTRIBUTE_UNUSED)
{
  /* TODO: Use alignment information.  */
  return NULL_TREE;
}


/* Function vect_analyze_offset_expr

   Given an offset expression EXPR received from get_inner_reference, analyze
   it and create an expression for INITIAL_OFFSET by substituting the variables
   of EXPR with the initial_condition of the corresponding access_fn in the
   loop.  E.g.,
     for i
       for (j = 3; j < N; j++)
         a[j].b[i][j] = 0;

   For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'.  'i' cannot be
   substituted, since its access_fn in the inner loop is i.  'j' will be
   substituted with 3.  INITIAL_OFFSET will be 'i * C_i + C`', where
   C` = 3 * C_j + C.

   Compute MISALIGN (the misalignment of the data reference initial access from
   its base) if possible.  Misalignment can be calculated only if all the
   variables can be substituted with constants, or if a variable is multiplied
   by a multiple of VECTYPE_ALIGNMENT.  In the above example, since 'i' cannot
   be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple
   of VECTYPE_ALIGNMENT, and C` otherwise.  (MISALIGN is computed modulo
   VECTYPE_ALIGNMENT in the caller of this function.)

   STEP is the evolution of the data reference in this loop in bytes.
   In the above example, STEP is C_j.

   Return FALSE if the analysis fails, e.g., there is no access_fn for a
   variable.  In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
   are NULL_TREEs.  Otherwise, return TRUE.  */

static bool
vect_analyze_offset_expr (tree expr,
                          struct loop *loop,
                          tree vectype_alignment,
                          tree *initial_offset,
                          tree *misalign,
                          tree *step)
{
  tree oprnd0;
  tree oprnd1;
  tree left_offset = ssize_int (0);
  tree right_offset = ssize_int (0);
  tree left_misalign = ssize_int (0);
  tree right_misalign = ssize_int (0);
  tree left_step = ssize_int (0);
  tree right_step = ssize_int (0);
  enum tree_code code;
  tree init, evolution;

  *step = NULL_TREE;
  *misalign = NULL_TREE;
  *initial_offset = NULL_TREE;

  /* Strip conversions that don't narrow the mode.  */
  expr = vect_strip_conversion (expr);
  if (!expr)
    return false;

  /* Stop conditions:
     1. Constant.  */
  if (TREE_CODE (expr) == INTEGER_CST)
    {
      *initial_offset = fold_convert (ssizetype, expr);
      *misalign = fold_convert (ssizetype, expr);
      *step = ssize_int (0);
      return true;
    }

  /* 2. Variable.  Try to substitute with the initial_condition of the
     corresponding access_fn in the current loop.  */
  if (SSA_VAR_P (expr))
    {
      tree access_fn = analyze_scalar_evolution (loop, expr);

      if (access_fn == chrec_dont_know)
        /* No access_fn.  */
        return false;

      init = initial_condition_in_loop_num (access_fn, loop->num);
      if (init == expr && !expr_invariant_in_loop_p (loop, init))
        /* Not enough information: the expression may not be loop invariant.
           E.g., for a[b[i]], we get a[D], where D=b[i].  EXPR is D, its
           initial_condition is D, but it depends on i, the loop's induction
           variable.  */
        return false;

      evolution = evolution_part_in_loop_num (access_fn, loop->num);
      if (evolution && TREE_CODE (evolution) != INTEGER_CST)
        /* Evolution is not constant.  */
        return false;

      if (TREE_CODE (init) == INTEGER_CST)
        *misalign = fold_convert (ssizetype, init);
      else
        /* Not constant, misalignment cannot be calculated.  */
        *misalign = NULL_TREE;

      *initial_offset = fold_convert (ssizetype, init);

      *step = evolution ? fold_convert (ssizetype, evolution) : ssize_int (0);
      return true;
    }

  /* Recursive computation.  */
  if (!BINARY_CLASS_P (expr))
    {
      /* We expect to get binary expressions (PLUS/MINUS and MULT).  */
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        {
          fprintf (vect_dump, "Not binary expression ");
          print_generic_expr (vect_dump, expr, TDF_SLIM);
        }
      return false;
    }
  oprnd0 = TREE_OPERAND (expr, 0);
  oprnd1 = TREE_OPERAND (expr, 1);

  if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset,
                                 &left_misalign, &left_step)
      || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment,
                                    &right_offset, &right_misalign,
                                    &right_step))
    return false;

  /* The type of the operation: plus, minus or mult.  */
  code = TREE_CODE (expr);
  switch (code)
    {
    case MULT_EXPR:
      if (TREE_CODE (right_offset) != INTEGER_CST)
        /* RIGHT_OFFSET might not be constant, e.g., for arrays of
           variable-sized types.
           FORNOW: We don't support such cases.  */
        return false;

      /* Strip conversions that don't narrow the mode.  */
      left_offset = vect_strip_conversion (left_offset);
      if (!left_offset)
        return false;
      /* Misalignment computation.  */
      if (SSA_VAR_P (left_offset))
        {
          /* If the left side contains variables that can't be substituted with
             constants, we check if the right side is a multiple of
             ALIGNMENT.  */
          if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset,
                          fold_convert (ssizetype, vectype_alignment))))
            *misalign = ssize_int (0);
          else
            /* If the remainder is not zero, we can't compute
               misalignment.  */
            *misalign = NULL_TREE;
        }
      else
        {
          /* The left operand was successfully substituted with a constant.  */
          if (left_misalign)
            /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
               NULL_TREE.  */
            *misalign = size_binop (code, left_misalign, right_misalign);
          else
            *misalign = NULL_TREE;
        }

      /* Step calculation: multiply the step by the right operand.  */
      *step = size_binop (MULT_EXPR, left_step, right_offset);
      break;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Combine the recursive calculations for step and misalignment.  */
      *step = size_binop (code, left_step, right_step);

      if (left_misalign && right_misalign)
        *misalign = size_binop (code, left_misalign, right_misalign);
      else
        *misalign = NULL_TREE;

      break;

    default:
      gcc_unreachable ();
    }

  /* Compute offset.  */
  *initial_offset = fold_convert (ssizetype,
                                  fold (build2 (code, TREE_TYPE (left_offset),
                                                left_offset,
                                                right_offset)));
  return true;
}
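
/* A worked trace of the recursion above (an illustration only; the names
   and constants are made up, but the values follow the rules implemented
   in this function).  For EXPR 'j * 4 + 8' in a loop where 'j' has
   access_fn {3, +, 1}:
     'j'          -> initial_offset = 3,  misalign = 3,  step = 1
     'j * 4'      -> initial_offset = 12, misalign = 12, step = 4
     'j * 4 + 8'  -> initial_offset = 20, misalign = 20, step = 4
   The caller then reduces MISALIGN modulo the vector-type alignment.  */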


/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop.  For example, when vectorizing a loop that operates on 4-byte
   elements, on a target with a vector size (VS) of 16 bytes, the VF is set
   to 4, since 4 elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated upon
   are of the same size.  Therefore this function currently sets VF according to
   the size of the types operated upon, and fails if there are multiple sizes
   in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
*/

static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  tree scalar_type;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          unsigned int nunits;
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          tree vectype;

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);
          /* Skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info))
            continue;

          if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (STMT_VINFO_DATA_REF (stmt_info))
            scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
          else if (TREE_CODE (stmt) == MODIFY_EXPR)
            scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
          else
            scalar_type = TREE_TYPE (stmt);

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "get vectype for scalar type: ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }

          vectype = get_vectype_for_scalar_type (scalar_type);
          if (!vectype)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                {
                  fprintf (vect_dump, "not vectorized: unsupported data-type ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }
              return false;
            }
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }
          STMT_VINFO_VECTYPE (stmt_info) = vectype;

          nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (vectorization_factor)
            {
              /* FORNOW: don't allow mixed units.
                 This restriction will be relaxed in the future.  */
              if (nunits != vectorization_factor)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                            LOOP_LOC (loop_vinfo)))
                    fprintf (vect_dump, "not vectorized: mixed data-types");
                  return false;
                }
            }
          else
            vectorization_factor = nunits;

#ifdef ENABLE_CHECKING
          gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
                      * vectorization_factor == UNITS_PER_SIMD_WORD);
#endif
        }
    }

  /* TODO: Analyze cost.  Decide if it is worthwhile to vectorize.  */

  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}


/* Function vect_analyze_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */

static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  bool ok;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_operations ===");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.
             This is expected to include:
             - the COND_EXPR which is the loop exit condition
             - any LABEL_EXPRs in the loop
             - computations that are used only for array indexing or loop
               control.  */

          if (!STMT_VINFO_RELEVANT_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
                fprintf (vect_dump, "irrelevant.");
              continue;
            }

#ifdef ENABLE_CHECKING
          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
            }
#endif

          ok = (vectorizable_operation (stmt, NULL, NULL)
                || vectorizable_assignment (stmt, NULL, NULL)
                || vectorizable_load (stmt, NULL, NULL)
                || vectorizable_store (stmt, NULL, NULL));

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                {
                  fprintf (vect_dump, "not vectorized: stmt not supported: ");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }
        }
    }

  /* TODO: Analyze cost.  Decide if it is worthwhile to vectorize.  */

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump,
             "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
             vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      return false;
    }

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
    {
      if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}
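
/* For instance (illustrative numbers): with vectorization_factor = 4 and
   a known iteration count of 10, the count is not smaller than the factor,
   but 10 % 4 != 0, so the checks above require an epilog loop to execute
   the remaining 2 scalar iterations.  */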


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, tree stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs.)

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
    return false;

  operand = TREE_OPERAND (stmt, 1);

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
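
/* For example (hypothetical stmts): in 'a[i] = x', operand 0 is an
   ARRAY_REF and operand 1 is the SSA_NAME 'x', so the function returns
   true for USE == 'x' but false for USE == 'i', which only indexes the
   array.  In 'x = a[i]', operand 0 is an SSA_NAME (a def), so false is
   returned for every USE.  */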


/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
   cycles that they represent do not impede vectorization.

   FORNOW: Reduction, as in the following loop, is not supported yet:
              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];
           The cross-iteration cycle corresponding to variable 'sum' will be
           considered too complicated and will impede vectorization.

   FORNOW: Induction, as in the following loop, is not supported yet:
              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;

           However, the following loop *is* vectorizable:
              loop3:
              for (i=0; i<N; i++)
                 a[i] = b[i];

           In both loops there exists a def-use cycle for the variable i:
              loop: i_2 = PHI (i_0, i_1)
                    a[i_2] = ...;
                    i_1 = i_2 + 1;
                    GOTO loop;

           The evolution of the above cycle is considered simple enough,
           however, we also check that the cycle does not need to be
           vectorized, i.e., we check that the variable that this cycle
           defines is only used for array indexing or in stmts that do not
           need to be vectorized.  This is not the case in loop2, but it
           *is* the case in loop3.  */

static bool
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  tree phi;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  tree dummy;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;

      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual PHIs.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
        {
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            fprintf (vect_dump, "virtual phi. skip.");
          continue;
        }

      /* Analyze the evolution function.  */

      /* FORNOW: The only scalar cross-iteration cycles that we allow are
         those of loop induction variables; this property is verified here.

         Furthermore, if that induction variable is used in an operation
         that needs to be vectorized (i.e., is not solely used to index
         arrays and check the exit condition) - we do not support its
         vectorization yet.  This property is verified in vect_is_simple_use,
         during vect_analyze_operations.  */

      access_fn = /* instantiate_parameters
                     (loop,*/
        analyze_scalar_evolution (loop, PHI_RESULT (phi));

      if (!access_fn)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: unsupported scalar cycle.");
          return false;
        }

      if (vect_print_dump_info (REPORT_DETAILS,
                                LOOP_LOC (loop_vinfo)))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: unsupported scalar cycle.");
          return false;
        }
    }

  return true;
}


/* Function vect_base_addr_differ_p.

   This is the simplest data dependence test: determines whether the
   data references A and B access the same array/region.  Returns
   false when the property is not computable at compile time.
   Otherwise return true, and DIFFER_P will record the result.  This
   utility will not be necessary when alias_sets_conflict_p becomes
   less conservative.  */

static bool
vect_base_addr_differ_p (struct data_reference *dra,
                         struct data_reference *drb,
                         bool *differ_p)
{
  tree stmt_a = DR_STMT (dra);
  stmt_vec_info stmt_info_a = vinfo_for_stmt (stmt_a);
  tree stmt_b = DR_STMT (drb);
  stmt_vec_info stmt_info_b = vinfo_for_stmt (stmt_b);
  tree addr_a = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_a);
  tree addr_b = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_b);
  tree type_a = TREE_TYPE (addr_a);
  tree type_b = TREE_TYPE (addr_b);
  HOST_WIDE_INT alias_set_a, alias_set_b;

  gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));

  /* Both references are ADDR_EXPR, i.e., we have the objects.  */
  if (TREE_CODE (addr_a) == ADDR_EXPR && TREE_CODE (addr_b) == ADDR_EXPR)
    return array_base_name_differ_p (dra, drb, differ_p);

  alias_set_a = (TREE_CODE (addr_a) == ADDR_EXPR) ?
    get_alias_set (TREE_OPERAND (addr_a, 0)) : get_alias_set (addr_a);
  alias_set_b = (TREE_CODE (addr_b) == ADDR_EXPR) ?
    get_alias_set (TREE_OPERAND (addr_b, 0)) : get_alias_set (addr_b);

  if (!alias_sets_conflict_p (alias_set_a, alias_set_b))
    {
      *differ_p = true;
      return true;
    }

  /* An instruction writing through a restricted pointer is "independent" of
     any instruction reading or writing through a different pointer, in the
     same block/scope.  */
  else if ((TYPE_RESTRICT (type_a) && !DR_IS_READ (dra))
           || (TYPE_RESTRICT (type_b) && !DR_IS_READ (drb)))
    {
      *differ_p = true;
      return true;
    }
  return false;
}
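
/* As an illustration of the TYPE_RESTRICT case above, for a function such
   as (hypothetical signature) 'void f (int *__restrict p, int *q)', a
   store through 'p' is considered independent of accesses through 'q',
   so DIFFER_P is set to true even when the alias sets conflict.  */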


/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  */

static bool
vect_analyze_data_ref_dependence (struct data_reference *dra,
                                  struct data_reference *drb,
                                  loop_vec_info loop_vinfo)
{
  bool differ_p;
  struct data_dependence_relation *ddr;

  if (!vect_base_addr_differ_p (dra, drb, &differ_p))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        {
          fprintf (vect_dump,
                   "not vectorized: can't determine dependence between: ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  if (differ_p)
    return false;

  ddr = initialize_data_dependence_relation (dra, drb);
  compute_affine_dependence (ddr);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return false;

  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                            LOOP_LOC (loop_vinfo)))
    {
      fprintf (vect_dump,
               "not vectorized: possible dependence between data-refs ");
      print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
    }

  return true;
}


/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.

   TODO: dependences whose distance is greater than the vectorization factor
         can be ignored.  */

static bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
{
  unsigned int i, j;
  varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);

  /* Examine store-store (output) dependences.  */

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_data_ref_dependences ===");

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "compare all store-store pairs.");

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
    {
      for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
        {
          struct data_reference *dra =
            VARRAY_GENERIC_PTR (loop_write_refs, i);
          struct data_reference *drb =
            VARRAY_GENERIC_PTR (loop_write_refs, j);
          if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
            return false;
        }
    }

  /* Examine load-store (true/anti) dependences.  */

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "compare all load-store pairs.");

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
    {
      for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
        {
          struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
          struct data_reference *drb =
            VARRAY_GENERIC_PTR (loop_write_refs, j);
          if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
            return false;
        }
    }

  return true;
}


/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */

static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, alignment;
  bool base_aligned_p;
  tree misalign;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  DR_MISALIGNMENT (dr) = -1;

  misalign = STMT_VINFO_VECT_MISALIGNMENT (stmt_info);
  base_aligned_p = STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info);
  base = build_fold_indirect_ref (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info));
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (!misalign)
    {
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if (!base_aligned_p)
    {
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
        {
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned_p
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Alignment required, in bytes:  */
  alignment = ssize_int (TYPE_ALIGN (vectype) / BITS_PER_UNIT);

  /* Modulo alignment.  */
  misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
  if (tree_int_cst_sgn (misalign) < 0)
    {
      /* Negative misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "misalign = %d bytes", DR_MISALIGNMENT (dr));

  return true;
}
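
/* A worked instance of the modulo computation above (illustrative
   numbers): for a V4SI vectype, TYPE_ALIGN is 128 bits, i.e., a 16-byte
   alignment requirement; an access at offset 20 bytes from an aligned
   base gets DR_MISALIGNMENT = 20 % 16 = 4.  */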


/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   This pass may take place at function granularity instead of at loop
   granularity.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
  unsigned int i;

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      if (!vect_compute_data_ref_alignment (dr))
        return false;
    }

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
      if (!vect_compute_data_ref_alignment (dr))
        return false;
    }

  return true;
}


/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized; any other loops that are created by
   the transformations performed in this pass are not supposed to be
   vectorized.  This restriction will be relaxed.  */

static void
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type datarefs;
  struct data_reference *dr0 = NULL;
  unsigned int i, j;

  /* Sigh, a hack to make targets that do not define UNITS_PER_SIMD_WORD
     bootstrap.  Copy UNITS_PER_SIMD_WORD to a local variable to avoid a
     "division by zero" error.  This error would be issued because we
     do "... % UNITS_PER_SIMD_WORD" below, and UNITS_PER_SIMD_WORD
     defaults to 0 if it is not defined by the target.  */
  int units_per_simd_word = UNITS_PER_SIMD_WORD;

  /*
     This pass will require a cost model to guide it whether to apply peeling
     or versioning or a combination of the two.  For example, the scheme that
     Intel uses when given a loop with several memory accesses is as follows:
     choose one memory access ('p') whose alignment you want to force by doing
     peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
     other accesses are not necessarily aligned, or (2) use loop versioning to
     generate one loop in which all accesses are aligned, and another loop in
     which only 'p' is necessarily aligned.

     ("Automatic Intra-Register Vectorization for the Intel Architecture",
     Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
     Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

     Devising a cost model is the most critical aspect of this work.  It will
     guide us on which access to peel for, whether to use loop versioning, how
     many versions to create, etc.  The cost model will probably consist of
     generic considerations as well as target specific considerations (on
     powerpc for example, misaligned stores are more painful than misaligned
     loads).

     Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){            # loop 1A
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){            # loop 1B
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     for (i = 3; i < N; i++){           # loop 2A
        x = q[i];                       # DR_MISALIGNMENT(q) = 0
        p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     if (p is aligned) {
        for (i = 3; i<N; i++){          # loop 3A
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i = 3; i<N; i++){          # loop 3B
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     These loops are later passed to loop_transform to be vectorized.  The
     vectorizer will use the alignment information to guide the transformation
     (whether to generate regular loads/stores, or with special handling for
     misalignment).  */

  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).

     The scheme we use FORNOW: peel to force the alignment of the first
     misaligned store in the loop.
     Rationale: misaligned stores are not yet supported.

     TODO: Use a better cost model.  */

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      dr0 = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      if (!aligned_access_p (dr0))
        {
          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
          LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
          break;
        }
    }

  /* (1.2) Update the alignment info according to the peeling factor.
     If the misalignment of the DR we peel for is M, then the
     peeling factor is VF - M, and the misalignment of each access DR_i
     in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
     If the misalignment of the DR we peel for is unknown, then the
     misalignment of each access DR_i in the loop is also unknown.

     TODO: - consider accesses that are known to have the same
             alignment, even if that alignment is unknown.  */

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      int mis;
      int npeel = 0;

      if (known_alignment_for_access_p (dr0))
        {
          /* Since it's known at compile time, compute the number of iterations
             in the peeled loop (the peeling factor) for use in updating
             DR_MISALIGNMENT values.  The peeling factor is the vectorization
             factor minus the misalignment as an element count.  */
          mis = DR_MISALIGNMENT (dr0);
          mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
          npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
        }

      datarefs = loop_write_datarefs;
      for (j = 0; j < 2; j++)
        {
          for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
            {
              struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);

              if (dr == dr0)
                continue;
              if (known_alignment_for_access_p (dr)
                  && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr0))
                DR_MISALIGNMENT (dr) = 0;
              else if (known_alignment_for_access_p (dr)
                       && known_alignment_for_access_p (dr0))
                {
                  int drsize =
                    GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));

                  DR_MISALIGNMENT (dr) += npeel * drsize;
                  DR_MISALIGNMENT (dr) %= units_per_simd_word;
                }
              else
                DR_MISALIGNMENT (dr) = -1;
            }
          datarefs = loop_read_datarefs;
        }

      DR_MISALIGNMENT (dr0) = 0;
    }
}
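
/* A worked instance of the update above (illustrative numbers): with
   VF = 4 and 4-byte elements, peeling for a store DR0 with
   DR_MISALIGNMENT 8 gives mis = 8/4 = 2 elements and npeel = 4 - 2 = 2.
   Another access DR with known misalignment 4 is then updated to
   (4 + 2*4) % UNITS_PER_SIMD_WORD, i.e., 12 on a 16-byte target, while
   DR0 itself becomes aligned (DR_MISALIGNMENT 0).  */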


/* Function vect_analyze_data_refs_alignment

   Analyze the alignment of the data-references in the loop.
   FOR NOW: Until support for misaligned accesses is in place, only if all
   accesses are aligned can the loop be vectorized.  This restriction will be
   relaxed.  */

static bool
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");


  /* This pass may take place at function granularity instead of at loop
     granularity.  */

  if (!vect_compute_data_refs_alignment (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump,
                 "not vectorized: can't calculate alignment for data ref.");
      return false;
    }


  /* This pass will decide on using loop versioning and/or loop peeling in
     order to enhance the alignment of data references in the loop.  */

  vect_enhance_data_refs_alignment (loop_vinfo);


  /* Finally, check that all the data references in the loop can be
     handled with respect to their alignment.  */

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: unsupported unaligned load.");
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: unsupported unaligned store.");
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  if (LOOP_VINFO_UNALIGNED_DR (loop_vinfo)
      && vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
    fprintf (vect_dump, "Alignment of access forced using peeling.");

  return true;
}


/* Function vect_analyze_data_ref_access.

   Analyze the access pattern of the data-reference DR.  For now, a data
   access has to be consecutive to be considered vectorizable.  */

static bool
vect_analyze_data_ref_access (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree step = STMT_VINFO_VECT_STEP (stmt_info);
  tree scalar_type = TREE_TYPE (DR_REF (dr));

  if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
    {
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "not consecutive access");
      return false;
    }
  return true;
}
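
/* E.g., for a 4-byte 'int' reference the access is consecutive only if
   STEP is exactly 4: a step of 0 (loop invariant) or 8 (stride two)
   makes the tree_int_cst_compare test above nonzero, and the access is
   rejected.  */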


/* Function vect_analyze_data_ref_accesses.

   Analyze the access pattern of all the data references in the loop.

   FORNOW: the only access pattern that is considered vectorizable is a
           simple step 1 (consecutive) access.

   FORNOW: handle only arrays and pointer accesses.  */

static bool
vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
{
  unsigned int i;
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      bool ok = vect_analyze_data_ref_access (dr);
      if (!ok)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: complicated access pattern.");
          return false;
        }
    }

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
      bool ok = vect_analyze_data_ref_access (dr);
      if (!ok)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: complicated access pattern.");
          return false;
        }
    }

  return true;
}


/* Function vect_analyze_pointer_ref_access.

   Input:
   STMT - a stmt that contains a data-ref.
   MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
   ACCESS_FN - the access function of MEMREF.

   Output:
   If the data-ref access is vectorizable, return a data_reference structure
   that represents it (DR).  Otherwise, return NULL.
   STEP - the stride of MEMREF in the loop.
   INIT - the initial condition of MEMREF in the loop.  */

static struct data_reference *
vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read,
                                 tree access_fn, tree *ptr_init,
                                 tree *ptr_step)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree step, init;
  tree reftype, innertype;
  tree indx_access_fn;
  int loopnum = loop->num;
  struct data_reference *dr;

  if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: pointer access is not simple.");
      return NULL;
    }

  STRIP_NOPS (init);

  if (!expr_invariant_in_loop_p (loop, init))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump,
                 "not vectorized: initial condition is not loop invariant.");
      return NULL;
    }

  if (TREE_CODE (step) != INTEGER_CST)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump,
                 "not vectorized: non constant step for pointer access.");
      return NULL;
    }

  reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
  if (!POINTER_TYPE_P (reftype))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
      return NULL;
    }

  if (!POINTER_TYPE_P (TREE_TYPE (init)))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
      return NULL;
    }

  *ptr_step = fold_convert (ssizetype, step);
  innertype = TREE_TYPE (reftype);
  if (!COMPLETE_TYPE_P (innertype))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: pointer to incomplete type.");
      return NULL;
    }

  /* Check that STEP is a multiple of type size.  */
  if (!integer_zerop (size_binop (TRUNC_MOD_EXPR, *ptr_step,
                        fold_convert (ssizetype, TYPE_SIZE_UNIT (innertype)))))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: non consecutive access.");
      return NULL;
    }

  indx_access_fn =
    build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    {
      fprintf (vect_dump, "Access function of ptr indx: ");
      print_generic_expr (vect_dump, indx_access_fn, TDF_SLIM);
    }
  dr = init_data_ref (stmt, memref, NULL_TREE, indx_access_fn, is_read);
  *ptr_init = init;
  return dr;
}
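
/* For example (illustrative): for the access '*p' in
     for (i = 0; i < N; i++, p++)
       *p = x;
   where 'p' is an 'int *', the access function of 'p' is {p_0, +, 4}:
   INIT is p_0 (loop invariant) and STEP is 4, a multiple of the 4-byte
   element size, so a data_reference with index access function
   {0, +, 1} is created for the access.  */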


/* Function vect_address_analysis

   Return the BASE of the address expression EXPR.
   Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP.

   Input:
   EXPR - the address expression that is being analyzed
   STMT - the statement that contains EXPR or its original memory reference
   IS_READ - TRUE if STMT reads from EXPR, FALSE if writes to EXPR
   VECTYPE - the type that defines the alignment (i.e., we compute
             alignment relative to TYPE_ALIGN(VECTYPE))
   DR - data_reference struct for the original memory reference

   Output:
   BASE (returned value) - the base of the data reference EXPR.
   INITIAL_OFFSET - initial offset of EXPR from BASE (an expression)
   MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if
              the computation is impossible
   STEP - evolution of EXPR in the loop
   BASE_ALIGNED - indicates if BASE is aligned

   If something unexpected is encountered (an unsupported form of data-ref),
   then NULL_TREE is returned.  */

static tree
vect_address_analysis (tree expr, tree stmt, bool is_read, tree vectype,
                       struct data_reference *dr, tree *offset, tree *misalign,
                       tree *step, bool *base_aligned)
{
  tree oprnd0, oprnd1, base_address, offset_expr, base_addr0, base_addr1;
  tree address_offset = ssize_int (0), address_misalign = ssize_int (0);
  tree dummy;
  struct ptr_info_def *dummy1;
  subvar_t dummy2;

  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
      /* EXPR is of form {base +/- offset} (or {offset +/- base}).  */
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      STRIP_NOPS (oprnd0);
      STRIP_NOPS (oprnd1);

      /* Recursively try to find the base of the address contained in EXPR.
         For offset, the returned base will be NULL.  */
      base_addr0 = vect_address_analysis (oprnd0, stmt, is_read, vectype, dr,
                                          &address_offset, &address_misalign,
                                          step, base_aligned);

      base_addr1 = vect_address_analysis (oprnd1, stmt, is_read, vectype, dr,
                                          &address_offset, &address_misalign,
                                          step, base_aligned);

      /* We support cases where only one of the operands contains an
         address.  */
      if ((base_addr0 && base_addr1) || (!base_addr0 && !base_addr1))
        return NULL_TREE;

      /* To revert STRIP_NOPS.  */
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      offset_expr = base_addr0 ?
        fold_convert (ssizetype, oprnd1) : fold_convert (ssizetype, oprnd0);

      /* EXPR is of form {base +/- offset} (or {offset +/- base}).  If offset
         is a number, we can add it to the misalignment value calculated for
         base, otherwise, misalignment is NULL.  */
      if (TREE_CODE (offset_expr) == INTEGER_CST && address_misalign)
        *misalign = size_binop (TREE_CODE (expr), address_misalign,
                                offset_expr);
      else
        *misalign = NULL_TREE;

      /* Combine offset (from EXPR {base + offset}) with the offset calculated
         for base.  */
      *offset = size_binop (TREE_CODE (expr), address_offset, offset_expr);
      return base_addr0 ? base_addr0 : base_addr1;

    case ADDR_EXPR:
      base_address = vect_object_analysis (TREE_OPERAND (expr, 0), stmt,
                                           is_read, vectype, &dr, offset,
                                           misalign, step, base_aligned,
                                           &dummy, &dummy1, &dummy2);
      return base_address;

    case SSA_NAME:
      if (!POINTER_TYPE_P (TREE_TYPE (expr)))
        return NULL_TREE;

      if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
        {
          if (vect_get_ptr_offset (expr, vectype, misalign))
            *base_aligned = true;
          else
            *base_aligned = false;
        }
      else
        {
          *base_aligned = true;
          *misalign = ssize_int (0);
        }
      *offset = ssize_int (0);
      *step = ssize_int (0);
      return expr;

    default:
      return NULL_TREE;
    }
}


/* Function vect_object_analysis

   Return the BASE of the data reference MEMREF.
   Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP.
   E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset
   'a.b[i] + 4B' from a (can be an expression), MISALIGN is an OFFSET
   instantiated with the initial_conditions of the access_functions of
   variables, modulo alignment, and STEP is the evolution of the DR_REF
   in this loop.

   Function get_inner_reference is used for the above in case of ARRAY_REF and
   COMPONENT_REF.

   The structure of the function is as follows:
   Part 1:
   Case 1. For handled_component_p refs
          1.1 call get_inner_reference
            1.1.1 analyze offset expr received from get_inner_reference
          1.2. build data-reference structure for MEMREF
        (fall through with BASE)
   Case 2. For declarations
          2.1 check alignment
          2.2 update DR_BASE_NAME if necessary for alias
   Case 3. For INDIRECT_REFs
          3.1 get the access function
          3.2 analyze evolution of MEMREF
          3.3 set data-reference structure for MEMREF
          3.4 call vect_address_analysis to analyze INIT of the access function

   Part 2:
   Combine the results of object and address analysis to calculate
   INITIAL_OFFSET, STEP and misalignment info.

   Input:
   MEMREF - the memory reference that is being analyzed
   STMT - the statement that contains MEMREF
   IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
   VECTYPE - the type that defines the alignment (i.e., we compute
             alignment relative to TYPE_ALIGN(VECTYPE))

   Output:
   BASE_ADDRESS (returned value) - the base address of the data reference
                                   MEMREF.  E.g., if MEMREF is a.b[k].c[i][j]
                                   the returned base is &a.
   DR - data_reference struct for MEMREF
   INITIAL_OFFSET - initial offset of MEMREF from BASE (an expression)
   MISALIGN - offset of MEMREF from BASE in bytes (a constant) or NULL_TREE if
              the computation is impossible
   STEP - evolution of the DR_REF in the loop
   BASE_ALIGNED - indicates if BASE is aligned
   MEMTAG - memory tag for aliasing purposes
   PTR_INFO - NULL or points-to aliasing info from a pointer SSA_NAME
   SUBVAR - Sub-variables of the variable

   If something unexpected is encountered (an unsupported form of data-ref),
   then NULL_TREE is returned.  */

1619 static tree
1620 vect_object_analysis (tree memref, tree stmt, bool is_read,
1621 tree vectype, struct data_reference **dr,
1622 tree *offset, tree *misalign, tree *step,
1623 bool *base_aligned, tree *memtag,
1624 struct ptr_info_def **ptr_info, subvar_t *subvars)
1625 {
1626 tree base = NULL_TREE, base_address = NULL_TREE;
1627 tree object_offset = ssize_int (0), object_misalign = ssize_int (0);
1628 tree object_step = ssize_int (0), address_step = ssize_int (0);
1629 bool object_base_aligned = true, address_base_aligned = true;
1630 tree address_offset = ssize_int (0), address_misalign = ssize_int (0);
1631 HOST_WIDE_INT pbitsize, pbitpos;
1632 tree poffset, bit_pos_in_bytes;
1633 enum machine_mode pmode;
1634 int punsignedp, pvolatilep;
1635 tree ptr_step = ssize_int (0), ptr_init = NULL_TREE;
1636 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1637 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1638 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1639 struct data_reference *ptr_dr = NULL;
1640 tree access_fn, evolution_part, address_to_analyze;
1641
1642 *ptr_info = NULL;
1643
1644 /* Part 1: */
1645 /* Case 1. handled_component_p refs. */
1646 if (handled_component_p (memref))
1647 {
1648 /* 1.1 call get_inner_reference. */
1649 /* Find the base and the offset from it. */
1650 base = get_inner_reference (memref, &pbitsize, &pbitpos, &poffset,
1651 &pmode, &punsignedp, &pvolatilep, false);
1652 if (!base)
1653 return NULL_TREE;
1654
1655 /* 1.1.1 analyze offset expr received from get_inner_reference. */
1656 if (poffset
1657 && !vect_analyze_offset_expr (poffset, loop, TYPE_SIZE_UNIT (vectype),
1658 &object_offset, &object_misalign, &object_step))
1659 {
1660 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1661 {
1662 fprintf (vect_dump, "failed to compute offset or step for ");
1663 print_generic_expr (vect_dump, memref, TDF_SLIM);
1664 }
1665 return NULL_TREE;
1666 }
1667
1668 /* Add bit position to OFFSET and MISALIGN. */
1669
1670 bit_pos_in_bytes = ssize_int (pbitpos/BITS_PER_UNIT);
1671 /* Check that there is no remainder in bits. */
1672 if (pbitpos%BITS_PER_UNIT)
1673 {
1674 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1675 fprintf (vect_dump, "bit offset alignment.");
1676 return NULL_TREE;
1677 }
1678 object_offset = size_binop (PLUS_EXPR, bit_pos_in_bytes, object_offset);
1679 if (object_misalign)
1680 object_misalign = size_binop (PLUS_EXPR, object_misalign,
1681 bit_pos_in_bytes);
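/* E.g., assuming 8-bit bytes, a field at pbitpos == 64 contributes
   64 / BITS_PER_UNIT == 8 bytes to OFFSET and MISALIGN above, whereas
   a bit-field at pbitpos == 68 is rejected because
   68 % BITS_PER_UNIT != 0.  */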
1682
1683 /* Create data-reference for MEMREF. TODO: handle COMPONENT_REFs. */
1684 if (!(*dr))
1685 {
1686 if (TREE_CODE (memref) == ARRAY_REF)
1687 *dr = analyze_array (stmt, memref, is_read);
1688 else
1689 /* FORNOW. */
1690 return NULL_TREE;
1691 }
1692 memref = base; /* To continue analysis of BASE. */
1693 /* fall through */
1694 }
1695
1696 /* Part 1: Case 2. Declarations. */
1697 if (DECL_P (memref))
1698 {
1699 /* We expect to get a decl only if we already have a DR. */
1700 if (!(*dr))
1701 {
1702 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1703 {
1704 fprintf (vect_dump, "unhandled decl ");
1705 print_generic_expr (vect_dump, memref, TDF_SLIM);
1706 }
1707 return NULL_TREE;
1708 }
1709
1710 /* 2.1 check the alignment. */
1711 object_base_aligned = (DECL_ALIGN (memref) >= TYPE_ALIGN (vectype));
1715
1716 /* 2.2 update DR_BASE_NAME if necessary. */
1717 if (!DR_BASE_NAME ((*dr)))
1718 /* For alias analysis, in case the analysis of an INDIRECT_REF
1719 brought us to this object. */
1720 DR_BASE_NAME ((*dr)) = memref;
1721
1722 if (SSA_VAR_P (memref) && var_can_have_subvars (memref))
1723 *subvars = get_subvars_for_var (memref);
1724 base_address = build_fold_addr_expr (memref);
1725 *memtag = memref;
1726 }
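/* E.g., if Case 1 above reduced a MEMREF such as a.b[i] to the decl
   'a', the base address taken here is &a, and 'a' itself serves as
   the memory tag (an illustrative sketch).  */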
1727
1728 /* Part 1: Case 3. INDIRECT_REFs. */
1729 else if (TREE_CODE (memref) == INDIRECT_REF)
1730 {
1731 tree ptr_ref = TREE_OPERAND (memref, 0);
1732 if (TREE_CODE (ptr_ref) == SSA_NAME)
1733 *ptr_info = SSA_NAME_PTR_INFO (ptr_ref);
1734
1735 /* 3.1 get the access function. */
1736 access_fn = analyze_scalar_evolution (loop, ptr_ref);
1737 if (!access_fn)
1738 {
1739 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1740 LOOP_LOC (loop_vinfo)))
1741 fprintf (vect_dump, "not vectorized: complicated pointer access.");
1742 return NULL_TREE;
1743 }
1744 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1745 {
1746 fprintf (vect_dump, "Access function of ptr: ");
1747 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1748 }
1749
1750 /* 3.2 analyze evolution of MEMREF. */
1751 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1752 if (evolution_part)
1753 {
1754 ptr_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read,
1755 access_fn, &ptr_init, &ptr_step);
1756 if (!(ptr_dr))
1757 return NULL_TREE;
1758
1759 object_step = size_binop (PLUS_EXPR, object_step, ptr_step);
1760 address_to_analyze = ptr_init;
1761 }
1762 else
1763 {
1764 if (!(*dr))
1765 {
1766 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1767 LOOP_LOC (loop_vinfo)))
1768 fprintf (vect_dump, "not vectorized: ptr is loop invariant.");
1769 return NULL_TREE;
1770 }
1771 /* Since a DR already exists for MEMREF, we are analyzing the init
1772 of the access function, which does not necessarily have an
1773 evolution in the loop. */
1774 address_to_analyze = initial_condition_in_loop_num (access_fn,
1775 loop->num);
1776 }
1777
1778 /* 3.3 set data-reference structure for MEMREF. */
1779 *dr = (*dr) ? *dr : ptr_dr;
1780
1781 /* 3.4 call vect_address_analysis to analyze INIT of the access
1782 function. */
1783 base_address = vect_address_analysis (address_to_analyze, stmt, is_read,
1784 vectype, *dr, &address_offset, &address_misalign,
1785 &address_step, &address_base_aligned);
1786 if (!base_address)
1787 return NULL_TREE;
1788
1789 switch (TREE_CODE (base_address))
1790 {
1791 case SSA_NAME:
1792 *memtag = get_var_ann (SSA_NAME_VAR (base_address))->type_mem_tag;
1793 if (!(*memtag) && TREE_CODE (TREE_OPERAND (memref, 0)) == SSA_NAME)
1794 *memtag = get_var_ann (
1795 SSA_NAME_VAR (TREE_OPERAND (memref, 0)))->type_mem_tag;
1796 break;
1797 case ADDR_EXPR:
1798 *memtag = TREE_OPERAND (base_address, 0);
1799 break;
1800 default:
1801 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1802 LOOP_LOC (loop_vinfo)))
1803 {
1804 fprintf (vect_dump, "not vectorized: no memtag ref: ");
1805 print_generic_expr (vect_dump, memref, TDF_SLIM);
1806 }
1807 return NULL_TREE;
1808 }
1809 }
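/* An illustrative sketch of this case: given

     p = &a[0];
     for (i = 0; i < N; i++, p++)
       ... = *p;

   the access function of 'p' has the form {&a[0], +, 4}_loop
   (assuming 4-byte elements), so PTR_STEP = 4 is accumulated into
   STEP, and the initial value &a[0] is passed on to
   vect_address_analysis.  */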
1810
1811 if (!base_address)
1812 /* MEMREF cannot be analyzed. */
1813 return NULL_TREE;
1814
1815 if (SSA_VAR_P (*memtag) && var_can_have_subvars (*memtag))
1816 *subvars = get_subvars_for_var (*memtag);
1817
1818 /* Part 2: Combine the results of object and address analysis to calculate
1819 INITIAL_OFFSET, STEP and misalignment info. */
1820 *offset = size_binop (PLUS_EXPR, object_offset, address_offset);
1821 if (object_misalign && address_misalign)
1822 *misalign = size_binop (PLUS_EXPR, object_misalign, address_misalign);
1823 else
1824 *misalign = NULL_TREE;
1825 *step = size_binop (PLUS_EXPR, object_step, address_step);
1826 *base_aligned = object_base_aligned && address_base_aligned;
1827
1828 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1829 {
1830 fprintf (vect_dump, "Results of object analysis for: ");
1831 print_generic_expr (vect_dump, memref, TDF_SLIM);
1832 fprintf (vect_dump, "\n\tbase_address: ");
1833 print_generic_expr (vect_dump, base_address, TDF_SLIM);
1834 fprintf (vect_dump, "\n\toffset: ");
1835 print_generic_expr (vect_dump, *offset, TDF_SLIM);
1836 fprintf (vect_dump, "\n\tstep: ");
1837 print_generic_expr (vect_dump, *step, TDF_SLIM);
1838 fprintf (vect_dump, "\n\tbase aligned %d\n\tmisalign: ", *base_aligned);
1839 print_generic_expr (vect_dump, *misalign, TDF_SLIM);
1840 }
1841 return base_address;
1842 }
1843
1844
1845 /* Function vect_analyze_data_refs.
1846
1847 Find all the data references in the loop.
1848
1849 The general structure of the analysis of data refs in the vectorizer is as
1850 follows:
1851 1- vect_analyze_data_refs(loop):
1852 Find and analyze all data-refs in the loop:
1853 foreach ref
1854 base_address = vect_object_analysis(ref)
1855 1.1- vect_object_analysis(ref):
1856 Analyze ref, and build a DR (data_reference struct) for it;
1857 compute base, initial_offset, step and alignment.
1858 Call get_inner_reference for refs handled in this function.
1859 Call vect_address_analysis (addr) to analyze pointer-type expressions.
1860 Set ref_stmt.base, ref_stmt.initial_offset, ref_stmt.alignment,
1861 ref_stmt.memtag, ref_stmt.ptr_info and ref_stmt.step accordingly.
1862 2- vect_analyze_data_ref_dependences(): apply dependence testing using ref_stmt.DR
1863 3- vect_analyze_data_refs_alignment(): check that ref_stmt.alignment is ok.
1864 4- vect_analyze_data_ref_accesses(): check that ref_stmt.step is ok.
1865
1866 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
1867 whose base is really an array (not a pointer) and whose alignment
1868 can be forced. This restriction will be relaxed. */
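/* A loop that the FORNOW restriction still admits (illustrative):

     int a[N], b[N];
     for (i = 0; i < N; i++)
       a[i] = b[i];

   Both a[i] and b[i] are ARRAY_REFs whose base is a global array,
   so their alignment can be forced if needed.  */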
1869
1870 static bool
1871 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1872 {
1873 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1874 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1875 int nbbs = loop->num_nodes;
1876 block_stmt_iterator si;
1877 int j;
1878 struct data_reference *dr;
1879
1880 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1881 fprintf (vect_dump, "=== vect_analyze_data_refs ===");
1882
1883 for (j = 0; j < nbbs; j++)
1884 {
1885 basic_block bb = bbs[j];
1886 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1887 {
1888 bool is_read = false;
1889 tree stmt = bsi_stmt (si);
1890 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1891 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1892 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1893 vuse_optype vuses = STMT_VUSE_OPS (stmt);
1894 varray_type *datarefs = NULL;
1895 int nvuses, nv_may_defs, nv_must_defs;
1896 tree memref = NULL;
1897 tree scalar_type, vectype;
1898 tree base, offset, misalign, step, tag;
1899 struct ptr_info_def *ptr_info;
1900 bool base_aligned;
1901 subvar_t subvars = NULL;
1902
1903 /* Assumption: a data-ref exists in STMT if and only if STMT has
1904 vuses/vdefs. */
1905
1906 if (!vuses && !v_may_defs && !v_must_defs)
1907 continue;
1908
1909 nvuses = NUM_VUSES (vuses);
1910 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
1911 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
1912
1913 if (nvuses && (nv_may_defs || nv_must_defs))
1914 {
1915 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1916 {
1917 fprintf (vect_dump, "unexpected vdefs and vuses in stmt: ");
1918 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1919 }
1920 return false;
1921 }
1922
1923 if (TREE_CODE (stmt) != MODIFY_EXPR)
1924 {
1925 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1926 {
1927 fprintf (vect_dump, "unexpected vops in stmt: ");
1928 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1929 }
1930 return false;
1931 }
1932
1933 if (vuses)
1934 {
1935 memref = TREE_OPERAND (stmt, 1);
1936 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
1937 is_read = true;
1938 }
1939 else /* vdefs */
1940 {
1941 memref = TREE_OPERAND (stmt, 0);
1942 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1943 is_read = false;
1944 }
1945
1946 scalar_type = TREE_TYPE (memref);
1947 vectype = get_vectype_for_scalar_type (scalar_type);
1948 if (!vectype)
1949 {
1950 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1951 {
1952 fprintf (vect_dump, "no vectype for stmt: ");
1953 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1954 fprintf (vect_dump, " scalar_type: ");
1955 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
1956 }
1957 /* It is not possible to vectorize this data reference. */
1958 return false;
1959 }
1960 /* Analyze MEMREF. If it is of a supported form, build a
1961 data_reference struct for it (DR). */
1962 dr = NULL;
1963 base = vect_object_analysis (memref, stmt, is_read, vectype, &dr,
1964 &offset, &misalign, &step,
1965 &base_aligned, &tag, &ptr_info,
1966 &subvars);
1967 if (!base)
1968 {
1969 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1970 LOOP_LOC (loop_vinfo)))
1971 {
1972 fprintf (vect_dump, "not vectorized: unhandled data ref: ");
1973 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1974 }
1975 return false;
1976 }
1977 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info) = base;
1978 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
1979 STMT_VINFO_VECT_STEP (stmt_info) = step;
1980 STMT_VINFO_VECT_MISALIGNMENT (stmt_info) = misalign;
1981 STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info) = base_aligned;
1982 STMT_VINFO_MEMTAG (stmt_info) = tag;
1983 STMT_VINFO_PTR_INFO (stmt_info) = ptr_info;
1984 STMT_VINFO_SUBVARS (stmt_info) = subvars;
1985 STMT_VINFO_VECTYPE (stmt_info) = vectype;
1986 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
1987 STMT_VINFO_DATA_REF (stmt_info) = dr;
1988 }
1989 }
1990
1991 return true;
1992 }
1993
1994
1995 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
1996
1997 /* Function vect_mark_relevant.
1998
1999 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
2000
2001 static void
2002 vect_mark_relevant (varray_type *worklist, tree stmt)
2003 {
2004 stmt_vec_info stmt_info;
2005
2006 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2007 fprintf (vect_dump, "mark relevant.");
2008
2009 if (TREE_CODE (stmt) == PHI_NODE)
2010 {
2011 VARRAY_PUSH_TREE (*worklist, stmt);
2012 return;
2013 }
2014
2015 stmt_info = vinfo_for_stmt (stmt);
2016
2017 if (!stmt_info)
2018 {
2019 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2020 {
2021 fprintf (vect_dump, "mark relevant: no stmt info for: ");
2022 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2023 }
2024 return;
2025 }
2026
2027 if (STMT_VINFO_RELEVANT_P (stmt_info))
2028 {
2029 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2030 fprintf (vect_dump, "already marked relevant.");
2031 return;
2032 }
2033
2034 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
2035 VARRAY_PUSH_TREE (*worklist, stmt);
2036 }
2037
2038
2039 /* Function vect_stmt_relevant_p.
2040
2041 Return true if STMT, in the loop that is represented by LOOP_VINFO,
2042 is "relevant for vectorization".
2043
2044 A stmt is considered "relevant for vectorization" if:
2045 - it has uses outside the loop.
2046 - it has vdefs (it alters memory).
2047 - it is a control stmt in the loop (other than the exit condition).
2048
2049 CHECKME: what other side effects would the vectorizer allow? */
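/* E.g., in

     for (i = 0; i < N; i++)
       {
         a[i] = x;      <-- relevant: it has vdefs (alters memory).
         s = s + b[i];  <-- relevant: the def of s is used after the loop.
       }
     ... = s;

   (an illustrative sketch of the criteria above).  */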
2050
2051 static bool
2052 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
2053 {
2054 v_may_def_optype v_may_defs;
2055 v_must_def_optype v_must_defs;
2056 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2057 ssa_op_iter op_iter;
2058 imm_use_iterator imm_iter;
2059 use_operand_p use_p;
2060 tree var;
2061
2062 /* cond stmt other than loop exit cond. */
2063 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
2064 return true;
2065
2066 /* changing memory. */
2067 if (TREE_CODE (stmt) != PHI_NODE)
2068 {
2069 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
2070 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
2071 if (v_may_defs || v_must_defs)
2072 {
2073 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2074 fprintf (vect_dump, "vect_stmt_relevant_p: stmt has vdefs.");
2075 return true;
2076 }
2077 }
2078
2079 /* uses outside the loop. */
2080 FOR_EACH_SSA_TREE_OPERAND (var, stmt, op_iter, SSA_OP_DEF)
2081 {
2082 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, var)
2083 {
2084 basic_block bb = bb_for_stmt (USE_STMT (use_p));
2085 if (!flow_bb_inside_loop_p (loop, bb))
2086 {
2087 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2088 fprintf (vect_dump, "vect_stmt_relevant_p: used out of loop.");
2089 return true;
2090 }
2091 }
2092 }
2093
2094 return false;
2095 }
2096
2097
2098 /* Function vect_mark_stmts_to_be_vectorized.
2099
2100 Not all stmts in the loop need to be vectorized. For example:
2101
2102 for i...
2103 for j...
2104 1. T0 = i + j
2105 2. T1 = a[T0]
2106
2107 3. j = j + 1
2108
2109 Stmts 1 and 3 do not need to be vectorized, because loop control and
2110 addressing of vectorized data-refs are handled differently.
2111
2112 This pass detects such stmts. */
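/* Note that in the example above relevance would not propagate from
   stmt 2 to stmt 1 even if stmt 2 were relevant: the use of T0 in
   a[T0] only computes an address, and such uses are filtered out
   below via exist_non_indexing_operands_for_use_p.  */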
2113
2114 static bool
2115 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2116 {
2117 varray_type worklist;
2118 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2119 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2120 unsigned int nbbs = loop->num_nodes;
2121 block_stmt_iterator si;
2122 tree stmt;
2123 stmt_ann_t ann;
2124 unsigned int i;
2125 int j;
2126 use_optype use_ops;
2127 stmt_vec_info stmt_info;
2128 basic_block bb;
2129 tree phi;
2130
2131 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2132 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
2133
2134 bb = loop->header;
2135 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2136 {
2137 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2138 {
2139 fprintf (vect_dump, "init: phi relevant? ");
2140 print_generic_expr (vect_dump, phi, TDF_SLIM);
2141 }
2142
2143 if (vect_stmt_relevant_p (phi, loop_vinfo))
2144 {
2145 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2146 LOOP_LOC (loop_vinfo)))
2147 fprintf (vect_dump, "unsupported reduction/induction.");
2148 return false;
2149 }
2150 }
2151
2152 VARRAY_TREE_INIT (worklist, 64, "work list");
2153
2154 /* 1. Init worklist. */
2155
2156 for (i = 0; i < nbbs; i++)
2157 {
2158 bb = bbs[i];
2159 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2160 {
2161 stmt = bsi_stmt (si);
2162
2163 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2164 {
2165 fprintf (vect_dump, "init: stmt relevant? ");
2166 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2167 }
2168
2169 stmt_info = vinfo_for_stmt (stmt);
2170 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
2171
2172 if (vect_stmt_relevant_p (stmt, loop_vinfo))
2173 vect_mark_relevant (&worklist, stmt);
2174 }
2175 }
2176
2177
2178 /* 2. Process worklist. */
2179
2180 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
2181 {
2182 stmt = VARRAY_TOP_TREE (worklist);
2183 VARRAY_POP (worklist);
2184
2185 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2186 {
2187 fprintf (vect_dump, "worklist: examine stmt: ");
2188 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2189 }
2190
2191 /* Examine the USES in this statement. Mark all the statements which
2192 feed this statement's uses as "relevant", unless the USE is used as
2193 an array index. */
2194
2195 if (TREE_CODE (stmt) == PHI_NODE)
2196 {
2197 /* follow the def-use chain inside the loop. */
2198 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
2199 {
2200 tree arg = PHI_ARG_DEF (stmt, j);
2201 tree def_stmt = NULL_TREE;
2202 basic_block bb;
2203 if (!vect_is_simple_use (arg, loop_vinfo, &def_stmt))
2204 {
2205 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2206 LOOP_LOC (loop_vinfo)))
2207 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2208 varray_clear (worklist);
2209 return false;
2210 }
2211 if (!def_stmt)
2212 continue;
2213
2214 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2215 {
2216 fprintf (vect_dump, "worklist: def_stmt: ");
2217 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
2218 }
2219
2220 bb = bb_for_stmt (def_stmt);
2221 if (flow_bb_inside_loop_p (loop, bb))
2222 vect_mark_relevant (&worklist, def_stmt);
2223 }
2224 }
2225
2226 ann = stmt_ann (stmt);
2227 use_ops = USE_OPS (ann);
2228
2229 for (i = 0; i < NUM_USES (use_ops); i++)
2230 {
2231 tree use = USE_OP (use_ops, i);
2232
2233 /* We are only interested in uses that need to be vectorized. Uses
2234 that are used for address computation are not considered relevant. */
2236 if (exist_non_indexing_operands_for_use_p (use, stmt))
2237 {
2238 tree def_stmt = NULL_TREE;
2239 basic_block bb;
2240 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt))
2241 {
2242 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2243 LOOP_LOC (loop_vinfo)))
2244 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2245 varray_clear (worklist);
2246 return false;
2247 }
2248
2249 if (!def_stmt)
2250 continue;
2251
2252 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2253 {
2254 fprintf (vect_dump, "worklist: examine use %d: ", i);
2255 print_generic_expr (vect_dump, use, TDF_SLIM);
2256 }
2257
2258 bb = bb_for_stmt (def_stmt);
2259 if (flow_bb_inside_loop_p (loop, bb))
2260 vect_mark_relevant (&worklist, def_stmt);
2261 }
2262 }
2263 } /* while worklist */
2264
2265 varray_clear (worklist);
2266 return true;
2267 }
2268
2269
2270 /* Function vect_can_advance_ivs_p
2271
2272 In case the number of iterations that LOOP iterates is unknown at compile
2273 time, an epilog loop will be generated, and the loop induction variables
2274 (IVs) will be "advanced" to the value they are supposed to take just before
2275 the epilog loop. Here we check that the access function of the loop IVs
2276 and the expression that represents the loop bound are simple enough.
2277 These restrictions will be relaxed in the future. */
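/* E.g., an IV whose access function has the simple form
   {init, +, step}_loop passes the check below; conceptually it can
   then be advanced to init + step * niters', where niters' is the
   number of iterations executed by the vectorized loop (a sketch;
   the actual update is generated elsewhere).  */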
2278
2279 static bool
2280 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2281 {
2282 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2283 basic_block bb = loop->header;
2284 tree phi;
2285
2286 /* Analyze phi functions of the loop header. */
2287
2288 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2289 {
2290 tree access_fn = NULL;
2291 tree evolution_part;
2292
2293 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2294 {
2295 fprintf (vect_dump, "Analyze phi: ");
2296 print_generic_expr (vect_dump, phi, TDF_SLIM);
2297 }
2298
2299 /* Skip virtual phis. The data dependences that are associated with
2300 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2301
2302 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2303 {
2304 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2305 fprintf (vect_dump, "virtual phi. skip.");
2306 continue;
2307 }
2308
2309 /* Analyze the evolution function. */
2310
2311 access_fn = instantiate_parameters
2312 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
2313
2314 if (!access_fn)
2315 {
2316 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2317 fprintf (vect_dump, "No access function.");
2318 return false;
2319 }
2320
2321 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2322 {
2323 fprintf (vect_dump, "Access function of PHI: ");
2324 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
2325 }
2326
2327 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2328
2329 if (evolution_part == NULL_TREE)
2330 return false;
2331
2332 /* FORNOW: We do not transform initial conditions of IVs
2333 whose evolution functions are a polynomial of degree >= 2. */
2334
2335 if (tree_is_chrec (evolution_part))
2336 return false;
2337 }
2338
2339 return true;
2340 }
2341
2342
2343 /* Function vect_get_loop_niters.
2344
2345 Determine the number of iterations of the loop.
2346 If an expression that represents the number of iterations
2347 can be constructed, place it in NUMBER_OF_ITERATIONS.
2348 Return the loop exit condition. */
2349
2350 static tree
2351 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
2352 {
2353 tree niters;
2354
2355 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2356 fprintf (vect_dump, "=== get_loop_niters ===");
2357
2358 niters = number_of_iterations_in_loop (loop);
2359
2360 if (niters != NULL_TREE
2361 && niters != chrec_dont_know)
2362 {
2363 *number_of_iterations = niters;
2364
2365 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2366 {
2367 fprintf (vect_dump, "==> get_loop_niters:");
2368 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
2369 }
2370 }
2371
2372 return get_loop_exit_condition (loop);
2373 }
2374
2375
2376 /* Function vect_analyze_loop_form.
2377
2378 Verify the following restrictions (some may be relaxed in the future):
2379 - it is an innermost loop
2380 - number of BBs = 2 (which are the loop header and the latch)
2381 - the loop has a pre-header
2382 - the loop has a single entry and exit
2383 - the loop exit condition is simple enough, and the number of iterations
2384 can be analyzed (a countable loop). */
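/* A loop of the expected form (illustrative):

     i = 0;
     if (i < n)          <-- if-guard
       do
         {
           a[i] = b[i];
           i = i + 1;
         }
       while (i < n);    <-- single, countable exit condition

   i.e., two BBs, where the header holds all executable statements
   and the latch is empty (see also the do-while comment below).  */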
2385
2386 static loop_vec_info
2387 vect_analyze_loop_form (struct loop *loop)
2388 {
2389 loop_vec_info loop_vinfo;
2390 tree loop_cond;
2391 tree number_of_iterations = NULL;
2392 LOC loop_loc;
2393
2394 loop_loc = find_loop_location (loop);
2395
2396 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2397 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
2398
2399 if (loop->inner)
2400 {
2401 if (vect_print_dump_info (REPORT_OUTER_LOOPS, loop_loc))
2402 fprintf (vect_dump, "not vectorized: nested loop.");
2403 return NULL;
2404 }
2405
2406 if (!loop->single_exit
2407 || loop->num_nodes != 2
2408 || EDGE_COUNT (loop->header->preds) != 2)
2409 {
2410 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2411 {
2412 if (!loop->single_exit)
2413 fprintf (vect_dump, "not vectorized: multiple exits.");
2414 else if (loop->num_nodes != 2)
2415 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
2416 else if (EDGE_COUNT (loop->header->preds) != 2)
2417 fprintf (vect_dump, "not vectorized: too many incoming edges.");
2418 }
2419
2420 return NULL;
2421 }
2422
2423 /* We assume that the loop exit condition is at the end of the loop, i.e.,
2424 that the loop is represented as a do-while (with a proper if-guard
2425 before the loop if needed), where the loop header contains all the
2426 executable statements, and the latch is empty. */
2427 if (!empty_block_p (loop->latch))
2428 {
2429 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2430 fprintf (vect_dump, "not vectorized: unexpected loop form.");
2431 return NULL;
2432 }
2433
2434 /* Make sure there exists a single-predecessor exit bb: */
2435 if (!single_pred_p (loop->single_exit->dest))
2436 {
2437 edge e = loop->single_exit;
2438 if (!(e->flags & EDGE_ABNORMAL))
2439 {
2440 split_loop_exit_edge (e);
2441 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2442 fprintf (vect_dump, "split exit edge.");
2443 }
2444 else
2445 {
2446 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2447 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
2448 return NULL;
2449 }
2450 }
2451
2452 if (empty_block_p (loop->header))
2453 {
2454 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2455 fprintf (vect_dump, "not vectorized: empty loop.");
2456 return NULL;
2457 }
2458
2459 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
2460 if (!loop_cond)
2461 {
2462 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2463 fprintf (vect_dump, "not vectorized: complicated exit condition.");
2464 return NULL;
2465 }
2466
2467 if (!number_of_iterations)
2468 {
2469 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2470 fprintf (vect_dump,
2471 "not vectorized: number of iterations cannot be computed.");
2472 return NULL;
2473 }
2474
2475 if (chrec_contains_undetermined (number_of_iterations))
2476 {
2477 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2478 fprintf (vect_dump, "not vectorized: undetermined number of iterations.");
2479 return NULL;
2480 }
2481
2482 loop_vinfo = new_loop_vec_info (loop);
2483 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2484
2485 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2486 {
2487 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2488 {
2489 fprintf (vect_dump, "Symbolic number of iterations is ");
2490 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
2491 }
2492 }
2493 else
2494 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
2495 {
2496 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, loop_loc))
2497 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2498 return NULL;
2499 }
2500
2501 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2502 LOOP_VINFO_LOC (loop_vinfo) = loop_loc;
2503
2504 return loop_vinfo;
2505 }
2506
2507
2508 /* Function vect_analyze_loop.
2509
2510 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2511 for it. The different analyses will record information in the
2512 loop_vec_info struct. */
2513 loop_vec_info
2514 vect_analyze_loop (struct loop *loop)
2515 {
2516 bool ok;
2517 loop_vec_info loop_vinfo;
2518
2519 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2520 fprintf (vect_dump, "===== analyze_loop_nest =====");
2521
2522 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
2523
2524 loop_vinfo = vect_analyze_loop_form (loop);
2525 if (!loop_vinfo)
2526 {
2527 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2528 fprintf (vect_dump, "bad loop form.");
2529 return NULL;
2530 }
2531
2532 /* Find all data references in the loop (which correspond to vdefs/vuses)
2533 and analyze their evolution in the loop.
2534
2535 FORNOW: Handle only simple array references whose alignment
2536 can be forced, and aligned pointer-references. */
2537
2538 ok = vect_analyze_data_refs (loop_vinfo);
2539 if (!ok)
2540 {
2541 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2542 fprintf (vect_dump, "bad data references.");
2543 destroy_loop_vec_info (loop_vinfo);
2544 return NULL;
2545 }
2546
2547 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2548
2549 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2550 if (!ok)
2551 {
2552 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2553 fprintf (vect_dump, "unexpected pattern.");
2554 destroy_loop_vec_info (loop_vinfo);
2555 return NULL;
2556 }
2557
2558 /* Check that all cross-iteration scalar data-flow cycles are OK.
2559 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2560
2561 ok = vect_analyze_scalar_cycles (loop_vinfo);
2562 if (!ok)
2563 {
2564 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2565 fprintf (vect_dump, "bad scalar cycle.");
2566 destroy_loop_vec_info (loop_vinfo);
2567 return NULL;
2568 }
2569
2570 /* Analyze data dependences between the data-refs in the loop.
2571 FORNOW: fail at the first data dependence that we encounter. */
2572
2573 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2574 if (!ok)
2575 {
2576 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2577 fprintf (vect_dump, "bad data dependence.");
2578 destroy_loop_vec_info (loop_vinfo);
2579 return NULL;
2580 }
2581
2582 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2583 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2584
2585 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2586 if (!ok)
2587 {
2588 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2589 fprintf (vect_dump, "bad data access.");
2590 destroy_loop_vec_info (loop_vinfo);
2591 return NULL;
2592 }
2593
2594 ok = vect_determine_vectorization_factor (loop_vinfo);
2595 if (!ok)
2596 {
2597 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2598 fprintf (vect_dump, "can't determine vectorization factor.");
2599 destroy_loop_vec_info (loop_vinfo);
2600 return NULL;
2601 }
2602
2603 /* Analyze the alignment of the data-refs in the loop.
2604 FORNOW: Only aligned accesses are handled. */
2605
2606 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2607 if (!ok)
2608 {
2609 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2610 fprintf (vect_dump, "bad data alignment.");
2611 destroy_loop_vec_info (loop_vinfo);
2612 return NULL;
2613 }
2614
2615 /* Scan all the operations in the loop and make sure they are
2616 vectorizable. */
2617
2618 ok = vect_analyze_operations (loop_vinfo);
2619 if (!ok)
2620 {
2621 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2622 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2623 destroy_loop_vec_info (loop_vinfo);
2624 return NULL;
2625 }
2626
2627 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2628
2629 return loop_vinfo;
2630 }