/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING. If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "errors.h"
#include "ggc.h"
#include "tree.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"

/* Main analysis functions.  */
static loop_vec_info vect_analyze_loop_form (struct loop *);
static bool vect_analyze_data_refs (loop_vec_info);
static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
static bool vect_analyze_scalar_cycles (loop_vec_info);
static bool vect_analyze_data_ref_accesses (loop_vec_info);
static bool vect_analyze_data_ref_dependences (loop_vec_info);
static bool vect_analyze_data_refs_alignment (loop_vec_info);
static bool vect_compute_data_refs_alignment (loop_vec_info);
static void vect_enhance_data_refs_alignment (loop_vec_info);
static bool vect_analyze_operations (loop_vec_info);
static bool vect_determine_vectorization_factor (loop_vec_info);

/* Utility functions for the analyses.  */
static bool exist_non_indexing_operands_for_use_p (tree, tree);
static void vect_mark_relevant (varray_type *, tree);
static bool vect_stmt_relevant_p (tree, loop_vec_info);
static tree vect_get_loop_niters (struct loop *, tree *);
static bool vect_analyze_data_ref_dependence
  (struct data_reference *, struct data_reference *, loop_vec_info);
static bool vect_compute_data_ref_alignment (struct data_reference *);
static bool vect_analyze_data_ref_access (struct data_reference *);
static struct data_reference * vect_analyze_pointer_ref_access
  (tree, tree, bool, tree, tree *, tree *);
static bool vect_can_advance_ivs_p (loop_vec_info);
static tree vect_get_ptr_offset (tree, tree, tree *);
static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *,
                                      tree *, tree *);
static bool vect_base_addr_differ_p (struct data_reference *,
                                     struct data_reference *drb, bool *);
static tree vect_object_analysis (tree, tree, bool, tree,
                                  struct data_reference **, tree *, tree *,
                                  tree *, bool *, tree *, subvar_t *);
static tree vect_address_analysis (tree, tree, bool, tree,
                                   struct data_reference *, tree *, tree *,
                                   tree *, bool *);


/* Function vect_get_ptr_offset

   Compute the OFFSET modulo vector-type alignment of pointer REF in bits.  */

static tree
vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
                     tree vectype ATTRIBUTE_UNUSED,
                     tree *offset ATTRIBUTE_UNUSED)
{
  /* TODO: Use alignment information.  */
  return NULL_TREE;
}


/* Function vect_analyze_offset_expr

   Given an offset expression EXPR received from get_inner_reference, analyze
   it and create an expression for INITIAL_OFFSET by substituting the variables
   of EXPR with initial_condition of the corresponding access_fn in the loop.
   E.g.,
     for i
       for (j = 3; j < N; j++)
         a[j].b[i][j] = 0;

   For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'. 'i' cannot be
   substituted, since its access_fn in the inner loop is i. 'j' will be
   substituted with 3. An INITIAL_OFFSET will be 'i * C_i + C`', where
   C` = 3 * C_j + C.

   Compute MISALIGN (the misalignment of the data reference initial access from
   its base) if possible. Misalignment can be calculated only if all the
   variables can be substituted with constants, or if a variable is multiplied
   by a multiple of VECTYPE_ALIGNMENT. In the above example, since 'i' cannot
   be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple
   of VECTYPE_ALIGNMENT, and C` otherwise. (We perform MISALIGN modulo
   VECTYPE_ALIGNMENT computation in the caller of this function).

   STEP is the evolution of the data reference in this loop in bytes.
   In the above example, STEP is C_j.

   Return FALSE if the analysis fails, e.g., if there is no access_fn for a
   variable. In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
   are NULL_TREEs. Otherwise, return TRUE.  */
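
/* For illustration, a hypothetical instance of the analysis above (all the
   constants are made up): let EXPR be 'i * 32 + j * 4 + 8' with
   VECTYPE_ALIGNMENT 16, where 'j' has access_fn {3, +, 1} in this loop and
   'i' belongs to an enclosing loop. Then 'j' is substituted with 3, giving
   INITIAL_OFFSET = 'i * 32 + 20' and STEP = 1 * 4 = 4. Since 32 is a
   multiple of 16, the 'i' term contributes 0 to the misalignment, so
   MISALIGN = 0 + 3 * 4 + 8 = 20 (reduced modulo 16 by the caller). With a
   coefficient of, say, 40 instead of 32 (40 % 16 != 0), MISALIGN would be
   NULL_TREE.  */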

static bool
vect_analyze_offset_expr (tree expr,
                          struct loop *loop,
                          tree vectype_alignment,
                          tree *initial_offset,
                          tree *misalign,
                          tree *step)
{
  tree oprnd0;
  tree oprnd1;
  tree left_offset = ssize_int (0);
  tree right_offset = ssize_int (0);
  tree left_misalign = ssize_int (0);
  tree right_misalign = ssize_int (0);
  tree left_step = ssize_int (0);
  tree right_step = ssize_int (0);
  enum tree_code code;
  tree init, evolution;

  *step = NULL_TREE;
  *misalign = NULL_TREE;
  *initial_offset = NULL_TREE;

  /* Strip conversions that don't narrow the mode.  */
  expr = vect_strip_conversion (expr);
  if (!expr)
    return false;

  /* Stop conditions:
     1. Constant.  */
  if (TREE_CODE (expr) == INTEGER_CST)
    {
      *initial_offset = fold_convert (ssizetype, expr);
      *misalign = fold_convert (ssizetype, expr);
      *step = ssize_int (0);
      return true;
    }

  /* 2. Variable. Try to substitute with initial_condition of the corresponding
     access_fn in the current loop.  */
  if (SSA_VAR_P (expr))
    {
      tree access_fn = analyze_scalar_evolution (loop, expr);

      if (access_fn == chrec_dont_know)
        /* No access_fn.  */
        return false;

      init = initial_condition_in_loop_num (access_fn, loop->num);
      if (init == expr && !expr_invariant_in_loop_p (loop, init))
        /* Not enough information: may be not loop invariant.
           E.g., for a[b[i]], we get a[D], where D=b[i]. EXPR is D, its
           initial_condition is D, but it depends on i - loop's induction
           variable.  */
        return false;

      evolution = evolution_part_in_loop_num (access_fn, loop->num);
      if (evolution && TREE_CODE (evolution) != INTEGER_CST)
        /* Evolution is not constant.  */
        return false;

      if (TREE_CODE (init) == INTEGER_CST)
        *misalign = fold_convert (ssizetype, init);
      else
        /* Not constant, misalignment cannot be calculated.  */
        *misalign = NULL_TREE;

      *initial_offset = fold_convert (ssizetype, init);

      *step = evolution ? fold_convert (ssizetype, evolution) : ssize_int (0);
      return true;
    }

  /* Recursive computation.  */
  if (!BINARY_CLASS_P (expr))
    {
      /* We expect to get binary expressions (PLUS/MINUS and MULT).  */
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        {
          fprintf (vect_dump, "Not binary expression ");
          print_generic_expr (vect_dump, expr, TDF_SLIM);
        }
      return false;
    }
  oprnd0 = TREE_OPERAND (expr, 0);
  oprnd1 = TREE_OPERAND (expr, 1);

  if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset,
                                 &left_misalign, &left_step)
      || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment,
                                    &right_offset, &right_misalign,
                                    &right_step))
    return false;

  /* The type of the operation: plus, minus or mult.  */
  code = TREE_CODE (expr);
  switch (code)
    {
    case MULT_EXPR:
      if (TREE_CODE (right_offset) != INTEGER_CST)
        /* RIGHT_OFFSET may not be constant, e.g., for arrays of
           variable-sized types.
           FORNOW: We don't support such cases.  */
        return false;

      /* Strip conversions that don't narrow the mode.  */
      left_offset = vect_strip_conversion (left_offset);
      if (!left_offset)
        return false;
      /* Misalignment computation.  */
      if (SSA_VAR_P (left_offset))
        {
          /* If the left side contains variables that can't be substituted with
             constants, we check if the right side is a multiple of
             ALIGNMENT.  */
          if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset,
                               fold_convert (ssizetype, vectype_alignment))))
            *misalign = ssize_int (0);
          else
            /* If the remainder is not zero or the right side isn't constant,
               we can't compute misalignment.  */
            *misalign = NULL_TREE;
        }
      else
        {
          /* The left operand was successfully substituted with constant.  */
          if (left_misalign)
            /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
               NULL_TREE.  */
            *misalign = size_binop (code, left_misalign, right_misalign);
          else
            *misalign = NULL_TREE;
        }

      /* Step calculation.  */
      /* Multiply the step by the right operand.  */
      *step = size_binop (MULT_EXPR, left_step, right_offset);
      break;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Combine the recursive calculations for step and misalignment.  */
      *step = size_binop (code, left_step, right_step);

      if (left_misalign && right_misalign)
        *misalign = size_binop (code, left_misalign, right_misalign);
      else
        *misalign = NULL_TREE;

      break;

    default:
      gcc_unreachable ();
    }

  /* Compute offset.  */
  *initial_offset = fold_convert (ssizetype,
                                  fold (build2 (code, TREE_TYPE (left_offset),
                                                left_offset,
                                                right_offset)));
  return true;
}


/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF). VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop. For example, when vectorizing a loop that operates on 4-byte elements,
   on a target with vector size (VS) of 16 bytes, the VF is set to 4, since
   4 elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated upon
   are of the same size. Therefore this function currently sets VF according to
   the size of the types operated upon, and fails if there are multiple sizes
   in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
     for (i=0; i<N; i++){
       a[i] = b[i] + c[i];
     }

   vectorized loop:
     for (i=0; i<N; i++=VF){
       a[i:VF] = b[i:VF] + c[i:VF];
     }  */
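
/* A small made-up example of the computation below: on a hypothetical target
   whose SIMD word is 16 bytes, a loop whose only relevant statements operate
   on 'int' (4 bytes) gets vectype V4SI, so nunits = 4 and VF = 4, and the
   checking assertion 4 bytes * 4 == UNITS_PER_SIMD_WORD holds. If another
   relevant statement operated on 'short' (nunits = 8), the loop would be
   rejected below with "not vectorized: mixed data-types".  */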

static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  tree scalar_type;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          unsigned int nunits;
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          tree vectype;

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);
          /* skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info))
            continue;

          if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (STMT_VINFO_DATA_REF (stmt_info))
            scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
          else if (TREE_CODE (stmt) == MODIFY_EXPR)
            scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
          else
            scalar_type = TREE_TYPE (stmt);

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "get vectype for scalar type: ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }

          vectype = get_vectype_for_scalar_type (scalar_type);
          if (!vectype)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                {
                  fprintf (vect_dump, "not vectorized: unsupported data-type ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }
              return false;
            }
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }
          STMT_VINFO_VECTYPE (stmt_info) = vectype;

          nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (vectorization_factor)
            {
              /* FORNOW: don't allow mixed units.
                 This restriction will be relaxed in the future.  */
              if (nunits != vectorization_factor)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                            LOOP_LOC (loop_vinfo)))
                    fprintf (vect_dump, "not vectorized: mixed data-types");
                  return false;
                }
            }
          else
            vectorization_factor = nunits;

#ifdef ENABLE_CHECKING
          gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
                      * vectorization_factor == UNITS_PER_SIMD_WORD);
#endif
        }
    }

  /* TODO: Analyze cost. Decide if worth while to vectorize.  */

  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}


/* Function vect_analyze_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */

static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  bool ok;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_operations ===");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.
             This is expected to include:
             - the COND_EXPR which is the loop exit condition
             - any LABEL_EXPRs in the loop
             - computations that are used only for array indexing or loop
               control.  */

          if (!STMT_VINFO_RELEVANT_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
                fprintf (vect_dump, "irrelevant.");
              continue;
            }

#ifdef ENABLE_CHECKING
          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
            }
#endif

          ok = (vectorizable_operation (stmt, NULL, NULL)
                || vectorizable_assignment (stmt, NULL, NULL)
                || vectorizable_load (stmt, NULL, NULL)
                || vectorizable_store (stmt, NULL, NULL));

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                {
                  fprintf (vect_dump, "not vectorized: stmt not supported: ");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }
        }
    }

  /* TODO: Analyze cost. Decide if worth while to vectorize.  */

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump,
             "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
             vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      return false;
    }

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
    {
      if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT. Check if USE is
   used in STMT for anything other than indexing an array.  */
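
/* E.g., for the statement 'a[i_2] = x_3': the use 'i_2' appears only inside
   the ARRAY_REF (it indexes the array), whereas the use 'x_3' is the stored
   value itself. So this predicate returns true for 'x_3' and false for
   'i_2', matching the checks in the function body below.  */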

static bool
exist_non_indexing_operands_for_use_p (tree use, tree stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT. If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref. FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
    return false;

  operand = TREE_OPERAND (stmt, 1);

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}


/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
   cycles that they represent do not impede vectorization.

   FORNOW: Reduction, as in the following loop, is not supported yet:
     loop1:
     for (i=0; i<N; i++)
       sum += a[i];
   The cross-iteration cycle corresponding to variable 'sum' will be
   considered too complicated and will impede vectorization.

   FORNOW: Induction, as in the following loop, is not supported yet:
     loop2:
     for (i=0; i<N; i++)
       a[i] = i;

   However, the following loop *is* vectorizable:
     loop3:
     for (i=0; i<N; i++)
       a[i] = b[i];

   In both loops there exists a def-use cycle for the variable i:
     loop: i_2 = PHI (i_0, i_1)
           a[i_2] = ...;
           i_1 = i_2 + 1;
           GOTO loop;

   The evolution of the above cycle is considered simple enough; however, we
   also check that the cycle does not need to be vectorized, i.e., we check
   that the variable that this cycle defines is only used for array indexing
   or in stmts that do not need to be vectorized. This is not the case in
   loop2, but it *is* the case in loop3.  */

static bool
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  tree phi;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  tree dummy;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;

      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual phi's. The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
        {
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            fprintf (vect_dump, "virtual phi. skip.");
          continue;
        }

      /* Analyze the evolution function.  */

      /* FORNOW: The only scalar cross-iteration cycles that we allow are
         those of loop induction variables; This property is verified here.

         Furthermore, if that induction variable is used in an operation
         that needs to be vectorized (i.e, is not solely used to index
         arrays and check the exit condition) - we do not support its
         vectorization yet. This property is verified in vect_is_simple_use,
         during vect_analyze_operations.  */

      access_fn = /* instantiate_parameters
                     (loop,*/
        analyze_scalar_evolution (loop, PHI_RESULT (phi));

      if (!access_fn)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: unsupported scalar cycle.");
          return false;
        }

      if (vect_print_dump_info (REPORT_DETAILS,
                                LOOP_LOC (loop_vinfo)))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: unsupported scalar cycle.");
          return false;
        }
    }

  return true;
}


/* Function vect_base_addr_differ_p.

   This is the simplest data dependence test: determines whether the
   data references A and B access the same array/region. Returns
   false when the property is not computable at compile time.
   Otherwise return true, and DIFFER_P will record the result. This
   utility will not be necessary when alias_sets_conflict_p will be
   less conservative.  */

static bool
vect_base_addr_differ_p (struct data_reference *dra,
                         struct data_reference *drb,
                         bool *differ_p)
{
  tree stmt_a = DR_STMT (dra);
  stmt_vec_info stmt_info_a = vinfo_for_stmt (stmt_a);
  tree stmt_b = DR_STMT (drb);
  stmt_vec_info stmt_info_b = vinfo_for_stmt (stmt_b);
  tree addr_a = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_a);
  tree addr_b = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_b);
  tree type_a = TREE_TYPE (addr_a);
  tree type_b = TREE_TYPE (addr_b);
  HOST_WIDE_INT alias_set_a, alias_set_b;

  gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));

  /* Both references are ADDR_EXPR, i.e., we have the objects.  */
  if (TREE_CODE (addr_a) == ADDR_EXPR && TREE_CODE (addr_b) == ADDR_EXPR)
    return array_base_name_differ_p (dra, drb, differ_p);

  alias_set_a = (TREE_CODE (addr_a) == ADDR_EXPR) ?
    get_alias_set (TREE_OPERAND (addr_a, 0)) : get_alias_set (addr_a);
  alias_set_b = (TREE_CODE (addr_b) == ADDR_EXPR) ?
    get_alias_set (TREE_OPERAND (addr_b, 0)) : get_alias_set (addr_b);

  if (!alias_sets_conflict_p (alias_set_a, alias_set_b))
    {
      *differ_p = true;
      return true;
    }

  /* An instruction writing through a restricted pointer is "independent" of
     any instruction reading or writing through a different pointer, in the
     same block/scope.  */
  else if ((TYPE_RESTRICT (type_a) && !DR_IS_READ (dra))
           || (TYPE_RESTRICT (type_b) && !DR_IS_READ (drb)))
    {
      *differ_p = true;
      return true;
    }
  return false;
}


/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  */

static bool
vect_analyze_data_ref_dependence (struct data_reference *dra,
                                  struct data_reference *drb,
                                  loop_vec_info loop_vinfo)
{
  bool differ_p;
  struct data_dependence_relation *ddr;

  if (!vect_base_addr_differ_p (dra, drb, &differ_p))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        {
          fprintf (vect_dump,
                   "not vectorized: can't determine dependence between: ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  if (differ_p)
    return false;

  ddr = initialize_data_dependence_relation (dra, drb);
  compute_affine_dependence (ddr);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return false;

  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                            LOOP_LOC (loop_vinfo)))
    {
      fprintf (vect_dump,
               "not vectorized: possible dependence between data-refs ");
      print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
    }

  return true;
}


/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.

   TODO: dependences whose distance is greater than the vectorization factor
   can be ignored.  */
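
/* For instance, in
     for (i = 0; i < N; i++)
       a[i+1] = a[i] + b[i];
   the write a[i+1] and the read a[i] have dependence distance 1, which is
   smaller than any vectorization factor, so the load-store pair would cause
   the loop to be rejected below. With a distance of, say, 8 and VF = 4, the
   dependence could in principle be ignored, which is what the TODO above
   refers to.  */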

static bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
{
  unsigned int i, j;
  varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);

  /* Examine store-store (output) dependences.  */

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "compare all store-store pairs.");

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
    {
      for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
        {
          struct data_reference *dra =
            VARRAY_GENERIC_PTR (loop_write_refs, i);
          struct data_reference *drb =
            VARRAY_GENERIC_PTR (loop_write_refs, j);
          if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
            return false;
        }
    }

  /* Examine load-store (true/anti) dependences.  */

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "compare all load-store pairs.");

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
    {
      for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
        {
          struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
          struct data_reference *drb =
            VARRAY_GENERIC_PTR (loop_write_refs, j);
          if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
            return false;
        }
    }

  return true;
}


/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized, then FALSE is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed. Misalignment is calculated
   only for trivial cases. TODO.  */
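
/* For example (made-up numbers): if the vector type is 16 bytes wide, the
   base is (or can be forced to be) 16-byte aligned, and the initial offset
   of the access from the base is known to be 20 bytes, then the computation
   below yields DR_MISALIGNMENT = 20 mod 16 = 4. If the offset is not known
   at compile time, DR_MISALIGNMENT stays -1 ("unknown").  */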

static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, alignment;
  bool base_aligned_p;
  tree misalign;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  DR_MISALIGNMENT (dr) = -1;

  misalign = STMT_VINFO_VECT_MISALIGNMENT (stmt_info);
  base_aligned_p = STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info);
  base = build_fold_indirect_ref (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info));
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (!misalign)
    {
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if (!base_aligned_p)
    {
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
        {
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned_p
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Alignment required, in bytes:  */
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);

  /* Modulo alignment.  */
  misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
  if (tree_int_cst_sgn (misalign) < 0)
    {
      /* Negative misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "misalign = %d bytes", DR_MISALIGNMENT (dr));

  return true;
}


/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   This pass may take place at function granularity instead of at loop
   granularity.

   FOR NOW: No analysis is actually performed. Misalignment is calculated
   only for trivial cases. TODO.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
  unsigned int i;

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      if (!vect_compute_data_ref_alignment (dr))
        return false;
    }

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
      if (!vect_compute_data_ref_alignment (dr))
        return false;
    }

  return true;
}


/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized; Any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized. This restriction will be relaxed.  */

static void
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type datarefs;
  struct data_reference *dr0 = NULL;
  unsigned int i, j;

  /*
     This pass will require a cost model to guide it whether to apply peeling
     or versioning or a combination of the two. For example, the scheme that
     Intel uses when given a loop with several memory accesses is as follows:
     choose one memory access ('p') whose alignment you want to force by doing
     peeling. Then, either (1) generate a loop in which 'p' is aligned and all
     other accesses are not necessarily aligned, or (2) use loop versioning to
     generate one loop in which all accesses are aligned, and another loop in
     which only 'p' is necessarily aligned.

     ("Automatic Intra-Register Vectorization for the Intel Architecture",
     Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
     Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

     Devising a cost model is the most critical aspect of this work. It will
     guide us on which access to peel for, whether to use loop versioning, how
     many versions to create, etc. The cost model will probably consist of
     generic considerations as well as target specific considerations (on
     powerpc for example, misaligned stores are more painful than misaligned
     loads).

     Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];   # DR_MISALIGNMENT(q) = unknown
          p[i] = y;   # DR_MISALIGNMENT(p) = unknown
        }

     -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];   # DR_MISALIGNMENT(q) = 3
          p[i] = y;   # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){   # loop 1A
          x = q[i];   # DR_MISALIGNMENT(q) = 3
          p[i] = y;   # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){   # loop 1B
          x = q[i];   # DR_MISALIGNMENT(q) = 3
          p[i] = y;   # DR_MISALIGNMENT(p) = unaligned
        }
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
       x = q[i];
       p[i] = y;
     }
     for (i = 3; i < N; i++){   # loop 2A
       x = q[i];   # DR_MISALIGNMENT(q) = 0
       p[i] = y;   # DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
       x = q[i];
       p[i] = y;
     }
     if (p is aligned) {
       for (i = 3; i<N; i++){   # loop 3A
         x = q[i];   # DR_MISALIGNMENT(q) = 0
         p[i] = y;   # DR_MISALIGNMENT(p) = 0
       }
     }
     else {
       for (i = 3; i<N; i++){   # loop 3B
         x = q[i];   # DR_MISALIGNMENT(q) = 0
         p[i] = y;   # DR_MISALIGNMENT(p) = unaligned
       }
     }

     These loops are later passed to loop_transform to be vectorized. The
     vectorizer will use the alignment information to guide the transformation
     (whether to generate regular loads/stores, or with special handling for
     misalignment).
  */

  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).

     The scheme we use FORNOW: peel to force the alignment of the first
     misaligned store in the loop.
     Rationale: misaligned stores are not yet supported.

     TODO: Use a better cost model.  */

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      dr0 = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      if (!aligned_access_p (dr0))
        {
          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
          LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
          break;
        }
    }

  /* (1.2) Update the alignment info according to the peeling factor.
     If the misalignment of the DR we peel for is M, then the
     peeling factor is VF - M, and the misalignment of each access DR_i
     in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
     If the misalignment of the DR we peel for is unknown, then the
     misalignment of each access DR_i in the loop is also unknown.

     TODO: - consider accesses that are known to have the same
     alignment, even if that alignment is unknown.  */
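
  /* A made-up example of the update below: with VF = 4, 4-byte elements
     (so UNITS_PER_SIMD_WORD = 16) and DR_MISALIGNMENT (dr0) = 8 bytes,
     mis = 8/4 = 2 elements and npeel = 4 - 2 = 2. A second 4-byte access
     with known misalignment 12 becomes (12 + 2*4) % 16 = 4; one with the
     same misalignment as dr0 becomes 0, as does dr0 itself.  */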

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      int mis;
      int npeel = 0;

      if (known_alignment_for_access_p (dr0))
        {
          /* Since it's known at compile time, compute the number of iterations
             in the peeled loop (the peeling factor) for use in updating
             DR_MISALIGNMENT values. The peeling factor is the vectorization
             factor minus the misalignment as an element count.  */
          mis = DR_MISALIGNMENT (dr0);
          mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
          npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
        }

      datarefs = loop_write_datarefs;
      for (j = 0; j < 2; j++)
        {
          for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
            {
              struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);

              if (dr == dr0)
                continue;
              if (known_alignment_for_access_p (dr)
                  && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr0))
                DR_MISALIGNMENT (dr) = 0;
              else if (known_alignment_for_access_p (dr)
                       && known_alignment_for_access_p (dr0))
                {
                  int drsize =
                    GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));

                  DR_MISALIGNMENT (dr) += npeel * drsize;
                  DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
                }
              else
                DR_MISALIGNMENT (dr) = -1;
            }
          datarefs = loop_read_datarefs;
        }

      DR_MISALIGNMENT (dr0) = 0;
      if (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "Alignment of access forced using peeling.");
    }
}


/* Function vect_analyze_data_refs_alignment

   Analyze the alignment of the data-references in the loop.
   FOR NOW: Until support for misaligned accesses is in place, only if all
   accesses are aligned can the loop be vectorized. This restriction will be
   relaxed.  */

static bool
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");


  /* This pass may take place at function granularity instead of at loop
     granularity.  */

  if (!vect_compute_data_refs_alignment (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump,
                 "not vectorized: can't calculate alignment for data ref.");
      return false;
    }


  /* This pass will decide on using loop versioning and/or loop peeling in
     order to enhance the alignment of data references in the loop.  */

  vect_enhance_data_refs_alignment (loop_vinfo);


  /* Finally, check that all the data references in the loop can be
     handled with respect to their alignment.  */

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: unsupported unaligned load.");
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: unsupported unaligned store.");
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }

  return true;
}


/* Function vect_analyze_data_ref_access.

   Analyze the access pattern of the data-reference DR. For now, a data access
   has to be consecutive to be considered vectorizable.  */
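
/* E.g., for a 4-byte 'float' access, a step of 4 bytes per iteration (a[i]
   with i++, or *p with p++) is consecutive; a step of 8 bytes (a[2*i]) or an
   unknown step is rejected below as "not consecutive access".  */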

static bool
vect_analyze_data_ref_access (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree step = STMT_VINFO_VECT_STEP (stmt_info);
  tree scalar_type = TREE_TYPE (DR_REF (dr));

  if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
    {
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "not consecutive access");
      return false;
    }
  return true;
}


/* Function vect_analyze_data_ref_accesses.

   Analyze the access pattern of all the data references in the loop.

   FORNOW: the only access pattern that is considered vectorizable is a
   simple step 1 (consecutive) access.

   FORNOW: handle only arrays and pointer accesses.  */

static bool
vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
{
  unsigned int i;
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      bool ok = vect_analyze_data_ref_access (dr);
      if (!ok)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: complicated access pattern.");
          return false;
        }
    }

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
      bool ok = vect_analyze_data_ref_access (dr);
      if (!ok)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: complicated access pattern.");
          return false;
        }
    }

  return true;
}


/* Function vect_analyze_pointer_ref_access.

   Input:
   STMT - a stmt that contains a data-ref.
   MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
   ACCESS_FN - the access function of MEMREF.

   Output:
   If the data-ref access is vectorizable, return a data_reference structure
   that represents it (DR). Otherwise, return NULL.
   STEP - the stride of MEMREF in the loop.
   INIT - the initial condition of MEMREF in the loop.  */
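
/* For instance (a hypothetical case), for
     float *p = a;
     for (i = 0; i < N; i++)
       *p++ = b[i];
   the access function of 'p' is the chrec {a, +, 4}: INIT is 'a' (loop
   invariant) and STEP is 4, which is a multiple of sizeof(float), so the
   access is taken to be consecutive and a data_reference is built below
   with the index access function {0, +, 1}.  */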

static struct data_reference *
vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read,
                                 tree access_fn, tree *ptr_init,
                                 tree *ptr_step)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree step, init;
  tree reftype, innertype;
  tree indx_access_fn;
  int loopnum = loop->num;
  struct data_reference *dr;

  if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: pointer access is not simple.");
      return NULL;
    }

  STRIP_NOPS (init);

  if (!expr_invariant_in_loop_p (loop, init))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump,
                 "not vectorized: initial condition is not loop invariant.");
      return NULL;
    }

  if (TREE_CODE (step) != INTEGER_CST)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump,
                 "not vectorized: non constant step for pointer access.");
      return NULL;
    }

  reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
  if (!POINTER_TYPE_P (reftype))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
      return NULL;
    }

  reftype = TREE_TYPE (init);
  if (!POINTER_TYPE_P (reftype))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
      return NULL;
    }

  *ptr_step = fold_convert (ssizetype, step);
  innertype = TREE_TYPE (reftype);
  /* Check that STEP is a multiple of type size.  */
  if (!integer_zerop (size_binop (TRUNC_MOD_EXPR, *ptr_step,
                        fold_convert (ssizetype, TYPE_SIZE_UNIT (innertype)))))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: non consecutive access.");
      return NULL;
    }

  indx_access_fn =
    build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    {
      fprintf (vect_dump, "Access function of ptr indx: ");
      print_generic_expr (vect_dump, indx_access_fn, TDF_SLIM);
    }
  dr = init_data_ref (stmt, memref, NULL_TREE, indx_access_fn, is_read);
  *ptr_init = init;
  return dr;
}


/* Function vect_address_analysis

   Return the BASE of the address expression EXPR.
   Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP.

   Input:
   EXPR - the address expression that is being analyzed
   STMT - the statement that contains EXPR or its original memory reference
   IS_READ - TRUE if STMT reads from EXPR, FALSE if writes to EXPR
   VECTYPE - the type that defines the alignment (i.e., we compute
             alignment relative to TYPE_ALIGN(VECTYPE))
   DR - data_reference struct for the original memory reference

   Output:
   BASE (returned value) - the base of the data reference EXPR.
   INITIAL_OFFSET - initial offset of EXPR from BASE (an expression)
   MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if
              the computation is impossible
   STEP - evolution of EXPR in the loop
   BASE_ALIGNED - indicates if BASE is aligned

   If something unexpected is encountered (an unsupported form of data-ref),
   then NULL_TREE is returned.  */
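
/* As an illustration (made-up names), for EXPR = 'p_5 + 8', where p_5 is a
   pointer SSA name whose pointed-to type is at least as aligned as a 16-byte
   VECTYPE: the recursion on the operands returns BASE = p_5 with
   misalignment 0, the constant operand contributes INITIAL_OFFSET = 8, and
   since 8 is an INTEGER_CST, MISALIGN = 0 + 8 = 8. Had the offset operand
   been another SSA name, MISALIGN would be NULL_TREE.  */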

static tree
vect_address_analysis (tree expr, tree stmt, bool is_read, tree vectype,
                       struct data_reference *dr, tree *offset, tree *misalign,
                       tree *step, bool *base_aligned)
{
  tree oprnd0, oprnd1, base_address, offset_expr, base_addr0, base_addr1;
  tree address_offset = ssize_int (0), address_misalign = ssize_int (0);
  tree dummy;
  subvar_t dummy2;

  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
      /* EXPR is of form {base +/- offset} (or {offset +/- base}).  */
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      STRIP_NOPS (oprnd0);
      STRIP_NOPS (oprnd1);

      /* Recursively try to find the base of the address contained in EXPR.
         For offset, the returned base will be NULL.  */
      base_addr0 = vect_address_analysis (oprnd0, stmt, is_read, vectype, dr,
                                          &address_offset, &address_misalign,
                                          step, base_aligned);

      base_addr1 = vect_address_analysis (oprnd1, stmt, is_read, vectype, dr,
                                          &address_offset, &address_misalign,
                                          step, base_aligned);

      /* We support cases where only one of the operands contains an
         address.  */
      if ((base_addr0 && base_addr1) || (!base_addr0 && !base_addr1))
        return NULL_TREE;

      /* To revert STRIP_NOPS.  */
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      offset_expr = base_addr0 ?
        fold_convert (ssizetype, oprnd1) : fold_convert (ssizetype, oprnd0);

      /* EXPR is of form {base +/- offset} (or {offset +/- base}). If offset is
         a number, we can add it to the misalignment value calculated for base,
         otherwise, misalignment is NULL.  */
      if (TREE_CODE (offset_expr) == INTEGER_CST && address_misalign)
        *misalign = size_binop (TREE_CODE (expr), address_misalign,
                                offset_expr);
      else
        *misalign = NULL_TREE;

      /* Combine offset (from EXPR {base + offset}) with the offset calculated
         for base.  */
      *offset = size_binop (TREE_CODE (expr), address_offset, offset_expr);
      return base_addr0 ? base_addr0 : base_addr1;

    case ADDR_EXPR:
      base_address = vect_object_analysis (TREE_OPERAND (expr, 0), stmt,
                                           is_read, vectype, &dr, offset,
                                           misalign, step, base_aligned,
                                           &dummy, &dummy2);
      return base_address;

    case SSA_NAME:
      if (!POINTER_TYPE_P (TREE_TYPE (expr)))
        return NULL_TREE;

      if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
        {
          if (vect_get_ptr_offset (expr, vectype, misalign))
            *base_aligned = true;
          else
            *base_aligned = false;
        }
      else
        {
          *base_aligned = true;
          *misalign = ssize_int (0);
        }
      *offset = ssize_int (0);
      *step = ssize_int (0);
      return expr;

    default:
      return NULL_TREE;
    }
}


/* Function vect_object_analysis

   Return the BASE of the data reference MEMREF.
   Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP.
   E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset
   'a.b[i] + 4B' from a (can be an expression), MISALIGN is an OFFSET
   instantiated with initial_conditions of access_functions of variables,
   modulo alignment, and STEP is the evolution of the DR_REF in this loop.

   Function get_inner_reference is used for the above in case of ARRAY_REF and
   COMPONENT_REF.

   The structure of the function is as follows:
   Part 1:
   Case 1. For handled_component_p refs
          1.1 call get_inner_reference
            1.1.1 analyze offset expr received from get_inner_reference
          1.2. build data-reference structure for MEMREF
        (fall through with BASE)
   Case 2. For declarations
          2.1 check alignment
          2.2 update DR_BASE_NAME if necessary for alias
   Case 3. For INDIRECT_REFs
          3.1 get the access function
          3.2 analyze evolution of MEMREF
          3.3 set data-reference structure for MEMREF
          3.4 call vect_address_analysis to analyze INIT of the access function

   Part 2:
   Combine the results of object and address analysis to calculate
   INITIAL_OFFSET, STEP and misalignment info.

   Input:
   MEMREF - the memory reference that is being analyzed
   STMT - the statement that contains MEMREF
   IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
   VECTYPE - the type that defines the alignment (i.e., we compute
             alignment relative to TYPE_ALIGN(VECTYPE))

   Output:
   BASE_ADDRESS (returned value) - the base address of the data reference
                                   MEMREF. E.g., if MEMREF is a.b[k].c[i][j]
                                   the returned base is &a.
   DR - data_reference struct for MEMREF
   INITIAL_OFFSET - initial offset of MEMREF from BASE (an expression)
   MISALIGN - offset of MEMREF from BASE in bytes (a constant) or NULL_TREE if
              the computation is impossible
   STEP - evolution of the DR_REF in the loop
   BASE_ALIGNED - indicates if BASE is aligned
   MEMTAG - memory tag for aliasing purposes
   SUBVAR - Sub-variables of the variable

   If something unexpected is encountered (an unsupported form of data-ref),
   then NULL_TREE is returned.  */
1600
1601 static tree
1602 vect_object_analysis (tree memref, tree stmt, bool is_read,
1603 tree vectype, struct data_reference **dr,
1604 tree *offset, tree *misalign, tree *step,
1605 bool *base_aligned, tree *memtag,
1606 subvar_t *subvars)
1607 {
1608 tree base = NULL_TREE, base_address = NULL_TREE;
1609 tree object_offset = ssize_int (0), object_misalign = ssize_int (0);
1610 tree object_step = ssize_int (0), address_step = ssize_int (0);
1611 bool object_base_aligned = true, address_base_aligned = true;
1612 tree address_offset = ssize_int (0), address_misalign = ssize_int (0);
1613 HOST_WIDE_INT pbitsize, pbitpos;
1614 tree poffset, bit_pos_in_bytes;
1615 enum machine_mode pmode;
1616 int punsignedp, pvolatilep;
1617 tree ptr_step = ssize_int (0), ptr_init = NULL_TREE;
1618 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1619 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1620 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1621 struct data_reference *ptr_dr = NULL;
1622 tree access_fn, evolution_part, address_to_analyze;
1623
1624 /* Part 1: */
1625 /* Case 1. handled_component_p refs. */
1626 if (handled_component_p (memref))
1627 {
1628 /* 1.1 call get_inner_reference. */
1629 /* Find the base and the offset from it. */
1630 base = get_inner_reference (memref, &pbitsize, &pbitpos, &poffset,
1631 &pmode, &punsignedp, &pvolatilep, false);
1632 if (!base)
1633 return NULL_TREE;
1634
1635 /* 1.1.1 analyze offset expr received from get_inner_reference. */
1636 if (poffset
1637 && !vect_analyze_offset_expr (poffset, loop, TYPE_SIZE_UNIT (vectype),
1638 &object_offset, &object_misalign, &object_step))
1639 {
1640 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1641 {
1642 fprintf (vect_dump, "failed to compute offset or step for ");
1643 print_generic_expr (vect_dump, memref, TDF_SLIM);
1644 }
1645 return NULL_TREE;
1646 }
1647
1648 /* Add bit position to OFFSET and MISALIGN. */
1649
1650 bit_pos_in_bytes = ssize_int (pbitpos/BITS_PER_UNIT);
1651 /* Check that there is no remainder in bits. */
1652 if (pbitpos%BITS_PER_UNIT)
1653 {
1654 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1655 fprintf (vect_dump, "bit offset alignment.");
1656 return NULL_TREE;
1657 }
1658 object_offset = size_binop (PLUS_EXPR, bit_pos_in_bytes, object_offset);
1659 if (object_misalign)
1660 object_misalign = size_binop (PLUS_EXPR, object_misalign,
1661 bit_pos_in_bytes);
1662
1663 /* Create data-reference for MEMREF. TODO: handle COMPONENT_REFs. */
1664 if (!(*dr))
1665 {
1666 if (TREE_CODE (memref) == ARRAY_REF)
1667 *dr = analyze_array (stmt, memref, is_read);
1668 else
1669 /* FORNOW. */
1670 return NULL_TREE;
1671 }
1672 memref = base; /* To continue analysis of BASE. */
1673 /* fall through */
1674 }
1675
1676 /* Part 1: Case 2. Declarations. */
1677 if (DECL_P (memref))
1678 {
1679 /* We expect to get a decl only if we already have a DR. */
1680 if (!(*dr))
1681 {
1682 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1683 {
1684 fprintf (vect_dump, "unhandled decl ");
1685 print_generic_expr (vect_dump, memref, TDF_SLIM);
1686 }
1687 return NULL_TREE;
1688 }
1689
1690 /* 2.1 check the alignment. */
1691 if (DECL_ALIGN (memref) >= TYPE_ALIGN (vectype))
1692 object_base_aligned = true;
1693 else
1694 object_base_aligned = false;
1695
1696 /* 2.2 update DR_BASE_NAME if necessary. */
1697 if (!DR_BASE_NAME ((*dr)))
 1698 	    /* For alias analysis, in case the analysis of an INDIRECT_REF
 1699 	       brought us to this object.  */
1700 DR_BASE_NAME ((*dr)) = memref;
1701
1702 if (SSA_VAR_P (memref) && var_can_have_subvars (memref))
1703 *subvars = get_subvars_for_var (memref);
1704 base_address = build_fold_addr_expr (memref);
1705 *memtag = memref;
1706 }
1707
1708 /* Part 1: Case 3. INDIRECT_REFs. */
1709 else if (TREE_CODE (memref) == INDIRECT_REF)
1710 {
1711 /* 3.1 get the access function. */
1712 access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
1713 if (!access_fn)
1714 {
1715 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1716 LOOP_LOC (loop_vinfo)))
1717 fprintf (vect_dump, "not vectorized: complicated pointer access.");
1718 return NULL_TREE;
1719 }
1720 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1721 {
1722 fprintf (vect_dump, "Access function of ptr: ");
1723 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1724 }
1725
1726 /* 3.2 analyze evolution of MEMREF. */
1727 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1728 if (evolution_part)
1729 {
1730 ptr_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read,
1731 access_fn, &ptr_init, &ptr_step);
1732 if (!(ptr_dr))
1733 return NULL_TREE;
1734
1735 object_step = size_binop (PLUS_EXPR, object_step, ptr_step);
1736 address_to_analyze = ptr_init;
1737 }
1738 else
1739 {
1740 if (!(*dr))
1741 {
1742 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1743 LOOP_LOC (loop_vinfo)))
1744 fprintf (vect_dump, "not vectorized: ptr is loop invariant.");
1745 return NULL_TREE;
1746 }
 1747 	      /* Since a DR already exists for MEMREF, we are analyzing the
 1748 	         init of the access function, which does not necessarily have
 1749 	         an evolution in the loop.  */
1750 address_to_analyze = initial_condition_in_loop_num (access_fn,
1751 loop->num);
1752 }
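      /* E.g. (illustrative): for an access *p where p advances by 4 bytes
	 each iteration, ACCESS_FN is {p_0, +, 4}_loop; the evolution part
	 is the step 4, and the initial condition p_0 is the address that
	 gets analyzed in 3.4 below.  */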
1753
1754 /* 3.3 set data-reference structure for MEMREF. */
1755 *dr = (*dr) ? *dr : ptr_dr;
1756
1757 /* 3.4 call vect_address_analysis to analyze INIT of the access
1758 function. */
1759 base_address = vect_address_analysis (address_to_analyze, stmt, is_read,
1760 vectype, *dr, &address_offset, &address_misalign,
1761 &address_step, &address_base_aligned);
1762 if (!base_address)
1763 return NULL_TREE;
1764
1765 switch (TREE_CODE (base_address))
1766 {
1767 case SSA_NAME:
1768 *memtag = get_var_ann (SSA_NAME_VAR (base_address))->type_mem_tag;
1769 if (!(*memtag) && TREE_CODE (TREE_OPERAND (memref, 0)) == SSA_NAME)
1770 *memtag = get_var_ann (
1771 SSA_NAME_VAR (TREE_OPERAND (memref, 0)))->type_mem_tag;
1772 break;
1773 case ADDR_EXPR:
1774 *memtag = TREE_OPERAND (base_address, 0);
1775 break;
1776 default:
1777 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1778 LOOP_LOC (loop_vinfo)))
1779 {
1780 fprintf (vect_dump, "not vectorized: no memtag ref: ");
1781 print_generic_expr (vect_dump, memref, TDF_SLIM);
1782 }
1783 return NULL_TREE;
1784 }
1785 }
1786
1787 if (!base_address)
1788 /* MEMREF cannot be analyzed. */
1789 return NULL_TREE;
1790
1791 if (SSA_VAR_P (*memtag) && var_can_have_subvars (*memtag))
1792 *subvars = get_subvars_for_var (*memtag);
1793
1794 /* Part 2: Combine the results of object and address analysis to calculate
1795 INITIAL_OFFSET, STEP and misalignment info. */
1796 *offset = size_binop (PLUS_EXPR, object_offset, address_offset);
1797 if (object_misalign && address_misalign)
1798 *misalign = size_binop (PLUS_EXPR, object_misalign, address_misalign);
1799 else
1800 *misalign = NULL_TREE;
1801 *step = size_binop (PLUS_EXPR, object_step, address_step);
1802 *base_aligned = object_base_aligned && address_base_aligned;
1803
1804 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1805 {
1806 fprintf (vect_dump, "Results of object analysis for: ");
1807 print_generic_expr (vect_dump, memref, TDF_SLIM);
1808 fprintf (vect_dump, "\n\tbase_address: ");
1809 print_generic_expr (vect_dump, base_address, TDF_SLIM);
1810 fprintf (vect_dump, "\n\toffset: ");
1811 print_generic_expr (vect_dump, *offset, TDF_SLIM);
1812 fprintf (vect_dump, "\n\tstep: ");
1813 print_generic_expr (vect_dump, *step, TDF_SLIM);
1814 fprintf (vect_dump, "\n\tbase aligned %d\n\tmisalign: ", *base_aligned);
1815 print_generic_expr (vect_dump, *misalign, TDF_SLIM);
1816 }
1817 return base_address;
1818 }
1819
1820
1821 /* Function vect_analyze_data_refs.
1822
1823 Find all the data references in the loop.
1824
1825 The general structure of the analysis of data refs in the vectorizer is as
1826 follows:
1827 1- vect_analyze_data_refs(loop):
1828 Find and analyze all data-refs in the loop:
1829 foreach ref
1830 base_address = vect_object_analysis(ref)
1831 1.1- vect_object_analysis(ref):
 1832 	Analyze ref, and build a DR (data_reference struct) for it;
1833 compute base, initial_offset, step and alignment.
1834 Call get_inner_reference for refs handled in this function.
 1835 	Call vect_address_analysis(addr) to analyze pointer type expressions.
1836 Set ref_stmt.base, ref_stmt.initial_offset, ref_stmt.alignment,
1837 ref_stmt.memtag and ref_stmt.step accordingly.
 1838    2- vect_analyze_data_ref_dependences(): apply dependence testing using ref_stmt.DR
 1839    3- vect_analyze_data_refs_alignment(): check that ref_stmt.alignment is ok.
 1840    4- vect_analyze_data_ref_accesses(): check that ref_stmt.step is ok.
1841
1842 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
 1843 	   whose base is really an array (not a pointer) and whose alignment
1844 can be forced. This restriction will be relaxed. */
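/* E.g. (an illustrative loop, assuming 4-byte elements):

     for (i = 0; i < N; i++)
       a[i] = b[i];

   Here vect_object_analysis builds one DR for the read b[i] and one for
   the write a[i], each with base address &b resp. &a, initial offset 0
   and step 4.  */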
1845
1846 static bool
1847 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1848 {
1849 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1850 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1851 int nbbs = loop->num_nodes;
1852 block_stmt_iterator si;
1853 int j;
1854 struct data_reference *dr;
1855
1856 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1857 fprintf (vect_dump, "=== vect_analyze_data_refs ===");
1858
1859 for (j = 0; j < nbbs; j++)
1860 {
1861 basic_block bb = bbs[j];
1862 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1863 {
1864 bool is_read = false;
1865 tree stmt = bsi_stmt (si);
1866 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1867 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1868 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1869 vuse_optype vuses = STMT_VUSE_OPS (stmt);
1870 varray_type *datarefs = NULL;
1871 int nvuses, nv_may_defs, nv_must_defs;
1872 tree memref = NULL;
1873 tree scalar_type, vectype;
1874 tree base, offset, misalign, step, tag;
1875 bool base_aligned;
1876 subvar_t subvars;
1877
 1878 	  /* Assumption: a stmt contains a data-ref if and only if it has
 1879 	     vuses/vdefs.  */
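	  /* E.g. (illustrative): "x = a[i]" carries a VUSE of 'a', so its
	     data-ref is the RHS and is a read; "a[i] = x" carries a vdef
	     of 'a', so its data-ref is the LHS and is a write.  */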
1880
1881 if (!vuses && !v_may_defs && !v_must_defs)
1882 continue;
1883
1884 nvuses = NUM_VUSES (vuses);
1885 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
1886 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
1887
1888 if (nvuses && (nv_may_defs || nv_must_defs))
1889 {
1890 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1891 {
1892 fprintf (vect_dump, "unexpected vdefs and vuses in stmt: ");
1893 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1894 }
1895 return false;
1896 }
1897
1898 if (TREE_CODE (stmt) != MODIFY_EXPR)
1899 {
1900 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1901 {
1902 fprintf (vect_dump, "unexpected vops in stmt: ");
1903 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1904 }
1905 return false;
1906 }
1907
1908 if (vuses)
1909 {
1910 memref = TREE_OPERAND (stmt, 1);
1911 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
1912 is_read = true;
1913 }
1914 else /* vdefs */
1915 {
1916 memref = TREE_OPERAND (stmt, 0);
1917 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1918 is_read = false;
1919 }
1920
1921 scalar_type = TREE_TYPE (memref);
1922 vectype = get_vectype_for_scalar_type (scalar_type);
1923 if (!vectype)
1924 {
1925 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1926 {
1927 fprintf (vect_dump, "no vectype for stmt: ");
1928 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1929 fprintf (vect_dump, " scalar_type: ");
1930 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
1931 }
1932 /* It is not possible to vectorize this data reference. */
1933 return false;
1934 }
1935 /* Analyze MEMREF. If it is of a supported form, build data_reference
1936 struct for it (DR). */
1937 dr = NULL;
1938 base = vect_object_analysis (memref, stmt, is_read, vectype, &dr,
1939 &offset, &misalign, &step,
1940 &base_aligned, &tag, &subvars);
1941 if (!base)
1942 {
1943 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1944 LOOP_LOC (loop_vinfo)))
1945 {
1946 fprintf (vect_dump, "not vectorized: unhandled data ref: ");
1947 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1948 }
1949 return false;
1950 }
1951 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info) = base;
1952 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
1953 STMT_VINFO_VECT_STEP (stmt_info) = step;
1954 STMT_VINFO_VECT_MISALIGNMENT (stmt_info) = misalign;
1955 STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info) = base_aligned;
1956 STMT_VINFO_MEMTAG (stmt_info) = tag;
1957 STMT_VINFO_SUBVARS (stmt_info) = subvars;
1958 STMT_VINFO_VECTYPE (stmt_info) = vectype;
1959 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
1960 STMT_VINFO_DATA_REF (stmt_info) = dr;
1961 }
1962 }
1963
1964 return true;
1965 }
1966
1967
1968 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
1969
1970 /* Function vect_mark_relevant.
1971
1972 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
1973
1974 static void
1975 vect_mark_relevant (varray_type *worklist, tree stmt)
1976 {
1977 stmt_vec_info stmt_info;
1978
1979 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1980 fprintf (vect_dump, "mark relevant.");
1981
1982 if (TREE_CODE (stmt) == PHI_NODE)
1983 {
1984 VARRAY_PUSH_TREE (*worklist, stmt);
1985 return;
1986 }
1987
1988 stmt_info = vinfo_for_stmt (stmt);
1989
1990 if (!stmt_info)
1991 {
1992 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1993 {
 1994 	  fprintf (vect_dump, "mark relevant: no stmt info.");
1995 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1996 }
1997 return;
1998 }
1999
2000 if (STMT_VINFO_RELEVANT_P (stmt_info))
2001 {
2002 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2003 fprintf (vect_dump, "already marked relevant.");
2004 return;
2005 }
2006
2007 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
2008 VARRAY_PUSH_TREE (*worklist, stmt);
2009 }
2010
2011
2012 /* Function vect_stmt_relevant_p.
2013
 2014    Return true if STMT, in the loop represented by LOOP_VINFO, is
2015 "relevant for vectorization".
2016
2017 A stmt is considered "relevant for vectorization" if:
2018 - it has uses outside the loop.
2019 - it has vdefs (it alters memory).
 2020    - it is a control stmt in the loop (other than the exit condition).
2021
2022 CHECKME: what other side effects would the vectorizer allow? */
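/* E.g. (illustrative): given "t = b[i];  a[i] = t;", the store to a[i]
   is relevant by itself (it has vdefs), while the load of b[i] is not;
   the load gets marked later, when vect_mark_stmts_to_be_vectorized
   follows the use-def chain from the store's use of t.  */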
2023
2024 static bool
2025 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
2026 {
2027 v_may_def_optype v_may_defs;
2028 v_must_def_optype v_must_defs;
2029 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2030 int i;
2031 dataflow_t df;
2032 int num_uses;
2033
2034 /* cond stmt other than loop exit cond. */
2035 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
2036 return true;
2037
2038 /* changing memory. */
2039 if (TREE_CODE (stmt) != PHI_NODE)
2040 {
2041 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
2042 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
2043 if (v_may_defs || v_must_defs)
2044 {
2045 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2046 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
2047 return true;
2048 }
2049 }
2050
2051 /* uses outside the loop. */
2052 df = get_immediate_uses (stmt);
2053 num_uses = num_immediate_uses (df);
2054 for (i = 0; i < num_uses; i++)
2055 {
2056 tree use = immediate_use (df, i);
2057 basic_block bb = bb_for_stmt (use);
2058 if (!flow_bb_inside_loop_p (loop, bb))
2059 {
2060 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2061 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
2062 return true;
2063 }
2064 }
2065
2066 return false;
2067 }
2068
2069
2070 /* Function vect_mark_stmts_to_be_vectorized.
2071
2072 Not all stmts in the loop need to be vectorized. For example:
2073
2074 for i...
2075 for j...
2076 1. T0 = i + j
2077 2. T1 = a[T0]
2078
2079 3. j = j + 1
2080
 2081    Stmts 1 and 3 do not need to be vectorized, because loop control and
2082 addressing of vectorized data-refs are handled differently.
2083
2084 This pass detects such stmts. */
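/* The detection below is a backward closure (a sketch of the scheme):
   seed a worklist with the stmts that are relevant by themselves
   (vect_stmt_relevant_p), then repeatedly pop a stmt and mark the
   in-loop def stmts of its uses as relevant, skipping uses that only
   serve as array indices, until the worklist is empty.  */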
2085
2086 static bool
2087 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2088 {
2089 varray_type worklist;
2090 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2091 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2092 unsigned int nbbs = loop->num_nodes;
2093 block_stmt_iterator si;
2094 tree stmt;
2095 stmt_ann_t ann;
2096 unsigned int i;
2097 int j;
2098 use_optype use_ops;
2099 stmt_vec_info stmt_info;
2100 basic_block bb;
2101 tree phi;
2102
2103 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2104 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
2105
2106 bb = loop->header;
2107 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2108 {
2109 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2110 {
2111 fprintf (vect_dump, "init: phi relevant? ");
2112 print_generic_expr (vect_dump, phi, TDF_SLIM);
2113 }
2114
2115 if (vect_stmt_relevant_p (phi, loop_vinfo))
2116 {
2117 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2118 LOOP_LOC (loop_vinfo)))
2119 fprintf (vect_dump, "unsupported reduction/induction.");
2120 return false;
2121 }
2122 }
2123
2124 VARRAY_TREE_INIT (worklist, 64, "work list");
2125
2126 /* 1. Init worklist. */
2127
2128 for (i = 0; i < nbbs; i++)
2129 {
2130 bb = bbs[i];
2131 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2132 {
2133 stmt = bsi_stmt (si);
2134
2135 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2136 {
2137 fprintf (vect_dump, "init: stmt relevant? ");
2138 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2139 }
2140
2141 stmt_info = vinfo_for_stmt (stmt);
2142 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
2143
2144 if (vect_stmt_relevant_p (stmt, loop_vinfo))
2145 vect_mark_relevant (&worklist, stmt);
2146 }
2147 }
2148
2149
2150 /* 2. Process_worklist */
2151
2152 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
2153 {
2154 stmt = VARRAY_TOP_TREE (worklist);
2155 VARRAY_POP (worklist);
2156
2157 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2158 {
2159 fprintf (vect_dump, "worklist: examine stmt: ");
2160 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2161 }
2162
2163 /* Examine the USES in this statement. Mark all the statements which
2164 feed this statement's uses as "relevant", unless the USE is used as
2165 an array index. */
2166
2167 if (TREE_CODE (stmt) == PHI_NODE)
2168 {
2169 /* follow the def-use chain inside the loop. */
2170 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
2171 {
2172 tree arg = PHI_ARG_DEF (stmt, j);
2173 tree def_stmt = NULL_TREE;
2174 basic_block bb;
2175 if (!vect_is_simple_use (arg, loop_vinfo, &def_stmt))
2176 {
2177 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2178 LOOP_LOC (loop_vinfo)))
2179 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2180 varray_clear (worklist);
2181 return false;
2182 }
2183 if (!def_stmt)
2184 continue;
2185
2186 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2187 {
2188 fprintf (vect_dump, "worklist: def_stmt: ");
2189 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
2190 }
2191
2192 bb = bb_for_stmt (def_stmt);
2193 if (flow_bb_inside_loop_p (loop, bb))
2194 vect_mark_relevant (&worklist, def_stmt);
2195 }
2196 }
2197
2198 ann = stmt_ann (stmt);
2199 use_ops = USE_OPS (ann);
2200
2201 for (i = 0; i < NUM_USES (use_ops); i++)
2202 {
2203 tree use = USE_OP (use_ops, i);
2204
 2205 	      /* We are only interested in uses that need to be vectorized.
 2206 		 Uses that are used for address computation are not considered
 2207 		 relevant.  */
2208 if (exist_non_indexing_operands_for_use_p (use, stmt))
2209 {
2210 tree def_stmt = NULL_TREE;
2211 basic_block bb;
2212 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt))
2213 {
2214 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2215 LOOP_LOC (loop_vinfo)))
2216 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2217 varray_clear (worklist);
2218 return false;
2219 }
2220
2221 if (!def_stmt)
2222 continue;
2223
2224 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2225 {
2226 fprintf (vect_dump, "worklist: examine use %d: ", i);
2227 print_generic_expr (vect_dump, use, TDF_SLIM);
2228 }
2229
2230 bb = bb_for_stmt (def_stmt);
2231 if (flow_bb_inside_loop_p (loop, bb))
2232 vect_mark_relevant (&worklist, def_stmt);
2233 }
2234 }
2235 } /* while worklist */
2236
2237 varray_clear (worklist);
2238 return true;
2239 }
2240
2241
2242 /* Function vect_can_advance_ivs_p
2243
 2244    In case the number of iterations that LOOP iterates is unknown at compile
2245 time, an epilog loop will be generated, and the loop induction variables
2246 (IVs) will be "advanced" to the value they are supposed to take just before
2247 the epilog loop. Here we check that the access function of the loop IVs
2248 and the expression that represents the loop bound are simple enough.
2249 These restrictions will be relaxed in the future. */
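/* E.g. (illustrative): an IV with access function {0, +, 1}_loop has
   evolution part 1, which is not itself a chrec, so it can be advanced;
   an IV whose evolution part is again a chrec, as in
   {0, +, {1, +, 1}_loop}_loop, is rejected below.  */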
2250
2251 static bool
2252 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2253 {
2254 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2255 basic_block bb = loop->header;
2256 tree phi;
2257
2258 /* Analyze phi functions of the loop header. */
2259
2260 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2261 {
2262 tree access_fn = NULL;
2263 tree evolution_part;
2264
2265 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2266 {
2267 fprintf (vect_dump, "Analyze phi: ");
2268 print_generic_expr (vect_dump, phi, TDF_SLIM);
2269 }
2270
2271 /* Skip virtual phi's. The data dependences that are associated with
2272 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2273
2274 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2275 {
2276 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2277 fprintf (vect_dump, "virtual phi. skip.");
2278 continue;
2279 }
2280
2281 /* Analyze the evolution function. */
2282
2283 access_fn = instantiate_parameters
2284 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
2285
2286 if (!access_fn)
2287 {
2288 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
 2289 	    fprintf (vect_dump, "No access function.");
2290 return false;
2291 }
2292
2293 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2294 {
2295 fprintf (vect_dump, "Access function of PHI: ");
2296 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
2297 }
2298
2299 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2300
2301 if (evolution_part == NULL_TREE)
2302 return false;
2303
2304 /* FORNOW: We do not transform initial conditions of IVs
 2305 	 whose evolution functions are a polynomial of degree >= 2.  */
2306
2307 if (tree_is_chrec (evolution_part))
2308 return false;
2309 }
2310
2311 return true;
2312 }
2313
2314
2315 /* Function vect_get_loop_niters.
2316
 2317    Determine how many iterations the loop executes.
2318 If an expression that represents the number of iterations
2319 can be constructed, place it in NUMBER_OF_ITERATIONS.
2320 Return the loop exit condition. */
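/* E.g. (illustrative): for a countable loop running i from 0 to n-1,
   the expression placed in NUMBER_OF_ITERATIONS is typically 'n', and
   the returned value is the COND_EXPR that controls the loop exit.  */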
2321
2322 static tree
2323 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
2324 {
2325 tree niters;
2326
2327 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2328 fprintf (vect_dump, "=== get_loop_niters ===");
2329
2330 niters = number_of_iterations_in_loop (loop);
2331
2332 if (niters != NULL_TREE
2333 && niters != chrec_dont_know)
2334 {
2335 *number_of_iterations = niters;
2336
2337 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2338 {
 2339 	  fprintf (vect_dump, "==> get_loop_niters:");
2340 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
2341 }
2342 }
2343
2344 return get_loop_exit_condition (loop);
2345 }
2346
2347
2348 /* Function vect_analyze_loop_form.
2349
2350 Verify the following restrictions (some may be relaxed in the future):
 2351    - it is an innermost loop
2352 - number of BBs = 2 (which are the loop header and the latch)
2353 - the loop has a pre-header
2354 - the loop has a single entry and exit
2355 - the loop exit condition is simple enough, and the number of iterations
2356 can be analyzed (a countable loop). */
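/* An illustrative sketch of the accepted shape (a 2-BB do-while,
   possibly preceded by an if-guard):

     loop_header:
       ...all executable stmts...
       if (cond) goto exit_bb;   <-- exit condition ends the header
     loop_latch:                 <-- empty
       goto loop_header;  */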
2357
2358 static loop_vec_info
2359 vect_analyze_loop_form (struct loop *loop)
2360 {
2361 loop_vec_info loop_vinfo;
2362 tree loop_cond;
2363 tree number_of_iterations = NULL;
2364 LOC loop_loc;
2365
2366 loop_loc = find_loop_location (loop);
2367
2368 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2369 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
2370
2371 if (loop->inner)
2372 {
2373 if (vect_print_dump_info (REPORT_OUTER_LOOPS, loop_loc))
2374 fprintf (vect_dump, "not vectorized: nested loop.");
2375 return NULL;
2376 }
2377
2378 if (!loop->single_exit
2379 || loop->num_nodes != 2
2380 || EDGE_COUNT (loop->header->preds) != 2)
2381 {
2382 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2383 {
2384 if (!loop->single_exit)
2385 fprintf (vect_dump, "not vectorized: multiple exits.");
2386 else if (loop->num_nodes != 2)
2387 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
2388 else if (EDGE_COUNT (loop->header->preds) != 2)
2389 fprintf (vect_dump, "not vectorized: too many incoming edges.");
2390 }
2391
2392 return NULL;
2393 }
2394
 2395   /* We assume that the loop exit condition is at the end of the loop, i.e.,
2396 that the loop is represented as a do-while (with a proper if-guard
2397 before the loop if needed), where the loop header contains all the
2398 executable statements, and the latch is empty. */
2399 if (!empty_block_p (loop->latch))
2400 {
2401 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
 2402 	fprintf (vect_dump, "not vectorized: unexpected loop form.");
2403 return NULL;
2404 }
2405
2406 /* Make sure there exists a single-predecessor exit bb: */
2407 if (!single_pred_p (loop->single_exit->dest))
2408 {
2409 edge e = loop->single_exit;
2410 if (!(e->flags & EDGE_ABNORMAL))
2411 {
2412 loop_split_edge_with (e, NULL);
2413 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2414 fprintf (vect_dump, "split exit edge.");
2415 }
2416 else
2417 {
2418 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2419 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
2420 return NULL;
2421 }
2422 }
2423
2424 if (empty_block_p (loop->header))
2425 {
2426 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2427 fprintf (vect_dump, "not vectorized: empty loop.");
2428 return NULL;
2429 }
2430
2431 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
2432 if (!loop_cond)
2433 {
2434 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2435 fprintf (vect_dump, "not vectorized: complicated exit condition.");
2436 return NULL;
2437 }
2438
2439 if (!number_of_iterations)
2440 {
2441 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2442 fprintf (vect_dump,
2443 "not vectorized: number of iterations cannot be computed.");
2444 return NULL;
2445 }
2446
2447 if (chrec_contains_undetermined (number_of_iterations))
2448 {
2449 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
 2450 	fprintf (vect_dump, "not vectorized: undetermined number of iterations.");
 2451       return NULL;
2452 }
2453
2454 loop_vinfo = new_loop_vec_info (loop);
2455 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2456
2457 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2458 {
2459 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2460 {
2461 fprintf (vect_dump, "Symbolic number of iterations is ");
2462 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
2463 }
2464 }
2465 else
2466 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
2467 {
2468 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, loop_loc))
2469 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2470 return NULL;
2471 }
2472
2473 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2474 LOOP_VINFO_LOC (loop_vinfo) = loop_loc;
2475
2476 return loop_vinfo;
2477 }
2478
2479
2480 /* Function vect_analyze_loop.
2481
2482 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2483 for it. The different analyses will record information in the
2484 loop_vec_info struct. */
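/* A summary of the order of the analyses applied below: loop form,
   data refs, relevant stmts, scalar cycles, data dependences, access
   patterns, vectorization factor, alignment, and operations; a failure
   at any stage aborts the vectorization of LOOP.  */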
2485 loop_vec_info
2486 vect_analyze_loop (struct loop *loop)
2487 {
2488 bool ok;
2489 loop_vec_info loop_vinfo;
2490
2491 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2492 fprintf (vect_dump, "===== analyze_loop_nest =====");
2493
 2494   /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */
2495
2496 loop_vinfo = vect_analyze_loop_form (loop);
2497 if (!loop_vinfo)
2498 {
2499 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2500 fprintf (vect_dump, "bad loop form.");
2501 return NULL;
2502 }
2503
2504 /* Find all data references in the loop (which correspond to vdefs/vuses)
2505 and analyze their evolution in the loop.
2506
 2507      FORNOW: Handle only simple array references whose
 2508      alignment can be forced, and aligned pointer-references.  */
2509
2510 ok = vect_analyze_data_refs (loop_vinfo);
2511 if (!ok)
2512 {
2513 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2514 fprintf (vect_dump, "bad data references.");
2515 destroy_loop_vec_info (loop_vinfo);
2516 return NULL;
2517 }
2518
2519 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2520
2521 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2522 if (!ok)
2523 {
2524 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2525 fprintf (vect_dump, "unexpected pattern.");
2526 destroy_loop_vec_info (loop_vinfo);
2527 return NULL;
2528 }
2529
2530 /* Check that all cross-iteration scalar data-flow cycles are OK.
2531 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2532
2533 ok = vect_analyze_scalar_cycles (loop_vinfo);
2534 if (!ok)
2535 {
2536 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2537 fprintf (vect_dump, "bad scalar cycle.");
2538 destroy_loop_vec_info (loop_vinfo);
2539 return NULL;
2540 }
2541
2542 /* Analyze data dependences between the data-refs in the loop.
2543 FORNOW: fail at the first data dependence that we encounter. */
2544
2545 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2546 if (!ok)
2547 {
2548 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2549 fprintf (vect_dump, "bad data dependence.");
2550 destroy_loop_vec_info (loop_vinfo);
2551 return NULL;
2552 }
2553
2554 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2555 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2556
2557 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2558 if (!ok)
2559 {
2560 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2561 fprintf (vect_dump, "bad data access.");
2562 destroy_loop_vec_info (loop_vinfo);
2563 return NULL;
2564 }
2565
2566 ok = vect_determine_vectorization_factor (loop_vinfo);
2567 if (!ok)
2568 {
2569 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2570 fprintf (vect_dump, "can't determine vectorization factor.");
2571 destroy_loop_vec_info (loop_vinfo);
2572 return NULL;
2573 }
2574
2575 /* Analyze the alignment of the data-refs in the loop.
2576 FORNOW: Only aligned accesses are handled. */
2577
2578 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2579 if (!ok)
2580 {
2581 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2582 fprintf (vect_dump, "bad data alignment.");
2583 destroy_loop_vec_info (loop_vinfo);
2584 return NULL;
2585 }
2586
2587 /* Scan all the operations in the loop and make sure they are
2588 vectorizable. */
2589
2590 ok = vect_analyze_operations (loop_vinfo);
2591 if (!ok)
2592 {
2593 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2594 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2595 destroy_loop_vec_info (loop_vinfo);
2596 return NULL;
2597 }
2598
2599 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2600
2601 return loop_vinfo;
2602 }