re PR tree-optimization/22236 (wrong code for casts and scev)
[gcc.git] / gcc / tree-vect-analyze.c
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
32 #include "timevar.h"
33 #include "cfgloop.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "tree-chrec.h"
37 #include "tree-data-ref.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40
41 /* Main analysis functions. */
42 static loop_vec_info vect_analyze_loop_form (struct loop *);
43 static bool vect_analyze_data_refs (loop_vec_info);
44 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
45 static void vect_analyze_scalar_cycles (loop_vec_info);
46 static bool vect_analyze_data_ref_accesses (loop_vec_info);
47 static bool vect_analyze_data_ref_dependences (loop_vec_info);
48 static bool vect_analyze_data_refs_alignment (loop_vec_info);
49 static bool vect_compute_data_refs_alignment (loop_vec_info);
50 static void vect_enhance_data_refs_alignment (loop_vec_info);
51 static bool vect_analyze_operations (loop_vec_info);
52 static bool vect_determine_vectorization_factor (loop_vec_info);
53
54 /* Utility functions for the analyses. */
55 static bool exist_non_indexing_operands_for_use_p (tree, tree);
56 static void vect_mark_relevant (VEC(tree,heap) **, tree, bool, bool);
57 static bool vect_stmt_relevant_p (tree, loop_vec_info, bool *, bool *);
58 static tree vect_get_loop_niters (struct loop *, tree *);
59 static bool vect_analyze_data_ref_dependence
60 (struct data_dependence_relation *, loop_vec_info);
61 static bool vect_compute_data_ref_alignment (struct data_reference *);
62 static bool vect_analyze_data_ref_access (struct data_reference *);
63 static bool vect_can_advance_ivs_p (loop_vec_info);
64
65 /* Function vect_determine_vectorization_factor
66
67 Determine the vectorization factor (VF). VF is the number of data elements
68 that are operated upon in parallel in a single iteration of the vectorized
69 loop. For example, when vectorizing a loop that operates on 4byte elements,
70 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
71 elements can fit in a single vector register.
72
73 We currently support vectorization of loops in which all types operated upon
74 are of the same size. Therefore this function currently sets VF according to
75 the size of the types operated upon, and fails if there are multiple sizes
76 in the loop.
77
78 VF is also the factor by which the loop iterations are strip-mined, e.g.:
79 original loop:
80 for (i=0; i<N; i++){
81 a[i] = b[i] + c[i];
82 }
83
84 vectorized loop:
85 for (i=0; i<N; i+=VF){
86 a[i:VF] = b[i:VF] + c[i:VF];
87 }
88 */
89
90 static bool
91 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
92 {
93 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
94 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
95 int nbbs = loop->num_nodes;
96 block_stmt_iterator si;
97 unsigned int vectorization_factor = 0;
98 int i;
99 tree scalar_type;
100
101 if (vect_print_dump_info (REPORT_DETAILS))
102 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
103
104 for (i = 0; i < nbbs; i++)
105 {
106 basic_block bb = bbs[i];
107
108 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
109 {
110 tree stmt = bsi_stmt (si);
111 unsigned int nunits;
112 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
113 tree vectype;
114
115 if (vect_print_dump_info (REPORT_DETAILS))
116 {
117 fprintf (vect_dump, "==> examining statement: ");
118 print_generic_expr (vect_dump, stmt, TDF_SLIM);
119 }
120
121 gcc_assert (stmt_info);
122 /* skip stmts which do not need to be vectorized. */
123 if (!STMT_VINFO_RELEVANT_P (stmt_info)
124 && !STMT_VINFO_LIVE_P (stmt_info))
125 {
126 if (vect_print_dump_info (REPORT_DETAILS))
127 fprintf (vect_dump, "skip.");
128 continue;
129 }
130
131 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
132 {
133 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
134 {
135 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
136 print_generic_expr (vect_dump, stmt, TDF_SLIM);
137 }
138 return false;
139 }
140
141 if (STMT_VINFO_DATA_REF (stmt_info))
142 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
143 else if (TREE_CODE (stmt) == MODIFY_EXPR)
144 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
145 else
146 scalar_type = TREE_TYPE (stmt);
147
148 if (vect_print_dump_info (REPORT_DETAILS))
149 {
150 fprintf (vect_dump, "get vectype for scalar type: ");
151 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
152 }
153
154 vectype = get_vectype_for_scalar_type (scalar_type);
155 if (!vectype)
156 {
157 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
158 {
159 fprintf (vect_dump, "not vectorized: unsupported data-type ");
160 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
161 }
162 return false;
163 }
164 if (vect_print_dump_info (REPORT_DETAILS))
165 {
166 fprintf (vect_dump, "vectype: ");
167 print_generic_expr (vect_dump, vectype, TDF_SLIM);
168 }
169 STMT_VINFO_VECTYPE (stmt_info) = vectype;
170
171 nunits = TYPE_VECTOR_SUBPARTS (vectype);
172 if (vect_print_dump_info (REPORT_DETAILS))
173 fprintf (vect_dump, "nunits = %d", nunits);
174
175 if (vectorization_factor)
176 {
177 /* FORNOW: don't allow mixed units.
178 This restriction will be relaxed in the future. */
179 if (nunits != vectorization_factor)
180 {
181 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
182 fprintf (vect_dump, "not vectorized: mixed data-types");
183 return false;
184 }
185 }
186 else
187 vectorization_factor = nunits;
188
189 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
190 * vectorization_factor == UNITS_PER_SIMD_WORD);
191 }
192 }
193
194 /* TODO: Analyze cost. Decide if worth while to vectorize. */
195
196 if (vectorization_factor <= 1)
197 {
198 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
199 fprintf (vect_dump, "not vectorized: unsupported data-type");
200 return false;
201 }
202 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
203
204 return true;
205 }
206
207
208 /* Function vect_analyze_operations.
209
210 Scan the loop stmts and make sure they are all vectorizable. */
211
212 static bool
213 vect_analyze_operations (loop_vec_info loop_vinfo)
214 {
215 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
216 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
217 int nbbs = loop->num_nodes;
218 block_stmt_iterator si;
219 unsigned int vectorization_factor = 0;
220 int i;
221 bool ok;
222 tree phi;
223 stmt_vec_info stmt_info;
224 bool need_to_vectorize = false;
225
226 if (vect_print_dump_info (REPORT_DETAILS))
227 fprintf (vect_dump, "=== vect_analyze_operations ===");
228
229 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
230 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
231
232 for (i = 0; i < nbbs; i++)
233 {
234 basic_block bb = bbs[i];
235
236 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
237 {
238 stmt_info = vinfo_for_stmt (phi);
239 if (vect_print_dump_info (REPORT_DETAILS))
240 {
241 fprintf (vect_dump, "examining phi: ");
242 print_generic_expr (vect_dump, phi, TDF_SLIM);
243 }
244
245 gcc_assert (stmt_info);
246
247 if (STMT_VINFO_LIVE_P (stmt_info))
248 {
249 /* FORNOW: not yet supported. */
250 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
251 fprintf (vect_dump, "not vectorized: value used after loop.");
252 return false;
253 }
254
255 if (STMT_VINFO_RELEVANT_P (stmt_info))
256 {
257 /* Most likely a reduction-like computation that is used
258 in the loop. */
259 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
260 fprintf (vect_dump, "not vectorized: unsupported pattern.");
261 return false;
262 }
263 }
264
265 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
266 {
267 tree stmt = bsi_stmt (si);
268 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
269
270 if (vect_print_dump_info (REPORT_DETAILS))
271 {
272 fprintf (vect_dump, "==> examining statement: ");
273 print_generic_expr (vect_dump, stmt, TDF_SLIM);
274 }
275
276 gcc_assert (stmt_info);
277
278 /* skip stmts which do not need to be vectorized.
279 this is expected to include:
280 - the COND_EXPR which is the loop exit condition
281 - any LABEL_EXPRs in the loop
282 - computations that are used only for array indexing or loop
283 control */
284
285 if (!STMT_VINFO_RELEVANT_P (stmt_info)
286 && !STMT_VINFO_LIVE_P (stmt_info))
287 {
288 if (vect_print_dump_info (REPORT_DETAILS))
289 fprintf (vect_dump, "irrelevant.");
290 continue;
291 }
292
293 if (STMT_VINFO_RELEVANT_P (stmt_info))
294 {
295 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
296 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
297
298 ok = (vectorizable_operation (stmt, NULL, NULL)
299 || vectorizable_assignment (stmt, NULL, NULL)
300 || vectorizable_load (stmt, NULL, NULL)
301 || vectorizable_store (stmt, NULL, NULL)
302 || vectorizable_condition (stmt, NULL, NULL));
303
304 if (!ok)
305 {
306 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
307 {
308 fprintf (vect_dump,
309 "not vectorized: relevant stmt not supported: ");
310 print_generic_expr (vect_dump, stmt, TDF_SLIM);
311 }
312 return false;
313 }
314 need_to_vectorize = true;
315 }
316
317 if (STMT_VINFO_LIVE_P (stmt_info))
318 {
319 ok = vectorizable_reduction (stmt, NULL, NULL);
320
321 if (ok)
322 need_to_vectorize = true;
323 else
324 ok = vectorizable_live_operation (stmt, NULL, NULL);
325
326 if (!ok)
327 {
328 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
329 {
330 fprintf (vect_dump,
331 "not vectorized: live stmt not supported: ");
332 print_generic_expr (vect_dump, stmt, TDF_SLIM);
333 }
334 return false;
335 }
336 }
337 } /* stmts in bb */
338 } /* bbs */
339
340 /* TODO: Analyze cost. Decide if worth while to vectorize. */
341
342 /* All operations in the loop are either irrelevant (deal with loop
343 control, or dead), or only used outside the loop and can be moved
344 out of the loop (e.g. invariants, inductions). The loop can be
345 optimized away by scalar optimizations. We're better off not
346 touching this loop. */
347 if (!need_to_vectorize)
348 {
349 if (vect_print_dump_info (REPORT_DETAILS))
350 fprintf (vect_dump,
351 "All the computation can be taken out of the loop.");
352 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
353 fprintf (vect_dump,
354 "not vectorized: redundant loop. no profit to vectorize.");
355 return false;
356 }
357
358 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
359 && vect_print_dump_info (REPORT_DETAILS))
360 fprintf (vect_dump,
361 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
362 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
363
364 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
365 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
366 {
367 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
368 fprintf (vect_dump, "not vectorized: iteration count too small.");
369 return false;
370 }
371
372 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
373 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
374 {
375 if (vect_print_dump_info (REPORT_DETAILS))
376 fprintf (vect_dump, "epilog loop required.");
377 if (!vect_can_advance_ivs_p (loop_vinfo))
378 {
379 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
380 fprintf (vect_dump,
381 "not vectorized: can't create epilog loop 1.");
382 return false;
383 }
384 if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit))
385 {
386 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
387 fprintf (vect_dump,
388 "not vectorized: can't create epilog loop 2.");
389 return false;
390 }
391 }
392
393 return true;
394 }
395
396
397 /* Function exist_non_indexing_operands_for_use_p
398
399 USE is one of the uses attached to STMT. Check if USE is
400 used in STMT for anything other than indexing an array. */
401
402 static bool
403 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
404 {
405 tree operand;
406 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
407
408 /* USE corresponds to some operand in STMT. If there is no data
409 reference in STMT, then any operand that corresponds to USE
410 is not indexing an array. */
411 if (!STMT_VINFO_DATA_REF (stmt_info))
412 return true;
413
414 /* STMT has a data_ref. FORNOW this means that its of one of
415 the following forms:
416 -1- ARRAY_REF = var
417 -2- var = ARRAY_REF
418 (This should have been verified in analyze_data_refs).
419
420 'var' in the second case corresponds to a def, not a use,
421 so USE cannot correspond to any operands that are not used
422 for array indexing.
423
424 Therefore, all we need to check is if STMT falls into the
425 first case, and whether var corresponds to USE. */
426
427 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
428 return false;
429
430 operand = TREE_OPERAND (stmt, 1);
431
432 if (TREE_CODE (operand) != SSA_NAME)
433 return false;
434
435 if (operand == use)
436 return true;
437
438 return false;
439 }
440
441
442 /* Function vect_analyze_scalar_cycles.
443
444 Examine the cross iteration def-use cycles of scalar variables, by
445 analyzing the loop (scalar) PHIs; Classify each cycle as one of the
446 following: invariant, induction, reduction, unknown.
447
448 Some forms of scalar cycles are not yet supported.
449
450 Example1: reduction: (unsupported yet)
451
452 loop1:
453 for (i=0; i<N; i++)
454 sum += a[i];
455
456 Example2: induction: (unsupported yet)
457
458 loop2:
459 for (i=0; i<N; i++)
460 a[i] = i;
461
462 Note: the following loop *is* vectorizable:
463
464 loop3:
465 for (i=0; i<N; i++)
466 a[i] = b[i];
467
468 even though it has a def-use cycle caused by the induction variable i:
469
470 loop: i_2 = PHI (i_0, i_1)
471 a[i_2] = ...;
472 i_1 = i_2 + 1;
473 GOTO loop;
474
475 because the def-use cycle in loop3 is considered "not relevant" - i.e.,
476 it does not need to be vectorized because it is only used for array
477 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
478 loop2 on the other hand is relevant (it is being written to memory).
479 */
480
481 static void
482 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
483 {
484 tree phi;
485 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
486 basic_block bb = loop->header;
487 tree dummy;
488
489 if (vect_print_dump_info (REPORT_DETAILS))
490 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
491
492 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
493 {
494 tree access_fn = NULL;
495 tree def = PHI_RESULT (phi);
496 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
497 tree reduc_stmt;
498
499 if (vect_print_dump_info (REPORT_DETAILS))
500 {
501 fprintf (vect_dump, "Analyze phi: ");
502 print_generic_expr (vect_dump, phi, TDF_SLIM);
503 }
504
505 /* Skip virtual phi's. The data dependences that are associated with
506 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
507
508 if (!is_gimple_reg (SSA_NAME_VAR (def)))
509 {
510 if (vect_print_dump_info (REPORT_DETAILS))
511 fprintf (vect_dump, "virtual phi. skip.");
512 continue;
513 }
514
515 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
516
517 /* Analyze the evolution function. */
518
519 access_fn = analyze_scalar_evolution (loop, def);
520
521 if (!access_fn)
522 continue;
523
524 if (vect_print_dump_info (REPORT_DETAILS))
525 {
526 fprintf (vect_dump, "Access function of PHI: ");
527 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
528 }
529
530 if (vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
531 {
532 if (vect_print_dump_info (REPORT_DETAILS))
533 fprintf (vect_dump, "Detected induction.");
534 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
535 continue;
536 }
537
538 /* TODO: handle invariant phis */
539
540 reduc_stmt = vect_is_simple_reduction (loop, phi);
541 if (reduc_stmt)
542 {
543 if (vect_print_dump_info (REPORT_DETAILS))
544 fprintf (vect_dump, "Detected reduction.");
545 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
546 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
547 vect_reduction_def;
548 }
549 else
550 if (vect_print_dump_info (REPORT_DETAILS))
551 fprintf (vect_dump, "Unknown def-use cycle pattern.");
552
553 }
554
555 return;
556 }
557
558
559 /* Function vect_analyze_data_ref_dependence.
560
561 Return TRUE if there (might) exist a dependence between a memory-reference
562 DRA and a memory-reference DRB. */
563
564 static bool
565 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
566 loop_vec_info loop_vinfo)
567 {
568 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
569 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
570 int dist = 0;
571 unsigned int loop_depth = 0;
572 struct loop *loop_nest = loop;
573 struct data_reference *dra = DDR_A (ddr);
574 struct data_reference *drb = DDR_B (ddr);
575 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
576 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
577
578 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
579 return false;
580
581 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
582 {
583 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
584 {
585 fprintf (vect_dump,
586 "not vectorized: can't determine dependence between ");
587 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
588 fprintf (vect_dump, " and ");
589 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
590 }
591 return true;
592 }
593
594 if (!DDR_DIST_VECT (ddr))
595 {
596 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
597 {
598 fprintf (vect_dump, "not vectorized: bad dist vector for ");
599 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
600 fprintf (vect_dump, " and ");
601 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
602 }
603 return true;
604 }
605
606 /* Find loop depth. */
607 while (loop_nest && loop_nest->outer && loop_nest->outer->outer)
608 {
609 loop_nest = loop_nest->outer;
610 loop_depth++;
611 }
612
613 dist = DDR_DIST_VECT (ddr)[loop_depth];
614 if (vect_print_dump_info (REPORT_DR_DETAILS))
615 fprintf (vect_dump, "dependence distance = %d.",dist);
616
617 /* Same loop iteration. */
618 if (dist % vectorization_factor == 0)
619 {
620 /* Two references with distance zero have the same alignment. */
621 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
622 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
623 if (vect_print_dump_info (REPORT_ALIGNMENT))
624 fprintf (vect_dump, "accesses have the same alignment.");
625 if (vect_print_dump_info (REPORT_DR_DETAILS))
626 {
627 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
628 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
629 fprintf (vect_dump, " and ");
630 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
631 }
632 return false;
633 }
634
635 if (abs (dist) >= vectorization_factor)
636 {
637 /* Dependence distance does not create dependence, as far as vectorization
638 is concerned, in this case. */
639 if (vect_print_dump_info (REPORT_DR_DETAILS))
640 fprintf (vect_dump, "dependence distance >= VF.");
641 return false;
642 }
643
644 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
645 {
646 fprintf (vect_dump,
647 "not vectorized: possible dependence between data-refs ");
648 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
649 fprintf (vect_dump, " and ");
650 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
651 }
652
653 return true;
654 }
655
656
657 /* Function vect_analyze_data_ref_dependences.
658
659 Examine all the data references in the loop, and make sure there do not
660 exist any data dependences between them. */
661
662 static bool
663 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
664 {
665 unsigned int i;
666 varray_type ddrs = LOOP_VINFO_DDRS (loop_vinfo);
667
668 if (vect_print_dump_info (REPORT_DETAILS))
669 fprintf (vect_dump, "=== vect_analyze_dependences ===");
670
671 for (i = 0; i < VARRAY_ACTIVE_SIZE (ddrs); i++)
672 {
673 struct data_dependence_relation *ddr = VARRAY_GENERIC_PTR (ddrs, i);
674
675 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
676 return false;
677 }
678
679 return true;
680 }
681
682
683 /* Function vect_compute_data_ref_alignment
684
685 Compute the misalignment of the data reference DR.
686
687 Output:
688 1. If during the misalignment computation it is found that the data reference
689 cannot be vectorized then false is returned.
690 2. DR_MISALIGNMENT (DR) is defined.
691
692 FOR NOW: No analysis is actually performed. Misalignment is calculated
693 only for trivial cases. TODO. */
694
695 static bool
696 vect_compute_data_ref_alignment (struct data_reference *dr)
697 {
698 tree stmt = DR_STMT (dr);
699 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
700 tree ref = DR_REF (dr);
701 tree vectype;
702 tree base, base_addr;
703 bool base_aligned;
704 tree misalign;
705 tree aligned_to, alignment;
706
707 if (vect_print_dump_info (REPORT_DETAILS))
708 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
709
710 /* Initialize misalignment to unknown. */
711 DR_MISALIGNMENT (dr) = -1;
712
713 misalign = DR_OFFSET_MISALIGNMENT (dr);
714 aligned_to = DR_ALIGNED_TO (dr);
715 base_addr = DR_BASE_ADDRESS (dr);
716 base = build_fold_indirect_ref (base_addr);
717 vectype = STMT_VINFO_VECTYPE (stmt_info);
718 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
719
720 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
721 || !misalign)
722 {
723 if (vect_print_dump_info (REPORT_DETAILS))
724 {
725 fprintf (vect_dump, "Unknown alignment for access: ");
726 print_generic_expr (vect_dump, base, TDF_SLIM);
727 }
728 return true;
729 }
730
731 if ((DECL_P (base)
732 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
733 alignment) >= 0)
734 || (TREE_CODE (base_addr) == SSA_NAME
735 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
736 TREE_TYPE (base_addr)))),
737 alignment) >= 0))
738 base_aligned = true;
739 else
740 base_aligned = false;
741
742 if (!base_aligned)
743 {
744 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
745 {
746 if (vect_print_dump_info (REPORT_DETAILS))
747 {
748 fprintf (vect_dump, "can't force alignment of ref: ");
749 print_generic_expr (vect_dump, ref, TDF_SLIM);
750 }
751 return true;
752 }
753
754 /* Force the alignment of the decl.
755 NOTE: This is the only change to the code we make during
756 the analysis phase, before deciding to vectorize the loop. */
757 if (vect_print_dump_info (REPORT_DETAILS))
758 fprintf (vect_dump, "force alignment");
759 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
760 DECL_USER_ALIGN (base) = 1;
761 }
762
763 /* At this point we assume that the base is aligned. */
764 gcc_assert (base_aligned
765 || (TREE_CODE (base) == VAR_DECL
766 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
767
768 /* Modulo alignment. */
769 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
770
771 if (tree_int_cst_sgn (misalign) < 0)
772 {
773 /* Negative misalignment value. */
774 if (vect_print_dump_info (REPORT_DETAILS))
775 fprintf (vect_dump, "unexpected misalign value");
776 return false;
777 }
778
779 DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);
780
781 if (vect_print_dump_info (REPORT_DETAILS))
782 {
783 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
784 print_generic_expr (vect_dump, ref, TDF_SLIM);
785 }
786
787 return true;
788 }
789
790
791 /* Function vect_compute_data_refs_alignment
792
793 Compute the misalignment of data references in the loop.
794 This pass may take place at function granularity instead of at loop
795 granularity.
796
797 FOR NOW: No analysis is actually performed. Misalignment is calculated
798 only for trivial cases. TODO. */
799
800 static bool
801 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
802 {
803 varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
804 unsigned int i;
805
806 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
807 {
808 struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
809 if (!vect_compute_data_ref_alignment (dr))
810 return false;
811 }
812
813 return true;
814 }
815
816
817 /* Function vect_enhance_data_refs_alignment
818
819 This pass will use loop versioning and loop peeling in order to enhance
820 the alignment of data references in the loop.
821
822 FOR NOW: we assume that whatever versioning/peeling takes place, only the
823 original loop is to be vectorized; Any other loops that are created by
824 the transformations performed in this pass - are not supposed to be
825 vectorized. This restriction will be relaxed. */
826
827 static void
828 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
829 {
830 varray_type loop_datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
831 varray_type datarefs;
832 VEC(dr_p,heap) *same_align_drs;
833 struct data_reference *dr0 = NULL;
834 struct data_reference *dr;
835 unsigned int i, j;
836 bool check_loads;
837
838 /*
839 This pass will require a cost model to guide it whether to apply peeling
840 or versioning or a combination of the two. For example, the scheme that
841 intel uses when given a loop with several memory accesses, is as follows:
842 choose one memory access ('p') which alignment you want to force by doing
843 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
844 other accesses are not necessarily aligned, or (2) use loop versioning to
845 generate one loop in which all accesses are aligned, and another loop in
846 which only 'p' is necessarily aligned.
847
848 ("Automatic Intra-Register Vectorization for the Intel Architecture",
849 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
850 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
851
852 Devising a cost model is the most critical aspect of this work. It will
853 guide us on which access to peel for, whether to use loop versioning, how
854 many versions to create, etc. The cost model will probably consist of
855 generic considerations as well as target specific considerations (on
856 powerpc for example, misaligned stores are more painful than misaligned
857 loads).
858
859 Here is the general steps involved in alignment enhancements:
860
861 -- original loop, before alignment analysis:
862 for (i=0; i<N; i++){
863 x = q[i]; # DR_MISALIGNMENT(q) = unknown
864 p[i] = y; # DR_MISALIGNMENT(p) = unknown
865 }
866
867 -- After vect_compute_data_refs_alignment:
868 for (i=0; i<N; i++){
869 x = q[i]; # DR_MISALIGNMENT(q) = 3
870 p[i] = y; # DR_MISALIGNMENT(p) = unknown
871 }
872
873 -- Possibility 1: we do loop versioning:
874 if (p is aligned) {
875 for (i=0; i<N; i++){ # loop 1A
876 x = q[i]; # DR_MISALIGNMENT(q) = 3
877 p[i] = y; # DR_MISALIGNMENT(p) = 0
878 }
879 }
880 else {
881 for (i=0; i<N; i++){ # loop 1B
882 x = q[i]; # DR_MISALIGNMENT(q) = 3
883 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
884 }
885 }
886
887 -- Possibility 2: we do loop peeling:
888 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
889 x = q[i];
890 p[i] = y;
891 }
892 for (i = 3; i < N; i++){ # loop 2A
893 x = q[i]; # DR_MISALIGNMENT(q) = 0
894 p[i] = y; # DR_MISALIGNMENT(p) = unknown
895 }
896
897 -- Possibility 3: combination of loop peeling and versioning:
898 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
899 x = q[i];
900 p[i] = y;
901 }
902 if (p is aligned) {
903 for (i = 3; i<N; i++){ # loop 3A
904 x = q[i]; # DR_MISALIGNMENT(q) = 0
905 p[i] = y; # DR_MISALIGNMENT(p) = 0
906 }
907 }
908 else {
909 for (i = 3; i<N; i++){ # loop 3B
910 x = q[i]; # DR_MISALIGNMENT(q) = 0
911 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
912 }
913 }
914
915 These loops are later passed to loop_transform to be vectorized. The
916 vectorizer will use the alignment information to guide the transformation
917 (whether to generate regular loads/stores, or with special handling for
918 misalignment).
919 */
920
921 /* (1) Peeling to force alignment. */
922
923 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
924 Considerations:
925 + How many accesses will become aligned due to the peeling
926 - How many accesses will become unaligned due to the peeling,
927 and the cost of misaligned accesses.
928 - The cost of peeling (the extra runtime checks, the increase
929 in code size).
930
931 The scheme we use FORNOW: peel to force the alignment of the first
932 misaligned store in the loop.
933 Rationale: misaligned stores are not yet supported.
934
935 TODO: Use a better cost model. */
936
937 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_datarefs); i++)
938 {
939 dr0 = VARRAY_GENERIC_PTR (loop_datarefs, i);
940 if (!DR_IS_READ (dr0) && !aligned_access_p (dr0))
941 {
942 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
943 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
944 break;
945 }
946 }
947
948 /* (1.2) Update the alignment info according to the peeling factor.
949 If the misalignment of the DR we peel for is M, then the
950 peeling factor is VF - M, and the misalignment of each access DR_i
951 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
952 If the misalignment of the DR we peel for is unknown, then the
953 misalignment of each access DR_i in the loop is also unknown.
954
955 TODO: - consider accesses that are known to have the same
956 alignment, even if that alignment is unknown. */
957
958 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
959 {
960 int mis;
961 int npeel = 0;
962
963 if (known_alignment_for_access_p (dr0))
964 {
965 /* Since it's known at compile time, compute the number of iterations
966 in the peeled loop (the peeling factor) for use in updating
967 DR_MISALIGNMENT values. The peeling factor is the vectorization
968 factor minus the misalignment as an element count. */
969 mis = DR_MISALIGNMENT (dr0);
970 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
971 npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
972 }
973
974 datarefs = loop_datarefs;
975 check_loads = false;
976 for (j = 0; j < 2; j++)
977 {
978 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
979 {
980 struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
981
982 if (dr == dr0 || (!check_loads && DR_IS_READ (dr)))
983 continue;
984 if (known_alignment_for_access_p (dr)
985 && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr0))
986 DR_MISALIGNMENT (dr) = 0;
987 else if (known_alignment_for_access_p (dr)
988 && known_alignment_for_access_p (dr0))
989 {
990 int drsize =
991 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
992
993 DR_MISALIGNMENT (dr) += npeel * drsize;
994 DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
995 }
996 else
997 DR_MISALIGNMENT (dr) = -1;
998 }
999 check_loads = true;
1000 }
1001
1002 same_align_drs =
1003 STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr0)));
1004 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, dr); i++)
1005 {
1006 DR_MISALIGNMENT (dr) = 0;
1007 }
1008
1009 DR_MISALIGNMENT (dr0) = 0;
1010 }
1011 }
1012
1013
1014 /* Function vect_analyze_data_refs_alignment
1015
1016 Analyze the alignment of the data-references in the loop.
1017 FOR NOW: Until support for misaligned accesses is in place, only if all
1018 accesses are aligned can the loop be vectorized. This restriction will be
1019 relaxed. */
1020
1021 static bool
1022 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1023 {
1024 varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1025 enum dr_alignment_support supportable_dr_alignment;
1026 unsigned int i;
1027
1028 if (vect_print_dump_info (REPORT_DETAILS))
1029 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1030
1031
1032 /* This pass may take place at function granularity instead of at loop
1033 granularity. */
1034
1035 if (!vect_compute_data_refs_alignment (loop_vinfo))
1036 {
1037 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1038 fprintf (vect_dump,
1039 "not vectorized: can't calculate alignment for data ref.");
1040 return false;
1041 }
1042
1043
1044 /* This pass will decide on using loop versioning and/or loop peeling in
1045 order to enhance the alignment of data references in the loop. */
1046
1047 vect_enhance_data_refs_alignment (loop_vinfo);
1048
1049
1050 /* Finally, check that all the data references in the loop can be
1051 handled with respect to their alignment. */
1052
1053 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
1054 {
1055 struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
1056 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1057 if (!supportable_dr_alignment)
1058 {
1059 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1060 {
1061 if (DR_IS_READ (dr))
1062 fprintf (vect_dump,
1063 "not vectorized: unsupported unaligned load.");
1064 else
1065 fprintf (vect_dump,
1066 "not vectorized: unsupported unaligned store.");
1067 }
1068 return false;
1069 }
1070 if (supportable_dr_alignment != dr_aligned
1071 && (vect_print_dump_info (REPORT_ALIGNMENT)))
1072 fprintf (vect_dump, "Vectorizing an unaligned access.");
1073 }
1074 if (LOOP_VINFO_UNALIGNED_DR (loop_vinfo)
1075 && vect_print_dump_info (REPORT_ALIGNMENT))
1076 fprintf (vect_dump, "Alignment of access forced using peeling.");
1077 return true;
1078 }
1079
1080
1081 /* Function vect_analyze_data_ref_access.
1082
1083 Analyze the access pattern of the data-reference DR. For now, a data access
1084 has to consecutive to be considered vectorizable. */
1085
1086 static bool
1087 vect_analyze_data_ref_access (struct data_reference *dr)
1088 {
1089 tree step = DR_STEP (dr);
1090 tree scalar_type = TREE_TYPE (DR_REF (dr));
1091
1092 if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1093 {
1094 if (vect_print_dump_info (REPORT_DETAILS))
1095 fprintf (vect_dump, "not consecutive access");
1096 return false;
1097 }
1098 return true;
1099 }
1100
1101
1102 /* Function vect_analyze_data_ref_accesses.
1103
1104 Analyze the access pattern of all the data references in the loop.
1105
1106 FORNOW: the only access pattern that is considered vectorizable is a
1107 simple step 1 (consecutive) access.
1108
1109 FORNOW: handle only arrays and pointer accesses. */
1110
1111 static bool
1112 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1113 {
1114 unsigned int i;
1115 varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1116
1117 if (vect_print_dump_info (REPORT_DETAILS))
1118 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1119
1120 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
1121 {
1122 struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
1123 if (!vect_analyze_data_ref_access (dr))
1124 {
1125 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1126 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1127 return false;
1128 }
1129 }
1130
1131 return true;
1132 }
1133
1134
1135 /* Function vect_analyze_data_refs.
1136
1137 Find all the data references in the loop.
1138
1139 The general structure of the analysis of data refs in the vectorizer is as
1140 follows:
1141 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1142 find and analyze all data-refs in the loop and their dependences.
1143 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1144 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1145 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1146
1147 */
1148
1149 static bool
1150 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1151 {
1152 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1153 unsigned int i;
1154 varray_type datarefs;
1155 tree scalar_type;
1156
1157 if (vect_print_dump_info (REPORT_DETAILS))
1158 fprintf (vect_dump, "=== vect_analyze_data_refs ===");
1159
1160 compute_data_dependences_for_loop (loop, false,
1161 &(LOOP_VINFO_DATAREFS (loop_vinfo)),
1162 &(LOOP_VINFO_DDRS (loop_vinfo)));
1163
1164 /* Go through the data-refs, check that the analysis succeeded. Update pointer
1165 from stmt_vec_info struct to DR and vectype. */
1166 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1167 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
1168 {
1169 struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
1170 tree stmt;
1171 stmt_vec_info stmt_info;
1172
1173 if (!dr || !DR_REF (dr))
1174 {
1175 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1176 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1177 return false;
1178 }
1179
1180 /* Update DR field in stmt_vec_info struct. */
1181 stmt = DR_STMT (dr);
1182 stmt_info = vinfo_for_stmt (stmt);
1183
1184 if (STMT_VINFO_DATA_REF (stmt_info))
1185 {
1186 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1187 {
1188 fprintf (vect_dump,
1189 "not vectorized: more than one data ref in stmt: ");
1190 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1191 }
1192 return false;
1193 }
1194 STMT_VINFO_DATA_REF (stmt_info) = dr;
1195
1196 /* Check that analysis of the data-ref succeeded. */
1197 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1198 || !DR_STEP (dr))
1199 {
1200 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1201 {
1202 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1203 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1204 }
1205 return false;
1206 }
1207 if (!DR_MEMTAG (dr))
1208 {
1209 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1210 {
1211 fprintf (vect_dump, "not vectorized: no memory tag for ");
1212 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1213 }
1214 return false;
1215 }
1216
1217 /* Set vectype for STMT. */
1218 scalar_type = TREE_TYPE (DR_REF (dr));
1219 STMT_VINFO_VECTYPE (stmt_info) =
1220 get_vectype_for_scalar_type (scalar_type);
1221 if (!STMT_VINFO_VECTYPE (stmt_info))
1222 {
1223 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1224 {
1225 fprintf (vect_dump,
1226 "not vectorized: no vectype for stmt: ");
1227 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1228 fprintf (vect_dump, " scalar_type: ");
1229 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
1230 }
1231 return false;
1232 }
1233 }
1234
1235 return true;
1236 }
1237
1238
1239 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
1240
1241 /* Function vect_mark_relevant.
1242
1243 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
1244
1245 static void
1246 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
1247 bool relevant_p, bool live_p)
1248 {
1249 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1250 bool save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info);
1251 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
1252
1253 if (vect_print_dump_info (REPORT_DETAILS))
1254 fprintf (vect_dump, "mark relevant %d, live %d.",relevant_p, live_p);
1255
1256 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
1257 STMT_VINFO_RELEVANT_P (stmt_info) |= relevant_p;
1258
1259 if (TREE_CODE (stmt) == PHI_NODE)
1260 /* Don't put phi-nodes in the worklist. Phis that are marked relevant
1261 or live will fail vectorization later on. */
1262 return;
1263
1264 if (STMT_VINFO_RELEVANT_P (stmt_info) == save_relevant_p
1265 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
1266 {
1267 if (vect_print_dump_info (REPORT_DETAILS))
1268 fprintf (vect_dump, "already marked relevant/live.");
1269 return;
1270 }
1271
1272 VEC_safe_push (tree, heap, *worklist, stmt);
1273 }
1274
1275
1276 /* Function vect_stmt_relevant_p.
1277
1278 Return true if STMT in loop that is represented by LOOP_VINFO is
1279 "relevant for vectorization".
1280
1281 A stmt is considered "relevant for vectorization" if:
1282 - it has uses outside the loop.
1283 - it has vdefs (it alters memory).
1284 - control stmts in the loop (except for the exit condition).
1285
1286 CHECKME: what other side effects would the vectorizer allow? */
1287
1288 static bool
1289 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
1290 bool *relevant_p, bool *live_p)
1291 {
1292 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1293 ssa_op_iter op_iter;
1294 imm_use_iterator imm_iter;
1295 use_operand_p use_p;
1296 def_operand_p def_p;
1297
1298 *relevant_p = false;
1299 *live_p = false;
1300
1301 /* cond stmt other than loop exit cond. */
1302 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
1303 *relevant_p = true;
1304
1305 /* changing memory. */
1306 if (TREE_CODE (stmt) != PHI_NODE)
1307 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
1308 {
1309 if (vect_print_dump_info (REPORT_DETAILS))
1310 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
1311 *relevant_p = true;
1312 }
1313
1314 /* uses outside the loop. */
1315 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
1316 {
1317 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
1318 {
1319 basic_block bb = bb_for_stmt (USE_STMT (use_p));
1320 if (!flow_bb_inside_loop_p (loop, bb))
1321 {
1322 if (vect_print_dump_info (REPORT_DETAILS))
1323 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
1324
1325 /* We expect all such uses to be in the loop exit phis
1326 (because of loop closed form) */
1327 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
1328 gcc_assert (bb == loop->single_exit->dest);
1329
1330 *live_p = true;
1331 }
1332 }
1333 }
1334
1335 return (*live_p || *relevant_p);
1336 }
1337
1338
1339 /* Function vect_mark_stmts_to_be_vectorized.
1340
1341 Not all stmts in the loop need to be vectorized. For example:
1342
1343 for i...
1344 for j...
1345 1. T0 = i + j
1346 2. T1 = a[T0]
1347
1348 3. j = j + 1
1349
1350 Stmt 1 and 3 do not need to be vectorized, because loop control and
1351 addressing of vectorized data-refs are handled differently.
1352
1353 This pass detects such stmts. */
1354
1355 static bool
1356 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
1357 {
1358 VEC(tree,heap) *worklist;
1359 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1360 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1361 unsigned int nbbs = loop->num_nodes;
1362 block_stmt_iterator si;
1363 tree stmt, use;
1364 stmt_ann_t ann;
1365 ssa_op_iter iter;
1366 unsigned int i;
1367 stmt_vec_info stmt_vinfo;
1368 basic_block bb;
1369 tree phi;
1370 bool relevant_p, live_p;
1371 tree def, def_stmt;
1372 enum vect_def_type dt;
1373
1374 if (vect_print_dump_info (REPORT_DETAILS))
1375 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
1376
1377 worklist = VEC_alloc (tree, heap, 64);
1378
1379 /* 1. Init worklist. */
1380
1381 bb = loop->header;
1382 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1383 {
1384 if (vect_print_dump_info (REPORT_DETAILS))
1385 {
1386 fprintf (vect_dump, "init: phi relevant? ");
1387 print_generic_expr (vect_dump, phi, TDF_SLIM);
1388 }
1389
1390 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant_p, &live_p))
1391 vect_mark_relevant (&worklist, phi, relevant_p, live_p);
1392 }
1393
1394 for (i = 0; i < nbbs; i++)
1395 {
1396 bb = bbs[i];
1397 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1398 {
1399 stmt = bsi_stmt (si);
1400
1401 if (vect_print_dump_info (REPORT_DETAILS))
1402 {
1403 fprintf (vect_dump, "init: stmt relevant? ");
1404 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1405 }
1406
1407 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant_p, &live_p))
1408 vect_mark_relevant (&worklist, stmt, relevant_p, live_p);
1409 }
1410 }
1411
1412
1413 /* 2. Process_worklist */
1414
1415 while (VEC_length (tree, worklist) > 0)
1416 {
1417 stmt = VEC_pop (tree, worklist);
1418
1419 if (vect_print_dump_info (REPORT_DETAILS))
1420 {
1421 fprintf (vect_dump, "worklist: examine stmt: ");
1422 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1423 }
1424
1425 /* Examine the USEs of STMT. For each ssa-name USE thta is defined
1426 in the loop, mark the stmt that defines it (DEF_STMT) as
1427 relevant/irrelevant and live/dead according to the liveness and
1428 relevance properties of STMT.
1429 */
1430
1431 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1432
1433 ann = stmt_ann (stmt);
1434 stmt_vinfo = vinfo_for_stmt (stmt);
1435
1436 relevant_p = STMT_VINFO_RELEVANT_P (stmt_vinfo);
1437 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
1438
1439 /* Generally, the liveness and relevance properties of STMT are
1440 propagated to the DEF_STMTs of its USEs:
1441 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
1442 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- relevant_p
1443
1444 Exceptions:
1445
1446 (case 1)
1447 If USE is used only for address computations (e.g. array indexing),
1448 which does not need to be directly vectorized, then the
1449 liveness/relevance of the respective DEF_STMT is left unchanged.
1450
1451 (case 2)
1452 If STMT has been identified as defining a reduction variable, then
1453 we have two cases:
1454 (case 2.1)
1455 The last use of STMT is the reduction-variable, which is defined
1456 by a loop-header-phi. We don't want to mark the phi as live or
1457 relevant (because it does not need to be vectorized, it is handled
1458 as part of the vectorization of the reduction), so in this case we
1459 skip the call to vect_mark_relevant.
1460 (case 2.2)
1461 The rest of the uses of STMT are defined in the loop body. For
1462 the def_stmt of these uses we want to set liveness/relevance
1463 as follows:
1464 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
1465 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true
1466 because even though STMT is classified as live (since it defines a
1467 value that is used across loop iterations) and irrelevant (since it
1468 is not used inside the loop), it will be vectorized, and therefore
1469 the corresponding DEF_STMTs need to marked as relevant.
1470 */
1471
1472 /* case 2.2: */
1473 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
1474 {
1475 gcc_assert (!relevant_p && live_p);
1476 relevant_p = true;
1477 live_p = false;
1478 }
1479
1480 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1481 {
1482 /* case 1: we are only interested in uses that need to be vectorized.
1483 Uses that are used for address computation are not considered
1484 relevant.
1485 */
1486 if (!exist_non_indexing_operands_for_use_p (use, stmt))
1487 continue;
1488
1489 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
1490 {
1491 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1492 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
1493 VEC_free (tree, heap, worklist);
1494 return false;
1495 }
1496
1497 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
1498 continue;
1499
1500 if (vect_print_dump_info (REPORT_DETAILS))
1501 {
1502 fprintf (vect_dump, "worklist: examine use %d: ", i);
1503 print_generic_expr (vect_dump, use, TDF_SLIM);
1504 }
1505
1506 bb = bb_for_stmt (def_stmt);
1507 if (!flow_bb_inside_loop_p (loop, bb))
1508 continue;
1509
1510 /* case 2.1: the reduction-use does not mark the defining-phi
1511 as relevant. */
1512 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
1513 && TREE_CODE (def_stmt) == PHI_NODE)
1514 continue;
1515
1516 vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p);
1517 }
1518 } /* while worklist */
1519
1520 VEC_free (tree, heap, worklist);
1521 return true;
1522 }
1523
1524
1525 /* Function vect_can_advance_ivs_p
1526
1527 In case the number of iterations that LOOP iterates in unknown at compile
1528 time, an epilog loop will be generated, and the loop induction variables
1529 (IVs) will be "advanced" to the value they are supposed to take just before
1530 the epilog loop. Here we check that the access function of the loop IVs
1531 and the expression that represents the loop bound are simple enough.
1532 These restrictions will be relaxed in the future. */
1533
1534 static bool
1535 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1536 {
1537 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1538 basic_block bb = loop->header;
1539 tree phi;
1540
1541 /* Analyze phi functions of the loop header. */
1542
1543 if (vect_print_dump_info (REPORT_DETAILS))
1544 fprintf (vect_dump, "=== vect_can_advance_ivs_p ===");
1545
1546 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1547 {
1548 tree access_fn = NULL;
1549 tree evolution_part;
1550
1551 if (vect_print_dump_info (REPORT_DETAILS))
1552 {
1553 fprintf (vect_dump, "Analyze phi: ");
1554 print_generic_expr (vect_dump, phi, TDF_SLIM);
1555 }
1556
1557 /* Skip virtual phi's. The data dependences that are associated with
1558 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
1559
1560 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1561 {
1562 if (vect_print_dump_info (REPORT_DETAILS))
1563 fprintf (vect_dump, "virtual phi. skip.");
1564 continue;
1565 }
1566
1567 /* Skip reduction phis. */
1568
1569 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1570 {
1571 if (vect_print_dump_info (REPORT_DETAILS))
1572 fprintf (vect_dump, "reduc phi. skip.");
1573 continue;
1574 }
1575
1576 /* Analyze the evolution function. */
1577
1578 access_fn = instantiate_parameters
1579 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1580
1581 if (!access_fn)
1582 {
1583 if (vect_print_dump_info (REPORT_DETAILS))
1584 fprintf (vect_dump, "No Access function.");
1585 return false;
1586 }
1587
1588 if (vect_print_dump_info (REPORT_DETAILS))
1589 {
1590 fprintf (vect_dump, "Access function of PHI: ");
1591 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1592 }
1593
1594 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1595
1596 if (evolution_part == NULL_TREE)
1597 {
1598 if (vect_print_dump_info (REPORT_DETAILS))
1599 fprintf (vect_dump, "No evolution.");
1600 return false;
1601 }
1602
1603 /* FORNOW: We do not transform initial conditions of IVs
1604 which evolution functions are a polynomial of degree >= 2. */
1605
1606 if (tree_is_chrec (evolution_part))
1607 return false;
1608 }
1609
1610 return true;
1611 }
1612
1613
1614 /* Function vect_get_loop_niters.
1615
1616 Determine how many iterations the loop is executed.
1617 If an expression that represents the number of iterations
1618 can be constructed, place it in NUMBER_OF_ITERATIONS.
1619 Return the loop exit condition. */
1620
1621 static tree
1622 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
1623 {
1624 tree niters;
1625
1626 if (vect_print_dump_info (REPORT_DETAILS))
1627 fprintf (vect_dump, "=== get_loop_niters ===");
1628
1629 niters = number_of_iterations_in_loop (loop);
1630
1631 if (niters != NULL_TREE
1632 && niters != chrec_dont_know)
1633 {
1634 *number_of_iterations = niters;
1635
1636 if (vect_print_dump_info (REPORT_DETAILS))
1637 {
1638 fprintf (vect_dump, "==> get_loop_niters:" );
1639 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
1640 }
1641 }
1642
1643 return get_loop_exit_condition (loop);
1644 }
1645
1646
1647 /* Function vect_analyze_loop_form.
1648
1649 Verify the following restrictions (some may be relaxed in the future):
1650 - it's an inner-most loop
1651 - number of BBs = 2 (which are the loop header and the latch)
1652 - the loop has a pre-header
1653 - the loop has a single entry and exit
1654 - the loop exit condition is simple enough, and the number of iterations
1655 can be analyzed (a countable loop). */
1656
1657 static loop_vec_info
1658 vect_analyze_loop_form (struct loop *loop)
1659 {
1660 loop_vec_info loop_vinfo;
1661 tree loop_cond;
1662 tree number_of_iterations = NULL;
1663
1664 if (vect_print_dump_info (REPORT_DETAILS))
1665 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
1666
1667 if (loop->inner)
1668 {
1669 if (vect_print_dump_info (REPORT_OUTER_LOOPS))
1670 fprintf (vect_dump, "not vectorized: nested loop.");
1671 return NULL;
1672 }
1673
1674 if (!loop->single_exit
1675 || loop->num_nodes != 2
1676 || EDGE_COUNT (loop->header->preds) != 2)
1677 {
1678 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1679 {
1680 if (!loop->single_exit)
1681 fprintf (vect_dump, "not vectorized: multiple exits.");
1682 else if (loop->num_nodes != 2)
1683 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
1684 else if (EDGE_COUNT (loop->header->preds) != 2)
1685 fprintf (vect_dump, "not vectorized: too many incoming edges.");
1686 }
1687
1688 return NULL;
1689 }
1690
1691 /* We assume that the loop exit condition is at the end of the loop. i.e,
1692 that the loop is represented as a do-while (with a proper if-guard
1693 before the loop if needed), where the loop header contains all the
1694 executable statements, and the latch is empty. */
1695 if (!empty_block_p (loop->latch))
1696 {
1697 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1698 fprintf (vect_dump, "not vectorized: unexpected loop form.");
1699 return NULL;
1700 }
1701
1702 /* Make sure there exists a single-predecessor exit bb: */
1703 if (!single_pred_p (loop->single_exit->dest))
1704 {
1705 edge e = loop->single_exit;
1706 if (!(e->flags & EDGE_ABNORMAL))
1707 {
1708 split_loop_exit_edge (e);
1709 if (vect_print_dump_info (REPORT_DETAILS))
1710 fprintf (vect_dump, "split exit edge.");
1711 }
1712 else
1713 {
1714 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1715 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
1716 return NULL;
1717 }
1718 }
1719
1720 if (empty_block_p (loop->header))
1721 {
1722 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1723 fprintf (vect_dump, "not vectorized: empty loop.");
1724 return NULL;
1725 }
1726
1727 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
1728 if (!loop_cond)
1729 {
1730 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1731 fprintf (vect_dump, "not vectorized: complicated exit condition.");
1732 return NULL;
1733 }
1734
1735 if (!number_of_iterations)
1736 {
1737 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1738 fprintf (vect_dump,
1739 "not vectorized: number of iterations cannot be computed.");
1740 return NULL;
1741 }
1742
1743 if (chrec_contains_undetermined (number_of_iterations))
1744 {
1745 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1746 fprintf (vect_dump, "Infinite number of iterations.");
1747 return false;
1748 }
1749
1750 loop_vinfo = new_loop_vec_info (loop);
1751 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1752
1753 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
1754 {
1755 if (vect_print_dump_info (REPORT_DETAILS))
1756 {
1757 fprintf (vect_dump, "Symbolic number of iterations is ");
1758 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
1759 }
1760 }
1761 else
1762 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
1763 {
1764 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1765 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
1766 return NULL;
1767 }
1768
1769 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
1770
1771 return loop_vinfo;
1772 }
1773
1774
1775 /* Function vect_analyze_loop.
1776
1777 Apply a set of analyses on LOOP, and create a loop_vec_info struct
1778 for it. The different analyses will record information in the
1779 loop_vec_info struct. */
1780 loop_vec_info
1781 vect_analyze_loop (struct loop *loop)
1782 {
1783 bool ok;
1784 loop_vec_info loop_vinfo;
1785
1786 if (vect_print_dump_info (REPORT_DETAILS))
1787 fprintf (vect_dump, "===== analyze_loop_nest =====");
1788
1789 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
1790
1791 loop_vinfo = vect_analyze_loop_form (loop);
1792 if (!loop_vinfo)
1793 {
1794 if (vect_print_dump_info (REPORT_DETAILS))
1795 fprintf (vect_dump, "bad loop form.");
1796 return NULL;
1797 }
1798
1799 /* Find all data references in the loop (which correspond to vdefs/vuses)
1800 and analyze their evolution in the loop.
1801
1802 FORNOW: Handle only simple, array references, which
1803 alignment can be forced, and aligned pointer-references. */
1804
1805 ok = vect_analyze_data_refs (loop_vinfo);
1806 if (!ok)
1807 {
1808 if (vect_print_dump_info (REPORT_DETAILS))
1809 fprintf (vect_dump, "bad data references.");
1810 destroy_loop_vec_info (loop_vinfo);
1811 return NULL;
1812 }
1813
1814 /* Classify all cross-iteration scalar data-flow cycles.
1815 Cross-iteration cycles caused by virtual phis are analyzed separately. */
1816
1817 vect_analyze_scalar_cycles (loop_vinfo);
1818
1819 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
1820
1821 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1822 if (!ok)
1823 {
1824 if (vect_print_dump_info (REPORT_DETAILS))
1825 fprintf (vect_dump, "unexpected pattern.");
1826 destroy_loop_vec_info (loop_vinfo);
1827 return NULL;
1828 }
1829
1830 ok = vect_determine_vectorization_factor (loop_vinfo);
1831 if (!ok)
1832 {
1833 if (vect_print_dump_info (REPORT_DETAILS))
1834 fprintf (vect_dump, "can't determine vectorization factor.");
1835 destroy_loop_vec_info (loop_vinfo);
1836 return NULL;
1837 }
1838
1839 /* Analyze data dependences between the data-refs in the loop.
1840 FORNOW: fail at the first data dependence that we encounter. */
1841
1842 ok = vect_analyze_data_ref_dependences (loop_vinfo);
1843 if (!ok)
1844 {
1845 if (vect_print_dump_info (REPORT_DETAILS))
1846 fprintf (vect_dump, "bad data dependence.");
1847 destroy_loop_vec_info (loop_vinfo);
1848 return NULL;
1849 }
1850
1851 /* Analyze the access patterns of the data-refs in the loop (consecutive,
1852 complex, etc.). FORNOW: Only handle consecutive access pattern. */
1853
1854 ok = vect_analyze_data_ref_accesses (loop_vinfo);
1855 if (!ok)
1856 {
1857 if (vect_print_dump_info (REPORT_DETAILS))
1858 fprintf (vect_dump, "bad data access.");
1859 destroy_loop_vec_info (loop_vinfo);
1860 return NULL;
1861 }
1862
1863 /* Analyze the alignment of the data-refs in the loop.
1864 FORNOW: Only aligned accesses are handled. */
1865
1866 ok = vect_analyze_data_refs_alignment (loop_vinfo);
1867 if (!ok)
1868 {
1869 if (vect_print_dump_info (REPORT_DETAILS))
1870 fprintf (vect_dump, "bad data alignment.");
1871 destroy_loop_vec_info (loop_vinfo);
1872 return NULL;
1873 }
1874
1875 /* Scan all the operations in the loop and make sure they are
1876 vectorizable. */
1877
1878 ok = vect_analyze_operations (loop_vinfo);
1879 if (!ok)
1880 {
1881 if (vect_print_dump_info (REPORT_DETAILS))
1882 fprintf (vect_dump, "bad operation or unsupported loop bound.");
1883 destroy_loop_vec_info (loop_vinfo);
1884 return NULL;
1885 }
1886
1887 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
1888
1889 return loop_vinfo;
1890 }