1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
24 #include "coretypes.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
43 /* Main analysis functions. */
44 static loop_vec_info
vect_analyze_loop_form (struct loop
*);
45 static bool vect_analyze_data_refs (loop_vec_info
);
46 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info
);
47 static void vect_analyze_scalar_cycles (loop_vec_info
);
48 static bool vect_analyze_data_ref_accesses (loop_vec_info
);
49 static bool vect_analyze_data_ref_dependences (loop_vec_info
);
50 static bool vect_analyze_data_refs_alignment (loop_vec_info
);
51 static bool vect_compute_data_refs_alignment (loop_vec_info
);
52 static bool vect_enhance_data_refs_alignment (loop_vec_info
);
53 static bool vect_analyze_operations (loop_vec_info
);
54 static bool vect_determine_vectorization_factor (loop_vec_info
);
56 /* Utility functions for the analyses. */
57 static bool exist_non_indexing_operands_for_use_p (tree
, tree
);
58 static tree
vect_get_loop_niters (struct loop
*, tree
*);
59 static bool vect_analyze_data_ref_dependence
60 (struct data_dependence_relation
*, loop_vec_info
);
61 static bool vect_compute_data_ref_alignment (struct data_reference
*);
62 static bool vect_analyze_data_ref_access (struct data_reference
*);
63 static bool vect_can_advance_ivs_p (loop_vec_info
);
64 static void vect_update_misalignment_for_peel
65 (struct data_reference
*, struct data_reference
*, int npeel
);
67 /* Function vect_determine_vectorization_factor
69 Determine the vectorization factor (VF). VF is the number of data elements
70 that are operated upon in parallel in a single iteration of the vectorized
71 loop. For example, when vectorizing a loop that operates on 4byte elements,
72 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
73 elements can fit in a single vector register.
75 We currently support vectorization of loops in which all types operated upon
76 are of the same size. Therefore this function currently sets VF according to
77 the size of the types operated upon, and fails if there are multiple sizes
80 VF is also the factor by which the loop iterations are strip-mined, e.g.:
87 for (i=0; i<N; i+=VF){
88 a[i:VF] = b[i:VF] + c[i:VF];
93 vect_determine_vectorization_factor (loop_vec_info loop_vinfo
)
95 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
96 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
97 int nbbs
= loop
->num_nodes
;
98 block_stmt_iterator si
;
99 unsigned int vectorization_factor
= 0;
103 if (vect_print_dump_info (REPORT_DETAILS
))
104 fprintf (vect_dump
, "=== vect_determine_vectorization_factor ===");
106 for (i
= 0; i
< nbbs
; i
++)
108 basic_block bb
= bbs
[i
];
110 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
112 tree stmt
= bsi_stmt (si
);
114 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
117 if (vect_print_dump_info (REPORT_DETAILS
))
119 fprintf (vect_dump
, "==> examining statement: ");
120 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
123 if (TREE_CODE (stmt
) != GIMPLE_MODIFY_STMT
)
126 gcc_assert (stmt_info
);
128 /* skip stmts which do not need to be vectorized. */
129 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
130 && !STMT_VINFO_LIVE_P (stmt_info
))
132 if (vect_print_dump_info (REPORT_DETAILS
))
133 fprintf (vect_dump
, "skip.");
137 if (!GIMPLE_STMT_P (stmt
)
138 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt
))))
140 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
142 fprintf (vect_dump
, "not vectorized: vector stmt in loop:");
143 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
148 if (STMT_VINFO_VECTYPE (stmt_info
))
150 /* The only case when a vectype had been already set is for stmts
151 that contain a dataref, or for "pattern-stmts" (stmts generated
152 by the vectorizer to represent/replace a certain idiom). */
153 gcc_assert (STMT_VINFO_DATA_REF (stmt_info
)
154 || is_pattern_stmt_p (stmt_info
));
155 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
159 gcc_assert (! STMT_VINFO_DATA_REF (stmt_info
)
160 && !is_pattern_stmt_p (stmt_info
));
162 /* We set the vectype according to the type of the result (lhs).
163 For stmts whose result-type is different than the type of the
164 arguments (e.g. demotion, promotion), vectype will be reset
165 appropriately (later). Note that we have to visit the smallest
166 datatype in this function, because that determines the VF.
167 If the smallest datatype in the loop is present only as the
168 rhs of a promotion operation - we'd miss it here.
169 However, in such a case, that a variable of this datatype
170 does not appear in the lhs anywhere in the loop, it shouldn't
171 affect the vectorization factor. */
172 scalar_type
= TREE_TYPE (GIMPLE_STMT_OPERAND (stmt
, 0));
174 if (vect_print_dump_info (REPORT_DETAILS
))
176 fprintf (vect_dump
, "get vectype for scalar type: ");
177 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
180 vectype
= get_vectype_for_scalar_type (scalar_type
);
183 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
186 "not vectorized: unsupported data-type ");
187 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
191 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
194 if (vect_print_dump_info (REPORT_DETAILS
))
196 fprintf (vect_dump
, "vectype: ");
197 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
200 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
201 if (vect_print_dump_info (REPORT_DETAILS
))
202 fprintf (vect_dump
, "nunits = %d", nunits
);
204 if (!vectorization_factor
205 || (nunits
> vectorization_factor
))
206 vectorization_factor
= nunits
;
211 /* TODO: Analyze cost. Decide if worth while to vectorize. */
213 if (vectorization_factor
<= 1)
215 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
216 fprintf (vect_dump
, "not vectorized: unsupported data-type");
219 LOOP_VINFO_VECT_FACTOR (loop_vinfo
) = vectorization_factor
;
225 /* Function vect_analyze_operations.
227 Scan the loop stmts and make sure they are all vectorizable. */
230 vect_analyze_operations (loop_vec_info loop_vinfo
)
232 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
233 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
234 int nbbs
= loop
->num_nodes
;
235 block_stmt_iterator si
;
236 unsigned int vectorization_factor
= 0;
240 stmt_vec_info stmt_info
;
241 bool need_to_vectorize
= false;
243 if (vect_print_dump_info (REPORT_DETAILS
))
244 fprintf (vect_dump
, "=== vect_analyze_operations ===");
246 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo
));
247 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
249 for (i
= 0; i
< nbbs
; i
++)
251 basic_block bb
= bbs
[i
];
253 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
255 stmt_info
= vinfo_for_stmt (phi
);
256 if (vect_print_dump_info (REPORT_DETAILS
))
258 fprintf (vect_dump
, "examining phi: ");
259 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
262 gcc_assert (stmt_info
);
264 if (STMT_VINFO_LIVE_P (stmt_info
))
266 /* FORNOW: not yet supported. */
267 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
268 fprintf (vect_dump
, "not vectorized: value used after loop.");
272 if (STMT_VINFO_RELEVANT_P (stmt_info
))
274 /* Most likely a reduction-like computation that is used
276 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
277 fprintf (vect_dump
, "not vectorized: unsupported pattern.");
282 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
284 tree stmt
= bsi_stmt (si
);
285 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
287 if (vect_print_dump_info (REPORT_DETAILS
))
289 fprintf (vect_dump
, "==> examining statement: ");
290 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
293 gcc_assert (stmt_info
);
295 /* skip stmts which do not need to be vectorized.
296 this is expected to include:
297 - the COND_EXPR which is the loop exit condition
298 - any LABEL_EXPRs in the loop
299 - computations that are used only for array indexing or loop
302 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
303 && !STMT_VINFO_LIVE_P (stmt_info
))
305 if (vect_print_dump_info (REPORT_DETAILS
))
306 fprintf (vect_dump
, "irrelevant.");
310 if (STMT_VINFO_RELEVANT_P (stmt_info
))
312 gcc_assert (GIMPLE_STMT_P (stmt
)
313 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt
))));
314 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
));
316 ok
= (vectorizable_type_promotion (stmt
, NULL
, NULL
)
317 || vectorizable_type_demotion (stmt
, NULL
, NULL
)
318 || vectorizable_conversion (stmt
, NULL
, NULL
)
319 || vectorizable_operation (stmt
, NULL
, NULL
)
320 || vectorizable_assignment (stmt
, NULL
, NULL
)
321 || vectorizable_load (stmt
, NULL
, NULL
)
322 || vectorizable_call (stmt
, NULL
, NULL
)
323 || vectorizable_store (stmt
, NULL
, NULL
)
324 || vectorizable_condition (stmt
, NULL
, NULL
));
328 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
331 "not vectorized: relevant stmt not supported: ");
332 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
336 need_to_vectorize
= true;
339 if (STMT_VINFO_LIVE_P (stmt_info
))
341 ok
= vectorizable_reduction (stmt
, NULL
, NULL
);
344 need_to_vectorize
= true;
346 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
350 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
353 "not vectorized: live stmt not supported: ");
354 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
362 /* TODO: Analyze cost. Decide if worth while to vectorize. */
364 /* All operations in the loop are either irrelevant (deal with loop
365 control, or dead), or only used outside the loop and can be moved
366 out of the loop (e.g. invariants, inductions). The loop can be
367 optimized away by scalar optimizations. We're better off not
368 touching this loop. */
369 if (!need_to_vectorize
)
371 if (vect_print_dump_info (REPORT_DETAILS
))
373 "All the computation can be taken out of the loop.");
374 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
376 "not vectorized: redundant loop. no profit to vectorize.");
380 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
381 && vect_print_dump_info (REPORT_DETAILS
))
383 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC
,
384 vectorization_factor
, LOOP_VINFO_INT_NITERS (loop_vinfo
));
386 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
387 && ((LOOP_VINFO_INT_NITERS (loop_vinfo
) < vectorization_factor
)
388 || (LOOP_VINFO_INT_NITERS (loop_vinfo
) <=
389 ((unsigned) (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND
))
390 * vectorization_factor
))))
392 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
393 fprintf (vect_dump
, "not vectorized: iteration count too small.");
397 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
398 || LOOP_VINFO_INT_NITERS (loop_vinfo
) % vectorization_factor
!= 0
399 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
))
401 if (vect_print_dump_info (REPORT_DETAILS
))
402 fprintf (vect_dump
, "epilog loop required.");
403 if (!vect_can_advance_ivs_p (loop_vinfo
))
405 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
407 "not vectorized: can't create epilog loop 1.");
410 if (!slpeel_can_duplicate_loop_p (loop
, single_exit (loop
)))
412 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
414 "not vectorized: can't create epilog loop 2.");
423 /* Function exist_non_indexing_operands_for_use_p
425 USE is one of the uses attached to STMT. Check if USE is
426 used in STMT for anything other than indexing an array. */
429 exist_non_indexing_operands_for_use_p (tree use
, tree stmt
)
432 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
434 /* USE corresponds to some operand in STMT. If there is no data
435 reference in STMT, then any operand that corresponds to USE
436 is not indexing an array. */
437 if (!STMT_VINFO_DATA_REF (stmt_info
))
440 /* STMT has a data_ref. FORNOW this means that its of one of
444 (This should have been verified in analyze_data_refs).
446 'var' in the second case corresponds to a def, not a use,
447 so USE cannot correspond to any operands that are not used
450 Therefore, all we need to check is if STMT falls into the
451 first case, and whether var corresponds to USE. */
453 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt
, 0)) == SSA_NAME
)
456 operand
= GIMPLE_STMT_OPERAND (stmt
, 1);
458 if (TREE_CODE (operand
) != SSA_NAME
)
468 /* Function vect_analyze_scalar_cycles.
470 Examine the cross iteration def-use cycles of scalar variables, by
471 analyzing the loop (scalar) PHIs; Classify each cycle as one of the
472 following: invariant, induction, reduction, unknown.
474 Some forms of scalar cycles are not yet supported.
476 Example1: reduction: (unsupported yet)
482 Example2: induction: (unsupported yet)
488 Note: the following loop *is* vectorizable:
494 even though it has a def-use cycle caused by the induction variable i:
496 loop: i_2 = PHI (i_0, i_1)
501 because the def-use cycle in loop3 is considered "not relevant" - i.e.,
502 it does not need to be vectorized because it is only used for array
503 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
504 loop2 on the other hand is relevant (it is being written to memory).
508 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo
)
511 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
512 basic_block bb
= loop
->header
;
514 VEC(tree
,heap
) *worklist
= VEC_alloc (tree
, heap
, 64);
516 if (vect_print_dump_info (REPORT_DETAILS
))
517 fprintf (vect_dump
, "=== vect_analyze_scalar_cycles ===");
519 /* First - identify all inductions. */
520 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
522 tree access_fn
= NULL
;
523 tree def
= PHI_RESULT (phi
);
524 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (phi
);
526 if (vect_print_dump_info (REPORT_DETAILS
))
528 fprintf (vect_dump
, "Analyze phi: ");
529 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
532 /* Skip virtual phi's. The data dependences that are associated with
533 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
534 if (!is_gimple_reg (SSA_NAME_VAR (def
)))
537 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_unknown_def_type
;
539 /* Analyze the evolution function. */
540 access_fn
= analyze_scalar_evolution (loop
, def
);
541 if (access_fn
&& vect_print_dump_info (REPORT_DETAILS
))
543 fprintf (vect_dump
, "Access function of PHI: ");
544 print_generic_expr (vect_dump
, access_fn
, TDF_SLIM
);
548 || !vect_is_simple_iv_evolution (loop
->num
, access_fn
, &dumy
, &dumy
))
550 VEC_safe_push (tree
, heap
, worklist
, phi
);
554 if (vect_print_dump_info (REPORT_DETAILS
))
555 fprintf (vect_dump
, "Detected induction.");
556 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_induction_def
;
560 /* Second - identify all reductions. */
561 while (VEC_length (tree
, worklist
) > 0)
563 tree phi
= VEC_pop (tree
, worklist
);
564 tree def
= PHI_RESULT (phi
);
565 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (phi
);
568 if (vect_print_dump_info (REPORT_DETAILS
))
570 fprintf (vect_dump
, "Analyze phi: ");
571 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
574 gcc_assert (is_gimple_reg (SSA_NAME_VAR (def
)));
575 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_unknown_def_type
);
577 reduc_stmt
= vect_is_simple_reduction (loop
, phi
);
580 if (vect_print_dump_info (REPORT_DETAILS
))
581 fprintf (vect_dump
, "Detected reduction.");
582 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_reduction_def
;
583 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt
)) =
587 if (vect_print_dump_info (REPORT_DETAILS
))
588 fprintf (vect_dump
, "Unknown def-use cycle pattern.");
591 VEC_free (tree
, heap
, worklist
);
596 /* Function vect_insert_into_interleaving_chain.
598 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
601 vect_insert_into_interleaving_chain (struct data_reference
*dra
,
602 struct data_reference
*drb
)
604 tree prev
, next
, next_init
;
605 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
606 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
608 prev
= DR_GROUP_FIRST_DR (stmtinfo_b
);
609 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
612 next_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
613 if (tree_int_cst_compare (next_init
, DR_INIT (dra
)) > 0)
616 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = DR_STMT (dra
);
617 DR_GROUP_NEXT_DR (stmtinfo_a
) = next
;
621 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
624 /* We got to the end of the list. Insert here. */
625 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = DR_STMT (dra
);
626 DR_GROUP_NEXT_DR (stmtinfo_a
) = NULL_TREE
;
630 /* Function vect_update_interleaving_chain.
632 For two data-refs DRA and DRB that are a part of a chain interleaved data
633 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
635 There are four possible cases:
636 1. New stmts - both DRA and DRB are not a part of any chain:
639 2. DRB is a part of a chain and DRA is not:
640 no need to update FIRST_DR
641 no need to insert DRB
642 insert DRA according to init
643 3. DRA is a part of a chain and DRB is not:
644 if (init of FIRST_DR > init of DRB)
646 NEXT(FIRST_DR) = previous FIRST_DR
648 insert DRB according to its init
649 4. both DRA and DRB are in some interleaving chains:
650 choose the chain with the smallest init of FIRST_DR
651 insert the nodes of the second chain into the first one. */
654 vect_update_interleaving_chain (struct data_reference
*drb
,
655 struct data_reference
*dra
)
657 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
658 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
659 tree next_init
, init_dra_chain
, init_drb_chain
, first_a
, first_b
;
660 tree node
, prev
, next
, node_init
, first_stmt
;
662 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
663 if (!DR_GROUP_FIRST_DR (stmtinfo_a
) && !DR_GROUP_FIRST_DR (stmtinfo_b
))
665 DR_GROUP_FIRST_DR (stmtinfo_a
) = DR_STMT (drb
);
666 DR_GROUP_FIRST_DR (stmtinfo_b
) = DR_STMT (drb
);
667 DR_GROUP_NEXT_DR (stmtinfo_b
) = DR_STMT (dra
);
671 /* 2. DRB is a part of a chain and DRA is not. */
672 if (!DR_GROUP_FIRST_DR (stmtinfo_a
) && DR_GROUP_FIRST_DR (stmtinfo_b
))
674 DR_GROUP_FIRST_DR (stmtinfo_a
) = DR_GROUP_FIRST_DR (stmtinfo_b
);
675 /* Insert DRA into the chain of DRB. */
676 vect_insert_into_interleaving_chain (dra
, drb
);
680 /* 3. DRA is a part of a chain and DRB is not. */
681 if (DR_GROUP_FIRST_DR (stmtinfo_a
) && !DR_GROUP_FIRST_DR (stmtinfo_b
))
683 tree old_first_stmt
= DR_GROUP_FIRST_DR (stmtinfo_a
);
684 tree init_old
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
688 if (tree_int_cst_compare (init_old
, DR_INIT (drb
)) > 0)
690 /* DRB's init is smaller than the init of the stmt previously marked
691 as the first stmt of the interleaving chain of DRA. Therefore, we
692 update FIRST_STMT and put DRB in the head of the list. */
693 DR_GROUP_FIRST_DR (stmtinfo_b
) = DR_STMT (drb
);
694 DR_GROUP_NEXT_DR (stmtinfo_b
) = old_first_stmt
;
696 /* Update all the stmts in the list to point to the new FIRST_STMT. */
697 tmp
= old_first_stmt
;
700 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp
)) = DR_STMT (drb
);
701 tmp
= DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp
));
706 /* Insert DRB in the list of DRA. */
707 vect_insert_into_interleaving_chain (drb
, dra
);
708 DR_GROUP_FIRST_DR (stmtinfo_b
) = DR_GROUP_FIRST_DR (stmtinfo_a
);
713 /* 4. both DRA and DRB are in some interleaving chains. */
714 first_a
= DR_GROUP_FIRST_DR (stmtinfo_a
);
715 first_b
= DR_GROUP_FIRST_DR (stmtinfo_b
);
716 if (first_a
== first_b
)
718 init_dra_chain
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a
)));
719 init_drb_chain
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b
)));
721 if (tree_int_cst_compare (init_dra_chain
, init_drb_chain
) > 0)
723 /* Insert the nodes of DRA chain into the DRB chain.
724 After inserting a node, continue from this node of the DRB chain (don't
725 start from the beginning. */
726 node
= DR_GROUP_FIRST_DR (stmtinfo_a
);
727 prev
= DR_GROUP_FIRST_DR (stmtinfo_b
);
728 first_stmt
= first_b
;
732 /* Insert the nodes of DRB chain into the DRA chain.
733 After inserting a node, continue from this node of the DRA chain (don't
734 start from the beginning. */
735 node
= DR_GROUP_FIRST_DR (stmtinfo_b
);
736 prev
= DR_GROUP_FIRST_DR (stmtinfo_a
);
737 first_stmt
= first_a
;
742 node_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node
)));
743 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
746 next_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
747 if (tree_int_cst_compare (next_init
, node_init
) > 0)
750 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = node
;
751 DR_GROUP_NEXT_DR (vinfo_for_stmt (node
)) = next
;
756 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
760 /* We got to the end of the list. Insert here. */
761 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = node
;
762 DR_GROUP_NEXT_DR (vinfo_for_stmt (node
)) = NULL_TREE
;
765 DR_GROUP_FIRST_DR (vinfo_for_stmt (node
)) = first_stmt
;
766 node
= DR_GROUP_NEXT_DR (vinfo_for_stmt (node
));
771 /* Function vect_equal_offsets.
773 Check if OFFSET1 and OFFSET2 are identical expressions. */
776 vect_equal_offsets (tree offset1
, tree offset2
)
780 STRIP_NOPS (offset1
);
781 STRIP_NOPS (offset2
);
783 if (offset1
== offset2
)
786 if (TREE_CODE (offset1
) != TREE_CODE (offset2
)
787 || !BINARY_CLASS_P (offset1
)
788 || !BINARY_CLASS_P (offset2
))
791 res0
= vect_equal_offsets (TREE_OPERAND (offset1
, 0),
792 TREE_OPERAND (offset2
, 0));
793 res1
= vect_equal_offsets (TREE_OPERAND (offset1
, 1),
794 TREE_OPERAND (offset2
, 1));
796 return (res0
&& res1
);
800 /* Function vect_check_interleaving.
802 Check if DRA and DRB are a part of interleaving. In case they are, insert
803 DRA and DRB in an interleaving chain. */
806 vect_check_interleaving (struct data_reference
*dra
,
807 struct data_reference
*drb
)
809 HOST_WIDE_INT type_size_a
, type_size_b
, diff_mod_size
, step
, init_a
, init_b
;
811 /* Check that the data-refs have same first location (except init) and they
812 are both either store or load (not load and store). */
813 if ((DR_BASE_ADDRESS (dra
) != DR_BASE_ADDRESS (drb
)
814 && (TREE_CODE (DR_BASE_ADDRESS (dra
)) != ADDR_EXPR
815 || TREE_CODE (DR_BASE_ADDRESS (drb
)) != ADDR_EXPR
816 || TREE_OPERAND (DR_BASE_ADDRESS (dra
), 0)
817 != TREE_OPERAND (DR_BASE_ADDRESS (drb
),0)))
818 || !vect_equal_offsets (DR_OFFSET (dra
), DR_OFFSET (drb
))
819 || !tree_int_cst_compare (DR_INIT (dra
), DR_INIT (drb
))
820 || DR_IS_READ (dra
) != DR_IS_READ (drb
))
824 1. data-refs are of the same type
825 2. their steps are equal
826 3. the step is greater than the difference between data-refs' inits */
827 type_size_a
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra
))));
828 type_size_b
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb
))));
830 if (type_size_a
!= type_size_b
831 || tree_int_cst_compare (DR_STEP (dra
), DR_STEP (drb
)))
834 init_a
= TREE_INT_CST_LOW (DR_INIT (dra
));
835 init_b
= TREE_INT_CST_LOW (DR_INIT (drb
));
836 step
= TREE_INT_CST_LOW (DR_STEP (dra
));
840 /* If init_a == init_b + the size of the type * k, we have an interleaving,
841 and DRB is accessed before DRA. */
842 diff_mod_size
= (init_a
- init_b
) % type_size_a
;
844 if ((init_a
- init_b
) > step
)
847 if (diff_mod_size
== 0)
849 vect_update_interleaving_chain (drb
, dra
);
850 if (vect_print_dump_info (REPORT_DR_DETAILS
))
852 fprintf (vect_dump
, "Detected interleaving ");
853 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
854 fprintf (vect_dump
, " and ");
855 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
862 /* If init_b == init_a + the size of the type * k, we have an
863 interleaving, and DRA is accessed before DRB. */
864 diff_mod_size
= (init_b
- init_a
) % type_size_a
;
866 if ((init_b
- init_a
) > step
)
869 if (diff_mod_size
== 0)
871 vect_update_interleaving_chain (dra
, drb
);
872 if (vect_print_dump_info (REPORT_DR_DETAILS
))
874 fprintf (vect_dump
, "Detected interleaving ");
875 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
876 fprintf (vect_dump
, " and ");
877 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
885 /* Function vect_analyze_data_ref_dependence.
887 Return TRUE if there (might) exist a dependence between a memory-reference
888 DRA and a memory-reference DRB. */
891 vect_analyze_data_ref_dependence (struct data_dependence_relation
*ddr
,
892 loop_vec_info loop_vinfo
)
895 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
896 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
897 struct data_reference
*dra
= DDR_A (ddr
);
898 struct data_reference
*drb
= DDR_B (ddr
);
899 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
900 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
901 int dra_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra
))));
902 int drb_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb
))));
903 lambda_vector dist_v
;
904 unsigned int loop_depth
;
906 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
908 /* Independent data accesses. */
909 vect_check_interleaving (dra
, drb
);
913 if ((DR_IS_READ (dra
) && DR_IS_READ (drb
)) || dra
== drb
)
916 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
918 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
921 "not vectorized: can't determine dependence between ");
922 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
923 fprintf (vect_dump
, " and ");
924 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
929 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
931 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
933 fprintf (vect_dump
, "not vectorized: bad dist vector for ");
934 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
935 fprintf (vect_dump
, " and ");
936 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
941 loop_depth
= index_in_loop_nest (loop
->num
, DDR_LOOP_NEST (ddr
));
942 for (i
= 0; VEC_iterate (lambda_vector
, DDR_DIST_VECTS (ddr
), i
, dist_v
); i
++)
944 int dist
= dist_v
[loop_depth
];
946 if (vect_print_dump_info (REPORT_DR_DETAILS
))
947 fprintf (vect_dump
, "dependence distance = %d.", dist
);
949 /* Same loop iteration. */
950 if (dist
% vectorization_factor
== 0 && dra_size
== drb_size
)
952 /* Two references with distance zero have the same alignment. */
953 VEC_safe_push (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a
), drb
);
954 VEC_safe_push (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b
), dra
);
955 if (vect_print_dump_info (REPORT_ALIGNMENT
))
956 fprintf (vect_dump
, "accesses have the same alignment.");
957 if (vect_print_dump_info (REPORT_DR_DETAILS
))
959 fprintf (vect_dump
, "dependence distance modulo vf == 0 between ");
960 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
961 fprintf (vect_dump
, " and ");
962 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
965 /* For interleaving, mark that there is a read-write dependency if
966 necessary. We check before that one of the data-refs is store. */
967 if (DR_IS_READ (dra
))
968 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a
) = true;
971 if (DR_IS_READ (drb
))
972 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b
) = true;
978 if (abs (dist
) >= vectorization_factor
)
980 /* Dependence distance does not create dependence, as far as vectorization
981 is concerned, in this case. */
982 if (vect_print_dump_info (REPORT_DR_DETAILS
))
983 fprintf (vect_dump
, "dependence distance >= VF.");
987 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
990 "not vectorized: possible dependence between data-refs ");
991 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
992 fprintf (vect_dump
, " and ");
993 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
1003 /* Function vect_analyze_data_ref_dependences.
1005 Examine all the data references in the loop, and make sure there do not
1006 exist any data dependences between them. */
1009 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo
)
1012 VEC (ddr_p
, heap
) *ddrs
= LOOP_VINFO_DDRS (loop_vinfo
);
1013 struct data_dependence_relation
*ddr
;
1015 if (vect_print_dump_info (REPORT_DETAILS
))
1016 fprintf (vect_dump
, "=== vect_analyze_dependences ===");
1018 for (i
= 0; VEC_iterate (ddr_p
, ddrs
, i
, ddr
); i
++)
1019 if (vect_analyze_data_ref_dependence (ddr
, loop_vinfo
))
1026 /* Function vect_compute_data_ref_alignment
1028 Compute the misalignment of the data reference DR.
1031 1. If during the misalignment computation it is found that the data reference
1032 cannot be vectorized then false is returned.
1033 2. DR_MISALIGNMENT (DR) is defined.
1035 FOR NOW: No analysis is actually performed. Misalignment is calculated
1036 only for trivial cases. TODO. */
1039 vect_compute_data_ref_alignment (struct data_reference
*dr
)
1041 tree stmt
= DR_STMT (dr
);
1042 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1043 tree ref
= DR_REF (dr
);
1045 tree base
, base_addr
;
1048 tree aligned_to
, alignment
;
1050 if (vect_print_dump_info (REPORT_DETAILS
))
1051 fprintf (vect_dump
, "vect_compute_data_ref_alignment:");
1053 /* Initialize misalignment to unknown. */
1054 DR_MISALIGNMENT (dr
) = -1;
1056 misalign
= DR_OFFSET_MISALIGNMENT (dr
);
1057 aligned_to
= DR_ALIGNED_TO (dr
);
1058 base_addr
= DR_BASE_ADDRESS (dr
);
1059 base
= build_fold_indirect_ref (base_addr
);
1060 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1061 alignment
= ssize_int (TYPE_ALIGN (vectype
)/BITS_PER_UNIT
);
1063 if ((aligned_to
&& tree_int_cst_compare (aligned_to
, alignment
) < 0)
1066 if (vect_print_dump_info (REPORT_DETAILS
))
1068 fprintf (vect_dump
, "Unknown alignment for access: ");
1069 print_generic_expr (vect_dump
, base
, TDF_SLIM
);
1075 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base
)),
1077 || (TREE_CODE (base_addr
) == SSA_NAME
1078 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1079 TREE_TYPE (base_addr
)))),
1081 base_aligned
= true;
1083 base_aligned
= false;
1087 /* Do not change the alignment of global variables if
1088 flag_section_anchors is enabled. */
1089 if (!vect_can_force_dr_alignment_p (base
, TYPE_ALIGN (vectype
))
1090 || (TREE_STATIC (base
) && flag_section_anchors
))
1092 if (vect_print_dump_info (REPORT_DETAILS
))
1094 fprintf (vect_dump
, "can't force alignment of ref: ");
1095 print_generic_expr (vect_dump
, ref
, TDF_SLIM
);
1100 /* Force the alignment of the decl.
1101 NOTE: This is the only change to the code we make during
1102 the analysis phase, before deciding to vectorize the loop. */
1103 if (vect_print_dump_info (REPORT_DETAILS
))
1104 fprintf (vect_dump
, "force alignment");
1105 DECL_ALIGN (base
) = TYPE_ALIGN (vectype
);
1106 DECL_USER_ALIGN (base
) = 1;
1109 /* At this point we assume that the base is aligned. */
1110 gcc_assert (base_aligned
1111 || (TREE_CODE (base
) == VAR_DECL
1112 && DECL_ALIGN (base
) >= TYPE_ALIGN (vectype
)));
1114 /* Modulo alignment. */
1115 misalign
= size_binop (TRUNC_MOD_EXPR
, misalign
, alignment
);
1117 if (!host_integerp (misalign
, 1))
1119 /* Negative or overflowed misalignment value. */
1120 if (vect_print_dump_info (REPORT_DETAILS
))
1121 fprintf (vect_dump
, "unexpected misalign value");
1125 DR_MISALIGNMENT (dr
) = TREE_INT_CST_LOW (misalign
);
1127 if (vect_print_dump_info (REPORT_DETAILS
))
1129 fprintf (vect_dump
, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr
));
1130 print_generic_expr (vect_dump
, ref
, TDF_SLIM
);
1137 /* Function vect_compute_data_refs_alignment
1139 Compute the misalignment of data references in the loop.
1140 Return FALSE if a data reference is found that cannot be vectorized. */
1143 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo
)
1145 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1146 struct data_reference
*dr
;
1149 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1150 if (!vect_compute_data_ref_alignment (dr
))
1157 /* Function vect_update_misalignment_for_peel
1159 DR - the data reference whose misalignment is to be adjusted.
1160 DR_PEEL - the data reference whose misalignment is being made
1161 zero in the vector loop by the peel.
1162 NPEEL - the number of iterations in the peel loop if the misalignment
1163 of DR_PEEL is known at compile time. */
1166 vect_update_misalignment_for_peel (struct data_reference
*dr
,
1167 struct data_reference
*dr_peel
, int npeel
)
1170 VEC(dr_p
,heap
) *same_align_drs
;
1171 struct data_reference
*current_dr
;
1172 int dr_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr
))));
1173 int dr_peel_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel
))));
1174 stmt_vec_info stmt_info
= vinfo_for_stmt (DR_STMT (dr
));
1175 stmt_vec_info peel_stmt_info
= vinfo_for_stmt (DR_STMT (dr_peel
));
1177 /* For interleaved data accesses the step in the loop must be multiplied by
1178 the size of the interleaving group. */
1179 if (DR_GROUP_FIRST_DR (stmt_info
))
1180 dr_size
*= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info
)));
1181 if (DR_GROUP_FIRST_DR (peel_stmt_info
))
1182 dr_peel_size
*= DR_GROUP_SIZE (peel_stmt_info
);
1184 if (known_alignment_for_access_p (dr
)
1185 && known_alignment_for_access_p (dr_peel
)
1186 && (DR_MISALIGNMENT (dr
) / dr_size
==
1187 DR_MISALIGNMENT (dr_peel
) / dr_peel_size
))
1189 DR_MISALIGNMENT (dr
) = 0;
1193 /* It can be assumed that the data refs with the same alignment as dr_peel
1194 are aligned in the vector loop. */
1196 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel
)));
1197 for (i
= 0; VEC_iterate (dr_p
, same_align_drs
, i
, current_dr
); i
++)
1199 if (current_dr
!= dr
)
1201 gcc_assert (DR_MISALIGNMENT (dr
) / dr_size
==
1202 DR_MISALIGNMENT (dr_peel
) / dr_peel_size
);
1203 DR_MISALIGNMENT (dr
) = 0;
1207 if (known_alignment_for_access_p (dr
)
1208 && known_alignment_for_access_p (dr_peel
))
1210 DR_MISALIGNMENT (dr
) += npeel
* dr_size
;
1211 DR_MISALIGNMENT (dr
) %= UNITS_PER_SIMD_WORD
;
1215 if (vect_print_dump_info (REPORT_DETAILS
))
1216 fprintf (vect_dump
, "Setting misalignment to -1.");
1217 DR_MISALIGNMENT (dr
) = -1;
1221 /* Function vect_verify_datarefs_alignment
1223 Return TRUE if all data references in the loop can be
1224 handled with respect to alignment. */
1227 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo
)
1229 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1230 struct data_reference
*dr
;
1231 enum dr_alignment_support supportable_dr_alignment
;
1234 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1236 tree stmt
= DR_STMT (dr
);
1237 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1239 /* For interleaving, only the alignment of the first access matters. */
1240 if (DR_GROUP_FIRST_DR (stmt_info
)
1241 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
)
1244 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
1245 if (!supportable_dr_alignment
)
1247 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1249 if (DR_IS_READ (dr
))
1251 "not vectorized: unsupported unaligned load.");
1254 "not vectorized: unsupported unaligned store.");
1258 if (supportable_dr_alignment
!= dr_aligned
1259 && vect_print_dump_info (REPORT_ALIGNMENT
))
1260 fprintf (vect_dump
, "Vectorizing an unaligned access.");
1266 /* Function vect_enhance_data_refs_alignment
1268 This pass will use loop versioning and loop peeling in order to enhance
1269 the alignment of data references in the loop.
1271 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1272 original loop is to be vectorized; Any other loops that are created by
1273 the transformations performed in this pass - are not supposed to be
1274 vectorized. This restriction will be relaxed.
1276 This pass will require a cost model to guide it whether to apply peeling
1277 or versioning or a combination of the two. For example, the scheme that
1278 intel uses when given a loop with several memory accesses, is as follows:
1279 choose one memory access ('p') which alignment you want to force by doing
1280 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1281 other accesses are not necessarily aligned, or (2) use loop versioning to
1282 generate one loop in which all accesses are aligned, and another loop in
1283 which only 'p' is necessarily aligned.
1285 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1286 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1287 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1289 Devising a cost model is the most critical aspect of this work. It will
1290 guide us on which access to peel for, whether to use loop versioning, how
1291 many versions to create, etc. The cost model will probably consist of
1292 generic considerations as well as target specific considerations (on
1293 powerpc for example, misaligned stores are more painful than misaligned
1296 Here are the general steps involved in alignment enhancements:
1298 -- original loop, before alignment analysis:
1299 for (i=0; i<N; i++){
1300 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1301 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1304 -- After vect_compute_data_refs_alignment:
1305 for (i=0; i<N; i++){
1306 x = q[i]; # DR_MISALIGNMENT(q) = 3
1307 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1310 -- Possibility 1: we do loop versioning:
1312 for (i=0; i<N; i++){ # loop 1A
1313 x = q[i]; # DR_MISALIGNMENT(q) = 3
1314 p[i] = y; # DR_MISALIGNMENT(p) = 0
1318 for (i=0; i<N; i++){ # loop 1B
1319 x = q[i]; # DR_MISALIGNMENT(q) = 3
1320 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1324 -- Possibility 2: we do loop peeling:
1325 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1329 for (i = 3; i < N; i++){ # loop 2A
1330 x = q[i]; # DR_MISALIGNMENT(q) = 0
1331 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1334 -- Possibility 3: combination of loop peeling and versioning:
1335 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1340 for (i = 3; i<N; i++){ # loop 3A
1341 x = q[i]; # DR_MISALIGNMENT(q) = 0
1342 p[i] = y; # DR_MISALIGNMENT(p) = 0
1346 for (i = 3; i<N; i++){ # loop 3B
1347 x = q[i]; # DR_MISALIGNMENT(q) = 0
1348 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1352 These loops are later passed to loop_transform to be vectorized. The
1353 vectorizer will use the alignment information to guide the transformation
1354 (whether to generate regular loads/stores, or with special handling for
1358 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo
)
1360 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1361 enum dr_alignment_support supportable_dr_alignment
;
1362 struct data_reference
*dr0
= NULL
;
1363 struct data_reference
*dr
;
1365 bool do_peeling
= false;
1366 bool do_versioning
= false;
1369 stmt_vec_info stmt_info
;
1371 if (vect_print_dump_info (REPORT_DETAILS
))
1372 fprintf (vect_dump
, "=== vect_enhance_data_refs_alignment ===");
1374 /* While cost model enhancements are expected in the future, the high level
1375 view of the code at this time is as follows:
1377 A) If there is a misaligned write then see if peeling to align this write
1378 can make all data references satisfy vect_supportable_dr_alignment.
1379 If so, update data structures as needed and return true. Note that
1380 at this time vect_supportable_dr_alignment is known to return false
1381 for a a misaligned write.
1383 B) If peeling wasn't possible and there is a data reference with an
1384 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1385 then see if loop versioning checks can be used to make all data
1386 references satisfy vect_supportable_dr_alignment. If so, update
1387 data structures as needed and return true.
1389 C) If neither peeling nor versioning were successful then return false if
1390 any data reference does not satisfy vect_supportable_dr_alignment.
1392 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1394 Note, Possibility 3 above (which is peeling and versioning together) is not
1395 being done at this time. */
1397 /* (1) Peeling to force alignment. */
1399 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1401 + How many accesses will become aligned due to the peeling
1402 - How many accesses will become unaligned due to the peeling,
1403 and the cost of misaligned accesses.
1404 - The cost of peeling (the extra runtime checks, the increase
1407 The scheme we use FORNOW: peel to force the alignment of the first
1408 misaligned store in the loop.
1409 Rationale: misaligned stores are not yet supported.
1411 TODO: Use a cost model. */
1413 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1415 stmt
= DR_STMT (dr
);
1416 stmt_info
= vinfo_for_stmt (stmt
);
1418 /* For interleaving, only the alignment of the first access
1420 if (DR_GROUP_FIRST_DR (stmt_info
)
1421 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
)
1424 if (!DR_IS_READ (dr
) && !aligned_access_p (dr
))
1426 if (DR_GROUP_FIRST_DR (stmt_info
))
1428 /* For interleaved access we peel only if number of iterations in
1429 the prolog loop ({VF - misalignment}), is a multiple of the
1430 number of the interleaved accesses. */
1431 int elem_size
, mis_in_elements
;
1432 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1434 /* FORNOW: handle only known alignment. */
1435 if (!known_alignment_for_access_p (dr
))
1441 elem_size
= UNITS_PER_SIMD_WORD
/ vf
;
1442 mis_in_elements
= DR_MISALIGNMENT (dr
) / elem_size
;
1444 if ((vf
- mis_in_elements
) % DR_GROUP_SIZE (stmt_info
))
1456 /* Often peeling for alignment will require peeling for loop-bound, which in
1457 turn requires that we know how to adjust the loop ivs after the loop. */
1458 if (!vect_can_advance_ivs_p (loop_vinfo
))
1466 if (known_alignment_for_access_p (dr0
))
1468 /* Since it's known at compile time, compute the number of iterations
1469 in the peeled loop (the peeling factor) for use in updating
1470 DR_MISALIGNMENT values. The peeling factor is the vectorization
1471 factor minus the misalignment as an element count. */
1472 mis
= DR_MISALIGNMENT (dr0
);
1473 mis
/= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0
))));
1474 npeel
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) - mis
;
1476 /* For interleaved data access every iteration accesses all the
1477 members of the group, therefore we divide the number of iterations
1478 by the group size. */
1479 stmt_info
= vinfo_for_stmt (DR_STMT (dr0
));
1480 if (DR_GROUP_FIRST_DR (stmt_info
))
1481 npeel
/= DR_GROUP_SIZE (stmt_info
);
1483 if (vect_print_dump_info (REPORT_DETAILS
))
1484 fprintf (vect_dump
, "Try peeling by %d", npeel
);
1487 /* Ensure that all data refs can be vectorized after the peel. */
1488 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1490 int save_misalignment
;
1495 stmt
= DR_STMT (dr
);
1496 stmt_info
= vinfo_for_stmt (stmt
);
1497 /* For interleaving, only the alignment of the first access
1499 if (DR_GROUP_FIRST_DR (stmt_info
)
1500 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
)
1503 save_misalignment
= DR_MISALIGNMENT (dr
);
1504 vect_update_misalignment_for_peel (dr
, dr0
, npeel
);
1505 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
1506 DR_MISALIGNMENT (dr
) = save_misalignment
;
1508 if (!supportable_dr_alignment
)
1517 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1518 If the misalignment of DR_i is identical to that of dr0 then set
1519 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1520 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1521 by the peeling factor times the element size of DR_i (MOD the
1522 vectorization factor times the size). Otherwise, the
1523 misalignment of DR_i must be set to unknown. */
1524 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1526 vect_update_misalignment_for_peel (dr
, dr0
, npeel
);
1528 LOOP_VINFO_UNALIGNED_DR (loop_vinfo
) = dr0
;
1529 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
) = DR_MISALIGNMENT (dr0
);
1530 DR_MISALIGNMENT (dr0
) = 0;
1531 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1532 fprintf (vect_dump
, "Alignment of access forced using peeling.");
1534 if (vect_print_dump_info (REPORT_DETAILS
))
1535 fprintf (vect_dump
, "Peeling for alignment will be applied.");
1537 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
1544 /* (2) Versioning to force alignment. */
1546 /* Try versioning if:
1547 1) flag_tree_vect_loop_version is TRUE
1548 2) optimize_size is FALSE
1549 3) there is at least one unsupported misaligned data ref with an unknown
1551 4) all misaligned data refs with a known misalignment are supported, and
1552 5) the number of runtime alignment checks is within reason. */
1554 do_versioning
= flag_tree_vect_loop_version
&& (!optimize_size
);
1558 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1560 stmt
= DR_STMT (dr
);
1561 stmt_info
= vinfo_for_stmt (stmt
);
1563 /* For interleaving, only the alignment of the first access
1565 if (aligned_access_p (dr
)
1566 || (DR_GROUP_FIRST_DR (stmt_info
)
1567 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
))
1570 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
1572 if (!supportable_dr_alignment
)
1578 if (known_alignment_for_access_p (dr
)
1579 || VEC_length (tree
,
1580 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
))
1581 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS
))
1583 do_versioning
= false;
1587 stmt
= DR_STMT (dr
);
1588 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1589 gcc_assert (vectype
);
1591 /* The rightmost bits of an aligned address must be zeros.
1592 Construct the mask needed for this test. For example,
1593 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1594 mask must be 15 = 0xf. */
1595 mask
= GET_MODE_SIZE (TYPE_MODE (vectype
)) - 1;
1597 /* FORNOW: use the same mask to test all potentially unaligned
1598 references in the loop. The vectorizer currently supports
1599 a single vector size, see the reference to
1600 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1601 vectorization factor is computed. */
1602 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo
)
1603 || LOOP_VINFO_PTR_MASK (loop_vinfo
) == mask
);
1604 LOOP_VINFO_PTR_MASK (loop_vinfo
) = mask
;
1605 VEC_safe_push (tree
, heap
,
1606 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
),
1611 /* Versioning requires at least one misaligned data reference. */
1612 if (VEC_length (tree
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
)) == 0)
1613 do_versioning
= false;
1614 else if (!do_versioning
)
1615 VEC_truncate (tree
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
), 0);
1620 VEC(tree
,heap
) *may_misalign_stmts
1621 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
);
1624 /* It can now be assumed that the data references in the statements
1625 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1626 of the loop being vectorized. */
1627 for (i
= 0; VEC_iterate (tree
, may_misalign_stmts
, i
, stmt
); i
++)
1629 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1630 dr
= STMT_VINFO_DATA_REF (stmt_info
);
1631 DR_MISALIGNMENT (dr
) = 0;
1632 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1633 fprintf (vect_dump
, "Alignment of access forced using versioning.");
1636 if (vect_print_dump_info (REPORT_DETAILS
))
1637 fprintf (vect_dump
, "Versioning for alignment will be applied.");
1639 /* Peeling and versioning can't be done together at this time. */
1640 gcc_assert (! (do_peeling
&& do_versioning
));
1642 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
1647 /* This point is reached if neither peeling nor versioning is being done. */
1648 gcc_assert (! (do_peeling
|| do_versioning
));
1650 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
1655 /* Function vect_analyze_data_refs_alignment
1657 Analyze the alignment of the data-references in the loop.
1658 Return FALSE if a data reference is found that cannot be vectorized. */
1661 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo
)
1663 if (vect_print_dump_info (REPORT_DETAILS
))
1664 fprintf (vect_dump
, "=== vect_analyze_data_refs_alignment ===");
1666 if (!vect_compute_data_refs_alignment (loop_vinfo
))
1668 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1670 "not vectorized: can't calculate alignment for data ref.");
1678 /* Function vect_analyze_data_ref_access.
1680 Analyze the access pattern of the data-reference DR. For now, a data access
1681 has to be consecutive to be considered vectorizable. */
1684 vect_analyze_data_ref_access (struct data_reference
*dr
)
1686 tree step
= DR_STEP (dr
);
1687 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
1688 tree scalar_type
= TREE_TYPE (DR_REF (dr
));
1689 HOST_WIDE_INT type_size
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type
));
1690 tree stmt
= DR_STMT (dr
);
1691 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1692 interleaving group (including gaps). */
1693 HOST_WIDE_INT stride
= dr_step
/ type_size
;
1697 if (vect_print_dump_info (REPORT_DETAILS
))
1698 fprintf (vect_dump
, "bad data-ref access");
1703 if (!tree_int_cst_compare (step
, TYPE_SIZE_UNIT (scalar_type
)))
1705 /* Mark that it is not interleaving. */
1706 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) = NULL_TREE
;
1710 /* Not consecutive access is possible only if it is a part of interleaving. */
1711 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)))
1713 /* Check if it this DR is a part of interleaving, and is a single
1714 element of the group that is accessed in the loop. */
1716 /* Gaps are supported only for loads. STEP must be a multiple of the type
1717 size. The size of the group must be a power of 2. */
1719 && (dr_step
% type_size
) == 0
1721 && exact_log2 (stride
) != -1)
1723 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) = stmt
;
1724 DR_GROUP_SIZE (vinfo_for_stmt (stmt
)) = stride
;
1725 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1727 fprintf (vect_dump
, "Detected single element interleaving %d ",
1728 DR_GROUP_SIZE (vinfo_for_stmt (stmt
)));
1729 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
1730 fprintf (vect_dump
, " step ");
1731 print_generic_expr (vect_dump
, step
, TDF_SLIM
);
1735 if (vect_print_dump_info (REPORT_DETAILS
))
1736 fprintf (vect_dump
, "not consecutive access");
1740 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) == stmt
)
1742 /* First stmt in the interleaving chain. Check the chain. */
1743 tree next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt
));
1744 struct data_reference
*data_ref
= dr
;
1745 unsigned int count
= 1;
1747 tree prev_init
= DR_INIT (data_ref
);
1749 HOST_WIDE_INT diff
, count_in_bytes
;
1753 /* Skip same data-refs. In case that two or more stmts share data-ref
1754 (supported only for loads), we vectorize only the first stmt, and
1755 the rest get their vectorized loads from the the first one. */
1756 if (!tree_int_cst_compare (DR_INIT (data_ref
),
1757 DR_INIT (STMT_VINFO_DATA_REF (
1758 vinfo_for_stmt (next
)))))
1760 if (!DR_IS_READ (data_ref
))
1762 if (vect_print_dump_info (REPORT_DETAILS
))
1763 fprintf (vect_dump
, "Two store stmts share the same dr.");
1767 /* Check that there is no load-store dependencies for this loads
1768 to prevent a case of load-store-load to the same location. */
1769 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next
))
1770 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev
)))
1772 if (vect_print_dump_info (REPORT_DETAILS
))
1774 "READ_WRITE dependence in interleaving.");
1778 /* For load use the same data-ref load. */
1779 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next
)) = prev
;
1782 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next
));
1787 /* Check that all the accesses have the same STEP. */
1788 next_step
= DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
1789 if (tree_int_cst_compare (step
, next_step
))
1791 if (vect_print_dump_info (REPORT_DETAILS
))
1792 fprintf (vect_dump
, "not consecutive access in interleaving");
1796 data_ref
= STMT_VINFO_DATA_REF (vinfo_for_stmt (next
));
1797 /* Check that the distance between two accesses is equal to the type
1798 size. Otherwise, we have gaps. */
1799 diff
= (TREE_INT_CST_LOW (DR_INIT (data_ref
))
1800 - TREE_INT_CST_LOW (prev_init
)) / type_size
;
1801 if (!DR_IS_READ (data_ref
) && diff
!= 1)
1803 if (vect_print_dump_info (REPORT_DETAILS
))
1804 fprintf (vect_dump
, "interleaved store with gaps");
1807 /* Store the gap from the previous member of the group. If there is no
1808 gap in the access, DR_GROUP_GAP is always 1. */
1809 DR_GROUP_GAP (vinfo_for_stmt (next
)) = diff
;
1811 prev_init
= DR_INIT (data_ref
);
1812 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next
));
1813 /* Count the number of data-refs in the chain. */
1817 /* COUNT is the number of accesses found, we multiply it by the size of
1818 the type to get COUNT_IN_BYTES. */
1819 count_in_bytes
= type_size
* count
;
1821 /* Check that the size of the interleaving is not greater than STEP. */
1822 if (dr_step
< count_in_bytes
)
1824 if (vect_print_dump_info (REPORT_DETAILS
))
1826 fprintf (vect_dump
, "interleaving size is greater than step for ");
1827 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
1832 /* Check that the size of the interleaving is equal to STEP for stores,
1833 i.e., that there are no gaps. */
1834 if (!DR_IS_READ (dr
) && dr_step
!= count_in_bytes
)
1836 if (vect_print_dump_info (REPORT_DETAILS
))
1837 fprintf (vect_dump
, "interleaved store with gaps");
1841 /* Check that STEP is a multiple of type size. */
1842 if ((dr_step
% type_size
) != 0)
1844 if (vect_print_dump_info (REPORT_DETAILS
))
1846 fprintf (vect_dump
, "step is not a multiple of type size: step ");
1847 print_generic_expr (vect_dump
, step
, TDF_SLIM
);
1848 fprintf (vect_dump
, " size ");
1849 print_generic_expr (vect_dump
, TYPE_SIZE_UNIT (scalar_type
),
1855 /* FORNOW: we handle only interleaving that is a power of 2. */
1856 if (exact_log2 (stride
) == -1)
1858 if (vect_print_dump_info (REPORT_DETAILS
))
1859 fprintf (vect_dump
, "interleaving is not a power of 2");
1862 DR_GROUP_SIZE (vinfo_for_stmt (stmt
)) = stride
;
1868 /* Function vect_analyze_data_ref_accesses.
1870 Analyze the access pattern of all the data references in the loop.
1872 FORNOW: the only access pattern that is considered vectorizable is a
1873 simple step 1 (consecutive) access.
1875 FORNOW: handle only arrays and pointer accesses. */
1878 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo
)
1881 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1882 struct data_reference
*dr
;
1884 if (vect_print_dump_info (REPORT_DETAILS
))
1885 fprintf (vect_dump
, "=== vect_analyze_data_ref_accesses ===");
1887 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1888 if (!vect_analyze_data_ref_access (dr
))
1890 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1891 fprintf (vect_dump
, "not vectorized: complicated access pattern.");
1899 /* Function vect_analyze_data_refs.
1901 Find all the data references in the loop.
1903 The general structure of the analysis of data refs in the vectorizer is as
1905 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1906 find and analyze all data-refs in the loop and their dependences.
1907 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1908 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1909 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1914 vect_analyze_data_refs (loop_vec_info loop_vinfo
)
1916 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1918 VEC (data_reference_p
, heap
) *datarefs
;
1919 struct data_reference
*dr
;
1922 if (vect_print_dump_info (REPORT_DETAILS
))
1923 fprintf (vect_dump
, "=== vect_analyze_data_refs ===\n");
1925 compute_data_dependences_for_loop (loop
, true,
1926 &LOOP_VINFO_DATAREFS (loop_vinfo
),
1927 &LOOP_VINFO_DDRS (loop_vinfo
));
1929 /* Go through the data-refs, check that the analysis succeeded. Update pointer
1930 from stmt_vec_info struct to DR and vectype. */
1931 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1933 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1936 stmt_vec_info stmt_info
;
1938 if (!dr
|| !DR_REF (dr
))
1940 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1941 fprintf (vect_dump
, "not vectorized: unhandled data-ref ");
1945 /* Update DR field in stmt_vec_info struct. */
1946 stmt
= DR_STMT (dr
);
1947 stmt_info
= vinfo_for_stmt (stmt
);
1949 if (STMT_VINFO_DATA_REF (stmt_info
))
1951 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1954 "not vectorized: more than one data ref in stmt: ");
1955 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
1959 STMT_VINFO_DATA_REF (stmt_info
) = dr
;
1961 /* Check that analysis of the data-ref succeeded. */
1962 if (!DR_BASE_ADDRESS (dr
) || !DR_OFFSET (dr
) || !DR_INIT (dr
)
1965 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1967 fprintf (vect_dump
, "not vectorized: data ref analysis failed ");
1968 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
1972 if (!DR_MEMTAG (dr
))
1974 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1976 fprintf (vect_dump
, "not vectorized: no memory tag for ");
1977 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
1982 /* Set vectype for STMT. */
1983 scalar_type
= TREE_TYPE (DR_REF (dr
));
1984 STMT_VINFO_VECTYPE (stmt_info
) =
1985 get_vectype_for_scalar_type (scalar_type
);
1986 if (!STMT_VINFO_VECTYPE (stmt_info
))
1988 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1991 "not vectorized: no vectype for stmt: ");
1992 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
1993 fprintf (vect_dump
, " scalar_type: ");
1994 print_generic_expr (vect_dump
, scalar_type
, TDF_DETAILS
);
2004 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
2006 /* Function vect_mark_relevant.
2008 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
2011 vect_mark_relevant (VEC(tree
,heap
) **worklist
, tree stmt
,
2012 enum vect_relevant relevant
, bool live_p
)
2014 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2015 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
2016 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
2018 if (vect_print_dump_info (REPORT_DETAILS
))
2019 fprintf (vect_dump
, "mark relevant %d, live %d.", relevant
, live_p
);
2021 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
2025 /* This is the last stmt in a sequence that was detected as a
2026 pattern that can potentially be vectorized. Don't mark the stmt
2027 as relevant/live because it's not going to vectorized.
2028 Instead mark the pattern-stmt that replaces it. */
2029 if (vect_print_dump_info (REPORT_DETAILS
))
2030 fprintf (vect_dump
, "last stmt in pattern. don't mark relevant/live.");
2031 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2032 stmt_info
= vinfo_for_stmt (pattern_stmt
);
2033 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
2034 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
2035 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
2036 stmt
= pattern_stmt
;
2039 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
2040 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
2041 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
2043 if (TREE_CODE (stmt
) == PHI_NODE
)
2044 /* Don't put phi-nodes in the worklist. Phis that are marked relevant
2045 or live will fail vectorization later on. */
2048 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
2049 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
2051 if (vect_print_dump_info (REPORT_DETAILS
))
2052 fprintf (vect_dump
, "already marked relevant/live.");
2056 VEC_safe_push (tree
, heap
, *worklist
, stmt
);
2060 /* Function vect_stmt_relevant_p.
2062 Return true if STMT in loop that is represented by LOOP_VINFO is
2063 "relevant for vectorization".
2065 A stmt is considered "relevant for vectorization" if:
2066 - it has uses outside the loop.
2067 - it has vdefs (it alters memory).
2068 - control stmts in the loop (except for the exit condition).
2070 CHECKME: what other side effects would the vectorizer allow? */
2073 vect_stmt_relevant_p (tree stmt
, loop_vec_info loop_vinfo
,
2074 enum vect_relevant
*relevant
, bool *live_p
)
2076 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2077 ssa_op_iter op_iter
;
2078 imm_use_iterator imm_iter
;
2079 use_operand_p use_p
;
2080 def_operand_p def_p
;
2082 *relevant
= vect_unused_in_loop
;
2085 /* cond stmt other than loop exit cond. */
2086 if (is_ctrl_stmt (stmt
) && (stmt
!= LOOP_VINFO_EXIT_COND (loop_vinfo
)))
2087 *relevant
= vect_used_in_loop
;
2089 /* changing memory. */
2090 if (TREE_CODE (stmt
) != PHI_NODE
)
2091 if (!ZERO_SSA_OPERANDS (stmt
, SSA_OP_VIRTUAL_DEFS
))
2093 if (vect_print_dump_info (REPORT_DETAILS
))
2094 fprintf (vect_dump
, "vec_stmt_relevant_p: stmt has vdefs.");
2095 *relevant
= vect_used_in_loop
;
2098 /* uses outside the loop. */
2099 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
2101 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
2103 basic_block bb
= bb_for_stmt (USE_STMT (use_p
));
2104 if (!flow_bb_inside_loop_p (loop
, bb
))
2106 if (vect_print_dump_info (REPORT_DETAILS
))
2107 fprintf (vect_dump
, "vec_stmt_relevant_p: used out of loop.");
2109 /* We expect all such uses to be in the loop exit phis
2110 (because of loop closed form) */
2111 gcc_assert (TREE_CODE (USE_STMT (use_p
)) == PHI_NODE
);
2112 gcc_assert (bb
== single_exit (loop
)->dest
);
2119 return (*live_p
|| *relevant
);
2123 /* Function vect_mark_stmts_to_be_vectorized.
2125 Not all stmts in the loop need to be vectorized. For example:
2134 Stmt 1 and 3 do not need to be vectorized, because loop control and
2135 addressing of vectorized data-refs are handled differently.
2137 This pass detects such stmts. */
/* Two-phase worklist algorithm:
   Phase 1 seeds the worklist with every phi/stmt that
   vect_stmt_relevant_p classifies as relevant or live.
   Phase 2 pops stmts and propagates relevance/liveness backwards to
   the defining stmts of their SSA uses (via vect_mark_relevant) until
   the worklist is empty.  The analysis fails if a use with an
   unrecognized def type is found (see vect_is_simple_use below).  */
2140 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
2142 VEC(tree
,heap
) *worklist
;
2143 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2144 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
2145 unsigned int nbbs
= loop
->num_nodes
;
2146 block_stmt_iterator si
;
2151 stmt_vec_info stmt_vinfo
;
2155 enum vect_relevant relevant
;
2157 enum vect_def_type dt
;
2159 if (vect_print_dump_info (REPORT_DETAILS
))
2160 fprintf (vect_dump
, "=== vect_mark_stmts_to_be_vectorized ===");
2162 worklist
= VEC_alloc (tree
, heap
, 64);
2164 /* 1. Init worklist. */
/* Seed with the loop-header phis that are directly relevant/live.  */
2167 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
2169 if (vect_print_dump_info (REPORT_DETAILS
))
2171 fprintf (vect_dump
, "init: phi relevant? ");
2172 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
2175 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
2176 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
);
/* Seed with the directly relevant/live stmts of every loop bb.  */
2179 for (i
= 0; i
< nbbs
; i
++)
2182 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
2184 stmt
= bsi_stmt (si
);
2186 if (vect_print_dump_info (REPORT_DETAILS
))
2188 fprintf (vect_dump
, "init: stmt relevant? ");
2189 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2192 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
2193 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
);
2198 /* 2. Process_worklist */
2200 while (VEC_length (tree
, worklist
) > 0)
2202 stmt
= VEC_pop (tree
, worklist
);
2204 if (vect_print_dump_info (REPORT_DETAILS
))
2206 fprintf (vect_dump
, "worklist: examine stmt: ");
2207 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2210 /* Examine the USEs of STMT. For each ssa-name USE that is defined
2211 in the loop, mark the stmt that defines it (DEF_STMT) as
2212 relevant/irrelevant and live/dead according to the liveness and
2213 relevance properties of STMT.
/* Only non-phi stmts are ever placed on the worklist; phis seeded in
   phase 1 are marked but their operands are not walked here.  */
2216 gcc_assert (TREE_CODE (stmt
) != PHI_NODE
);
2218 ann
= stmt_ann (stmt
);
2219 stmt_vinfo
= vinfo_for_stmt (stmt
);
/* These are the relevance/liveness values to propagate to the
   defining stmts of STMT's uses.  */
2221 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
2222 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
2224 /* Generally, the liveness and relevance properties of STMT are
2225 propagated to the DEF_STMTs of its USEs:
2226 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
2227 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
2232 If USE is used only for address computations (e.g. array indexing),
2233 which does not need to be directly vectorized, then the
2234 liveness/relevance of the respective DEF_STMT is left unchanged.
2237 If STMT has been identified as defining a reduction variable, then
2240 The last use of STMT is the reduction-variable, which is defined
2241 by a loop-header-phi. We don't want to mark the phi as live or
2242 relevant (because it does not need to be vectorized, it is handled
2243 as part of the vectorization of the reduction), so in this case we
2244 skip the call to vect_mark_relevant.
2246 The rest of the uses of STMT are defined in the loop body. For
2247 the def_stmt of these uses we want to set liveness/relevance
2249 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
2250 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- vect_used_by_reduction
2251 because even though STMT is classified as live (since it defines a
2252 value that is used across loop iterations) and irrelevant (since it
2253 is not used inside the loop), it will be vectorized, and therefore
2254 the corresponding DEF_STMTs need to marked as relevant.
2255 We distinguish between two kinds of relevant stmts - those that are
2256 used by a reduction computation, and those that are (also) used by
2257 a regular computation. This allows us later on to identify stmts
2258 that are used solely by a reduction, and therefore the order of
2259 the results that they produce does not have to be kept.
/* A reduction-def stmt can only have been seeded as live and unused
   (it defines a value used across iterations but not inside the
   loop); its uses are then propagated as vect_used_by_reduction.  */
2263 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
)
2265 gcc_assert (relevant
== vect_unused_in_loop
&& live_p
)
2266 relevant
= vect_used_by_reduction
;
2271 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
2273 if (vect_print_dump_info (REPORT_DETAILS
))
2275 fprintf (vect_dump
, "worklist: examine use %d: ", i
++);
2276 print_generic_expr (vect_dump
, use
, TDF_SLIM
);
2279 /* case 1: we are only interested in uses that need to be vectorized.
2280 Uses that are used for address computation are not considered
2283 if (!exist_non_indexing_operands_for_use_p (use
, stmt
))
/* A use must have a recognized def type; otherwise the loop cannot
   be vectorized and the worklist is released before failing.  */
2286 if (!vect_is_simple_use (use
, loop_vinfo
, &def_stmt
, &def
, &dt
))
2288 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
2289 fprintf (vect_dump
, "not vectorized: unsupported use in stmt.");
2290 VEC_free (tree
, heap
, worklist
);
/* Defs with no defining stmt, or defined outside the loop, need no
   propagation.  */
2294 if (!def_stmt
|| IS_EMPTY_STMT (def_stmt
))
2297 bb
= bb_for_stmt (def_stmt
);
2298 if (!flow_bb_inside_loop_p (loop
, bb
))
2301 /* case 2.1: the reduction-use does not mark the defining-phi
2303 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
2304 && TREE_CODE (def_stmt
) == PHI_NODE
)
/* case 2.2: an induction-def phi appears to be skipped as well,
   like the reduction phi above — it is handled as part of the
   vectorization of the induction itself.  */
2307 if (dt
== vect_induction_def
&& TREE_CODE (def_stmt
) == PHI_NODE
)
2310 vect_mark_relevant (&worklist
, def_stmt
, relevant
, live_p
);
2312 } /* while worklist */
/* Fixed point reached: release the worklist.  */
2314 VEC_free (tree
, heap
, worklist
);
2319 /* Function vect_can_advance_ivs_p
2321 In case the number of iterations that LOOP iterates is unknown at compile
2322 time, an epilog loop will be generated, and the loop induction variables
2323 (IVs) will be "advanced" to the value they are supposed to take just before
2324 the epilog loop. Here we check that the access function of the loop IVs
2325 and the expression that represents the loop bound are simple enough.
2326 These restrictions will be relaxed in the future. */
/* Walks the loop-header phis: virtual phis and reduction phis are
   skipped; for every remaining phi the check fails if the scalar
   evolution has no access function, no evolution part in this loop,
   or a polynomial evolution of degree >= 2 (tree_is_chrec).  */
2329 vect_can_advance_ivs_p (loop_vec_info loop_vinfo
)
2331 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2332 basic_block bb
= loop
->header
;
2335 /* Analyze phi functions of the loop header. */
2337 if (vect_print_dump_info (REPORT_DETAILS
))
2338 fprintf (vect_dump
, "vect_can_advance_ivs_p:");
2340 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
2342 tree access_fn
= NULL
;
2343 tree evolution_part
;
2345 if (vect_print_dump_info (REPORT_DETAILS
))
2347 fprintf (vect_dump
, "Analyze phi: ");
2348 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
2351 /* Skip virtual phi's. The data dependences that are associated with
2352 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2354 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi
))))
2356 if (vect_print_dump_info (REPORT_DETAILS
))
2357 fprintf (vect_dump
, "virtual phi. skip.");
2361 /* Skip reduction phis. */
2363 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi
)) == vect_reduction_def
)
2365 if (vect_print_dump_info (REPORT_DETAILS
))
2366 fprintf (vect_dump
, "reduc phi. skip.");
2370 /* Analyze the evolution function. */
/* instantiate_parameters resolves symbolic names in the chrec
   returned by the scalar-evolution analysis of this phi result.  */
2372 access_fn
= instantiate_parameters
2373 (loop
, analyze_scalar_evolution (loop
, PHI_RESULT (phi
)));
2377 if (vect_print_dump_info (REPORT_DETAILS
))
2378 fprintf (vect_dump
, "No Access function.");
2382 if (vect_print_dump_info (REPORT_DETAILS
))
2384 fprintf (vect_dump
, "Access function of PHI: ");
2385 print_generic_expr (vect_dump
, access_fn
, TDF_SLIM
);
/* The evolution part is the per-iteration change of the access
   function within this loop (loop->num).  */
2388 evolution_part
= evolution_part_in_loop_num (access_fn
, loop
->num
);
2390 if (evolution_part
== NULL_TREE
)
2392 if (vect_print_dump_info (REPORT_DETAILS
))
2393 fprintf (vect_dump
, "No evolution.");
2397 /* FORNOW: We do not transform initial conditions of IVs
2398 which evolution functions are a polynomial of degree >= 2. */
2400 if (tree_is_chrec (evolution_part
))
2408 /* Function vect_get_loop_niters.
2410 Determine how many iterations the loop is executed.
2411 If an expression that represents the number of iterations
2412 can be constructed, place it in NUMBER_OF_ITERATIONS.
2413 Return the loop exit condition. */
/* NUMBER_OF_ITERATIONS is written only when
   number_of_exit_cond_executions yields a known expression (neither
   NULL_TREE nor chrec_dont_know); otherwise it is left untouched so
   the caller can detect the failure.  */
2416 vect_get_loop_niters (struct loop
*loop
, tree
*number_of_iterations
)
2420 if (vect_print_dump_info (REPORT_DETAILS
))
2421 fprintf (vect_dump
, "=== get_loop_niters ===");
/* Number of times the exit condition is executed, as a chrec.  */
2423 niters
= number_of_exit_cond_executions (loop
);
2425 if (niters
!= NULL_TREE
2426 && niters
!= chrec_dont_know
)
2428 *number_of_iterations
= niters
;
2430 if (vect_print_dump_info (REPORT_DETAILS
))
2432 fprintf (vect_dump
, "==> get_loop_niters:" );
2433 print_generic_expr (vect_dump
, *number_of_iterations
, TDF_SLIM
);
/* The COND_EXPR controlling the loop exit is returned regardless of
   whether the iteration count could be computed.  */
2437 return get_loop_exit_condition (loop
);
2441 /* Function vect_analyze_loop_form.
2443 Verify the following restrictions (some may be relaxed in the future):
2444 - it's an inner-most loop
2445 - number of BBs = 2 (which are the loop header and the latch)
2446 - the loop has a pre-header
2447 - the loop has a single entry and exit
2448 - the loop exit condition is simple enough, and the number of iterations
2449 can be analyzed (a countable loop). */
/* On success a fresh loop_vec_info is allocated and its NITERS
   expression and exit condition are recorded in it; each failed check
   emits a REPORT_BAD_FORM_LOOPS / REPORT_UNVECTORIZED_LOOPS dump
   explaining why the loop was rejected.  */
2451 static loop_vec_info
2452 vect_analyze_loop_form (struct loop
*loop
)
2454 loop_vec_info loop_vinfo
;
2456 tree number_of_iterations
= NULL
;
2458 if (vect_print_dump_info (REPORT_DETAILS
))
2459 fprintf (vect_dump
, "=== vect_analyze_loop_form ===");
/* Reject nested (non-innermost) loops.  */
2463 if (vect_print_dump_info (REPORT_OUTER_LOOPS
))
2464 fprintf (vect_dump
, "not vectorized: nested loop.");
/* CFG shape check: single exit, exactly two bbs (header + latch),
   and exactly two header predecessors (preheader + latch).  */
2468 if (!single_exit (loop
)
2469 || loop
->num_nodes
!= 2
2470 || EDGE_COUNT (loop
->header
->preds
) != 2)
2472 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
2474 if (!single_exit (loop
))
2475 fprintf (vect_dump
, "not vectorized: multiple exits.");
2476 else if (loop
->num_nodes
!= 2)
2477 fprintf (vect_dump
, "not vectorized: too many BBs in loop.");
2478 else if (EDGE_COUNT (loop
->header
->preds
) != 2)
2479 fprintf (vect_dump
, "not vectorized: too many incoming edges.");
2485 /* We assume that the loop exit condition is at the end of the loop. i.e,
2486 that the loop is represented as a do-while (with a proper if-guard
2487 before the loop if needed), where the loop header contains all the
2488 executable statements, and the latch is empty. */
2489 if (!empty_block_p (loop
->latch
)
2490 || phi_nodes (loop
->latch
))
2492 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
2493 fprintf (vect_dump
, "not vectorized: unexpected loop form.");
2497 /* Make sure there exists a single-predecessor exit bb: */
2498 if (!single_pred_p (single_exit (loop
)->dest
))
2500 edge e
= single_exit (loop
);
/* A normal exit edge can be split to create such a bb; an abnormal
   exit edge cannot, so the loop is rejected in that case.  */
2501 if (!(e
->flags
& EDGE_ABNORMAL
))
2503 split_loop_exit_edge (e
);
2504 if (vect_print_dump_info (REPORT_DETAILS
))
2505 fprintf (vect_dump
, "split exit edge.");
2509 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
2510 fprintf (vect_dump
, "not vectorized: abnormal loop exit edge.");
/* Nothing to vectorize in an empty header.  */
2515 if (empty_block_p (loop
->header
))
2517 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
2518 fprintf (vect_dump
, "not vectorized: empty loop.");
/* Analyze the exit condition and the iteration count (countability
   check); see vect_get_loop_niters.  */
2522 loop_cond
= vect_get_loop_niters (loop
, &number_of_iterations
);
2525 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
2526 fprintf (vect_dump
, "not vectorized: complicated exit condition.");
2530 if (!number_of_iterations
)
2532 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
2534 "not vectorized: number of iterations cannot be computed.");
2538 if (chrec_contains_undetermined (number_of_iterations
))
2540 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
2541 fprintf (vect_dump
, "Infinite number of iterations.");
/* All form checks passed: build the loop_vec_info and record the
   analysis results in it.  */
2545 loop_vinfo
= new_loop_vec_info (loop
);
2546 LOOP_VINFO_NITERS (loop_vinfo
) = number_of_iterations
;
2548 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
))
2550 if (vect_print_dump_info (REPORT_DETAILS
))
2552 fprintf (vect_dump
, "Symbolic number of iterations is ");
2553 print_generic_expr (vect_dump
, number_of_iterations
, TDF_DETAILS
);
/* A compile-time-known iteration count of zero cannot be
   vectorized.  */
2557 if (LOOP_VINFO_INT_NITERS (loop_vinfo
) == 0)
2559 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
2560 fprintf (vect_dump
, "not vectorized: number of iterations = 0.");
2564 LOOP_VINFO_EXIT_COND (loop_vinfo
) = loop_cond
;
2570 /* Function vect_analyze_loop.
2572 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2573 for it. The different analyses will record information in the
2574 loop_vec_info struct. */
2576 vect_analyze_loop (struct loop
*loop
)
2579 loop_vec_info loop_vinfo
;
2581 if (vect_print_dump_info (REPORT_DETAILS
))
2582 fprintf (vect_dump
, "===== analyze_loop_nest =====");
2584 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
2586 loop_vinfo
= vect_analyze_loop_form (loop
);
2589 if (vect_print_dump_info (REPORT_DETAILS
))
2590 fprintf (vect_dump
, "bad loop form.");
2594 /* Find all data references in the loop (which correspond to vdefs/vuses)
2595 and analyze their evolution in the loop.
2597 FORNOW: Handle only simple, array references, which
2598 alignment can be forced, and aligned pointer-references. */
2600 ok
= vect_analyze_data_refs (loop_vinfo
);
2603 if (vect_print_dump_info (REPORT_DETAILS
))
2604 fprintf (vect_dump
, "bad data references.");
2605 destroy_loop_vec_info (loop_vinfo
);
2609 /* Classify all cross-iteration scalar data-flow cycles.
2610 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2612 vect_analyze_scalar_cycles (loop_vinfo
);
2614 vect_pattern_recog (loop_vinfo
);
2616 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2618 ok
= vect_mark_stmts_to_be_vectorized (loop_vinfo
);
2621 if (vect_print_dump_info (REPORT_DETAILS
))
2622 fprintf (vect_dump
, "unexpected pattern.");
2623 destroy_loop_vec_info (loop_vinfo
);
2627 /* Analyze the alignment of the data-refs in the loop.
2628 Fail if a data reference is found that cannot be vectorized. */
2630 ok
= vect_analyze_data_refs_alignment (loop_vinfo
);
2633 if (vect_print_dump_info (REPORT_DETAILS
))
2634 fprintf (vect_dump
, "bad data alignment.");
2635 destroy_loop_vec_info (loop_vinfo
);
2639 ok
= vect_determine_vectorization_factor (loop_vinfo
);
2642 if (vect_print_dump_info (REPORT_DETAILS
))
2643 fprintf (vect_dump
, "can't determine vectorization factor.");
2644 destroy_loop_vec_info (loop_vinfo
);
2648 /* Analyze data dependences between the data-refs in the loop.
2649 FORNOW: fail at the first data dependence that we encounter. */
2651 ok
= vect_analyze_data_ref_dependences (loop_vinfo
);
2654 if (vect_print_dump_info (REPORT_DETAILS
))
2655 fprintf (vect_dump
, "bad data dependence.");
2656 destroy_loop_vec_info (loop_vinfo
);
2660 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2661 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2663 ok
= vect_analyze_data_ref_accesses (loop_vinfo
);
2666 if (vect_print_dump_info (REPORT_DETAILS
))
2667 fprintf (vect_dump
, "bad data access.");
2668 destroy_loop_vec_info (loop_vinfo
);
2672 /* This pass will decide on using loop versioning and/or loop peeling in
2673 order to enhance the alignment of data references in the loop. */
2675 ok
= vect_enhance_data_refs_alignment (loop_vinfo
);
2678 if (vect_print_dump_info (REPORT_DETAILS
))
2679 fprintf (vect_dump
, "bad data alignment.");
2680 destroy_loop_vec_info (loop_vinfo
);
2684 /* Scan all the operations in the loop and make sure they are
2687 ok
= vect_analyze_operations (loop_vinfo
);
2690 if (vect_print_dump_info (REPORT_DETAILS
))
2691 fprintf (vect_dump
, "bad operation or unsupported loop bound.");
2692 destroy_loop_vec_info (loop_vinfo
);
2696 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo
) = 1;