1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
43 /* Return the smallest scalar part of STMT.
44 This is used to determine the vectype of the stmt. We generally set the
45 vectype according to the type of the result (lhs). For stmts whose
46 result-type is different than the type of the arguments (e.g., demotion,
47 promotion), vectype will be reset appropriately (later). Note that we have
48 to visit the smallest datatype in this function, because that determines the
49 VF. If the smallest datatype in the loop is present only as the rhs of a
50 promotion operation - we'd miss it.
51 Such a case, where a variable of this datatype does not appear in the lhs
52 anywhere in the loop, can only occur if it's an invariant: e.g.:
53 'int_x = (int) short_inv', which we'd expect to have been optimized away by
54 invariant motion. However, we cannot rely on invariant motion to always take
55 invariants out of the loop, and so in the case of promotion we also have to
57 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
61 vect_get_smallest_scalar_type (gimple stmt
, HOST_WIDE_INT
*lhs_size_unit
,
62 HOST_WIDE_INT
*rhs_size_unit
)
64 tree scalar_type
= gimple_expr_type (stmt
);
65 HOST_WIDE_INT lhs
, rhs
;
67 lhs
= rhs
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type
));
69 if (is_gimple_assign (stmt
)
70 && (gimple_assign_cast_p (stmt
)
71 || gimple_assign_rhs_code (stmt
) == WIDEN_MULT_EXPR
72 || gimple_assign_rhs_code (stmt
) == FLOAT_EXPR
))
74 tree rhs_type
= TREE_TYPE (gimple_assign_rhs1 (stmt
));
76 rhs
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type
));
78 scalar_type
= rhs_type
;
87 /* Find the place of the data-ref in STMT in the interleaving chain that starts
88 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
91 vect_get_place_in_interleaving_chain (gimple stmt
, gimple first_stmt
)
93 gimple next_stmt
= first_stmt
;
96 if (first_stmt
!= DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)))
99 while (next_stmt
&& next_stmt
!= stmt
)
102 next_stmt
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt
));
112 /* Function vect_insert_into_interleaving_chain.
114 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
117 vect_insert_into_interleaving_chain (struct data_reference
*dra
,
118 struct data_reference
*drb
)
122 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
123 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
125 prev
= DR_GROUP_FIRST_DR (stmtinfo_b
);
126 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
129 next_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
130 if (tree_int_cst_compare (next_init
, DR_INIT (dra
)) > 0)
133 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = DR_STMT (dra
);
134 DR_GROUP_NEXT_DR (stmtinfo_a
) = next
;
138 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
141 /* We got to the end of the list. Insert here. */
142 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = DR_STMT (dra
);
143 DR_GROUP_NEXT_DR (stmtinfo_a
) = NULL
;
147 /* Function vect_update_interleaving_chain.
149 For two data-refs DRA and DRB that are a part of a chain interleaved data
150 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
152 There are four possible cases:
153 1. New stmts - both DRA and DRB are not a part of any chain:
156 2. DRB is a part of a chain and DRA is not:
157 no need to update FIRST_DR
158 no need to insert DRB
159 insert DRA according to init
160 3. DRA is a part of a chain and DRB is not:
161 if (init of FIRST_DR > init of DRB)
163 NEXT(FIRST_DR) = previous FIRST_DR
165 insert DRB according to its init
166 4. both DRA and DRB are in some interleaving chains:
167 choose the chain with the smallest init of FIRST_DR
168 insert the nodes of the second chain into the first one. */
171 vect_update_interleaving_chain (struct data_reference
*drb
,
172 struct data_reference
*dra
)
174 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
175 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
176 tree next_init
, init_dra_chain
, init_drb_chain
;
177 gimple first_a
, first_b
;
179 gimple node
, prev
, next
, first_stmt
;
181 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
182 if (!DR_GROUP_FIRST_DR (stmtinfo_a
) && !DR_GROUP_FIRST_DR (stmtinfo_b
))
184 DR_GROUP_FIRST_DR (stmtinfo_a
) = DR_STMT (drb
);
185 DR_GROUP_FIRST_DR (stmtinfo_b
) = DR_STMT (drb
);
186 DR_GROUP_NEXT_DR (stmtinfo_b
) = DR_STMT (dra
);
190 /* 2. DRB is a part of a chain and DRA is not. */
191 if (!DR_GROUP_FIRST_DR (stmtinfo_a
) && DR_GROUP_FIRST_DR (stmtinfo_b
))
193 DR_GROUP_FIRST_DR (stmtinfo_a
) = DR_GROUP_FIRST_DR (stmtinfo_b
);
194 /* Insert DRA into the chain of DRB. */
195 vect_insert_into_interleaving_chain (dra
, drb
);
199 /* 3. DRA is a part of a chain and DRB is not. */
200 if (DR_GROUP_FIRST_DR (stmtinfo_a
) && !DR_GROUP_FIRST_DR (stmtinfo_b
))
202 gimple old_first_stmt
= DR_GROUP_FIRST_DR (stmtinfo_a
);
203 tree init_old
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
207 if (tree_int_cst_compare (init_old
, DR_INIT (drb
)) > 0)
209 /* DRB's init is smaller than the init of the stmt previously marked
210 as the first stmt of the interleaving chain of DRA. Therefore, we
211 update FIRST_STMT and put DRB in the head of the list. */
212 DR_GROUP_FIRST_DR (stmtinfo_b
) = DR_STMT (drb
);
213 DR_GROUP_NEXT_DR (stmtinfo_b
) = old_first_stmt
;
215 /* Update all the stmts in the list to point to the new FIRST_STMT. */
216 tmp
= old_first_stmt
;
219 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp
)) = DR_STMT (drb
);
220 tmp
= DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp
));
225 /* Insert DRB in the list of DRA. */
226 vect_insert_into_interleaving_chain (drb
, dra
);
227 DR_GROUP_FIRST_DR (stmtinfo_b
) = DR_GROUP_FIRST_DR (stmtinfo_a
);
232 /* 4. both DRA and DRB are in some interleaving chains. */
233 first_a
= DR_GROUP_FIRST_DR (stmtinfo_a
);
234 first_b
= DR_GROUP_FIRST_DR (stmtinfo_b
);
235 if (first_a
== first_b
)
237 init_dra_chain
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a
)));
238 init_drb_chain
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b
)));
240 if (tree_int_cst_compare (init_dra_chain
, init_drb_chain
) > 0)
242 /* Insert the nodes of DRA chain into the DRB chain.
243 After inserting a node, continue from this node of the DRB chain (don't
244 start from the beginning. */
245 node
= DR_GROUP_FIRST_DR (stmtinfo_a
);
246 prev
= DR_GROUP_FIRST_DR (stmtinfo_b
);
247 first_stmt
= first_b
;
251 /* Insert the nodes of DRB chain into the DRA chain.
252 After inserting a node, continue from this node of the DRA chain (don't
253 start from the beginning. */
254 node
= DR_GROUP_FIRST_DR (stmtinfo_b
);
255 prev
= DR_GROUP_FIRST_DR (stmtinfo_a
);
256 first_stmt
= first_a
;
261 node_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node
)));
262 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
265 next_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
266 if (tree_int_cst_compare (next_init
, node_init
) > 0)
269 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = node
;
270 DR_GROUP_NEXT_DR (vinfo_for_stmt (node
)) = next
;
275 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
279 /* We got to the end of the list. Insert here. */
280 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = node
;
281 DR_GROUP_NEXT_DR (vinfo_for_stmt (node
)) = NULL
;
284 DR_GROUP_FIRST_DR (vinfo_for_stmt (node
)) = first_stmt
;
285 node
= DR_GROUP_NEXT_DR (vinfo_for_stmt (node
));
290 /* Function vect_equal_offsets.
292 Check if OFFSET1 and OFFSET2 are identical expressions. */
295 vect_equal_offsets (tree offset1
, tree offset2
)
299 STRIP_NOPS (offset1
);
300 STRIP_NOPS (offset2
);
302 if (offset1
== offset2
)
305 if (TREE_CODE (offset1
) != TREE_CODE (offset2
)
306 || !BINARY_CLASS_P (offset1
)
307 || !BINARY_CLASS_P (offset2
))
310 res0
= vect_equal_offsets (TREE_OPERAND (offset1
, 0),
311 TREE_OPERAND (offset2
, 0));
312 res1
= vect_equal_offsets (TREE_OPERAND (offset1
, 1),
313 TREE_OPERAND (offset2
, 1));
315 return (res0
&& res1
);
319 /* Function vect_check_interleaving.
321 Check if DRA and DRB are a part of interleaving. In case they are, insert
322 DRA and DRB in an interleaving chain. */
325 vect_check_interleaving (struct data_reference
*dra
,
326 struct data_reference
*drb
)
328 HOST_WIDE_INT type_size_a
, type_size_b
, diff_mod_size
, step
, init_a
, init_b
;
330 /* Check that the data-refs have same first location (except init) and they
331 are both either store or load (not load and store). */
332 if ((DR_BASE_ADDRESS (dra
) != DR_BASE_ADDRESS (drb
)
333 && (TREE_CODE (DR_BASE_ADDRESS (dra
)) != ADDR_EXPR
334 || TREE_CODE (DR_BASE_ADDRESS (drb
)) != ADDR_EXPR
335 || TREE_OPERAND (DR_BASE_ADDRESS (dra
), 0)
336 != TREE_OPERAND (DR_BASE_ADDRESS (drb
),0)))
337 || !vect_equal_offsets (DR_OFFSET (dra
), DR_OFFSET (drb
))
338 || !tree_int_cst_compare (DR_INIT (dra
), DR_INIT (drb
))
339 || DR_IS_READ (dra
) != DR_IS_READ (drb
))
343 1. data-refs are of the same type
344 2. their steps are equal
345 3. the step (if greater than zero) is greater than the difference between
347 type_size_a
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra
))));
348 type_size_b
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb
))));
350 if (type_size_a
!= type_size_b
351 || tree_int_cst_compare (DR_STEP (dra
), DR_STEP (drb
))
352 || !types_compatible_p (TREE_TYPE (DR_REF (dra
)),
353 TREE_TYPE (DR_REF (drb
))))
356 init_a
= TREE_INT_CST_LOW (DR_INIT (dra
));
357 init_b
= TREE_INT_CST_LOW (DR_INIT (drb
));
358 step
= TREE_INT_CST_LOW (DR_STEP (dra
));
362 /* If init_a == init_b + the size of the type * k, we have an interleaving,
363 and DRB is accessed before DRA. */
364 diff_mod_size
= (init_a
- init_b
) % type_size_a
;
366 if ((init_a
- init_b
) > step
)
369 if (diff_mod_size
== 0)
371 vect_update_interleaving_chain (drb
, dra
);
372 if (vect_print_dump_info (REPORT_DR_DETAILS
))
374 fprintf (vect_dump
, "Detected interleaving ");
375 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
376 fprintf (vect_dump
, " and ");
377 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
384 /* If init_b == init_a + the size of the type * k, we have an
385 interleaving, and DRA is accessed before DRB. */
386 diff_mod_size
= (init_b
- init_a
) % type_size_a
;
388 if ((init_b
- init_a
) > step
)
391 if (diff_mod_size
== 0)
393 vect_update_interleaving_chain (dra
, drb
);
394 if (vect_print_dump_info (REPORT_DR_DETAILS
))
396 fprintf (vect_dump
, "Detected interleaving ");
397 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
398 fprintf (vect_dump
, " and ");
399 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
408 /* Check if data references pointed by DR_I and DR_J are same or
409 belong to same interleaving group. Return FALSE if drs are
410 different, otherwise return TRUE. */
413 vect_same_range_drs (data_reference_p dr_i
, data_reference_p dr_j
)
415 gimple stmt_i
= DR_STMT (dr_i
);
416 gimple stmt_j
= DR_STMT (dr_j
);
418 if (operand_equal_p (DR_REF (dr_i
), DR_REF (dr_j
), 0)
419 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i
))
420 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j
))
421 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i
))
422 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j
)))))
428 /* If address ranges represented by DDR_I and DDR_J are equal,
429 return TRUE, otherwise return FALSE. */
432 vect_vfa_range_equal (ddr_p ddr_i
, ddr_p ddr_j
)
434 if ((vect_same_range_drs (DDR_A (ddr_i
), DDR_A (ddr_j
))
435 && vect_same_range_drs (DDR_B (ddr_i
), DDR_B (ddr_j
)))
436 || (vect_same_range_drs (DDR_A (ddr_i
), DDR_B (ddr_j
))
437 && vect_same_range_drs (DDR_B (ddr_i
), DDR_A (ddr_j
))))
443 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
444 tested at run-time. Return TRUE if DDR was successfully inserted.
445 Return false if versioning is not supported. */
448 vect_mark_for_runtime_alias_test (ddr_p ddr
, loop_vec_info loop_vinfo
)
450 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
452 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS
) == 0)
455 if (vect_print_dump_info (REPORT_DR_DETAILS
))
457 fprintf (vect_dump
, "mark for run-time aliasing test between ");
458 print_generic_expr (vect_dump
, DR_REF (DDR_A (ddr
)), TDF_SLIM
);
459 fprintf (vect_dump
, " and ");
460 print_generic_expr (vect_dump
, DR_REF (DDR_B (ddr
)), TDF_SLIM
);
463 if (optimize_loop_nest_for_size_p (loop
))
465 if (vect_print_dump_info (REPORT_DR_DETAILS
))
466 fprintf (vect_dump
, "versioning not supported when optimizing for size.");
470 /* FORNOW: We don't support versioning with outer-loop vectorization. */
473 if (vect_print_dump_info (REPORT_DR_DETAILS
))
474 fprintf (vect_dump
, "versioning not yet supported for outer-loops.");
478 VEC_safe_push (ddr_p
, heap
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
), ddr
);
482 /* Function vect_analyze_data_ref_dependence.
484 Return TRUE if there (might) exist a dependence between a memory-reference
485 DRA and a memory-reference DRB. When versioning for alias may check a
486 dependence at run-time, return FALSE. */
489 vect_analyze_data_ref_dependence (struct data_dependence_relation
*ddr
,
490 loop_vec_info loop_vinfo
)
493 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
494 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
495 struct data_reference
*dra
= DDR_A (ddr
);
496 struct data_reference
*drb
= DDR_B (ddr
);
497 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
498 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
499 int dra_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra
))));
500 int drb_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb
))));
501 lambda_vector dist_v
;
502 unsigned int loop_depth
;
504 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
506 /* Independent data accesses. */
507 vect_check_interleaving (dra
, drb
);
511 if ((DR_IS_READ (dra
) && DR_IS_READ (drb
)) || dra
== drb
)
514 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
516 if (vect_print_dump_info (REPORT_DR_DETAILS
))
519 "versioning for alias required: can't determine dependence between ");
520 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
521 fprintf (vect_dump
, " and ");
522 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
524 /* Add to list of ddrs that need to be tested at run-time. */
525 return !vect_mark_for_runtime_alias_test (ddr
, loop_vinfo
);
528 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
530 if (vect_print_dump_info (REPORT_DR_DETAILS
))
532 fprintf (vect_dump
, "versioning for alias required: bad dist vector for ");
533 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
534 fprintf (vect_dump
, " and ");
535 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
537 /* Add to list of ddrs that need to be tested at run-time. */
538 return !vect_mark_for_runtime_alias_test (ddr
, loop_vinfo
);
541 loop_depth
= index_in_loop_nest (loop
->num
, DDR_LOOP_NEST (ddr
));
542 for (i
= 0; VEC_iterate (lambda_vector
, DDR_DIST_VECTS (ddr
), i
, dist_v
); i
++)
544 int dist
= dist_v
[loop_depth
];
546 if (vect_print_dump_info (REPORT_DR_DETAILS
))
547 fprintf (vect_dump
, "dependence distance = %d.", dist
);
549 /* Same loop iteration. */
550 if (dist
% vectorization_factor
== 0 && dra_size
== drb_size
)
552 /* Two references with distance zero have the same alignment. */
553 VEC_safe_push (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a
), drb
);
554 VEC_safe_push (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b
), dra
);
555 if (vect_print_dump_info (REPORT_ALIGNMENT
))
556 fprintf (vect_dump
, "accesses have the same alignment.");
557 if (vect_print_dump_info (REPORT_DR_DETAILS
))
559 fprintf (vect_dump
, "dependence distance modulo vf == 0 between ");
560 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
561 fprintf (vect_dump
, " and ");
562 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
565 /* For interleaving, mark that there is a read-write dependency if
566 necessary. We check before that one of the data-refs is store. */
567 if (DR_IS_READ (dra
))
568 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a
) = true;
571 if (DR_IS_READ (drb
))
572 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b
) = true;
578 if (abs (dist
) >= vectorization_factor
579 || (dist
> 0 && DDR_REVERSED_P (ddr
)))
581 /* Dependence distance does not create dependence, as far as
582 vectorization is concerned, in this case. If DDR_REVERSED_P the
583 order of the data-refs in DDR was reversed (to make distance
584 vector positive), and the actual distance is negative. */
585 if (vect_print_dump_info (REPORT_DR_DETAILS
))
586 fprintf (vect_dump
, "dependence distance >= VF or negative.");
590 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
593 "not vectorized, possible dependence "
594 "between data-refs ");
595 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
596 fprintf (vect_dump
, " and ");
597 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
606 /* Function vect_analyze_data_ref_dependences.
608 Examine all the data references in the loop, and make sure there do not
609 exist any data dependences between them. */
612 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo
)
615 VEC (ddr_p
, heap
) * ddrs
= LOOP_VINFO_DDRS (loop_vinfo
);
616 struct data_dependence_relation
*ddr
;
618 if (vect_print_dump_info (REPORT_DETAILS
))
619 fprintf (vect_dump
, "=== vect_analyze_dependences ===");
621 for (i
= 0; VEC_iterate (ddr_p
, ddrs
, i
, ddr
); i
++)
622 if (vect_analyze_data_ref_dependence (ddr
, loop_vinfo
))
629 /* Function vect_compute_data_ref_alignment
631 Compute the misalignment of the data reference DR.
634 1. If during the misalignment computation it is found that the data reference
635 cannot be vectorized then false is returned.
636 2. DR_MISALIGNMENT (DR) is defined.
638 FOR NOW: No analysis is actually performed. Misalignment is calculated
639 only for trivial cases. TODO. */
642 vect_compute_data_ref_alignment (struct data_reference
*dr
)
644 gimple stmt
= DR_STMT (dr
);
645 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
646 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
647 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
648 tree ref
= DR_REF (dr
);
650 tree base
, base_addr
;
653 tree aligned_to
, alignment
;
655 if (vect_print_dump_info (REPORT_DETAILS
))
656 fprintf (vect_dump
, "vect_compute_data_ref_alignment:");
658 /* Initialize misalignment to unknown. */
659 SET_DR_MISALIGNMENT (dr
, -1);
661 misalign
= DR_INIT (dr
);
662 aligned_to
= DR_ALIGNED_TO (dr
);
663 base_addr
= DR_BASE_ADDRESS (dr
);
664 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
666 /* In case the dataref is in an inner-loop of the loop that is being
667 vectorized (LOOP), we use the base and misalignment information
668 relative to the outer-loop (LOOP). This is ok only if the misalignment
669 stays the same throughout the execution of the inner-loop, which is why
670 we have to check that the stride of the dataref in the inner-loop evenly
671 divides by the vector size. */
672 if (nested_in_vect_loop_p (loop
, stmt
))
674 tree step
= DR_STEP (dr
);
675 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
677 if (dr_step
% GET_MODE_SIZE (TYPE_MODE (vectype
)) == 0)
679 if (vect_print_dump_info (REPORT_ALIGNMENT
))
680 fprintf (vect_dump
, "inner step divides the vector-size.");
681 misalign
= STMT_VINFO_DR_INIT (stmt_info
);
682 aligned_to
= STMT_VINFO_DR_ALIGNED_TO (stmt_info
);
683 base_addr
= STMT_VINFO_DR_BASE_ADDRESS (stmt_info
);
687 if (vect_print_dump_info (REPORT_ALIGNMENT
))
688 fprintf (vect_dump
, "inner step doesn't divide the vector-size.");
689 misalign
= NULL_TREE
;
693 base
= build_fold_indirect_ref (base_addr
);
694 alignment
= ssize_int (TYPE_ALIGN (vectype
)/BITS_PER_UNIT
);
696 if ((aligned_to
&& tree_int_cst_compare (aligned_to
, alignment
) < 0)
699 if (vect_print_dump_info (REPORT_ALIGNMENT
))
701 fprintf (vect_dump
, "Unknown alignment for access: ");
702 print_generic_expr (vect_dump
, base
, TDF_SLIM
);
708 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base
)),
710 || (TREE_CODE (base_addr
) == SSA_NAME
711 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
712 TREE_TYPE (base_addr
)))),
716 base_aligned
= false;
720 /* Do not change the alignment of global variables if
721 flag_section_anchors is enabled. */
722 if (!vect_can_force_dr_alignment_p (base
, TYPE_ALIGN (vectype
))
723 || (TREE_STATIC (base
) && flag_section_anchors
))
725 if (vect_print_dump_info (REPORT_DETAILS
))
727 fprintf (vect_dump
, "can't force alignment of ref: ");
728 print_generic_expr (vect_dump
, ref
, TDF_SLIM
);
733 /* Force the alignment of the decl.
734 NOTE: This is the only change to the code we make during
735 the analysis phase, before deciding to vectorize the loop. */
736 if (vect_print_dump_info (REPORT_DETAILS
))
737 fprintf (vect_dump
, "force alignment");
738 DECL_ALIGN (base
) = TYPE_ALIGN (vectype
);
739 DECL_USER_ALIGN (base
) = 1;
742 /* At this point we assume that the base is aligned. */
743 gcc_assert (base_aligned
744 || (TREE_CODE (base
) == VAR_DECL
745 && DECL_ALIGN (base
) >= TYPE_ALIGN (vectype
)));
747 /* Modulo alignment. */
748 misalign
= size_binop (FLOOR_MOD_EXPR
, misalign
, alignment
);
750 if (!host_integerp (misalign
, 1))
752 /* Negative or overflowed misalignment value. */
753 if (vect_print_dump_info (REPORT_DETAILS
))
754 fprintf (vect_dump
, "unexpected misalign value");
758 SET_DR_MISALIGNMENT (dr
, TREE_INT_CST_LOW (misalign
));
760 if (vect_print_dump_info (REPORT_DETAILS
))
762 fprintf (vect_dump
, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr
));
763 print_generic_expr (vect_dump
, ref
, TDF_SLIM
);
770 /* Function vect_compute_data_refs_alignment
772 Compute the misalignment of data references in the loop.
773 Return FALSE if a data reference is found that cannot be vectorized. */
776 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo
)
778 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
779 struct data_reference
*dr
;
782 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
783 if (!vect_compute_data_ref_alignment (dr
))
790 /* Function vect_update_misalignment_for_peel
792 DR - the data reference whose misalignment is to be adjusted.
793 DR_PEEL - the data reference whose misalignment is being made
794 zero in the vector loop by the peel.
795 NPEEL - the number of iterations in the peel loop if the misalignment
796 of DR_PEEL is known at compile time. */
799 vect_update_misalignment_for_peel (struct data_reference
*dr
,
800 struct data_reference
*dr_peel
, int npeel
)
803 VEC(dr_p
,heap
) *same_align_drs
;
804 struct data_reference
*current_dr
;
805 int dr_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr
))));
806 int dr_peel_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel
))));
807 stmt_vec_info stmt_info
= vinfo_for_stmt (DR_STMT (dr
));
808 stmt_vec_info peel_stmt_info
= vinfo_for_stmt (DR_STMT (dr_peel
));
810 /* For interleaved data accesses the step in the loop must be multiplied by
811 the size of the interleaving group. */
812 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
813 dr_size
*= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info
)));
814 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info
))
815 dr_peel_size
*= DR_GROUP_SIZE (peel_stmt_info
);
817 /* It can be assumed that the data refs with the same alignment as dr_peel
818 are aligned in the vector loop. */
820 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel
)));
821 for (i
= 0; VEC_iterate (dr_p
, same_align_drs
, i
, current_dr
); i
++)
823 if (current_dr
!= dr
)
825 gcc_assert (DR_MISALIGNMENT (dr
) / dr_size
==
826 DR_MISALIGNMENT (dr_peel
) / dr_peel_size
);
827 SET_DR_MISALIGNMENT (dr
, 0);
831 if (known_alignment_for_access_p (dr
)
832 && known_alignment_for_access_p (dr_peel
))
834 int misal
= DR_MISALIGNMENT (dr
);
835 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
836 misal
+= npeel
* dr_size
;
837 misal
%= GET_MODE_SIZE (TYPE_MODE (vectype
));
838 SET_DR_MISALIGNMENT (dr
, misal
);
842 if (vect_print_dump_info (REPORT_DETAILS
))
843 fprintf (vect_dump
, "Setting misalignment to -1.");
844 SET_DR_MISALIGNMENT (dr
, -1);
848 /* Function vect_verify_datarefs_alignment
850 Return TRUE if all data references in the loop can be
851 handled with respect to alignment. */
854 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo
)
856 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
857 struct data_reference
*dr
;
858 enum dr_alignment_support supportable_dr_alignment
;
861 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
863 gimple stmt
= DR_STMT (dr
);
864 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
866 /* For interleaving, only the alignment of the first access matters. */
867 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
868 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
)
871 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
872 if (!supportable_dr_alignment
)
874 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
878 "not vectorized: unsupported unaligned load.");
881 "not vectorized: unsupported unaligned store.");
885 if (supportable_dr_alignment
!= dr_aligned
886 && vect_print_dump_info (REPORT_ALIGNMENT
))
887 fprintf (vect_dump
, "Vectorizing an unaligned access.");
893 /* Function vector_alignment_reachable_p
895 Return true if vector alignment for DR is reachable by peeling
896 a few loop iterations. Return false otherwise. */
899 vector_alignment_reachable_p (struct data_reference
*dr
)
901 gimple stmt
= DR_STMT (dr
);
902 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
903 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
905 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
907 /* For interleaved access we peel only if number of iterations in
908 the prolog loop ({VF - misalignment}), is a multiple of the
909 number of the interleaved accesses. */
910 int elem_size
, mis_in_elements
;
911 int nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
913 /* FORNOW: handle only known alignment. */
914 if (!known_alignment_for_access_p (dr
))
917 elem_size
= GET_MODE_SIZE (TYPE_MODE (vectype
)) / nelements
;
918 mis_in_elements
= DR_MISALIGNMENT (dr
) / elem_size
;
920 if ((nelements
- mis_in_elements
) % DR_GROUP_SIZE (stmt_info
))
924 /* If misalignment is known at the compile time then allow peeling
925 only if natural alignment is reachable through peeling. */
926 if (known_alignment_for_access_p (dr
) && !aligned_access_p (dr
))
928 HOST_WIDE_INT elmsize
=
929 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
930 if (vect_print_dump_info (REPORT_DETAILS
))
932 fprintf (vect_dump
, "data size =" HOST_WIDE_INT_PRINT_DEC
, elmsize
);
933 fprintf (vect_dump
, ". misalignment = %d. ", DR_MISALIGNMENT (dr
));
935 if (DR_MISALIGNMENT (dr
) % elmsize
)
937 if (vect_print_dump_info (REPORT_DETAILS
))
938 fprintf (vect_dump
, "data size does not divide the misalignment.\n");
943 if (!known_alignment_for_access_p (dr
))
945 tree type
= (TREE_TYPE (DR_REF (dr
)));
946 tree ba
= DR_BASE_OBJECT (dr
);
947 bool is_packed
= false;
950 is_packed
= contains_packed_reference (ba
);
952 if (vect_print_dump_info (REPORT_DETAILS
))
953 fprintf (vect_dump
, "Unknown misalignment, is_packed = %d",is_packed
);
954 if (targetm
.vectorize
.vector_alignment_reachable (type
, is_packed
))
963 /* Function vect_enhance_data_refs_alignment
965 This pass will use loop versioning and loop peeling in order to enhance
966 the alignment of data references in the loop.
968 FOR NOW: we assume that whatever versioning/peeling takes place, only the
969 original loop is to be vectorized; Any other loops that are created by
970 the transformations performed in this pass - are not supposed to be
971 vectorized. This restriction will be relaxed.
973 This pass will require a cost model to guide it whether to apply peeling
974 or versioning or a combination of the two. For example, the scheme that
975 intel uses when given a loop with several memory accesses, is as follows:
976 choose one memory access ('p') which alignment you want to force by doing
977 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
978 other accesses are not necessarily aligned, or (2) use loop versioning to
979 generate one loop in which all accesses are aligned, and another loop in
980 which only 'p' is necessarily aligned.
982 ("Automatic Intra-Register Vectorization for the Intel Architecture",
983 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
984 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
986 Devising a cost model is the most critical aspect of this work. It will
987 guide us on which access to peel for, whether to use loop versioning, how
988 many versions to create, etc. The cost model will probably consist of
989 generic considerations as well as target specific considerations (on
990 powerpc for example, misaligned stores are more painful than misaligned
993 Here are the general steps involved in alignment enhancements:
995 -- original loop, before alignment analysis:
997 x = q[i]; # DR_MISALIGNMENT(q) = unknown
998 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1001 -- After vect_compute_data_refs_alignment:
1002 for (i=0; i<N; i++){
1003 x = q[i]; # DR_MISALIGNMENT(q) = 3
1004 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1007 -- Possibility 1: we do loop versioning:
1009 for (i=0; i<N; i++){ # loop 1A
1010 x = q[i]; # DR_MISALIGNMENT(q) = 3
1011 p[i] = y; # DR_MISALIGNMENT(p) = 0
1015 for (i=0; i<N; i++){ # loop 1B
1016 x = q[i]; # DR_MISALIGNMENT(q) = 3
1017 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1021 -- Possibility 2: we do loop peeling:
1022 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1026 for (i = 3; i < N; i++){ # loop 2A
1027 x = q[i]; # DR_MISALIGNMENT(q) = 0
1028 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1031 -- Possibility 3: combination of loop peeling and versioning:
1032 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1037 for (i = 3; i<N; i++){ # loop 3A
1038 x = q[i]; # DR_MISALIGNMENT(q) = 0
1039 p[i] = y; # DR_MISALIGNMENT(p) = 0
1043 for (i = 3; i<N; i++){ # loop 3B
1044 x = q[i]; # DR_MISALIGNMENT(q) = 0
1045 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1049 These loops are later passed to loop_transform to be vectorized. The
1050 vectorizer will use the alignment information to guide the transformation
1051 (whether to generate regular loads/stores, or with special handling for
1055 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo
)
1057 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1058 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1059 enum dr_alignment_support supportable_dr_alignment
;
1060 struct data_reference
*dr0
= NULL
;
1061 struct data_reference
*dr
;
1063 bool do_peeling
= false;
1064 bool do_versioning
= false;
1067 stmt_vec_info stmt_info
;
1068 int vect_versioning_for_alias_required
;
1070 if (vect_print_dump_info (REPORT_DETAILS
))
1071 fprintf (vect_dump
, "=== vect_enhance_data_refs_alignment ===");
1073 /* While cost model enhancements are expected in the future, the high level
1074 view of the code at this time is as follows:
1076 A) If there is a misaligned write then see if peeling to align this write
1077 can make all data references satisfy vect_supportable_dr_alignment.
1078 If so, update data structures as needed and return true. Note that
1079 at this time vect_supportable_dr_alignment is known to return false
1080 for a misaligned write.
1082 B) If peeling wasn't possible and there is a data reference with an
1083 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1084 then see if loop versioning checks can be used to make all data
1085 references satisfy vect_supportable_dr_alignment. If so, update
1086 data structures as needed and return true.
1088 C) If neither peeling nor versioning were successful then return false if
1089 any data reference does not satisfy vect_supportable_dr_alignment.
1091 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1093 Note, Possibility 3 above (which is peeling and versioning together) is not
1094 being done at this time. */
1096 /* (1) Peeling to force alignment. */
1098 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1100 + How many accesses will become aligned due to the peeling
1101 - How many accesses will become unaligned due to the peeling,
1102 and the cost of misaligned accesses.
1103 - The cost of peeling (the extra runtime checks, the increase
1106 The scheme we use FORNOW: peel to force the alignment of the first
1107 misaligned store in the loop.
1108 Rationale: misaligned stores are not yet supported.
1110 TODO: Use a cost model. */
1112 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1114 stmt
= DR_STMT (dr
);
1115 stmt_info
= vinfo_for_stmt (stmt
);
1117 /* For interleaving, only the alignment of the first access
1119 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
1120 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
)
1123 if (!DR_IS_READ (dr
) && !aligned_access_p (dr
))
1125 do_peeling
= vector_alignment_reachable_p (dr
);
1128 if (!do_peeling
&& vect_print_dump_info (REPORT_DETAILS
))
1129 fprintf (vect_dump
, "vector alignment may not be reachable");
1134 vect_versioning_for_alias_required
=
1135 (VEC_length (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
)) > 0);
1137 /* Temporarily, if versioning for alias is required, we disable peeling
1138 until we support peeling and versioning. Often peeling for alignment
1139 will require peeling for loop-bound, which in turn requires that we
1140 know how to adjust the loop ivs after the loop. */
1141 if (vect_versioning_for_alias_required
1142 || !vect_can_advance_ivs_p (loop_vinfo
)
1143 || !slpeel_can_duplicate_loop_p (loop
, single_exit (loop
)))
1150 gimple stmt
= DR_STMT (dr0
);
1151 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1152 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1153 int nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
1155 if (known_alignment_for_access_p (dr0
))
1157 /* Since it's known at compile time, compute the number of iterations
1158 in the peeled loop (the peeling factor) for use in updating
1159 DR_MISALIGNMENT values. The peeling factor is the vectorization
1160 factor minus the misalignment as an element count. */
1161 mis
= DR_MISALIGNMENT (dr0
);
1162 mis
/= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0
))));
1163 npeel
= nelements
- mis
;
1165 /* For interleaved data access every iteration accesses all the
1166 members of the group, therefore we divide the number of iterations
1167 by the group size. */
1168 stmt_info
= vinfo_for_stmt (DR_STMT (dr0
));
1169 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
1170 npeel
/= DR_GROUP_SIZE (stmt_info
);
1172 if (vect_print_dump_info (REPORT_DETAILS
))
1173 fprintf (vect_dump
, "Try peeling by %d", npeel
);
1176 /* Ensure that all data refs can be vectorized after the peel. */
1177 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1179 int save_misalignment
;
1184 stmt
= DR_STMT (dr
);
1185 stmt_info
= vinfo_for_stmt (stmt
);
1186 /* For interleaving, only the alignment of the first access
1188 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
1189 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
)
1192 save_misalignment
= DR_MISALIGNMENT (dr
);
1193 vect_update_misalignment_for_peel (dr
, dr0
, npeel
);
1194 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
1195 SET_DR_MISALIGNMENT (dr
, save_misalignment
);
1197 if (!supportable_dr_alignment
)
1206 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1207 If the misalignment of DR_i is identical to that of dr0 then set
1208 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1209 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1210 by the peeling factor times the element size of DR_i (MOD the
1211 vectorization factor times the size). Otherwise, the
1212 misalignment of DR_i must be set to unknown. */
1213 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1215 vect_update_misalignment_for_peel (dr
, dr0
, npeel
);
1217 LOOP_VINFO_UNALIGNED_DR (loop_vinfo
) = dr0
;
1218 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
) = DR_MISALIGNMENT (dr0
);
1219 SET_DR_MISALIGNMENT (dr0
, 0);
1220 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1221 fprintf (vect_dump
, "Alignment of access forced using peeling.");
1223 if (vect_print_dump_info (REPORT_DETAILS
))
1224 fprintf (vect_dump
, "Peeling for alignment will be applied.");
1226 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
1233 /* (2) Versioning to force alignment. */
1235 /* Try versioning if:
1236 1) flag_tree_vect_loop_version is TRUE
1237 2) optimize loop for speed
1238 3) there is at least one unsupported misaligned data ref with an unknown
1240 4) all misaligned data refs with a known misalignment are supported, and
1241 5) the number of runtime alignment checks is within reason. */
1244 flag_tree_vect_loop_version
1245 && optimize_loop_nest_for_speed_p (loop
)
1246 && (!loop
->inner
); /* FORNOW */
1250 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1252 stmt
= DR_STMT (dr
);
1253 stmt_info
= vinfo_for_stmt (stmt
);
1255 /* For interleaving, only the alignment of the first access
1257 if (aligned_access_p (dr
)
1258 || (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
1259 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
))
1262 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
1264 if (!supportable_dr_alignment
)
1270 if (known_alignment_for_access_p (dr
)
1271 || VEC_length (gimple
,
1272 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
))
1273 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS
))
1275 do_versioning
= false;
1279 stmt
= DR_STMT (dr
);
1280 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1281 gcc_assert (vectype
);
1283 /* The rightmost bits of an aligned address must be zeros.
1284 Construct the mask needed for this test. For example,
1285 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1286 mask must be 15 = 0xf. */
1287 mask
= GET_MODE_SIZE (TYPE_MODE (vectype
)) - 1;
1289 /* FORNOW: use the same mask to test all potentially unaligned
1290 references in the loop. The vectorizer currently supports
1291 a single vector size, see the reference to
1292 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1293 vectorization factor is computed. */
1294 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo
)
1295 || LOOP_VINFO_PTR_MASK (loop_vinfo
) == mask
);
1296 LOOP_VINFO_PTR_MASK (loop_vinfo
) = mask
;
1297 VEC_safe_push (gimple
, heap
,
1298 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
),
1303 /* Versioning requires at least one misaligned data reference. */
1304 if (VEC_length (gimple
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
)) == 0)
1305 do_versioning
= false;
1306 else if (!do_versioning
)
1307 VEC_truncate (gimple
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
), 0);
1312 VEC(gimple
,heap
) *may_misalign_stmts
1313 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
);
1316 /* It can now be assumed that the data references in the statements
1317 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1318 of the loop being vectorized. */
1319 for (i
= 0; VEC_iterate (gimple
, may_misalign_stmts
, i
, stmt
); i
++)
1321 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1322 dr
= STMT_VINFO_DATA_REF (stmt_info
);
1323 SET_DR_MISALIGNMENT (dr
, 0);
1324 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1325 fprintf (vect_dump
, "Alignment of access forced using versioning.");
1328 if (vect_print_dump_info (REPORT_DETAILS
))
1329 fprintf (vect_dump
, "Versioning for alignment will be applied.");
1331 /* Peeling and versioning can't be done together at this time. */
1332 gcc_assert (! (do_peeling
&& do_versioning
));
1334 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
1339 /* This point is reached if neither peeling nor versioning is being done. */
1340 gcc_assert (! (do_peeling
|| do_versioning
));
1342 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
1347 /* Function vect_analyze_data_refs_alignment
1349 Analyze the alignment of the data-references in the loop.
1350 Return FALSE if a data reference is found that cannot be vectorized. */
1353 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo
)
1355 if (vect_print_dump_info (REPORT_DETAILS
))
1356 fprintf (vect_dump
, "=== vect_analyze_data_refs_alignment ===");
1358 if (!vect_compute_data_refs_alignment (loop_vinfo
))
1360 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1362 "not vectorized: can't calculate alignment for data ref.");
1370 /* Analyze groups of strided accesses: check that DR belongs to a group of
1371 strided accesses of legal size, step, etc. Detect gaps, single element
1372 interleaving, and other special cases. Set strided access info.
1373 Collect groups of strided stores for further use in SLP analysis. */
1376 vect_analyze_group_access (struct data_reference
*dr
)
1378 tree step
= DR_STEP (dr
);
1379 tree scalar_type
= TREE_TYPE (DR_REF (dr
));
1380 HOST_WIDE_INT type_size
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type
));
1381 gimple stmt
= DR_STMT (dr
);
1382 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1383 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1384 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
1385 HOST_WIDE_INT stride
;
1386 bool slp_impossible
= false;
1388 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1389 interleaving group (including gaps). */
1390 stride
= dr_step
/ type_size
;
1392 /* Not consecutive access is possible only if it is a part of interleaving. */
1393 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)))
1395 /* Check if it this DR is a part of interleaving, and is a single
1396 element of the group that is accessed in the loop. */
1398 /* Gaps are supported only for loads. STEP must be a multiple of the type
1399 size. The size of the group must be a power of 2. */
1401 && (dr_step
% type_size
) == 0
1403 && exact_log2 (stride
) != -1)
1405 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) = stmt
;
1406 DR_GROUP_SIZE (vinfo_for_stmt (stmt
)) = stride
;
1407 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1409 fprintf (vect_dump
, "Detected single element interleaving %d ",
1410 DR_GROUP_SIZE (vinfo_for_stmt (stmt
)));
1411 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
1412 fprintf (vect_dump
, " step ");
1413 print_generic_expr (vect_dump
, step
, TDF_SLIM
);
1417 if (vect_print_dump_info (REPORT_DETAILS
))
1418 fprintf (vect_dump
, "not consecutive access");
1422 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) == stmt
)
1424 /* First stmt in the interleaving chain. Check the chain. */
1425 gimple next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt
));
1426 struct data_reference
*data_ref
= dr
;
1427 unsigned int count
= 1;
1429 tree prev_init
= DR_INIT (data_ref
);
1431 HOST_WIDE_INT diff
, count_in_bytes
, gaps
= 0;
1435 /* Skip same data-refs. In case that two or more stmts share data-ref
1436 (supported only for loads), we vectorize only the first stmt, and
1437 the rest get their vectorized loads from the first one. */
1438 if (!tree_int_cst_compare (DR_INIT (data_ref
),
1439 DR_INIT (STMT_VINFO_DATA_REF (
1440 vinfo_for_stmt (next
)))))
1442 if (!DR_IS_READ (data_ref
))
1444 if (vect_print_dump_info (REPORT_DETAILS
))
1445 fprintf (vect_dump
, "Two store stmts share the same dr.");
1449 /* Check that there is no load-store dependencies for this loads
1450 to prevent a case of load-store-load to the same location. */
1451 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next
))
1452 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev
)))
1454 if (vect_print_dump_info (REPORT_DETAILS
))
1456 "READ_WRITE dependence in interleaving.");
1460 /* For load use the same data-ref load. */
1461 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next
)) = prev
;
1464 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next
));
1469 /* Check that all the accesses have the same STEP. */
1470 next_step
= DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
1471 if (tree_int_cst_compare (step
, next_step
))
1473 if (vect_print_dump_info (REPORT_DETAILS
))
1474 fprintf (vect_dump
, "not consecutive access in interleaving");
1478 data_ref
= STMT_VINFO_DATA_REF (vinfo_for_stmt (next
));
1479 /* Check that the distance between two accesses is equal to the type
1480 size. Otherwise, we have gaps. */
1481 diff
= (TREE_INT_CST_LOW (DR_INIT (data_ref
))
1482 - TREE_INT_CST_LOW (prev_init
)) / type_size
;
1485 /* FORNOW: SLP of accesses with gaps is not supported. */
1486 slp_impossible
= true;
1487 if (!DR_IS_READ (data_ref
))
1489 if (vect_print_dump_info (REPORT_DETAILS
))
1490 fprintf (vect_dump
, "interleaved store with gaps");
1497 /* Store the gap from the previous member of the group. If there is no
1498 gap in the access, DR_GROUP_GAP is always 1. */
1499 DR_GROUP_GAP (vinfo_for_stmt (next
)) = diff
;
1501 prev_init
= DR_INIT (data_ref
);
1502 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next
));
1503 /* Count the number of data-refs in the chain. */
1507 /* COUNT is the number of accesses found, we multiply it by the size of
1508 the type to get COUNT_IN_BYTES. */
1509 count_in_bytes
= type_size
* count
;
1511 /* Check that the size of the interleaving (including gaps) is not greater
1513 if (dr_step
&& dr_step
< count_in_bytes
+ gaps
* type_size
)
1515 if (vect_print_dump_info (REPORT_DETAILS
))
1517 fprintf (vect_dump
, "interleaving size is greater than step for ");
1518 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
1523 /* Check that the size of the interleaving is equal to STEP for stores,
1524 i.e., that there are no gaps. */
1525 if (dr_step
!= count_in_bytes
)
1527 if (DR_IS_READ (dr
))
1529 slp_impossible
= true;
1530 /* There is a gap after the last load in the group. This gap is a
1531 difference between the stride and the number of elements. When
1532 there is no gap, this difference should be 0. */
1533 DR_GROUP_GAP (vinfo_for_stmt (stmt
)) = stride
- count
;
1537 if (vect_print_dump_info (REPORT_DETAILS
))
1538 fprintf (vect_dump
, "interleaved store with gaps");
1543 /* Check that STEP is a multiple of type size. */
1544 if ((dr_step
% type_size
) != 0)
1546 if (vect_print_dump_info (REPORT_DETAILS
))
1548 fprintf (vect_dump
, "step is not a multiple of type size: step ");
1549 print_generic_expr (vect_dump
, step
, TDF_SLIM
);
1550 fprintf (vect_dump
, " size ");
1551 print_generic_expr (vect_dump
, TYPE_SIZE_UNIT (scalar_type
),
1557 /* FORNOW: we handle only interleaving that is a power of 2.
1558 We don't fail here if it may be still possible to vectorize the
1559 group using SLP. If not, the size of the group will be checked in
1560 vect_analyze_operations, and the vectorization will fail. */
1561 if (exact_log2 (stride
) == -1)
1563 if (vect_print_dump_info (REPORT_DETAILS
))
1564 fprintf (vect_dump
, "interleaving is not a power of 2");
1569 DR_GROUP_SIZE (vinfo_for_stmt (stmt
)) = stride
;
1570 if (vect_print_dump_info (REPORT_DETAILS
))
1571 fprintf (vect_dump
, "Detected interleaving of size %d", (int)stride
);
1573 /* SLP: create an SLP data structure for every interleaving group of
1574 stores for further analysis in vect_analyse_slp. */
1575 if (!DR_IS_READ (dr
) && !slp_impossible
)
1576 VEC_safe_push (gimple
, heap
, LOOP_VINFO_STRIDED_STORES (loop_vinfo
), stmt
);
1583 /* Analyze the access pattern of the data-reference DR.
1584 In case of non-consecutive accesses call vect_analyze_group_access() to
1585 analyze groups of strided accesses. */
1588 vect_analyze_data_ref_access (struct data_reference
*dr
)
1590 tree step
= DR_STEP (dr
);
1591 tree scalar_type
= TREE_TYPE (DR_REF (dr
));
1592 gimple stmt
= DR_STMT (dr
);
1593 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1594 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1595 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1596 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
1600 if (vect_print_dump_info (REPORT_DETAILS
))
1601 fprintf (vect_dump
, "bad data-ref access");
1605 /* Don't allow invariant accesses. */
1609 if (nested_in_vect_loop_p (loop
, stmt
))
1611 /* Interleaved accesses are not yet supported within outer-loop
1612 vectorization for references in the inner-loop. */
1613 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) = NULL
;
1615 /* For the rest of the analysis we use the outer-loop step. */
1616 step
= STMT_VINFO_DR_STEP (stmt_info
);
1617 dr_step
= TREE_INT_CST_LOW (step
);
1621 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1622 fprintf (vect_dump
, "zero step in outer loop.");
1623 if (DR_IS_READ (dr
))
1631 if (!tree_int_cst_compare (step
, TYPE_SIZE_UNIT (scalar_type
)))
1633 /* Mark that it is not interleaving. */
1634 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) = NULL
;
1638 if (nested_in_vect_loop_p (loop
, stmt
))
1640 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1641 fprintf (vect_dump
, "strided access in outer loop.");
1645 /* Not consecutive access - check if it's a part of interleaving group. */
1646 return vect_analyze_group_access (dr
);
1650 /* Function vect_analyze_data_ref_accesses.
1652 Analyze the access pattern of all the data references in the loop.
1654 FORNOW: the only access pattern that is considered vectorizable is a
1655 simple step 1 (consecutive) access.
1657 FORNOW: handle only arrays and pointer accesses. */
1660 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo
)
1663 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1664 struct data_reference
*dr
;
1666 if (vect_print_dump_info (REPORT_DETAILS
))
1667 fprintf (vect_dump
, "=== vect_analyze_data_ref_accesses ===");
1669 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1670 if (!vect_analyze_data_ref_access (dr
))
1672 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1673 fprintf (vect_dump
, "not vectorized: complicated access pattern.");
1680 /* Function vect_prune_runtime_alias_test_list.
1682 Prune a list of ddrs to be tested at run-time by versioning for alias.
1683 Return FALSE if resulting list of ddrs is longer then allowed by
1684 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
1687 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo
)
1689 VEC (ddr_p
, heap
) * ddrs
=
1690 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
);
1693 if (vect_print_dump_info (REPORT_DETAILS
))
1694 fprintf (vect_dump
, "=== vect_prune_runtime_alias_test_list ===");
1696 for (i
= 0; i
< VEC_length (ddr_p
, ddrs
); )
1701 ddr_i
= VEC_index (ddr_p
, ddrs
, i
);
1704 for (j
= 0; j
< i
; j
++)
1706 ddr_p ddr_j
= VEC_index (ddr_p
, ddrs
, j
);
1708 if (vect_vfa_range_equal (ddr_i
, ddr_j
))
1710 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1712 fprintf (vect_dump
, "found equal ranges ");
1713 print_generic_expr (vect_dump
, DR_REF (DDR_A (ddr_i
)), TDF_SLIM
);
1714 fprintf (vect_dump
, ", ");
1715 print_generic_expr (vect_dump
, DR_REF (DDR_B (ddr_i
)), TDF_SLIM
);
1716 fprintf (vect_dump
, " and ");
1717 print_generic_expr (vect_dump
, DR_REF (DDR_A (ddr_j
)), TDF_SLIM
);
1718 fprintf (vect_dump
, ", ");
1719 print_generic_expr (vect_dump
, DR_REF (DDR_B (ddr_j
)), TDF_SLIM
);
1728 VEC_ordered_remove (ddr_p
, ddrs
, i
);
1734 if (VEC_length (ddr_p
, ddrs
) >
1735 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS
))
1737 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1740 "disable versioning for alias - max number of generated "
1741 "checks exceeded.");
1744 VEC_truncate (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
), 0);
1753 /* Function vect_analyze_data_refs.
1755 Find all the data references in the loop.
1757 The general structure of the analysis of data refs in the vectorizer is as
1759 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1760 find and analyze all data-refs in the loop and their dependences.
1761 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1762 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1763 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1768 vect_analyze_data_refs (loop_vec_info loop_vinfo
)
1770 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1772 VEC (data_reference_p
, heap
) *datarefs
;
1773 struct data_reference
*dr
;
1776 if (vect_print_dump_info (REPORT_DETAILS
))
1777 fprintf (vect_dump
, "=== vect_analyze_data_refs ===\n");
1779 compute_data_dependences_for_loop (loop
, true,
1780 &LOOP_VINFO_DATAREFS (loop_vinfo
),
1781 &LOOP_VINFO_DDRS (loop_vinfo
));
1783 /* Go through the data-refs, check that the analysis succeeded. Update pointer
1784 from stmt_vec_info struct to DR and vectype. */
1785 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1787 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1790 stmt_vec_info stmt_info
;
1792 tree base
, offset
, init
;
1794 if (!dr
|| !DR_REF (dr
))
1796 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1797 fprintf (vect_dump
, "not vectorized: unhandled data-ref ");
1801 stmt
= DR_STMT (dr
);
1802 stmt_info
= vinfo_for_stmt (stmt
);
1804 /* Check that analysis of the data-ref succeeded. */
1805 if (!DR_BASE_ADDRESS (dr
) || !DR_OFFSET (dr
) || !DR_INIT (dr
)
1808 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1810 fprintf (vect_dump
, "not vectorized: data ref analysis failed ");
1811 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
1816 if (TREE_CODE (DR_BASE_ADDRESS (dr
)) == INTEGER_CST
)
1818 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1819 fprintf (vect_dump
, "not vectorized: base addr of dr is a "
1824 base
= unshare_expr (DR_BASE_ADDRESS (dr
));
1825 offset
= unshare_expr (DR_OFFSET (dr
));
1826 init
= unshare_expr (DR_INIT (dr
));
1828 /* Update DR field in stmt_vec_info struct. */
1829 bb
= gimple_bb (stmt
);
1831 /* If the dataref is in an inner-loop of the loop that is considered for
1832 for vectorization, we also want to analyze the access relative to
1833 the outer-loop (DR contains information only relative to the
1834 inner-most enclosing loop). We do that by building a reference to the
1835 first location accessed by the inner-loop, and analyze it relative to
1837 if (nested_in_vect_loop_p (loop
, stmt
))
1839 tree outer_step
, outer_base
, outer_init
;
1840 HOST_WIDE_INT pbitsize
, pbitpos
;
1842 enum machine_mode pmode
;
1843 int punsignedp
, pvolatilep
;
1844 affine_iv base_iv
, offset_iv
;
1847 /* Build a reference to the first location accessed by the
1848 inner-loop: *(BASE+INIT). (The first location is actually
1849 BASE+INIT+OFFSET, but we add OFFSET separately later). */
1850 tree inner_base
= build_fold_indirect_ref
1851 (fold_build2 (POINTER_PLUS_EXPR
,
1852 TREE_TYPE (base
), base
,
1853 fold_convert (sizetype
, init
)));
1855 if (vect_print_dump_info (REPORT_DETAILS
))
1857 fprintf (vect_dump
, "analyze in outer-loop: ");
1858 print_generic_expr (vect_dump
, inner_base
, TDF_SLIM
);
1861 outer_base
= get_inner_reference (inner_base
, &pbitsize
, &pbitpos
,
1862 &poffset
, &pmode
, &punsignedp
, &pvolatilep
, false);
1863 gcc_assert (outer_base
!= NULL_TREE
);
1865 if (pbitpos
% BITS_PER_UNIT
!= 0)
1867 if (vect_print_dump_info (REPORT_DETAILS
))
1868 fprintf (vect_dump
, "failed: bit offset alignment.\n");
1872 outer_base
= build_fold_addr_expr (outer_base
);
1873 if (!simple_iv (loop
, loop_containing_stmt (stmt
), outer_base
,
1876 if (vect_print_dump_info (REPORT_DETAILS
))
1877 fprintf (vect_dump
, "failed: evolution of base is not affine.\n");
1884 poffset
= fold_build2 (PLUS_EXPR
, TREE_TYPE (offset
), offset
,
1892 offset_iv
.base
= ssize_int (0);
1893 offset_iv
.step
= ssize_int (0);
1895 else if (!simple_iv (loop
, loop_containing_stmt (stmt
), poffset
,
1898 if (vect_print_dump_info (REPORT_DETAILS
))
1899 fprintf (vect_dump
, "evolution of offset is not affine.\n");
1903 outer_init
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
1904 split_constant_offset (base_iv
.base
, &base_iv
.base
, &dinit
);
1905 outer_init
= size_binop (PLUS_EXPR
, outer_init
, dinit
);
1906 split_constant_offset (offset_iv
.base
, &offset_iv
.base
, &dinit
);
1907 outer_init
= size_binop (PLUS_EXPR
, outer_init
, dinit
);
1909 outer_step
= size_binop (PLUS_EXPR
,
1910 fold_convert (ssizetype
, base_iv
.step
),
1911 fold_convert (ssizetype
, offset_iv
.step
));
1913 STMT_VINFO_DR_STEP (stmt_info
) = outer_step
;
1914 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
1915 STMT_VINFO_DR_BASE_ADDRESS (stmt_info
) = base_iv
.base
;
1916 STMT_VINFO_DR_INIT (stmt_info
) = outer_init
;
1917 STMT_VINFO_DR_OFFSET (stmt_info
) =
1918 fold_convert (ssizetype
, offset_iv
.base
);
1919 STMT_VINFO_DR_ALIGNED_TO (stmt_info
) =
1920 size_int (highest_pow2_factor (offset_iv
.base
));
1922 if (vect_print_dump_info (REPORT_DETAILS
))
1924 fprintf (vect_dump
, "\touter base_address: ");
1925 print_generic_expr (vect_dump
, STMT_VINFO_DR_BASE_ADDRESS (stmt_info
), TDF_SLIM
);
1926 fprintf (vect_dump
, "\n\touter offset from base address: ");
1927 print_generic_expr (vect_dump
, STMT_VINFO_DR_OFFSET (stmt_info
), TDF_SLIM
);
1928 fprintf (vect_dump
, "\n\touter constant offset from base address: ");
1929 print_generic_expr (vect_dump
, STMT_VINFO_DR_INIT (stmt_info
), TDF_SLIM
);
1930 fprintf (vect_dump
, "\n\touter step: ");
1931 print_generic_expr (vect_dump
, STMT_VINFO_DR_STEP (stmt_info
), TDF_SLIM
);
1932 fprintf (vect_dump
, "\n\touter aligned to: ");
1933 print_generic_expr (vect_dump
, STMT_VINFO_DR_ALIGNED_TO (stmt_info
), TDF_SLIM
);
1937 if (STMT_VINFO_DATA_REF (stmt_info
))
1939 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1942 "not vectorized: more than one data ref in stmt: ");
1943 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
1948 STMT_VINFO_DATA_REF (stmt_info
) = dr
;
1950 /* Set vectype for STMT. */
1951 scalar_type
= TREE_TYPE (DR_REF (dr
));
1952 STMT_VINFO_VECTYPE (stmt_info
) =
1953 get_vectype_for_scalar_type (scalar_type
);
1954 if (!STMT_VINFO_VECTYPE (stmt_info
))
1956 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1959 "not vectorized: no vectype for stmt: ");
1960 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
1961 fprintf (vect_dump
, " scalar_type: ");
1962 print_generic_expr (vect_dump
, scalar_type
, TDF_DETAILS
);
1972 /* Function vect_get_new_vect_var.
1974 Returns a name for a new variable. The current naming scheme appends the
1975 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1976 the name of vectorizer generated variables, and appends that to NAME if
1980 vect_get_new_vect_var (tree type
, enum vect_var_kind var_kind
, const char *name
)
1987 case vect_simple_var
:
1990 case vect_scalar_var
:
1993 case vect_pointer_var
:
2002 char* tmp
= concat (prefix
, name
, NULL
);
2003 new_vect_var
= create_tmp_var (type
, tmp
);
2007 new_vect_var
= create_tmp_var (type
, prefix
);
2009 /* Mark vector typed variable as a gimple register variable. */
2010 if (TREE_CODE (type
) == VECTOR_TYPE
)
2011 DECL_GIMPLE_REG_P (new_vect_var
) = true;
2013 return new_vect_var
;
2017 /* Function vect_create_addr_base_for_vector_ref.
2019 Create an expression that computes the address of the first memory location
2020 that will be accessed for a data reference.
2023 STMT: The statement containing the data reference.
2024 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2025 OFFSET: Optional. If supplied, it is be added to the initial address.
2026 LOOP: Specify relative to which loop-nest should the address be computed.
2027 For example, when the dataref is in an inner-loop nested in an
2028 outer-loop that is now being vectorized, LOOP can be either the
2029 outer-loop, or the inner-loop. The first memory location accessed
2030 by the following dataref ('in' points to short):
2037 if LOOP=i_loop: &in (relative to i_loop)
2038 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2041 1. Return an SSA_NAME whose value is the address of the memory location of
2042 the first vector of the data reference.
2043 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2044 these statement(s) which define the returned SSA_NAME.
2046 FORNOW: We are only handling array accesses with step 1. */
2049 vect_create_addr_base_for_vector_ref (gimple stmt
,
2050 gimple_seq
*new_stmt_list
,
2054 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2055 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2056 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
2057 tree data_ref_base
= unshare_expr (DR_BASE_ADDRESS (dr
));
2059 tree data_ref_base_var
;
2061 tree addr_base
, addr_expr
;
2063 gimple_seq seq
= NULL
;
2064 tree base_offset
= unshare_expr (DR_OFFSET (dr
));
2065 tree init
= unshare_expr (DR_INIT (dr
));
2067 tree step
= TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
)));
2070 if (loop
!= containing_loop
)
2072 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2073 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2075 gcc_assert (nested_in_vect_loop_p (loop
, stmt
));
2077 data_ref_base
= unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info
));
2078 base_offset
= unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info
));
2079 init
= unshare_expr (STMT_VINFO_DR_INIT (stmt_info
));
2082 /* Create data_ref_base */
2083 base_name
= build_fold_indirect_ref (data_ref_base
);
2084 data_ref_base_var
= create_tmp_var (TREE_TYPE (data_ref_base
), "batmp");
2085 add_referenced_var (data_ref_base_var
);
2086 data_ref_base
= force_gimple_operand (data_ref_base
, &seq
, true,
2088 gimple_seq_add_seq (new_stmt_list
, seq
);
2090 /* Create base_offset */
2091 base_offset
= size_binop (PLUS_EXPR
,
2092 fold_convert (sizetype
, base_offset
),
2093 fold_convert (sizetype
, init
));
2094 dest
= create_tmp_var (sizetype
, "base_off");
2095 add_referenced_var (dest
);
2096 base_offset
= force_gimple_operand (base_offset
, &seq
, true, dest
);
2097 gimple_seq_add_seq (new_stmt_list
, seq
);
2101 tree tmp
= create_tmp_var (sizetype
, "offset");
2103 add_referenced_var (tmp
);
2104 offset
= fold_build2 (MULT_EXPR
, sizetype
,
2105 fold_convert (sizetype
, offset
), step
);
2106 base_offset
= fold_build2 (PLUS_EXPR
, sizetype
,
2107 base_offset
, offset
);
2108 base_offset
= force_gimple_operand (base_offset
, &seq
, false, tmp
);
2109 gimple_seq_add_seq (new_stmt_list
, seq
);
2112 /* base + base_offset */
2113 addr_base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (data_ref_base
),
2114 data_ref_base
, base_offset
);
2116 vect_ptr_type
= build_pointer_type (STMT_VINFO_VECTYPE (stmt_info
));
2118 vec_stmt
= fold_convert (vect_ptr_type
, addr_base
);
2119 addr_expr
= vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
,
2120 get_name (base_name
));
2122 add_referenced_var (addr_expr
);
2123 vec_stmt
= force_gimple_operand (vec_stmt
, &seq
, false, addr_expr
);
2124 gimple_seq_add_seq (new_stmt_list
, seq
);
2126 if (vect_print_dump_info (REPORT_DETAILS
))
2128 fprintf (vect_dump
, "created ");
2129 print_generic_expr (vect_dump
, vec_stmt
, TDF_SLIM
);
2136 /* Function vect_create_data_ref_ptr.
2138 Create a new pointer to vector type (vp), that points to the first location
2139 accessed in the loop by STMT, along with the def-use update chain to
2140 appropriately advance the pointer through the loop iterations. Also set
2141 aliasing information for the pointer. This vector pointer is used by the
2142 callers to this function to create a memory reference expression for vector
2146 1. STMT: a stmt that references memory. Expected to be of the form
2147 GIMPLE_ASSIGN <name, data-ref> or
2148 GIMPLE_ASSIGN <data-ref, name>.
2149 2. AT_LOOP: the loop where the vector memref is to be created.
2150 3. OFFSET (optional): an offset to be added to the initial address accessed
2151 by the data-ref in STMT.
2152 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2153 pointing to the initial address.
2154 5. TYPE: if not NULL indicates the required type of the data-ref.
2157 1. Declare a new ptr to vector_type, and have it point to the base of the
2158 data reference (initial addressed accessed by the data reference).
2159 For example, for vector of type V8HI, the following code is generated:
2162 vp = (v8hi *)initial_address;
2164 if OFFSET is not supplied:
2165 initial_address = &a[init];
2166 if OFFSET is supplied:
2167 initial_address = &a[init + OFFSET];
2169 Return the initial_address in INITIAL_ADDRESS.
2171 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2172 update the pointer in each iteration of the loop.
2174 Return the increment stmt that updates the pointer in PTR_INCR.
2176 3. Set INV_P to true if the access pattern of the data reference in the
2177 vectorized loop is invariant. Set it to false otherwise.
2179 4. Return the pointer. */
2182 vect_create_data_ref_ptr (gimple stmt
, struct loop
*at_loop
,
2183 tree offset
, tree
*initial_address
, gimple
*ptr_incr
,
2184 bool only_init
, bool *inv_p
)
2187 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2188 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2189 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2190 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
2191 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
2192 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2197 gimple_seq new_stmt_list
= NULL
;
2201 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2203 gimple_stmt_iterator incr_gsi
;
2205 tree indx_before_incr
, indx_after_incr
;
2209 /* Check the step (evolution) of the load in LOOP, and record
2210 whether it's invariant. */
2211 if (nested_in_vect_loop
)
2212 step
= STMT_VINFO_DR_STEP (stmt_info
);
2214 step
= DR_STEP (STMT_VINFO_DATA_REF (stmt_info
));
2216 if (tree_int_cst_compare (step
, size_zero_node
) == 0)
2221 /* Create an expression for the first address accessed by this load
2223 base_name
= build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr
)));
2225 if (vect_print_dump_info (REPORT_DETAILS
))
2227 tree data_ref_base
= base_name
;
2228 fprintf (vect_dump
, "create vector-pointer variable to type: ");
2229 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
2230 if (TREE_CODE (data_ref_base
) == VAR_DECL
)
2231 fprintf (vect_dump
, " vectorizing a one dimensional array ref: ");
2232 else if (TREE_CODE (data_ref_base
) == ARRAY_REF
)
2233 fprintf (vect_dump
, " vectorizing a multidimensional array ref: ");
2234 else if (TREE_CODE (data_ref_base
) == COMPONENT_REF
)
2235 fprintf (vect_dump
, " vectorizing a record based array ref: ");
2236 else if (TREE_CODE (data_ref_base
) == SSA_NAME
)
2237 fprintf (vect_dump
, " vectorizing a pointer ref: ");
2238 print_generic_expr (vect_dump
, base_name
, TDF_SLIM
);
2241 /** (1) Create the new vector-pointer variable: **/
2242 vect_ptr_type
= build_pointer_type (vectype
);
2243 vect_ptr
= vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
,
2244 get_name (base_name
));
2245 /* If any of the data-references in the stmt group does not conflict
2246 with the created vector data-reference use a ref-all pointer instead. */
2247 if (STMT_VINFO_DR_GROUP_SIZE (stmt_info
) > 1)
2249 gimple orig_stmt
= STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info
);
2252 tree lhs
= gimple_assign_lhs (orig_stmt
);
2253 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr
),
2254 get_alias_set (lhs
)))
2256 vect_ptr_type
= build_pointer_type_for_mode (vectype
,
2258 vect_ptr
= vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
,
2259 get_name (base_name
));
2263 orig_stmt
= STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt
));
2268 add_referenced_var (vect_ptr
);
2270 /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
2271 vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
2272 def-use update cycles for the pointer: One relative to the outer-loop
2273 (LOOP), which is what steps (3) and (4) below do. The other is relative
2274 to the inner-loop (which is the inner-most loop containing the dataref),
2275 and this is done be step (5) below.
2277 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
2278 inner-most loop, and so steps (3),(4) work the same, and step (5) is
2279 redundant. Steps (3),(4) create the following:
2282 LOOP: vp1 = phi(vp0,vp2)
2288 If there is an inner-loop nested in loop, then step (5) will also be
2289 applied, and an additional update in the inner-loop will be created:
2292 LOOP: vp1 = phi(vp0,vp2)
2294 inner: vp3 = phi(vp1,vp4)
2295 vp4 = vp3 + inner_step
2301 /** (3) Calculate the initial address the vector-pointer, and set
2302 the vector-pointer to point to it before the loop: **/
2304 /* Create: (&(base[init_val+offset]) in the loop preheader. */
2306 new_temp
= vect_create_addr_base_for_vector_ref (stmt
, &new_stmt_list
,
2308 pe
= loop_preheader_edge (loop
);
2311 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, new_stmt_list
);
2312 gcc_assert (!new_bb
);
2315 *initial_address
= new_temp
;
2317 /* Create: p = (vectype *) initial_base */
2318 vec_stmt
= gimple_build_assign (vect_ptr
,
2319 fold_convert (vect_ptr_type
, new_temp
));
2320 vect_ptr_init
= make_ssa_name (vect_ptr
, vec_stmt
);
2321 gimple_assign_set_lhs (vec_stmt
, vect_ptr_init
);
2322 new_bb
= gsi_insert_on_edge_immediate (pe
, vec_stmt
);
2323 gcc_assert (!new_bb
);
2326 /** (4) Handle the updating of the vector-pointer inside the loop.
2327 This is needed when ONLY_INIT is false, and also when AT_LOOP
2328 is the inner-loop nested in LOOP (during outer-loop vectorization).
2331 if (only_init
&& at_loop
== loop
) /* No update in loop is required. */
2333 /* Copy the points-to information if it exists. */
2334 if (DR_PTR_INFO (dr
))
2335 duplicate_ssa_name_ptr_info (vect_ptr_init
, DR_PTR_INFO (dr
));
2336 vptr
= vect_ptr_init
;
2340 /* The step of the vector pointer is the Vector Size. */
2341 tree step
= TYPE_SIZE_UNIT (vectype
);
2342 /* One exception to the above is when the scalar step of the load in
2343 LOOP is zero. In this case the step here is also zero. */
2345 step
= size_zero_node
;
2347 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
2349 create_iv (vect_ptr_init
,
2350 fold_convert (vect_ptr_type
, step
),
2351 vect_ptr
, loop
, &incr_gsi
, insert_after
,
2352 &indx_before_incr
, &indx_after_incr
);
2353 incr
= gsi_stmt (incr_gsi
);
2354 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
2356 /* Copy the points-to information if it exists. */
2357 if (DR_PTR_INFO (dr
))
2359 duplicate_ssa_name_ptr_info (indx_before_incr
, DR_PTR_INFO (dr
));
2360 duplicate_ssa_name_ptr_info (indx_after_incr
, DR_PTR_INFO (dr
));
2362 merge_alias_info (vect_ptr_init
, indx_before_incr
);
2363 merge_alias_info (vect_ptr_init
, indx_after_incr
);
2367 vptr
= indx_before_incr
;
2370 if (!nested_in_vect_loop
|| only_init
)
2374 /** (5) Handle the updating of the vector-pointer inside the inner-loop
2375 nested in LOOP, if exists: **/
2377 gcc_assert (nested_in_vect_loop
);
2380 standard_iv_increment_position (containing_loop
, &incr_gsi
,
2382 create_iv (vptr
, fold_convert (vect_ptr_type
, DR_STEP (dr
)), vect_ptr
,
2383 containing_loop
, &incr_gsi
, insert_after
, &indx_before_incr
,
2385 incr
= gsi_stmt (incr_gsi
);
2386 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
2388 /* Copy the points-to information if it exists. */
2389 if (DR_PTR_INFO (dr
))
2391 duplicate_ssa_name_ptr_info (indx_before_incr
, DR_PTR_INFO (dr
));
2392 duplicate_ssa_name_ptr_info (indx_after_incr
, DR_PTR_INFO (dr
));
2394 merge_alias_info (vect_ptr_init
, indx_before_incr
);
2395 merge_alias_info (vect_ptr_init
, indx_after_incr
);
2399 return indx_before_incr
;
2406 /* Function bump_vector_ptr
2408 Increment a pointer (to a vector type) by vector-size. If requested,
2409 i.e. if PTR-INCR is given, then also connect the new increment stmt
2410 to the existing def-use update-chain of the pointer, by modifying
2411 the PTR_INCR as illustrated below:
2413 The pointer def-use update-chain before this function:
2414 DATAREF_PTR = phi (p_0, p_2)
2416 PTR_INCR: p_2 = DATAREF_PTR + step
2418 The pointer def-use update-chain after this function:
2419 DATAREF_PTR = phi (p_0, p_2)
2421 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
2423 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
2426 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
2428 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
2429 the loop. The increment amount across iterations is expected
2431 BSI - location where the new update stmt is to be placed.
2432 STMT - the original scalar memory-access stmt that is being vectorized.
2433 BUMP - optional. The offset by which to bump the pointer. If not given,
2434 the offset is assumed to be vector_size.
2436 Output: Return NEW_DATAREF_PTR as illustrated above.
2441 bump_vector_ptr (tree dataref_ptr
, gimple ptr_incr
, gimple_stmt_iterator
*gsi
,
2442 gimple stmt
, tree bump
)
2444 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2445 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2446 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2447 tree ptr_var
= SSA_NAME_VAR (dataref_ptr
);
2448 tree update
= TYPE_SIZE_UNIT (vectype
);
2451 use_operand_p use_p
;
2452 tree new_dataref_ptr
;
2457 incr_stmt
= gimple_build_assign_with_ops (POINTER_PLUS_EXPR
, ptr_var
,
2458 dataref_ptr
, update
);
2459 new_dataref_ptr
= make_ssa_name (ptr_var
, incr_stmt
);
2460 gimple_assign_set_lhs (incr_stmt
, new_dataref_ptr
);
2461 vect_finish_stmt_generation (stmt
, incr_stmt
, gsi
);
2463 /* Copy the points-to information if it exists. */
2464 if (DR_PTR_INFO (dr
))
2465 duplicate_ssa_name_ptr_info (new_dataref_ptr
, DR_PTR_INFO (dr
));
2466 merge_alias_info (new_dataref_ptr
, dataref_ptr
);
2469 return new_dataref_ptr
;
2471 /* Update the vector-pointer's cross-iteration increment. */
2472 FOR_EACH_SSA_USE_OPERAND (use_p
, ptr_incr
, iter
, SSA_OP_USE
)
2474 tree use
= USE_FROM_PTR (use_p
);
2476 if (use
== dataref_ptr
)
2477 SET_USE (use_p
, new_dataref_ptr
);
2479 gcc_assert (tree_int_cst_compare (use
, update
) == 0);
2482 return new_dataref_ptr
;
2486 /* Function vect_create_destination_var.
2488 Create a new temporary of type VECTYPE. */
2491 vect_create_destination_var (tree scalar_dest
, tree vectype
)
2494 const char *new_name
;
2496 enum vect_var_kind kind
;
2498 kind
= vectype
? vect_simple_var
: vect_scalar_var
;
2499 type
= vectype
? vectype
: TREE_TYPE (scalar_dest
);
2501 gcc_assert (TREE_CODE (scalar_dest
) == SSA_NAME
);
2503 new_name
= get_name (scalar_dest
);
2506 vec_dest
= vect_get_new_vect_var (type
, kind
, new_name
);
2507 add_referenced_var (vec_dest
);
2512 /* Function vect_strided_store_supported.
2514 Returns TRUE is INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
2515 and FALSE otherwise. */
2518 vect_strided_store_supported (tree vectype
)
2520 optab interleave_high_optab
, interleave_low_optab
;
2523 mode
= (int) TYPE_MODE (vectype
);
2525 /* Check that the operation is supported. */
2526 interleave_high_optab
= optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR
,
2527 vectype
, optab_default
);
2528 interleave_low_optab
= optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR
,
2529 vectype
, optab_default
);
2530 if (!interleave_high_optab
|| !interleave_low_optab
)
2532 if (vect_print_dump_info (REPORT_DETAILS
))
2533 fprintf (vect_dump
, "no optab for interleave.");
2537 if (optab_handler (interleave_high_optab
, mode
)->insn_code
2539 || optab_handler (interleave_low_optab
, mode
)->insn_code
2540 == CODE_FOR_nothing
)
2542 if (vect_print_dump_info (REPORT_DETAILS
))
2543 fprintf (vect_dump
, "interleave op not supported by target.");
2551 /* Function vect_permute_store_chain.
2553 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
2554 a power of 2, generate interleave_high/low stmts to reorder the data
2555 correctly for the stores. Return the final references for stores in
2558 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2559 The input is 4 vectors each containing 8 elements. We assign a number to each
2560 element, the input sequence is:
2562 1st vec: 0 1 2 3 4 5 6 7
2563 2nd vec: 8 9 10 11 12 13 14 15
2564 3rd vec: 16 17 18 19 20 21 22 23
2565 4th vec: 24 25 26 27 28 29 30 31
2567 The output sequence should be:
2569 1st vec: 0 8 16 24 1 9 17 25
2570 2nd vec: 2 10 18 26 3 11 19 27
2571 3rd vec: 4 12 20 28 5 13 21 30
2572 4th vec: 6 14 22 30 7 15 23 31
2574 i.e., we interleave the contents of the four vectors in their order.
2576 We use interleave_high/low instructions to create such output. The input of
2577 each interleave_high/low operation is two vectors:
2580 the even elements of the result vector are obtained left-to-right from the
2581 high/low elements of the first vector. The odd elements of the result are
2582 obtained left-to-right from the high/low elements of the second vector.
2583 The output of interleave_high will be: 0 4 1 5
2584 and of interleave_low: 2 6 3 7
2587 The permutation is done in log LENGTH stages. In each stage interleave_high
2588 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
2589 where the first argument is taken from the first half of DR_CHAIN and the
2590 second argument from it's second half.
2593 I1: interleave_high (1st vec, 3rd vec)
2594 I2: interleave_low (1st vec, 3rd vec)
2595 I3: interleave_high (2nd vec, 4th vec)
2596 I4: interleave_low (2nd vec, 4th vec)
2598 The output for the first stage is:
2600 I1: 0 16 1 17 2 18 3 19
2601 I2: 4 20 5 21 6 22 7 23
2602 I3: 8 24 9 25 10 26 11 27
2603 I4: 12 28 13 29 14 30 15 31
2605 The output of the second stage, i.e. the final result is:
2607 I1: 0 8 16 24 1 9 17 25
2608 I2: 2 10 18 26 3 11 19 27
2609 I3: 4 12 20 28 5 13 21 30
2610 I4: 6 14 22 30 7 15 23 31. */
2613 vect_permute_store_chain (VEC(tree
,heap
) *dr_chain
,
2614 unsigned int length
,
2616 gimple_stmt_iterator
*gsi
,
2617 VEC(tree
,heap
) **result_chain
)
2619 tree perm_dest
, vect1
, vect2
, high
, low
;
2621 tree vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
2625 enum tree_code high_code
, low_code
;
2627 scalar_dest
= gimple_assign_lhs (stmt
);
2629 /* Check that the operation is supported. */
2630 if (!vect_strided_store_supported (vectype
))
2633 *result_chain
= VEC_copy (tree
, heap
, dr_chain
);
2635 for (i
= 0; i
< exact_log2 (length
); i
++)
2637 for (j
= 0; j
< length
/2; j
++)
2639 vect1
= VEC_index (tree
, dr_chain
, j
);
2640 vect2
= VEC_index (tree
, dr_chain
, j
+length
/2);
2642 /* Create interleaving stmt:
2643 in the case of big endian:
2644 high = interleave_high (vect1, vect2)
2645 and in the case of little endian:
2646 high = interleave_low (vect1, vect2). */
2647 perm_dest
= create_tmp_var (vectype
, "vect_inter_high");
2648 DECL_GIMPLE_REG_P (perm_dest
) = 1;
2649 add_referenced_var (perm_dest
);
2650 if (BYTES_BIG_ENDIAN
)
2652 high_code
= VEC_INTERLEAVE_HIGH_EXPR
;
2653 low_code
= VEC_INTERLEAVE_LOW_EXPR
;
2657 low_code
= VEC_INTERLEAVE_HIGH_EXPR
;
2658 high_code
= VEC_INTERLEAVE_LOW_EXPR
;
2660 perm_stmt
= gimple_build_assign_with_ops (high_code
, perm_dest
,
2662 high
= make_ssa_name (perm_dest
, perm_stmt
);
2663 gimple_assign_set_lhs (perm_stmt
, high
);
2664 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
2665 VEC_replace (tree
, *result_chain
, 2*j
, high
);
2667 /* Create interleaving stmt:
2668 in the case of big endian:
2669 low = interleave_low (vect1, vect2)
2670 and in the case of little endian:
2671 low = interleave_high (vect1, vect2). */
2672 perm_dest
= create_tmp_var (vectype
, "vect_inter_low");
2673 DECL_GIMPLE_REG_P (perm_dest
) = 1;
2674 add_referenced_var (perm_dest
);
2675 perm_stmt
= gimple_build_assign_with_ops (low_code
, perm_dest
,
2677 low
= make_ssa_name (perm_dest
, perm_stmt
);
2678 gimple_assign_set_lhs (perm_stmt
, low
);
2679 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
2680 VEC_replace (tree
, *result_chain
, 2*j
+1, low
);
2682 dr_chain
= VEC_copy (tree
, heap
, *result_chain
);
2687 /* Function vect_setup_realignment
2689 This function is called when vectorizing an unaligned load using
2690 the dr_explicit_realign[_optimized] scheme.
2691 This function generates the following code at the loop prolog:
2694 x msq_init = *(floor(p)); # prolog load
2695 realignment_token = call target_builtin;
2697 x msq = phi (msq_init, ---)
2699 The stmts marked with x are generated only for the case of
2700 dr_explicit_realign_optimized.
2702 The code above sets up a new (vector) pointer, pointing to the first
2703 location accessed by STMT, and a "floor-aligned" load using that pointer.
2704 It also generates code to compute the "realignment-token" (if the relevant
2705 target hook was defined), and creates a phi-node at the loop-header bb
2706 whose arguments are the result of the prolog-load (created by this
2707 function) and the result of a load that takes place in the loop (to be
2708 created by the caller to this function).
2710 For the case of dr_explicit_realign_optimized:
2711 The caller to this function uses the phi-result (msq) to create the
2712 realignment code inside the loop, and sets up the missing phi argument,
2715 msq = phi (msq_init, lsq)
2716 lsq = *(floor(p')); # load in loop
2717 result = realign_load (msq, lsq, realignment_token);
2719 For the case of dr_explicit_realign:
2721 msq = *(floor(p)); # load in loop
2723 lsq = *(floor(p')); # load in loop
2724 result = realign_load (msq, lsq, realignment_token);
2727 STMT - (scalar) load stmt to be vectorized. This load accesses
2728 a memory location that may be unaligned.
2729 BSI - place where new code is to be inserted.
2730 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
2734 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
2735 target hook, if defined.
2736 Return value - the result of the loop-header phi node. */
2739 vect_setup_realignment (gimple stmt
, gimple_stmt_iterator
*gsi
,
2740 tree
*realignment_token
,
2741 enum dr_alignment_support alignment_support_scheme
,
2743 struct loop
**at_loop
)
2745 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2746 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2747 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2748 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2750 tree scalar_dest
= gimple_assign_lhs (stmt
);
2757 tree msq_init
= NULL_TREE
;
2760 tree msq
= NULL_TREE
;
2761 gimple_seq stmts
= NULL
;
2763 bool compute_in_loop
= false;
2764 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
2765 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
2766 struct loop
*loop_for_initial_load
;
2768 gcc_assert (alignment_support_scheme
== dr_explicit_realign
2769 || alignment_support_scheme
== dr_explicit_realign_optimized
);
2771 /* We need to generate three things:
2772 1. the misalignment computation
2773 2. the extra vector load (for the optimized realignment scheme).
2774 3. the phi node for the two vectors from which the realignment is
2775 done (for the optimized realignment scheme).
2778 /* 1. Determine where to generate the misalignment computation.
2780 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
2781 calculation will be generated by this function, outside the loop (in the
2782 preheader). Otherwise, INIT_ADDR had already been computed for us by the
2783 caller, inside the loop.
2785 Background: If the misalignment remains fixed throughout the iterations of
2786 the loop, then both realignment schemes are applicable, and also the
2787 misalignment computation can be done outside LOOP. This is because we are
2788 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
2789 are a multiple of VS (the Vector Size), and therefore the misalignment in
2790 different vectorized LOOP iterations is always the same.
2791 The problem arises only if the memory access is in an inner-loop nested
2792 inside LOOP, which is now being vectorized using outer-loop vectorization.
2793 This is the only case when the misalignment of the memory access may not
2794 remain fixed throughout the iterations of the inner-loop (as explained in
2795 detail in vect_supportable_dr_alignment). In this case, not only is the
2796 optimized realignment scheme not applicable, but also the misalignment
2797 computation (and generation of the realignment token that is passed to
2798 REALIGN_LOAD) have to be done inside the loop.
2800 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
2801 or not, which in turn determines if the misalignment is computed inside
2802 the inner-loop, or outside LOOP. */
2804 if (init_addr
!= NULL_TREE
)
2806 compute_in_loop
= true;
2807 gcc_assert (alignment_support_scheme
== dr_explicit_realign
);
2811 /* 2. Determine where to generate the extra vector load.
2813 For the optimized realignment scheme, instead of generating two vector
2814 loads in each iteration, we generate a single extra vector load in the
2815 preheader of the loop, and in each iteration reuse the result of the
2816 vector load from the previous iteration. In case the memory access is in
2817 an inner-loop nested inside LOOP, which is now being vectorized using
2818 outer-loop vectorization, we need to determine whether this initial vector
2819 load should be generated at the preheader of the inner-loop, or can be
2820 generated at the preheader of LOOP. If the memory access has no evolution
2821 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
2822 to be generated inside LOOP (in the preheader of the inner-loop). */
2824 if (nested_in_vect_loop
)
2826 tree outerloop_step
= STMT_VINFO_DR_STEP (stmt_info
);
2827 bool invariant_in_outerloop
=
2828 (tree_int_cst_compare (outerloop_step
, size_zero_node
) == 0);
2829 loop_for_initial_load
= (invariant_in_outerloop
? loop
: loop
->inner
);
2832 loop_for_initial_load
= loop
;
2834 *at_loop
= loop_for_initial_load
;
2836 /* 3. For the case of the optimized realignment, create the first vector
2837 load at the loop preheader. */
2839 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
2841 /* Create msq_init = *(floor(p1)) in the loop preheader */
2843 gcc_assert (!compute_in_loop
);
2844 pe
= loop_preheader_edge (loop_for_initial_load
);
2845 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2846 ptr
= vect_create_data_ref_ptr (stmt
, loop_for_initial_load
, NULL_TREE
,
2847 &init_addr
, &inc
, true, &inv_p
);
2848 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, ptr
);
2849 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
2850 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2851 gimple_assign_set_lhs (new_stmt
, new_temp
);
2852 mark_symbols_for_renaming (new_stmt
);
2853 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
2854 gcc_assert (!new_bb
);
2855 msq_init
= gimple_assign_lhs (new_stmt
);
2858 /* 4. Create realignment token using a target builtin, if available.
2859 It is done either inside the containing loop, or before LOOP (as
2860 determined above). */
2862 if (targetm
.vectorize
.builtin_mask_for_load
)
2866 /* Compute INIT_ADDR - the initial addressed accessed by this memref. */
2867 if (compute_in_loop
)
2868 gcc_assert (init_addr
); /* already computed by the caller. */
2871 /* Generate the INIT_ADDR computation outside LOOP. */
2872 init_addr
= vect_create_addr_base_for_vector_ref (stmt
, &stmts
,
2874 pe
= loop_preheader_edge (loop
);
2875 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
2876 gcc_assert (!new_bb
);
2879 builtin_decl
= targetm
.vectorize
.builtin_mask_for_load ();
2880 new_stmt
= gimple_build_call (builtin_decl
, 1, init_addr
);
2882 vect_create_destination_var (scalar_dest
,
2883 gimple_call_return_type (new_stmt
));
2884 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2885 gimple_call_set_lhs (new_stmt
, new_temp
);
2887 if (compute_in_loop
)
2888 gsi_insert_before (gsi
, new_stmt
, GSI_SAME_STMT
);
2891 /* Generate the misalignment computation outside LOOP. */
2892 pe
= loop_preheader_edge (loop
);
2893 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
2894 gcc_assert (!new_bb
);
2897 *realignment_token
= gimple_call_lhs (new_stmt
);
2899 /* The result of the CALL_EXPR to this builtin is determined from
2900 the value of the parameter and no global variables are touched
2901 which makes the builtin a "const" function. Requiring the
2902 builtin to have the "const" attribute makes it unnecessary
2903 to call mark_call_clobbered. */
2904 gcc_assert (TREE_READONLY (builtin_decl
));
2907 if (alignment_support_scheme
== dr_explicit_realign
)
2910 gcc_assert (!compute_in_loop
);
2911 gcc_assert (alignment_support_scheme
== dr_explicit_realign_optimized
);
2914 /* 5. Create msq = phi <msq_init, lsq> in loop */
2916 pe
= loop_preheader_edge (containing_loop
);
2917 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2918 msq
= make_ssa_name (vec_dest
, NULL
);
2919 phi_stmt
= create_phi_node (msq
, containing_loop
->header
);
2920 SSA_NAME_DEF_STMT (msq
) = phi_stmt
;
2921 add_phi_arg (phi_stmt
, msq_init
, pe
);
2927 /* Function vect_strided_load_supported.
2929 Returns TRUE is EXTRACT_EVEN and EXTRACT_ODD operations are supported,
2930 and FALSE otherwise. */
2933 vect_strided_load_supported (tree vectype
)
2935 optab perm_even_optab
, perm_odd_optab
;
2938 mode
= (int) TYPE_MODE (vectype
);
2940 perm_even_optab
= optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR
, vectype
,
2942 if (!perm_even_optab
)
2944 if (vect_print_dump_info (REPORT_DETAILS
))
2945 fprintf (vect_dump
, "no optab for perm_even.");
2949 if (optab_handler (perm_even_optab
, mode
)->insn_code
== CODE_FOR_nothing
)
2951 if (vect_print_dump_info (REPORT_DETAILS
))
2952 fprintf (vect_dump
, "perm_even op not supported by target.");
2956 perm_odd_optab
= optab_for_tree_code (VEC_EXTRACT_ODD_EXPR
, vectype
,
2958 if (!perm_odd_optab
)
2960 if (vect_print_dump_info (REPORT_DETAILS
))
2961 fprintf (vect_dump
, "no optab for perm_odd.");
2965 if (optab_handler (perm_odd_optab
, mode
)->insn_code
== CODE_FOR_nothing
)
2967 if (vect_print_dump_info (REPORT_DETAILS
))
2968 fprintf (vect_dump
, "perm_odd op not supported by target.");
2975 /* Function vect_permute_load_chain.
2977 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
2978 a power of 2, generate extract_even/odd stmts to reorder the input data
2979 correctly. Return the final references for loads in RESULT_CHAIN.
2981 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2982 The input is 4 vectors each containing 8 elements. We assign a number to each
2983 element, the input sequence is:
2985 1st vec: 0 1 2 3 4 5 6 7
2986 2nd vec: 8 9 10 11 12 13 14 15
2987 3rd vec: 16 17 18 19 20 21 22 23
2988 4th vec: 24 25 26 27 28 29 30 31
2990 The output sequence should be:
2992 1st vec: 0 4 8 12 16 20 24 28
2993 2nd vec: 1 5 9 13 17 21 25 29
2994 3rd vec: 2 6 10 14 18 22 26 30
2995 4th vec: 3 7 11 15 19 23 27 31
2997 i.e., the first output vector should contain the first elements of each
2998 interleaving group, etc.
3000 We use extract_even/odd instructions to create such output. The input of each
3001 extract_even/odd operation is two vectors
3005 and the output is the vector of extracted even/odd elements. The output of
3006 extract_even will be: 0 2 4 6
3007 and of extract_odd: 1 3 5 7
3010 The permutation is done in log LENGTH stages. In each stage extract_even and
3011 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
3012 order. In our example,
3014 E1: extract_even (1st vec, 2nd vec)
3015 E2: extract_odd (1st vec, 2nd vec)
3016 E3: extract_even (3rd vec, 4th vec)
3017 E4: extract_odd (3rd vec, 4th vec)
3019 The output for the first stage will be:
3021 E1: 0 2 4 6 8 10 12 14
3022 E2: 1 3 5 7 9 11 13 15
3023 E3: 16 18 20 22 24 26 28 30
3024 E4: 17 19 21 23 25 27 29 31
3026 In order to proceed and create the correct sequence for the next stage (or
3027 for the correct output, if the second stage is the last one, as in our
3028 example), we first put the output of extract_even operation and then the
3029 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3030 The input for the second stage is:
3032 1st vec (E1): 0 2 4 6 8 10 12 14
3033 2nd vec (E3): 16 18 20 22 24 26 28 30
3034 3rd vec (E2): 1 3 5 7 9 11 13 15
3035 4th vec (E4): 17 19 21 23 25 27 29 31
3037 The output of the second stage:
3039 E1: 0 4 8 12 16 20 24 28
3040 E2: 2 6 10 14 18 22 26 30
3041 E3: 1 5 9 13 17 21 25 29
3042 E4: 3 7 11 15 19 23 27 31
3044 And RESULT_CHAIN after reordering:
3046 1st vec (E1): 0 4 8 12 16 20 24 28
3047 2nd vec (E3): 1 5 9 13 17 21 25 29
3048 3rd vec (E2): 2 6 10 14 18 22 26 30
3049 4th vec (E4): 3 7 11 15 19 23 27 31. */
3052 vect_permute_load_chain (VEC(tree
,heap
) *dr_chain
,
3053 unsigned int length
,
3055 gimple_stmt_iterator
*gsi
,
3056 VEC(tree
,heap
) **result_chain
)
3058 tree perm_dest
, data_ref
, first_vect
, second_vect
;
3060 tree vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
3064 /* Check that the operation is supported. */
3065 if (!vect_strided_load_supported (vectype
))
3068 *result_chain
= VEC_copy (tree
, heap
, dr_chain
);
3069 for (i
= 0; i
< exact_log2 (length
); i
++)
3071 for (j
= 0; j
< length
; j
+=2)
3073 first_vect
= VEC_index (tree
, dr_chain
, j
);
3074 second_vect
= VEC_index (tree
, dr_chain
, j
+1);
3076 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3077 perm_dest
= create_tmp_var (vectype
, "vect_perm_even");
3078 DECL_GIMPLE_REG_P (perm_dest
) = 1;
3079 add_referenced_var (perm_dest
);
3081 perm_stmt
= gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR
,
3082 perm_dest
, first_vect
,
3085 data_ref
= make_ssa_name (perm_dest
, perm_stmt
);
3086 gimple_assign_set_lhs (perm_stmt
, data_ref
);
3087 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
3088 mark_symbols_for_renaming (perm_stmt
);
3090 VEC_replace (tree
, *result_chain
, j
/2, data_ref
);
3092 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3093 perm_dest
= create_tmp_var (vectype
, "vect_perm_odd");
3094 DECL_GIMPLE_REG_P (perm_dest
) = 1;
3095 add_referenced_var (perm_dest
);
3097 perm_stmt
= gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR
,
3098 perm_dest
, first_vect
,
3100 data_ref
= make_ssa_name (perm_dest
, perm_stmt
);
3101 gimple_assign_set_lhs (perm_stmt
, data_ref
);
3102 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
3103 mark_symbols_for_renaming (perm_stmt
);
3105 VEC_replace (tree
, *result_chain
, j
/2+length
/2, data_ref
);
3107 dr_chain
= VEC_copy (tree
, heap
, *result_chain
);
3113 /* Function vect_transform_strided_load.
3115 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3116 to perform their permutation and ascribe the result vectorized statements to
3117 the scalar statements.
3121 vect_transform_strided_load (gimple stmt
, VEC(tree
,heap
) *dr_chain
, int size
,
3122 gimple_stmt_iterator
*gsi
)
3124 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3125 gimple first_stmt
= DR_GROUP_FIRST_DR (stmt_info
);
3126 gimple next_stmt
, new_stmt
;
3127 VEC(tree
,heap
) *result_chain
= NULL
;
3128 unsigned int i
, gap_count
;
3131 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3132 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
3133 vectors, that are ready for vector computation. */
3134 result_chain
= VEC_alloc (tree
, heap
, size
);
3136 if (!vect_permute_load_chain (dr_chain
, size
, stmt
, gsi
, &result_chain
))
3139 /* Put a permuted data-ref in the VECTORIZED_STMT field.
3140 Since we scan the chain starting from it's first node, their order
3141 corresponds the order of data-refs in RESULT_CHAIN. */
3142 next_stmt
= first_stmt
;
3144 for (i
= 0; VEC_iterate (tree
, result_chain
, i
, tmp_data_ref
); i
++)
3149 /* Skip the gaps. Loads created for the gaps will be removed by dead
3150 code elimination pass later. No need to check for the first stmt in
3151 the group, since it always exists.
3152 DR_GROUP_GAP is the number of steps in elements from the previous
3153 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
3154 correspond to the gaps.
3156 if (next_stmt
!= first_stmt
3157 && gap_count
< DR_GROUP_GAP (vinfo_for_stmt (next_stmt
)))
3165 new_stmt
= SSA_NAME_DEF_STMT (tmp_data_ref
);
3166 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
3167 copies, and we put the new vector statement in the first available
3169 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
)))
3170 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
)) = new_stmt
;
3173 if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt
)))
3176 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
));
3178 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt
));
3181 prev_stmt
= rel_stmt
;
3183 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt
));
3186 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt
)) =
3191 next_stmt
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt
));
3193 /* If NEXT_STMT accesses the same DR as the previous statement,
3194 put the same TMP_DATA_REF as its vectorized statement; otherwise
3195 get the next data-ref from RESULT_CHAIN. */
3196 if (!next_stmt
|| !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt
)))
3201 VEC_free (tree
, heap
, result_chain
);
3205 /* Function vect_force_dr_alignment_p.
3207 Returns whether the alignment of a DECL can be forced to be aligned
3208 on ALIGNMENT bit boundary. */
3211 vect_can_force_dr_alignment_p (const_tree decl
, unsigned int alignment
)
3213 if (TREE_CODE (decl
) != VAR_DECL
)
3216 if (DECL_EXTERNAL (decl
))
3219 if (TREE_ASM_WRITTEN (decl
))
3222 if (TREE_STATIC (decl
))
3223 return (alignment
<= MAX_OFILE_ALIGNMENT
);
3225 return (alignment
<= MAX_STACK_ALIGNMENT
);
3228 /* Function vect_supportable_dr_alignment
3230 Return whether the data reference DR is supported with respect to its
3233 enum dr_alignment_support
3234 vect_supportable_dr_alignment (struct data_reference
*dr
)
3236 gimple stmt
= DR_STMT (dr
);
3237 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3238 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3239 enum machine_mode mode
= TYPE_MODE (vectype
);
3240 struct loop
*vect_loop
= LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info
));
3241 bool nested_in_vect_loop
= nested_in_vect_loop_p (vect_loop
, stmt
);
3242 bool invariant_in_outerloop
= false;
3244 if (aligned_access_p (dr
))
3247 if (nested_in_vect_loop
)
3249 tree outerloop_step
= STMT_VINFO_DR_STEP (stmt_info
);
3250 invariant_in_outerloop
=
3251 (tree_int_cst_compare (outerloop_step
, size_zero_node
) == 0);
3254 /* Possibly unaligned access. */
3256 /* We can choose between using the implicit realignment scheme (generating
3257 a misaligned_move stmt) and the explicit realignment scheme (generating
3258 aligned loads with a REALIGN_LOAD). There are two variants to the explicit
3259 realignment scheme: optimized, and unoptimized.
3260 We can optimize the realignment only if the step between consecutive
3261 vector loads is equal to the vector size. Since the vector memory
3262 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
3263 is guaranteed that the misalignment amount remains the same throughout the
3264 execution of the vectorized loop. Therefore, we can create the
3265 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
3266 at the loop preheader.
3268 However, in the case of outer-loop vectorization, when vectorizing a
3269 memory access in the inner-loop nested within the LOOP that is now being
3270 vectorized, while it is guaranteed that the misalignment of the
3271 vectorized memory access will remain the same in different outer-loop
3272 iterations, it is *not* guaranteed that is will remain the same throughout
3273 the execution of the inner-loop. This is because the inner-loop advances
3274 with the original scalar step (and not in steps of VS). If the inner-loop
3275 step happens to be a multiple of VS, then the misalignment remains fixed
3276 and we can use the optimized realignment scheme. For example:
3282 When vectorizing the i-loop in the above example, the step between
3283 consecutive vector loads is 1, and so the misalignment does not remain
3284 fixed across the execution of the inner-loop, and the realignment cannot
3285 be optimized (as illustrated in the following pseudo vectorized loop):
3287 for (i=0; i<N; i+=4)
3288 for (j=0; j<M; j++){
3289 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
3290 // when j is {0,1,2,3,4,5,6,7,...} respectively.
3291 // (assuming that we start from an aligned address).
3294 We therefore have to use the unoptimized realignment scheme:
3296 for (i=0; i<N; i+=4)
3297 for (j=k; j<M; j+=4)
3298 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
3299 // that the misalignment of the initial address is
3302 The loop can then be vectorized as follows:
3304 for (k=0; k<4; k++){
3305 rt = get_realignment_token (&vp[k]);
3306 for (i=0; i<N; i+=4){
3308 for (j=k; j<M; j+=4){
3310 va = REALIGN_LOAD <v1,v2,rt>;
3317 if (DR_IS_READ (dr
))
3319 if (optab_handler (vec_realign_load_optab
, mode
)->insn_code
!=
3321 && (!targetm
.vectorize
.builtin_mask_for_load
3322 || targetm
.vectorize
.builtin_mask_for_load ()))
3324 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3325 if (nested_in_vect_loop
3326 && (TREE_INT_CST_LOW (DR_STEP (dr
))
3327 != GET_MODE_SIZE (TYPE_MODE (vectype
))))
3328 return dr_explicit_realign
;
3330 return dr_explicit_realign_optimized
;
3333 if (optab_handler (movmisalign_optab
, mode
)->insn_code
!=
3335 /* Can't software pipeline the loads, but can at least do them. */
3336 return dr_unaligned_supported
;
3340 return dr_unaligned_unsupported
;