/* Data References Analysis and Manipulation Utilities for Vectorization.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "tm_p.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "diagnostic-core.h"

/* Need to include rtl.h, expr.h, etc. for optabs.  */
#include "expr.h"
#include "optabs.h"
45 /* Return true if load- or store-lanes optab OPTAB is implemented for
46 COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
49 vect_lanes_optab_supported_p (const char *name
, convert_optab optab
,
50 tree vectype
, unsigned HOST_WIDE_INT count
)
52 enum machine_mode mode
, array_mode
;
55 mode
= TYPE_MODE (vectype
);
56 limit_p
= !targetm
.array_mode_supported_p (mode
, count
);
57 array_mode
= mode_for_size (count
* GET_MODE_BITSIZE (mode
),
60 if (array_mode
== BLKmode
)
62 if (dump_enabled_p ())
63 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
64 "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC
"]",
65 GET_MODE_NAME (mode
), count
);
69 if (convert_optab_handler (optab
, array_mode
, mode
) == CODE_FOR_nothing
)
71 if (dump_enabled_p ())
72 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
73 "cannot use %s<%s><%s>", name
,
74 GET_MODE_NAME (array_mode
), GET_MODE_NAME (mode
));
78 if (dump_enabled_p ())
79 dump_printf_loc (MSG_NOTE
, vect_location
,
80 "can use %s<%s><%s>", name
, GET_MODE_NAME (array_mode
),
81 GET_MODE_NAME (mode
));
87 /* Return the smallest scalar part of STMT.
88 This is used to determine the vectype of the stmt. We generally set the
89 vectype according to the type of the result (lhs). For stmts whose
90 result-type is different than the type of the arguments (e.g., demotion,
91 promotion), vectype will be reset appropriately (later). Note that we have
92 to visit the smallest datatype in this function, because that determines the
93 VF. If the smallest datatype in the loop is present only as the rhs of a
94 promotion operation - we'd miss it.
95 Such a case, where a variable of this datatype does not appear in the lhs
96 anywhere in the loop, can only occur if it's an invariant: e.g.:
97 'int_x = (int) short_inv', which we'd expect to have been optimized away by
98 invariant motion. However, we cannot rely on invariant motion to always
99 take invariants out of the loop, and so in the case of promotion we also
100 have to check the rhs.
101 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
105 vect_get_smallest_scalar_type (gimple stmt
, HOST_WIDE_INT
*lhs_size_unit
,
106 HOST_WIDE_INT
*rhs_size_unit
)
108 tree scalar_type
= gimple_expr_type (stmt
);
109 HOST_WIDE_INT lhs
, rhs
;
111 lhs
= rhs
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type
));
113 if (is_gimple_assign (stmt
)
114 && (gimple_assign_cast_p (stmt
)
115 || gimple_assign_rhs_code (stmt
) == WIDEN_MULT_EXPR
116 || gimple_assign_rhs_code (stmt
) == WIDEN_LSHIFT_EXPR
117 || gimple_assign_rhs_code (stmt
) == FLOAT_EXPR
))
119 tree rhs_type
= TREE_TYPE (gimple_assign_rhs1 (stmt
));
121 rhs
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type
));
123 scalar_type
= rhs_type
;
126 *lhs_size_unit
= lhs
;
127 *rhs_size_unit
= rhs
;
132 /* Find the place of the data-ref in STMT in the interleaving chain that starts
133 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
136 vect_get_place_in_interleaving_chain (gimple stmt
, gimple first_stmt
)
138 gimple next_stmt
= first_stmt
;
141 if (first_stmt
!= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
144 while (next_stmt
&& next_stmt
!= stmt
)
147 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
157 /* Check if data references pointed by DR_I and DR_J are same or
158 belong to same interleaving group. Return FALSE if drs are
159 different, otherwise return TRUE. */
162 vect_same_range_drs (data_reference_p dr_i
, data_reference_p dr_j
)
164 gimple stmt_i
= DR_STMT (dr_i
);
165 gimple stmt_j
= DR_STMT (dr_j
);
167 if (operand_equal_p (DR_REF (dr_i
), DR_REF (dr_j
), 0)
168 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i
))
169 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j
))
170 && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i
))
171 == GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j
)))))
177 /* If address ranges represented by DDR_I and DDR_J are equal,
178 return TRUE, otherwise return FALSE. */
181 vect_vfa_range_equal (ddr_p ddr_i
, ddr_p ddr_j
)
183 if ((vect_same_range_drs (DDR_A (ddr_i
), DDR_A (ddr_j
))
184 && vect_same_range_drs (DDR_B (ddr_i
), DDR_B (ddr_j
)))
185 || (vect_same_range_drs (DDR_A (ddr_i
), DDR_B (ddr_j
))
186 && vect_same_range_drs (DDR_B (ddr_i
), DDR_A (ddr_j
))))
192 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
193 tested at run-time. Return TRUE if DDR was successfully inserted.
194 Return false if versioning is not supported. */
197 vect_mark_for_runtime_alias_test (ddr_p ddr
, loop_vec_info loop_vinfo
)
199 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
201 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS
) == 0)
204 if (dump_enabled_p ())
206 dump_printf_loc (MSG_NOTE
, vect_location
,
207 "mark for run-time aliasing test between ");
208 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (DDR_A (ddr
)));
209 dump_printf (MSG_NOTE
, " and ");
210 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (DDR_B (ddr
)));
213 if (optimize_loop_nest_for_size_p (loop
))
215 if (dump_enabled_p ())
216 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
217 "versioning not supported when optimizing for size.");
221 /* FORNOW: We don't support versioning with outer-loop vectorization. */
224 if (dump_enabled_p ())
225 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
226 "versioning not yet supported for outer-loops.");
230 /* FORNOW: We don't support creating runtime alias tests for non-constant
232 if (TREE_CODE (DR_STEP (DDR_A (ddr
))) != INTEGER_CST
233 || TREE_CODE (DR_STEP (DDR_B (ddr
))) != INTEGER_CST
)
235 if (dump_enabled_p ())
236 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
237 "versioning not yet supported for non-constant "
242 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
).safe_push (ddr
);
247 /* Function vect_analyze_data_ref_dependence.
249 Return TRUE if there (might) exist a dependence between a memory-reference
250 DRA and a memory-reference DRB. When versioning for alias may check a
251 dependence at run-time, return FALSE. Adjust *MAX_VF according to
252 the data dependence. */
255 vect_analyze_data_ref_dependence (struct data_dependence_relation
*ddr
,
256 loop_vec_info loop_vinfo
, int *max_vf
)
259 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
260 struct data_reference
*dra
= DDR_A (ddr
);
261 struct data_reference
*drb
= DDR_B (ddr
);
262 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
263 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
264 lambda_vector dist_v
;
265 unsigned int loop_depth
;
267 /* In loop analysis all data references should be vectorizable. */
268 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a
)
269 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b
))
272 /* Independent data accesses. */
273 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
277 || (DR_IS_READ (dra
) && DR_IS_READ (drb
)))
280 /* Unknown data dependence. */
281 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
283 if (dump_enabled_p ())
285 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
286 "versioning for alias required: "
287 "can't determine dependence between ");
288 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
290 dump_printf (MSG_MISSED_OPTIMIZATION
, " and ");
291 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
295 /* Add to list of ddrs that need to be tested at run-time. */
296 return !vect_mark_for_runtime_alias_test (ddr
, loop_vinfo
);
299 /* Known data dependence. */
300 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
302 if (dump_enabled_p ())
304 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
305 "versioning for alias required: "
306 "bad dist vector for ");
307 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, DR_REF (dra
));
308 dump_printf (MSG_MISSED_OPTIMIZATION
, " and ");
309 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, DR_REF (drb
));
311 /* Add to list of ddrs that need to be tested at run-time. */
312 return !vect_mark_for_runtime_alias_test (ddr
, loop_vinfo
);
315 loop_depth
= index_in_loop_nest (loop
->num
, DDR_LOOP_NEST (ddr
));
316 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, dist_v
)
318 int dist
= dist_v
[loop_depth
];
320 if (dump_enabled_p ())
321 dump_printf_loc (MSG_NOTE
, vect_location
,
322 "dependence distance = %d.", dist
);
326 if (dump_enabled_p ())
328 dump_printf_loc (MSG_NOTE
, vect_location
,
329 "dependence distance == 0 between ");
330 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (dra
));
331 dump_printf (MSG_NOTE
, " and ");
332 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (drb
));
335 /* For interleaving, mark that there is a read-write dependency if
336 necessary. We check before that one of the data-refs is store. */
337 if (DR_IS_READ (dra
))
338 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a
) = true;
341 if (DR_IS_READ (drb
))
342 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b
) = true;
348 if (dist
> 0 && DDR_REVERSED_P (ddr
))
350 /* If DDR_REVERSED_P the order of the data-refs in DDR was
351 reversed (to make distance vector positive), and the actual
352 distance is negative. */
353 if (dump_enabled_p ())
354 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
355 "dependence distance negative.");
360 && abs (dist
) < *max_vf
)
362 /* The dependence distance requires reduction of the maximal
363 vectorization factor. */
364 *max_vf
= abs (dist
);
365 if (dump_enabled_p ())
366 dump_printf_loc (MSG_NOTE
, vect_location
,
367 "adjusting maximal vectorization factor to %i",
371 if (abs (dist
) >= *max_vf
)
373 /* Dependence distance does not create dependence, as far as
374 vectorization is concerned, in this case. */
375 if (dump_enabled_p ())
376 dump_printf_loc (MSG_NOTE
, vect_location
,
377 "dependence distance >= VF.");
381 if (dump_enabled_p ())
383 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
384 "not vectorized, possible dependence "
385 "between data-refs ");
386 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (dra
));
387 dump_printf (MSG_NOTE
, " and ");
388 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (drb
));
397 /* Function vect_analyze_data_ref_dependences.
399 Examine all the data references in the loop, and make sure there do not
400 exist any data dependences between them. Set *MAX_VF according to
401 the maximum vectorization factor the data dependences allow. */
404 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo
, int *max_vf
)
407 struct data_dependence_relation
*ddr
;
409 if (dump_enabled_p ())
410 dump_printf_loc (MSG_NOTE
, vect_location
,
411 "=== vect_analyze_data_ref_dependences ===");
413 if (!compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo
),
414 &LOOP_VINFO_DDRS (loop_vinfo
),
415 LOOP_VINFO_LOOP_NEST (loop_vinfo
), true))
418 FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo
), i
, ddr
)
419 if (vect_analyze_data_ref_dependence (ddr
, loop_vinfo
, max_vf
))
426 /* Function vect_slp_analyze_data_ref_dependence.
428 Return TRUE if there (might) exist a dependence between a memory-reference
429 DRA and a memory-reference DRB. When versioning for alias may check a
430 dependence at run-time, return FALSE. Adjust *MAX_VF according to
431 the data dependence. */
434 vect_slp_analyze_data_ref_dependence (struct data_dependence_relation
*ddr
)
436 struct data_reference
*dra
= DDR_A (ddr
);
437 struct data_reference
*drb
= DDR_B (ddr
);
439 /* We need to check dependences of statements marked as unvectorizable
440 as well, they still can prohibit vectorization. */
442 /* Independent data accesses. */
443 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
449 /* Read-read is OK. */
450 if (DR_IS_READ (dra
) && DR_IS_READ (drb
))
453 /* Unknown data dependence. */
454 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
458 if (dump_enabled_p ())
460 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
461 "can't determine dependence between ");
462 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, DR_REF (dra
));
463 dump_printf (MSG_MISSED_OPTIMIZATION
, " and ");
464 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, DR_REF (drb
));
467 /* We do not vectorize basic blocks with write-write dependencies. */
468 if (DR_IS_WRITE (dra
) && DR_IS_WRITE (drb
))
471 /* Check that it's not a load-after-store dependence. */
472 earlier_stmt
= get_earlier_stmt (DR_STMT (dra
), DR_STMT (drb
));
473 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt
))))
479 if (dump_enabled_p ())
481 dump_printf_loc (MSG_NOTE
, vect_location
,
482 "determined dependence between ");
483 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (dra
));
484 dump_printf (MSG_NOTE
, " and ");
485 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (drb
));
488 /* Do not vectorize basic blocks with write-write dependences. */
489 if (DR_IS_WRITE (dra
) && DR_IS_WRITE (drb
))
492 /* Check dependence between DRA and DRB for basic block vectorization.
493 If the accesses share same bases and offsets, we can compare their initial
494 constant offsets to decide whether they differ or not. In case of a read-
495 write dependence we check that the load is before the store to ensure that
496 vectorization will not change the order of the accesses. */
498 HOST_WIDE_INT type_size_a
, type_size_b
, init_a
, init_b
;
501 /* Check that the data-refs have same bases and offsets. If not, we can't
502 determine if they are dependent. */
503 if (!operand_equal_p (DR_BASE_ADDRESS (dra
), DR_BASE_ADDRESS (drb
), 0)
504 || !dr_equal_offsets_p (dra
, drb
))
507 /* Check the types. */
508 type_size_a
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra
))));
509 type_size_b
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb
))));
511 if (type_size_a
!= type_size_b
512 || !types_compatible_p (TREE_TYPE (DR_REF (dra
)),
513 TREE_TYPE (DR_REF (drb
))))
516 init_a
= TREE_INT_CST_LOW (DR_INIT (dra
));
517 init_b
= TREE_INT_CST_LOW (DR_INIT (drb
));
519 /* Two different locations - no dependence. */
520 if (init_a
!= init_b
)
523 /* We have a read-write dependence. Check that the load is before the store.
524 When we vectorize basic blocks, vector load can be only before
525 corresponding scalar load, and vector store can be only after its
526 corresponding scalar store. So the order of the acceses is preserved in
527 case the load is before the store. */
528 earlier_stmt
= get_earlier_stmt (DR_STMT (dra
), DR_STMT (drb
));
529 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt
))))
536 /* Function vect_analyze_data_ref_dependences.
538 Examine all the data references in the basic-block, and make sure there
539 do not exist any data dependences between them. Set *MAX_VF according to
540 the maximum vectorization factor the data dependences allow. */
543 vect_slp_analyze_data_ref_dependences (bb_vec_info bb_vinfo
)
545 struct data_dependence_relation
*ddr
;
548 if (dump_enabled_p ())
549 dump_printf_loc (MSG_NOTE
, vect_location
,
550 "=== vect_slp_analyze_data_ref_dependences ===");
552 if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo
),
553 &BB_VINFO_DDRS (bb_vinfo
),
557 FOR_EACH_VEC_ELT (BB_VINFO_DDRS (bb_vinfo
), i
, ddr
)
558 if (vect_slp_analyze_data_ref_dependence (ddr
))
565 /* Function vect_compute_data_ref_alignment
567 Compute the misalignment of the data reference DR.
570 1. If during the misalignment computation it is found that the data reference
571 cannot be vectorized then false is returned.
572 2. DR_MISALIGNMENT (DR) is defined.
574 FOR NOW: No analysis is actually performed. Misalignment is calculated
575 only for trivial cases. TODO. */
578 vect_compute_data_ref_alignment (struct data_reference
*dr
)
580 gimple stmt
= DR_STMT (dr
);
581 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
582 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
583 struct loop
*loop
= NULL
;
584 tree ref
= DR_REF (dr
);
586 tree base
, base_addr
;
589 tree aligned_to
, alignment
;
591 if (dump_enabled_p ())
592 dump_printf_loc (MSG_NOTE
, vect_location
,
593 "vect_compute_data_ref_alignment:");
596 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
598 /* Initialize misalignment to unknown. */
599 SET_DR_MISALIGNMENT (dr
, -1);
601 /* Strided loads perform only component accesses, misalignment information
602 is irrelevant for them. */
603 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
606 misalign
= DR_INIT (dr
);
607 aligned_to
= DR_ALIGNED_TO (dr
);
608 base_addr
= DR_BASE_ADDRESS (dr
);
609 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
611 /* In case the dataref is in an inner-loop of the loop that is being
612 vectorized (LOOP), we use the base and misalignment information
613 relative to the outer-loop (LOOP). This is ok only if the misalignment
614 stays the same throughout the execution of the inner-loop, which is why
615 we have to check that the stride of the dataref in the inner-loop evenly
616 divides by the vector size. */
617 if (loop
&& nested_in_vect_loop_p (loop
, stmt
))
619 tree step
= DR_STEP (dr
);
620 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
622 if (dr_step
% GET_MODE_SIZE (TYPE_MODE (vectype
)) == 0)
624 if (dump_enabled_p ())
625 dump_printf_loc (MSG_NOTE
, vect_location
,
626 "inner step divides the vector-size.");
627 misalign
= STMT_VINFO_DR_INIT (stmt_info
);
628 aligned_to
= STMT_VINFO_DR_ALIGNED_TO (stmt_info
);
629 base_addr
= STMT_VINFO_DR_BASE_ADDRESS (stmt_info
);
633 if (dump_enabled_p ())
634 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
635 "inner step doesn't divide the vector-size.");
636 misalign
= NULL_TREE
;
640 /* Similarly, if we're doing basic-block vectorization, we can only use
641 base and misalignment information relative to an innermost loop if the
642 misalignment stays the same throughout the execution of the loop.
643 As above, this is the case if the stride of the dataref evenly divides
644 by the vector size. */
647 tree step
= DR_STEP (dr
);
648 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
650 if (dr_step
% GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0)
652 if (dump_enabled_p ())
653 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
654 "SLP: step doesn't divide the vector-size.");
655 misalign
= NULL_TREE
;
659 base
= build_fold_indirect_ref (base_addr
);
660 alignment
= ssize_int (TYPE_ALIGN (vectype
)/BITS_PER_UNIT
);
662 if ((aligned_to
&& tree_int_cst_compare (aligned_to
, alignment
) < 0)
665 if (dump_enabled_p ())
667 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
668 "Unknown alignment for access: ");
669 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, base
);
675 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base
)),
677 || (TREE_CODE (base_addr
) == SSA_NAME
678 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
679 TREE_TYPE (base_addr
)))),
681 || (get_pointer_alignment (base_addr
) >= TYPE_ALIGN (vectype
)))
684 base_aligned
= false;
688 /* Do not change the alignment of global variables here if
689 flag_section_anchors is enabled as we already generated
690 RTL for other functions. Most global variables should
691 have been aligned during the IPA increase_alignment pass. */
692 if (!vect_can_force_dr_alignment_p (base
, TYPE_ALIGN (vectype
))
693 || (TREE_STATIC (base
) && flag_section_anchors
))
695 if (dump_enabled_p ())
697 dump_printf_loc (MSG_NOTE
, vect_location
,
698 "can't force alignment of ref: ");
699 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, ref
);
704 /* Force the alignment of the decl.
705 NOTE: This is the only change to the code we make during
706 the analysis phase, before deciding to vectorize the loop. */
707 if (dump_enabled_p ())
709 dump_printf_loc (MSG_NOTE
, vect_location
, "force alignment of ");
710 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, ref
);
713 DECL_ALIGN (base
) = TYPE_ALIGN (vectype
);
714 DECL_USER_ALIGN (base
) = 1;
717 /* At this point we assume that the base is aligned. */
718 gcc_assert (base_aligned
719 || (TREE_CODE (base
) == VAR_DECL
720 && DECL_ALIGN (base
) >= TYPE_ALIGN (vectype
)));
722 /* If this is a backward running DR then first access in the larger
723 vectype actually is N-1 elements before the address in the DR.
724 Adjust misalign accordingly. */
725 if (tree_int_cst_compare (DR_STEP (dr
), size_zero_node
) < 0)
727 tree offset
= ssize_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
728 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
729 otherwise we wouldn't be here. */
730 offset
= fold_build2 (MULT_EXPR
, ssizetype
, offset
, DR_STEP (dr
));
731 /* PLUS because DR_STEP was negative. */
732 misalign
= size_binop (PLUS_EXPR
, misalign
, offset
);
735 /* Modulo alignment. */
736 misalign
= size_binop (FLOOR_MOD_EXPR
, misalign
, alignment
);
738 if (!host_integerp (misalign
, 1))
740 /* Negative or overflowed misalignment value. */
741 if (dump_enabled_p ())
742 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
743 "unexpected misalign value");
747 SET_DR_MISALIGNMENT (dr
, TREE_INT_CST_LOW (misalign
));
749 if (dump_enabled_p ())
751 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
752 "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr
));
753 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, ref
);
760 /* Function vect_compute_data_refs_alignment
762 Compute the misalignment of data references in the loop.
763 Return FALSE if a data reference is found that cannot be vectorized. */
766 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo
,
767 bb_vec_info bb_vinfo
)
769 vec
<data_reference_p
> datarefs
;
770 struct data_reference
*dr
;
774 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
776 datarefs
= BB_VINFO_DATAREFS (bb_vinfo
);
778 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
779 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr
)))
780 && !vect_compute_data_ref_alignment (dr
))
784 /* Mark unsupported statement as unvectorizable. */
785 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr
))) = false;
796 /* Function vect_update_misalignment_for_peel
798 DR - the data reference whose misalignment is to be adjusted.
799 DR_PEEL - the data reference whose misalignment is being made
800 zero in the vector loop by the peel.
801 NPEEL - the number of iterations in the peel loop if the misalignment
802 of DR_PEEL is known at compile time. */
805 vect_update_misalignment_for_peel (struct data_reference
*dr
,
806 struct data_reference
*dr_peel
, int npeel
)
809 vec
<dr_p
> same_align_drs
;
810 struct data_reference
*current_dr
;
811 int dr_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr
))));
812 int dr_peel_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel
))));
813 stmt_vec_info stmt_info
= vinfo_for_stmt (DR_STMT (dr
));
814 stmt_vec_info peel_stmt_info
= vinfo_for_stmt (DR_STMT (dr_peel
));
816 /* For interleaved data accesses the step in the loop must be multiplied by
817 the size of the interleaving group. */
818 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
819 dr_size
*= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info
)));
820 if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info
))
821 dr_peel_size
*= GROUP_SIZE (peel_stmt_info
);
823 /* It can be assumed that the data refs with the same alignment as dr_peel
824 are aligned in the vector loop. */
826 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel
)));
827 FOR_EACH_VEC_ELT (same_align_drs
, i
, current_dr
)
829 if (current_dr
!= dr
)
831 gcc_assert (DR_MISALIGNMENT (dr
) / dr_size
==
832 DR_MISALIGNMENT (dr_peel
) / dr_peel_size
);
833 SET_DR_MISALIGNMENT (dr
, 0);
837 if (known_alignment_for_access_p (dr
)
838 && known_alignment_for_access_p (dr_peel
))
840 bool negative
= tree_int_cst_compare (DR_STEP (dr
), size_zero_node
) < 0;
841 int misal
= DR_MISALIGNMENT (dr
);
842 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
843 misal
+= negative
? -npeel
* dr_size
: npeel
* dr_size
;
844 misal
&= (TYPE_ALIGN (vectype
) / BITS_PER_UNIT
) - 1;
845 SET_DR_MISALIGNMENT (dr
, misal
);
849 if (dump_enabled_p ())
850 dump_printf_loc (MSG_NOTE
, vect_location
, "Setting misalignment to -1.");
851 SET_DR_MISALIGNMENT (dr
, -1);
855 /* Function vect_verify_datarefs_alignment
857 Return TRUE if all data references in the loop can be
858 handled with respect to alignment. */
861 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
863 vec
<data_reference_p
> datarefs
;
864 struct data_reference
*dr
;
865 enum dr_alignment_support supportable_dr_alignment
;
869 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
871 datarefs
= BB_VINFO_DATAREFS (bb_vinfo
);
873 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
875 gimple stmt
= DR_STMT (dr
);
876 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
878 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
881 /* For interleaving, only the alignment of the first access matters.
882 Skip statements marked as not vectorizable. */
883 if ((STMT_VINFO_GROUPED_ACCESS (stmt_info
)
884 && GROUP_FIRST_ELEMENT (stmt_info
) != stmt
)
885 || !STMT_VINFO_VECTORIZABLE (stmt_info
))
888 /* Strided loads perform only component accesses, alignment is
889 irrelevant for them. */
890 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
893 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
, false);
894 if (!supportable_dr_alignment
)
896 if (dump_enabled_p ())
899 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
900 "not vectorized: unsupported unaligned load.");
902 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
903 "not vectorized: unsupported unaligned "
906 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
911 if (supportable_dr_alignment
!= dr_aligned
&& dump_enabled_p ())
912 dump_printf_loc (MSG_NOTE
, vect_location
,
913 "Vectorizing an unaligned access.");
918 /* Given an memory reference EXP return whether its alignment is less
922 not_size_aligned (tree exp
)
924 if (!host_integerp (TYPE_SIZE (TREE_TYPE (exp
)), 1))
927 return (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (exp
)))
928 > get_object_alignment (exp
));
931 /* Function vector_alignment_reachable_p
933 Return true if vector alignment for DR is reachable by peeling
934 a few loop iterations. Return false otherwise. */
937 vector_alignment_reachable_p (struct data_reference
*dr
)
939 gimple stmt
= DR_STMT (dr
);
940 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
941 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
943 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
945 /* For interleaved access we peel only if number of iterations in
946 the prolog loop ({VF - misalignment}), is a multiple of the
947 number of the interleaved accesses. */
948 int elem_size
, mis_in_elements
;
949 int nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
951 /* FORNOW: handle only known alignment. */
952 if (!known_alignment_for_access_p (dr
))
955 elem_size
= GET_MODE_SIZE (TYPE_MODE (vectype
)) / nelements
;
956 mis_in_elements
= DR_MISALIGNMENT (dr
) / elem_size
;
958 if ((nelements
- mis_in_elements
) % GROUP_SIZE (stmt_info
))
962 /* If misalignment is known at the compile time then allow peeling
963 only if natural alignment is reachable through peeling. */
964 if (known_alignment_for_access_p (dr
) && !aligned_access_p (dr
))
966 HOST_WIDE_INT elmsize
=
967 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
968 if (dump_enabled_p ())
970 dump_printf_loc (MSG_NOTE
, vect_location
,
971 "data size =" HOST_WIDE_INT_PRINT_DEC
, elmsize
);
972 dump_printf (MSG_NOTE
,
973 ". misalignment = %d. ", DR_MISALIGNMENT (dr
));
975 if (DR_MISALIGNMENT (dr
) % elmsize
)
977 if (dump_enabled_p ())
978 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
979 "data size does not divide the misalignment.\n");
984 if (!known_alignment_for_access_p (dr
))
986 tree type
= TREE_TYPE (DR_REF (dr
));
987 bool is_packed
= not_size_aligned (DR_REF (dr
));
988 if (dump_enabled_p ())
989 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
990 "Unknown misalignment, is_packed = %d",is_packed
);
991 if (targetm
.vectorize
.vector_alignment_reachable (type
, is_packed
))
1001 /* Calculate the cost of the memory access represented by DR. */
1004 vect_get_data_access_cost (struct data_reference
*dr
,
1005 unsigned int *inside_cost
,
1006 unsigned int *outside_cost
,
1007 stmt_vector_for_cost
*body_cost_vec
)
1009 gimple stmt
= DR_STMT (dr
);
1010 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1011 int nunits
= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
));
1012 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1013 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1014 int ncopies
= vf
/ nunits
;
1016 if (DR_IS_READ (dr
))
1017 vect_get_load_cost (dr
, ncopies
, true, inside_cost
, outside_cost
,
1018 NULL
, body_cost_vec
, false);
1020 vect_get_store_cost (dr
, ncopies
, inside_cost
, body_cost_vec
);
1022 if (dump_enabled_p ())
1023 dump_printf_loc (MSG_NOTE
, vect_location
,
1024 "vect_get_data_access_cost: inside_cost = %d, "
1025 "outside_cost = %d.", *inside_cost
, *outside_cost
);
1030 vect_peeling_hash (const void *elem
)
1032 const struct _vect_peel_info
*peel_info
;
1034 peel_info
= (const struct _vect_peel_info
*) elem
;
1035 return (hashval_t
) peel_info
->npeel
;
1040 vect_peeling_hash_eq (const void *elem1
, const void *elem2
)
1042 const struct _vect_peel_info
*a
, *b
;
1044 a
= (const struct _vect_peel_info
*) elem1
;
1045 b
= (const struct _vect_peel_info
*) elem2
;
1046 return (a
->npeel
== b
->npeel
);
1050 /* Insert DR into peeling hash table with NPEEL as key. */
1053 vect_peeling_hash_insert (loop_vec_info loop_vinfo
, struct data_reference
*dr
,
1056 struct _vect_peel_info elem
, *slot
;
1058 bool supportable_dr_alignment
= vect_supportable_dr_alignment (dr
, true);
1061 slot
= (vect_peel_info
) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo
),
1067 slot
= XNEW (struct _vect_peel_info
);
1068 slot
->npeel
= npeel
;
1071 new_slot
= htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo
), slot
,
1076 if (!supportable_dr_alignment
&& !flag_vect_cost_model
)
1077 slot
->count
+= VECT_MAX_COST
;
1081 /* Traverse peeling hash table to find peeling option that aligns maximum
1082 number of data accesses. */
1085 vect_peeling_hash_get_most_frequent (void **slot
, void *data
)
1087 vect_peel_info elem
= (vect_peel_info
) *slot
;
1088 vect_peel_extended_info max
= (vect_peel_extended_info
) data
;
1090 if (elem
->count
> max
->peel_info
.count
1091 || (elem
->count
== max
->peel_info
.count
1092 && max
->peel_info
.npeel
> elem
->npeel
))
1094 max
->peel_info
.npeel
= elem
->npeel
;
1095 max
->peel_info
.count
= elem
->count
;
1096 max
->peel_info
.dr
= elem
->dr
;
1103 /* Traverse peeling hash table and calculate cost for each peeling option.
1104 Find the one with the lowest cost. */
1107 vect_peeling_hash_get_lowest_cost (void **slot
, void *data
)
1109 vect_peel_info elem
= (vect_peel_info
) *slot
;
1110 vect_peel_extended_info min
= (vect_peel_extended_info
) data
;
1111 int save_misalignment
, dummy
;
1112 unsigned int inside_cost
= 0, outside_cost
= 0, i
;
1113 gimple stmt
= DR_STMT (elem
->dr
);
1114 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1115 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1116 vec
<data_reference_p
> datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1117 struct data_reference
*dr
;
1118 stmt_vector_for_cost prologue_cost_vec
, body_cost_vec
, epilogue_cost_vec
;
1119 int single_iter_cost
;
1121 prologue_cost_vec
.create (2);
1122 body_cost_vec
.create (2);
1123 epilogue_cost_vec
.create (2);
1125 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
1127 stmt
= DR_STMT (dr
);
1128 stmt_info
= vinfo_for_stmt (stmt
);
1129 /* For interleaving, only the alignment of the first access
1131 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1132 && GROUP_FIRST_ELEMENT (stmt_info
) != stmt
)
1135 save_misalignment
= DR_MISALIGNMENT (dr
);
1136 vect_update_misalignment_for_peel (dr
, elem
->dr
, elem
->npeel
);
1137 vect_get_data_access_cost (dr
, &inside_cost
, &outside_cost
,
1139 SET_DR_MISALIGNMENT (dr
, save_misalignment
);
1142 single_iter_cost
= vect_get_single_scalar_iteration_cost (loop_vinfo
);
1143 outside_cost
+= vect_get_known_peeling_cost (loop_vinfo
, elem
->npeel
,
1144 &dummy
, single_iter_cost
,
1146 &epilogue_cost_vec
);
1148 /* Prologue and epilogue costs are added to the target model later.
1149 These costs depend only on the scalar iteration cost, the
1150 number of peeling iterations finally chosen, and the number of
1151 misaligned statements. So discard the information found here. */
1152 prologue_cost_vec
.release ();
1153 epilogue_cost_vec
.release ();
1155 if (inside_cost
< min
->inside_cost
1156 || (inside_cost
== min
->inside_cost
&& outside_cost
< min
->outside_cost
))
1158 min
->inside_cost
= inside_cost
;
1159 min
->outside_cost
= outside_cost
;
1160 min
->body_cost_vec
.release ();
1161 min
->body_cost_vec
= body_cost_vec
;
1162 min
->peel_info
.dr
= elem
->dr
;
1163 min
->peel_info
.npeel
= elem
->npeel
;
1166 body_cost_vec
.release ();
1172 /* Choose best peeling option by traversing peeling hash table and either
1173 choosing an option with the lowest cost (if cost model is enabled) or the
1174 option that aligns as many accesses as possible. */
1176 static struct data_reference
*
1177 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo
,
1178 unsigned int *npeel
,
1179 stmt_vector_for_cost
*body_cost_vec
)
1181 struct _vect_peel_extended_info res
;
1183 res
.peel_info
.dr
= NULL
;
1184 res
.body_cost_vec
= stmt_vector_for_cost();
1186 if (flag_vect_cost_model
)
1188 res
.inside_cost
= INT_MAX
;
1189 res
.outside_cost
= INT_MAX
;
1190 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo
),
1191 vect_peeling_hash_get_lowest_cost
, &res
);
1195 res
.peel_info
.count
= 0;
1196 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo
),
1197 vect_peeling_hash_get_most_frequent
, &res
);
1200 *npeel
= res
.peel_info
.npeel
;
1201 *body_cost_vec
= res
.body_cost_vec
;
1202 return res
.peel_info
.dr
;
1206 /* Function vect_enhance_data_refs_alignment
1208 This pass will use loop versioning and loop peeling in order to enhance
1209 the alignment of data references in the loop.
1211 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1212 original loop is to be vectorized. Any other loops that are created by
1213 the transformations performed in this pass - are not supposed to be
1214 vectorized. This restriction will be relaxed.
1216 This pass will require a cost model to guide it whether to apply peeling
1217 or versioning or a combination of the two. For example, the scheme that
1218 intel uses when given a loop with several memory accesses, is as follows:
1219 choose one memory access ('p') which alignment you want to force by doing
1220 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1221 other accesses are not necessarily aligned, or (2) use loop versioning to
1222 generate one loop in which all accesses are aligned, and another loop in
1223 which only 'p' is necessarily aligned.
1225 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1226 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1227 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1229 Devising a cost model is the most critical aspect of this work. It will
1230 guide us on which access to peel for, whether to use loop versioning, how
1231 many versions to create, etc. The cost model will probably consist of
1232 generic considerations as well as target specific considerations (on
1233 powerpc for example, misaligned stores are more painful than misaligned
1236 Here are the general steps involved in alignment enhancements:
1238 -- original loop, before alignment analysis:
1239 for (i=0; i<N; i++){
1240 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1241 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1244 -- After vect_compute_data_refs_alignment:
1245 for (i=0; i<N; i++){
1246 x = q[i]; # DR_MISALIGNMENT(q) = 3
1247 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1250 -- Possibility 1: we do loop versioning:
1252 for (i=0; i<N; i++){ # loop 1A
1253 x = q[i]; # DR_MISALIGNMENT(q) = 3
1254 p[i] = y; # DR_MISALIGNMENT(p) = 0
1258 for (i=0; i<N; i++){ # loop 1B
1259 x = q[i]; # DR_MISALIGNMENT(q) = 3
1260 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1264 -- Possibility 2: we do loop peeling:
1265 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1269 for (i = 3; i < N; i++){ # loop 2A
1270 x = q[i]; # DR_MISALIGNMENT(q) = 0
1271 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1274 -- Possibility 3: combination of loop peeling and versioning:
1275 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1280 for (i = 3; i<N; i++){ # loop 3A
1281 x = q[i]; # DR_MISALIGNMENT(q) = 0
1282 p[i] = y; # DR_MISALIGNMENT(p) = 0
1286 for (i = 3; i<N; i++){ # loop 3B
1287 x = q[i]; # DR_MISALIGNMENT(q) = 0
1288 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1292 These loops are later passed to loop_transform to be vectorized. The
1293 vectorizer will use the alignment information to guide the transformation
1294 (whether to generate regular loads/stores, or with special handling for
1298 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo
)
1300 vec
<data_reference_p
> datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1301 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1302 enum dr_alignment_support supportable_dr_alignment
;
1303 struct data_reference
*dr0
= NULL
, *first_store
= NULL
;
1304 struct data_reference
*dr
;
1306 bool do_peeling
= false;
1307 bool do_versioning
= false;
1310 stmt_vec_info stmt_info
;
1311 int vect_versioning_for_alias_required
;
1312 unsigned int npeel
= 0;
1313 bool all_misalignments_unknown
= true;
1314 unsigned int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1315 unsigned possible_npeel_number
= 1;
1317 unsigned int nelements
, mis
, same_align_drs_max
= 0;
1318 stmt_vector_for_cost body_cost_vec
= stmt_vector_for_cost();
1320 if (dump_enabled_p ())
1321 dump_printf_loc (MSG_NOTE
, vect_location
,
1322 "=== vect_enhance_data_refs_alignment ===");
1324 /* While cost model enhancements are expected in the future, the high level
1325 view of the code at this time is as follows:
1327 A) If there is a misaligned access then see if peeling to align
1328 this access can make all data references satisfy
1329 vect_supportable_dr_alignment. If so, update data structures
1330 as needed and return true.
1332 B) If peeling wasn't possible and there is a data reference with an
1333 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1334 then see if loop versioning checks can be used to make all data
1335 references satisfy vect_supportable_dr_alignment. If so, update
1336 data structures as needed and return true.
1338 C) If neither peeling nor versioning were successful then return false if
1339 any data reference does not satisfy vect_supportable_dr_alignment.
1341 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1343 Note, Possibility 3 above (which is peeling and versioning together) is not
1344 being done at this time. */
1346 /* (1) Peeling to force alignment. */
1348 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1350 + How many accesses will become aligned due to the peeling
1351 - How many accesses will become unaligned due to the peeling,
1352 and the cost of misaligned accesses.
1353 - The cost of peeling (the extra runtime checks, the increase
1356 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
1358 stmt
= DR_STMT (dr
);
1359 stmt_info
= vinfo_for_stmt (stmt
);
1361 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1364 /* For interleaving, only the alignment of the first access
1366 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1367 && GROUP_FIRST_ELEMENT (stmt_info
) != stmt
)
1370 /* For invariant accesses there is nothing to enhance. */
1371 if (integer_zerop (DR_STEP (dr
)))
1374 /* Strided loads perform only component accesses, alignment is
1375 irrelevant for them. */
1376 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
1379 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
, true);
1380 do_peeling
= vector_alignment_reachable_p (dr
);
1383 if (known_alignment_for_access_p (dr
))
1385 unsigned int npeel_tmp
;
1386 bool negative
= tree_int_cst_compare (DR_STEP (dr
),
1387 size_zero_node
) < 0;
1389 /* Save info about DR in the hash table. */
1390 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo
))
1391 LOOP_VINFO_PEELING_HTAB (loop_vinfo
) =
1392 htab_create (1, vect_peeling_hash
,
1393 vect_peeling_hash_eq
, free
);
1395 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1396 nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
1397 mis
= DR_MISALIGNMENT (dr
) / GET_MODE_SIZE (TYPE_MODE (
1398 TREE_TYPE (DR_REF (dr
))));
1399 npeel_tmp
= (negative
1400 ? (mis
- nelements
) : (nelements
- mis
))
1403 /* For multiple types, it is possible that the bigger type access
1404 will have more than one peeling option. E.g., a loop with two
1405 types: one of size (vector size / 4), and the other one of
1406 size (vector size / 8). Vectorization factor will be 8. If both
1407 access are misaligned by 3, the first one needs one scalar
1408 iteration to be aligned, and the second one needs 5. But the
1409 the first one will be aligned also by peeling 5 scalar
1410 iterations, and in that case both accesses will be aligned.
1411 Hence, except for the immediate peeling amount, we also want
1412 to try to add full vector size, while we don't exceed
1413 vectorization factor.
1414 We do this automatically for cost model, since we calculate cost
1415 for every peeling option. */
1416 if (!flag_vect_cost_model
)
1417 possible_npeel_number
= vf
/nelements
;
1419 /* Handle the aligned case. We may decide to align some other
1420 access, making DR unaligned. */
1421 if (DR_MISALIGNMENT (dr
) == 0)
1424 if (!flag_vect_cost_model
)
1425 possible_npeel_number
++;
1428 for (j
= 0; j
< possible_npeel_number
; j
++)
1430 gcc_assert (npeel_tmp
<= vf
);
1431 vect_peeling_hash_insert (loop_vinfo
, dr
, npeel_tmp
);
1432 npeel_tmp
+= nelements
;
1435 all_misalignments_unknown
= false;
1436 /* Data-ref that was chosen for the case that all the
1437 misalignments are unknown is not relevant anymore, since we
1438 have a data-ref with known alignment. */
1443 /* If we don't know all the misalignment values, we prefer
1444 peeling for data-ref that has maximum number of data-refs
1445 with the same alignment, unless the target prefers to align
1446 stores over load. */
1447 if (all_misalignments_unknown
)
1449 if (same_align_drs_max
1450 < STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).length ()
1454 = STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).length ();
1458 if (!first_store
&& DR_IS_WRITE (dr
))
1462 /* If there are both known and unknown misaligned accesses in the
1463 loop, we choose peeling amount according to the known
1467 if (!supportable_dr_alignment
)
1470 if (!first_store
&& DR_IS_WRITE (dr
))
1477 if (!aligned_access_p (dr
))
1479 if (dump_enabled_p ())
1480 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1481 "vector alignment may not be reachable");
1487 vect_versioning_for_alias_required
1488 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo
);
1490 /* Temporarily, if versioning for alias is required, we disable peeling
1491 until we support peeling and versioning. Often peeling for alignment
1492 will require peeling for loop-bound, which in turn requires that we
1493 know how to adjust the loop ivs after the loop. */
1494 if (vect_versioning_for_alias_required
1495 || !vect_can_advance_ivs_p (loop_vinfo
)
1496 || !slpeel_can_duplicate_loop_p (loop
, single_exit (loop
)))
1499 if (do_peeling
&& all_misalignments_unknown
1500 && vect_supportable_dr_alignment (dr0
, false))
1503 /* Check if the target requires to prefer stores over loads, i.e., if
1504 misaligned stores are more expensive than misaligned loads (taking
1505 drs with same alignment into account). */
1506 if (first_store
&& DR_IS_READ (dr0
))
1508 unsigned int load_inside_cost
= 0, load_outside_cost
= 0;
1509 unsigned int store_inside_cost
= 0, store_outside_cost
= 0;
1510 unsigned int load_inside_penalty
= 0, load_outside_penalty
= 0;
1511 unsigned int store_inside_penalty
= 0, store_outside_penalty
= 0;
1512 stmt_vector_for_cost dummy
;
1515 vect_get_data_access_cost (dr0
, &load_inside_cost
, &load_outside_cost
,
1517 vect_get_data_access_cost (first_store
, &store_inside_cost
,
1518 &store_outside_cost
, &dummy
);
1522 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1523 aligning the load DR0). */
1524 load_inside_penalty
= store_inside_cost
;
1525 load_outside_penalty
= store_outside_cost
;
1527 STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
1528 DR_STMT (first_store
))).iterate (i
, &dr
);
1530 if (DR_IS_READ (dr
))
1532 load_inside_penalty
+= load_inside_cost
;
1533 load_outside_penalty
+= load_outside_cost
;
1537 load_inside_penalty
+= store_inside_cost
;
1538 load_outside_penalty
+= store_outside_cost
;
1541 /* Calculate the penalty for leaving DR0 unaligned (by
1542 aligning the FIRST_STORE). */
1543 store_inside_penalty
= load_inside_cost
;
1544 store_outside_penalty
= load_outside_cost
;
1546 STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
1547 DR_STMT (dr0
))).iterate (i
, &dr
);
1549 if (DR_IS_READ (dr
))
1551 store_inside_penalty
+= load_inside_cost
;
1552 store_outside_penalty
+= load_outside_cost
;
1556 store_inside_penalty
+= store_inside_cost
;
1557 store_outside_penalty
+= store_outside_cost
;
1560 if (load_inside_penalty
> store_inside_penalty
1561 || (load_inside_penalty
== store_inside_penalty
1562 && load_outside_penalty
> store_outside_penalty
))
1566 /* In case there are only loads with different unknown misalignments, use
1567 peeling only if it may help to align other accesses in the loop. */
1569 && !STMT_VINFO_SAME_ALIGN_REFS (
1570 vinfo_for_stmt (DR_STMT (dr0
))).length ()
1571 && vect_supportable_dr_alignment (dr0
, false)
1572 != dr_unaligned_supported
)
1576 if (do_peeling
&& !dr0
)
1578 /* Peeling is possible, but there is no data access that is not supported
1579 unless aligned. So we try to choose the best possible peeling. */
1581 /* We should get here only if there are drs with known misalignment. */
1582 gcc_assert (!all_misalignments_unknown
);
1584 /* Choose the best peeling from the hash table. */
1585 dr0
= vect_peeling_hash_choose_best_peeling (loop_vinfo
, &npeel
,
1593 stmt
= DR_STMT (dr0
);
1594 stmt_info
= vinfo_for_stmt (stmt
);
1595 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1596 nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
1598 if (known_alignment_for_access_p (dr0
))
1600 bool negative
= tree_int_cst_compare (DR_STEP (dr0
),
1601 size_zero_node
) < 0;
1604 /* Since it's known at compile time, compute the number of
1605 iterations in the peeled loop (the peeling factor) for use in
1606 updating DR_MISALIGNMENT values. The peeling factor is the
1607 vectorization factor minus the misalignment as an element
1609 mis
= DR_MISALIGNMENT (dr0
);
1610 mis
/= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0
))));
1611 npeel
= ((negative
? mis
- nelements
: nelements
- mis
)
1615 /* For interleaved data access every iteration accesses all the
1616 members of the group, therefore we divide the number of iterations
1617 by the group size. */
1618 stmt_info
= vinfo_for_stmt (DR_STMT (dr0
));
1619 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1620 npeel
/= GROUP_SIZE (stmt_info
);
1622 if (dump_enabled_p ())
1623 dump_printf_loc (MSG_NOTE
, vect_location
,
1624 "Try peeling by %d", npeel
);
1627 /* Ensure that all data refs can be vectorized after the peel. */
1628 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
1630 int save_misalignment
;
1635 stmt
= DR_STMT (dr
);
1636 stmt_info
= vinfo_for_stmt (stmt
);
1637 /* For interleaving, only the alignment of the first access
1639 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1640 && GROUP_FIRST_ELEMENT (stmt_info
) != stmt
)
1643 /* Strided loads perform only component accesses, alignment is
1644 irrelevant for them. */
1645 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
1648 save_misalignment
= DR_MISALIGNMENT (dr
);
1649 vect_update_misalignment_for_peel (dr
, dr0
, npeel
);
1650 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
, false);
1651 SET_DR_MISALIGNMENT (dr
, save_misalignment
);
1653 if (!supportable_dr_alignment
)
1660 if (do_peeling
&& known_alignment_for_access_p (dr0
) && npeel
== 0)
1662 stat
= vect_verify_datarefs_alignment (loop_vinfo
, NULL
);
1667 body_cost_vec
.release ();
1674 stmt_info_for_cost
*si
;
1675 void *data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
1677 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1678 If the misalignment of DR_i is identical to that of dr0 then set
1679 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1680 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1681 by the peeling factor times the element size of DR_i (MOD the
1682 vectorization factor times the size). Otherwise, the
1683 misalignment of DR_i must be set to unknown. */
1684 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
1686 vect_update_misalignment_for_peel (dr
, dr0
, npeel
);
1688 LOOP_VINFO_UNALIGNED_DR (loop_vinfo
) = dr0
;
1690 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
) = npeel
;
1692 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
) = DR_MISALIGNMENT (dr0
);
1693 SET_DR_MISALIGNMENT (dr0
, 0);
1694 if (dump_enabled_p ())
1696 dump_printf_loc (MSG_NOTE
, vect_location
,
1697 "Alignment of access forced using peeling.");
1698 dump_printf_loc (MSG_NOTE
, vect_location
,
1699 "Peeling for alignment will be applied.");
1701 /* We've delayed passing the inside-loop peeling costs to the
1702 target cost model until we were sure peeling would happen.
1704 if (body_cost_vec
.exists ())
1706 FOR_EACH_VEC_ELT (body_cost_vec
, i
, si
)
1708 struct _stmt_vec_info
*stmt_info
1709 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
1710 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
1711 si
->misalign
, vect_body
);
1713 body_cost_vec
.release ();
1716 stat
= vect_verify_datarefs_alignment (loop_vinfo
, NULL
);
1722 body_cost_vec
.release ();
1724 /* (2) Versioning to force alignment. */
1726 /* Try versioning if:
1727 1) flag_tree_vect_loop_version is TRUE
1728 2) optimize loop for speed
1729 3) there is at least one unsupported misaligned data ref with an unknown
1731 4) all misaligned data refs with a known misalignment are supported, and
1732 5) the number of runtime alignment checks is within reason. */
1735 flag_tree_vect_loop_version
1736 && optimize_loop_nest_for_speed_p (loop
)
1737 && (!loop
->inner
); /* FORNOW */
1741 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
1743 stmt
= DR_STMT (dr
);
1744 stmt_info
= vinfo_for_stmt (stmt
);
1746 /* For interleaving, only the alignment of the first access
1748 if (aligned_access_p (dr
)
1749 || (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1750 && GROUP_FIRST_ELEMENT (stmt_info
) != stmt
))
1753 /* Strided loads perform only component accesses, alignment is
1754 irrelevant for them. */
1755 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
1758 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
, false);
1760 if (!supportable_dr_alignment
)
1766 if (known_alignment_for_access_p (dr
)
1767 || LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
).length ()
1768 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS
))
1770 do_versioning
= false;
1774 stmt
= DR_STMT (dr
);
1775 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1776 gcc_assert (vectype
);
1778 /* The rightmost bits of an aligned address must be zeros.
1779 Construct the mask needed for this test. For example,
1780 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1781 mask must be 15 = 0xf. */
1782 mask
= GET_MODE_SIZE (TYPE_MODE (vectype
)) - 1;
1784 /* FORNOW: use the same mask to test all potentially unaligned
1785 references in the loop. The vectorizer currently supports
1786 a single vector size, see the reference to
1787 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1788 vectorization factor is computed. */
1789 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo
)
1790 || LOOP_VINFO_PTR_MASK (loop_vinfo
) == mask
);
1791 LOOP_VINFO_PTR_MASK (loop_vinfo
) = mask
;
1792 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
).safe_push (
1797 /* Versioning requires at least one misaligned data reference. */
1798 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo
))
1799 do_versioning
= false;
1800 else if (!do_versioning
)
1801 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
).truncate (0);
1806 vec
<gimple
> may_misalign_stmts
1807 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
);
1810 /* It can now be assumed that the data references in the statements
1811 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1812 of the loop being vectorized. */
1813 FOR_EACH_VEC_ELT (may_misalign_stmts
, i
, stmt
)
1815 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1816 dr
= STMT_VINFO_DATA_REF (stmt_info
);
1817 SET_DR_MISALIGNMENT (dr
, 0);
1818 if (dump_enabled_p ())
1819 dump_printf_loc (MSG_NOTE
, vect_location
,
1820 "Alignment of access forced using versioning.");
1823 if (dump_enabled_p ())
1824 dump_printf_loc (MSG_NOTE
, vect_location
,
1825 "Versioning for alignment will be applied.");
1827 /* Peeling and versioning can't be done together at this time. */
1828 gcc_assert (! (do_peeling
&& do_versioning
));
1830 stat
= vect_verify_datarefs_alignment (loop_vinfo
, NULL
);
1835 /* This point is reached if neither peeling nor versioning is being done. */
1836 gcc_assert (! (do_peeling
|| do_versioning
));
1838 stat
= vect_verify_datarefs_alignment (loop_vinfo
, NULL
);
1843 /* Function vect_find_same_alignment_drs.
1845 Update group and alignment relations according to the chosen
1846 vectorization factor. */
1849 vect_find_same_alignment_drs (struct data_dependence_relation
*ddr
,
1850 loop_vec_info loop_vinfo
)
1853 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1854 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1855 struct data_reference
*dra
= DDR_A (ddr
);
1856 struct data_reference
*drb
= DDR_B (ddr
);
1857 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
1858 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
1859 int dra_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra
))));
1860 int drb_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb
))));
1861 lambda_vector dist_v
;
1862 unsigned int loop_depth
;
1864 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
1870 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1873 /* Loop-based vectorization and known data dependence. */
1874 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
1877 /* Data-dependence analysis reports a distance vector of zero
1878 for data-references that overlap only in the first iteration
1879 but have different sign step (see PR45764).
1880 So as a sanity check require equal DR_STEP. */
1881 if (!operand_equal_p (DR_STEP (dra
), DR_STEP (drb
), 0))
1884 loop_depth
= index_in_loop_nest (loop
->num
, DDR_LOOP_NEST (ddr
));
1885 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, dist_v
)
1887 int dist
= dist_v
[loop_depth
];
1889 if (dump_enabled_p ())
1890 dump_printf_loc (MSG_NOTE
, vect_location
,
1891 "dependence distance = %d.", dist
);
1893 /* Same loop iteration. */
1895 || (dist
% vectorization_factor
== 0 && dra_size
== drb_size
))
1897 /* Two references with distance zero have the same alignment. */
1898 STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a
).safe_push (drb
);
1899 STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b
).safe_push (dra
);
1900 if (dump_enabled_p ())
1902 dump_printf_loc (MSG_NOTE
, vect_location
,
1903 "accesses have the same alignment.");
1904 dump_printf (MSG_NOTE
,
1905 "dependence distance modulo vf == 0 between ");
1906 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (dra
));
1907 dump_printf (MSG_NOTE
, " and ");
1908 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (drb
));
1915 /* Function vect_analyze_data_refs_alignment
1917 Analyze the alignment of the data-references in the loop.
1918 Return FALSE if a data reference is found that cannot be vectorized. */
1921 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo
,
1922 bb_vec_info bb_vinfo
)
1924 if (dump_enabled_p ())
1925 dump_printf_loc (MSG_NOTE
, vect_location
,
1926 "=== vect_analyze_data_refs_alignment ===");
1928 /* Mark groups of data references with same alignment using
1929 data dependence information. */
1932 vec
<ddr_p
> ddrs
= LOOP_VINFO_DDRS (loop_vinfo
);
1933 struct data_dependence_relation
*ddr
;
1936 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
1937 vect_find_same_alignment_drs (ddr
, loop_vinfo
);
1940 if (!vect_compute_data_refs_alignment (loop_vinfo
, bb_vinfo
))
1942 if (dump_enabled_p ())
1943 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1944 "not vectorized: can't calculate alignment "
1953 /* Analyze groups of accesses: check that DR belongs to a group of
1954 accesses of legal size, step, etc. Detect gaps, single element
1955 interleaving, and other special cases. Set grouped access info.
1956 Collect groups of strided stores for further use in SLP analysis. */
1959 vect_analyze_group_access (struct data_reference
*dr
)
1961 tree step
= DR_STEP (dr
);
1962 tree scalar_type
= TREE_TYPE (DR_REF (dr
));
1963 HOST_WIDE_INT type_size
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type
));
1964 gimple stmt
= DR_STMT (dr
);
1965 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1966 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1967 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1968 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
1969 HOST_WIDE_INT groupsize
, last_accessed_element
= 1;
1970 bool slp_impossible
= false;
1971 struct loop
*loop
= NULL
;
1974 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1976 /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
1977 size of the interleaving group (including gaps). */
1978 groupsize
= dr_step
/ type_size
;
1980 /* Not consecutive access is possible only if it is a part of interleaving. */
1981 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1983 /* Check if it this DR is a part of interleaving, and is a single
1984 element of the group that is accessed in the loop. */
1986 /* Gaps are supported only for loads. STEP must be a multiple of the type
1987 size. The size of the group must be a power of 2. */
1989 && (dr_step
% type_size
) == 0
1991 && exact_log2 (groupsize
) != -1)
1993 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = stmt
;
1994 GROUP_SIZE (vinfo_for_stmt (stmt
)) = groupsize
;
1995 if (dump_enabled_p ())
1997 dump_printf_loc (MSG_NOTE
, vect_location
,
1998 "Detected single element interleaving ");
1999 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (dr
));
2000 dump_printf (MSG_NOTE
, " step ");
2001 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, step
);
2006 if (dump_enabled_p ())
2007 dump_printf_loc (MSG_NOTE
, vect_location
,
2008 "Data access with gaps requires scalar "
2012 if (dump_enabled_p ())
2013 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2014 "Peeling for outer loop is not"
2019 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2025 if (dump_enabled_p ())
2027 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2028 "not consecutive access ");
2029 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
2034 /* Mark the statement as unvectorizable. */
2035 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr
))) = false;
2042 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) == stmt
)
2044 /* First stmt in the interleaving chain. Check the chain. */
2045 gimple next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
));
2046 struct data_reference
*data_ref
= dr
;
2047 unsigned int count
= 1;
2049 tree prev_init
= DR_INIT (data_ref
);
2051 HOST_WIDE_INT diff
, count_in_bytes
, gaps
= 0;
2055 /* Skip same data-refs. In case that two or more stmts share
2056 data-ref (supported only for loads), we vectorize only the first
2057 stmt, and the rest get their vectorized loads from the first
2059 if (!tree_int_cst_compare (DR_INIT (data_ref
),
2060 DR_INIT (STMT_VINFO_DATA_REF (
2061 vinfo_for_stmt (next
)))))
2063 if (DR_IS_WRITE (data_ref
))
2065 if (dump_enabled_p ())
2066 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2067 "Two store stmts share the same dr.");
2071 /* Check that there is no load-store dependencies for this loads
2072 to prevent a case of load-store-load to the same location. */
2073 if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next
))
2074 || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev
)))
2076 if (dump_enabled_p ())
2077 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2078 "READ_WRITE dependence in interleaving.");
2082 /* For load use the same data-ref load. */
2083 GROUP_SAME_DR_STMT (vinfo_for_stmt (next
)) = prev
;
2086 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
2092 /* Check that all the accesses have the same STEP. */
2093 next_step
= DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
2094 if (tree_int_cst_compare (step
, next_step
))
2096 if (dump_enabled_p ())
2097 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2098 "not consecutive access in interleaving");
2102 data_ref
= STMT_VINFO_DATA_REF (vinfo_for_stmt (next
));
2103 /* Check that the distance between two accesses is equal to the type
2104 size. Otherwise, we have gaps. */
2105 diff
= (TREE_INT_CST_LOW (DR_INIT (data_ref
))
2106 - TREE_INT_CST_LOW (prev_init
)) / type_size
;
2109 /* FORNOW: SLP of accesses with gaps is not supported. */
2110 slp_impossible
= true;
2111 if (DR_IS_WRITE (data_ref
))
2113 if (dump_enabled_p ())
2114 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2115 "interleaved store with gaps");
2122 last_accessed_element
+= diff
;
2124 /* Store the gap from the previous member of the group. If there is no
2125 gap in the access, GROUP_GAP is always 1. */
2126 GROUP_GAP (vinfo_for_stmt (next
)) = diff
;
2128 prev_init
= DR_INIT (data_ref
);
2129 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
2130 /* Count the number of data-refs in the chain. */
2134 /* COUNT is the number of accesses found, we multiply it by the size of
2135 the type to get COUNT_IN_BYTES. */
2136 count_in_bytes
= type_size
* count
;
2138 /* Check that the size of the interleaving (including gaps) is not
2139 greater than STEP. */
2140 if (dr_step
&& dr_step
< count_in_bytes
+ gaps
* type_size
)
2142 if (dump_enabled_p ())
2144 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2145 "interleaving size is greater than step for ");
2146 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, DR_REF (dr
));
2151 /* Check that the size of the interleaving is equal to STEP for stores,
2152 i.e., that there are no gaps. */
2153 if (dr_step
&& dr_step
!= count_in_bytes
)
2155 if (DR_IS_READ (dr
))
2157 slp_impossible
= true;
2158 /* There is a gap after the last load in the group. This gap is a
2159 difference between the groupsize and the number of elements.
2160 When there is no gap, this difference should be 0. */
2161 GROUP_GAP (vinfo_for_stmt (stmt
)) = groupsize
- count
;
2165 if (dump_enabled_p ())
2166 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2167 "interleaved store with gaps");
2172 /* Check that STEP is a multiple of type size. */
2173 if (dr_step
&& (dr_step
% type_size
) != 0)
2175 if (dump_enabled_p ())
2177 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2178 "step is not a multiple of type size: step ");
2179 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, step
);
2180 dump_printf (MSG_MISSED_OPTIMIZATION
, " size ");
2181 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
2182 TYPE_SIZE_UNIT (scalar_type
));
2190 GROUP_SIZE (vinfo_for_stmt (stmt
)) = groupsize
;
2191 if (dump_enabled_p ())
2192 dump_printf_loc (MSG_NOTE
, vect_location
,
2193 "Detected interleaving of size %d", (int)groupsize
);
2195 /* SLP: create an SLP data structure for every interleaving group of
2196 stores for further analysis in vect_analyse_slp. */
2197 if (DR_IS_WRITE (dr
) && !slp_impossible
)
2200 LOOP_VINFO_GROUPED_STORES (loop_vinfo
).safe_push (stmt
);
2202 BB_VINFO_GROUPED_STORES (bb_vinfo
).safe_push (stmt
);
2205 /* There is a gap in the end of the group. */
2206 if (groupsize
- last_accessed_element
> 0 && loop_vinfo
)
2208 if (dump_enabled_p ())
2209 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2210 "Data access with gaps requires scalar "
2214 if (dump_enabled_p ())
2215 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2216 "Peeling for outer loop is not supported");
2220 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2228 /* Analyze the access pattern of the data-reference DR.
2229 In case of non-consecutive accesses call vect_analyze_group_access() to
2230 analyze groups of accesses. */
2233 vect_analyze_data_ref_access (struct data_reference
*dr
)
2235 tree step
= DR_STEP (dr
);
2236 tree scalar_type
= TREE_TYPE (DR_REF (dr
));
2237 gimple stmt
= DR_STMT (dr
);
2238 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2239 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2240 struct loop
*loop
= NULL
;
2243 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2245 if (loop_vinfo
&& !step
)
2247 if (dump_enabled_p ())
2248 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2249 "bad data-ref access in loop");
2253 /* Allow invariant loads in loops. */
2254 if (loop_vinfo
&& integer_zerop (step
))
2256 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = NULL
;
2257 return DR_IS_READ (dr
);
2260 if (loop
&& nested_in_vect_loop_p (loop
, stmt
))
2262 /* Interleaved accesses are not yet supported within outer-loop
2263 vectorization for references in the inner-loop. */
2264 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = NULL
;
2266 /* For the rest of the analysis we use the outer-loop step. */
2267 step
= STMT_VINFO_DR_STEP (stmt_info
);
2268 if (integer_zerop (step
))
2270 if (dump_enabled_p ())
2271 dump_printf_loc (MSG_NOTE
, vect_location
,
2272 "zero step in outer loop.");
2273 if (DR_IS_READ (dr
))
2281 if (TREE_CODE (step
) == INTEGER_CST
)
2283 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
2284 if (!tree_int_cst_compare (step
, TYPE_SIZE_UNIT (scalar_type
))
2286 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type
), -dr_step
)))
2288 /* Mark that it is not interleaving. */
2289 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = NULL
;
2294 if (loop
&& nested_in_vect_loop_p (loop
, stmt
))
2296 if (dump_enabled_p ())
2297 dump_printf_loc (MSG_NOTE
, vect_location
,
2298 "grouped access in outer loop.");
2302 /* Assume this is a DR handled by non-constant strided load case. */
2303 if (TREE_CODE (step
) != INTEGER_CST
)
2304 return STMT_VINFO_STRIDE_LOAD_P (stmt_info
);
2306 /* Not consecutive access - check if it's a part of interleaving group. */
2307 return vect_analyze_group_access (dr
);
2310 /* Compare two data-references DRA and DRB to group them into chunks
2311 suitable for grouping. */
2314 dr_group_sort_cmp (const void *dra_
, const void *drb_
)
2316 data_reference_p dra
= *(data_reference_p
*)const_cast<void *>(dra_
);
2317 data_reference_p drb
= *(data_reference_p
*)const_cast<void *>(drb_
);
2321 /* Stabilize sort. */
2325 /* Ordering of DRs according to base. */
2326 if (!operand_equal_p (DR_BASE_ADDRESS (dra
), DR_BASE_ADDRESS (drb
), 0))
2328 h1
= iterative_hash_expr (DR_BASE_ADDRESS (dra
), 0);
2329 h2
= iterative_hash_expr (DR_BASE_ADDRESS (drb
), 0);
2331 return h1
< h2
? -1 : 1;
2334 /* And according to DR_OFFSET. */
2335 if (!dr_equal_offsets_p (dra
, drb
))
2337 h1
= iterative_hash_expr (DR_OFFSET (dra
), 0);
2338 h2
= iterative_hash_expr (DR_OFFSET (drb
), 0);
2340 return h1
< h2
? -1 : 1;
2343 /* Put reads before writes. */
2344 if (DR_IS_READ (dra
) != DR_IS_READ (drb
))
2345 return DR_IS_READ (dra
) ? -1 : 1;
2347 /* Then sort after access size. */
2348 if (!operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra
))),
2349 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb
))), 0))
2351 h1
= iterative_hash_expr (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra
))), 0);
2352 h2
= iterative_hash_expr (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb
))), 0);
2354 return h1
< h2
? -1 : 1;
2357 /* And after step. */
2358 if (!operand_equal_p (DR_STEP (dra
), DR_STEP (drb
), 0))
2360 h1
= iterative_hash_expr (DR_STEP (dra
), 0);
2361 h2
= iterative_hash_expr (DR_STEP (drb
), 0);
2363 return h1
< h2
? -1 : 1;
2366 /* Then sort after DR_INIT. In case of identical DRs sort after stmt UID. */
2367 cmp
= tree_int_cst_compare (DR_INIT (dra
), DR_INIT (drb
));
2369 return gimple_uid (DR_STMT (dra
)) < gimple_uid (DR_STMT (drb
)) ? -1 : 1;
2373 /* Function vect_analyze_data_ref_accesses.
2375 Analyze the access pattern of all the data references in the loop.
2377 FORNOW: the only access pattern that is considered vectorizable is a
2378 simple step 1 (consecutive) access.
2380 FORNOW: handle only arrays and pointer accesses. */
2383 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
2386 vec
<data_reference_p
> datarefs
;
2387 struct data_reference
*dr
;
2389 if (dump_enabled_p ())
2390 dump_printf_loc (MSG_NOTE
, vect_location
,
2391 "=== vect_analyze_data_ref_accesses ===");
2394 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
2396 datarefs
= BB_VINFO_DATAREFS (bb_vinfo
);
2398 if (datarefs
.is_empty ())
2401 /* Sort the array of datarefs to make building the interleaving chains
2403 qsort (datarefs
.address(), datarefs
.length (),
2404 sizeof (data_reference_p
), dr_group_sort_cmp
);
2406 /* Build the interleaving chains. */
2407 for (i
= 0; i
< datarefs
.length () - 1;)
2409 data_reference_p dra
= datarefs
[i
];
2410 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
2411 stmt_vec_info lastinfo
= NULL
;
2412 for (i
= i
+ 1; i
< datarefs
.length (); ++i
)
2414 data_reference_p drb
= datarefs
[i
];
2415 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
2417 /* ??? Imperfect sorting (non-compatible types, non-modulo
2418 accesses, same accesses) can lead to a group to be artificially
2419 split here as we don't just skip over those. If it really
2420 matters we can push those to a worklist and re-iterate
2421 over them. The we can just skip ahead to the next DR here. */
2423 /* Check that the data-refs have same first location (except init)
2424 and they are both either store or load (not load and store). */
2425 if (DR_IS_READ (dra
) != DR_IS_READ (drb
)
2426 || !operand_equal_p (DR_BASE_ADDRESS (dra
),
2427 DR_BASE_ADDRESS (drb
), 0)
2428 || !dr_equal_offsets_p (dra
, drb
))
2431 /* Check that the data-refs have the same constant size and step. */
2432 tree sza
= TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra
)));
2433 tree szb
= TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb
)));
2434 if (!host_integerp (sza
, 1)
2435 || !host_integerp (szb
, 1)
2436 || !tree_int_cst_equal (sza
, szb
)
2437 || !host_integerp (DR_STEP (dra
), 0)
2438 || !host_integerp (DR_STEP (drb
), 0)
2439 || !tree_int_cst_equal (DR_STEP (dra
), DR_STEP (drb
)))
2442 /* Do not place the same access in the interleaving chain twice. */
2443 if (tree_int_cst_compare (DR_INIT (dra
), DR_INIT (drb
)) == 0)
2446 /* Check the types are compatible.
2447 ??? We don't distinguish this during sorting. */
2448 if (!types_compatible_p (TREE_TYPE (DR_REF (dra
)),
2449 TREE_TYPE (DR_REF (drb
))))
2452 /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). */
2453 HOST_WIDE_INT init_a
= TREE_INT_CST_LOW (DR_INIT (dra
));
2454 HOST_WIDE_INT init_b
= TREE_INT_CST_LOW (DR_INIT (drb
));
2455 gcc_assert (init_a
< init_b
);
2457 /* If init_b == init_a + the size of the type * k, we have an
2458 interleaving, and DRA is accessed before DRB. */
2459 HOST_WIDE_INT type_size_a
= TREE_INT_CST_LOW (sza
);
2460 if ((init_b
- init_a
) % type_size_a
!= 0)
2463 /* The step (if not zero) is greater than the difference between
2464 data-refs' inits. This splits groups into suitable sizes. */
2465 HOST_WIDE_INT step
= TREE_INT_CST_LOW (DR_STEP (dra
));
2466 if (step
!= 0 && step
<= (init_b
- init_a
))
2469 if (dump_enabled_p ())
2471 dump_printf_loc (MSG_NOTE
, vect_location
,
2472 "Detected interleaving ");
2473 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (dra
));
2474 dump_printf (MSG_NOTE
, " and ");
2475 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (drb
));
2478 /* Link the found element into the group list. */
2479 if (!GROUP_FIRST_ELEMENT (stmtinfo_a
))
2481 GROUP_FIRST_ELEMENT (stmtinfo_a
) = DR_STMT (dra
);
2482 lastinfo
= stmtinfo_a
;
2484 GROUP_FIRST_ELEMENT (stmtinfo_b
) = DR_STMT (dra
);
2485 GROUP_NEXT_ELEMENT (lastinfo
) = DR_STMT (drb
);
2486 lastinfo
= stmtinfo_b
;
2490 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
2491 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr
)))
2492 && !vect_analyze_data_ref_access (dr
))
2494 if (dump_enabled_p ())
2495 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2496 "not vectorized: complicated access pattern.");
2500 /* Mark the statement as not vectorizable. */
2501 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr
))) = false;
2511 /* Function vect_prune_runtime_alias_test_list.
2513 Prune a list of ddrs to be tested at run-time by versioning for alias.
2514 Return FALSE if resulting list of ddrs is longer then allowed by
2515 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2518 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo
)
2521 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
);
2524 if (dump_enabled_p ())
2525 dump_printf_loc (MSG_NOTE
, vect_location
,
2526 "=== vect_prune_runtime_alias_test_list ===");
2528 for (i
= 0; i
< ddrs
.length (); )
2536 for (j
= 0; j
< i
; j
++)
2538 ddr_p ddr_j
= ddrs
[j
];
2540 if (vect_vfa_range_equal (ddr_i
, ddr_j
))
2542 if (dump_enabled_p ())
2544 dump_printf_loc (MSG_NOTE
, vect_location
,
2545 "found equal ranges ");
2546 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (DDR_A (ddr_i
)));
2547 dump_printf (MSG_NOTE
, ", ");
2548 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (DDR_B (ddr_i
)));
2549 dump_printf (MSG_NOTE
, " and ");
2550 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (DDR_A (ddr_j
)));
2551 dump_printf (MSG_NOTE
, ", ");
2552 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (DDR_B (ddr_j
)));
2561 ddrs
.ordered_remove (i
);
2567 if (ddrs
.length () >
2568 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS
))
2570 if (dump_enabled_p ())
2572 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2573 "disable versioning for alias - max number of "
2574 "generated checks exceeded.");
2577 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
).truncate (0);
2585 /* Check whether a non-affine read in stmt is suitable for gather load
2586 and if so, return a builtin decl for that operation. */
2589 vect_check_gather (gimple stmt
, loop_vec_info loop_vinfo
, tree
*basep
,
2590 tree
*offp
, int *scalep
)
2592 HOST_WIDE_INT scale
= 1, pbitpos
, pbitsize
;
2593 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2594 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2595 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2596 tree offtype
= NULL_TREE
;
2597 tree decl
, base
, off
;
2598 enum machine_mode pmode
;
2599 int punsignedp
, pvolatilep
;
2601 /* The gather builtins need address of the form
2602 loop_invariant + vector * {1, 2, 4, 8}
2604 loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
2605 Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
2606 of loop invariants/SSA_NAMEs defined in the loop, with casts,
2607 multiplications and additions in it. To get a vector, we need
2608 a single SSA_NAME that will be defined in the loop and will
2609 contain everything that is not loop invariant and that can be
2610 vectorized. The following code attempts to find such a preexistng
2611 SSA_NAME OFF and put the loop invariants into a tree BASE
2612 that can be gimplified before the loop. */
2613 base
= get_inner_reference (DR_REF (dr
), &pbitsize
, &pbitpos
, &off
,
2614 &pmode
, &punsignedp
, &pvolatilep
, false);
2615 gcc_assert (base
!= NULL_TREE
&& (pbitpos
% BITS_PER_UNIT
) == 0);
2617 if (TREE_CODE (base
) == MEM_REF
)
2619 if (!integer_zerop (TREE_OPERAND (base
, 1)))
2621 if (off
== NULL_TREE
)
2623 double_int moff
= mem_ref_offset (base
);
2624 off
= double_int_to_tree (sizetype
, moff
);
2627 off
= size_binop (PLUS_EXPR
, off
,
2628 fold_convert (sizetype
, TREE_OPERAND (base
, 1)));
2630 base
= TREE_OPERAND (base
, 0);
2633 base
= build_fold_addr_expr (base
);
2635 if (off
== NULL_TREE
)
2636 off
= size_zero_node
;
2638 /* If base is not loop invariant, either off is 0, then we start with just
2639 the constant offset in the loop invariant BASE and continue with base
2640 as OFF, otherwise give up.
2641 We could handle that case by gimplifying the addition of base + off
2642 into some SSA_NAME and use that as off, but for now punt. */
2643 if (!expr_invariant_in_loop_p (loop
, base
))
2645 if (!integer_zerop (off
))
2648 base
= size_int (pbitpos
/ BITS_PER_UNIT
);
2650 /* Otherwise put base + constant offset into the loop invariant BASE
2651 and continue with OFF. */
2654 base
= fold_convert (sizetype
, base
);
2655 base
= size_binop (PLUS_EXPR
, base
, size_int (pbitpos
/ BITS_PER_UNIT
));
2658 /* OFF at this point may be either a SSA_NAME or some tree expression
2659 from get_inner_reference. Try to peel off loop invariants from it
2660 into BASE as long as possible. */
2662 while (offtype
== NULL_TREE
)
2664 enum tree_code code
;
2665 tree op0
, op1
, add
= NULL_TREE
;
2667 if (TREE_CODE (off
) == SSA_NAME
)
2669 gimple def_stmt
= SSA_NAME_DEF_STMT (off
);
2671 if (expr_invariant_in_loop_p (loop
, off
))
2674 if (gimple_code (def_stmt
) != GIMPLE_ASSIGN
)
2677 op0
= gimple_assign_rhs1 (def_stmt
);
2678 code
= gimple_assign_rhs_code (def_stmt
);
2679 op1
= gimple_assign_rhs2 (def_stmt
);
2683 if (get_gimple_rhs_class (TREE_CODE (off
)) == GIMPLE_TERNARY_RHS
)
2685 code
= TREE_CODE (off
);
2686 extract_ops_from_tree (off
, &code
, &op0
, &op1
);
2690 case POINTER_PLUS_EXPR
:
2692 if (expr_invariant_in_loop_p (loop
, op0
))
2697 add
= fold_convert (sizetype
, add
);
2699 add
= size_binop (MULT_EXPR
, add
, size_int (scale
));
2700 base
= size_binop (PLUS_EXPR
, base
, add
);
2703 if (expr_invariant_in_loop_p (loop
, op1
))
2711 if (expr_invariant_in_loop_p (loop
, op1
))
2713 add
= fold_convert (sizetype
, op1
);
2714 add
= size_binop (MINUS_EXPR
, size_zero_node
, add
);
2720 if (scale
== 1 && host_integerp (op1
, 0))
2722 scale
= tree_low_cst (op1
, 0);
2731 if (!POINTER_TYPE_P (TREE_TYPE (op0
))
2732 && !INTEGRAL_TYPE_P (TREE_TYPE (op0
)))
2734 if (TYPE_PRECISION (TREE_TYPE (op0
))
2735 == TYPE_PRECISION (TREE_TYPE (off
)))
2740 if (TYPE_PRECISION (TREE_TYPE (op0
))
2741 < TYPE_PRECISION (TREE_TYPE (off
)))
2744 offtype
= TREE_TYPE (off
);
2755 /* If at the end OFF still isn't a SSA_NAME or isn't
2756 defined in the loop, punt. */
2757 if (TREE_CODE (off
) != SSA_NAME
2758 || expr_invariant_in_loop_p (loop
, off
))
2761 if (offtype
== NULL_TREE
)
2762 offtype
= TREE_TYPE (off
);
2764 decl
= targetm
.vectorize
.builtin_gather (STMT_VINFO_VECTYPE (stmt_info
),
2766 if (decl
== NULL_TREE
)
2778 /* Check wether a non-affine load in STMT (being in the loop referred to
2779 in LOOP_VINFO) is suitable for handling as strided load. That is the case
2780 if its address is a simple induction variable. If so return the base
2781 of that induction variable in *BASEP and the (loop-invariant) step
2782 in *STEPP, both only when that pointer is non-zero.
2784 This handles ARRAY_REFs (with variant index) and MEM_REFs (with variant
2785 base pointer) only. */
2788 vect_check_strided_load (gimple stmt
, loop_vec_info loop_vinfo
)
2790 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2791 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2792 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2796 if (!DR_IS_READ (dr
))
2801 if (TREE_CODE (base
) == ARRAY_REF
)
2803 off
= TREE_OPERAND (base
, 1);
2804 base
= TREE_OPERAND (base
, 0);
2806 else if (TREE_CODE (base
) == MEM_REF
)
2808 off
= TREE_OPERAND (base
, 0);
2809 base
= TREE_OPERAND (base
, 1);
2814 if (TREE_CODE (off
) != SSA_NAME
)
2817 if (!expr_invariant_in_loop_p (loop
, base
)
2818 || !simple_iv (loop
, loop_containing_stmt (stmt
), off
, &iv
, true))
2824 /* Function vect_analyze_data_refs.
2826 Find all the data references in the loop or basic block.
2828 The general structure of the analysis of data refs in the vectorizer is as
2830 1- vect_analyze_data_refs(loop/bb): call
2831 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2832 in the loop/bb and their dependences.
2833 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2834 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2835 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2840 vect_analyze_data_refs (loop_vec_info loop_vinfo
,
2841 bb_vec_info bb_vinfo
,
2844 struct loop
*loop
= NULL
;
2845 basic_block bb
= NULL
;
2847 vec
<data_reference_p
> datarefs
;
2848 struct data_reference
*dr
;
2851 if (dump_enabled_p ())
2852 dump_printf_loc (MSG_NOTE
, vect_location
,
2853 "=== vect_analyze_data_refs ===\n");
2857 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2858 if (!find_loop_nest (loop
, &LOOP_VINFO_LOOP_NEST (loop_vinfo
))
2859 || find_data_references_in_loop
2860 (loop
, &LOOP_VINFO_DATAREFS (loop_vinfo
)))
2862 if (dump_enabled_p ())
2863 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2864 "not vectorized: loop contains function calls"
2865 " or data references that cannot be analyzed");
2869 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
2873 gimple_stmt_iterator gsi
;
2875 bb
= BB_VINFO_BB (bb_vinfo
);
2876 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2878 gimple stmt
= gsi_stmt (gsi
);
2879 if (!find_data_references_in_stmt (NULL
, stmt
,
2880 &BB_VINFO_DATAREFS (bb_vinfo
)))
2882 /* Mark the rest of the basic-block as unvectorizable. */
2883 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
2885 stmt
= gsi_stmt (gsi
);
2886 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)) = false;
2892 datarefs
= BB_VINFO_DATAREFS (bb_vinfo
);
2895 /* Go through the data-refs, check that the analysis succeeded. Update
2896 pointer from stmt_vec_info struct to DR and vectype. */
2898 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
2901 stmt_vec_info stmt_info
;
2902 tree base
, offset
, init
;
2903 bool gather
= false;
2906 if (!dr
|| !DR_REF (dr
))
2908 if (dump_enabled_p ())
2909 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2910 "not vectorized: unhandled data-ref ");
2914 stmt
= DR_STMT (dr
);
2915 stmt_info
= vinfo_for_stmt (stmt
);
2917 /* Check that analysis of the data-ref succeeded. */
2918 if (!DR_BASE_ADDRESS (dr
) || !DR_OFFSET (dr
) || !DR_INIT (dr
)
2921 /* If target supports vector gather loads, see if they can't
2925 && !TREE_THIS_VOLATILE (DR_REF (dr
))
2926 && targetm
.vectorize
.builtin_gather
!= NULL
2927 && !nested_in_vect_loop_p (loop
, stmt
))
2929 struct data_reference
*newdr
2930 = create_data_ref (NULL
, loop_containing_stmt (stmt
),
2931 DR_REF (dr
), stmt
, true);
2932 gcc_assert (newdr
!= NULL
&& DR_REF (newdr
));
2933 if (DR_BASE_ADDRESS (newdr
)
2934 && DR_OFFSET (newdr
)
2937 && integer_zerop (DR_STEP (newdr
)))
2943 free_data_ref (newdr
);
2948 if (dump_enabled_p ())
2950 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2951 "not vectorized: data ref analysis "
2953 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
2963 if (TREE_CODE (DR_BASE_ADDRESS (dr
)) == INTEGER_CST
)
2965 if (dump_enabled_p ())
2966 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2967 "not vectorized: base addr of dr is a "
2978 if (TREE_THIS_VOLATILE (DR_REF (dr
)))
2980 if (dump_enabled_p ())
2982 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2983 "not vectorized: volatile type ");
2984 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
2993 if (stmt_can_throw_internal (stmt
))
2995 if (dump_enabled_p ())
2997 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2998 "not vectorized: statement can throw an "
3000 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3011 if (TREE_CODE (DR_REF (dr
)) == COMPONENT_REF
3012 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr
), 1)))
3014 if (dump_enabled_p ())
3016 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3017 "not vectorized: statement is bitfield "
3019 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3030 base
= unshare_expr (DR_BASE_ADDRESS (dr
));
3031 offset
= unshare_expr (DR_OFFSET (dr
));
3032 init
= unshare_expr (DR_INIT (dr
));
3034 if (is_gimple_call (stmt
))
3036 if (dump_enabled_p ())
3038 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3039 "not vectorized: dr in a call ");
3040 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3051 /* Update DR field in stmt_vec_info struct. */
3053 /* If the dataref is in an inner-loop of the loop that is considered for
3054 for vectorization, we also want to analyze the access relative to
3055 the outer-loop (DR contains information only relative to the
3056 inner-most enclosing loop). We do that by building a reference to the
3057 first location accessed by the inner-loop, and analyze it relative to
3059 if (loop
&& nested_in_vect_loop_p (loop
, stmt
))
3061 tree outer_step
, outer_base
, outer_init
;
3062 HOST_WIDE_INT pbitsize
, pbitpos
;
3064 enum machine_mode pmode
;
3065 int punsignedp
, pvolatilep
;
3066 affine_iv base_iv
, offset_iv
;
3069 /* Build a reference to the first location accessed by the
3070 inner-loop: *(BASE+INIT). (The first location is actually
3071 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3072 tree inner_base
= build_fold_indirect_ref
3073 (fold_build_pointer_plus (base
, init
));
3075 if (dump_enabled_p ())
3077 dump_printf_loc (MSG_NOTE
, vect_location
,
3078 "analyze in outer-loop: ");
3079 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, inner_base
);
3082 outer_base
= get_inner_reference (inner_base
, &pbitsize
, &pbitpos
,
3083 &poffset
, &pmode
, &punsignedp
, &pvolatilep
, false);
3084 gcc_assert (outer_base
!= NULL_TREE
);
3086 if (pbitpos
% BITS_PER_UNIT
!= 0)
3088 if (dump_enabled_p ())
3089 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3090 "failed: bit offset alignment.\n");
3094 outer_base
= build_fold_addr_expr (outer_base
);
3095 if (!simple_iv (loop
, loop_containing_stmt (stmt
), outer_base
,
3098 if (dump_enabled_p ())
3099 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3100 "failed: evolution of base is not affine.\n");
3107 poffset
= fold_build2 (PLUS_EXPR
, TREE_TYPE (offset
), offset
,
3115 offset_iv
.base
= ssize_int (0);
3116 offset_iv
.step
= ssize_int (0);
3118 else if (!simple_iv (loop
, loop_containing_stmt (stmt
), poffset
,
3121 if (dump_enabled_p ())
3122 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3123 "evolution of offset is not affine.\n");
3127 outer_init
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
3128 split_constant_offset (base_iv
.base
, &base_iv
.base
, &dinit
);
3129 outer_init
= size_binop (PLUS_EXPR
, outer_init
, dinit
);
3130 split_constant_offset (offset_iv
.base
, &offset_iv
.base
, &dinit
);
3131 outer_init
= size_binop (PLUS_EXPR
, outer_init
, dinit
);
3133 outer_step
= size_binop (PLUS_EXPR
,
3134 fold_convert (ssizetype
, base_iv
.step
),
3135 fold_convert (ssizetype
, offset_iv
.step
));
3137 STMT_VINFO_DR_STEP (stmt_info
) = outer_step
;
3138 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3139 STMT_VINFO_DR_BASE_ADDRESS (stmt_info
) = base_iv
.base
;
3140 STMT_VINFO_DR_INIT (stmt_info
) = outer_init
;
3141 STMT_VINFO_DR_OFFSET (stmt_info
) =
3142 fold_convert (ssizetype
, offset_iv
.base
);
3143 STMT_VINFO_DR_ALIGNED_TO (stmt_info
) =
3144 size_int (highest_pow2_factor (offset_iv
.base
));
3146 if (dump_enabled_p ())
3148 dump_printf_loc (MSG_NOTE
, vect_location
,
3149 "\touter base_address: ");
3150 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
3151 STMT_VINFO_DR_BASE_ADDRESS (stmt_info
));
3152 dump_printf (MSG_NOTE
, "\n\touter offset from base address: ");
3153 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
3154 STMT_VINFO_DR_OFFSET (stmt_info
));
3155 dump_printf (MSG_NOTE
,
3156 "\n\touter constant offset from base address: ");
3157 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
3158 STMT_VINFO_DR_INIT (stmt_info
));
3159 dump_printf (MSG_NOTE
, "\n\touter step: ");
3160 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
3161 STMT_VINFO_DR_STEP (stmt_info
));
3162 dump_printf (MSG_NOTE
, "\n\touter aligned to: ");
3163 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
3164 STMT_VINFO_DR_ALIGNED_TO (stmt_info
));
3168 if (STMT_VINFO_DATA_REF (stmt_info
))
3170 if (dump_enabled_p ())
3172 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3173 "not vectorized: more than one data ref "
3175 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3186 STMT_VINFO_DATA_REF (stmt_info
) = dr
;
3188 /* Set vectype for STMT. */
3189 scalar_type
= TREE_TYPE (DR_REF (dr
));
3190 STMT_VINFO_VECTYPE (stmt_info
) =
3191 get_vectype_for_scalar_type (scalar_type
);
3192 if (!STMT_VINFO_VECTYPE (stmt_info
))
3194 if (dump_enabled_p ())
3196 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3197 "not vectorized: no vectype for stmt: ");
3198 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3199 dump_printf (MSG_MISSED_OPTIMIZATION
, " scalar_type: ");
3200 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_DETAILS
,
3209 STMT_VINFO_DATA_REF (stmt_info
) = NULL
;
3215 /* Adjust the minimal vectorization factor according to the
3217 vf
= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
));
3225 gather
= 0 != vect_check_gather (stmt
, loop_vinfo
, NULL
, &off
, NULL
);
3227 && get_vectype_for_scalar_type (TREE_TYPE (off
)) == NULL_TREE
)
3231 STMT_VINFO_DATA_REF (stmt_info
) = NULL
;
3233 if (dump_enabled_p ())
3235 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3236 "not vectorized: not suitable for gather "
3238 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3244 STMT_VINFO_GATHER_P (stmt_info
) = true;
3247 && TREE_CODE (DR_STEP (dr
)) != INTEGER_CST
)
3249 bool strided_load
= false;
3250 if (!nested_in_vect_loop_p (loop
, stmt
))
3251 strided_load
= vect_check_strided_load (stmt
, loop_vinfo
);
3254 if (dump_enabled_p ())
3256 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3257 "not vectorized: not suitable for strided "
3259 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3263 STMT_VINFO_STRIDE_LOAD_P (stmt_info
) = true;
3267 /* If we stopped analysis at the first dataref we could not analyze
3268 when trying to vectorize a basic-block mark the rest of the datarefs
3269 as not vectorizable and truncate the vector of datarefs. That
3270 avoids spending useless time in analyzing their dependence. */
3271 if (i
!= datarefs
.length ())
3273 gcc_assert (bb_vinfo
!= NULL
);
3274 for (unsigned j
= i
; j
< datarefs
.length (); ++j
)
3276 data_reference_p dr
= datarefs
[j
];
3277 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr
))) = false;
3280 datarefs
.truncate (i
);
3287 /* Function vect_get_new_vect_var.
3289 Returns a name for a new variable. The current naming scheme appends the
3290 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
3291 the name of vectorizer generated variables, and appends that to NAME if
3295 vect_get_new_vect_var (tree type
, enum vect_var_kind var_kind
, const char *name
)
3302 case vect_simple_var
:
3305 case vect_scalar_var
:
3308 case vect_pointer_var
:
3317 char* tmp
= concat (prefix
, name
, NULL
);
3318 new_vect_var
= create_tmp_reg (type
, tmp
);
3322 new_vect_var
= create_tmp_reg (type
, prefix
);
3324 return new_vect_var
;
3328 /* Function vect_create_addr_base_for_vector_ref.
3330 Create an expression that computes the address of the first memory location
3331 that will be accessed for a data reference.
3334 STMT: The statement containing the data reference.
3335 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
3336 OFFSET: Optional. If supplied, it is be added to the initial address.
3337 LOOP: Specify relative to which loop-nest should the address be computed.
3338 For example, when the dataref is in an inner-loop nested in an
3339 outer-loop that is now being vectorized, LOOP can be either the
3340 outer-loop, or the inner-loop. The first memory location accessed
3341 by the following dataref ('in' points to short):
3348 if LOOP=i_loop: &in (relative to i_loop)
3349 if LOOP=j_loop: &in+i*2B (relative to j_loop)
3352 1. Return an SSA_NAME whose value is the address of the memory location of
3353 the first vector of the data reference.
3354 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
3355 these statement(s) which define the returned SSA_NAME.
3357 FORNOW: We are only handling array accesses with step 1. */
3360 vect_create_addr_base_for_vector_ref (gimple stmt
,
3361 gimple_seq
*new_stmt_list
,
3365 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3366 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
3368 const char *base_name
;
3371 gimple_seq seq
= NULL
;
3375 tree step
= TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
)));
3376 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3378 if (loop_vinfo
&& loop
&& loop
!= (gimple_bb (stmt
))->loop_father
)
3380 struct loop
*outer_loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3382 gcc_assert (nested_in_vect_loop_p (outer_loop
, stmt
));
3384 data_ref_base
= unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info
));
3385 base_offset
= unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info
));
3386 init
= unshare_expr (STMT_VINFO_DR_INIT (stmt_info
));
3390 data_ref_base
= unshare_expr (DR_BASE_ADDRESS (dr
));
3391 base_offset
= unshare_expr (DR_OFFSET (dr
));
3392 init
= unshare_expr (DR_INIT (dr
));
3396 base_name
= get_name (data_ref_base
);
3399 base_offset
= ssize_int (0);
3400 init
= ssize_int (0);
3401 base_name
= get_name (DR_REF (dr
));
3404 /* Create base_offset */
3405 base_offset
= size_binop (PLUS_EXPR
,
3406 fold_convert (sizetype
, base_offset
),
3407 fold_convert (sizetype
, init
));
3411 offset
= fold_build2 (MULT_EXPR
, sizetype
,
3412 fold_convert (sizetype
, offset
), step
);
3413 base_offset
= fold_build2 (PLUS_EXPR
, sizetype
,
3414 base_offset
, offset
);
3417 /* base + base_offset */
3419 addr_base
= fold_build_pointer_plus (data_ref_base
, base_offset
);
3422 addr_base
= build1 (ADDR_EXPR
,
3423 build_pointer_type (TREE_TYPE (DR_REF (dr
))),
3424 unshare_expr (DR_REF (dr
)));
3427 vect_ptr_type
= build_pointer_type (STMT_VINFO_VECTYPE (stmt_info
));
3428 addr_base
= fold_convert (vect_ptr_type
, addr_base
);
3429 dest
= vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
, base_name
);
3430 addr_base
= force_gimple_operand (addr_base
, &seq
, false, dest
);
3431 gimple_seq_add_seq (new_stmt_list
, seq
);
3433 if (DR_PTR_INFO (dr
)
3434 && TREE_CODE (addr_base
) == SSA_NAME
)
3436 duplicate_ssa_name_ptr_info (addr_base
, DR_PTR_INFO (dr
));
3438 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr_base
));
3441 if (dump_enabled_p ())
3443 dump_printf_loc (MSG_NOTE
, vect_location
, "created ");
3444 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, addr_base
);
3451 /* Function vect_create_data_ref_ptr.
3453 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
3454 location accessed in the loop by STMT, along with the def-use update
3455 chain to appropriately advance the pointer through the loop iterations.
3456 Also set aliasing information for the pointer. This pointer is used by
3457 the callers to this function to create a memory reference expression for
3458 vector load/store access.
3461 1. STMT: a stmt that references memory. Expected to be of the form
3462 GIMPLE_ASSIGN <name, data-ref> or
3463 GIMPLE_ASSIGN <data-ref, name>.
3464 2. AGGR_TYPE: the type of the reference, which should be either a vector
3466 3. AT_LOOP: the loop where the vector memref is to be created.
3467 4. OFFSET (optional): an offset to be added to the initial address accessed
3468 by the data-ref in STMT.
3469 5. BSI: location where the new stmts are to be placed if there is no loop
3470 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
3471 pointing to the initial address.
3474 1. Declare a new ptr to vector_type, and have it point to the base of the
3475 data reference (initial address accessed by the data reference).
3476 For example, for vector of type V8HI, the following code is generated:
3479 ap = (v8hi *)initial_address;
3481 if OFFSET is not supplied:
3482 initial_address = &a[init];
3483 if OFFSET is supplied:
3484 initial_address = &a[init + OFFSET];
3486 Return the initial_address in INITIAL_ADDRESS.
3488 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
3489 update the pointer in each iteration of the loop.
3491 Return the increment stmt that updates the pointer in PTR_INCR.
3493 3. Set INV_P to true if the access pattern of the data reference in the
3494 vectorized loop is invariant. Set it to false otherwise.
3496 4. Return the pointer. */
3499 vect_create_data_ref_ptr (gimple stmt
, tree aggr_type
, struct loop
*at_loop
,
3500 tree offset
, tree
*initial_address
,
3501 gimple_stmt_iterator
*gsi
, gimple
*ptr_incr
,
3502 bool only_init
, bool *inv_p
)
3504 const char *base_name
;
3505 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3506 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3507 struct loop
*loop
= NULL
;
3508 bool nested_in_vect_loop
= false;
3509 struct loop
*containing_loop
= NULL
;
3514 gimple_seq new_stmt_list
= NULL
;
3518 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
3520 gimple_stmt_iterator incr_gsi
;
3523 tree indx_before_incr
, indx_after_incr
;
3526 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3528 gcc_assert (TREE_CODE (aggr_type
) == ARRAY_TYPE
3529 || TREE_CODE (aggr_type
) == VECTOR_TYPE
);
3533 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3534 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
3535 containing_loop
= (gimple_bb (stmt
))->loop_father
;
3536 pe
= loop_preheader_edge (loop
);
3540 gcc_assert (bb_vinfo
);
3545 /* Check the step (evolution) of the load in LOOP, and record
3546 whether it's invariant. */
3547 if (nested_in_vect_loop
)
3548 step
= STMT_VINFO_DR_STEP (stmt_info
);
3550 step
= DR_STEP (STMT_VINFO_DATA_REF (stmt_info
));
3552 if (tree_int_cst_compare (step
, size_zero_node
) == 0)
3556 negative
= tree_int_cst_compare (step
, size_zero_node
) < 0;
3558 /* Create an expression for the first address accessed by this load
3560 base_name
= get_name (DR_BASE_ADDRESS (dr
));
3562 if (dump_enabled_p ())
3564 tree dr_base_type
= TREE_TYPE (DR_BASE_OBJECT (dr
));
3565 dump_printf_loc (MSG_NOTE
, vect_location
,
3566 "create %s-pointer variable to type: ",
3567 tree_code_name
[(int) TREE_CODE (aggr_type
)]);
3568 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, aggr_type
);
3569 if (TREE_CODE (dr_base_type
) == ARRAY_TYPE
)
3570 dump_printf (MSG_NOTE
, " vectorizing an array ref: ");
3571 else if (TREE_CODE (dr_base_type
) == RECORD_TYPE
)
3572 dump_printf (MSG_NOTE
, " vectorizing a record based array ref: ");
3574 dump_printf (MSG_NOTE
, " vectorizing a pointer ref: ");
3575 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_BASE_OBJECT (dr
));
3578 /* (1) Create the new aggregate-pointer variable.
3579 Vector and array types inherit the alias set of their component
3580 type by default so we need to use a ref-all pointer if the data
3581 reference does not conflict with the created aggregated data
3582 reference because it is not addressable. */
3583 bool need_ref_all
= false;
3584 if (!alias_sets_conflict_p (get_alias_set (aggr_type
),
3585 get_alias_set (DR_REF (dr
))))
3586 need_ref_all
= true;
3587 /* Likewise for any of the data references in the stmt group. */
3588 else if (STMT_VINFO_GROUP_SIZE (stmt_info
) > 1)
3590 gimple orig_stmt
= STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info
);
3593 stmt_vec_info sinfo
= vinfo_for_stmt (orig_stmt
);
3594 struct data_reference
*sdr
= STMT_VINFO_DATA_REF (sinfo
);
3595 if (!alias_sets_conflict_p (get_alias_set (aggr_type
),
3596 get_alias_set (DR_REF (sdr
))))
3598 need_ref_all
= true;
3601 orig_stmt
= STMT_VINFO_GROUP_NEXT_ELEMENT (sinfo
);
3605 aggr_ptr_type
= build_pointer_type_for_mode (aggr_type
, ptr_mode
,
3607 aggr_ptr
= vect_get_new_vect_var (aggr_ptr_type
, vect_pointer_var
, base_name
);
3610 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3611 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3612 def-use update cycles for the pointer: one relative to the outer-loop
3613 (LOOP), which is what steps (3) and (4) below do. The other is relative
3614 to the inner-loop (which is the inner-most loop containing the dataref),
3615 and this is done be step (5) below.
3617 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3618 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3619 redundant. Steps (3),(4) create the following:
3622 LOOP: vp1 = phi(vp0,vp2)
3628 If there is an inner-loop nested in loop, then step (5) will also be
3629 applied, and an additional update in the inner-loop will be created:
3632 LOOP: vp1 = phi(vp0,vp2)
3634 inner: vp3 = phi(vp1,vp4)
3635 vp4 = vp3 + inner_step
3641 /* (2) Calculate the initial address of the aggregate-pointer, and set
3642 the aggregate-pointer to point to it before the loop. */
3644 /* Create: (&(base[init_val+offset]) in the loop preheader. */
3646 new_temp
= vect_create_addr_base_for_vector_ref (stmt
, &new_stmt_list
,
3652 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, new_stmt_list
);
3653 gcc_assert (!new_bb
);
3656 gsi_insert_seq_before (gsi
, new_stmt_list
, GSI_SAME_STMT
);
3659 *initial_address
= new_temp
;
3661 /* Create: p = (aggr_type *) initial_base */
3662 if (TREE_CODE (new_temp
) != SSA_NAME
3663 || !useless_type_conversion_p (aggr_ptr_type
, TREE_TYPE (new_temp
)))
3665 vec_stmt
= gimple_build_assign (aggr_ptr
,
3666 fold_convert (aggr_ptr_type
, new_temp
));
3667 aggr_ptr_init
= make_ssa_name (aggr_ptr
, vec_stmt
);
3668 /* Copy the points-to information if it exists. */
3669 if (DR_PTR_INFO (dr
))
3670 duplicate_ssa_name_ptr_info (aggr_ptr_init
, DR_PTR_INFO (dr
));
3671 gimple_assign_set_lhs (vec_stmt
, aggr_ptr_init
);
3674 new_bb
= gsi_insert_on_edge_immediate (pe
, vec_stmt
);
3675 gcc_assert (!new_bb
);
3678 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
3681 aggr_ptr_init
= new_temp
;
3683 /* (3) Handle the updating of the aggregate-pointer inside the loop.
3684 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3685 inner-loop nested in LOOP (during outer-loop vectorization). */
3687 /* No update in loop is required. */
3688 if (only_init
&& (!loop_vinfo
|| at_loop
== loop
))
3689 aptr
= aggr_ptr_init
;
3692 /* The step of the aggregate pointer is the type size. */
3693 tree step
= TYPE_SIZE_UNIT (aggr_type
);
3694 /* One exception to the above is when the scalar step of the load in
3695 LOOP is zero. In this case the step here is also zero. */
3697 step
= size_zero_node
;
3699 step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
3701 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
3703 create_iv (aggr_ptr_init
,
3704 fold_convert (aggr_ptr_type
, step
),
3705 aggr_ptr
, loop
, &incr_gsi
, insert_after
,
3706 &indx_before_incr
, &indx_after_incr
);
3707 incr
= gsi_stmt (incr_gsi
);
3708 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
3710 /* Copy the points-to information if it exists. */
3711 if (DR_PTR_INFO (dr
))
3713 duplicate_ssa_name_ptr_info (indx_before_incr
, DR_PTR_INFO (dr
));
3714 duplicate_ssa_name_ptr_info (indx_after_incr
, DR_PTR_INFO (dr
));
3719 aptr
= indx_before_incr
;
3722 if (!nested_in_vect_loop
|| only_init
)
3726 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
3727 nested in LOOP, if exists. */
3729 gcc_assert (nested_in_vect_loop
);
3732 standard_iv_increment_position (containing_loop
, &incr_gsi
,
3734 create_iv (aptr
, fold_convert (aggr_ptr_type
, DR_STEP (dr
)), aggr_ptr
,
3735 containing_loop
, &incr_gsi
, insert_after
, &indx_before_incr
,
3737 incr
= gsi_stmt (incr_gsi
);
3738 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
3740 /* Copy the points-to information if it exists. */
3741 if (DR_PTR_INFO (dr
))
3743 duplicate_ssa_name_ptr_info (indx_before_incr
, DR_PTR_INFO (dr
));
3744 duplicate_ssa_name_ptr_info (indx_after_incr
, DR_PTR_INFO (dr
));
3749 return indx_before_incr
;
3756 /* Function bump_vector_ptr
3758 Increment a pointer (to a vector type) by vector-size. If requested,
3759 i.e. if PTR-INCR is given, then also connect the new increment stmt
3760 to the existing def-use update-chain of the pointer, by modifying
3761 the PTR_INCR as illustrated below:
3763 The pointer def-use update-chain before this function:
3764 DATAREF_PTR = phi (p_0, p_2)
3766 PTR_INCR: p_2 = DATAREF_PTR + step
3768 The pointer def-use update-chain after this function:
3769 DATAREF_PTR = phi (p_0, p_2)
3771 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3773 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3776 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3778 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3779 the loop. The increment amount across iterations is expected
3781 BSI - location where the new update stmt is to be placed.
3782 STMT - the original scalar memory-access stmt that is being vectorized.
3783 BUMP - optional. The offset by which to bump the pointer. If not given,
3784 the offset is assumed to be vector_size.
3786 Output: Return NEW_DATAREF_PTR as illustrated above.
3791 bump_vector_ptr (tree dataref_ptr
, gimple ptr_incr
, gimple_stmt_iterator
*gsi
,
3792 gimple stmt
, tree bump
)
3794 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3795 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
3796 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3797 tree update
= TYPE_SIZE_UNIT (vectype
);
3800 use_operand_p use_p
;
3801 tree new_dataref_ptr
;
3806 new_dataref_ptr
= copy_ssa_name (dataref_ptr
, NULL
);
3807 incr_stmt
= gimple_build_assign_with_ops (POINTER_PLUS_EXPR
, new_dataref_ptr
,
3808 dataref_ptr
, update
);
3809 vect_finish_stmt_generation (stmt
, incr_stmt
, gsi
);
3811 /* Copy the points-to information if it exists. */
3812 if (DR_PTR_INFO (dr
))
3814 duplicate_ssa_name_ptr_info (new_dataref_ptr
, DR_PTR_INFO (dr
));
3815 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr
));
3819 return new_dataref_ptr
;
3821 /* Update the vector-pointer's cross-iteration increment. */
3822 FOR_EACH_SSA_USE_OPERAND (use_p
, ptr_incr
, iter
, SSA_OP_USE
)
3824 tree use
= USE_FROM_PTR (use_p
);
3826 if (use
== dataref_ptr
)
3827 SET_USE (use_p
, new_dataref_ptr
);
3829 gcc_assert (tree_int_cst_compare (use
, update
) == 0);
3832 return new_dataref_ptr
;
3836 /* Function vect_create_destination_var.
3838 Create a new temporary of type VECTYPE. */
3841 vect_create_destination_var (tree scalar_dest
, tree vectype
)
3844 const char *new_name
;
3846 enum vect_var_kind kind
;
3848 kind
= vectype
? vect_simple_var
: vect_scalar_var
;
3849 type
= vectype
? vectype
: TREE_TYPE (scalar_dest
);
3851 gcc_assert (TREE_CODE (scalar_dest
) == SSA_NAME
);
3853 new_name
= get_name (scalar_dest
);
3856 vec_dest
= vect_get_new_vect_var (type
, kind
, new_name
);
3861 /* Function vect_grouped_store_supported.
3863 Returns TRUE if interleave high and interleave low permutations
3864 are supported, and FALSE otherwise. */
3867 vect_grouped_store_supported (tree vectype
, unsigned HOST_WIDE_INT count
)
3869 enum machine_mode mode
= TYPE_MODE (vectype
);
3871 /* vect_permute_store_chain requires the group size to be a power of two. */
3872 if (exact_log2 (count
) == -1)
3874 if (dump_enabled_p ())
3875 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3876 "the size of the group of accesses"
3877 " is not a power of 2");
3881 /* Check that the permutation is supported. */
3882 if (VECTOR_MODE_P (mode
))
3884 unsigned int i
, nelt
= GET_MODE_NUNITS (mode
);
3885 unsigned char *sel
= XALLOCAVEC (unsigned char, nelt
);
3886 for (i
= 0; i
< nelt
/ 2; i
++)
3889 sel
[i
* 2 + 1] = i
+ nelt
;
3891 if (can_vec_perm_p (mode
, false, sel
))
3893 for (i
= 0; i
< nelt
; i
++)
3895 if (can_vec_perm_p (mode
, false, sel
))
3900 if (dump_enabled_p ())
3901 dump_printf (MSG_MISSED_OPTIMIZATION
,
3902 "interleave op not supported by target.");
3907 /* Return TRUE if vec_store_lanes is available for COUNT vectors of
3911 vect_store_lanes_supported (tree vectype
, unsigned HOST_WIDE_INT count
)
3913 return vect_lanes_optab_supported_p ("vec_store_lanes",
3914 vec_store_lanes_optab
,
3919 /* Function vect_permute_store_chain.
3921 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3922 a power of 2, generate interleave_high/low stmts to reorder the data
3923 correctly for the stores. Return the final references for stores in
3926 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3927 The input is 4 vectors each containing 8 elements. We assign a number to
3928 each element, the input sequence is:
3930 1st vec: 0 1 2 3 4 5 6 7
3931 2nd vec: 8 9 10 11 12 13 14 15
3932 3rd vec: 16 17 18 19 20 21 22 23
3933 4th vec: 24 25 26 27 28 29 30 31
3935 The output sequence should be:
3937 1st vec: 0 8 16 24 1 9 17 25
3938 2nd vec: 2 10 18 26 3 11 19 27
3939 3rd vec: 4 12 20 28 5 13 21 29
3940 4th vec: 6 14 22 30 7 15 23 31
3942 i.e., we interleave the contents of the four vectors in their order.
3944 We use interleave_high/low instructions to create such output. The input of
3945 each interleave_high/low operation is two vectors:
3948 the even elements of the result vector are obtained left-to-right from the
3949 high/low elements of the first vector. The odd elements of the result are
3950 obtained left-to-right from the high/low elements of the second vector.
3951 The output of interleave_high will be: 0 4 1 5
3952 and of interleave_low: 2 6 3 7
3955 The permutation is done in log LENGTH stages. In each stage interleave_high
3956 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3957 where the first argument is taken from the first half of DR_CHAIN and the
3958 second argument from its second half.
3961 I1: interleave_high (1st vec, 3rd vec)
3962 I2: interleave_low (1st vec, 3rd vec)
3963 I3: interleave_high (2nd vec, 4th vec)
3964 I4: interleave_low (2nd vec, 4th vec)
3966 The output for the first stage is:
3968 I1: 0 16 1 17 2 18 3 19
3969 I2: 4 20 5 21 6 22 7 23
3970 I3: 8 24 9 25 10 26 11 27
3971 I4: 12 28 13 29 14 30 15 31
3973 The output of the second stage, i.e. the final result is:
3975 I1: 0 8 16 24 1 9 17 25
3976 I2: 2 10 18 26 3 11 19 27
3977 I3: 4 12 20 28 5 13 21 29
3978 I4: 6 14 22 30 7 15 23 31. */
3981 vect_permute_store_chain (vec
<tree
> dr_chain
,
3982 unsigned int length
,
3984 gimple_stmt_iterator
*gsi
,
3985 vec
<tree
> *result_chain
)
3987 tree vect1
, vect2
, high
, low
;
3989 tree vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
3990 tree perm_mask_low
, perm_mask_high
;
3992 unsigned int j
, nelt
= TYPE_VECTOR_SUBPARTS (vectype
);
3993 unsigned char *sel
= XALLOCAVEC (unsigned char, nelt
);
3995 result_chain
->quick_grow (length
);
3996 memcpy (result_chain
->address (), dr_chain
.address (),
3997 length
* sizeof (tree
));
3999 for (i
= 0, n
= nelt
/ 2; i
< n
; i
++)
4002 sel
[i
* 2 + 1] = i
+ nelt
;
4004 perm_mask_high
= vect_gen_perm_mask (vectype
, sel
);
4005 gcc_assert (perm_mask_high
!= NULL
);
4007 for (i
= 0; i
< nelt
; i
++)
4009 perm_mask_low
= vect_gen_perm_mask (vectype
, sel
);
4010 gcc_assert (perm_mask_low
!= NULL
);
4012 for (i
= 0, n
= exact_log2 (length
); i
< n
; i
++)
4014 for (j
= 0; j
< length
/2; j
++)
4016 vect1
= dr_chain
[j
];
4017 vect2
= dr_chain
[j
+length
/2];
4019 /* Create interleaving stmt:
4020 high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}> */
4021 high
= make_temp_ssa_name (vectype
, NULL
, "vect_inter_high");
4023 = gimple_build_assign_with_ops (VEC_PERM_EXPR
, high
,
4024 vect1
, vect2
, perm_mask_high
);
4025 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
4026 (*result_chain
)[2*j
] = high
;
4028 /* Create interleaving stmt:
4029 low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
4030 nelt*3/2+1, ...}> */
4031 low
= make_temp_ssa_name (vectype
, NULL
, "vect_inter_low");
4033 = gimple_build_assign_with_ops (VEC_PERM_EXPR
, low
,
4034 vect1
, vect2
, perm_mask_low
);
4035 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
4036 (*result_chain
)[2*j
+1] = low
;
4038 memcpy (dr_chain
.address (), result_chain
->address (),
4039 length
* sizeof (tree
));
4043 /* Function vect_setup_realignment
4045 This function is called when vectorizing an unaligned load using
4046 the dr_explicit_realign[_optimized] scheme.
4047 This function generates the following code at the loop prolog:
4050 x msq_init = *(floor(p)); # prolog load
4051 realignment_token = call target_builtin;
4053 x msq = phi (msq_init, ---)
4055 The stmts marked with x are generated only for the case of
4056 dr_explicit_realign_optimized.
4058 The code above sets up a new (vector) pointer, pointing to the first
4059 location accessed by STMT, and a "floor-aligned" load using that pointer.
4060 It also generates code to compute the "realignment-token" (if the relevant
4061 target hook was defined), and creates a phi-node at the loop-header bb
4062 whose arguments are the result of the prolog-load (created by this
4063 function) and the result of a load that takes place in the loop (to be
4064 created by the caller to this function).
4066 For the case of dr_explicit_realign_optimized:
4067 The caller to this function uses the phi-result (msq) to create the
4068 realignment code inside the loop, and sets up the missing phi argument,
4071 msq = phi (msq_init, lsq)
4072 lsq = *(floor(p')); # load in loop
4073 result = realign_load (msq, lsq, realignment_token);
4075 For the case of dr_explicit_realign:
4077 msq = *(floor(p)); # load in loop
4079 lsq = *(floor(p')); # load in loop
4080 result = realign_load (msq, lsq, realignment_token);
4083 STMT - (scalar) load stmt to be vectorized. This load accesses
4084 a memory location that may be unaligned.
4085 BSI - place where new code is to be inserted.
4086 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
4090 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4091 target hook, if defined.
4092 Return value - the result of the loop-header phi node. */
4095 vect_setup_realignment (gimple stmt
, gimple_stmt_iterator
*gsi
,
4096 tree
*realignment_token
,
4097 enum dr_alignment_support alignment_support_scheme
,
4099 struct loop
**at_loop
)
4101 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4102 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4103 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4104 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
4105 struct loop
*loop
= NULL
;
4107 tree scalar_dest
= gimple_assign_lhs (stmt
);
4114 tree msq_init
= NULL_TREE
;
4117 tree msq
= NULL_TREE
;
4118 gimple_seq stmts
= NULL
;
4120 bool compute_in_loop
= false;
4121 bool nested_in_vect_loop
= false;
4122 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
4123 struct loop
*loop_for_initial_load
= NULL
;
4127 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4128 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
4131 gcc_assert (alignment_support_scheme
== dr_explicit_realign
4132 || alignment_support_scheme
== dr_explicit_realign_optimized
);
4134 /* We need to generate three things:
4135 1. the misalignment computation
4136 2. the extra vector load (for the optimized realignment scheme).
4137 3. the phi node for the two vectors from which the realignment is
4138 done (for the optimized realignment scheme). */
4140 /* 1. Determine where to generate the misalignment computation.
4142 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4143 calculation will be generated by this function, outside the loop (in the
4144 preheader). Otherwise, INIT_ADDR had already been computed for us by the
4145 caller, inside the loop.
4147 Background: If the misalignment remains fixed throughout the iterations of
4148 the loop, then both realignment schemes are applicable, and also the
4149 misalignment computation can be done outside LOOP. This is because we are
4150 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4151 are a multiple of VS (the Vector Size), and therefore the misalignment in
4152 different vectorized LOOP iterations is always the same.
4153 The problem arises only if the memory access is in an inner-loop nested
4154 inside LOOP, which is now being vectorized using outer-loop vectorization.
4155 This is the only case when the misalignment of the memory access may not
4156 remain fixed throughout the iterations of the inner-loop (as explained in
4157 detail in vect_supportable_dr_alignment). In this case, not only is the
4158 optimized realignment scheme not applicable, but also the misalignment
4159 computation (and generation of the realignment token that is passed to
4160 REALIGN_LOAD) have to be done inside the loop.
4162 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
4163 or not, which in turn determines if the misalignment is computed inside
4164 the inner-loop, or outside LOOP. */
4166 if (init_addr
!= NULL_TREE
|| !loop_vinfo
)
4168 compute_in_loop
= true;
4169 gcc_assert (alignment_support_scheme
== dr_explicit_realign
);
4173 /* 2. Determine where to generate the extra vector load.
4175 For the optimized realignment scheme, instead of generating two vector
4176 loads in each iteration, we generate a single extra vector load in the
4177 preheader of the loop, and in each iteration reuse the result of the
4178 vector load from the previous iteration. In case the memory access is in
4179 an inner-loop nested inside LOOP, which is now being vectorized using
4180 outer-loop vectorization, we need to determine whether this initial vector
4181 load should be generated at the preheader of the inner-loop, or can be
4182 generated at the preheader of LOOP. If the memory access has no evolution
4183 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
4184 to be generated inside LOOP (in the preheader of the inner-loop). */
4186 if (nested_in_vect_loop
)
4188 tree outerloop_step
= STMT_VINFO_DR_STEP (stmt_info
);
4189 bool invariant_in_outerloop
=
4190 (tree_int_cst_compare (outerloop_step
, size_zero_node
) == 0);
4191 loop_for_initial_load
= (invariant_in_outerloop
? loop
: loop
->inner
);
4194 loop_for_initial_load
= loop
;
4196 *at_loop
= loop_for_initial_load
;
4198 if (loop_for_initial_load
)
4199 pe
= loop_preheader_edge (loop_for_initial_load
);
4201 /* 3. For the case of the optimized realignment, create the first vector
4202 load at the loop preheader. */
4204 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
4206 /* Create msq_init = *(floor(p1)) in the loop preheader */
4208 gcc_assert (!compute_in_loop
);
4209 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4210 ptr
= vect_create_data_ref_ptr (stmt
, vectype
, loop_for_initial_load
,
4211 NULL_TREE
, &init_addr
, NULL
, &inc
,
4213 new_temp
= copy_ssa_name (ptr
, NULL
);
4214 new_stmt
= gimple_build_assign_with_ops
4215 (BIT_AND_EXPR
, new_temp
, ptr
,
4216 build_int_cst (TREE_TYPE (ptr
),
4217 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
4218 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
4219 gcc_assert (!new_bb
);
4221 = build2 (MEM_REF
, TREE_TYPE (vec_dest
), new_temp
,
4222 build_int_cst (reference_alias_ptr_type (DR_REF (dr
)), 0));
4223 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
4224 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4225 gimple_assign_set_lhs (new_stmt
, new_temp
);
4228 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
4229 gcc_assert (!new_bb
);
4232 gsi_insert_before (gsi
, new_stmt
, GSI_SAME_STMT
);
4234 msq_init
= gimple_assign_lhs (new_stmt
);
4237 /* 4. Create realignment token using a target builtin, if available.
4238 It is done either inside the containing loop, or before LOOP (as
4239 determined above). */
4241 if (targetm
.vectorize
.builtin_mask_for_load
)
4245 /* Compute INIT_ADDR - the initial addressed accessed by this memref. */
4248 /* Generate the INIT_ADDR computation outside LOOP. */
4249 init_addr
= vect_create_addr_base_for_vector_ref (stmt
, &stmts
,
4253 pe
= loop_preheader_edge (loop
);
4254 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4255 gcc_assert (!new_bb
);
4258 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
4261 builtin_decl
= targetm
.vectorize
.builtin_mask_for_load ();
4262 new_stmt
= gimple_build_call (builtin_decl
, 1, init_addr
);
4264 vect_create_destination_var (scalar_dest
,
4265 gimple_call_return_type (new_stmt
));
4266 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4267 gimple_call_set_lhs (new_stmt
, new_temp
);
4269 if (compute_in_loop
)
4270 gsi_insert_before (gsi
, new_stmt
, GSI_SAME_STMT
);
4273 /* Generate the misalignment computation outside LOOP. */
4274 pe
= loop_preheader_edge (loop
);
4275 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
4276 gcc_assert (!new_bb
);
4279 *realignment_token
= gimple_call_lhs (new_stmt
);
4281 /* The result of the CALL_EXPR to this builtin is determined from
4282 the value of the parameter and no global variables are touched
4283 which makes the builtin a "const" function. Requiring the
4284 builtin to have the "const" attribute makes it unnecessary
4285 to call mark_call_clobbered. */
4286 gcc_assert (TREE_READONLY (builtin_decl
));
4289 if (alignment_support_scheme
== dr_explicit_realign
)
4292 gcc_assert (!compute_in_loop
);
4293 gcc_assert (alignment_support_scheme
== dr_explicit_realign_optimized
);
4296 /* 5. Create msq = phi <msq_init, lsq> in loop */
4298 pe
= loop_preheader_edge (containing_loop
);
4299 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4300 msq
= make_ssa_name (vec_dest
, NULL
);
4301 phi_stmt
= create_phi_node (msq
, containing_loop
->header
);
4302 add_phi_arg (phi_stmt
, msq_init
, pe
, UNKNOWN_LOCATION
);
4308 /* Function vect_grouped_load_supported.
4310 Returns TRUE if even and odd permutations are supported,
4311 and FALSE otherwise. */
4314 vect_grouped_load_supported (tree vectype
, unsigned HOST_WIDE_INT count
)
4316 enum machine_mode mode
= TYPE_MODE (vectype
);
4318 /* vect_permute_load_chain requires the group size to be a power of two. */
4319 if (exact_log2 (count
) == -1)
4321 if (dump_enabled_p ())
4322 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4323 "the size of the group of accesses"
4324 " is not a power of 2");
4328 /* Check that the permutation is supported. */
4329 if (VECTOR_MODE_P (mode
))
4331 unsigned int i
, nelt
= GET_MODE_NUNITS (mode
);
4332 unsigned char *sel
= XALLOCAVEC (unsigned char, nelt
);
4334 for (i
= 0; i
< nelt
; i
++)
4336 if (can_vec_perm_p (mode
, false, sel
))
4338 for (i
= 0; i
< nelt
; i
++)
4340 if (can_vec_perm_p (mode
, false, sel
))
4345 if (dump_enabled_p ())
4346 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4347 "extract even/odd not supported by target");
4351 /* Return TRUE if vec_load_lanes is available for COUNT vectors of
4355 vect_load_lanes_supported (tree vectype
, unsigned HOST_WIDE_INT count
)
4357 return vect_lanes_optab_supported_p ("vec_load_lanes",
4358 vec_load_lanes_optab
,
4362 /* Function vect_permute_load_chain.
4364 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
4365 a power of 2, generate extract_even/odd stmts to reorder the input data
4366 correctly. Return the final references for loads in RESULT_CHAIN.
4368 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4369 The input is 4 vectors each containing 8 elements. We assign a number to each
4370 element, the input sequence is:
4372 1st vec: 0 1 2 3 4 5 6 7
4373 2nd vec: 8 9 10 11 12 13 14 15
4374 3rd vec: 16 17 18 19 20 21 22 23
4375 4th vec: 24 25 26 27 28 29 30 31
4377 The output sequence should be:
4379 1st vec: 0 4 8 12 16 20 24 28
4380 2nd vec: 1 5 9 13 17 21 25 29
4381 3rd vec: 2 6 10 14 18 22 26 30
4382 4th vec: 3 7 11 15 19 23 27 31
4384 i.e., the first output vector should contain the first elements of each
4385 interleaving group, etc.
4387 We use extract_even/odd instructions to create such output. The input of
4388 each extract_even/odd operation is two vectors
4392 and the output is the vector of extracted even/odd elements. The output of
4393 extract_even will be: 0 2 4 6
4394 and of extract_odd: 1 3 5 7
4397 The permutation is done in log LENGTH stages. In each stage extract_even
4398 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
4399 their order. In our example,
4401 E1: extract_even (1st vec, 2nd vec)
4402 E2: extract_odd (1st vec, 2nd vec)
4403 E3: extract_even (3rd vec, 4th vec)
4404 E4: extract_odd (3rd vec, 4th vec)
4406 The output for the first stage will be:
4408 E1: 0 2 4 6 8 10 12 14
4409 E2: 1 3 5 7 9 11 13 15
4410 E3: 16 18 20 22 24 26 28 30
4411 E4: 17 19 21 23 25 27 29 31
4413 In order to proceed and create the correct sequence for the next stage (or
4414 for the correct output, if the second stage is the last one, as in our
4415 example), we first put the output of extract_even operation and then the
4416 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4417 The input for the second stage is:
4419 1st vec (E1): 0 2 4 6 8 10 12 14
4420 2nd vec (E3): 16 18 20 22 24 26 28 30
4421 3rd vec (E2): 1 3 5 7 9 11 13 15
4422 4th vec (E4): 17 19 21 23 25 27 29 31
4424 The output of the second stage:
4426 E1: 0 4 8 12 16 20 24 28
4427 E2: 2 6 10 14 18 22 26 30
4428 E3: 1 5 9 13 17 21 25 29
4429 E4: 3 7 11 15 19 23 27 31
4431 And RESULT_CHAIN after reordering:
4433 1st vec (E1): 0 4 8 12 16 20 24 28
4434 2nd vec (E3): 1 5 9 13 17 21 25 29
4435 3rd vec (E2): 2 6 10 14 18 22 26 30
4436 4th vec (E4): 3 7 11 15 19 23 27 31. */
4439 vect_permute_load_chain (vec
<tree
> dr_chain
,
4440 unsigned int length
,
4442 gimple_stmt_iterator
*gsi
,
4443 vec
<tree
> *result_chain
)
4445 tree data_ref
, first_vect
, second_vect
;
4446 tree perm_mask_even
, perm_mask_odd
;
4448 tree vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
4449 unsigned int i
, j
, log_length
= exact_log2 (length
);
4450 unsigned nelt
= TYPE_VECTOR_SUBPARTS (vectype
);
4451 unsigned char *sel
= XALLOCAVEC (unsigned char, nelt
);
4453 result_chain
->quick_grow (length
);
4454 memcpy (result_chain
->address (), dr_chain
.address (),
4455 length
* sizeof (tree
));
4457 for (i
= 0; i
< nelt
; ++i
)
4459 perm_mask_even
= vect_gen_perm_mask (vectype
, sel
);
4460 gcc_assert (perm_mask_even
!= NULL
);
4462 for (i
= 0; i
< nelt
; ++i
)
4464 perm_mask_odd
= vect_gen_perm_mask (vectype
, sel
);
4465 gcc_assert (perm_mask_odd
!= NULL
);
4467 for (i
= 0; i
< log_length
; i
++)
4469 for (j
= 0; j
< length
; j
+= 2)
4471 first_vect
= dr_chain
[j
];
4472 second_vect
= dr_chain
[j
+1];
4474 /* data_ref = permute_even (first_data_ref, second_data_ref); */
4475 data_ref
= make_temp_ssa_name (vectype
, NULL
, "vect_perm_even");
4476 perm_stmt
= gimple_build_assign_with_ops (VEC_PERM_EXPR
, data_ref
,
4477 first_vect
, second_vect
,
4479 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
4480 (*result_chain
)[j
/2] = data_ref
;
4482 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
4483 data_ref
= make_temp_ssa_name (vectype
, NULL
, "vect_perm_odd");
4484 perm_stmt
= gimple_build_assign_with_ops (VEC_PERM_EXPR
, data_ref
,
4485 first_vect
, second_vect
,
4487 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
4488 (*result_chain
)[j
/2+length
/2] = data_ref
;
4490 memcpy (dr_chain
.address (), result_chain
->address (),
4491 length
* sizeof (tree
));
4496 /* Function vect_transform_grouped_load.
4498 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4499 to perform their permutation and ascribe the result vectorized statements to
4500 the scalar statements.
4504 vect_transform_grouped_load (gimple stmt
, vec
<tree
> dr_chain
, int size
,
4505 gimple_stmt_iterator
*gsi
)
4507 vec
<tree
> result_chain
= vNULL
;
4509 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4510 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
4511 vectors, that are ready for vector computation. */
4512 result_chain
.create (size
);
4513 vect_permute_load_chain (dr_chain
, size
, stmt
, gsi
, &result_chain
);
4514 vect_record_grouped_load_vectors (stmt
, result_chain
);
4515 result_chain
.release ();
4518 /* RESULT_CHAIN contains the output of a group of grouped loads that were
4519 generated as part of the vectorization of STMT. Assign the statement
4520 for each vector to the associated scalar statement. */
4523 vect_record_grouped_load_vectors (gimple stmt
, vec
<tree
> result_chain
)
4525 gimple first_stmt
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
4526 gimple next_stmt
, new_stmt
;
4527 unsigned int i
, gap_count
;
4530 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4531 Since we scan the chain starting from it's first node, their order
4532 corresponds the order of data-refs in RESULT_CHAIN. */
4533 next_stmt
= first_stmt
;
4535 FOR_EACH_VEC_ELT (result_chain
, i
, tmp_data_ref
)
4540 /* Skip the gaps. Loads created for the gaps will be removed by dead
4541 code elimination pass later. No need to check for the first stmt in
4542 the group, since it always exists.
4543 GROUP_GAP is the number of steps in elements from the previous
4544 access (if there is no gap GROUP_GAP is 1). We skip loads that
4545 correspond to the gaps. */
4546 if (next_stmt
!= first_stmt
4547 && gap_count
< GROUP_GAP (vinfo_for_stmt (next_stmt
)))
4555 new_stmt
= SSA_NAME_DEF_STMT (tmp_data_ref
);
4556 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4557 copies, and we put the new vector statement in the first available
4559 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
)))
4560 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
)) = new_stmt
;
4563 if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt
)))
4566 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
));
4568 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt
));
4571 prev_stmt
= rel_stmt
;
4573 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt
));
4576 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt
)) =
4581 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
4583 /* If NEXT_STMT accesses the same DR as the previous statement,
4584 put the same TMP_DATA_REF as its vectorized statement; otherwise
4585 get the next data-ref from RESULT_CHAIN. */
4586 if (!next_stmt
|| !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt
)))
4592 /* Function vect_force_dr_alignment_p.
4594 Returns whether the alignment of a DECL can be forced to be aligned
4595 on ALIGNMENT bit boundary. */
4598 vect_can_force_dr_alignment_p (const_tree decl
, unsigned int alignment
)
4600 if (TREE_CODE (decl
) != VAR_DECL
)
4603 /* We cannot change alignment of common or external symbols as another
4604 translation unit may contain a definition with lower alignment.
4605 The rules of common symbol linking mean that the definition
4606 will override the common symbol. The same is true for constant
4607 pool entries which may be shared and are not properly merged
4609 if (DECL_EXTERNAL (decl
)
4610 || DECL_COMMON (decl
)
4611 || DECL_IN_CONSTANT_POOL (decl
))
4614 if (TREE_ASM_WRITTEN (decl
))
4617 /* Do not override the alignment as specified by the ABI when the used
4618 attribute is set. */
4619 if (DECL_PRESERVE_P (decl
))
4622 /* Do not override explicit alignment set by the user when an explicit
4623 section name is also used. This is a common idiom used by many
4624 software projects. */
4625 if (DECL_SECTION_NAME (decl
) != NULL_TREE
4626 && !DECL_HAS_IMPLICIT_SECTION_NAME_P (decl
))
4629 if (TREE_STATIC (decl
))
4630 return (alignment
<= MAX_OFILE_ALIGNMENT
);
4632 return (alignment
<= MAX_STACK_ALIGNMENT
);
4636 /* Return whether the data reference DR is supported with respect to its
4638 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
4639 it is aligned, i.e., check if it is possible to vectorize it with different
4642 enum dr_alignment_support
4643 vect_supportable_dr_alignment (struct data_reference
*dr
,
4644 bool check_aligned_accesses
)
4646 gimple stmt
= DR_STMT (dr
);
4647 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4648 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4649 enum machine_mode mode
= TYPE_MODE (vectype
);
4650 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4651 struct loop
*vect_loop
= NULL
;
4652 bool nested_in_vect_loop
= false;
4654 if (aligned_access_p (dr
) && !check_aligned_accesses
)
4659 vect_loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4660 nested_in_vect_loop
= nested_in_vect_loop_p (vect_loop
, stmt
);
4663 /* Possibly unaligned access. */
4665 /* We can choose between using the implicit realignment scheme (generating
4666 a misaligned_move stmt) and the explicit realignment scheme (generating
4667 aligned loads with a REALIGN_LOAD). There are two variants to the
4668 explicit realignment scheme: optimized, and unoptimized.
4669 We can optimize the realignment only if the step between consecutive
4670 vector loads is equal to the vector size. Since the vector memory
4671 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4672 is guaranteed that the misalignment amount remains the same throughout the
4673 execution of the vectorized loop. Therefore, we can create the
4674 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4675 at the loop preheader.
4677 However, in the case of outer-loop vectorization, when vectorizing a
4678 memory access in the inner-loop nested within the LOOP that is now being
4679 vectorized, while it is guaranteed that the misalignment of the
4680 vectorized memory access will remain the same in different outer-loop
4681 iterations, it is *not* guaranteed that is will remain the same throughout
4682 the execution of the inner-loop. This is because the inner-loop advances
4683 with the original scalar step (and not in steps of VS). If the inner-loop
4684 step happens to be a multiple of VS, then the misalignment remains fixed
4685 and we can use the optimized realignment scheme. For example:
4691 When vectorizing the i-loop in the above example, the step between
4692 consecutive vector loads is 1, and so the misalignment does not remain
4693 fixed across the execution of the inner-loop, and the realignment cannot
4694 be optimized (as illustrated in the following pseudo vectorized loop):
4696 for (i=0; i<N; i+=4)
4697 for (j=0; j<M; j++){
4698 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4699 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4700 // (assuming that we start from an aligned address).
4703 We therefore have to use the unoptimized realignment scheme:
4705 for (i=0; i<N; i+=4)
4706 for (j=k; j<M; j+=4)
4707 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4708 // that the misalignment of the initial address is
4711 The loop can then be vectorized as follows:
4713 for (k=0; k<4; k++){
4714 rt = get_realignment_token (&vp[k]);
4715 for (i=0; i<N; i+=4){
4717 for (j=k; j<M; j+=4){
4719 va = REALIGN_LOAD <v1,v2,rt>;
4726 if (DR_IS_READ (dr
))
4728 bool is_packed
= false;
4729 tree type
= (TREE_TYPE (DR_REF (dr
)));
4731 if (optab_handler (vec_realign_load_optab
, mode
) != CODE_FOR_nothing
4732 && (!targetm
.vectorize
.builtin_mask_for_load
4733 || targetm
.vectorize
.builtin_mask_for_load ()))
4735 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4736 if ((nested_in_vect_loop
4737 && (TREE_INT_CST_LOW (DR_STEP (dr
))
4738 != GET_MODE_SIZE (TYPE_MODE (vectype
))))
4740 return dr_explicit_realign
;
4742 return dr_explicit_realign_optimized
;
4744 if (!known_alignment_for_access_p (dr
))
4745 is_packed
= not_size_aligned (DR_REF (dr
));
4747 if (targetm
.vectorize
.
4748 support_vector_misalignment (mode
, type
,
4749 DR_MISALIGNMENT (dr
), is_packed
))
4750 /* Can't software pipeline the loads, but can at least do them. */
4751 return dr_unaligned_supported
;
4755 bool is_packed
= false;
4756 tree type
= (TREE_TYPE (DR_REF (dr
)));
4758 if (!known_alignment_for_access_p (dr
))
4759 is_packed
= not_size_aligned (DR_REF (dr
));
4761 if (targetm
.vectorize
.
4762 support_vector_misalignment (mode
, type
,
4763 DR_MISALIGNMENT (dr
), is_packed
))
4764 return dr_unaligned_supported
;
4768 return dr_unaligned_unsupported
;