2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
44 for (i=0; i<N/8; i++){
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It insert the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
125 #include "coretypes.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
139 #include "cfglayout.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
148 #include "langhooks.h"
151 /*************************************************************************
152 Simple Loop Peeling Utilities
153 *************************************************************************/
155 /* Entry point for peeling of simple loops.
156 Peel the first/last iterations of a loop.
157 It can be used outside of the vectorizer for loops that are simple enough
158 (see function documentation). In the vectorizer it is used to peel the
159 last few iterations when the loop bound is unknown or does not evenly
160 divide by the vectorization factor, and to peel the first few iterations
161 to force the alignment of data references in the loop. */
162 struct loop
*slpeel_tree_peel_loop_to_edge
163 (struct loop
*, struct loops
*, edge
, tree
, tree
, bool);
164 static struct loop
*slpeel_tree_duplicate_loop_to_edge_cfg
165 (struct loop
*, struct loops
*, edge
);
166 static void slpeel_update_phis_for_duplicate_loop
167 (struct loop
*, struct loop
*, bool after
);
168 static void slpeel_update_phi_nodes_for_guard (edge
, struct loop
*, bool, bool);
169 static void slpeel_make_loop_iterate_ntimes (struct loop
*, tree
);
170 static edge
slpeel_add_loop_guard (basic_block
, tree
, basic_block
, basic_block
);
171 static bool slpeel_can_duplicate_loop_p (struct loop
*, edge
);
172 static void allocate_new_names (bitmap
);
173 static void rename_use_op (use_operand_p
);
174 static void rename_def_op (def_operand_p
, tree
);
175 static void rename_variables_in_bb (basic_block
);
176 static void free_new_names (bitmap
);
177 static void rename_variables_in_loop (struct loop
*);
178 #ifdef ENABLE_CHECKING
179 static void slpeel_verify_cfg_after_peeling (struct loop
*, struct loop
*);
183 /*************************************************************************
184 Vectorization Utilities.
185 *************************************************************************/
187 /* Main analysis functions. */
188 static loop_vec_info
vect_analyze_loop (struct loop
*);
189 static loop_vec_info
vect_analyze_loop_form (struct loop
*);
190 static bool vect_analyze_data_refs (loop_vec_info
);
191 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info
);
192 static bool vect_analyze_scalar_cycles (loop_vec_info
);
193 static bool vect_analyze_data_ref_accesses (loop_vec_info
);
194 static bool vect_analyze_data_refs_alignment (loop_vec_info
);
195 static bool vect_compute_data_refs_alignment (loop_vec_info
);
196 static bool vect_analyze_operations (loop_vec_info
);
198 /* Main code transformation functions. */
199 static void vect_transform_loop (loop_vec_info
, struct loops
*);
200 static bool vect_transform_stmt (tree
, block_stmt_iterator
*);
201 static bool vectorizable_load (tree
, block_stmt_iterator
*, tree
*);
202 static bool vectorizable_store (tree
, block_stmt_iterator
*, tree
*);
203 static bool vectorizable_operation (tree
, block_stmt_iterator
*, tree
*);
204 static bool vectorizable_assignment (tree
, block_stmt_iterator
*, tree
*);
205 static enum dr_alignment_support vect_supportable_dr_alignment
206 (struct data_reference
*);
207 static void vect_align_data_ref (tree
);
208 static void vect_enhance_data_refs_alignment (loop_vec_info
);
210 /* Utility functions for the analyses. */
211 static bool vect_is_simple_use (tree
, struct loop
*, tree
*);
212 static bool exist_non_indexing_operands_for_use_p (tree
, tree
);
213 static bool vect_is_simple_iv_evolution (unsigned, tree
, tree
*, tree
*, bool);
214 static void vect_mark_relevant (varray_type
*, tree
);
215 static bool vect_stmt_relevant_p (tree
, loop_vec_info
);
216 static tree
vect_get_loop_niters (struct loop
*, tree
*);
217 static bool vect_compute_data_ref_alignment
218 (struct data_reference
*, loop_vec_info
);
219 static bool vect_analyze_data_ref_access (struct data_reference
*);
220 static bool vect_get_first_index (tree
, tree
*);
221 static bool vect_can_force_dr_alignment_p (tree
, unsigned int);
222 static struct data_reference
* vect_analyze_pointer_ref_access
224 static bool vect_can_advance_ivs_p (struct loop
*);
225 static tree vect_get_base_and_bit_offset
226 (struct data_reference
*, tree
, tree
, loop_vec_info
, tree
*, bool*);
227 static struct data_reference
* vect_analyze_pointer_ref_access
229 static tree
vect_compute_array_base_alignment (tree
, tree
, tree
*, tree
*);
230 static tree vect_compute_array_ref_alignment
231 (struct data_reference
*, loop_vec_info
, tree
, tree
*);
232 static tree
vect_get_ptr_offset (tree
, tree
, tree
*);
233 static tree vect_get_symbl_and_dr
234 (tree
, tree
, bool, loop_vec_info
, struct data_reference
**);
235 static bool vect_analyze_offset_expr (tree
, struct loop
*, tree
, tree
*,
238 /* Utility functions for the code transformation. */
239 static tree
vect_create_destination_var (tree
, tree
);
240 static tree vect_create_data_ref_ptr
241 (tree
, block_stmt_iterator
*, tree
, tree
*, bool);
242 static tree vect_create_index_for_vector_ref
243 (struct loop
*, block_stmt_iterator
*);
244 static tree
vect_create_addr_base_for_vector_ref (tree
, tree
*, tree
);
245 static tree
get_vectype_for_scalar_type (tree
);
246 static tree
vect_get_new_vect_var (tree
, enum vect_var_kind
, const char *);
247 static tree
vect_get_vec_def_for_operand (tree
, tree
);
248 static tree
vect_init_vector (tree
, tree
);
249 static void vect_finish_stmt_generation
250 (tree stmt
, tree vec_stmt
, block_stmt_iterator
*bsi
);
252 /* Utility function dealing with loop peeling (not peeling itself). */
253 static void vect_generate_tmps_on_preheader
254 (loop_vec_info
, tree
*, tree
*, tree
*);
255 static tree
vect_build_loop_niters (loop_vec_info
);
256 static void vect_update_ivs_after_vectorizer (struct loop
*, tree
, edge
);
257 static tree
vect_gen_niters_for_prolog_loop (loop_vec_info
, tree
);
258 static void vect_update_inits_of_dr
259 (struct data_reference
*, struct loop
*, tree niters
);
260 static void vect_update_inits_of_drs (loop_vec_info
, tree
);
261 static void vect_do_peeling_for_alignment (loop_vec_info
, struct loops
*);
262 static void vect_do_peeling_for_loop_bound
263 (loop_vec_info
, tree
*, struct loops
*);
265 /* Utilities for creation and deletion of vec_info structs. */
266 loop_vec_info
new_loop_vec_info (struct loop
*loop
);
267 void destroy_loop_vec_info (loop_vec_info
);
268 stmt_vec_info
new_stmt_vec_info (tree stmt
, struct loop
*loop
);
270 static bool vect_debug_stats (struct loop
*loop
);
271 static bool vect_debug_details (struct loop
*loop
);
274 /*************************************************************************
275 Simple Loop Peeling Utilities
277 Utilities to support loop peeling for vectorization purposes.
278 *************************************************************************/
281 /* For each definition in DEFINITIONS this function allocates
285 allocate_new_names (bitmap definitions
)
290 EXECUTE_IF_SET_IN_BITMAP (definitions
, 0, ver
, bi
)
292 tree def
= ssa_name (ver
);
293 tree
*new_name_ptr
= xmalloc (sizeof (tree
));
295 bool abnormal
= SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def
);
297 *new_name_ptr
= duplicate_ssa_name (def
, SSA_NAME_DEF_STMT (def
));
298 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr
) = abnormal
;
300 SSA_NAME_AUX (def
) = new_name_ptr
;
305 /* Renames the use *OP_P. */
308 rename_use_op (use_operand_p op_p
)
312 if (TREE_CODE (USE_FROM_PTR (op_p
)) != SSA_NAME
)
315 new_name_ptr
= SSA_NAME_AUX (USE_FROM_PTR (op_p
));
317 /* Something defined outside of the loop. */
321 /* An ordinary ssa name defined in the loop. */
323 SET_USE (op_p
, *new_name_ptr
);
327 /* Renames the def *OP_P in statement STMT. */
330 rename_def_op (def_operand_p op_p
, tree stmt
)
334 if (TREE_CODE (DEF_FROM_PTR (op_p
)) != SSA_NAME
)
337 new_name_ptr
= SSA_NAME_AUX (DEF_FROM_PTR (op_p
));
339 /* Something defined outside of the loop. */
343 /* An ordinary ssa name defined in the loop. */
345 SET_DEF (op_p
, *new_name_ptr
);
346 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p
)) = stmt
;
350 /* Renames the variables in basic block BB. */
353 rename_variables_in_bb (basic_block bb
)
356 block_stmt_iterator bsi
;
362 v_may_def_optype v_may_defs
;
363 v_must_def_optype v_must_defs
;
367 struct loop
*loop
= bb
->loop_father
;
369 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
370 rename_def_op (PHI_RESULT_PTR (phi
), phi
);
372 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
374 stmt
= bsi_stmt (bsi
);
375 get_stmt_operands (stmt
);
376 ann
= stmt_ann (stmt
);
378 uses
= USE_OPS (ann
);
379 for (i
= 0; i
< NUM_USES (uses
); i
++)
380 rename_use_op (USE_OP_PTR (uses
, i
));
382 defs
= DEF_OPS (ann
);
383 for (i
= 0; i
< NUM_DEFS (defs
); i
++)
384 rename_def_op (DEF_OP_PTR (defs
, i
), stmt
);
386 vuses
= VUSE_OPS (ann
);
387 for (i
= 0; i
< NUM_VUSES (vuses
); i
++)
388 rename_use_op (VUSE_OP_PTR (vuses
, i
));
390 v_may_defs
= V_MAY_DEF_OPS (ann
);
391 for (i
= 0; i
< NUM_V_MAY_DEFS (v_may_defs
); i
++)
393 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs
, i
));
394 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs
, i
), stmt
);
397 v_must_defs
= V_MUST_DEF_OPS (ann
);
398 for (i
= 0; i
< NUM_V_MUST_DEFS (v_must_defs
); i
++)
400 rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs
, i
));
401 rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs
, i
), stmt
);
405 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
407 if (!flow_bb_inside_loop_p (loop
, e
->dest
))
409 for (phi
= phi_nodes (e
->dest
); phi
; phi
= PHI_CHAIN (phi
))
410 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi
, e
));
415 /* Releases the structures holding the new ssa names. */
418 free_new_names (bitmap definitions
)
423 EXECUTE_IF_SET_IN_BITMAP (definitions
, 0, ver
, bi
)
425 tree def
= ssa_name (ver
);
427 if (SSA_NAME_AUX (def
))
429 free (SSA_NAME_AUX (def
));
430 SSA_NAME_AUX (def
) = NULL
;
436 /* Renames variables in new generated LOOP. */
439 rename_variables_in_loop (struct loop
*loop
)
444 bbs
= get_loop_body (loop
);
446 for (i
= 0; i
< loop
->num_nodes
; i
++)
447 rename_variables_in_bb (bbs
[i
]);
453 /* Update the PHI nodes of NEW_LOOP.
455 NEW_LOOP is a duplicate of ORIG_LOOP.
456 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
457 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
458 executes before it. */
461 slpeel_update_phis_for_duplicate_loop (struct loop
*orig_loop
,
462 struct loop
*new_loop
, bool after
)
464 tree
*new_name_ptr
, new_ssa_name
;
465 tree phi_new
, phi_orig
;
467 edge orig_loop_latch
= loop_latch_edge (orig_loop
);
468 edge orig_entry_e
= loop_preheader_edge (orig_loop
);
469 edge new_loop_exit_e
= new_loop
->exit_edges
[0];
470 edge new_loop_entry_e
= loop_preheader_edge (new_loop
);
471 edge entry_arg_e
= (after
? orig_loop_latch
: orig_entry_e
);
474 step 1. For each loop-header-phi:
475 Add the first phi argument for the phi in NEW_LOOP
476 (the one associated with the entry of NEW_LOOP)
478 step 2. For each loop-header-phi:
479 Add the second phi argument for the phi in NEW_LOOP
480 (the one associated with the latch of NEW_LOOP)
482 step 3. Update the phis in the successor block of NEW_LOOP.
484 case 1: NEW_LOOP was placed before ORIG_LOOP:
485 The successor block of NEW_LOOP is the header of ORIG_LOOP.
486 Updating the phis in the successor block can therefore be done
487 along with the scanning of the loop header phis, because the
488 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
489 phi nodes, organized in the same order.
491 case 2: NEW_LOOP was placed after ORIG_LOOP:
492 The successor block of NEW_LOOP is the original exit block of
493 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
494 We postpone updating these phis to a later stage (when
495 loop guards are added).
499 /* Scan the phis in the headers of the old and new loops
500 (they are organized in exactly the same order). */
502 for (phi_new
= phi_nodes (new_loop
->header
),
503 phi_orig
= phi_nodes (orig_loop
->header
);
505 phi_new
= PHI_CHAIN (phi_new
), phi_orig
= PHI_CHAIN (phi_orig
))
508 def
= PHI_ARG_DEF_FROM_EDGE (phi_orig
, entry_arg_e
);
509 add_phi_arg (phi_new
, def
, new_loop_entry_e
);
512 def
= PHI_ARG_DEF_FROM_EDGE (phi_orig
, orig_loop_latch
);
513 if (TREE_CODE (def
) != SSA_NAME
)
516 new_name_ptr
= SSA_NAME_AUX (def
);
518 /* Something defined outside of the loop. */
521 /* An ordinary ssa name defined in the loop. */
522 new_ssa_name
= *new_name_ptr
;
523 add_phi_arg (phi_new
, new_ssa_name
, loop_latch_edge (new_loop
));
525 /* step 3 (case 1). */
528 gcc_assert (new_loop_exit_e
== orig_entry_e
);
529 SET_PHI_ARG_DEF (phi_orig
,
530 phi_arg_from_edge (phi_orig
, new_loop_exit_e
),
537 /* Update PHI nodes for a guard of the LOOP.
540 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
541 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
542 originates from the guard-bb, skips LOOP and reaches the (unique) exit
543 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
544 We denote this bb NEW_MERGE_BB because it had a single predecessor (the
545 LOOP header) before the guard code was added, and now it became a merge
546 point of two paths - the path that ends with the LOOP exit-edge, and
547 the path that ends with GUARD_EDGE.
549 This function creates and updates the relevant phi nodes to account for
550 the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
551 1. Create phi nodes at NEW_MERGE_BB.
552 2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
553 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
556 ===> The CFG before the guard-code was added:
558 if (exit_loop) goto update_bb : LOOP_header_bb
561 ==> The CFG after the guard-code was added:
563 if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
565 if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
570 - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
571 UPDATE_BB are loop entry phis, like the phis in the LOOP header,
572 organized in the same order.
573 If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
576 - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
577 "original" loop). FALSE if LOOP is an original loop (not a newly
578 created copy). The SSA_NAME_AUX fields of the defs in the original
579 loop are the corresponding new ssa-names used in the new duplicated
580 loop copy. IS_NEW_LOOP indicates which of the two args of the phi
581 nodes in UPDATE_BB takes the original ssa-name, and which takes the
582 new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
583 the LOOP-exit-edge takes the new-name, and the phi-arg that is
584 associated with GUARD_EDGE takes the original name. If IS_NEW_LOOP is
585 FALSE, it's the other way around.
589 slpeel_update_phi_nodes_for_guard (edge guard_edge
,
594 tree orig_phi
, new_phi
, update_phi
;
595 tree guard_arg
, loop_arg
;
596 basic_block new_merge_bb
= guard_edge
->dest
;
597 edge e
= EDGE_SUCC (new_merge_bb
, 0);
598 basic_block update_bb
= e
->dest
;
599 basic_block orig_bb
= (entry_phis
? loop
->header
: update_bb
);
601 for (orig_phi
= phi_nodes (orig_bb
), update_phi
= phi_nodes (update_bb
);
602 orig_phi
&& update_phi
;
603 orig_phi
= PHI_CHAIN (orig_phi
), update_phi
= PHI_CHAIN (update_phi
))
605 /* 1. Generate new phi node in NEW_MERGE_BB: */
606 new_phi
= create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi
)),
609 /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
610 of LOOP. Set the two phi args in NEW_PHI for these edges: */
613 loop_arg
= PHI_ARG_DEF_FROM_EDGE (orig_phi
,
614 EDGE_SUCC (loop
->latch
, 0));
615 guard_arg
= PHI_ARG_DEF_FROM_EDGE (orig_phi
, loop
->entry_edges
[0]);
619 tree orig_def
= PHI_ARG_DEF_FROM_EDGE (orig_phi
, e
);
620 tree
*new_name_ptr
= SSA_NAME_AUX (orig_def
);
624 new_name
= *new_name_ptr
;
626 /* Something defined outside of the loop */
631 guard_arg
= orig_def
;
636 guard_arg
= new_name
;
640 add_phi_arg (new_phi
, loop_arg
, loop
->exit_edges
[0]);
641 add_phi_arg (new_phi
, guard_arg
, guard_edge
);
643 /* 3. Update phi in successor block. */
644 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi
, e
) == loop_arg
645 || PHI_ARG_DEF_FROM_EDGE (update_phi
, e
) == guard_arg
);
646 SET_PHI_ARG_DEF (update_phi
, phi_arg_from_edge (update_phi
, e
),
647 PHI_RESULT (new_phi
));
650 set_phi_nodes (new_merge_bb
, phi_reverse (phi_nodes (new_merge_bb
)));
654 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
655 that starts at zero, increases by one and its limit is NITERS.
657 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
660 slpeel_make_loop_iterate_ntimes (struct loop
*loop
, tree niters
)
662 tree indx_before_incr
, indx_after_incr
, cond_stmt
, cond
;
664 edge exit_edge
= loop
->exit_edges
[0];
665 block_stmt_iterator loop_exit_bsi
= bsi_last (exit_edge
->src
);
666 tree begin_label
= tree_block_label (loop
->latch
);
667 tree exit_label
= tree_block_label (loop
->single_exit
->dest
);
668 tree init
= build_int_cst (TREE_TYPE (niters
), 0);
669 tree step
= build_int_cst (TREE_TYPE (niters
), 1);
673 orig_cond
= get_loop_exit_condition (loop
);
674 gcc_assert (orig_cond
);
675 create_iv (init
, step
, NULL_TREE
, loop
,
676 &loop_exit_bsi
, false, &indx_before_incr
, &indx_after_incr
);
678 /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
679 back to the exit condition statement. */
680 bsi_next (&loop_exit_bsi
);
681 gcc_assert (bsi_stmt (loop_exit_bsi
) == orig_cond
);
683 if (exit_edge
->flags
& EDGE_TRUE_VALUE
) /* 'then' edge exits the loop. */
685 cond
= build2 (GE_EXPR
, boolean_type_node
, indx_after_incr
, niters
);
686 then_label
= build1 (GOTO_EXPR
, void_type_node
, exit_label
);
687 else_label
= build1 (GOTO_EXPR
, void_type_node
, begin_label
);
689 else /* 'then' edge loops back. */
691 cond
= build2 (LT_EXPR
, boolean_type_node
, indx_after_incr
, niters
);
692 then_label
= build1 (GOTO_EXPR
, void_type_node
, begin_label
);
693 else_label
= build1 (GOTO_EXPR
, void_type_node
, exit_label
);
696 cond_stmt
= build3 (COND_EXPR
, TREE_TYPE (orig_cond
), cond
,
697 then_label
, else_label
);
698 bsi_insert_before (&loop_exit_bsi
, cond_stmt
, BSI_SAME_STMT
);
700 /* Remove old loop exit test: */
701 bsi_remove (&loop_exit_bsi
);
703 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
704 print_generic_expr (dump_file
, cond_stmt
, TDF_SLIM
);
706 loop
->nb_iterations
= niters
;
710 /* Given LOOP this function generates a new copy of it and puts it
711 on E which is either the entry or exit of LOOP. */
714 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop
*loop
, struct loops
*loops
,
717 struct loop
*new_loop
;
718 basic_block
*new_bbs
, *bbs
;
721 basic_block exit_dest
;
724 at_exit
= (e
== loop
->exit_edges
[0]);
725 if (!at_exit
&& e
!= loop_preheader_edge (loop
))
727 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
728 fprintf (dump_file
, "Edge is not an entry nor an exit edge.\n");
732 bbs
= get_loop_body (loop
);
734 /* Check whether duplication is possible. */
735 if (!can_copy_bbs_p (bbs
, loop
->num_nodes
))
737 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
738 fprintf (dump_file
, "Cannot copy basic blocks.\n");
743 /* Generate new loop structure. */
744 new_loop
= duplicate_loop (loops
, loop
, loop
->outer
);
747 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
748 fprintf (dump_file
, "duplicate_loop returns NULL.\n");
753 exit_dest
= loop
->exit_edges
[0]->dest
;
754 was_imm_dom
= (get_immediate_dominator (CDI_DOMINATORS
,
755 exit_dest
) == loop
->header
?
758 new_bbs
= xmalloc (sizeof (basic_block
) * loop
->num_nodes
);
760 copy_bbs (bbs
, loop
->num_nodes
, new_bbs
, NULL
, 0, NULL
, NULL
);
762 /* Duplicating phi args at exit bbs as coming
763 also from exit of duplicated loop. */
764 for (phi
= phi_nodes (exit_dest
); phi
; phi
= PHI_CHAIN (phi
))
766 phi_arg
= PHI_ARG_DEF_FROM_EDGE (phi
, loop
->exit_edges
[0]);
769 edge new_loop_exit_edge
;
771 if (EDGE_SUCC (new_loop
->header
, 0)->dest
== new_loop
->latch
)
772 new_loop_exit_edge
= EDGE_SUCC (new_loop
->header
, 1);
774 new_loop_exit_edge
= EDGE_SUCC (new_loop
->header
, 0);
776 add_phi_arg (phi
, phi_arg
, new_loop_exit_edge
);
780 if (at_exit
) /* Add the loop copy at exit. */
782 redirect_edge_and_branch_force (e
, new_loop
->header
);
783 set_immediate_dominator (CDI_DOMINATORS
, new_loop
->header
, e
->src
);
785 set_immediate_dominator (CDI_DOMINATORS
, exit_dest
, new_loop
->header
);
787 else /* Add the copy at entry. */
790 edge entry_e
= loop_preheader_edge (loop
);
791 basic_block preheader
= entry_e
->src
;
793 if (!flow_bb_inside_loop_p (new_loop
,
794 EDGE_SUCC (new_loop
->header
, 0)->dest
))
795 new_exit_e
= EDGE_SUCC (new_loop
->header
, 0);
797 new_exit_e
= EDGE_SUCC (new_loop
->header
, 1);
799 redirect_edge_and_branch_force (new_exit_e
, loop
->header
);
800 set_immediate_dominator (CDI_DOMINATORS
, loop
->header
,
803 /* We have to add phi args to the loop->header here as coming
804 from new_exit_e edge. */
805 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
807 phi_arg
= PHI_ARG_DEF_FROM_EDGE (phi
, entry_e
);
809 add_phi_arg (phi
, phi_arg
, new_exit_e
);
812 redirect_edge_and_branch_force (entry_e
, new_loop
->header
);
813 set_immediate_dominator (CDI_DOMINATORS
, new_loop
->header
, preheader
);
816 flow_loop_scan (new_loop
, LOOP_ALL
);
817 flow_loop_scan (loop
, LOOP_ALL
);
825 /* Given the condition statement COND, put it as the last statement
826 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
827 Assumes that this is the single exit of the guarded loop.
828 Returns the skip edge. */
831 slpeel_add_loop_guard (basic_block guard_bb
, tree cond
, basic_block exit_bb
,
834 block_stmt_iterator bsi
;
836 tree cond_stmt
, then_label
, else_label
;
838 enter_e
= EDGE_SUCC (guard_bb
, 0);
839 enter_e
->flags
&= ~EDGE_FALLTHRU
;
840 enter_e
->flags
|= EDGE_FALSE_VALUE
;
841 bsi
= bsi_last (guard_bb
);
843 then_label
= build1 (GOTO_EXPR
, void_type_node
,
844 tree_block_label (exit_bb
));
845 else_label
= build1 (GOTO_EXPR
, void_type_node
,
846 tree_block_label (enter_e
->dest
));
847 cond_stmt
= build3 (COND_EXPR
, void_type_node
, cond
,
848 then_label
, else_label
);
849 bsi_insert_after (&bsi
, cond_stmt
, BSI_NEW_STMT
);
850 /* Add new edge to connect entry block to the second loop. */
851 new_e
= make_edge (guard_bb
, exit_bb
, EDGE_TRUE_VALUE
);
852 set_immediate_dominator (CDI_DOMINATORS
, exit_bb
, dom_bb
);
857 /* This function verifies that the following restrictions apply to LOOP:
859 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
860 (3) it is single entry, single exit
861 (4) its exit condition is the last stmt in the header
862 (5) E is the entry/exit edge of LOOP.
866 slpeel_can_duplicate_loop_p (struct loop
*loop
, edge e
)
868 edge exit_e
= loop
->exit_edges
[0];
869 edge entry_e
= loop_preheader_edge (loop
);
870 tree orig_cond
= get_loop_exit_condition (loop
);
871 block_stmt_iterator loop_exit_bsi
= bsi_last (exit_e
->src
);
873 if (any_marked_for_rewrite_p ())
877 /* All loops have an outer scope; the only case loop->outer is NULL is for
878 the function itself. */
880 || loop
->num_nodes
!= 2
881 || !empty_block_p (loop
->latch
)
882 || loop
->num_exits
!= 1
883 || loop
->num_entries
!= 1
884 /* Verify that new loop exit condition can be trivially modified. */
885 || (!orig_cond
|| orig_cond
!= bsi_stmt (loop_exit_bsi
))
886 || (e
!= exit_e
&& e
!= entry_e
))
#ifdef ENABLE_CHECKING
/* Sanity-check the CFG produced by loop peeling: FIRST_LOOP's exit block
   must be the guard with exactly two successors, and SECOND_LOOP's
   preheader must be reachable both from that guard and from the block
   preceding FIRST_LOOP.  Aborts (via gcc_assert) on violation.  */

static void
slpeel_verify_cfg_after_peeling (struct loop *first_loop,
				 struct loop *second_loop)
{
  basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
  basic_block loop2_entry_bb = second_loop->pre_header;
  basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;

  /* A guard that controls whether the second_loop is to be executed or skipped
     is placed in first_loop->exit.  first_loopt->exit therefore has two
     successors - one is the preheader of second_loop, and the other is a bb
     after second_loop.  */
  gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);

  /* 1. Verify that one of the successors of first_loopt->exit is the preheader
	of second_loop.  */

  /* The preheader of new_loop is expected to have two predecessors:
     first_loop->exit and the block that precedes first_loop.  */

  gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
	      && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
		   && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
		  || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
		      && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));

  /* Verify that the other successor of first_loopt->exit is after the
     second_loop.  */
  /* TODO */
}
#endif
927 /* Function slpeel_tree_peel_loop_to_edge.
929 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
930 that is placed on the entry (exit) edge E of LOOP. After this transformation
931 we have two loops one after the other - first-loop iterates FIRST_NITERS
932 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
935 - LOOP: the loop to be peeled.
936 - E: the exit or entry edge of LOOP.
937 If it is the entry edge, we peel the first iterations of LOOP. In this
938 case first-loop is LOOP, and second-loop is the newly created loop.
939 If it is the exit edge, we peel the last iterations of LOOP. In this
940 case, first-loop is the newly created loop, and second-loop is LOOP.
941 - NITERS: the number of iterations that LOOP iterates.
942 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
943 - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible
944 for updating the loop bound of the first-loop to FIRST_NITERS. If it
945 is false, the caller of this function may want to take care of this
946 (this can be useful if we don't want new stmts added to first-loop).
949 The function returns a pointer to the new loop-copy, or NULL if it failed
950 to perform the transformation.
952 The function generates two if-then-else guards: one before the first loop,
953 and the other before the second loop:
955 if (FIRST_NITERS == 0) then skip the first loop,
956 and go directly to the second loop.
958 if (FIRST_NITERS == NITERS) then skip the second loop.
960 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
961 FORNOW the resulting code will not be in loop-closed-ssa form.
/* Peel FIRST_NITERS iterations of LOOP around edge E, producing two
   consecutive loops plus the two runtime guards described in the header
   comment above.  Returns the newly created loop copy, or NULL on failure.
   NOTE(review): extraction of this file appears to have dropped interleaved
   lines (braces, returns, the ASCII-art CFG diagrams); code below is kept
   byte-identical.  */
965 slpeel_tree_peel_loop_to_edge (struct loop
*loop
, struct loops
*loops
,
966 edge e
, tree first_niters
,
967 tree niters
, bool update_first_loop_count
)
969 struct loop
*new_loop
= NULL
, *first_loop
, *second_loop
;
973 basic_block bb_before_second_loop
, bb_after_second_loop
;
974 basic_block bb_before_first_loop
;
975 basic_block bb_between_loops
;
976 edge exit_e
= loop
->exit_edges
[0];
978 if (!slpeel_can_duplicate_loop_p (loop
, e
))
981 /* We have to initialize cfg_hooks. Then, when calling
982 cfg_hooks->split_edge, the function tree_split_edge
983 is actually called and, when calling cfg_hooks->duplicate_block,
984 the function tree_duplicate_bb is called. */
985 tree_register_cfg_hooks ();
988 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
989 Resulting CFG would be:
1002 if (!(new_loop
= slpeel_tree_duplicate_loop_to_edge_cfg (loop
, loops
, e
)))
1004 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
1005 fprintf (dump_file
, "tree_duplicate_loop_to_edge_cfg failed.\n");
1011 /* NEW_LOOP was placed after LOOP. */
1013 second_loop
= new_loop
;
1017 /* NEW_LOOP was placed before LOOP. */
1018 first_loop
= new_loop
;
/* Create fresh SSA names for the duplicated loop body, fix up the phi
   nodes that connect the two copies, and rename uses inside the copy.  */
1022 definitions
= marked_ssa_names ();
1023 allocate_new_names (definitions
);
1024 slpeel_update_phis_for_duplicate_loop (loop
, new_loop
, e
== exit_e
);
1025 rename_variables_in_loop (new_loop
);
1028 /* 2. Add the guard that controls whether the first loop is executed.
1029 Resulting CFG would be:
1031 bb_before_first_loop:
1032 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1039 bb_before_second_loop:
1048 bb_before_first_loop
= split_edge (loop_preheader_edge (first_loop
));
1049 add_bb_to_loop (bb_before_first_loop
, first_loop
->outer
);
1050 bb_before_second_loop
= split_edge (first_loop
->exit_edges
[0]);
1051 add_bb_to_loop (bb_before_second_loop
, first_loop
->outer
);
1052 flow_loop_scan (first_loop
, LOOP_ALL
);
1053 flow_loop_scan (second_loop
, LOOP_ALL
);
/* Guard condition: FIRST_NITERS <= 0 means the first loop runs zero times
   and control skips straight to the second loop.  */
1056 build2 (LE_EXPR
, boolean_type_node
, first_niters
, integer_zero_node
);
1057 skip_e
= slpeel_add_loop_guard (bb_before_first_loop
, pre_condition
,
1058 bb_before_second_loop
, bb_before_first_loop
);
1059 slpeel_update_phi_nodes_for_guard (skip_e
, first_loop
, true /* entry-phis */,
1060 first_loop
== new_loop
);
1063 /* 3. Add the guard that controls whether the second loop is executed.
1064 Resulting CFG would be:
1066 bb_before_first_loop:
1067 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1075 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1076 GOTO bb_before_second_loop
1078 bb_before_second_loop:
1084 bb_after_second_loop:
1089 bb_between_loops
= split_edge (first_loop
->exit_edges
[0]);
1090 add_bb_to_loop (bb_between_loops
, first_loop
->outer
);
1091 bb_after_second_loop
= split_edge (second_loop
->exit_edges
[0]);
1092 add_bb_to_loop (bb_after_second_loop
, second_loop
->outer
);
1093 flow_loop_scan (first_loop
, LOOP_ALL
);
1094 flow_loop_scan (second_loop
, LOOP_ALL
);
/* Guard condition: FIRST_NITERS == NITERS means the first loop consumed
   every iteration, so the second loop is skipped entirely.  */
1096 pre_condition
= build2 (EQ_EXPR
, boolean_type_node
, first_niters
, niters
);
1097 skip_e
= slpeel_add_loop_guard (bb_between_loops
, pre_condition
,
1098 bb_after_second_loop
, bb_before_first_loop
);
1099 slpeel_update_phi_nodes_for_guard (skip_e
, second_loop
, false /* exit-phis */,
1100 second_loop
== new_loop
);
1102 /* Flow loop scan does not update loop->single_exit field. */
1103 first_loop
->single_exit
= first_loop
->exit_edges
[0];
1104 second_loop
->single_exit
= second_loop
->exit_edges
[0];
1106 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1108 if (update_first_loop_count
)
1109 slpeel_make_loop_iterate_ntimes (first_loop
, first_niters
);
/* Done with the new-SSA-name bookkeeping allocated above; release it.  */
1111 free_new_names (definitions
);
1112 BITMAP_XFREE (definitions
);
1113 unmark_all_for_rewrite ();
1119 /* Here the proper Vectorizer starts. */
1121 /*************************************************************************
1122 Vectorization Utilities.
1123 *************************************************************************/
1125 /* Function new_stmt_vec_info.
1127 Create and initialize a new stmt_vec_info struct for STMT. */
/* Allocate a zero-filled stmt_vec_info for STMT in LOOP and seed every
   field with its "not yet analyzed" default (NULL / false / undef type).
   The analysis phases presumably fill these fields in later — confirm.  */
1130 new_stmt_vec_info (tree stmt
, struct loop
*loop
)
1133 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
/* Identity: which stmt/loop this info describes.  */
1135 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
1136 STMT_VINFO_STMT (res
) = stmt
;
1137 STMT_VINFO_LOOP (res
) = loop
;
1138 STMT_VINFO_RELEVANT_P (res
) = 0;
1139 STMT_VINFO_VECTYPE (res
) = NULL
;
1140 STMT_VINFO_VEC_STMT (res
) = NULL
;
/* Data-reference related fields; empty until data-ref analysis runs.  */
1141 STMT_VINFO_DATA_REF (res
) = NULL
;
1142 STMT_VINFO_MEMTAG (res
) = NULL
;
1143 STMT_VINFO_VECT_DR_BASE (res
) = NULL
;
1144 STMT_VINFO_VECT_INIT_OFFSET (res
) = NULL_TREE
;
1145 STMT_VINFO_VECT_STEP (res
) = NULL_TREE
;
1146 STMT_VINFO_VECT_BASE_ALIGNED_P (res
) = false;
1147 STMT_VINFO_VECT_MISALIGNMENT (res
) = NULL_TREE
;
1153 /* Function new_loop_vec_info.
1155 Create and initialize a new loop_vec_info struct for LOOP, as well as
1156 stmt_vec_info structs for all the stmts in LOOP. */
/* Allocate a loop_vec_info for LOOP: attach a fresh stmt_vec_info to every
   stmt of every basic block in the loop body, then initialize the per-loop
   fields to their "not yet analyzed" defaults.  */
1159 new_loop_vec_info (struct loop
*loop
)
1163 block_stmt_iterator si
;
1166 res
= (loop_vec_info
) xcalloc (1, sizeof (struct _loop_vec_info
));
1168 bbs
= get_loop_body (loop
);
1170 /* Create stmt_info for all stmts in the loop. */
1171 for (i
= 0; i
< loop
->num_nodes
; i
++)
1173 basic_block bb
= bbs
[i
];
1174 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
1176 tree stmt
= bsi_stmt (si
);
/* Force operand scanning so the stmt's annotation exists before we
   hang the stmt_vec_info off it.  */
1179 get_stmt_operands (stmt
);
1180 ann
= stmt_ann (stmt
);
1181 set_stmt_info (ann
, new_stmt_vec_info (stmt
, loop
));
/* Per-loop fields: defaults meaning "analysis has not run yet".  */
1185 LOOP_VINFO_LOOP (res
) = loop
;
1186 LOOP_VINFO_BBS (res
) = bbs
;
1187 LOOP_VINFO_EXIT_COND (res
) = NULL
;
1188 LOOP_VINFO_NITERS (res
) = NULL
;
1189 LOOP_VINFO_VECTORIZABLE_P (res
) = 0;
1190 LOOP_DO_PEELING_FOR_ALIGNMENT (res
) = false;
1191 LOOP_VINFO_VECT_FACTOR (res
) = 0;
/* Growable arrays for the loop's read/write data references (initial
   capacity 20).  */
1192 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res
), 20,
1193 "loop_write_datarefs");
1194 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res
), 20,
1195 "loop_read_datarefs");
1196 LOOP_VINFO_UNALIGNED_DR (res
) = NULL
;
1202 /* Function destroy_loop_vec_info.
1204 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1205 stmts in the loop. */
/* Tear down LOOP_VINFO: walk every stmt of the loop and detach its
   stmt_vec_info from the stmt annotation, then release the basic-block
   array and the data-ref varrays.
   NOTE(review): freeing of each stmt_info struct itself (and of LOOP_VINFO)
   is not visible in this extract — confirm it happens in the elided lines.  */
1208 destroy_loop_vec_info (loop_vec_info loop_vinfo
)
1213 block_stmt_iterator si
;
1219 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1221 bbs
= LOOP_VINFO_BBS (loop_vinfo
);
1222 nbbs
= loop
->num_nodes
;
1224 for (j
= 0; j
< nbbs
; j
++)
1226 basic_block bb
= bbs
[j
];
1227 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
1229 tree stmt
= bsi_stmt (si
);
1230 stmt_ann_t ann
= stmt_ann (stmt
);
1231 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
/* Detach the per-stmt info so the annotation no longer points at it.  */
1233 set_stmt_info (ann
, NULL
);
1237 free (LOOP_VINFO_BBS (loop_vinfo
));
1238 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo
));
1239 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo
));
1245 /* Function debug_loop_stats.
1247 For vectorization statistics dumps. */
/* Predicate used by callers to gate statistics dumps (see e.g. the use in
   slpeel_tree_peel_loop_to_edge): fails unless dump_file is open with
   TDF_STATS set; as a side effect prints a "loop at FILE:LINE" header for
   LOOP's source location when one can be found.  */
1250 vect_debug_stats (struct loop
*loop
)
1253 block_stmt_iterator si
;
1254 tree node
= NULL_TREE
;
/* No stats stream requested — nothing to do.  */
1256 if (!dump_file
|| !(dump_flags
& TDF_STATS
))
1261 fprintf (dump_file
, "\n");
/* Scan for a stmt carrying location info to identify the loop.  */
1270 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
1272 node
= bsi_stmt (si
);
1273 if (node
&& EXPR_P (node
) && EXPR_LOCUS (node
))
1277 if (node
&& EXPR_P (node
) && EXPR_LOCUS (node
)
1278 && EXPR_FILENAME (node
) && EXPR_LINENO (node
))
1280 fprintf (dump_file
, "\nloop at %s:%d: ",
1281 EXPR_FILENAME (node
), EXPR_LINENO (node
));
1289 /* Function debug_loop_details.
1291 For vectorization debug dumps. */
/* Same shape as vect_debug_stats above, but gated on TDF_DETAILS instead
   of TDF_STATS: a predicate for detailed debug dumps that also prints a
   "loop at FILE:LINE" header for LOOP when location info is available.  */
1294 vect_debug_details (struct loop
*loop
)
1297 block_stmt_iterator si
;
1298 tree node
= NULL_TREE
;
/* No details stream requested — nothing to do.  */
1300 if (!dump_file
|| !(dump_flags
& TDF_DETAILS
))
1305 fprintf (dump_file
, "\n");
/* Scan for a stmt carrying location info to identify the loop.  */
1314 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
1316 node
= bsi_stmt (si
);
1317 if (node
&& EXPR_P (node
) && EXPR_LOCUS (node
))
1321 if (node
&& EXPR_P (node
) && EXPR_LOCUS (node
)
1322 && EXPR_FILENAME (node
) && EXPR_LINENO (node
))
1324 fprintf (dump_file
, "\nloop at %s:%d: ",
1325 EXPR_FILENAME (node
), EXPR_LINENO (node
));
1333 /* Function vect_get_ptr_offset
1335 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
1338 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED
,
1339 tree vectype ATTRIBUTE_UNUSED
,
1340 tree
*offset ATTRIBUTE_UNUSED
)
1342 /* TODO: Use alignment information. */
1347 /* Function vect_analyze_offset_expr
1349 Given an offset expression EXPR received from get_inner_reference, analyze
1350 it and create an expression for INITIAL_OFFSET by substituting the variables
1351 of EXPR with initial_condition of the corresponding access_fn in the loop.
1354 for (j = 3; j < N; j++)
1357 For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'. 'i' cannot be
1358 substituted, since its access_fn in the inner loop is i. 'j' will be
1359 substituted with 3. An INITIAL_OFFSET will be 'i * C_i + C`', where
1362 Compute MISALIGN (the misalignment of the data reference initial access from
1363 its base) if possible. Misalignment can be calculated only if all the
1364 variables can be substituted with constants, or if a variable is multiplied
1365 by a multiple of VECTYPE_ALIGNMENT. In the above example, since 'i' cannot
1366 be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple
1367 of VECTYPE_ALIGNMENT, and C` otherwise. (We perform MISALIGN modulo
1368 VECTYPE_ALIGNMENT computation in the caller of this function).
1370 STEP is an evolution of the data reference in this loop in bytes.
1371 In the above example, STEP is C_j.
1373 Return FALSE, if the analysis fails, e.g., there is no access_fn for a
1374 variable. In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
1375 are NULL_TREEs. Otherwise, return TRUE.
/* See the header comment above for the full contract.  Structure:
   (1) constant EXPR, (2) SSA variable substituted via its access_fn,
   (3) recursive combination of the two operands of a PLUS/MINUS/MULT.
   On failure all outputs are NULL_TREE.
   NOTE(review): this extract truncates mid-expression at the final
   *initial_offset computation; code is kept byte-identical.  */
1380 vect_analyze_offset_expr (tree expr
,
1382 tree vectype_alignment
,
1383 tree
*initial_offset
,
1389 tree left_offset
= size_zero_node
;
1390 tree right_offset
= size_zero_node
;
1391 tree left_misalign
= size_zero_node
;
1392 tree right_misalign
= size_zero_node
;
1393 tree left_step
= size_zero_node
;
1394 tree right_step
= size_zero_node
;
1395 enum tree_code code
;
1396 tree init
, evolution
, def_stmt
;
/* Pessimistic defaults: cleared before any early exit can fire.  */
1401 *misalign
= NULL_TREE
;
1402 *initial_offset
= NULL_TREE
;
/* 1. Constant: offset and misalignment are the constant itself; no step.  */
1406 if (TREE_CONSTANT (expr
))
1408 *initial_offset
= fold_convert (sizetype
, expr
);
1409 *misalign
= fold_convert (sizetype
, expr
);
1410 *step
= size_zero_node
;
1414 /* 2. Variable. Try to substitute with initial_condition of the corresponding
1415 access_fn in the current loop. */
1416 if (SSA_VAR_P (expr
))
1418 tree access_fn
= analyze_scalar_evolution (loop
, expr
);
1420 if (access_fn
== chrec_dont_know
)
1424 init
= initial_condition_in_loop_num (access_fn
, loop
->num
);
1427 def_stmt
= SSA_NAME_DEF_STMT (init
);
1429 && !IS_EMPTY_STMT (def_stmt
)
1430 && flow_bb_inside_loop_p (loop
, bb_for_stmt (def_stmt
)))
1431 /* Not enough information: may be not loop invariant.
1432 E.g., for a[b[i]], we get a[D], where D=b[i]. EXPR is D, its
1433 initial_condition is D, but it depends on i - loop's induction
1438 evolution
= evolution_part_in_loop_num (access_fn
, loop
->num
);
1439 if (evolution
&& TREE_CODE (evolution
) != INTEGER_CST
)
1440 /* Evolution is not constant. */
1443 if (TREE_CONSTANT (init
))
1444 *misalign
= fold_convert (sizetype
, init
);
1446 /* Not constant, misalignment cannot be calculated. */
1447 *misalign
= NULL_TREE
;
1449 *initial_offset
= fold_convert (sizetype
, init
);
/* No evolution means the variable is invariant in this loop: step 0.  */
1451 *step
= evolution
? fold_convert (sizetype
, evolution
) : size_zero_node
;
1455 /* Recursive computation. */
1456 oprnd0
= TREE_OPERAND (expr
, 0);
1457 oprnd1
= TREE_OPERAND (expr
, 1);
/* 3. Binary expression: analyze both operands; fail if either fails.  */
1459 if (!vect_analyze_offset_expr (oprnd0
, loop
, vectype_alignment
, &left_offset
,
1460 &left_misalign
, &left_step
)
1461 || !vect_analyze_offset_expr (oprnd1
, loop
, vectype_alignment
,
1462 &right_offset
, &right_misalign
, &right_step
))
1465 /* The type of the operation: plus, minus or mult. */
1466 code
= TREE_CODE (expr
);
1470 if (!TREE_CONSTANT (right_offset
))
1471 /* RIGHT_OFFSET can be not constant. For example, for arrays of variable
1473 FORNOW: We don't support such cases. */
1476 /* Misalignment computation. */
1477 if (SSA_VAR_P (left_offset
))
1479 /* If the left side contains variable that cannot be substituted with
1480 constant, we check if the right side is a multiple of ALIGNMENT. */
1481 if (integer_zerop (size_binop (TRUNC_MOD_EXPR
, right_offset
,
1482 vectype_alignment
)))
1483 *misalign
= size_zero_node
;
1485 /* If the remainder is not zero or the right side isn't constant, we
1486 can't compute misalignment. */
1487 *misalign
= NULL_TREE
;
1491 /* The left operand was successfully substituted with constant. */
1493 /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
1495 *misalign
= size_binop (code
, left_misalign
, right_misalign
);
1497 *misalign
= NULL_TREE
;
1500 /* Step calculation. */
1501 /* Multiply the step by the right operand. */
1502 *step
= size_binop (MULT_EXPR
, left_step
, right_offset
);
1507 /* Combine the recursive calculations for step and misalignment. */
1508 *step
= size_binop (code
, left_step
, right_step
);
/* Misalignment combines only if both sides were computable.  */
1510 if (left_misalign
&& right_misalign
)
1511 *misalign
= size_binop (code
, left_misalign
, right_misalign
);
1513 *misalign
= NULL_TREE
;
1521 /* Compute offset. */
1522 *initial_offset
= fold_convert (sizetype
,
1523 fold (build2 (code
, TREE_TYPE (left_offset
),
1530 /* Function vect_get_base_and_bit_offset
1532 Return the BASE of the data reference EXPR.
1533 If VECTYPE is given, also compute the OFFSET from BASE in bits.
1534 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
1535 bits of 'a.b[i] + 4B' from a.
1538 EXPR - the memory reference that is being analyzed
1539 DR - the data_reference struct of the _original_ memory reference
1540 (Note: DR_REF (DR) is not necessarily EXPR)
1541 VECTYPE - the type that defines the alignment (i.e, we compute
1542 alignment relative to TYPE_ALIGN(VECTYPE))
1545 BASE (returned value) - the base of the data reference EXPR.
1546 E.g, if EXPR is a.b[k].c[i][j] the returned
1548 OFFSET - offset of EXPR from BASE in bits
1549 BASE_ALIGNED_P - indicates if BASE is aligned
1551 If something unexpected is encountered (an unsupported form of data-ref),
1552 or if VECTYPE is given but OFFSET cannot be determined:
1553 then NULL_TREE is returned. */
/* See the header comment above for the full contract.  Recursive walk over
   EXPR driven by its TREE_CODE: decl/constant/pointer cases terminate the
   recursion, while COMPONENT_REF / ARRAY_REF / INDIRECT_REF / PLUS cases
   peel one level and recurse.
   NOTE(review): the case labels of the dispatch on `code` appear to have
   been elided by extraction — code kept byte-identical.  */
1556 vect_get_base_and_bit_offset (struct data_reference
*dr
,
1559 loop_vec_info loop_vinfo
,
1561 bool *base_aligned_p
)
1563 tree this_offset
= size_zero_node
;
1564 tree base
= NULL_TREE
;
1566 tree oprnd0
, oprnd1
;
1567 struct data_reference
*array_dr
;
1568 enum tree_code code
= TREE_CODE (expr
);
/* Pessimistic default; set to true only in the aligned cases below.  */
1570 *base_aligned_p
= false;
1574 /* These cases end the recursion: */
/* Decl case: EXPR itself is the base; aligned if its DECL_ALIGN suffices.  */
1576 *offset
= size_zero_node
;
1577 if (vectype
&& DECL_ALIGN (expr
) >= TYPE_ALIGN (vectype
))
1578 *base_aligned_p
= true;
1585 if (TREE_CODE (TREE_TYPE (expr
)) != POINTER_TYPE
)
/* Pointee alignment too small: fall back to pointer-offset analysis.  */
1588 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr
))) < TYPE_ALIGN (vectype
))
1590 base
= vect_get_ptr_offset (expr
, vectype
, offset
);
1592 *base_aligned_p
= true;
1596 *base_aligned_p
= true;
1597 *offset
= size_zero_node
;
/* Integer constant case: convert byte offset to bits.  */
1603 *offset
= int_const_binop (MULT_EXPR
, expr
,
1604 build_int_cst (NULL_TREE
, BITS_PER_UNIT
), 1);
1607 /* These cases continue the recursion: */
/* COMPONENT_REF: add the field's bit_position and recurse on the record.  */
1609 oprnd0
= TREE_OPERAND (expr
, 0);
1610 oprnd1
= TREE_OPERAND (expr
, 1);
1612 this_offset
= bit_position (oprnd1
);
1613 if (vectype
&& !host_integerp (this_offset
, 1))
1619 oprnd0
= TREE_OPERAND (expr
, 0);
1624 oprnd0
= TREE_OPERAND (expr
, 0);
1629 if (DR_REF (dr
) != expr
)
1630 /* Build array data_reference struct if the existing DR_REF
1631 doesn't match EXPR. This happens, for example, when the
1632 EXPR is *T and T is initialized to &arr[indx]. The DR struct
1633 contains information on the access of T, not of arr. In order
1634 to continue the analysis, we create a new DR struct that
1635 describes the access of arr.
1637 array_dr
= analyze_array (DR_STMT (dr
), expr
, DR_IS_READ (dr
));
1641 next_ref
= vect_compute_array_ref_alignment (array_dr
, loop_vinfo
,
1642 vectype
, &this_offset
);
1647 TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref
))) >= TYPE_ALIGN (vectype
))
1649 *offset
= this_offset
;
1650 *base_aligned_p
= true;
1657 /* In case we have a PLUS_EXPR of the form
1658 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1659 This is verified in vect_get_symbl_and_dr. */
1660 oprnd0
= TREE_OPERAND (expr
, 0);
1661 oprnd1
= TREE_OPERAND (expr
, 1);
/* Recurse on the non-base operand first to accumulate its offset.  */
1663 base
= vect_get_base_and_bit_offset
1664 (dr
, oprnd1
, vectype
, loop_vinfo
, &this_offset
, base_aligned_p
);
1665 if (vectype
&& !base
)
1675 base
= vect_get_base_and_bit_offset (dr
, next_ref
, vectype
,
1676 loop_vinfo
, offset
, base_aligned_p
);
/* Fold this level's contribution into the running offset, guarding
   against non-constant or overflowed totals.  */
1678 if (vectype
&& base
)
1680 *offset
= int_const_binop (PLUS_EXPR
, *offset
, this_offset
, 1);
1681 if (!host_integerp (*offset
, 1) || TREE_OVERFLOW (*offset
))
1684 if (vect_debug_details (NULL
))
1686 print_generic_expr (dump_file
, expr
, TDF_SLIM
);
1687 fprintf (dump_file
, " --> total offset for ref: ");
1688 print_generic_expr (dump_file
, *offset
, TDF_SLIM
);
1695 /* Function vect_force_dr_alignment_p.
1697 Returns whether the alignment of a DECL can be forced to be aligned
1698 on ALIGNMENT bit boundary. */
1701 vect_can_force_dr_alignment_p (tree decl
, unsigned int alignment
)
1703 if (TREE_CODE (decl
) != VAR_DECL
)
1706 if (DECL_EXTERNAL (decl
))
1709 if (TREE_ASM_WRITTEN (decl
))
1712 if (TREE_STATIC (decl
))
1713 return (alignment
<= MAX_OFILE_ALIGNMENT
);
1715 /* This is not 100% correct. The absolute correct stack alignment
1716 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1717 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1718 However, until someone implements forced stack alignment, SSE
1719 isn't really usable without this. */
1720 return (alignment
<= PREFERRED_STACK_BOUNDARY
);
1724 /* Function vect_get_new_vect_var.
1726 Returns a name for a new variable. The current naming scheme appends the
1727 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1728 the name of vectorizer generated variables, and appends that to NAME if
1732 vect_get_new_vect_var (tree type
, enum vect_var_kind var_kind
, const char *name
)
1738 if (var_kind
== vect_simple_var
)
1743 prefix_len
= strlen (prefix
);
1746 new_vect_var
= create_tmp_var (type
, concat (prefix
, name
, NULL
));
1748 new_vect_var
= create_tmp_var (type
, prefix
);
1750 return new_vect_var
;
1754 /* Function vect_create_index_for_vector_ref.
1756 Create (and return) an index variable, along with it's update chain in the
1757 loop. This variable will be used to access a memory location in a vector
1761 LOOP: The loop being vectorized.
1762 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1763 function can be added here, or in the loop pre-header.
1766 Return an index that will be used to index a vector array. It is expected
1767 that a pointer to the first vector will be used as the base address for the
1770 FORNOW: we are not trying to be efficient, just creating a new index each
1771 time from scratch. At this time all vector references could use the same
1774 TODO: create only one index to be used by all vector references. Record
1775 the index in the LOOP_VINFO the first time this procedure is called and
1776 return it on subsequent calls. The increment of this index must be placed
1777 just before the conditional expression that ends the single block loop. */
1780 vect_create_index_for_vector_ref (struct loop
*loop
, block_stmt_iterator
*bsi
)
1783 tree indx_before_incr
, indx_after_incr
;
1785 /* It is assumed that the base pointer used for vectorized access contains
1786 the address of the first vector. Therefore the index used for vectorized
1787 access must be initialized to zero and incremented by 1. */
1789 init
= integer_zero_node
;
1790 step
= integer_one_node
;
1792 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
1793 create_iv (init
, step
, NULL_TREE
, loop
, bsi
, false,
1794 &indx_before_incr
, &indx_after_incr
);
1796 return indx_before_incr
;
1800 /* Function vect_create_addr_base_for_vector_ref.
1802 Create an expression that computes the address of the first memory location
1803 that will be accessed for a data reference.
1806 STMT: The statement containing the data reference.
1807 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1808 OFFSET: Optional. If supplied, it is be added to the initial address.
1811 1. Return an SSA_NAME whose value is the address of the memory location of
1812 the first vector of the data reference.
1813 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1814 these statement(s) which define the returned SSA_NAME.
1816 FORNOW: We are only handling array accesses with step 1. */
/* See the header comment above for the full contract: builds, onto
   NEW_STMT_LIST, the stmts computing &base[init_val (+ OFFSET)] and
   (per the header) returns the SSA_NAME holding that address.
   NOTE(review): extraction appears to have dropped interleaved lines
   (braces, the final return); code kept byte-identical.  */
1819 vect_create_addr_base_for_vector_ref (tree stmt
,
1820 tree
*new_stmt_list
,
1823 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1824 struct loop
*loop
= STMT_VINFO_LOOP (stmt_info
);
1825 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1826 tree data_ref_base
= unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info
));
1827 tree base_name
= unshare_expr (DR_BASE_NAME (dr
));
1828 tree ref
= DR_REF (dr
);
1829 tree data_ref_base_type
= TREE_TYPE (data_ref_base
);
1830 tree scalar_type
= TREE_TYPE (ref
);
1831 tree scalar_ptr_type
= build_pointer_type (scalar_type
);
1833 tree init_val
, step
, init_oval
;
1835 bool is_ptr_ref
, is_array_ref
, is_addr_expr
;
1840 tree addr_base
, addr_expr
;
1841 tree dest
, new_stmt
;
1843 /* Only the access function of the last index is relevant (i_n in
1844 a[i_1][i_2]...[i_n]), the others correspond to loop invariants. */
1845 access_fn
= DR_ACCESS_FN (dr
, 0);
1846 ok
= vect_is_simple_iv_evolution (loop
->num
, access_fn
, &init_oval
, &step
,
1849 init_oval
= integer_zero_node
;
/* Classify the base: SSA pointer, array decl, or address arithmetic.
   Exactly one of the three must hold (asserted below).  */
1851 is_ptr_ref
= TREE_CODE (data_ref_base_type
) == POINTER_TYPE
1852 && TREE_CODE (data_ref_base
) == SSA_NAME
;
1853 is_array_ref
= TREE_CODE (data_ref_base_type
) == ARRAY_TYPE
;
1854 is_addr_expr
= TREE_CODE (data_ref_base
) == ADDR_EXPR
1855 || TREE_CODE (data_ref_base
) == PLUS_EXPR
1856 || TREE_CODE (data_ref_base
) == MINUS_EXPR
;
1857 gcc_assert (is_ptr_ref
|| is_array_ref
|| is_addr_expr
);
1859 /** Create: &(base[init_val])
1861 if data_ref_base is an ARRAY_TYPE:
1862 base = data_ref_base
1864 if data_ref_base is the SSA_NAME of a POINTER_TYPE:
1865 base = *((scalar_array *) data_ref_base)
1869 array_base
= data_ref_base
;
1870 else /* is_ptr_ref or is_addr_expr */
1872 /* array_ptr = (scalar_array_ptr_type *) data_ref_base; */
1873 tree scalar_array_type
= build_array_type (scalar_type
, 0);
1874 tree scalar_array_ptr_type
= build_pointer_type (scalar_array_type
);
1875 tree array_ptr
= create_tmp_var (scalar_array_ptr_type
, "array_ptr");
1876 add_referenced_tmp_var (array_ptr
);
1878 dest
= create_tmp_var (TREE_TYPE (data_ref_base
), "dataref");
1879 add_referenced_tmp_var (dest
);
/* Gimplify the base expression; emit any helper stmts it needs.  */
1881 force_gimple_operand (data_ref_base
, &new_stmt
, false, dest
);
1882 append_to_statement_list_force (new_stmt
, new_stmt_list
);
1884 vec_stmt
= fold_convert (scalar_array_ptr_type
, data_ref_base
);
1885 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, array_ptr
, vec_stmt
);
1886 new_temp
= make_ssa_name (array_ptr
, vec_stmt
);
1887 TREE_OPERAND (vec_stmt
, 0) = new_temp
;
1888 append_to_statement_list_force (vec_stmt
, new_stmt_list
);
1891 array_base
= build_fold_indirect_ref (new_temp
);
/* Gimplify the initial index value.  */
1894 dest
= create_tmp_var (TREE_TYPE (init_oval
), "newinit");
1895 add_referenced_tmp_var (dest
);
1896 init_val
= force_gimple_operand (init_oval
, &new_stmt
, false, dest
);
1897 append_to_statement_list_force (new_stmt
, new_stmt_list
);
/* If an extra OFFSET was supplied, add it to the initial index.  */
1901 tree tmp
= create_tmp_var (TREE_TYPE (init_val
), "offset");
1902 add_referenced_tmp_var (tmp
);
1903 vec_stmt
= build2 (PLUS_EXPR
, TREE_TYPE (init_val
), init_val
, offset
);
1904 vec_stmt
= build2 (MODIFY_EXPR
, TREE_TYPE (init_val
), tmp
, vec_stmt
);
1905 init_val
= make_ssa_name (tmp
, vec_stmt
);
1906 TREE_OPERAND (vec_stmt
, 0) = init_val
;
1907 append_to_statement_list_force (vec_stmt
, new_stmt_list
);
/* Form &array_base[init_val] and copy it into a named pointer temp.  */
1910 array_ref
= build4 (ARRAY_REF
, scalar_type
, array_base
, init_val
,
1911 NULL_TREE
, NULL_TREE
);
1912 addr_base
= build_fold_addr_expr (array_ref
);
1914 /* addr_expr = addr_base */
1915 addr_expr
= vect_get_new_vect_var (scalar_ptr_type
, vect_pointer_var
,
1916 get_name (base_name
));
1917 add_referenced_tmp_var (addr_expr
);
1918 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, addr_expr
, addr_base
);
1919 new_temp
= make_ssa_name (addr_expr
, vec_stmt
);
1920 TREE_OPERAND (vec_stmt
, 0) = new_temp
;
1921 append_to_statement_list_force (vec_stmt
, new_stmt_list
);
1927 /* Function get_vectype_for_scalar_type.
1929 Returns the vector type corresponding to SCALAR_TYPE as supported
/* Map SCALAR_TYPE to the vector type the target supports for it: a vector
   of UNITS_PER_SIMD_WORD / nbytes units, rejected if the resulting mode is
   not a vector mode on the target.
   NOTE(review): the return statements and guard checks between the visible
   lines were elided by extraction; code kept byte-identical.  */
1933 get_vectype_for_scalar_type (tree scalar_type
)
1935 enum machine_mode inner_mode
= TYPE_MODE (scalar_type
);
1936 int nbytes
= GET_MODE_SIZE (inner_mode
);
1943 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1945 nunits
= UNITS_PER_SIMD_WORD
/ nbytes
;
1947 vectype
= build_vector_type (scalar_type
, nunits
);
1948 if (vect_debug_details (NULL
))
1950 fprintf (dump_file
, "get vectype with %d units of type ", nunits
);
1951 print_generic_expr (dump_file
, scalar_type
, TDF_SLIM
);
1957 if (vect_debug_details (NULL
))
1959 fprintf (dump_file
, "vectype: ");
1960 print_generic_expr (dump_file
, vectype
, TDF_SLIM
);
/* The built type is only usable if the target really has a vector mode
   for it.  */
1963 if (!VECTOR_MODE_P (TYPE_MODE (vectype
)))
1965 /* TODO: tree-complex.c sometimes can parallelize operations
1966 on generic vectors. We can vectorize the loop in that case,
1967 but then we should re-run the lowering pass. */
1968 if (vect_debug_details (NULL
))
1969 fprintf (dump_file
, "mode not supported by target.");
1977 /* Function vect_align_data_ref.
1979 Handle misalignment of memory accesses.
1981 FORNOW: Can't handle misaligned accesses.
1982 Make sure that the dataref is aligned. */
1985 vect_align_data_ref (tree stmt
)
1987 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1988 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1990 /* FORNOW: can't handle misaligned accesses;
1991 all accesses expected to be aligned. */
1992 gcc_assert (aligned_access_p (dr
));
1996 /* Function vect_create_data_ref_ptr.
1998 Create a memory reference expression for vector access, to be used in a
1999 vector load/store stmt. The reference is based on a new pointer to vector
2003 1. STMT: a stmt that references memory. Expected to be of the form
2004 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
2005 2. BSI: block_stmt_iterator where new stmts can be added.
2006 3. OFFSET (optional): an offset to be added to the initial address accessed
2007 by the data-ref in STMT.
2008 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2009 pointing to the initial address.
2012 1. Declare a new ptr to vector_type, and have it point to the base of the
2013 data reference (initial addressed accessed by the data reference).
2014 For example, for vector of type V8HI, the following code is generated:
2017 vp = (v8hi *)initial_address;
2019 if OFFSET is not supplied:
2020 initial_address = &a[init];
2021 if OFFSET is supplied:
2022 initial_address = &a[init + OFFSET];
2024 Return the initial_address in INITIAL_ADDRESS.
2026 2. Create a data-reference in the loop based on the new vector pointer vp,
2027 and using a new index variable 'idx' as follows:
2031 where if ONLY_INIT is true:
2034 update = idx + vector_type_size
2036 Return the pointer vp'.
2039 FORNOW: handle only aligned and consecutive accesses. */
/* See the header comment above.  Steps (1)-(4) below mirror the numbered
   comments: (1) create the vector-pointer variable, (2) copy aliasing
   info onto it and mark aliased vars for renaming, (3) compute the initial
   address in the loop preheader, (4) unless ONLY_INIT, advance the pointer
   each iteration using an index from vect_create_index_for_vector_ref.  */
2042 vect_create_data_ref_ptr (tree stmt
, block_stmt_iterator
*bsi
, tree offset
,
2043 tree
*initial_address
, bool only_init
)
2046 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2047 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2048 struct loop
*loop
= STMT_VINFO_LOOP (stmt_info
);
2049 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2053 v_may_def_optype v_may_defs
= STMT_V_MAY_DEF_OPS (stmt
);
2054 v_must_def_optype v_must_defs
= STMT_V_MUST_DEF_OPS (stmt
);
2055 vuse_optype vuses
= STMT_VUSE_OPS (stmt
);
2056 int nvuses
, nv_may_defs
, nv_must_defs
;
2060 tree new_stmt_list
= NULL_TREE
;
2062 edge pe
= loop_preheader_edge (loop
);
2068 tree type
, tmp
, size
;
2070 base_name
= unshare_expr (DR_BASE_NAME (dr
));
/* Debug dump only: classify the base for the dump message.  */
2071 if (vect_debug_details (NULL
))
2073 tree data_ref_base
= base_name
;
2074 fprintf (dump_file
, "create array_ref of type: ");
2075 print_generic_expr (dump_file
, vectype
, TDF_SLIM
);
2076 if (TREE_CODE (data_ref_base
) == VAR_DECL
)
2077 fprintf (dump_file
, "\nvectorizing a one dimensional array ref: ");
2078 else if (TREE_CODE (data_ref_base
) == ARRAY_REF
)
2079 fprintf (dump_file
, "\nvectorizing a multidimensional array ref: ");
2080 else if (TREE_CODE (data_ref_base
) == COMPONENT_REF
)
2081 fprintf (dump_file
, "\nvectorizing a record based array ref: ");
2082 else if (TREE_CODE (data_ref_base
) == SSA_NAME
)
2083 fprintf (dump_file
, "\nvectorizing a pointer ref: ");
2084 print_generic_expr (dump_file
, base_name
, TDF_SLIM
);
2087 /** (1) Create the new vector-pointer variable: **/
2089 vect_ptr_type
= build_pointer_type (vectype
);
2090 vect_ptr
= vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
,
2091 get_name (base_name
));
2092 add_referenced_tmp_var (vect_ptr
);
2095 /** (2) Handle aliasing information of the new vector-pointer: **/
/* The new pointer inherits the original access's type memory tag.  */
2097 tag
= STMT_VINFO_MEMTAG (stmt_info
);
2099 get_var_ann (vect_ptr
)->type_mem_tag
= tag
;
2101 /* Mark for renaming all aliased variables
2102 (i.e, the may-aliases of the type-mem-tag). */
2103 nvuses
= NUM_VUSES (vuses
);
2104 nv_may_defs
= NUM_V_MAY_DEFS (v_may_defs
);
2105 nv_must_defs
= NUM_V_MUST_DEFS (v_must_defs
);
2106 for (i
= 0; i
< nvuses
; i
++)
2108 tree use
= VUSE_OP (vuses
, i
);
2109 if (TREE_CODE (use
) == SSA_NAME
)
2110 bitmap_set_bit (vars_to_rename
, var_ann (SSA_NAME_VAR (use
))->uid
);
2112 for (i
= 0; i
< nv_may_defs
; i
++)
2114 tree def
= V_MAY_DEF_RESULT (v_may_defs
, i
);
2115 if (TREE_CODE (def
) == SSA_NAME
)
2116 bitmap_set_bit (vars_to_rename
, var_ann (SSA_NAME_VAR (def
))->uid
);
2118 for (i
= 0; i
< nv_must_defs
; i
++)
2120 tree def
= V_MUST_DEF_RESULT (v_must_defs
, i
);
2121 if (TREE_CODE (def
) == SSA_NAME
)
2122 bitmap_set_bit (vars_to_rename
, var_ann (SSA_NAME_VAR (def
))->uid
);
2126 /** (3) Calculate the initial address the vector-pointer, and set
2127 the vector-pointer to point to it before the loop: **/
2129 /* Create: (&(base[init_val+offset]) in the loop preheader. */
2130 new_temp
= vect_create_addr_base_for_vector_ref (stmt
, &new_stmt_list
,
/* Insertion on the preheader edge must not split it into a new bb.  */
2132 pe
= loop_preheader_edge (loop
);
2133 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmt_list
);
2134 gcc_assert (!new_bb
);
2135 *initial_address
= new_temp
;
2137 /* Create: p = (vectype *) initial_base */
2138 vec_stmt
= fold_convert (vect_ptr_type
, new_temp
);
2139 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, vect_ptr
, vec_stmt
);
2140 new_temp
= make_ssa_name (vect_ptr
, vec_stmt
);
2141 TREE_OPERAND (vec_stmt
, 0) = new_temp
;
2142 new_bb
= bsi_insert_on_edge_immediate (pe
, vec_stmt
);
2143 gcc_assert (!new_bb
);
2144 vect_ptr_init
= TREE_OPERAND (vec_stmt
, 0);
2147 /** (4) Handle the updating of the vector-pointer inside the loop: **/
2149 if (only_init
) /* No update in loop is required. */
2150 return vect_ptr_init
;
2152 idx
= vect_create_index_for_vector_ref (loop
, bsi
);
2154 /* Create: update = idx * vectype_size */
2155 tmp
= create_tmp_var (integer_type_node
, "update");
2156 add_referenced_tmp_var (tmp
);
/* The byte offset must be computed in a pointer-sized integer type.  */
2157 size
= TYPE_SIZE (vect_ptr_type
);
2158 type
= lang_hooks
.types
.type_for_size (tree_low_cst (size
, 1), 1);
2159 ptr_update
= create_tmp_var (type
, "update");
2160 add_referenced_tmp_var (ptr_update
);
2161 vectype_size
= build_int_cst (integer_type_node
,
2162 GET_MODE_SIZE (TYPE_MODE (vectype
)));
2163 vec_stmt
= build2 (MULT_EXPR
, integer_type_node
, idx
, vectype_size
);
2164 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, tmp
, vec_stmt
);
2165 new_temp
= make_ssa_name (tmp
, vec_stmt
);
2166 TREE_OPERAND (vec_stmt
, 0) = new_temp
;
2167 bsi_insert_before (bsi
, vec_stmt
, BSI_SAME_STMT
);
2168 vec_stmt
= fold_convert (type
, new_temp
);
2169 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, ptr_update
, vec_stmt
);
2170 new_temp
= make_ssa_name (ptr_update
, vec_stmt
);
2171 TREE_OPERAND (vec_stmt
, 0) = new_temp
;
2172 bsi_insert_before (bsi
, vec_stmt
, BSI_SAME_STMT
);
2174 /* Create: data_ref_ptr = vect_ptr_init + update */
2175 vec_stmt
= build2 (PLUS_EXPR
, vect_ptr_type
, vect_ptr_init
, new_temp
);
2176 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, vect_ptr
, vec_stmt
);
2177 new_temp
= make_ssa_name (vect_ptr
, vec_stmt
);
2178 TREE_OPERAND (vec_stmt
, 0) = new_temp
;
2179 bsi_insert_before (bsi
, vec_stmt
, BSI_SAME_STMT
);
2180 data_ref_ptr
= TREE_OPERAND (vec_stmt
, 0);
2182 return data_ref_ptr
;
2186 /* Function vect_create_destination_var.
2188 Create a new temporary of type VECTYPE. */
2191 vect_create_destination_var (tree scalar_dest
, tree vectype
)
2194 const char *new_name
;
2196 gcc_assert (TREE_CODE (scalar_dest
) == SSA_NAME
);
2198 new_name
= get_name (scalar_dest
);
2201 vec_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, new_name
);
2202 add_referenced_tmp_var (vec_dest
);
2208 /* Function vect_init_vector.
2210 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
2211 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
2212 used in the vectorization of STMT. */
2215 vect_init_vector (tree stmt
, tree vector_var
)
2217 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
2218 struct loop
*loop
= STMT_VINFO_LOOP (stmt_vinfo
);
2221 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
2227 new_var
= vect_get_new_vect_var (vectype
, vect_simple_var
, "cst_");
2228 add_referenced_tmp_var (new_var
);
2230 init_stmt
= build2 (MODIFY_EXPR
, vectype
, new_var
, vector_var
);
2231 new_temp
= make_ssa_name (new_var
, init_stmt
);
2232 TREE_OPERAND (init_stmt
, 0) = new_temp
;
2234 pe
= loop_preheader_edge (loop
);
2235 new_bb
= bsi_insert_on_edge_immediate (pe
, init_stmt
);
2236 gcc_assert (!new_bb
);
2238 if (vect_debug_details (NULL
))
2240 fprintf (dump_file
, "created new init_stmt: ");
2241 print_generic_expr (dump_file
, init_stmt
, TDF_SLIM
);
2244 vec_oprnd
= TREE_OPERAND (init_stmt
, 0);
2249 /* Function vect_get_vec_def_for_operand.
2251 OP is an operand in STMT. This function returns a (vector) def that will be
2252 used in the vectorized stmt for STMT.
2254 In the case that OP is an SSA_NAME which is defined in the loop, then
2255 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
2257 In case OP is an invariant or constant, a new stmt that creates a vector def
2258 needs to be introduced. */
2261 vect_get_vec_def_for_operand (tree op
, tree stmt
)
2266 stmt_vec_info def_stmt_info
= NULL
;
2267 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
2268 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
2269 int nunits
= GET_MODE_NUNITS (TYPE_MODE (vectype
));
2270 struct loop
*loop
= STMT_VINFO_LOOP (stmt_vinfo
);
2277 if (vect_debug_details (NULL
))
2279 fprintf (dump_file
, "vect_get_vec_def_for_operand: ");
2280 print_generic_expr (dump_file
, op
, TDF_SLIM
);
2283 /** ===> Case 1: operand is a constant. **/
2285 if (TREE_CODE (op
) == INTEGER_CST
|| TREE_CODE (op
) == REAL_CST
)
2287 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
2291 /* Build a tree with vector elements. */
2292 if (vect_debug_details (NULL
))
2293 fprintf (dump_file
, "Create vector_cst. nunits = %d", nunits
);
2295 for (i
= nunits
- 1; i
>= 0; --i
)
2297 t
= tree_cons (NULL_TREE
, op
, t
);
2299 vec_cst
= build_vector (vectype
, t
);
2300 return vect_init_vector (stmt
, vec_cst
);
2303 gcc_assert (TREE_CODE (op
) == SSA_NAME
);
2305 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
2307 def_stmt
= SSA_NAME_DEF_STMT (op
);
2308 def_stmt_info
= vinfo_for_stmt (def_stmt
);
2310 if (vect_debug_details (NULL
))
2312 fprintf (dump_file
, "vect_get_vec_def_for_operand: def_stmt: ");
2313 print_generic_expr (dump_file
, def_stmt
, TDF_SLIM
);
2317 /** ==> Case 2.1: operand is defined inside the loop. **/
2321 /* Get the def from the vectorized stmt. */
2323 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
2324 gcc_assert (vec_stmt
);
2325 vec_oprnd
= TREE_OPERAND (vec_stmt
, 0);
2330 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2331 it is a reduction/induction. **/
2333 bb
= bb_for_stmt (def_stmt
);
2334 if (TREE_CODE (def_stmt
) == PHI_NODE
&& flow_bb_inside_loop_p (loop
, bb
))
2336 if (vect_debug_details (NULL
))
2337 fprintf (dump_file
, "reduction/induction - unsupported.");
2338 internal_error ("no support for reduction/induction"); /* FORNOW */
2342 /** ==> Case 2.3: operand is defined outside the loop -
2343 it is a loop invariant. */
2345 switch (TREE_CODE (def_stmt
))
2348 def
= PHI_RESULT (def_stmt
);
2351 def
= TREE_OPERAND (def_stmt
, 0);
2354 def
= TREE_OPERAND (def_stmt
, 0);
2355 gcc_assert (IS_EMPTY_STMT (def_stmt
));
2359 if (vect_debug_details (NULL
))
2361 fprintf (dump_file
, "unsupported defining stmt: ");
2362 print_generic_expr (dump_file
, def_stmt
, TDF_SLIM
);
2364 internal_error ("unsupported defining stmt");
2367 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
2369 if (vect_debug_details (NULL
))
2370 fprintf (dump_file
, "Create vector_inv.");
2372 for (i
= nunits
- 1; i
>= 0; --i
)
2374 t
= tree_cons (NULL_TREE
, def
, t
);
2377 vec_inv
= build_constructor (vectype
, t
);
2378 return vect_init_vector (stmt
, vec_inv
);
2382 /* Function vect_finish_stmt_generation.
2384 Insert a new stmt. */
2387 vect_finish_stmt_generation (tree stmt
, tree vec_stmt
, block_stmt_iterator
*bsi
)
2389 bsi_insert_before (bsi
, vec_stmt
, BSI_SAME_STMT
);
2391 if (vect_debug_details (NULL
))
2393 fprintf (dump_file
, "add new stmt: ");
2394 print_generic_expr (dump_file
, vec_stmt
, TDF_SLIM
);
2397 /* Make sure bsi points to the stmt that is being vectorized. */
2399 /* Assumption: any stmts created for the vectorization of stmt S were
2400 inserted before S. BSI is expected to point to S or some new stmt before S.
2403 while (stmt
!= bsi_stmt (*bsi
) && !bsi_end_p (*bsi
))
2405 gcc_assert (stmt
== bsi_stmt (*bsi
));
2409 /* Function vectorizable_assignment.
2411 Check if STMT performs an assignment (copy) that can be vectorized.
2412 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2413 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2414 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2417 vectorizable_assignment (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
2423 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2424 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2425 struct loop
*loop
= STMT_VINFO_LOOP (stmt_info
);
2428 /* Is vectorizable assignment? */
2430 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
2433 scalar_dest
= TREE_OPERAND (stmt
, 0);
2434 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
2437 op
= TREE_OPERAND (stmt
, 1);
2438 if (!vect_is_simple_use (op
, loop
, NULL
))
2440 if (vect_debug_details (NULL
))
2441 fprintf (dump_file
, "use not simple.");
2445 if (!vec_stmt
) /* transformation not required. */
2447 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
2452 if (vect_debug_details (NULL
))
2453 fprintf (dump_file
, "transform assignment.");
2456 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2459 op
= TREE_OPERAND (stmt
, 1);
2460 vec_oprnd
= vect_get_vec_def_for_operand (op
, stmt
);
2462 /* Arguments are ready. create the new vector stmt. */
2463 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, vec_oprnd
);
2464 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
2465 TREE_OPERAND (*vec_stmt
, 0) = new_temp
;
2466 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
2472 /* Function vectorizable_operation.
2474 Check if STMT performs a binary or unary operation that can be vectorized.
2475 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2476 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2477 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2480 vectorizable_operation (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
2485 tree op0
, op1
= NULL
;
2486 tree vec_oprnd0
, vec_oprnd1
=NULL
;
2487 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2488 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2489 struct loop
*loop
= STMT_VINFO_LOOP (stmt_info
);
2491 enum tree_code code
;
2492 enum machine_mode vec_mode
;
2498 /* Is STMT a vectorizable binary/unary operation? */
2499 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
2502 if (TREE_CODE (TREE_OPERAND (stmt
, 0)) != SSA_NAME
)
2505 operation
= TREE_OPERAND (stmt
, 1);
2506 code
= TREE_CODE (operation
);
2507 optab
= optab_for_tree_code (code
, vectype
);
2509 /* Support only unary or binary operations. */
2510 op_type
= TREE_CODE_LENGTH (code
);
2511 if (op_type
!= unary_op
&& op_type
!= binary_op
)
2513 if (vect_debug_details (NULL
))
2514 fprintf (dump_file
, "num. args = %d (not unary/binary op).", op_type
);
2518 for (i
= 0; i
< op_type
; i
++)
2520 op
= TREE_OPERAND (operation
, i
);
2521 if (!vect_is_simple_use (op
, loop
, NULL
))
2523 if (vect_debug_details (NULL
))
2524 fprintf (dump_file
, "use not simple.");
2529 /* Supportable by target? */
2532 if (vect_debug_details (NULL
))
2533 fprintf (dump_file
, "no optab.");
2536 vec_mode
= TYPE_MODE (vectype
);
2537 if (optab
->handlers
[(int) vec_mode
].insn_code
== CODE_FOR_nothing
)
2539 if (vect_debug_details (NULL
))
2540 fprintf (dump_file
, "op not supported by target.");
2544 if (!vec_stmt
) /* transformation not required. */
2546 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
2552 if (vect_debug_details (NULL
))
2553 fprintf (dump_file
, "transform binary/unary operation.");
2556 scalar_dest
= TREE_OPERAND (stmt
, 0);
2557 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2560 op0
= TREE_OPERAND (operation
, 0);
2561 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
);
2563 if (op_type
== binary_op
)
2565 op1
= TREE_OPERAND (operation
, 1);
2566 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
);
2569 /* Arguments are ready. create the new vector stmt. */
2571 if (op_type
== binary_op
)
2572 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
,
2573 build2 (code
, vectype
, vec_oprnd0
, vec_oprnd1
));
2575 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
,
2576 build1 (code
, vectype
, vec_oprnd0
));
2577 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
2578 TREE_OPERAND (*vec_stmt
, 0) = new_temp
;
2579 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
2585 /* Function vectorizable_store.
2587 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2589 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2590 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2591 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2594 vectorizable_store (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
2600 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2601 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2602 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2603 struct loop
*loop
= STMT_VINFO_LOOP (stmt_info
);
2604 enum machine_mode vec_mode
;
2606 enum dr_alignment_support alignment_support_cheme
;
2608 /* Is vectorizable store? */
2610 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
2613 scalar_dest
= TREE_OPERAND (stmt
, 0);
2614 if (TREE_CODE (scalar_dest
) != ARRAY_REF
2615 && TREE_CODE (scalar_dest
) != INDIRECT_REF
)
2618 op
= TREE_OPERAND (stmt
, 1);
2619 if (!vect_is_simple_use (op
, loop
, NULL
))
2621 if (vect_debug_details (NULL
))
2622 fprintf (dump_file
, "use not simple.");
2626 vec_mode
= TYPE_MODE (vectype
);
2627 /* FORNOW. In some cases can vectorize even if data-type not supported
2628 (e.g. - array initialization with 0). */
2629 if (mov_optab
->handlers
[(int)vec_mode
].insn_code
== CODE_FOR_nothing
)
2632 if (!STMT_VINFO_DATA_REF (stmt_info
))
2636 if (!vec_stmt
) /* transformation not required. */
2638 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
2644 if (vect_debug_details (NULL
))
2645 fprintf (dump_file
, "transform store");
2647 alignment_support_cheme
= vect_supportable_dr_alignment (dr
);
2648 gcc_assert (alignment_support_cheme
);
2649 gcc_assert (alignment_support_cheme
= dr_aligned
); /* FORNOW */
2651 /* Handle use - get the vectorized def from the defining stmt. */
2652 vec_oprnd1
= vect_get_vec_def_for_operand (op
, stmt
);
2655 /* FORNOW: make sure the data reference is aligned. */
2656 vect_align_data_ref (stmt
);
2657 data_ref
= vect_create_data_ref_ptr (stmt
, bsi
, NULL_TREE
, &dummy
, false);
2658 data_ref
= build_fold_indirect_ref (data_ref
);
2660 /* Arguments are ready. create the new vector stmt. */
2661 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, data_ref
, vec_oprnd1
);
2662 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
2668 /* vectorizable_load.
2670 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2672 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2673 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2674 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2677 vectorizable_load (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
2680 tree vec_dest
= NULL
;
2681 tree data_ref
= NULL
;
2683 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2684 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2685 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2692 struct loop
*loop
= STMT_VINFO_LOOP (stmt_info
);
2693 edge pe
= loop_preheader_edge (loop
);
2694 enum dr_alignment_support alignment_support_cheme
;
2696 /* Is vectorizable load? */
2698 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
2701 scalar_dest
= TREE_OPERAND (stmt
, 0);
2702 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
2705 op
= TREE_OPERAND (stmt
, 1);
2706 if (TREE_CODE (op
) != ARRAY_REF
&& TREE_CODE (op
) != INDIRECT_REF
)
2709 if (!STMT_VINFO_DATA_REF (stmt_info
))
2712 mode
= (int) TYPE_MODE (vectype
);
2714 /* FORNOW. In some cases can vectorize even if data-type not supported
2715 (e.g. - data copies). */
2716 if (mov_optab
->handlers
[mode
].insn_code
== CODE_FOR_nothing
)
2718 if (vect_debug_details (loop
))
2719 fprintf (dump_file
, "Aligned load, but unsupported type.");
2723 if (!vec_stmt
) /* transformation not required. */
2725 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
2731 if (vect_debug_details (NULL
))
2732 fprintf (dump_file
, "transform load.");
2734 alignment_support_cheme
= vect_supportable_dr_alignment (dr
);
2735 gcc_assert (alignment_support_cheme
);
2737 if (alignment_support_cheme
== dr_aligned
2738 || alignment_support_cheme
== dr_unaligned_supported
)
2749 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2750 data_ref
= vect_create_data_ref_ptr (stmt
, bsi
, NULL_TREE
, &dummy
, false);
2751 if (aligned_access_p (dr
))
2752 data_ref
= build_fold_indirect_ref (data_ref
);
2755 int mis
= DR_MISALIGNMENT (dr
);
2756 tree tmis
= (mis
== -1 ?
2758 build_int_cst (integer_type_node
, mis
));
2759 tmis
= int_const_binop (MULT_EXPR
, tmis
,
2760 build_int_cst (integer_type_node
, BITS_PER_UNIT
), 1);
2761 data_ref
= build2 (MISALIGNED_INDIRECT_REF
, vectype
, data_ref
, tmis
);
2763 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, data_ref
);
2764 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2765 TREE_OPERAND (new_stmt
, 0) = new_temp
;
2766 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
2768 else if (alignment_support_cheme
== dr_unaligned_software_pipeline
)
2772 msq_init = *(floor(p1))
2773 p2 = initial_addr + VS - 1;
2774 magic = have_builtin ? builtin_result : initial_address;
2777 p2' = p2 + indx * vectype_size
2779 vec_dest = realign_load (msq, lsq, magic)
2793 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
2794 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2795 data_ref
= vect_create_data_ref_ptr (stmt
, bsi
, NULL_TREE
,
2797 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, data_ref
);
2798 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, data_ref
);
2799 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2800 TREE_OPERAND (new_stmt
, 0) = new_temp
;
2801 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmt
);
2802 gcc_assert (!new_bb
);
2803 msq_init
= TREE_OPERAND (new_stmt
, 0);
2806 /* <2> Create lsq = *(floor(p2')) in the loop */
2807 offset
= build_int_cst (integer_type_node
,
2808 GET_MODE_NUNITS (TYPE_MODE (vectype
)));
2809 offset
= int_const_binop (MINUS_EXPR
, offset
, integer_one_node
, 1);
2810 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2811 dataref_ptr
= vect_create_data_ref_ptr (stmt
, bsi
, offset
, &dummy
, false);
2812 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, dataref_ptr
);
2813 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, data_ref
);
2814 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2815 TREE_OPERAND (new_stmt
, 0) = new_temp
;
2816 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
2817 lsq
= TREE_OPERAND (new_stmt
, 0);
2821 if (targetm
.vectorize
.builtin_mask_for_load
)
2823 /* Create permutation mask, if required, in loop preheader. */
2825 params
= build_tree_list (NULL_TREE
, init_addr
);
2826 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2827 builtin_decl
= targetm
.vectorize
.builtin_mask_for_load ();
2828 new_stmt
= build_function_call_expr (builtin_decl
, params
);
2829 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, new_stmt
);
2830 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2831 TREE_OPERAND (new_stmt
, 0) = new_temp
;
2832 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmt
);
2833 gcc_assert (!new_bb
);
2834 magic
= TREE_OPERAND (new_stmt
, 0);
2836 /* Since we have just created a CALL_EXPR, we may need to
2837 rename call-clobbered variables. */
2838 mark_call_clobbered_vars_to_rename ();
2842 /* Use current address instead of init_addr for reduced reg pressure.
2844 magic
= dataref_ptr
;
2848 /* <4> Create msq = phi <msq_init, lsq> in loop */
2849 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2850 msq
= make_ssa_name (vec_dest
, NULL_TREE
);
2851 phi_stmt
= create_phi_node (msq
, loop
->header
); /* CHECKME */
2852 SSA_NAME_DEF_STMT (msq
) = phi_stmt
;
2853 add_phi_arg (phi_stmt
, msq_init
, loop_preheader_edge (loop
));
2854 add_phi_arg (phi_stmt
, lsq
, loop_latch_edge (loop
));
2857 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
2858 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2859 new_stmt
= build3 (REALIGN_LOAD_EXPR
, vectype
, msq
, lsq
, magic
);
2860 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, new_stmt
);
2861 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2862 TREE_OPERAND (new_stmt
, 0) = new_temp
;
2863 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
2868 *vec_stmt
= new_stmt
;
2873 /* Function vect_supportable_dr_alignment
2875 Return whether the data reference DR is supported with respect to its
2878 static enum dr_alignment_support
2879 vect_supportable_dr_alignment (struct data_reference
*dr
)
2881 tree vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr
)));
2882 enum machine_mode mode
= (int) TYPE_MODE (vectype
);
2884 if (aligned_access_p (dr
))
2887 /* Possibly unaligned access. */
2889 if (DR_IS_READ (dr
))
2891 if (vec_realign_load_optab
->handlers
[mode
].insn_code
!= CODE_FOR_nothing
2892 && (!targetm
.vectorize
.builtin_mask_for_load
2893 || targetm
.vectorize
.builtin_mask_for_load ()))
2894 return dr_unaligned_software_pipeline
;
2896 if (movmisalign_optab
->handlers
[mode
].insn_code
!= CODE_FOR_nothing
)
2897 /* Can't software pipeline the loads, but can at least do them. */
2898 return dr_unaligned_supported
;
2902 return dr_unaligned_unsupported
;
2906 /* Function vect_transform_stmt.
2908 Create a vectorized stmt to replace STMT, and insert it at BSI. */
2911 vect_transform_stmt (tree stmt
, block_stmt_iterator
*bsi
)
2913 bool is_store
= false;
2914 tree vec_stmt
= NULL_TREE
;
2915 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2918 switch (STMT_VINFO_TYPE (stmt_info
))
2920 case op_vec_info_type
:
2921 done
= vectorizable_operation (stmt
, bsi
, &vec_stmt
);
2925 case assignment_vec_info_type
:
2926 done
= vectorizable_assignment (stmt
, bsi
, &vec_stmt
);
2930 case load_vec_info_type
:
2931 done
= vectorizable_load (stmt
, bsi
, &vec_stmt
);
2935 case store_vec_info_type
:
2936 done
= vectorizable_store (stmt
, bsi
, &vec_stmt
);
2941 if (vect_debug_details (NULL
))
2942 fprintf (dump_file
, "stmt not supported.");
2946 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
2952 /* This function builds ni_name = number of iterations loop executes
2953 on the loop preheader. */
2956 vect_build_loop_niters (loop_vec_info loop_vinfo
)
2958 tree ni_name
, stmt
, var
;
2960 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2961 tree ni
= unshare_expr (LOOP_VINFO_NITERS (loop_vinfo
));
2963 var
= create_tmp_var (TREE_TYPE (ni
), "niters");
2964 add_referenced_tmp_var (var
);
2965 ni_name
= force_gimple_operand (ni
, &stmt
, false, var
);
2967 pe
= loop_preheader_edge (loop
);
2970 basic_block new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
2971 gcc_assert (!new_bb
);
2978 /* This function generates the following statements:
2980 ni_name = number of iterations loop executes
2981 ratio = ni_name / vf
2982 ratio_mult_vf_name = ratio * vf
2984 and places them at the loop preheader edge. */
2987 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo
,
2989 tree
*ratio_mult_vf_name_ptr
,
2990 tree
*ratio_name_ptr
)
2998 tree ratio_mult_vf_name
;
2999 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3000 tree ni
= LOOP_VINFO_NITERS (loop_vinfo
);
3001 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3002 tree log_vf
= build_int_cst (unsigned_type_node
, exact_log2 (vf
));
3004 pe
= loop_preheader_edge (loop
);
3006 /* Generate temporary variable that contains
3007 number of iterations loop executes. */
3009 ni_name
= vect_build_loop_niters (loop_vinfo
);
3011 /* Create: ratio = ni >> log2(vf) */
3013 var
= create_tmp_var (TREE_TYPE (ni
), "bnd");
3014 add_referenced_tmp_var (var
);
3015 ratio_name
= make_ssa_name (var
, NULL_TREE
);
3016 stmt
= build2 (MODIFY_EXPR
, void_type_node
, ratio_name
,
3017 build2 (RSHIFT_EXPR
, TREE_TYPE (ni_name
), ni_name
, log_vf
));
3018 SSA_NAME_DEF_STMT (ratio_name
) = stmt
;
3020 pe
= loop_preheader_edge (loop
);
3021 new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
3022 gcc_assert (!new_bb
);
3024 /* Create: ratio_mult_vf = ratio << log2 (vf). */
3026 var
= create_tmp_var (TREE_TYPE (ni
), "ratio_mult_vf");
3027 add_referenced_tmp_var (var
);
3028 ratio_mult_vf_name
= make_ssa_name (var
, NULL_TREE
);
3029 stmt
= build2 (MODIFY_EXPR
, void_type_node
, ratio_mult_vf_name
,
3030 build2 (LSHIFT_EXPR
, TREE_TYPE (ratio_name
), ratio_name
, log_vf
));
3031 SSA_NAME_DEF_STMT (ratio_mult_vf_name
) = stmt
;
3033 pe
= loop_preheader_edge (loop
);
3034 new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
3035 gcc_assert (!new_bb
);
3037 *ni_name_ptr
= ni_name
;
3038 *ratio_mult_vf_name_ptr
= ratio_mult_vf_name
;
3039 *ratio_name_ptr
= ratio_name
;
3045 /* Function vect_update_ivs_after_vectorizer.
3047 "Advance" the induction variables of LOOP to the value they should take
3048 after the execution of LOOP. This is currently necessary because the
3049 vectorizer does not handle induction variables that are used after the
3050 loop. Such a situation occurs when the last iterations of LOOP are
3052 1. We introduced new uses after LOOP for IVs that were not originally used
3053 after LOOP: the IVs of LOOP are now used by an epilog loop.
3054 2. LOOP is going to be vectorized; this means that it will iterate N/VF
3055 times, whereas the loop IVs should be bumped N times.
3058 - LOOP - a loop that is going to be vectorized. The last few iterations
3059 of LOOP were peeled.
3060 - NITERS - the number of iterations that LOOP executes (before it is
3061 vectorized). i.e, the number of times the ivs should be bumped.
3062 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
3063 coming out from LOOP on which there are uses of the LOOP ivs
3064 (this is the path from LOOP->exit to epilog_loop->preheader).
3066 The new definitions of the ivs are placed in LOOP->exit.
3067 The phi args associated with the edge UPDATE_E in the bb
3068 UPDATE_E->dest are updated accordingly.
3070 Assumption 1: Like the rest of the vectorizer, this function assumes
3071 a single loop exit that has a single predecessor.
3073 Assumption 2: The phi nodes in the LOOP header and in update_bb are
3074 organized in the same order.
3076 Assumption 3: The access function of the ivs is simple enough (see
3077 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
3079 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
3080 coming out of LOOP on which the ivs of LOOP are used (this is the path
3081 that leads to the epilog loop; other paths skip the epilog loop). This
3082 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
3083 needs to have its phis updated.
3087 vect_update_ivs_after_vectorizer (struct loop
*loop
, tree niters
, edge update_e
)
3089 basic_block exit_bb
= loop
->exit_edges
[0]->dest
;
3091 basic_block update_bb
= update_e
->dest
;
3093 /* gcc_assert (vect_can_advance_ivs_p (loop)); */
3095 /* Make sure there exists a single-predecessor exit bb: */
3096 gcc_assert (EDGE_COUNT (exit_bb
->preds
) == 1);
3098 for (phi
= phi_nodes (loop
->header
), phi1
= phi_nodes (update_bb
);
3100 phi
= PHI_CHAIN (phi
), phi1
= PHI_CHAIN (phi1
))
3102 tree access_fn
= NULL
;
3103 tree evolution_part
;
3106 tree var
, stmt
, ni
, ni_name
;
3107 block_stmt_iterator last_bsi
;
3109 /* Skip virtual phi's. */
3110 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi
))))
3112 if (vect_debug_details (NULL
))
3113 fprintf (dump_file
, "virtual phi. skip.");
3117 access_fn
= analyze_scalar_evolution (loop
, PHI_RESULT (phi
));
3118 gcc_assert (access_fn
);
3120 unshare_expr (evolution_part_in_loop_num (access_fn
, loop
->num
));
3121 gcc_assert (evolution_part
!= NULL_TREE
);
3123 /* FORNOW: We do not support IVs whose evolution function is a polynomial
3124 of degree >= 2 or exponential. */
3125 gcc_assert (!tree_is_chrec (evolution_part
));
3127 step_expr
= evolution_part
;
3128 init_expr
= unshare_expr (initial_condition (access_fn
));
3130 ni
= build2 (PLUS_EXPR
, TREE_TYPE (init_expr
),
3131 build2 (MULT_EXPR
, TREE_TYPE (niters
),
3132 niters
, step_expr
), init_expr
);
3134 var
= create_tmp_var (TREE_TYPE (init_expr
), "tmp");
3135 add_referenced_tmp_var (var
);
3137 ni_name
= force_gimple_operand (ni
, &stmt
, false, var
);
3139 /* Insert stmt into exit_bb. */
3140 last_bsi
= bsi_last (exit_bb
);
3142 bsi_insert_before (&last_bsi
, stmt
, BSI_SAME_STMT
);
3144 /* Fix phi expressions in the successor bb. */
3145 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1
, update_e
) ==
3146 PHI_ARG_DEF_FROM_EDGE (phi
, EDGE_SUCC (loop
->latch
, 0)));
3147 SET_PHI_ARG_DEF (phi1
, phi_arg_from_edge (phi1
, update_e
), ni_name
);
3152 /* Function vect_do_peeling_for_loop_bound
3154 Peel the last iterations of the loop represented by LOOP_VINFO.
3155 The peeled iterations form a new epilog loop. Given that the loop now
3156 iterates NITERS times, the new epilog loop iterates
3157 NITERS % VECTORIZATION_FACTOR times.
3159 The original loop will later be made to iterate
3160 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
3163 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo
, tree
*ratio
,
3164 struct loops
*loops
)
3167 tree ni_name
, ratio_mult_vf_name
;
3168 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3169 struct loop
*new_loop
;
3171 #ifdef ENABLE_CHECKING
3175 if (vect_debug_details (NULL
))
3176 fprintf (dump_file
, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");
3178 /* Generate the following variables on the preheader of original loop:
3180 ni_name = number of iteration the original loop executes
3181 ratio = ni_name / vf
3182 ratio_mult_vf_name = ratio * vf */
3183 vect_generate_tmps_on_preheader (loop_vinfo
, &ni_name
,
3184 &ratio_mult_vf_name
, ratio
);
3186 /* Update loop info. */
3187 loop
->pre_header
= loop_preheader_edge (loop
)->src
;
3188 loop
->pre_header_edges
[0] = loop_preheader_edge (loop
);
3190 #ifdef ENABLE_CHECKING
3191 loop_num
= loop
->num
;
3193 new_loop
= slpeel_tree_peel_loop_to_edge (loop
, loops
, loop
->exit_edges
[0],
3194 ratio_mult_vf_name
, ni_name
, false);
3195 #ifdef ENABLE_CHECKING
3196 gcc_assert (new_loop
);
3197 gcc_assert (loop_num
== loop
->num
);
3198 slpeel_verify_cfg_after_peeling (loop
, new_loop
);
3201 /* A guard that controls whether the new_loop is to be executed or skipped
3202 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
3203 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
3204 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
3205 is on the path where the LOOP IVs are used and need to be updated. */
3207 if (EDGE_PRED (new_loop
->pre_header
, 0)->src
== loop
->exit_edges
[0]->dest
)
3208 update_e
= EDGE_PRED (new_loop
->pre_header
, 0);
3210 update_e
= EDGE_PRED (new_loop
->pre_header
, 1);
3212 /* Update IVs of original loop as if they were advanced
3213 by ratio_mult_vf_name steps. */
3214 vect_update_ivs_after_vectorizer (loop
, ratio_mult_vf_name
, update_e
);
3216 /* After peeling we have to reset scalar evolution analyzer. */
3223 /* Function vect_gen_niters_for_prolog_loop
3225 Set the number of iterations for the loop represented by LOOP_VINFO
3226 to the minimum between LOOP_NITERS (the original iteration count of the loop)
3227 and the misalignment of DR - the first data reference recorded in
3228 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
3229 this loop, the data reference DR will refer to an aligned location.
3231 The following computation is generated:
3233 compute address misalignment in bytes:
3234 addr_mis = addr & (vectype_size - 1)
3236 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
3238 (elem_size = element type size; an element is the scalar element
3239 whose type is the inner type of the vectype) */
3242 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo
, tree loop_niters
)
3244 struct data_reference
*dr
= LOOP_VINFO_UNALIGNED_DR (loop_vinfo
);
3245 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3246 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3248 tree iters
, iters_name
;
3251 tree dr_stmt
= DR_STMT (dr
);
3252 stmt_vec_info stmt_info
= vinfo_for_stmt (dr_stmt
);
3253 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3254 int vectype_align
= TYPE_ALIGN (vectype
) / BITS_PER_UNIT
;
3257 tree new_stmts
= NULL_TREE
;
3259 vect_create_addr_base_for_vector_ref (dr_stmt
, &new_stmts
, NULL_TREE
);
3260 tree ptr_type
= TREE_TYPE (start_addr
);
3261 tree size
= TYPE_SIZE (ptr_type
);
3262 tree type
= lang_hooks
.types
.type_for_size (tree_low_cst (size
, 1), 1);
3263 tree vectype_size_minus_1
= build_int_cst (type
, vectype_align
- 1);
3264 tree vf_minus_1
= build_int_cst (unsigned_type_node
, vf
- 1);
3265 tree niters_type
= TREE_TYPE (loop_niters
);
3266 tree elem_size_log
=
3267 build_int_cst (unsigned_type_node
, exact_log2 (vectype_align
/vf
));
3268 tree vf_tree
= build_int_cst (unsigned_type_node
, vf
);
3270 pe
= loop_preheader_edge (loop
);
3271 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmts
);
3272 gcc_assert (!new_bb
);
3274 /* Create: byte_misalign = addr & (vectype_size - 1) */
3275 byte_misalign
= build2 (BIT_AND_EXPR
, type
, start_addr
, vectype_size_minus_1
);
3277 /* Create: elem_misalign = byte_misalign / element_size */
3279 build2 (RSHIFT_EXPR
, unsigned_type_node
, byte_misalign
, elem_size_log
);
3281 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
3282 iters
= build2 (MINUS_EXPR
, unsigned_type_node
, vf_tree
, elem_misalign
);
3283 iters
= build2 (BIT_AND_EXPR
, unsigned_type_node
, iters
, vf_minus_1
);
3284 iters
= fold_convert (niters_type
, iters
);
3286 /* Create: prolog_loop_niters = min (iters, loop_niters) */
3287 /* If the loop bound is known at compile time we already verified that it is
3288 greater than vf; since the misalignment ('iters') is at most vf, there's
3289 no need to generate the MIN_EXPR in this case. */
3290 if (!host_integerp (loop_niters
, 0))
3291 iters
= build2 (MIN_EXPR
, niters_type
, iters
, loop_niters
);
3293 var
= create_tmp_var (niters_type
, "prolog_loop_niters");
3294 add_referenced_tmp_var (var
);
3295 iters_name
= force_gimple_operand (iters
, &stmt
, false, var
);
3297 /* Insert stmt on loop preheader edge. */
3298 pe
= loop_preheader_edge (loop
);
3301 basic_block new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
3302 gcc_assert (!new_bb
);
3309 /* Function vect_update_inits_of_dr
3311 NITERS iterations were peeled from LOOP. DR represents a data reference
3312 in LOOP. This function updates the information recorded in DR to
3313 account for the fact that the first NITERS iterations had already been
3314 executed. Specifically, it updates the initial_condition of the
3315 access_function of DR. */
3318 vect_update_inits_of_dr (struct data_reference
*dr
, struct loop
*loop
,
3321 tree access_fn
= DR_ACCESS_FN (dr
, 0);
3322 tree init
, init_new
, step
;
3324 step
= evolution_part_in_loop_num (access_fn
, loop
->num
);
3325 init
= initial_condition (access_fn
);
3327 init_new
= build2 (PLUS_EXPR
, TREE_TYPE (init
),
3328 build2 (MULT_EXPR
, TREE_TYPE (niters
),
3329 niters
, step
), init
);
3330 DR_ACCESS_FN (dr
, 0) = chrec_replace_initial_condition (access_fn
, init_new
);
3336 /* Function vect_update_inits_of_drs
3338 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3339 This function updates the information recorded for the data references in
3340 the loop to account for the fact that the first NITERS iterations had
3341 already been executed. Specifically, it updates the initial_condition of the
3342 access_function of all the data_references in the loop. */
3345 vect_update_inits_of_drs (loop_vec_info loop_vinfo
, tree niters
)
3348 varray_type loop_write_datarefs
= LOOP_VINFO_DATAREF_WRITES (loop_vinfo
);
3349 varray_type loop_read_datarefs
= LOOP_VINFO_DATAREF_READS (loop_vinfo
);
3350 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3352 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3353 fprintf (dump_file
, "\n<<vect_update_inits_of_dr>>\n");
3355 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_write_datarefs
); i
++)
3357 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_write_datarefs
, i
);
3358 vect_update_inits_of_dr (dr
, loop
, niters
);
3361 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_read_datarefs
); i
++)
3363 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_read_datarefs
, i
);
3364 vect_update_inits_of_dr (dr
, loop
, niters
);
3369 /* Function vect_do_peeling_for_alignment
3371 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3372 'niters' is set to the misalignment of one of the data references in the
3373 loop, thereby forcing it to refer to an aligned location at the beginning
3374 of the execution of this loop. The data reference for which we are
3375 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
3378 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo
, struct loops
*loops
)
3380 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3381 tree niters_of_prolog_loop
, ni_name
;
3383 struct loop
*new_loop
;
3385 if (vect_debug_details (NULL
))
3386 fprintf (dump_file
, "\n<<vect_do_peeling_for_alignment>>\n");
3388 ni_name
= vect_build_loop_niters (loop_vinfo
);
3389 niters_of_prolog_loop
= vect_gen_niters_for_prolog_loop (loop_vinfo
, ni_name
);
3391 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
3393 slpeel_tree_peel_loop_to_edge (loop
, loops
, loop_preheader_edge (loop
),
3394 niters_of_prolog_loop
, ni_name
, true);
3395 #ifdef ENABLE_CHECKING
3396 gcc_assert (new_loop
);
3397 slpeel_verify_cfg_after_peeling (new_loop
, loop
);
3400 /* Update number of times loop executes. */
3401 n_iters
= LOOP_VINFO_NITERS (loop_vinfo
);
3402 LOOP_VINFO_NITERS (loop_vinfo
) =
3403 build2 (MINUS_EXPR
, TREE_TYPE (n_iters
), n_iters
, niters_of_prolog_loop
);
3405 /* Update the init conditions of the access functions of all data refs. */
3406 vect_update_inits_of_drs (loop_vinfo
, niters_of_prolog_loop
);
3408 /* After peeling we have to reset scalar evolution analyzer. */
3415 /* Function vect_transform_loop.
3417 The analysis phase has determined that the loop is vectorizable.
3418 Vectorize the loop - created vectorized stmts to replace the scalar
3419 stmts in the loop, and update the loop exit condition. */
3422 vect_transform_loop (loop_vec_info loop_vinfo
,
3423 struct loops
*loops ATTRIBUTE_UNUSED
)
3425 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3426 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
3427 int nbbs
= loop
->num_nodes
;
3428 block_stmt_iterator si
;
3431 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3433 if (vect_debug_details (NULL
))
3434 fprintf (dump_file
, "\n<<vec_transform_loop>>\n");
3437 /* Peel the loop if there are data refs with unknown alignment.
3438 Only one data ref with unknown store is allowed. */
3440 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo
))
3441 vect_do_peeling_for_alignment (loop_vinfo
, loops
);
3443 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3444 compile time constant), or it is a constant that doesn't divide by the
3445 vectorization factor, then an epilog loop needs to be created.
3446 We therefore duplicate the loop: the original loop will be vectorized,
3447 and will compute the first (n/VF) iterations. The second copy of the loop
3448 will remain scalar and will compute the remaining (n%VF) iterations.
3449 (VF is the vectorization factor). */
3451 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
3452 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
3453 && LOOP_VINFO_INT_NITERS (loop_vinfo
) % vectorization_factor
!= 0))
3454 vect_do_peeling_for_loop_bound (loop_vinfo
, &ratio
, loops
);
3456 ratio
= build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo
)),
3457 LOOP_VINFO_INT_NITERS (loop_vinfo
) / vectorization_factor
);
3459 /* 1) Make sure the loop header has exactly two entries
3460 2) Make sure we have a preheader basic block. */
3462 gcc_assert (EDGE_COUNT (loop
->header
->preds
) == 2);
3464 loop_split_edge_with (loop_preheader_edge (loop
), NULL
);
3467 /* FORNOW: the vectorizer supports only loops which body consist
3468 of one basic block (header + empty latch). When the vectorizer will
3469 support more involved loop forms, the order by which the BBs are
3470 traversed need to be reconsidered. */
3472 for (i
= 0; i
< nbbs
; i
++)
3474 basic_block bb
= bbs
[i
];
3476 for (si
= bsi_start (bb
); !bsi_end_p (si
);)
3478 tree stmt
= bsi_stmt (si
);
3479 stmt_vec_info stmt_info
;
3482 if (vect_debug_details (NULL
))
3484 fprintf (dump_file
, "------>vectorizing statement: ");
3485 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
3487 stmt_info
= vinfo_for_stmt (stmt
);
3488 gcc_assert (stmt_info
);
3489 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
3494 #ifdef ENABLE_CHECKING
3495 /* FORNOW: Verify that all stmts operate on the same number of
3496 units and no inner unrolling is necessary. */
3498 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info
)))
3499 == vectorization_factor
);
3501 /* -------- vectorize statement ------------ */
3502 if (vect_debug_details (NULL
))
3503 fprintf (dump_file
, "transform statement.");
3505 is_store
= vect_transform_stmt (stmt
, &si
);
3508 /* free the attached stmt_vec_info and remove the stmt. */
3509 stmt_ann_t ann
= stmt_ann (stmt
);
3511 set_stmt_info (ann
, NULL
);
3520 slpeel_make_loop_iterate_ntimes (loop
, ratio
);
3522 if (vect_debug_details (loop
))
3523 fprintf (dump_file
,"Success! loop vectorized.");
3524 if (vect_debug_stats (loop
))
3525 fprintf (dump_file
, "LOOP VECTORIZED.");
3529 /* Function vect_is_simple_use.
3532 LOOP - the loop that is being vectorized.
3533 OPERAND - operand of a stmt in LOOP.
3534 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3536 Returns whether a stmt with OPERAND can be vectorized.
3537 Supportable operands are constants, loop invariants, and operands that are
3538 defined by the current iteration of the loop. Unsupportable operands are
3539 those that are defined by a previous iteration of the loop (as is the case
3540 in reduction/induction computations). */
3543 vect_is_simple_use (tree operand
, struct loop
*loop
, tree
*def
)
3551 if (TREE_CODE (operand
) == INTEGER_CST
|| TREE_CODE (operand
) == REAL_CST
)
3554 if (TREE_CODE (operand
) != SSA_NAME
)
3557 def_stmt
= SSA_NAME_DEF_STMT (operand
);
3558 if (def_stmt
== NULL_TREE
)
3560 if (vect_debug_details (NULL
))
3561 fprintf (dump_file
, "no def_stmt.");
3565 /* empty stmt is expected only in case of a function argument.
3566 (Otherwise - we expect a phi_node or a modify_expr). */
3567 if (IS_EMPTY_STMT (def_stmt
))
3569 tree arg
= TREE_OPERAND (def_stmt
, 0);
3570 if (TREE_CODE (arg
) == INTEGER_CST
|| TREE_CODE (arg
) == REAL_CST
)
3572 if (vect_debug_details (NULL
))
3574 fprintf (dump_file
, "Unexpected empty stmt: ");
3575 print_generic_expr (dump_file
, def_stmt
, TDF_SLIM
);
3580 /* phi_node inside the loop indicates an induction/reduction pattern.
3581 This is not supported yet. */
3582 bb
= bb_for_stmt (def_stmt
);
3583 if (TREE_CODE (def_stmt
) == PHI_NODE
&& flow_bb_inside_loop_p (loop
, bb
))
3585 if (vect_debug_details (NULL
))
3586 fprintf (dump_file
, "reduction/induction - unsupported.");
3587 return false; /* FORNOW: not supported yet. */
3590 /* Expecting a modify_expr or a phi_node. */
3591 if (TREE_CODE (def_stmt
) == MODIFY_EXPR
3592 || TREE_CODE (def_stmt
) == PHI_NODE
)
3603 /* Function vect_analyze_operations.
3605 Scan the loop stmts and make sure they are all vectorizable. */
3608 vect_analyze_operations (loop_vec_info loop_vinfo
)
3610 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3611 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
3612 int nbbs
= loop
->num_nodes
;
3613 block_stmt_iterator si
;
3614 unsigned int vectorization_factor
= 0;
3619 if (vect_debug_details (NULL
))
3620 fprintf (dump_file
, "\n<<vect_analyze_operations>>\n");
3622 for (i
= 0; i
< nbbs
; i
++)
3624 basic_block bb
= bbs
[i
];
3626 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
3628 tree stmt
= bsi_stmt (si
);
3629 unsigned int nunits
;
3630 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3633 if (vect_debug_details (NULL
))
3635 fprintf (dump_file
, "==> examining statement: ");
3636 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
3639 gcc_assert (stmt_info
);
3641 /* skip stmts which do not need to be vectorized.
3642 this is expected to include:
3643 - the COND_EXPR which is the loop exit condition
3644 - any LABEL_EXPRs in the loop
3645 - computations that are used only for array indexing or loop
3648 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
3650 if (vect_debug_details (NULL
))
3651 fprintf (dump_file
, "irrelevant.");
3655 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt
))))
3657 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3659 fprintf (dump_file
, "not vectorized: vector stmt in loop:");
3660 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
3665 if (STMT_VINFO_DATA_REF (stmt_info
))
3666 scalar_type
= TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info
)));
3667 else if (TREE_CODE (stmt
) == MODIFY_EXPR
)
3668 scalar_type
= TREE_TYPE (TREE_OPERAND (stmt
, 0));
3670 scalar_type
= TREE_TYPE (stmt
);
3672 if (vect_debug_details (NULL
))
3674 fprintf (dump_file
, "get vectype for scalar type: ");
3675 print_generic_expr (dump_file
, scalar_type
, TDF_SLIM
);
3678 vectype
= get_vectype_for_scalar_type (scalar_type
);
3681 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3683 fprintf (dump_file
, "not vectorized: unsupported data-type ");
3684 print_generic_expr (dump_file
, scalar_type
, TDF_SLIM
);
3689 if (vect_debug_details (NULL
))
3691 fprintf (dump_file
, "vectype: ");
3692 print_generic_expr (dump_file
, vectype
, TDF_SLIM
);
3694 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
3696 ok
= (vectorizable_operation (stmt
, NULL
, NULL
)
3697 || vectorizable_assignment (stmt
, NULL
, NULL
)
3698 || vectorizable_load (stmt
, NULL
, NULL
)
3699 || vectorizable_store (stmt
, NULL
, NULL
));
3703 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3705 fprintf (dump_file
, "not vectorized: stmt not supported: ");
3706 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
3711 nunits
= GET_MODE_NUNITS (TYPE_MODE (vectype
));
3712 if (vect_debug_details (NULL
))
3713 fprintf (dump_file
, "nunits = %d", nunits
);
3715 if (vectorization_factor
)
3717 /* FORNOW: don't allow mixed units.
3718 This restriction will be relaxed in the future. */
3719 if (nunits
!= vectorization_factor
)
3721 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3722 fprintf (dump_file
, "not vectorized: mixed data-types");
3727 vectorization_factor
= nunits
;
3729 #ifdef ENABLE_CHECKING
3730 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type
))
3731 * vectorization_factor
== UNITS_PER_SIMD_WORD
);
3736 /* TODO: Analyze cost. Decide if worth while to vectorize. */
3738 if (vectorization_factor
<= 1)
3740 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3741 fprintf (dump_file
, "not vectorized: unsupported data-type");
3744 LOOP_VINFO_VECT_FACTOR (loop_vinfo
) = vectorization_factor
;
3746 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
) && vect_debug_details (NULL
))
3748 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC
,
3749 vectorization_factor
, LOOP_VINFO_INT_NITERS (loop_vinfo
));
3751 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
3752 && LOOP_VINFO_INT_NITERS (loop_vinfo
) < vectorization_factor
)
3754 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3755 fprintf (dump_file
, "not vectorized: iteration count too small.");
3759 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
3760 || LOOP_VINFO_INT_NITERS (loop_vinfo
) % vectorization_factor
!= 0)
3762 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3763 fprintf (dump_file
, "epilog loop required.");
3764 if (!vect_can_advance_ivs_p (loop
))
3766 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3767 fprintf (dump_file
, "not vectorized: can't create epilog loop 1.");
3770 if (!slpeel_can_duplicate_loop_p (loop
, loop
->exit_edges
[0]))
3772 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3773 fprintf (dump_file
, "not vectorized: can't create epilog loop 2.");
3782 /* Function exist_non_indexing_operands_for_use_p
3784 USE is one of the uses attached to STMT. Check if USE is
3785 used in STMT for anything other than indexing an array. */
3788 exist_non_indexing_operands_for_use_p (tree use
, tree stmt
)
3791 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3793 /* USE corresponds to some operand in STMT. If there is no data
3794 reference in STMT, then any operand that corresponds to USE
3795 is not indexing an array. */
3796 if (!STMT_VINFO_DATA_REF (stmt_info
))
3799 /* STMT has a data_ref. FORNOW this means that its of one of
3800 the following forms:
3803 (This should have been verified in analyze_data_refs).
3805 'var' in the second case corresponds to a def, not a use,
3806 so USE cannot correspond to any operands that are not used
3809 Therefore, all we need to check is if STMT falls into the
3810 first case, and whether var corresponds to USE. */
3812 if (TREE_CODE (TREE_OPERAND (stmt
, 0)) == SSA_NAME
)
3815 operand
= TREE_OPERAND (stmt
, 1);
3817 if (TREE_CODE (operand
) != SSA_NAME
)
3827 /* Function vect_is_simple_iv_evolution.
3829 FORNOW: A simple evolution of an induction variables in the loop is
3830 considered a polynomial evolution with constant step. */
3833 vect_is_simple_iv_evolution (unsigned loop_nb
, tree access_fn
, tree
* init
,
3834 tree
* step
, bool strict
)
3839 tree evolution_part
= evolution_part_in_loop_num (access_fn
, loop_nb
);
3841 /* When there is no evolution in this loop, the evolution function
3843 if (evolution_part
== NULL_TREE
)
3846 /* When the evolution is a polynomial of degree >= 2
3847 the evolution function is not "simple". */
3848 if (tree_is_chrec (evolution_part
))
3851 step_expr
= evolution_part
;
3852 init_expr
= unshare_expr (initial_condition (access_fn
));
3854 if (vect_debug_details (NULL
))
3856 fprintf (dump_file
, "step: ");
3857 print_generic_expr (dump_file
, step_expr
, TDF_SLIM
);
3858 fprintf (dump_file
, ", init: ");
3859 print_generic_expr (dump_file
, init_expr
, TDF_SLIM
);
3865 if (TREE_CODE (step_expr
) != INTEGER_CST
)
3867 if (vect_debug_details (NULL
))
3868 fprintf (dump_file
, "step unknown.");
3873 if (!integer_onep (step_expr
))
3875 if (vect_debug_details (NULL
))
3876 print_generic_expr (dump_file
, step_expr
, TDF_SLIM
);
3884 /* Function vect_analyze_scalar_cycles.
3886 Examine the cross iteration def-use cycles of scalar variables, by
3887 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3888 cycles that they represent do not impede vectorization.
3890 FORNOW: Reduction as in the following loop, is not supported yet:
3894 The cross-iteration cycle corresponding to variable 'sum' will be
3895 considered too complicated and will impede vectorization.
3897 FORNOW: Induction as in the following loop, is not supported yet:
3902 However, the following loop *is* vectorizable:
3907 In both loops there exists a def-use cycle for the variable i:
3908 loop: i_2 = PHI (i_0, i_1)
3913 The evolution of the above cycle is considered simple enough,
3914 however, we also check that the cycle does not need to be
3915 vectorized, i.e - we check that the variable that this cycle
3916 defines is only used for array indexing or in stmts that do not
3917 need to be vectorized. This is not the case in loop2, but it
3918 *is* the case in loop3. */
3921 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo
)
3924 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3925 basic_block bb
= loop
->header
;
3928 if (vect_debug_details (NULL
))
3929 fprintf (dump_file
, "\n<<vect_analyze_scalar_cycles>>\n");
3931 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
3933 tree access_fn
= NULL
;
3935 if (vect_debug_details (NULL
))
3937 fprintf (dump_file
, "Analyze phi: ");
3938 print_generic_expr (dump_file
, phi
, TDF_SLIM
);
3941 /* Skip virtual phi's. The data dependences that are associated with
3942 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3944 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi
))))
3946 if (vect_debug_details (NULL
))
3947 fprintf (dump_file
, "virtual phi. skip.");
3951 /* Analyze the evolution function. */
3953 /* FORNOW: The only scalar cross-iteration cycles that we allow are
3954 those of loop induction variables; This property is verified here.
3956 Furthermore, if that induction variable is used in an operation
3957 that needs to be vectorized (i.e, is not solely used to index
3958 arrays and check the exit condition) - we do not support its
3959 vectorization yet. This property is verified in vect_is_simple_use,
3960 during vect_analyze_operations. */
3962 access_fn
= /* instantiate_parameters
3964 analyze_scalar_evolution (loop
, PHI_RESULT (phi
));
3968 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3969 fprintf (dump_file
, "not vectorized: unsupported scalar cycle.");
3973 if (vect_debug_details (NULL
))
3975 fprintf (dump_file
, "Access function of PHI: ");
3976 print_generic_expr (dump_file
, access_fn
, TDF_SLIM
);
3979 if (!vect_is_simple_iv_evolution (loop
->num
, access_fn
, &dummy
,
3982 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3983 fprintf (dump_file
, "not vectorized: unsupported scalar cycle.");
3992 /* Function vect_analyze_data_ref_dependence.
3994 Return TRUE if there (might) exist a dependence between a memory-reference
3995 DRA and a memory-reference DRB. */
3998 vect_analyze_data_ref_dependence (struct data_reference
*dra
,
3999 struct data_reference
*drb
,
4003 struct data_dependence_relation
*ddr
;
4005 if (!array_base_name_differ_p (dra
, drb
, &differ_p
))
4007 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
4010 "not vectorized: can't determine dependence between: ");
4011 print_generic_expr (dump_file
, DR_REF (dra
), TDF_SLIM
);
4012 fprintf (dump_file
, " and ");
4013 print_generic_expr (dump_file
, DR_REF (drb
), TDF_SLIM
);
4021 ddr
= initialize_data_dependence_relation (dra
, drb
);
4022 compute_affine_dependence (ddr
);
4024 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
4027 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
4030 "not vectorized: possible dependence between data-refs ");
4031 print_generic_expr (dump_file
, DR_REF (dra
), TDF_SLIM
);
4032 fprintf (dump_file
, " and ");
4033 print_generic_expr (dump_file
, DR_REF (drb
), TDF_SLIM
);
4040 /* Function vect_analyze_data_ref_dependences.
4042 Examine all the data references in the loop, and make sure there do not
4043 exist any data dependences between them.
4045 TODO: dependences which distance is greater than the vectorization factor
4049 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo
)
4052 varray_type loop_write_refs
= LOOP_VINFO_DATAREF_WRITES (loop_vinfo
);
4053 varray_type loop_read_refs
= LOOP_VINFO_DATAREF_READS (loop_vinfo
);
4054 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4056 /* Examine store-store (output) dependences. */
4058 if (vect_debug_details (NULL
))
4059 fprintf (dump_file
, "\n<<vect_analyze_dependences>>\n");
4061 if (vect_debug_details (NULL
))
4062 fprintf (dump_file
, "compare all store-store pairs.");
4064 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_write_refs
); i
++)
4066 for (j
= i
+ 1; j
< VARRAY_ACTIVE_SIZE (loop_write_refs
); j
++)
4068 struct data_reference
*dra
=
4069 VARRAY_GENERIC_PTR (loop_write_refs
, i
);
4070 struct data_reference
*drb
=
4071 VARRAY_GENERIC_PTR (loop_write_refs
, j
);
4072 if (vect_analyze_data_ref_dependence (dra
, drb
, loop
))
4077 /* Examine load-store (true/anti) dependences. */
4079 if (vect_debug_details (NULL
))
4080 fprintf (dump_file
, "compare all load-store pairs.");
4082 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_read_refs
); i
++)
4084 for (j
= 0; j
< VARRAY_ACTIVE_SIZE (loop_write_refs
); j
++)
4086 struct data_reference
*dra
= VARRAY_GENERIC_PTR (loop_read_refs
, i
);
4087 struct data_reference
*drb
=
4088 VARRAY_GENERIC_PTR (loop_write_refs
, j
);
4089 if (vect_analyze_data_ref_dependence (dra
, drb
, loop
))
4098 /* Function vect_get_first_index.
4100 REF is a data reference.
4101 If it is an ARRAY_REF: if its lower bound is simple enough,
4102 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
4103 If it is not an ARRAY_REF: REF has no "first index";
4104 ARRAY_FIRST_INDEX in zero, and the function returns TRUE. */
4107 vect_get_first_index (tree ref
, tree
*array_first_index
)
4111 if (TREE_CODE (ref
) != ARRAY_REF
)
4112 *array_first_index
= size_zero_node
;
4115 array_start
= array_ref_low_bound (ref
);
4116 if (!host_integerp (array_start
, 0))
4118 if (vect_debug_details (NULL
))
4120 fprintf (dump_file
, "array min val not simple integer cst.");
4121 print_generic_expr (dump_file
, array_start
, TDF_DETAILS
);
4125 *array_first_index
= array_start
;
4132 /* Function vect_compute_array_base_alignment.
4133 A utility function of vect_compute_array_ref_alignment.
4135 Compute the misalignment of ARRAY in bits.
4138 ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
4139 VECTYPE - we are interested in the misalignment modulo the size of vectype.
4140 if NULL: don't compute misalignment, just return the base of ARRAY.
4141 PREV_DIMENSIONS - initialized to one.
4142 MISALIGNMENT - the computed misalignment in bits.
4145 If VECTYPE is not NULL:
4146 Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
4147 the base of the array, and put the computed misalignment in MISALIGNMENT.
4149 Return the base of the array.
4151 For a[idx_N]...[idx_2][idx_1][idx_0], the address of
4152 a[idx_N]...[idx_2][idx_1] is
4153 {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
4154 ... + idx_N * dim_0 * ... * dim_N-1}.
4155 (The misalignment of &a is not checked here).
4156 Note, that every term contains dim_0, therefore, if dim_0 is a
4157 multiple of NUNITS, the whole sum is a multiple of NUNITS.
4158 Otherwise, if idx_1 is constant, and dim_1 is a multiple of
4159 NUINTS, we can say that the misalignment of the sum is equal to
4160 the misalignment of {idx_1 * dim_0}. If idx_1 is not constant,
4161 we can't determine this array misalignment, and we return
4163 We proceed recursively in this manner, accumulating total misalignment
4164 and the multiplication of previous dimensions for correct misalignment
4168 vect_compute_array_base_alignment (tree array
,
4170 tree
*prev_dimensions
,
4175 tree dimension_size
;
4177 tree bits_per_vectype
;
4178 tree bits_per_vectype_unit
;
4180 /* The 'stop condition' of the recursion. */
4181 if (TREE_CODE (array
) != ARRAY_REF
)
4185 /* Just get the base decl. */
4186 return vect_compute_array_base_alignment
4187 (TREE_OPERAND (array
, 0), NULL
, NULL
, NULL
);
4189 if (!host_integerp (*misalignment
, 1) || TREE_OVERFLOW (*misalignment
) ||
4190 !host_integerp (*prev_dimensions
, 1) || TREE_OVERFLOW (*prev_dimensions
))
4193 domain
= TYPE_DOMAIN (TREE_TYPE (array
));
4195 int_const_binop (PLUS_EXPR
,
4196 int_const_binop (MINUS_EXPR
, TYPE_MAX_VALUE (domain
),
4197 TYPE_MIN_VALUE (domain
), 1),
4200 /* Check if the dimension size is a multiple of NUNITS, the remaining sum
4201 is a multiple of NUNITS:
4203 dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
4205 mis
= int_const_binop (TRUNC_MOD_EXPR
, dimension_size
,
4206 build_int_cst (NULL_TREE
, GET_MODE_NUNITS (TYPE_MODE (vectype
))), 1);
4207 if (integer_zerop (mis
))
4208 /* This array is aligned. Continue just in order to get the base decl. */
4209 return vect_compute_array_base_alignment
4210 (TREE_OPERAND (array
, 0), NULL
, NULL
, NULL
);
4212 index
= TREE_OPERAND (array
, 1);
4213 if (!host_integerp (index
, 1))
4214 /* The current index is not constant. */
4217 index
= int_const_binop (MINUS_EXPR
, index
, TYPE_MIN_VALUE (domain
), 0);
4219 bits_per_vectype
= fold_convert (unsigned_type_node
,
4220 build_int_cst (NULL_TREE
, BITS_PER_UNIT
*
4221 GET_MODE_SIZE (TYPE_MODE (vectype
))));
4222 bits_per_vectype_unit
= fold_convert (unsigned_type_node
,
4223 build_int_cst (NULL_TREE
, BITS_PER_UNIT
*
4224 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype
)))));
4226 /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
4230 (*misalignment + index_val * dimension_size * *prev_dimensions)
4234 mis
= int_const_binop (MULT_EXPR
, index
, dimension_size
, 1);
4235 mis
= int_const_binop (MULT_EXPR
, mis
, *prev_dimensions
, 1);
4236 mis
= int_const_binop (MULT_EXPR
, mis
, bits_per_vectype_unit
, 1);
4237 mis
= int_const_binop (PLUS_EXPR
, *misalignment
, mis
, 1);
4238 *misalignment
= int_const_binop (TRUNC_MOD_EXPR
, mis
, bits_per_vectype
, 1);
4241 *prev_dimensions
= int_const_binop (MULT_EXPR
,
4242 *prev_dimensions
, dimension_size
, 1);
4244 return vect_compute_array_base_alignment (TREE_OPERAND (array
, 0), vectype
,
4250 /* Function vect_compute_data_ref_alignment
4252 Compute the misalignment of the data reference DR.
4255 1. If during the misalignment computation it is found that the data reference
4256 cannot be vectorized then false is returned.
4257 2. DR_MISALIGNMENT (DR) is defined.
4259 FOR NOW: No analysis is actually performed. Misalignment is calculated
4260 only for trivial cases. TODO. */
4263 vect_compute_data_ref_alignment (struct data_reference
*dr
,
4264 loop_vec_info loop_vinfo
)
4266 tree stmt
= DR_STMT (dr
);
4267 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4268 tree ref
= DR_REF (dr
);
4271 tree offset
= size_zero_node
;
4272 tree base
, bit_offset
, alignment
;
4273 tree unit_bits
= fold_convert (unsigned_type_node
,
4274 build_int_cst (NULL_TREE
, BITS_PER_UNIT
));
4276 bool base_aligned_p
;
4278 if (vect_debug_details (NULL
))
4279 fprintf (dump_file
, "vect_compute_data_ref_alignment:");
4281 /* Initialize misalignment to unknown. */
4282 DR_MISALIGNMENT (dr
) = -1;
4284 scalar_type
= TREE_TYPE (ref
);
4285 vectype
= get_vectype_for_scalar_type (scalar_type
);
4288 if (vect_debug_details (NULL
))
4290 fprintf (dump_file
, "no vectype for stmt: ");
4291 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
4292 fprintf (dump_file
, " scalar_type: ");
4293 print_generic_expr (dump_file
, scalar_type
, TDF_DETAILS
);
4295 /* It is not possible to vectorize this data reference. */
4298 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
4299 gcc_assert (TREE_CODE (ref
) == ARRAY_REF
|| TREE_CODE (ref
) == INDIRECT_REF
);
4301 if (TREE_CODE (ref
) == ARRAY_REF
)
4304 dr_base
= STMT_VINFO_VECT_DR_BASE (stmt_info
);
4306 base
= vect_get_base_and_bit_offset (dr
, dr_base
, vectype
,
4307 loop_vinfo
, &bit_offset
, &base_aligned_p
);
4310 if (vect_debug_details (NULL
))
4312 fprintf (dump_file
, "Unknown alignment for access: ");
4313 print_generic_expr (dump_file
,
4314 STMT_VINFO_VECT_DR_BASE (stmt_info
), TDF_SLIM
);
4319 if (!base_aligned_p
)
4321 if (!vect_can_force_dr_alignment_p (base
, TYPE_ALIGN (vectype
)))
4323 if (vect_debug_details (NULL
))
4325 fprintf (dump_file
, "can't force alignment of ref: ");
4326 print_generic_expr (dump_file
, ref
, TDF_SLIM
);
4331 /* Force the alignment of the decl.
4332 NOTE: This is the only change to the code we make during
4333 the analysis phase, before deciding to vectorize the loop. */
4334 if (vect_debug_details (NULL
))
4335 fprintf (dump_file
, "force alignment");
4336 DECL_ALIGN (base
) = TYPE_ALIGN (vectype
);
4337 DECL_USER_ALIGN (base
) = 1;
4340 /* At this point we assume that the base is aligned, and the offset from it
4341 (including index, if relevant) has been computed and is in BIT_OFFSET. */
4342 gcc_assert (base_aligned_p
4343 || (TREE_CODE (base
) == VAR_DECL
4344 && DECL_ALIGN (base
) >= TYPE_ALIGN (vectype
)));
4346 /* Convert into bytes. */
4347 offset
= int_const_binop (TRUNC_DIV_EXPR
, bit_offset
, unit_bits
, 1);
4348 /* Check that there is no remainder in bits. */
4349 bit_offset
= int_const_binop (TRUNC_MOD_EXPR
, bit_offset
, unit_bits
, 1);
4350 if (!integer_zerop (bit_offset
))
4352 if (vect_debug_details (NULL
))
4354 fprintf (dump_file
, "bit offset alignment: ");
4355 print_generic_expr (dump_file
, bit_offset
, TDF_SLIM
);
4360 /* Alignment required, in bytes: */
4361 alignment
= fold_convert (unsigned_type_node
,
4362 build_int_cst (NULL_TREE
, TYPE_ALIGN (vectype
)/BITS_PER_UNIT
));
4364 /* Modulo alignment. */
4365 offset
= int_const_binop (TRUNC_MOD_EXPR
, offset
, alignment
, 0);
4366 if (!host_integerp (offset
, 1) || TREE_OVERFLOW (offset
))
4368 if (vect_debug_details (NULL
))
4369 fprintf (dump_file
, "unexpected misalign value");
4373 DR_MISALIGNMENT (dr
) = tree_low_cst (offset
, 1);
4375 if (vect_debug_details (NULL
))
4376 fprintf (dump_file
, "misalign = %d", DR_MISALIGNMENT (dr
));
4382 /* Function vect_compute_array_ref_alignment
4384 Compute the alignment of an array-ref.
4385 The alignment we compute here is relative to
4386 TYPE_ALIGN(VECTYPE) boundary.
4389 OFFSET - the alignment in bits
4390 Return value - the base of the array-ref. E.g,
4391 if the array-ref is a.b[k].c[i][j] the returned
4396 vect_compute_array_ref_alignment (struct data_reference
*dr
,
4397 loop_vec_info loop_vinfo
,
4401 tree array_first_index
= size_zero_node
;
4403 tree ref
= DR_REF (dr
);
4404 tree scalar_type
= TREE_TYPE (ref
);
4405 tree oprnd0
= TREE_OPERAND (ref
, 0);
4406 tree dims
= size_one_node
;
4407 tree misalign
= size_zero_node
;
4408 tree next_ref
, this_offset
= size_zero_node
;
4412 if (TREE_CODE (TREE_TYPE (ref
)) == ARRAY_TYPE
)
4413 /* The reference is an array without its last index. */
4414 next_ref
= vect_compute_array_base_alignment (ref
, vectype
, &dims
,
4417 next_ref
= vect_compute_array_base_alignment (oprnd0
, vectype
, &dims
,
4420 /* Alignment is not requested. Just return the base. */
4423 /* Compute alignment. */
4424 if (!host_integerp (misalign
, 1) || TREE_OVERFLOW (misalign
) || !next_ref
)
4426 this_offset
= misalign
;
4428 /* Check the first index accessed. */
4429 if (!vect_get_first_index (ref
, &array_first_index
))
4431 if (vect_debug_details (NULL
))
4432 fprintf (dump_file
, "no first_index for array.");
4436 /* Check the index of the array_ref. */
4437 init
= initial_condition_in_loop_num (DR_ACCESS_FN (dr
, 0),
4438 LOOP_VINFO_LOOP (loop_vinfo
)->num
);
4440 /* FORNOW: In order to simplify the handling of alignment, we make sure
4441 that the first location at which the array is accessed ('init') is on an
4442 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
4443 This is too conservative, since we require that
4444 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
4445 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
4446 This should be relaxed in the future. */
4448 if (!init
|| !host_integerp (init
, 0))
4450 if (vect_debug_details (NULL
))
4451 fprintf (dump_file
, "non constant init. ");
4455 /* bytes per scalar element: */
4456 nunits
= fold_convert (unsigned_type_node
,
4457 build_int_cst (NULL_TREE
, GET_MODE_SIZE (TYPE_MODE (scalar_type
))));
4458 nbits
= int_const_binop (MULT_EXPR
, nunits
,
4459 build_int_cst (NULL_TREE
, BITS_PER_UNIT
), 1);
4461 /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
4462 misalign
= int_const_binop (MINUS_EXPR
, init
, array_first_index
, 0);
4463 misalign
= int_const_binop (MULT_EXPR
, misalign
, nbits
, 0);
4464 misalign
= int_const_binop (PLUS_EXPR
, misalign
, this_offset
, 0);
4466 /* TODO: allow negative misalign values. */
4467 if (!host_integerp (misalign
, 1) || TREE_OVERFLOW (misalign
))
4469 if (vect_debug_details (NULL
))
4470 fprintf (dump_file
, "unexpected misalign value");
4478 /* Function vect_compute_data_refs_alignment
4480 Compute the misalignment of data references in the loop.
4481 This pass may take place at function granularity instead of at loop
4484 FOR NOW: No analysis is actually performed. Misalignment is calculated
4485 only for trivial cases. TODO. */
4488 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo
)
4490 varray_type loop_write_datarefs
= LOOP_VINFO_DATAREF_WRITES (loop_vinfo
);
4491 varray_type loop_read_datarefs
= LOOP_VINFO_DATAREF_READS (loop_vinfo
);
4494 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_write_datarefs
); i
++)
4496 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_write_datarefs
, i
);
4497 if (!vect_compute_data_ref_alignment (dr
, loop_vinfo
))
4501 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_read_datarefs
); i
++)
4503 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_read_datarefs
, i
);
4504 if (!vect_compute_data_ref_alignment (dr
, loop_vinfo
))
4512 /* Function vect_enhance_data_refs_alignment
4514 This pass will use loop versioning and loop peeling in order to enhance
4515 the alignment of data references in the loop.
4517 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4518 original loop is to be vectorized; Any other loops that are created by
4519 the transformations performed in this pass - are not supposed to be
4520 vectorized. This restriction will be relaxed. */
4523 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo
)
4525 varray_type loop_read_datarefs
= LOOP_VINFO_DATAREF_READS (loop_vinfo
);
4526 varray_type loop_write_datarefs
= LOOP_VINFO_DATAREF_WRITES (loop_vinfo
);
4527 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4531 This pass will require a cost model to guide it whether to apply peeling
4532 or versioning or a combination of the two. For example, the scheme that
4533 intel uses when given a loop with several memory accesses, is as follows:
4534 choose one memory access ('p') which alignment you want to force by doing
4535 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4536 other accesses are not necessarily aligned, or (2) use loop versioning to
4537 generate one loop in which all accesses are aligned, and another loop in
4538 which only 'p' is necessarily aligned.
4540 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4541 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4542 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4544 Devising a cost model is the most critical aspect of this work. It will
4545 guide us on which access to peel for, whether to use loop versioning, how
4546 many versions to create, etc. The cost model will probably consist of
4547 generic considerations as well as target specific considerations (on
4548 powerpc for example, misaligned stores are more painful than misaligned
4551 Here is the general steps involved in alignment enhancements:
4553 -- original loop, before alignment analysis:
4554 for (i=0; i<N; i++){
4555 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4556 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4559 -- After vect_compute_data_refs_alignment:
4560 for (i=0; i<N; i++){
4561 x = q[i]; # DR_MISALIGNMENT(q) = 3
4562 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4565 -- Possibility 1: we do loop versioning:
4567 for (i=0; i<N; i++){ # loop 1A
4568 x = q[i]; # DR_MISALIGNMENT(q) = 3
4569 p[i] = y; # DR_MISALIGNMENT(p) = 0
4573 for (i=0; i<N; i++){ # loop 1B
4574 x = q[i]; # DR_MISALIGNMENT(q) = 3
4575 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4579 -- Possibility 2: we do loop peeling:
4580 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4584 for (i = 3; i < N; i++){ # loop 2A
4585 x = q[i]; # DR_MISALIGNMENT(q) = 0
4586 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4589 -- Possibility 3: combination of loop peeling and versioning:
4590 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4595 for (i = 3; i<N; i++){ # loop 3A
4596 x = q[i]; # DR_MISALIGNMENT(q) = 0
4597 p[i] = y; # DR_MISALIGNMENT(p) = 0
4601 for (i = 3; i<N; i++){ # loop 3B
4602 x = q[i]; # DR_MISALIGNMENT(q) = 0
4603 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4607 These loops are later passed to loop_transform to be vectorized. The
4608 vectorizer will use the alignment information to guide the transformation
4609 (whether to generate regular loads/stores, or with special handling for
4613 /* (1) Peeling to force alignment. */
4615 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4617 + How many accesses will become aligned due to the peeling
4618 - How many accesses will become unaligned due to the peeling,
4619 and the cost of misaligned accesses.
4620 - The cost of peeling (the extra runtime checks, the increase
4623 The scheme we use FORNOW: peel to force the alignment of the first
4624 misaligned store in the loop.
4625 Rationale: misaligned stores are not yet supported.
4627 TODO: Use a better cost model. */
4629 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_write_datarefs
); i
++)
4631 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_write_datarefs
, i
);
4632 if (!aligned_access_p (dr
))
4634 LOOP_VINFO_UNALIGNED_DR (loop_vinfo
) = dr
;
4635 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo
) = true;
4640 if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo
))
4642 if (vect_debug_details (loop
))
4643 fprintf (dump_file
, "Peeling for alignment will not be applied.");
4647 if (vect_debug_details (loop
))
4648 fprintf (dump_file
, "Peeling for alignment will be applied.");
4651 /* (1.2) Update the alignment info according to the peeling factor.
4652 If the misalignment of the DR we peel for is M, then the
4653 peeling factor is VF - M, and the misalignment of each access DR_i
4654 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4655 If the misalignment of the DR we peel for is unknown, then the
4656 misalignment of each access DR_i in the loop is also unknown.
4658 FORNOW: set the misalignment of the accesses to unknown even
4659 if the peeling factor is known at compile time.
4661 TODO: - if the peeling factor is known at compile time, use that
4662 when updating the misalignment info of the loop DRs.
4663 - consider accesses that are known to have the same
4664 alignment, even if that alignment is unknown. */
4666 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_write_datarefs
); i
++)
4668 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_write_datarefs
, i
);
4669 if (dr
== LOOP_VINFO_UNALIGNED_DR (loop_vinfo
))
4670 DR_MISALIGNMENT (dr
) = 0;
4672 DR_MISALIGNMENT (dr
) = -1;
4674 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_read_datarefs
); i
++)
4676 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_read_datarefs
, i
);
4677 if (dr
== LOOP_VINFO_UNALIGNED_DR (loop_vinfo
))
4678 DR_MISALIGNMENT (dr
) = 0;
4680 DR_MISALIGNMENT (dr
) = -1;
4685 /* Function vect_analyze_data_refs_alignment
4687 Analyze the alignment of the data-references in the loop.
4688 FOR NOW: Until support for misliagned accesses is in place, only if all
4689 accesses are aligned can the loop be vectorized. This restriction will be
4693 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo
)
4695 varray_type loop_read_datarefs
= LOOP_VINFO_DATAREF_READS (loop_vinfo
);
4696 varray_type loop_write_datarefs
= LOOP_VINFO_DATAREF_WRITES (loop_vinfo
);
4697 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4698 enum dr_alignment_support supportable_dr_alignment
;
4701 if (vect_debug_details (NULL
))
4702 fprintf (dump_file
, "\n<<vect_analyze_data_refs_alignment>>\n");
4705 /* This pass may take place at function granularity instead of at loop
4708 if (!vect_compute_data_refs_alignment (loop_vinfo
))
4710 if (vect_debug_details (loop
) || vect_debug_stats (loop
))
4712 "not vectorized: can't calculate alignment for data ref.");
4717 /* This pass will decide on using loop versioning and/or loop peeling in
4718 order to enhance the alignment of data references in the loop. */
4720 vect_enhance_data_refs_alignment (loop_vinfo
);
4723 /* Finally, check that all the data references in the loop can be
4724 handled with respect to their alignment. */
4726 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_read_datarefs
); i
++)
4728 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_read_datarefs
, i
);
4729 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
4730 if (!supportable_dr_alignment
)
4732 if (vect_debug_details (loop
) || vect_debug_stats (loop
))
4733 fprintf (dump_file
, "not vectorized: unsupported unaligned load.");
4737 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_write_datarefs
); i
++)
4739 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_write_datarefs
, i
);
4740 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
4741 if (!supportable_dr_alignment
)
4743 if (vect_debug_details (loop
) || vect_debug_stats (loop
))
4744 fprintf (dump_file
, "not vectorized: unsupported unaligned store.");
4753 /* Function vect_analyze_data_ref_access.
4755 Analyze the access pattern of the data-reference DR. For now, a data access
4756 has to consecutive and aligned to be considered vectorizable. */
4759 vect_analyze_data_ref_access (struct data_reference
*dr
)
4761 varray_type access_fns
= DR_ACCESS_FNS (dr
);
4764 unsigned int dimensions
, i
;
4766 /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
4767 i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
4768 access is contiguous). */
4769 dimensions
= VARRAY_ACTIVE_SIZE (access_fns
);
4771 for (i
= 1; i
< dimensions
; i
++) /* Not including the last dimension. */
4773 access_fn
= DR_ACCESS_FN (dr
, i
);
4775 if (evolution_part_in_loop_num (access_fn
,
4776 loop_containing_stmt (DR_STMT (dr
))->num
))
4778 /* Evolution part is not NULL in this loop (it is neither constant
4780 if (vect_debug_details (NULL
))
4783 "not vectorized: complicated multidim. array access.");
4784 print_generic_expr (dump_file
, access_fn
, TDF_SLIM
);
4790 access_fn
= DR_ACCESS_FN (dr
, 0); /* The last dimension access function. */
4791 if (!evolution_function_is_constant_p (access_fn
)
4792 && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr
))->num
,
4793 access_fn
, &init
, &step
, true))
4795 if (vect_debug_details (NULL
))
4797 fprintf (dump_file
, "not vectorized: complicated access function.");
4798 print_generic_expr (dump_file
, access_fn
, TDF_SLIM
);
4807 /* Function vect_analyze_data_ref_accesses.
4809 Analyze the access pattern of all the data references in the loop.
4811 FORNOW: the only access pattern that is considered vectorizable is a
4812 simple step 1 (consecutive) access.
4814 FORNOW: handle only arrays and pointer accesses. */
4817 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo
)
4820 varray_type loop_write_datarefs
= LOOP_VINFO_DATAREF_WRITES (loop_vinfo
);
4821 varray_type loop_read_datarefs
= LOOP_VINFO_DATAREF_READS (loop_vinfo
);
4823 if (vect_debug_details (NULL
))
4824 fprintf (dump_file
, "\n<<vect_analyze_data_ref_accesses>>\n");
4826 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_write_datarefs
); i
++)
4828 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_write_datarefs
, i
);
4829 bool ok
= vect_analyze_data_ref_access (dr
);
4832 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo
))
4833 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo
)))
4834 fprintf (dump_file
, "not vectorized: complicated access pattern.");
4839 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_read_datarefs
); i
++)
4841 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_read_datarefs
, i
);
4842 bool ok
= vect_analyze_data_ref_access (dr
);
4845 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo
))
4846 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo
)))
4847 fprintf (dump_file
, "not vectorized: complicated access pattern.");
4856 /* Function vect_analyze_pointer_ref_access.
4859 STMT - a stmt that contains a data-ref
4860 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4862 If the data-ref access is vectorizable, return a data_reference structure
4863 that represents it (DR). Otherwise - return NULL. */
4865 static struct data_reference
*
4866 vect_analyze_pointer_ref_access (tree memref
, tree stmt
, bool is_read
)
4868 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4869 struct loop
*loop
= STMT_VINFO_LOOP (stmt_info
);
4870 tree access_fn
= analyze_scalar_evolution (loop
, TREE_OPERAND (memref
, 0));
4873 tree reftype
, innertype
;
4874 enum machine_mode innermode
;
4875 tree indx_access_fn
;
4876 int loopnum
= loop
->num
;
4877 struct data_reference
*dr
;
4881 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
4882 fprintf (dump_file
, "not vectorized: complicated pointer access.");
4886 if (vect_debug_details (NULL
))
4888 fprintf (dump_file
, "Access function of ptr: ");
4889 print_generic_expr (dump_file
, access_fn
, TDF_SLIM
);
4892 if (!vect_is_simple_iv_evolution (loopnum
, access_fn
, &init
, &step
, false))
4894 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
4895 fprintf (dump_file
, "not vectorized: pointer access is not simple.");
4901 if (!host_integerp (step
,0))
4903 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
4905 "not vectorized: non constant step for pointer access.");
4909 step_val
= TREE_INT_CST_LOW (step
);
4911 reftype
= TREE_TYPE (TREE_OPERAND (memref
, 0));
4912 if (TREE_CODE (reftype
) != POINTER_TYPE
)
4914 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
4915 fprintf (dump_file
, "not vectorized: unexpected pointer access form.");
4919 reftype
= TREE_TYPE (init
);
4920 if (TREE_CODE (reftype
) != POINTER_TYPE
)
4922 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
4923 fprintf (dump_file
, "not vectorized: unexpected pointer access form.");
4927 innertype
= TREE_TYPE (reftype
);
4928 innermode
= TYPE_MODE (innertype
);
4929 if (GET_MODE_SIZE (innermode
) != step_val
)
4931 /* FORNOW: support only consecutive access */
4932 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
4933 fprintf (dump_file
, "not vectorized: non consecutive access.");
4938 build_polynomial_chrec (loopnum
, integer_zero_node
, integer_one_node
);
4939 if (vect_debug_details (NULL
))
4941 fprintf (dump_file
, "Access function of ptr indx: ");
4942 print_generic_expr (dump_file
, indx_access_fn
, TDF_SLIM
);
4944 dr
= init_data_ref (stmt
, memref
, init
, indx_access_fn
, is_read
);
4949 /* Function vect_get_symbl_and_dr.
4951 The function returns SYMBL - the relevant variable for
4952 memory tag (for aliasing purposes).
4953 Also data reference structure DR is created.
4955 This function handles three kinds of MEMREF:
4957 It is called from vect_analyze_data_refs with a MEMREF that is either an
4958 ARRAY_REF or an INDIRECT_REF (this is category 1 - "recursion begins").
4959 It builds a DR for them using vect_get_base_and_bit_offset, and calls itself
4960 recursively to retrieve the relevant memtag for the MEMREF, "peeling" the
4961 MEMREF along the way. During the recursive calls, the function may be called
4962 with a MEMREF for which the recursion has to continue - PLUS_EXPR,
4963 MINUS_EXPR, INDIRECT_REF (category 2 - "recursion continues"),
4964 and/or with a MEMREF for which a memtag can be trivially obtained - VAR_DECL
4965 and SSA_NAME (this is category 3 - "recursion stop condition").
4967 When the MEMREF falls into category 1 there is still no data reference struct
4968 (DR) available. It is created by this function, and then, along the recursion,
4969 MEMREF will fall into category 2 or 3, in which case a DR will have already
4970 been created, but the analysis continues to retrieve the MEMTAG.
4973 MEMREF - data reference in STMT
4974 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4977 DR - data_reference struct for MEMREF
4978 return value - the relevant variable for memory tag (for aliasing purposes).
4983 vect_get_symbl_and_dr (tree memref
, tree stmt
, bool is_read
,
4984 loop_vec_info loop_vinfo
, struct data_reference
**dr
)
4986 tree symbl
, oprnd0
, oprnd1
;
4987 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4990 struct data_reference
*new_dr
;
4991 bool base_aligned_p
;
4995 /* Category 3: recursion stop condition. */
4996 /* (1) A DR already exists. We only need to get the relevant memtag for
4997 MEMREF, the rest of the data was already initialized. */
4999 switch (TREE_CODE (memref
))
5001 /* (1.1) Stop condition: find the relevant memtag and return. */
5003 symbl
= SSA_NAME_VAR (memref
);
5004 tag
= get_var_ann (symbl
)->type_mem_tag
;
5007 tree ptr
= TREE_OPERAND (DR_REF ((*dr
)), 0);
5008 if (TREE_CODE (ptr
) == SSA_NAME
)
5009 tag
= get_var_ann (SSA_NAME_VAR (ptr
))->type_mem_tag
;
5013 if (vect_debug_details (NULL
))
5014 fprintf (dump_file
, "not vectorized: no memtag for ref.");
5023 /* Category 2: recursion continues. */
5024 /* (1.2) A recursive call to find the relevant memtag is required. */
5026 symbl
= TREE_OPERAND (memref
, 0);
5027 break; /* For recursive call. */
5030 /* Could have recorded more accurate information -
5031 i.e, the actual FIELD_DECL that is being referenced -
5032 but later passes expect VAR_DECL as the nmt. */
5036 symbl
= vect_get_base_and_bit_offset ((*dr
), memref
, NULL_TREE
,
5037 loop_vinfo
, &offset
, &base_aligned_p
);
5038 break; /* For recursive call. */
5042 /* Although DR exists, we have to call the function recursively to
5043 build MEMTAG for such expression. This is handled below. */
5044 oprnd0
= TREE_OPERAND (memref
, 0);
5045 oprnd1
= TREE_OPERAND (memref
, 1);
5047 STRIP_NOPS (oprnd1
);
5048 /* Supported plus/minus expressions are of the form
5049 {address_base + offset}, such that address_base is of type
5050 POINTER/ARRAY, and offset is either an INTEGER_CST of type POINTER,
5051 or it's not of type POINTER/ARRAY.
5052 TODO: swap operands if {offset + address_base}. */
5053 if ((TREE_CODE (TREE_TYPE (oprnd1
)) == POINTER_TYPE
5054 && TREE_CODE (oprnd1
) != INTEGER_CST
)
5055 || TREE_CODE (TREE_TYPE (oprnd1
)) == ARRAY_TYPE
)
5059 break; /* For recursive call. */
5067 /* Category 1: recursion begins. */
5068 /* (2) A DR does not exist yet and must be built, followed by a
5069 recursive call to get the relevant memtag for MEMREF. */
5071 switch (TREE_CODE (memref
))
5074 new_dr
= vect_analyze_pointer_ref_access (memref
, stmt
, is_read
);
5078 symbl
= DR_BASE_NAME (new_dr
);
5079 STMT_VINFO_VECT_DR_BASE (stmt_info
) = symbl
;
5083 new_dr
= analyze_array (stmt
, memref
, is_read
);
5085 symbl
= DR_BASE_NAME (new_dr
);
5086 STMT_VINFO_VECT_DR_BASE (stmt_info
) = TREE_OPERAND (memref
, 0);
5090 /* TODO: Support data-refs of form a[i].p for unions and single
5091 field structures. */
5098 /* Recursive call to retrieve the relevant memtag. */
5099 tag
= vect_get_symbl_and_dr (symbl
, stmt
, is_read
, loop_vinfo
, dr
);
5104 /* Function vect_analyze_data_refs.
5106 Find all the data references in the loop.
5108 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
5109 which base is really an array (not a pointer) and which alignment
5110 can be forced. This restriction will be relaxed. */
5113 vect_analyze_data_refs (loop_vec_info loop_vinfo
)
5115 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5116 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
5117 int nbbs
= loop
->num_nodes
;
5118 block_stmt_iterator si
;
5120 struct data_reference
*dr
;
5122 if (vect_debug_details (NULL
))
5123 fprintf (dump_file
, "\n<<vect_analyze_data_refs>>\n");
5125 for (j
= 0; j
< nbbs
; j
++)
5127 basic_block bb
= bbs
[j
];
5128 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
5130 bool is_read
= false;
5131 tree stmt
= bsi_stmt (si
);
5132 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5133 v_may_def_optype v_may_defs
= STMT_V_MAY_DEF_OPS (stmt
);
5134 v_must_def_optype v_must_defs
= STMT_V_MUST_DEF_OPS (stmt
);
5135 vuse_optype vuses
= STMT_VUSE_OPS (stmt
);
5136 varray_type
*datarefs
= NULL
;
5137 int nvuses
, nv_may_defs
, nv_must_defs
;
5141 /* Assumption: there exists a data-ref in stmt, if and only if
5142 it has vuses/vdefs. */
5144 if (!vuses
&& !v_may_defs
&& !v_must_defs
)
5147 nvuses
= NUM_VUSES (vuses
);
5148 nv_may_defs
= NUM_V_MAY_DEFS (v_may_defs
);
5149 nv_must_defs
= NUM_V_MUST_DEFS (v_must_defs
);
5151 if (nvuses
&& (nv_may_defs
|| nv_must_defs
))
5153 if (vect_debug_details (NULL
))
5155 fprintf (dump_file
, "unexpected vdefs and vuses in stmt: ");
5156 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
5161 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
5163 if (vect_debug_details (NULL
))
5165 fprintf (dump_file
, "unexpected vops in stmt: ");
5166 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
5173 memref
= TREE_OPERAND (stmt
, 1);
5174 datarefs
= &(LOOP_VINFO_DATAREF_READS (loop_vinfo
));
5179 memref
= TREE_OPERAND (stmt
, 0);
5180 datarefs
= &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo
));
5184 /* Analyze MEMREF. If it is of a supported form, build data_reference
5185 struct for it (DR) and find the relevant symbol for aliasing
5188 symbl
= vect_get_symbl_and_dr (memref
, stmt
, is_read
, loop_vinfo
,
5192 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
5194 fprintf (dump_file
, "not vectorized: unhandled data ref: ");
5195 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
5199 STMT_VINFO_MEMTAG (stmt_info
) = symbl
;
5200 VARRAY_PUSH_GENERIC_PTR (*datarefs
, dr
);
5201 STMT_VINFO_DATA_REF (stmt_info
) = dr
;
5209 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
5211 /* Function vect_mark_relevant.
5213 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
5216 vect_mark_relevant (varray_type
*worklist
, tree stmt
)
5218 stmt_vec_info stmt_info
;
5220 if (vect_debug_details (NULL
))
5221 fprintf (dump_file
, "mark relevant.");
5223 if (TREE_CODE (stmt
) == PHI_NODE
)
5225 VARRAY_PUSH_TREE (*worklist
, stmt
);
5229 stmt_info
= vinfo_for_stmt (stmt
);
5233 if (vect_debug_details (NULL
))
5235 fprintf (dump_file
, "mark relevant: no stmt info!!.");
5236 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
5241 if (STMT_VINFO_RELEVANT_P (stmt_info
))
5243 if (vect_debug_details (NULL
))
5244 fprintf (dump_file
, "already marked relevant.");
5248 STMT_VINFO_RELEVANT_P (stmt_info
) = 1;
5249 VARRAY_PUSH_TREE (*worklist
, stmt
);
5253 /* Function vect_stmt_relevant_p.
5255 Return true if STMT in loop that is represented by LOOP_VINFO is
5256 "relevant for vectorization".
5258 A stmt is considered "relevant for vectorization" if:
5259 - it has uses outside the loop.
5260 - it has vdefs (it alters memory).
5261 - control stmts in the loop (except for the exit condition).
5263 CHECKME: what other side effects would the vectorizer allow? */
5266 vect_stmt_relevant_p (tree stmt
, loop_vec_info loop_vinfo
)
5268 v_may_def_optype v_may_defs
;
5269 v_must_def_optype v_must_defs
;
5270 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5275 /* cond stmt other than loop exit cond. */
5276 if (is_ctrl_stmt (stmt
) && (stmt
!= LOOP_VINFO_EXIT_COND (loop_vinfo
)))
5279 /* changing memory. */
5280 v_may_defs
= STMT_V_MAY_DEF_OPS (stmt
);
5281 v_must_defs
= STMT_V_MUST_DEF_OPS (stmt
);
5282 if (v_may_defs
|| v_must_defs
)
5284 if (vect_debug_details (NULL
))
5285 fprintf (dump_file
, "vec_stmt_relevant_p: stmt has vdefs.");
5289 /* uses outside the loop. */
5290 df
= get_immediate_uses (stmt
);
5291 num_uses
= num_immediate_uses (df
);
5292 for (i
= 0; i
< num_uses
; i
++)
5294 tree use
= immediate_use (df
, i
);
5295 basic_block bb
= bb_for_stmt (use
);
5296 if (!flow_bb_inside_loop_p (loop
, bb
))
5298 if (vect_debug_details (NULL
))
5299 fprintf (dump_file
, "vec_stmt_relevant_p: used out of loop.");
5308 /* Function vect_mark_stmts_to_be_vectorized.
5310 Not all stmts in the loop need to be vectorized. For example:
5319 Stmt 1 and 3 do not need to be vectorized, because loop control and
5320 addressing of vectorized data-refs are handled differently.
5322 This pass detects such stmts. */
5325 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
5327 varray_type worklist
;
5328 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5329 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
5330 unsigned int nbbs
= loop
->num_nodes
;
5331 block_stmt_iterator si
;
5337 stmt_vec_info stmt_info
;
5339 if (vect_debug_details (NULL
))
5340 fprintf (dump_file
, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5342 VARRAY_TREE_INIT (worklist
, 64, "work list");
5344 /* 1. Init worklist. */
5346 for (i
= 0; i
< nbbs
; i
++)
5348 basic_block bb
= bbs
[i
];
5349 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
5351 stmt
= bsi_stmt (si
);
5353 if (vect_debug_details (NULL
))
5355 fprintf (dump_file
, "init: stmt relevant? ");
5356 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
5359 stmt_info
= vinfo_for_stmt (stmt
);
5360 STMT_VINFO_RELEVANT_P (stmt_info
) = 0;
5362 if (vect_stmt_relevant_p (stmt
, loop_vinfo
))
5363 vect_mark_relevant (&worklist
, stmt
);
5368 /* 2. Process_worklist */
5370 while (VARRAY_ACTIVE_SIZE (worklist
) > 0)
5372 stmt
= VARRAY_TOP_TREE (worklist
);
5373 VARRAY_POP (worklist
);
5375 if (vect_debug_details (NULL
))
5377 fprintf (dump_file
, "worklist: examine stmt: ");
5378 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
5381 /* Examine the USES in this statement. Mark all the statements which
5382 feed this statement's uses as "relevant", unless the USE is used as
5385 if (TREE_CODE (stmt
) == PHI_NODE
)
5387 /* follow the def-use chain inside the loop. */
5388 for (j
= 0; j
< PHI_NUM_ARGS (stmt
); j
++)
5390 tree arg
= PHI_ARG_DEF (stmt
, j
);
5391 tree def_stmt
= NULL_TREE
;
5393 if (!vect_is_simple_use (arg
, loop
, &def_stmt
))
5395 if (vect_debug_details (NULL
))
5396 fprintf (dump_file
, "worklist: unsupported use.");
5397 varray_clear (worklist
);
5403 if (vect_debug_details (NULL
))
5405 fprintf (dump_file
, "worklist: def_stmt: ");
5406 print_generic_expr (dump_file
, def_stmt
, TDF_SLIM
);
5409 bb
= bb_for_stmt (def_stmt
);
5410 if (flow_bb_inside_loop_p (loop
, bb
))
5411 vect_mark_relevant (&worklist
, def_stmt
);
5415 ann
= stmt_ann (stmt
);
5416 use_ops
= USE_OPS (ann
);
5418 for (i
= 0; i
< NUM_USES (use_ops
); i
++)
5420 tree use
= USE_OP (use_ops
, i
);
5422 /* We are only interested in uses that need to be vectorized. Uses
5423 that are used for address computation are not considered relevant.
5425 if (exist_non_indexing_operands_for_use_p (use
, stmt
))
5427 tree def_stmt
= NULL_TREE
;
5429 if (!vect_is_simple_use (use
, loop
, &def_stmt
))
5431 if (vect_debug_details (NULL
))
5432 fprintf (dump_file
, "worklist: unsupported use.");
5433 varray_clear (worklist
);
5440 if (vect_debug_details (NULL
))
5442 fprintf (dump_file
, "worklist: examine use %d: ", i
);
5443 print_generic_expr (dump_file
, use
, TDF_SLIM
);
5446 bb
= bb_for_stmt (def_stmt
);
5447 if (flow_bb_inside_loop_p (loop
, bb
))
5448 vect_mark_relevant (&worklist
, def_stmt
);
5451 } /* while worklist */
5453 varray_clear (worklist
);
5458 /* Function vect_can_advance_ivs_p
5460 In case the number of iterations that LOOP iterates in unknown at compile
5461 time, an epilog loop will be generated, and the loop induction variables
5462 (IVs) will be "advanced" to the value they are supposed to take just before
5463 the epilog loop. Here we check that the access function of the loop IVs
5464 and the expression that represents the loop bound are simple enough.
5465 These restrictions will be relaxed in the future. */
5468 vect_can_advance_ivs_p (struct loop
*loop
)
5470 basic_block bb
= loop
->header
;
5473 /* Analyze phi functions of the loop header. */
5475 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
5477 tree access_fn
= NULL
;
5478 tree evolution_part
;
5480 if (vect_debug_details (NULL
))
5482 fprintf (dump_file
, "Analyze phi: ");
5483 print_generic_expr (dump_file
, phi
, TDF_SLIM
);
5486 /* Skip virtual phi's. The data dependences that are associated with
5487 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
5489 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi
))))
5491 if (vect_debug_details (NULL
))
5492 fprintf (dump_file
, "virtual phi. skip.");
5496 /* Analyze the evolution function. */
5498 access_fn
= instantiate_parameters
5499 (loop
, analyze_scalar_evolution (loop
, PHI_RESULT (phi
)));
5503 if (vect_debug_details (NULL
))
5504 fprintf (dump_file
, "No Access function.");
5508 if (vect_debug_details (NULL
))
5510 fprintf (dump_file
, "Access function of PHI: ");
5511 print_generic_expr (dump_file
, access_fn
, TDF_SLIM
);
5514 evolution_part
= evolution_part_in_loop_num (access_fn
, loop
->num
);
5516 if (evolution_part
== NULL_TREE
)
5519 /* FORNOW: We do not transform initial conditions of IVs
5520 which evolution functions are a polynomial of degree >= 2. */
5522 if (tree_is_chrec (evolution_part
))
5530 /* Function vect_get_loop_niters.
5532 Determine how many iterations the loop is executed.
5533 If an expression that represents the number of iterations
5534 can be constructed, place it in NUMBER_OF_ITERATIONS.
5535 Return the loop exit condition. */
5538 vect_get_loop_niters (struct loop
*loop
, tree
*number_of_iterations
)
5542 if (vect_debug_details (NULL
))
5543 fprintf (dump_file
, "\n<<get_loop_niters>>\n");
5545 niters
= number_of_iterations_in_loop (loop
);
5547 if (niters
!= NULL_TREE
5548 && niters
!= chrec_dont_know
)
5550 *number_of_iterations
= niters
;
5552 if (vect_debug_details (NULL
))
5554 fprintf (dump_file
, "==> get_loop_niters:" );
5555 print_generic_expr (dump_file
, *number_of_iterations
, TDF_SLIM
);
5559 return get_loop_exit_condition (loop
);
5563 /* Function vect_analyze_loop_form.
5565 Verify the following restrictions (some may be relaxed in the future):
5566 - it's an inner-most loop
5567 - number of BBs = 2 (which are the loop header and the latch)
5568 - the loop has a pre-header
5569 - the loop has a single entry and exit
5570 - the loop exit condition is simple enough, and the number of iterations
5571 can be analyzed (a countable loop). */
5573 static loop_vec_info
5574 vect_analyze_loop_form (struct loop
*loop
)
5576 loop_vec_info loop_vinfo
;
5578 tree number_of_iterations
= NULL
;
5579 bool rescan
= false;
5581 if (vect_debug_details (loop
))
5582 fprintf (dump_file
, "\n<<vect_analyze_loop_form>>\n");
5585 || !loop
->single_exit
5586 || loop
->num_nodes
!= 2
5587 || EDGE_COUNT (loop
->header
->preds
) != 2
5588 || loop
->num_entries
!= 1)
5590 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
5592 fprintf (dump_file
, "not vectorized: bad loop form. ");
5594 fprintf (dump_file
, "nested loop.");
5595 else if (!loop
->single_exit
)
5596 fprintf (dump_file
, "multiple exits.");
5597 else if (loop
->num_nodes
!= 2)
5598 fprintf (dump_file
, "too many BBs in loop.");
5599 else if (EDGE_COUNT (loop
->header
->preds
) != 2)
5600 fprintf (dump_file
, "too many incoming edges.");
5601 else if (loop
->num_entries
!= 1)
5602 fprintf (dump_file
, "too many entries.");
5608 /* We assume that the loop exit condition is at the end of the loop. i.e,
5609 that the loop is represented as a do-while (with a proper if-guard
5610 before the loop if needed), where the loop header contains all the
5611 executable statements, and the latch is empty. */
5612 if (!empty_block_p (loop
->latch
))
5614 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
5615 fprintf (dump_file
, "not vectorized: unexpectd loop form.");
5619 /* Make sure we have a preheader basic block. */
5620 if (!loop
->pre_header
)
5623 loop_split_edge_with (loop_preheader_edge (loop
), NULL
);
5626 /* Make sure there exists a single-predecessor exit bb: */
5627 if (EDGE_COUNT (loop
->exit_edges
[0]->dest
->preds
) != 1)
5630 loop_split_edge_with (loop
->exit_edges
[0], NULL
);
5635 flow_loop_scan (loop
, LOOP_ALL
);
5636 /* Flow loop scan does not update loop->single_exit field. */
5637 loop
->single_exit
= loop
->exit_edges
[0];
5640 if (empty_block_p (loop
->header
))
5642 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
5643 fprintf (dump_file
, "not vectorized: empty loop.");
5647 loop_cond
= vect_get_loop_niters (loop
, &number_of_iterations
);
5650 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
5651 fprintf (dump_file
, "not vectorized: complicated exit condition.");
5655 if (!number_of_iterations
)
5657 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
5659 "not vectorized: number of iterations cannot be computed.");
5663 if (chrec_contains_undetermined (number_of_iterations
))
5665 if (vect_debug_details (NULL
))
5666 fprintf (dump_file
, "Infinite number of iterations.");
5670 loop_vinfo
= new_loop_vec_info (loop
);
5671 LOOP_VINFO_NITERS (loop_vinfo
) = number_of_iterations
;
5673 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
))
5675 if (vect_debug_details (loop
))
5677 fprintf (dump_file
, "loop bound unknown.\n");
5678 fprintf (dump_file
, "Symbolic number of iterations is ");
5679 print_generic_expr (dump_file
, number_of_iterations
, TDF_DETAILS
);
5683 if (LOOP_VINFO_INT_NITERS (loop_vinfo
) == 0)
5685 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
5686 fprintf (dump_file
, "not vectorized: number of iterations = 0.");
5690 LOOP_VINFO_EXIT_COND (loop_vinfo
) = loop_cond
;
5696 /* Function vect_analyze_loop.
5698 Apply a set of analyses on LOOP, and create a loop_vec_info struct
5699 for it. The different analyses will record information in the
5700 loop_vec_info struct. */
5702 static loop_vec_info
5703 vect_analyze_loop (struct loop
*loop
)
5706 loop_vec_info loop_vinfo
;
5708 if (vect_debug_details (NULL
))
5709 fprintf (dump_file
, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5711 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
5713 loop_vinfo
= vect_analyze_loop_form (loop
);
5716 if (vect_debug_details (loop
))
5717 fprintf (dump_file
, "bad loop form.");
5721 /* Find all data references in the loop (which correspond to vdefs/vuses)
5722 and analyze their evolution in the loop.
5724 FORNOW: Handle only simple, array references, which
5725 alignment can be forced, and aligned pointer-references. */
5727 ok
= vect_analyze_data_refs (loop_vinfo
);
5730 if (vect_debug_details (loop
))
5731 fprintf (dump_file
, "bad data references.");
5732 destroy_loop_vec_info (loop_vinfo
);
5736 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
5738 ok
= vect_mark_stmts_to_be_vectorized (loop_vinfo
);
5741 if (vect_debug_details (loop
))
5742 fprintf (dump_file
, "unexpected pattern.");
5743 if (vect_debug_details (loop
))
5744 fprintf (dump_file
, "not vectorized: unexpected pattern.");
5745 destroy_loop_vec_info (loop_vinfo
);
5749 /* Check that all cross-iteration scalar data-flow cycles are OK.
5750 Cross-iteration cycles caused by virtual phis are analyzed separately. */
5752 ok
= vect_analyze_scalar_cycles (loop_vinfo
);
5755 if (vect_debug_details (loop
))
5756 fprintf (dump_file
, "bad scalar cycle.");
5757 destroy_loop_vec_info (loop_vinfo
);
5761 /* Analyze data dependences between the data-refs in the loop.
5762 FORNOW: fail at the first data dependence that we encounter. */
5764 ok
= vect_analyze_data_ref_dependences (loop_vinfo
);
5767 if (vect_debug_details (loop
))
5768 fprintf (dump_file
, "bad data dependence.");
5769 destroy_loop_vec_info (loop_vinfo
);
5773 /* Analyze the access patterns of the data-refs in the loop (consecutive,
5774 complex, etc.). FORNOW: Only handle consecutive access pattern. */
5776 ok
= vect_analyze_data_ref_accesses (loop_vinfo
);
5779 if (vect_debug_details (loop
))
5780 fprintf (dump_file
, "bad data access.");
5781 destroy_loop_vec_info (loop_vinfo
);
5785 /* Analyze the alignment of the data-refs in the loop.
5786 FORNOW: Only aligned accesses are handled. */
5788 ok
= vect_analyze_data_refs_alignment (loop_vinfo
);
5791 if (vect_debug_details (loop
))
5792 fprintf (dump_file
, "bad data alignment.");
5793 destroy_loop_vec_info (loop_vinfo
);
5797 /* Scan all the operations in the loop and make sure they are
5800 ok
= vect_analyze_operations (loop_vinfo
);
5803 if (vect_debug_details (loop
))
5804 fprintf (dump_file
, "bad operation or unsupported loop bound.");
5805 destroy_loop_vec_info (loop_vinfo
);
5809 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo
) = 1;
5815 /* Function need_imm_uses_for.
5817 Return whether we ought to include information for 'var'
5818 when calculating immediate uses. For this pass we only want use
5819 information for non-virtual variables. */
5822 need_imm_uses_for (tree var
)
5824 return is_gimple_reg (var
);
5828 /* Function vectorize_loops.
5830 Entry Point to loop vectorization phase. */
5833 vectorize_loops (struct loops
*loops
)
5835 unsigned int i
, loops_num
;
5836 unsigned int num_vectorized_loops
= 0;
5838 /* Does the target support SIMD? */
5839 /* FORNOW: until more sophisticated machine modelling is in place. */
5840 if (!UNITS_PER_SIMD_WORD
)
5842 if (vect_debug_details (NULL
))
5843 fprintf (dump_file
, "vectorizer: target vector size is not defined.");
5847 #ifdef ENABLE_CHECKING
5848 verify_loop_closed_ssa ();
5851 compute_immediate_uses (TDFA_USE_OPS
, need_imm_uses_for
);
5853 /* ----------- Analyze loops. ----------- */
5855 /* If some loop was duplicated, it gets bigger number
5856 than all previously defined loops. This fact allows us to run
5857 only over initial loops skipping newly generated ones. */
5858 loops_num
= loops
->num
;
5859 for (i
= 1; i
< loops_num
; i
++)
5861 loop_vec_info loop_vinfo
;
5862 struct loop
*loop
= loops
->parray
[i
];
5867 loop_vinfo
= vect_analyze_loop (loop
);
5868 loop
->aux
= loop_vinfo
;
5870 if (!loop_vinfo
|| !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo
))
5873 vect_transform_loop (loop_vinfo
, loops
);
5874 num_vectorized_loops
++;
5877 if (vect_debug_stats (NULL
) || vect_debug_details (NULL
))
5878 fprintf (dump_file
, "\nvectorized %u loops in function.\n",
5879 num_vectorized_loops
);
5881 /* ----------- Finalize. ----------- */
5884 for (i
= 1; i
< loops_num
; i
++)
5886 struct loop
*loop
= loops
->parray
[i
];
5887 loop_vec_info loop_vinfo
;
5891 loop_vinfo
= loop
->aux
;
5892 destroy_loop_vec_info (loop_vinfo
);
5896 rewrite_into_ssa (false);
5897 rewrite_into_loop_closed_ssa (); /* FORNOW */
5898 bitmap_clear (vars_to_rename
);