2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Loop and basic block vectorizer.
23 This file contains drivers for the three vectorizers:
24 (1) loop vectorizer (inter-iteration parallelism),
25 (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
27 (3) BB vectorizer (out-of-loops), aka SLP
29 The rest of the vectorizer's code is organized as follows:
30 - tree-vect-loop.c - loop specific parts such as reductions, etc. These are
31 used by drivers (1) and (2).
32 - tree-vect-loop-manip.c - vectorizer's loop control-flow utilities, used by
34 - tree-vect-slp.c - BB vectorization specific analysis and transformation,
35 used by drivers (2) and (3).
36 - tree-vect-stmts.c - statements analysis and transformation (used by all).
37 - tree-vect-data-refs.c - vectorizer specific data-refs analysis and
38 manipulations (used by all).
39 - tree-vect-patterns.c - vectorizable code patterns detector (used by all)
41 Here's a poor attempt at illustrating that:
44 loop_vect() loop_aware_slp() slp_vect()
47 tree-vect-loop.c tree-vect-slp.c
52 tree-vect-stmts.c tree-vect-data-refs.c
59 #include "coretypes.h"
64 #include "tree-pass.h"
67 #include "fold-const.h"
68 #include "stor-layout.h"
69 #include "gimple-iterator.h"
70 #include "gimple-walk.h"
71 #include "tree-ssa-loop-manip.h"
72 #include "tree-ssa-loop-niter.h"
75 #include "tree-vectorizer.h"
76 #include "tree-ssa-propagate.h"
78 #include "tree-scalar-evolution.h"
81 /* Loop or bb location. */
82 source_location vect_location
;
84 /* Vector mapping GIMPLE stmt to stmt_vec_info. */
85 vec
<stmt_vec_info
> stmt_vec_info_vec
;
87 /* For mapping simduid to vectorization factor. */
89 struct simduid_to_vf
: free_ptr_hash
<simduid_to_vf
>
94 /* hash_table support. */
95 static inline hashval_t
hash (const simduid_to_vf
*);
96 static inline int equal (const simduid_to_vf
*, const simduid_to_vf
*);
100 simduid_to_vf::hash (const simduid_to_vf
*p
)
106 simduid_to_vf::equal (const simduid_to_vf
*p1
, const simduid_to_vf
*p2
)
108 return p1
->simduid
== p2
->simduid
;
111 /* This hash maps the OMP simd array to the corresponding simduid used
112 to index into it. Like thus,
114 _7 = GOMP_SIMD_LANE (simduid.0)
120 This hash maps from the OMP simd array (D.1737[]) to DECL_UID of simduid.0. */
123 struct simd_array_to_simduid
: free_ptr_hash
<simd_array_to_simduid
>
126 unsigned int simduid
;
128 /* hash_table support. */
129 static inline hashval_t
hash (const simd_array_to_simduid
*);
130 static inline int equal (const simd_array_to_simduid
*,
131 const simd_array_to_simduid
*);
135 simd_array_to_simduid::hash (const simd_array_to_simduid
*p
)
137 return DECL_UID (p
->decl
);
141 simd_array_to_simduid::equal (const simd_array_to_simduid
*p1
,
142 const simd_array_to_simduid
*p2
)
144 return p1
->decl
== p2
->decl
;
147 /* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LAST_LANE,
148 into their corresponding constants and remove
149 IFN_GOMP_SIMD_ORDERED_{START,END}. */
152 adjust_simduid_builtins (hash_table
<simduid_to_vf
> *htab
)
156 FOR_EACH_BB_FN (bb
, cfun
)
158 gimple_stmt_iterator i
;
160 for (i
= gsi_start_bb (bb
); !gsi_end_p (i
); )
163 enum internal_fn ifn
;
164 gimple
*stmt
= gsi_stmt (i
);
166 if (!is_gimple_call (stmt
)
167 || !gimple_call_internal_p (stmt
))
172 ifn
= gimple_call_internal_fn (stmt
);
175 case IFN_GOMP_SIMD_LANE
:
176 case IFN_GOMP_SIMD_VF
:
177 case IFN_GOMP_SIMD_LAST_LANE
:
179 case IFN_GOMP_SIMD_ORDERED_START
:
180 case IFN_GOMP_SIMD_ORDERED_END
:
181 if (integer_onep (gimple_call_arg (stmt
, 0)))
183 enum built_in_function bcode
184 = (ifn
== IFN_GOMP_SIMD_ORDERED_START
185 ? BUILT_IN_GOMP_ORDERED_START
186 : BUILT_IN_GOMP_ORDERED_END
);
188 = gimple_build_call (builtin_decl_explicit (bcode
), 0);
189 tree vdef
= gimple_vdef (stmt
);
190 gimple_set_vdef (g
, vdef
);
191 SSA_NAME_DEF_STMT (vdef
) = g
;
192 gimple_set_vuse (g
, gimple_vuse (stmt
));
193 gsi_replace (&i
, g
, true);
196 gsi_remove (&i
, true);
197 unlink_stmt_vdef (stmt
);
203 tree arg
= gimple_call_arg (stmt
, 0);
204 gcc_assert (arg
!= NULL_TREE
);
205 gcc_assert (TREE_CODE (arg
) == SSA_NAME
);
206 simduid_to_vf
*p
= NULL
, data
;
207 data
.simduid
= DECL_UID (SSA_NAME_VAR (arg
));
208 /* Need to nullify loop safelen field since its value is not
209 valid after transformation. */
210 if (bb
->loop_father
&& bb
->loop_father
->safelen
> 0)
211 bb
->loop_father
->safelen
= 0;
214 p
= htab
->find (&data
);
220 case IFN_GOMP_SIMD_VF
:
221 t
= build_int_cst (unsigned_type_node
, vf
);
223 case IFN_GOMP_SIMD_LANE
:
224 t
= build_int_cst (unsigned_type_node
, 0);
226 case IFN_GOMP_SIMD_LAST_LANE
:
227 t
= gimple_call_arg (stmt
, 1);
232 update_call_from_tree (&i
, t
);
238 /* Helper structure for note_simd_array_uses. */
240 struct note_simd_array_uses_struct
242 hash_table
<simd_array_to_simduid
> **htab
;
243 unsigned int simduid
;
246 /* Callback for note_simd_array_uses, called through walk_gimple_op. */
249 note_simd_array_uses_cb (tree
*tp
, int *walk_subtrees
, void *data
)
251 struct walk_stmt_info
*wi
= (struct walk_stmt_info
*) data
;
252 struct note_simd_array_uses_struct
*ns
253 = (struct note_simd_array_uses_struct
*) wi
->info
;
258 && lookup_attribute ("omp simd array", DECL_ATTRIBUTES (*tp
))
259 && DECL_CONTEXT (*tp
) == current_function_decl
)
261 simd_array_to_simduid data
;
263 *ns
->htab
= new hash_table
<simd_array_to_simduid
> (15);
265 data
.simduid
= ns
->simduid
;
266 simd_array_to_simduid
**slot
= (*ns
->htab
)->find_slot (&data
, INSERT
);
269 simd_array_to_simduid
*p
= XNEW (simd_array_to_simduid
);
273 else if ((*slot
)->simduid
!= ns
->simduid
)
274 (*slot
)->simduid
= -1U;
280 /* Find "omp simd array" temporaries and map them to corresponding simduid. */
284 note_simd_array_uses (hash_table
<simd_array_to_simduid
> **htab
)
287 gimple_stmt_iterator gsi
;
288 struct walk_stmt_info wi
;
289 struct note_simd_array_uses_struct ns
;
291 memset (&wi
, 0, sizeof (wi
));
295 FOR_EACH_BB_FN (bb
, cfun
)
296 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
298 gimple
*stmt
= gsi_stmt (gsi
);
299 if (!is_gimple_call (stmt
) || !gimple_call_internal_p (stmt
))
301 switch (gimple_call_internal_fn (stmt
))
303 case IFN_GOMP_SIMD_LANE
:
304 case IFN_GOMP_SIMD_VF
:
305 case IFN_GOMP_SIMD_LAST_LANE
:
310 tree lhs
= gimple_call_lhs (stmt
);
311 if (lhs
== NULL_TREE
)
313 imm_use_iterator use_iter
;
315 ns
.simduid
= DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt
, 0)));
316 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, lhs
)
317 if (!is_gimple_debug (use_stmt
))
318 walk_gimple_op (use_stmt
, note_simd_array_uses_cb
, &wi
);
322 /* Shrink arrays with "omp simd array" attribute to the corresponding
323 vectorization factor. */
327 (hash_table
<simd_array_to_simduid
> *simd_array_to_simduid_htab
,
328 hash_table
<simduid_to_vf
> *simduid_to_vf_htab
)
330 for (hash_table
<simd_array_to_simduid
>::iterator iter
331 = simd_array_to_simduid_htab
->begin ();
332 iter
!= simd_array_to_simduid_htab
->end (); ++iter
)
333 if ((*iter
)->simduid
!= -1U)
335 tree decl
= (*iter
)->decl
;
337 if (simduid_to_vf_htab
)
339 simduid_to_vf
*p
= NULL
, data
;
340 data
.simduid
= (*iter
)->simduid
;
341 p
= simduid_to_vf_htab
->find (&data
);
346 = build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl
)), vf
);
347 TREE_TYPE (decl
) = atype
;
348 relayout_decl (decl
);
351 delete simd_array_to_simduid_htab
;
354 /* A helper function to free data refs. */
357 vect_destroy_datarefs (vec_info
*vinfo
)
359 struct data_reference
*dr
;
362 FOR_EACH_VEC_ELT (vinfo
->datarefs
, i
, dr
)
369 free_data_refs (vinfo
->datarefs
);
372 /* A helper function to free scev and LOOP niter information, as well as
373 clear loop constraint LOOP_C_FINITE. */
376 vect_free_loop_info_assumptions (struct loop
*loop
)
379 /* We need to explicitly reset upper bound information since they are
380 used even after free_numbers_of_iterations_estimates_loop. */
381 loop
->any_upper_bound
= false;
382 loop
->any_likely_upper_bound
= false;
383 free_numbers_of_iterations_estimates_loop (loop
);
384 loop_constraint_clear (loop
, LOOP_C_FINITE
);
387 /* Return whether STMT is inside the region we try to vectorize. */
/* VINFO is either a loop_vec_info, in which case the region is the loop it
   describes, or a bb_vec_info, in which case the region is one basic block.  */
390 vect_stmt_in_region_p (vec_info
*vinfo
, gimple
*stmt
)
/* First test: does STMT belong to any basic block at all?  */
392 if (!gimple_bb (stmt
))
/* Loop case: the test is whether STMT's block lies inside the loop recorded
   in LOOP_VINFO.  */
395 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
397 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
398 if (!flow_bb_inside_loop_p (loop
, gimple_bb (stmt
)))
/* Basic-block case: VINFO is converted with as_a, and STMT is tested for
   being in a block other than BB_VINFO_BB, carrying a uid of -1U, or being
   a PHI.  */
403 bb_vec_info bb_vinfo
= as_a
<bb_vec_info
> (vinfo
);
404 if (gimple_bb (stmt
) != BB_VINFO_BB (bb_vinfo
)
405 || gimple_uid (stmt
) == -1U
406 || gimple_code (stmt
) == GIMPLE_PHI
)
414 /* If LOOP has been versioned during ifcvt, return the internal call guarding it. */
418 vect_loop_vectorized_call (struct loop
*loop
)
420 basic_block bb
= loop_preheader_edge (loop
)->src
;
427 if (!single_pred_p (bb
))
429 bb
= single_pred (bb
);
432 if (g
&& gimple_code (g
) == GIMPLE_COND
)
434 gimple_stmt_iterator gsi
= gsi_for_stmt (g
);
436 if (!gsi_end_p (gsi
))
439 if (gimple_call_internal_p (g
, IFN_LOOP_VECTORIZED
)
440 && (tree_to_shwi (gimple_call_arg (g
, 0)) == loop
->num
441 || tree_to_shwi (gimple_call_arg (g
, 1)) == loop
->num
))
448 /* Fold LOOP_VECTORIZED internal call G to VALUE and
449 update any immediate uses of its LHS. */
452 fold_loop_vectorized_call (gimple
*g
, tree value
)
/* Remember the LHS before the call is replaced; its uses are rewritten
   below.  */
454 tree lhs
= gimple_call_lhs (g
);
456 imm_use_iterator iter
;
/* Replace the call at G's position with VALUE.  */
458 gimple_stmt_iterator gsi
= gsi_for_stmt (g
);
460 update_call_from_tree (&gsi
, value
);
/* For each statement using the old LHS, set every such use on it to VALUE
   and then update that statement.  */
461 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
463 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
464 SET_USE (use_p
, value
);
465 update_stmt (use_stmt
);
468 /* Set to 0 the uids of all the PHIs and statements in the basic blocks of
469 the scalar loop recorded for LOOP_VINFO. LOOP_VECTORIZED_CALL is the internal
470 call guarding the loop which has been if converted. */
472 set_uid_loop_bbs (loop_vec_info loop_vinfo
, gimple
*loop_vectorized_call
)
/* Argument 1 of the guarding call is the number of the scalar loop; look
   that loop up and record it in LOOP_VINFO.  */
474 tree arg
= gimple_call_arg (loop_vectorized_call
, 1);
477 struct loop
*scalar_loop
= get_loop (cfun
, tree_to_shwi (arg
));
479 LOOP_VINFO_SCALAR_LOOP (loop_vinfo
) = scalar_loop
;
/* Checking-only sanity test: the scalar loop must be guarded by this very
   call.  */
480 gcc_checking_assert (vect_loop_vectorized_call
481 (LOOP_VINFO_SCALAR_LOOP (loop_vinfo
))
482 == loop_vectorized_call
);
/* Walk every block of the scalar loop.  */
483 bbs
= get_loop_body (scalar_loop
);
484 for (i
= 0; i
< scalar_loop
->num_nodes
; i
++)
486 basic_block bb
= bbs
[i
];
487 gimple_stmt_iterator gsi
;
/* PHI nodes first ...  */
488 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
490 gimple
*phi
= gsi_stmt (gsi
);
491 gimple_set_uid (phi
, 0);
/* ... then the ordinary statements of the block.  */
493 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
495 gimple
*stmt
= gsi_stmt (gsi
);
496 gimple_set_uid (stmt
, 0);
502 /* Function vectorize_loops.
504 Entry point to loop vectorization phase. */
507 vectorize_loops (void)
510 unsigned int num_vectorized_loops
= 0;
511 unsigned int vect_loops_num
;
513 hash_table
<simduid_to_vf
> *simduid_to_vf_htab
= NULL
;
514 hash_table
<simd_array_to_simduid
> *simd_array_to_simduid_htab
= NULL
;
515 bool any_ifcvt_loops
= false;
517 struct loop
*new_loop
;
519 vect_loops_num
= number_of_loops (cfun
);
521 /* Bail out if there are no loops. */
522 if (vect_loops_num
<= 1)
525 if (cfun
->has_simduid_loops
)
526 note_simd_array_uses (&simd_array_to_simduid_htab
);
528 init_stmt_vec_info_vec ();
530 /* ----------- Analyze loops. ----------- */
532 /* If some loop was duplicated, it gets bigger number
533 than all previously defined loops. This fact allows us to run
534 only over initial loops skipping newly generated ones. */
535 FOR_EACH_LOOP (loop
, 0)
536 if (loop
->dont_vectorize
)
537 any_ifcvt_loops
= true;
538 else if ((flag_tree_loop_vectorize
539 && optimize_loop_nest_for_speed_p (loop
))
540 || loop
->force_vectorize
)
542 loop_vec_info loop_vinfo
, orig_loop_vinfo
= NULL
;
543 gimple
*loop_vectorized_call
= vect_loop_vectorized_call (loop
);
545 vect_location
= find_loop_location (loop
);
546 if (LOCATION_LOCUS (vect_location
) != UNKNOWN_LOCATION
547 && dump_enabled_p ())
548 dump_printf (MSG_NOTE
, "\nAnalyzing loop at %s:%d\n",
549 LOCATION_FILE (vect_location
),
550 LOCATION_LINE (vect_location
));
552 loop_vinfo
= vect_analyze_loop (loop
, orig_loop_vinfo
);
553 loop
->aux
= loop_vinfo
;
555 if (!loop_vinfo
|| !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo
))
557 /* Free existing information if loop is analyzed with some assumptions. */
559 if (loop_constraint_set_p (loop
, LOOP_C_FINITE
))
560 vect_free_loop_info_assumptions (loop
);
562 /* If we applied if-conversion then try to vectorize the
563 BB of innermost loops.
564 ??? Ideally BB vectorization would learn to vectorize
565 control flow by applying if-conversion on-the-fly, the
566 following retains the if-converted loop body even when
567 only non-if-converted parts took part in BB vectorization. */
568 if (flag_tree_slp_vectorize
!= 0
569 && loop_vectorized_call
572 basic_block bb
= loop
->header
;
573 bool has_mask_load_store
= false;
574 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
);
575 !gsi_end_p (gsi
); gsi_next (&gsi
))
577 gimple
*stmt
= gsi_stmt (gsi
);
578 if (is_gimple_call (stmt
)
579 && gimple_call_internal_p (stmt
)
580 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
581 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
583 has_mask_load_store
= true;
586 gimple_set_uid (stmt
, -1);
587 gimple_set_visited (stmt
, false);
589 if (! has_mask_load_store
&& vect_slp_bb (bb
))
591 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
592 "basic block vectorized\n");
593 fold_loop_vectorized_call (loop_vectorized_call
,
595 ret
|= TODO_cleanup_cfg
;
601 if (!dbg_cnt (vect_loop
))
603 /* We may miss some if-converted loops due to
604 debug counter. Set any_ifcvt_loops to visit
605 them at finalization. */
606 any_ifcvt_loops
= true;
607 /* Free existing information if loop is analyzed with some assumptions. */
609 if (loop_constraint_set_p (loop
, LOOP_C_FINITE
))
610 vect_free_loop_info_assumptions (loop
);
615 if (loop_vectorized_call
)
616 set_uid_loop_bbs (loop_vinfo
, loop_vectorized_call
);
617 if (LOCATION_LOCUS (vect_location
) != UNKNOWN_LOCATION
618 && dump_enabled_p ())
619 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
620 "loop vectorized\n");
621 new_loop
= vect_transform_loop (loop_vinfo
);
622 num_vectorized_loops
++;
623 /* Now that the loop has been vectorized, allow it to be unrolled. */
625 loop
->force_vectorize
= false;
629 simduid_to_vf
*simduid_to_vf_data
= XNEW (simduid_to_vf
);
630 if (!simduid_to_vf_htab
)
631 simduid_to_vf_htab
= new hash_table
<simduid_to_vf
> (15);
632 simduid_to_vf_data
->simduid
= DECL_UID (loop
->simduid
);
633 simduid_to_vf_data
->vf
= loop_vinfo
->vectorization_factor
;
634 *simduid_to_vf_htab
->find_slot (simduid_to_vf_data
, INSERT
)
635 = simduid_to_vf_data
;
638 if (loop_vectorized_call
)
640 fold_loop_vectorized_call (loop_vectorized_call
, boolean_true_node
);
641 ret
|= TODO_cleanup_cfg
;
646 /* Epilogue of vectorized loop must be vectorized too. */
647 vect_loops_num
= number_of_loops (cfun
);
649 orig_loop_vinfo
= loop_vinfo
; /* To pass vect_analyze_loop. */
650 goto vectorize_epilogue
;
654 vect_location
= UNKNOWN_LOCATION
;
656 statistics_counter_event (cfun
, "Vectorized loops", num_vectorized_loops
);
657 if (dump_enabled_p ()
658 || (num_vectorized_loops
> 0 && dump_enabled_p ()))
659 dump_printf_loc (MSG_NOTE
, vect_location
,
660 "vectorized %u loops in function.\n",
661 num_vectorized_loops
);
663 /* ----------- Finalize. ----------- */
666 for (i
= 1; i
< vect_loops_num
; i
++)
668 loop
= get_loop (cfun
, i
);
669 if (loop
&& loop
->dont_vectorize
)
671 gimple
*g
= vect_loop_vectorized_call (loop
);
674 fold_loop_vectorized_call (g
, boolean_false_node
);
675 ret
|= TODO_cleanup_cfg
;
680 for (i
= 1; i
< vect_loops_num
; i
++)
682 loop_vec_info loop_vinfo
;
685 loop
= get_loop (cfun
, i
);
688 loop_vinfo
= (loop_vec_info
) loop
->aux
;
689 has_mask_store
= false;
691 has_mask_store
= LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
);
692 destroy_loop_vec_info (loop_vinfo
, true);
694 optimize_mask_stores (loop
);
698 free_stmt_vec_info_vec ();
700 /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. */
701 if (cfun
->has_simduid_loops
)
702 adjust_simduid_builtins (simduid_to_vf_htab
);
704 /* Shrink any "omp simd array" temporary arrays to the
705 actual vectorization factors. */
706 if (simd_array_to_simduid_htab
)
707 shrink_simd_arrays (simd_array_to_simduid_htab
, simduid_to_vf_htab
);
708 delete simduid_to_vf_htab
;
709 cfun
->has_simduid_loops
= false;
711 if (num_vectorized_loops
> 0)
713 /* If we vectorized any loop only virtual SSA form needs to be updated.
714 ??? Also while we try hard to update loop-closed SSA form we fail
715 to properly do this in some corner-cases (see PR56286). */
716 rewrite_into_loop_closed_ssa (NULL
, TODO_update_ssa_only_virtuals
);
717 return TODO_cleanup_cfg
;
724 /* Entry point to the simduid cleanup pass. */
728 const pass_data pass_data_simduid_cleanup
=
730 GIMPLE_PASS
, /* type */
731 "simduid", /* name */
732 OPTGROUP_NONE
, /* optinfo_flags */
734 ( PROP_ssa
| PROP_cfg
), /* properties_required */
735 0, /* properties_provided */
736 0, /* properties_destroyed */
737 0, /* todo_flags_start */
738 0, /* todo_flags_finish */
741 class pass_simduid_cleanup
: public gimple_opt_pass
744 pass_simduid_cleanup (gcc::context
*ctxt
)
745 : gimple_opt_pass (pass_data_simduid_cleanup
, ctxt
)
748 /* opt_pass methods: */
749 opt_pass
* clone () { return new pass_simduid_cleanup (m_ctxt
); }
750 virtual bool gate (function
*fun
) { return fun
->has_simduid_loops
; }
751 virtual unsigned int execute (function
*);
753 }; // class pass_simduid_cleanup
/* Run the simduid cleanup on FUN: record the "omp simd array" temporaries,
   fold the GOMP_SIMD internal calls, shrink the recorded arrays, and clear
   FUN's has_simduid_loops flag.  */
756 pass_simduid_cleanup::execute (function
*fun
)
/* Starts out NULL and is passed by address so note_simd_array_uses can
   fill it in; the NULL test further down covers the case where it was
   left untouched.  */
758 hash_table
<simd_array_to_simduid
> *simd_array_to_simduid_htab
= NULL
;
760 note_simd_array_uses (&simd_array_to_simduid_htab
);
762 /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. */
/* No simduid-to-VF table exists in this pass, hence the NULL argument.  */
763 adjust_simduid_builtins (NULL
);
765 /* Shrink any "omp simd array" temporary arrays to the
766 actual vectorization factors. */
767 if (simd_array_to_simduid_htab
)
768 shrink_simd_arrays (simd_array_to_simduid_htab
, NULL
);
/* This is the flag the pass's gate () tests.  */
769 fun
->has_simduid_loops
= false;
776 make_pass_simduid_cleanup (gcc::context
*ctxt
)
778 return new pass_simduid_cleanup (ctxt
);
782 /* Entry point to basic block SLP phase. */
786 const pass_data pass_data_slp_vectorize
=
788 GIMPLE_PASS
, /* type */
790 OPTGROUP_LOOP
| OPTGROUP_VEC
, /* optinfo_flags */
791 TV_TREE_SLP_VECTORIZATION
, /* tv_id */
792 ( PROP_ssa
| PROP_cfg
), /* properties_required */
793 0, /* properties_provided */
794 0, /* properties_destroyed */
795 0, /* todo_flags_start */
796 TODO_update_ssa
, /* todo_flags_finish */
799 class pass_slp_vectorize
: public gimple_opt_pass
802 pass_slp_vectorize (gcc::context
*ctxt
)
803 : gimple_opt_pass (pass_data_slp_vectorize
, ctxt
)
806 /* opt_pass methods: */
807 opt_pass
* clone () { return new pass_slp_vectorize (m_ctxt
); }
808 virtual bool gate (function
*) { return flag_tree_slp_vectorize
!= 0; }
809 virtual unsigned int execute (function
*);
811 }; // class pass_slp_vectorize
/* Run basic-block SLP vectorization over every basic block of FUN.  */
814 pass_slp_vectorize::execute (function
*fun
)
/* If scalar evolution is already initialized the pass is taken to be running
   inside the loop pipeline; otherwise the loop optimizer is initialized here
   and finalized again at the end of the function.  */
818 bool in_loop_pipeline
= scev_initialized_p ();
819 if (!in_loop_pipeline
)
821 loop_optimizer_init (LOOPS_NORMAL
);
825 /* Mark all stmts as not belonging to the current region and unvisited. */
/* A uid of -1 is the value vect_stmt_in_region_p compares against (-1U).  */
826 FOR_EACH_BB_FN (bb
, fun
)
828 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
831 gimple
*stmt
= gsi_stmt (gsi
);
832 gimple_set_uid (stmt
, -1);
833 gimple_set_visited (stmt
, false);
837 init_stmt_vec_info_vec ();
/* Try SLP on each block in turn and emit a dump message for every block
   for which vect_slp_bb returns true.  */
839 FOR_EACH_BB_FN (bb
, fun
)
841 if (vect_slp_bb (bb
))
842 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
843 "basic block vectorized\n");
846 free_stmt_vec_info_vec ();
/* Undo the loop optimizer setup done above, if any.  */
848 if (!in_loop_pipeline
)
851 loop_optimizer_finalize ();
860 make_pass_slp_vectorize (gcc::context
*ctxt
)
862 return new pass_slp_vectorize (ctxt
);
866 /* Increase alignment of global arrays to improve vectorization potential.
868 - Consider also structs that have an array field.
869 - Use ipa analysis to prune arrays that can't be vectorized?
870 This should involve global alignment analysis and in the future also
873 static unsigned get_vec_alignment_for_type (tree
);
874 static hash_map
<tree
, unsigned> *type_align_map
;
876 /* Return alignment of the vector type corresponding to the scalar type of
877 array TYPE. 0 if no vector type exists. */
879 get_vec_alignment_for_array_type (tree type
)
/* Callers must pass an ARRAY_TYPE.  */
881 gcc_assert (TREE_CODE (type
) == ARRAY_TYPE
);
/* The vector type is looked up for the result of strip_array_types,
   presumably the innermost element type of a possibly multi-dimensional
   array -- TODO confirm.  */
883 tree vectype
= get_vectype_for_scalar_type (strip_array_types (type
));
/* Conditions tested before VECTYPE's alignment is returned: the array size
   is not an INTEGER_CST, or the array is smaller than one vector.  */
886 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
887 || tree_int_cst_lt (TYPE_SIZE (type
), TYPE_SIZE (vectype
)))
890 return TYPE_ALIGN (vectype
);
893 /* Return alignment of field having maximum alignment of vector type
894 corresponding to its scalar type. For now, we only consider fields whose
895 offset is a multiple of its vector alignment.
896 Return 0 if no suitable field is found. */
898 get_vec_alignment_for_record_type (tree type
)
900 gcc_assert (TREE_CODE (type
) == RECORD_TYPE
);
902 unsigned max_align
= 0, alignment
;
903 HOST_WIDE_INT offset
;
906 if (TYPE_PACKED (type
))
909 unsigned *slot
= type_align_map
->get (type
);
913 for (tree field
= first_field (type
);
915 field
= DECL_CHAIN (field
))
917 /* Skip if not FIELD_DECL or if alignment is set by user. */
918 if (TREE_CODE (field
) != FIELD_DECL
919 || DECL_USER_ALIGN (field
)
920 || DECL_ARTIFICIAL (field
))
923 /* We don't need to process the type further if offset is variable,
924 since the offsets of remaining members will also be variable. */
925 if (TREE_CODE (DECL_FIELD_OFFSET (field
)) != INTEGER_CST
926 || TREE_CODE (DECL_FIELD_BIT_OFFSET (field
)) != INTEGER_CST
)
929 /* Similarly stop processing the type if offset_tree
930 does not fit in unsigned HOST_WIDE_INT. */
931 offset_tree
= bit_position (field
);
932 if (!tree_fits_uhwi_p (offset_tree
))
935 offset
= tree_to_uhwi (offset_tree
);
936 alignment
= get_vec_alignment_for_type (TREE_TYPE (field
));
938 /* Get maximum alignment of vectorized field/array among those members
939 whose offset is multiple of the vector alignment. */
941 && (offset
% alignment
== 0)
942 && (alignment
> max_align
))
943 max_align
= alignment
;
946 type_align_map
->put (type
, max_align
);
950 /* Return alignment of vector type corresponding to decl's scalar type
951 or 0 if it doesn't exist or the vector alignment is lesser than
954 get_vec_alignment_for_type (tree type
)
956 if (type
== NULL_TREE
)
959 gcc_assert (TYPE_P (type
));
961 static unsigned alignment
= 0;
962 switch (TREE_CODE (type
))
965 alignment
= get_vec_alignment_for_array_type (type
);
968 alignment
= get_vec_alignment_for_record_type (type
);
975 return (alignment
> TYPE_ALIGN (type
)) ? alignment
: 0;
978 /* Entry point to increase_alignment pass. */
/* Walks every defined variable and, where a larger vector alignment applies
   and can be forced, raises the variable's alignment to it.  */
980 increase_alignment (void)
984 vect_location
= UNKNOWN_LOCATION
;
/* Map from type to alignment, read and written by
   get_vec_alignment_for_record_type; it is allocated here and deleted at
   the end of this function.  */
985 type_align_map
= new hash_map
<tree
, unsigned>;
987 /* Increase the alignment of all global arrays for vectorization. */
988 FOR_EACH_DEFINED_VARIABLE (vnode
)
990 tree decl
= vnode
->decl
;
991 unsigned int alignment
;
/* Test for decls that are in the symbol table but whose alignment cannot be
   increased, that carry a user-specified alignment, or that are
   artificial.  */
993 if ((decl_in_symtab_p (decl
)
994 && !symtab_node::get (decl
)->can_increase_alignment_p ())
995 || DECL_USER_ALIGN (decl
) || DECL_ARTIFICIAL (decl
))
/* A result of 0 means there is nothing to do for this decl (see the test
   that follows).  */
998 alignment
= get_vec_alignment_for_type (TREE_TYPE (decl
));
999 if (alignment
&& vect_can_force_dr_alignment_p (decl
, alignment
))
1001 vnode
->increase_alignment (alignment
);
1002 dump_printf (MSG_NOTE
, "Increasing alignment of decl: ");
1003 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, decl
);
1004 dump_printf (MSG_NOTE
, "\n");
1008 delete type_align_map
;
1015 const pass_data pass_data_ipa_increase_alignment
=
1017 SIMPLE_IPA_PASS
, /* type */
1018 "increase_alignment", /* name */
1019 OPTGROUP_LOOP
| OPTGROUP_VEC
, /* optinfo_flags */
1020 TV_IPA_OPT
, /* tv_id */
1021 0, /* properties_required */
1022 0, /* properties_provided */
1023 0, /* properties_destroyed */
1024 0, /* todo_flags_start */
1025 0, /* todo_flags_finish */
1028 class pass_ipa_increase_alignment
: public simple_ipa_opt_pass
1031 pass_ipa_increase_alignment (gcc::context
*ctxt
)
1032 : simple_ipa_opt_pass (pass_data_ipa_increase_alignment
, ctxt
)
1035 /* opt_pass methods: */
1036 virtual bool gate (function
*)
1038 return flag_section_anchors
&& flag_tree_loop_vectorize
;
1041 virtual unsigned int execute (function
*) { return increase_alignment (); }
1043 }; // class pass_ipa_increase_alignment
1047 simple_ipa_opt_pass
*
1048 make_pass_ipa_increase_alignment (gcc::context
*ctxt
)
1050 return new pass_ipa_increase_alignment (ctxt
);