/* Vectorizer
   Copyright (C) 2003-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Loop and basic block vectorizer.

  This file contains drivers for the three vectorizers:
  (1) loop vectorizer (inter-iteration parallelism),
  (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
      vectorizer)
  (3) BB vectorizer (out-of-loops), aka SLP

  The rest of the vectorizer's code is organized as follows:
  - tree-vect-loop.c - loop specific parts such as reductions, etc.  These are
    used by drivers (1) and (2).
  - tree-vect-loop-manip.c - vectorizer's loop control-flow utilities, used by
    drivers (1) and (2).
  - tree-vect-slp.c - BB vectorization specific analysis and transformation,
    used by drivers (2) and (3).
  - tree-vect-stmts.c - statements analysis and transformation (used by all).
  - tree-vect-data-refs.c - vectorizer specific data-refs analysis and
    manipulations (used by all).
  - tree-vect-patterns.c - vectorizable code patterns detector (used by all)

  Here's a poor attempt at illustrating that:

     tree-vectorizer.c:
     loop_vect()  loop_aware_slp()  slp_vect()
          |        /           \          /
          |       /             \        /
          tree-vect-loop.c  tree-vect-slp.c
                | \      \  /      / |
                |  \      \/      /  |
                |   \     /\     /   |
                |    \   /  \   /    |
         tree-vect-stmts.c  tree-vect-data-refs.c
                       \      /
                    tree-vect-patterns.c
*/
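
/* A minimal illustration (not taken from this file) of what driver (1)
   does: a countable loop such as

     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   is rewritten to operate on vectors of VF elements per iteration,
   followed by a scalar epilogue loop for the remaining iterations.
   The drivers below only orchestrate this; the analyses and
   transformations live in the tree-vect-*.c files listed above.  */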

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-cfg.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "tree-ssa-propagate.h"
#include "dbgcnt.h"
#include "tree-scalar-evolution.h"


/* Loop or bb location.  */
source_location vect_location;

/* Vector mapping GIMPLE stmt to stmt_vec_info.  */
vec<stmt_vec_info> stmt_vec_info_vec;
\f
/* For mapping simduid to vectorization factor.  */

struct simduid_to_vf : free_ptr_hash<simduid_to_vf>
{
  unsigned int simduid;
  int vf;

  /* hash_table support.  */
  static inline hashval_t hash (const simduid_to_vf *);
  static inline int equal (const simduid_to_vf *, const simduid_to_vf *);
};

inline hashval_t
simduid_to_vf::hash (const simduid_to_vf *p)
{
  return p->simduid;
}

inline int
simduid_to_vf::equal (const simduid_to_vf *p1, const simduid_to_vf *p2)
{
  return p1->simduid == p2->simduid;
}

/* This hash maps the OMP simd array to the corresponding simduid used
   to index into it.  For example, given

     _7 = GOMP_SIMD_LANE (simduid.0)
     ...
     ...
     D.1737[_7] = stuff;

   the hash maps the OMP simd array (D.1737[]) to the DECL_UID of
   simduid.0.  */

struct simd_array_to_simduid : free_ptr_hash<simd_array_to_simduid>
{
  tree decl;
  unsigned int simduid;

  /* hash_table support.  */
  static inline hashval_t hash (const simd_array_to_simduid *);
  static inline int equal (const simd_array_to_simduid *,
                           const simd_array_to_simduid *);
};

inline hashval_t
simd_array_to_simduid::hash (const simd_array_to_simduid *p)
{
  return DECL_UID (p->decl);
}

inline int
simd_array_to_simduid::equal (const simd_array_to_simduid *p1,
                              const simd_array_to_simduid *p2)
{
  return p1->decl == p2->decl;
}

/* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF and IFN_GOMP_SIMD_LAST_LANE
   into their corresponding constants and remove
   IFN_GOMP_SIMD_ORDERED_{START,END}.  */
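
/* For illustration (values hypothetical): assuming a vectorization
   factor of 8 was recorded for simduid.0, the folding below turns

     _5 = GOMP_SIMD_VF (simduid.0);
     _6 = GOMP_SIMD_LANE (simduid.0);

   into

     _5 = 8;
     _6 = 0;

   GOMP_SIMD_VF folds to the recorded VF (1 if none was recorded) and
   GOMP_SIMD_LANE folds to lane 0.  */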

static void
adjust_simduid_builtins (hash_table<simduid_to_vf> *htab)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator i;

      for (i = gsi_start_bb (bb); !gsi_end_p (i); )
        {
          unsigned int vf = 1;
          enum internal_fn ifn;
          gimple *stmt = gsi_stmt (i);
          tree t;
          if (!is_gimple_call (stmt)
              || !gimple_call_internal_p (stmt))
            {
              gsi_next (&i);
              continue;
            }
          ifn = gimple_call_internal_fn (stmt);
          switch (ifn)
            {
            case IFN_GOMP_SIMD_LANE:
            case IFN_GOMP_SIMD_VF:
            case IFN_GOMP_SIMD_LAST_LANE:
              break;
            case IFN_GOMP_SIMD_ORDERED_START:
            case IFN_GOMP_SIMD_ORDERED_END:
              if (integer_onep (gimple_call_arg (stmt, 0)))
                {
                  enum built_in_function bcode
                    = (ifn == IFN_GOMP_SIMD_ORDERED_START
                       ? BUILT_IN_GOMP_ORDERED_START
                       : BUILT_IN_GOMP_ORDERED_END);
                  gimple *g
                    = gimple_build_call (builtin_decl_explicit (bcode), 0);
                  tree vdef = gimple_vdef (stmt);
                  gimple_set_vdef (g, vdef);
                  SSA_NAME_DEF_STMT (vdef) = g;
                  gimple_set_vuse (g, gimple_vuse (stmt));
                  gsi_replace (&i, g, true);
                  continue;
                }
              gsi_remove (&i, true);
              unlink_stmt_vdef (stmt);
              continue;
            default:
              gsi_next (&i);
              continue;
            }
          tree arg = gimple_call_arg (stmt, 0);
          gcc_assert (arg != NULL_TREE);
          gcc_assert (TREE_CODE (arg) == SSA_NAME);
          simduid_to_vf *p = NULL, data;
          data.simduid = DECL_UID (SSA_NAME_VAR (arg));
          /* We need to reset the loop's safelen field since its value
             is not valid after the transformation.  */
          if (bb->loop_father && bb->loop_father->safelen > 0)
            bb->loop_father->safelen = 0;
          if (htab)
            {
              p = htab->find (&data);
              if (p)
                vf = p->vf;
            }
          switch (ifn)
            {
            case IFN_GOMP_SIMD_VF:
              t = build_int_cst (unsigned_type_node, vf);
              break;
            case IFN_GOMP_SIMD_LANE:
              t = build_int_cst (unsigned_type_node, 0);
              break;
            case IFN_GOMP_SIMD_LAST_LANE:
              t = gimple_call_arg (stmt, 1);
              break;
            default:
              gcc_unreachable ();
            }
          update_call_from_tree (&i, t);
          gsi_next (&i);
        }
    }
}

/* Helper structure for note_simd_array_uses.  */

struct note_simd_array_uses_struct
{
  hash_table<simd_array_to_simduid> **htab;
  unsigned int simduid;
};

/* Callback for note_simd_array_uses, called through walk_gimple_op.  */

static tree
note_simd_array_uses_cb (tree *tp, int *walk_subtrees, void *data)
{
  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
  struct note_simd_array_uses_struct *ns
    = (struct note_simd_array_uses_struct *) wi->info;

  if (TYPE_P (*tp))
    *walk_subtrees = 0;
  else if (VAR_P (*tp)
           && lookup_attribute ("omp simd array", DECL_ATTRIBUTES (*tp))
           && DECL_CONTEXT (*tp) == current_function_decl)
    {
      simd_array_to_simduid data;
      if (!*ns->htab)
        *ns->htab = new hash_table<simd_array_to_simduid> (15);
      data.decl = *tp;
      data.simduid = ns->simduid;
      simd_array_to_simduid **slot = (*ns->htab)->find_slot (&data, INSERT);
      if (*slot == NULL)
        {
          simd_array_to_simduid *p = XNEW (simd_array_to_simduid);
          *p = data;
          *slot = p;
        }
      else if ((*slot)->simduid != ns->simduid)
        (*slot)->simduid = -1U;
      *walk_subtrees = 0;
    }
  return NULL_TREE;
}

/* Find "omp simd array" temporaries and map them to corresponding
   simduid.  */

static void
note_simd_array_uses (hash_table<simd_array_to_simduid> **htab)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  struct walk_stmt_info wi;
  struct note_simd_array_uses_struct ns;

  memset (&wi, 0, sizeof (wi));
  wi.info = &ns;
  ns.htab = htab;

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple *stmt = gsi_stmt (gsi);
        if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
          continue;
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_GOMP_SIMD_LANE:
          case IFN_GOMP_SIMD_VF:
          case IFN_GOMP_SIMD_LAST_LANE:
            break;
          default:
            continue;
          }
        tree lhs = gimple_call_lhs (stmt);
        if (lhs == NULL_TREE)
          continue;
        imm_use_iterator use_iter;
        gimple *use_stmt;
        ns.simduid = DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt, 0)));
        FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
          if (!is_gimple_debug (use_stmt))
            walk_gimple_op (use_stmt, note_simd_array_uses_cb, &wi);
      }
}

/* Shrink arrays with "omp simd array" attribute to the corresponding
   vectorization factor.  */

static void
shrink_simd_arrays
  (hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab,
   hash_table<simduid_to_vf> *simduid_to_vf_htab)
{
  for (hash_table<simd_array_to_simduid>::iterator iter
         = simd_array_to_simduid_htab->begin ();
       iter != simd_array_to_simduid_htab->end (); ++iter)
    if ((*iter)->simduid != -1U)
      {
        tree decl = (*iter)->decl;
        int vf = 1;
        if (simduid_to_vf_htab)
          {
            simduid_to_vf *p = NULL, data;
            data.simduid = (*iter)->simduid;
            p = simduid_to_vf_htab->find (&data);
            if (p)
              vf = p->vf;
          }
        tree atype
          = build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl)), vf);
        TREE_TYPE (decl) = atype;
        relayout_decl (decl);
      }

  delete simd_array_to_simduid_htab;
}
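
/* For illustration (sizes hypothetical): an "omp simd array" such as

     float D.1737[16];

   created for a simd loop whose maximum vectorization factor was 16
   would, if the loop was actually vectorized with VF == 8, be shrunk
   by the code above to

     float D.1737[8];

   so its storage matches the vectorization factor really used.  */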
\f
/* A helper function to free data refs.  */

void
vect_destroy_datarefs (vec_info *vinfo)
{
  struct data_reference *dr;
  unsigned int i;

  FOR_EACH_VEC_ELT (vinfo->datarefs, i, dr)
    if (dr->aux)
      {
        free (dr->aux);
        dr->aux = NULL;
      }

  free_data_refs (vinfo->datarefs);
}

/* A helper function to free SCEV and LOOP niter information, as well as
   clear loop constraint LOOP_C_FINITE.  */

void
vect_free_loop_info_assumptions (struct loop *loop)
{
  scev_reset_htab ();
  /* We need to explicitly reset upper bound information since it is
     used even after free_numbers_of_iterations_estimates_loop.  */
  loop->any_upper_bound = false;
  loop->any_likely_upper_bound = false;
  free_numbers_of_iterations_estimates_loop (loop);
  loop_constraint_clear (loop, LOOP_C_FINITE);
}

/* Return whether STMT is inside the region we try to vectorize.  */

bool
vect_stmt_in_region_p (vec_info *vinfo, gimple *stmt)
{
  if (!gimple_bb (stmt))
    return false;

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
        return false;
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      if (gimple_bb (stmt) != BB_VINFO_BB (bb_vinfo)
          || gimple_uid (stmt) == -1U
          || gimple_code (stmt) == GIMPLE_PHI)
        return false;
    }

  return true;
}


/* If LOOP has been versioned during ifcvt, return the internal call
   guarding it.  */

static gimple *
vect_loop_vectorized_call (struct loop *loop)
{
  basic_block bb = loop_preheader_edge (loop)->src;
  gimple *g;
  do
    {
      g = last_stmt (bb);
      if (g)
        break;
      if (!single_pred_p (bb))
        break;
      bb = single_pred (bb);
    }
  while (1);
  if (g && gimple_code (g) == GIMPLE_COND)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (g);
      gsi_prev (&gsi);
      if (!gsi_end_p (gsi))
        {
          g = gsi_stmt (gsi);
          if (gimple_call_internal_p (g, IFN_LOOP_VECTORIZED)
              && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->num
                  || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num))
            return g;
        }
    }
  return NULL;
}
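
/* For illustration (loop numbers hypothetical): after if-conversion
   has versioned a loop, the relevant part of the CFG looks roughly
   like

     _9 = LOOP_VECTORIZED (1, 2);
     if (_9 != 0)
       goto <loop 1>;  // if-converted copy, candidate for vectorization
     else
       goto <loop 2>;  // original scalar copy

   and vect_loop_vectorized_call finds that internal call starting from
   the preheader of either copy.  */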

/* Fold LOOP_VECTORIZED internal call G to VALUE and
   update any immediate uses of its LHS.  */

static void
fold_loop_vectorized_call (gimple *g, tree value)
{
  tree lhs = gimple_call_lhs (g);
  use_operand_p use_p;
  imm_use_iterator iter;
  gimple *use_stmt;
  gimple_stmt_iterator gsi = gsi_for_stmt (g);

  update_call_from_tree (&gsi, value);
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
        SET_USE (use_p, value);
      update_stmt (use_stmt);
    }
}

/* Set the uids of all the statements in basic blocks inside loop
   represented by LOOP_VINFO.  LOOP_VECTORIZED_CALL is the internal
   call guarding the loop which has been if-converted.  */

static void
set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
{
  tree arg = gimple_call_arg (loop_vectorized_call, 1);
  basic_block *bbs;
  unsigned int i;
  struct loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg));

  LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
  gcc_checking_assert (vect_loop_vectorized_call
                         (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
                       == loop_vectorized_call);
  bbs = get_loop_body (scalar_loop);
  for (i = 0; i < scalar_loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *phi = gsi_stmt (gsi);
          gimple_set_uid (phi, 0);
        }
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          gimple_set_uid (stmt, 0);
        }
    }
  free (bbs);
}

/* Function vectorize_loops.

   Entry point to loop vectorization phase.  */

unsigned
vectorize_loops (void)
{
  unsigned int i;
  unsigned int num_vectorized_loops = 0;
  unsigned int vect_loops_num;
  struct loop *loop;
  hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL;
  hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
  bool any_ifcvt_loops = false;
  unsigned ret = 0;
  struct loop *new_loop;

  vect_loops_num = number_of_loops (cfun);

  /* Bail out if there are no loops.  */
  if (vect_loops_num <= 1)
    return 0;

  if (cfun->has_simduid_loops)
    note_simd_array_uses (&simd_array_to_simduid_htab);

  init_stmt_vec_info_vec ();

  /* ----------- Analyze loops. ----------- */

  /* If a loop gets duplicated, the copy receives a number larger than
     that of any previously defined loop.  This lets us iterate over
     just the initial loops, skipping newly generated ones.  */
  FOR_EACH_LOOP (loop, 0)
    if (loop->dont_vectorize)
      any_ifcvt_loops = true;
    else if ((flag_tree_loop_vectorize
              && optimize_loop_nest_for_speed_p (loop))
             || loop->force_vectorize)
      {
        loop_vec_info loop_vinfo, orig_loop_vinfo = NULL;
        gimple *loop_vectorized_call = vect_loop_vectorized_call (loop);
vectorize_epilogue:
        vect_location = find_loop_location (loop);
        if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
            && dump_enabled_p ())
          dump_printf (MSG_NOTE, "\nAnalyzing loop at %s:%d\n",
                       LOCATION_FILE (vect_location),
                       LOCATION_LINE (vect_location));

        loop_vinfo = vect_analyze_loop (loop, orig_loop_vinfo);
        loop->aux = loop_vinfo;

        if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
          {
            /* Free existing information if loop is analyzed with some
               assumptions.  */
            if (loop_constraint_set_p (loop, LOOP_C_FINITE))
              vect_free_loop_info_assumptions (loop);

            /* If we applied if-conversion then try to vectorize the
               BB of innermost loops.
               ??? Ideally BB vectorization would learn to vectorize
               control flow by applying if-conversion on-the-fly; the
               following retains the if-converted loop body even when
               only non-if-converted parts took part in BB
               vectorization.  */
            if (flag_tree_slp_vectorize != 0
                && loop_vectorized_call
                && ! loop->inner)
              {
                basic_block bb = loop->header;
                bool has_mask_load_store = false;
                for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
                     !gsi_end_p (gsi); gsi_next (&gsi))
                  {
                    gimple *stmt = gsi_stmt (gsi);
                    if (is_gimple_call (stmt)
                        && gimple_call_internal_p (stmt)
                        && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
                            || gimple_call_internal_fn (stmt)
                               == IFN_MASK_STORE))
                      {
                        has_mask_load_store = true;
                        break;
                      }
                    gimple_set_uid (stmt, -1);
                    gimple_set_visited (stmt, false);
                  }
                if (! has_mask_load_store && vect_slp_bb (bb))
                  {
                    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                                     "basic block vectorized\n");
                    fold_loop_vectorized_call (loop_vectorized_call,
                                               boolean_true_node);
                    ret |= TODO_cleanup_cfg;
                  }
              }
            continue;
          }

        if (!dbg_cnt (vect_loop))
          {
            /* We may miss some if-converted loops due to the debug
               counter.  Set any_ifcvt_loops so that we visit them at
               finalization.  */
            any_ifcvt_loops = true;
            /* Free existing information if loop is analyzed with some
               assumptions.  */
            if (loop_constraint_set_p (loop, LOOP_C_FINITE))
              vect_free_loop_info_assumptions (loop);

            break;
          }

        if (loop_vectorized_call)
          set_uid_loop_bbs (loop_vinfo, loop_vectorized_call);
        if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
            && dump_enabled_p ())
          dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                           "loop vectorized\n");
        new_loop = vect_transform_loop (loop_vinfo);
        num_vectorized_loops++;
        /* Now that the loop has been vectorized, allow it to be unrolled
           etc.  */
        loop->force_vectorize = false;

        if (loop->simduid)
          {
            simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf);
            if (!simduid_to_vf_htab)
              simduid_to_vf_htab = new hash_table<simduid_to_vf> (15);
            simduid_to_vf_data->simduid = DECL_UID (loop->simduid);
            simduid_to_vf_data->vf = loop_vinfo->vectorization_factor;
            *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT)
              = simduid_to_vf_data;
          }

        if (loop_vectorized_call)
          {
            fold_loop_vectorized_call (loop_vectorized_call, boolean_true_node);
            ret |= TODO_cleanup_cfg;
          }

        if (new_loop)
          {
            /* The epilogue of a vectorized loop must be vectorized too.  */
            vect_loops_num = number_of_loops (cfun);
            loop = new_loop;
            orig_loop_vinfo = loop_vinfo;  /* To pass to vect_analyze_loop.  */
            goto vectorize_epilogue;
          }
      }

  vect_location = UNKNOWN_LOCATION;

  statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vectorized %u loops in function.\n",
                     num_vectorized_loops);

  /* ----------- Finalize. ----------- */

  if (any_ifcvt_loops)
    for (i = 1; i < vect_loops_num; i++)
      {
        loop = get_loop (cfun, i);
        if (loop && loop->dont_vectorize)
          {
            gimple *g = vect_loop_vectorized_call (loop);
            if (g)
              {
                fold_loop_vectorized_call (g, boolean_false_node);
                ret |= TODO_cleanup_cfg;
              }
          }
      }

  for (i = 1; i < vect_loops_num; i++)
    {
      loop_vec_info loop_vinfo;
      bool has_mask_store;

      loop = get_loop (cfun, i);
      if (!loop)
        continue;
      loop_vinfo = (loop_vec_info) loop->aux;
      has_mask_store = false;
      if (loop_vinfo)
        has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
      destroy_loop_vec_info (loop_vinfo, true);
      if (has_mask_store)
        optimize_mask_stores (loop);
      loop->aux = NULL;
    }

  free_stmt_vec_info_vec ();

  /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
  if (cfun->has_simduid_loops)
    adjust_simduid_builtins (simduid_to_vf_htab);

  /* Shrink any "omp simd array" temporary arrays to the
     actual vectorization factors.  */
  if (simd_array_to_simduid_htab)
    shrink_simd_arrays (simd_array_to_simduid_htab, simduid_to_vf_htab);
  delete simduid_to_vf_htab;
  cfun->has_simduid_loops = false;

  if (num_vectorized_loops > 0)
    {
      /* If we vectorized any loop, only virtual SSA form needs to be
         updated.
         ??? Also, while we try hard to update loop-closed SSA form we
         fail to properly do this in some corner cases (see PR56286).  */
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
      return TODO_cleanup_cfg;
    }

  return ret;
}


/* Entry point to the simduid cleanup pass.  */

namespace {

const pass_data pass_data_simduid_cleanup =
{
  GIMPLE_PASS, /* type */
  "simduid", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_simduid_cleanup : public gimple_opt_pass
{
public:
  pass_simduid_cleanup (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_simduid_cleanup, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_simduid_cleanup (m_ctxt); }
  virtual bool gate (function *fun) { return fun->has_simduid_loops; }
  virtual unsigned int execute (function *);

}; // class pass_simduid_cleanup

unsigned int
pass_simduid_cleanup::execute (function *fun)
{
  hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;

  note_simd_array_uses (&simd_array_to_simduid_htab);

  /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
  adjust_simduid_builtins (NULL);

  /* Shrink any "omp simd array" temporary arrays to the
     actual vectorization factors.  */
  if (simd_array_to_simduid_htab)
    shrink_simd_arrays (simd_array_to_simduid_htab, NULL);
  fun->has_simduid_loops = false;
  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_simduid_cleanup (gcc::context *ctxt)
{
  return new pass_simduid_cleanup (ctxt);
}


/* Entry point to basic block SLP phase.  */

namespace {

const pass_data pass_data_slp_vectorize =
{
  GIMPLE_PASS, /* type */
  "slp", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_TREE_SLP_VECTORIZATION, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_slp_vectorize : public gimple_opt_pass
{
public:
  pass_slp_vectorize (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_slp_vectorize, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_slp_vectorize (m_ctxt); }
  virtual bool gate (function *) { return flag_tree_slp_vectorize != 0; }
  virtual unsigned int execute (function *);

}; // class pass_slp_vectorize

unsigned int
pass_slp_vectorize::execute (function *fun)
{
  basic_block bb;

  bool in_loop_pipeline = scev_initialized_p ();
  if (!in_loop_pipeline)
    {
      loop_optimizer_init (LOOPS_NORMAL);
      scev_initialize ();
    }

  /* Mark all stmts as not belonging to the current region and unvisited.  */
  FOR_EACH_BB_FN (bb, fun)
    {
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          gimple_set_uid (stmt, -1);
          gimple_set_visited (stmt, false);
        }
    }

  init_stmt_vec_info_vec ();

  FOR_EACH_BB_FN (bb, fun)
    {
      if (vect_slp_bb (bb))
        dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                         "basic block vectorized\n");
    }

  free_stmt_vec_info_vec ();

  if (!in_loop_pipeline)
    {
      scev_finalize ();
      loop_optimizer_finalize ();
    }

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_slp_vectorize (gcc::context *ctxt)
{
  return new pass_slp_vectorize (ctxt);
}


/* Increase alignment of global arrays to improve vectorization potential.
   TODO:
   - Consider also structs that have an array field.
   - Use ipa analysis to prune arrays that can't be vectorized?
     This should involve global alignment analysis and in the future also
     array padding.  */

static unsigned get_vec_alignment_for_type (tree);
static hash_map<tree, unsigned> *type_align_map;

/* Return the alignment of the vector type corresponding to the scalar
   element type of array type TYPE, or 0 if no such vector type exists
   or the whole array is smaller than one vector.  */
static unsigned
get_vec_alignment_for_array_type (tree type)
{
  gcc_assert (TREE_CODE (type) == ARRAY_TYPE);

  tree vectype = get_vectype_for_scalar_type (strip_array_types (type));
  if (!vectype
      || !TYPE_SIZE (type)
      || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
      || tree_int_cst_lt (TYPE_SIZE (type), TYPE_SIZE (vectype)))
    return 0;

  return TYPE_ALIGN (vectype);
}
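
/* For illustration (target-dependent, numbers hypothetical): on a
   target whose preferred vector type for float is V4SF with 128-bit
   alignment, an array type like float[16] would yield 128 here, while
   float[2] would yield 0 because the whole array is smaller than one
   vector.  */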

/* Return the maximum alignment, over all fields, of the vector type
   corresponding to each field's scalar type.  For now, we only consider
   fields whose offset is a multiple of their vector alignment.
   Return 0 if no suitable field is found.  */
static unsigned
get_vec_alignment_for_record_type (tree type)
{
  gcc_assert (TREE_CODE (type) == RECORD_TYPE);

  unsigned max_align = 0, alignment;
  HOST_WIDE_INT offset;
  tree offset_tree;

  if (TYPE_PACKED (type))
    return 0;

  unsigned *slot = type_align_map->get (type);
  if (slot)
    return *slot;

  for (tree field = first_field (type);
       field != NULL_TREE;
       field = DECL_CHAIN (field))
    {
      /* Skip if not FIELD_DECL or if alignment is set by user.  */
      if (TREE_CODE (field) != FIELD_DECL
          || DECL_USER_ALIGN (field)
          || DECL_ARTIFICIAL (field))
        continue;

      /* We don't need to process the type further if offset is variable,
         since the offsets of remaining members will also be variable.  */
      if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST
          || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST)
        break;

      /* Similarly stop processing the type if offset_tree
         does not fit in unsigned HOST_WIDE_INT.  */
      offset_tree = bit_position (field);
      if (!tree_fits_uhwi_p (offset_tree))
        break;

      offset = tree_to_uhwi (offset_tree);
      alignment = get_vec_alignment_for_type (TREE_TYPE (field));

      /* Get the maximum alignment of vectorized field/array among those
         members whose offset is a multiple of the vector alignment.  */
      if (alignment
          && (offset % alignment == 0)
          && (alignment > max_align))
        max_align = alignment;
    }

  type_align_map->put (type, max_align);
  return max_align;
}

/* Return the alignment of the vector type corresponding to TYPE's
   scalar type, or 0 if no such vector type exists or the vector
   alignment is not greater than TYPE's alignment.  */
static unsigned
get_vec_alignment_for_type (tree type)
{
  if (type == NULL_TREE)
    return 0;

  gcc_assert (TYPE_P (type));

  unsigned alignment = 0;
  switch (TREE_CODE (type))
    {
    case ARRAY_TYPE:
      alignment = get_vec_alignment_for_array_type (type);
      break;
    case RECORD_TYPE:
      alignment = get_vec_alignment_for_record_type (type);
      break;
    default:
      alignment = 0;
      break;
    }

  return (alignment > TYPE_ALIGN (type)) ? alignment : 0;
}

/* Entry point to increase_alignment pass.  */
static unsigned int
increase_alignment (void)
{
  varpool_node *vnode;

  vect_location = UNKNOWN_LOCATION;
  type_align_map = new hash_map<tree, unsigned>;

  /* Increase the alignment of all global arrays for vectorization.  */
  FOR_EACH_DEFINED_VARIABLE (vnode)
    {
      tree decl = vnode->decl;
      unsigned int alignment;

      if ((decl_in_symtab_p (decl)
           && !symtab_node::get (decl)->can_increase_alignment_p ())
          || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl))
        continue;

      alignment = get_vec_alignment_for_type (TREE_TYPE (decl));
      if (alignment && vect_can_force_dr_alignment_p (decl, alignment))
        {
          vnode->increase_alignment (alignment);
          dump_printf (MSG_NOTE, "Increasing alignment of decl: ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, decl);
          dump_printf (MSG_NOTE, "\n");
        }
    }

  delete type_align_map;
  return 0;
}
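
/* For illustration (target-dependent): continuing the hypothetical
   V4SF example above, a global declared as

     float data[1024];

   whose default alignment is below 128 bits could have its alignment
   raised to 128 by this pass, enabling aligned vector accesses to it;
   the pass is gated on section anchors being in use (see the gate
   below).  */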

namespace {

const pass_data pass_data_ipa_increase_alignment =
{
  SIMPLE_IPA_PASS, /* type */
  "increase_alignment", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_IPA_OPT, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_ipa_increase_alignment : public simple_ipa_opt_pass
{
public:
  pass_ipa_increase_alignment (gcc::context *ctxt)
    : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
  {
    return flag_section_anchors && flag_tree_loop_vectorize;
  }

  virtual unsigned int execute (function *) { return increase_alignment (); }

}; // class pass_ipa_increase_alignment

} // anon namespace

simple_ipa_opt_pass *
make_pass_ipa_increase_alignment (gcc::context *ctxt)
{
  return new pass_ipa_increase_alignment (ctxt);
}