[gcc.git] / gcc / tree-vect-loop-manip.c
1 /* Vectorizer Specific Loop Manipulations
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "fold-const.h"
32 #include "cfganal.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
36 #include "tree-cfg.h"
37 #include "tree-ssa-loop-manip.h"
38 #include "tree-into-ssa.h"
39 #include "tree-ssa.h"
40 #include "cfgloop.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "tree-ssa-loop-ivopts.h"
44 #include "gimple-fold.h"
45 #include "tree-ssa-loop-niter.h"
46 #include "internal-fn.h"
47 #include "stor-layout.h"
48 #include "optabs-query.h"
49 #include "vec-perm-indices.h"
50
51 /*************************************************************************
52 Simple Loop Peeling Utilities
53
54 Utilities to support loop peeling for vectorization purposes.
55 *************************************************************************/
56
57
58 /* Renames the use *OP_P. */
59
60 static void
61 rename_use_op (use_operand_p op_p)
62 {
63 tree new_name;
64
65 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
66 return;
67
68 new_name = get_current_def (USE_FROM_PTR (op_p));
69
70 /* Something defined outside of the loop. */
71 if (!new_name)
72 return;
73
74 /* An ordinary ssa name defined in the loop. */
75
76 SET_USE (op_p, new_name);
77 }
78
79
80 /* Renames the variables in basic block BB. Allow renaming of PHI arguments
81 on edges incoming from the outer-loop header if RENAME_FROM_OUTER_LOOP is
82 true. */
83
84 static void
85 rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
86 {
87 gimple *stmt;
88 use_operand_p use_p;
89 ssa_op_iter iter;
90 edge e;
91 edge_iterator ei;
92 class loop *loop = bb->loop_father;
93 class loop *outer_loop = NULL;
94
95 if (rename_from_outer_loop)
96 {
97 gcc_assert (loop);
98 outer_loop = loop_outer (loop);
99 }
100
101 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
102 gsi_next (&gsi))
103 {
104 stmt = gsi_stmt (gsi);
105 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
106 rename_use_op (use_p);
107 }
108
109 FOR_EACH_EDGE (e, ei, bb->preds)
110 {
111 if (!flow_bb_inside_loop_p (loop, e->src))
112 {
113 if (!rename_from_outer_loop)
114 continue;
115 if (e->src != outer_loop->header)
116 {
117 if (outer_loop->inner->next)
118 {
119 /* If outer_loop has 2 inner loops, allow there to
120 be an extra basic block which decides, via
121 LOOP_VECTORIZED, which of the two loops to use. */
122 if (!single_pred_p (e->src)
123 || single_pred (e->src) != outer_loop->header)
124 continue;
125 }
126 }
127 }
128 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
129 gsi_next (&gsi))
130 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
131 }
132 }
133
134
135 struct adjust_info
136 {
137 tree from, to;
138 basic_block bb;
139 };
140
141 /* A stack of values to be adjusted in debug stmts. We have to
142 process them LIFO, so that the closest substitution applies. If we
143 processed them FIFO, without the stack, we might substitute uses
144 with a PHI DEF that would soon become non-dominant, and when we got
145 to the suitable one, it wouldn't have anything to substitute any
146 more. */
147 static vec<adjust_info, va_heap> adjust_vec;
148
149 /* Adjust any debug stmts that referenced AI->from values to use the
150 loop-closed AI->to, if the references are dominated by AI->bb and
151 not by the definition of AI->from. */
152
153 static void
154 adjust_debug_stmts_now (adjust_info *ai)
155 {
156 basic_block bbphi = ai->bb;
157 tree orig_def = ai->from;
158 tree new_def = ai->to;
159 imm_use_iterator imm_iter;
160 gimple *stmt;
161 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
162
163 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
164
165 /* Adjust any debug stmts that held onto non-loop-closed
166 references. */
167 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
168 {
169 use_operand_p use_p;
170 basic_block bbuse;
171
172 if (!is_gimple_debug (stmt))
173 continue;
174
175 gcc_assert (gimple_debug_bind_p (stmt));
176
177 bbuse = gimple_bb (stmt);
178
179 if ((bbuse == bbphi
180 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
181 && !(bbuse == bbdef
182 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
183 {
184 if (new_def)
185 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
186 SET_USE (use_p, new_def);
187 else
188 {
189 gimple_debug_bind_reset_value (stmt);
190 update_stmt (stmt);
191 }
192 }
193 }
194 }
195
196 /* Adjust debug stmts as scheduled before. */
197
198 static void
199 adjust_vec_debug_stmts (void)
200 {
201 if (!MAY_HAVE_DEBUG_BIND_STMTS)
202 return;
203
204 gcc_assert (adjust_vec.exists ());
205
206 while (!adjust_vec.is_empty ())
207 {
208 adjust_debug_stmts_now (&adjust_vec.last ());
209 adjust_vec.pop ();
210 }
211 }
212
213 /* Adjust any debug stmts that referenced FROM values to use the
214 loop-closed TO, if the references are dominated by BB and not by
215 the definition of FROM. If adjust_vec is non-NULL, adjustments
216 will be postponed until adjust_vec_debug_stmts is called. */
217
218 static void
219 adjust_debug_stmts (tree from, tree to, basic_block bb)
220 {
221 adjust_info ai;
222
223 if (MAY_HAVE_DEBUG_BIND_STMTS
224 && TREE_CODE (from) == SSA_NAME
225 && ! SSA_NAME_IS_DEFAULT_DEF (from)
226 && ! virtual_operand_p (from))
227 {
228 ai.from = from;
229 ai.to = to;
230 ai.bb = bb;
231
232 if (adjust_vec.exists ())
233 adjust_vec.safe_push (ai);
234 else
235 adjust_debug_stmts_now (&ai);
236 }
237 }
238
239 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
240 to adjust any debug stmts that referenced the old phi arg,
241 presumably non-loop-closed references left over from other
242 transformations. */
243
244 static void
245 adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
246 {
247 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
248
249 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
250
251 if (MAY_HAVE_DEBUG_BIND_STMTS)
252 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
253 gimple_bb (update_phi));
254 }
255
256 /* Define one loop mask MASK from loop LOOP. INIT_MASK is the value that
257 the mask should have during the first iteration and NEXT_MASK is the
258 value that it should have on subsequent iterations. */
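/* An illustrative sketch (SSA version numbers are hypothetical) of the
   header PHI this creates:

     mask_1 = PHI <init_mask_2 (preheader), next_mask_3 (latch)>  */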
259
260 static void
261 vect_set_loop_mask (class loop *loop, tree mask, tree init_mask,
262 tree next_mask)
263 {
264 gphi *phi = create_phi_node (mask, loop->header);
265 add_phi_arg (phi, init_mask, loop_preheader_edge (loop), UNKNOWN_LOCATION);
266 add_phi_arg (phi, next_mask, loop_latch_edge (loop), UNKNOWN_LOCATION);
267 }
268
269 /* Add SEQ to the end of LOOP's preheader block. */
270
271 static void
272 add_preheader_seq (class loop *loop, gimple_seq seq)
273 {
274 if (seq)
275 {
276 edge pe = loop_preheader_edge (loop);
277 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
278 gcc_assert (!new_bb);
279 }
280 }
281
282 /* Add SEQ to the beginning of LOOP's header block. */
283
284 static void
285 add_header_seq (class loop *loop, gimple_seq seq)
286 {
287 if (seq)
288 {
289 gimple_stmt_iterator gsi = gsi_after_labels (loop->header);
290 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
291 }
292 }
293
294 /* Return true if the target can interleave elements of two vectors.
295 OFFSET is 0 if the first half of the vectors should be interleaved
296 or 1 if the second half should. When returning true, store the
297 associated permutation in INDICES. */
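/* As a purely illustrative example, with two V4 inputs the permutation
   built for OFFSET == 0 selects elements { 0, 4, 1, 5 } and the one for
   OFFSET == 1 selects { 2, 6, 3, 7 }.  */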
298
299 static bool
300 interleave_supported_p (vec_perm_indices *indices, tree vectype,
301 unsigned int offset)
302 {
303 poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype);
304 poly_uint64 base = exact_div (nelts, 2) * offset;
305 vec_perm_builder sel (nelts, 2, 3);
306 for (unsigned int i = 0; i < 3; ++i)
307 {
308 sel.quick_push (base + i);
309 sel.quick_push (base + i + nelts);
310 }
311 indices->new_vector (sel, 2, nelts);
312 return can_vec_perm_const_p (TYPE_MODE (vectype), *indices);
313 }
314
315 /* Try to use permutes to define the masks in DEST_RGM using the masks
316 in SRC_RGM, given that the former has twice as many masks as the
317 latter. Return true on success, adding any new statements to SEQ. */
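/* For instance (a sketch, assuming hypothetical masks D0..D3 in DEST_RGM
   and S0, S1 in SRC_RGM), the unpack path below would emit roughly:

     D0 = VEC_UNPACK_LO_EXPR <S0>;   D1 = VEC_UNPACK_HI_EXPR <S0>;
     D2 = VEC_UNPACK_LO_EXPR <S1>;   D3 = VEC_UNPACK_HI_EXPR <S1>;

   (with HI and LO swapped for big-endian targets, and with an extra
   VIEW_CONVERT_EXPR when the unpacked type differs from DEST_RGM's
   mask type).  */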
318
319 static bool
320 vect_maybe_permute_loop_masks (loop_vec_info loop_vinfo, gimple_seq *seq,
321 rgroup_masks *dest_rgm,
322 rgroup_masks *src_rgm)
323 {
324 tree src_masktype = src_rgm->mask_type;
325 tree dest_masktype = dest_rgm->mask_type;
326 machine_mode src_mode = TYPE_MODE (src_masktype);
327 if (dest_rgm->max_nscalars_per_iter <= src_rgm->max_nscalars_per_iter
328 && optab_handler (vec_unpacku_hi_optab, src_mode) != CODE_FOR_nothing
329 && optab_handler (vec_unpacku_lo_optab, src_mode) != CODE_FOR_nothing)
330 {
331 /* Unpacking the source masks gives at least as many mask bits as
332 we need. We can then VIEW_CONVERT any excess bits away. */
333 tree unpack_masktype = vect_halve_mask_nunits (loop_vinfo, src_masktype);
334 for (unsigned int i = 0; i < dest_rgm->masks.length (); ++i)
335 {
336 tree src = src_rgm->masks[i / 2];
337 tree dest = dest_rgm->masks[i];
338 tree_code code = ((i & 1) == (BYTES_BIG_ENDIAN ? 0 : 1)
339 ? VEC_UNPACK_HI_EXPR
340 : VEC_UNPACK_LO_EXPR);
341 gassign *stmt;
342 if (dest_masktype == unpack_masktype)
343 stmt = gimple_build_assign (dest, code, src);
344 else
345 {
346 tree temp = make_ssa_name (unpack_masktype);
347 stmt = gimple_build_assign (temp, code, src);
348 gimple_seq_add_stmt (seq, stmt);
349 stmt = gimple_build_assign (dest, VIEW_CONVERT_EXPR,
350 build1 (VIEW_CONVERT_EXPR,
351 dest_masktype, temp));
352 }
353 gimple_seq_add_stmt (seq, stmt);
354 }
355 return true;
356 }
357 vec_perm_indices indices[2];
358 if (dest_masktype == src_masktype
359 && interleave_supported_p (&indices[0], src_masktype, 0)
360 && interleave_supported_p (&indices[1], src_masktype, 1))
361 {
362 /* The destination requires twice as many mask bits as the source, so
363 we can use interleaving permutes to double up the number of bits. */
364 tree masks[2];
365 for (unsigned int i = 0; i < 2; ++i)
366 masks[i] = vect_gen_perm_mask_checked (src_masktype, indices[i]);
367 for (unsigned int i = 0; i < dest_rgm->masks.length (); ++i)
368 {
369 tree src = src_rgm->masks[i / 2];
370 tree dest = dest_rgm->masks[i];
371 gimple *stmt = gimple_build_assign (dest, VEC_PERM_EXPR,
372 src, src, masks[i & 1]);
373 gimple_seq_add_stmt (seq, stmt);
374 }
375 return true;
376 }
377 return false;
378 }
379
380 /* Helper for vect_set_loop_condition_masked. Generate definitions for
381 all the masks in RGM and return a mask that is nonzero when the loop
382 needs to iterate. Add any new preheader statements to PREHEADER_SEQ.
383 Use LOOP_COND_GSI to insert code before the exit gcond.
384
385 RGM belongs to loop LOOP. The loop originally iterated NITERS
386 times and has been vectorized according to LOOP_VINFO.
387
388 If NITERS_SKIP is nonnull, the first iteration of the vectorized loop
389 starts with NITERS_SKIP dummy iterations of the scalar loop before
390 the real work starts. The mask elements for these dummy iterations
391 must be 0, to ensure that the extra iterations do not have an effect.
392
393 It is known that:
394
395 NITERS * RGM->max_nscalars_per_iter
396
397 does not overflow. However, MIGHT_WRAP_P says whether an induction
398 variable that starts at 0 and has step:
399
400 VF * RGM->max_nscalars_per_iter
401
402 might overflow before hitting a value above:
403
404 (NITERS + NITERS_SKIP) * RGM->max_nscalars_per_iter
405
406 This means that we cannot guarantee that such an induction variable
407 would ever hit a value that produces a set of all-false masks for RGM. */
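/* A hedged illustration of the masks this produces: with a single
   4-element mask per iteration, NSCALARS_PER_ITER == 1, no NITERS_SKIP,
   MIGHT_WRAP_P false and NITERS == 10, the three vector iterations use
   the masks {1,1,1,1}, {1,1,1,1} and {1,1,0,0}, after which the next
   mask is all-false and the loop exits.  */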
408
409 static tree
410 vect_set_loop_masks_directly (class loop *loop, loop_vec_info loop_vinfo,
411 gimple_seq *preheader_seq,
412 gimple_stmt_iterator loop_cond_gsi,
413 rgroup_masks *rgm, tree niters, tree niters_skip,
414 bool might_wrap_p)
415 {
416 tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
417 tree iv_type = LOOP_VINFO_MASK_IV_TYPE (loop_vinfo);
418 tree mask_type = rgm->mask_type;
419 unsigned int nscalars_per_iter = rgm->max_nscalars_per_iter;
420 poly_uint64 nscalars_per_mask = TYPE_VECTOR_SUBPARTS (mask_type);
421 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
422
423 /* Calculate the maximum number of scalar values that the rgroup
424 handles in total, the number that it handles for each iteration
425 of the vector loop, and the number that it should skip during the
426 first iteration of the vector loop. */
427 tree nscalars_total = niters;
428 tree nscalars_step = build_int_cst (iv_type, vf);
429 tree nscalars_skip = niters_skip;
430 if (nscalars_per_iter != 1)
431 {
432 /* We checked before choosing to use a fully-masked loop that these
433 multiplications don't overflow. */
434 tree compare_factor = build_int_cst (compare_type, nscalars_per_iter);
435 tree iv_factor = build_int_cst (iv_type, nscalars_per_iter);
436 nscalars_total = gimple_build (preheader_seq, MULT_EXPR, compare_type,
437 nscalars_total, compare_factor);
438 nscalars_step = gimple_build (preheader_seq, MULT_EXPR, iv_type,
439 nscalars_step, iv_factor);
440 if (nscalars_skip)
441 nscalars_skip = gimple_build (preheader_seq, MULT_EXPR, compare_type,
442 nscalars_skip, compare_factor);
443 }
444
445 /* Create an induction variable that counts the number of scalars
446 processed. */
447 tree index_before_incr, index_after_incr;
448 gimple_stmt_iterator incr_gsi;
449 bool insert_after;
450 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
451 create_iv (build_int_cst (iv_type, 0), nscalars_step, NULL_TREE, loop,
452 &incr_gsi, insert_after, &index_before_incr, &index_after_incr);
453
454 tree zero_index = build_int_cst (compare_type, 0);
455 tree test_index, test_limit, first_limit;
456 gimple_stmt_iterator *test_gsi;
457 if (might_wrap_p)
458 {
459 /* In principle the loop should stop iterating once the incremented
460 IV reaches a value greater than or equal to:
461
462 NSCALARS_TOTAL +[infinite-prec] NSCALARS_SKIP
463
464 However, there's no guarantee that this addition doesn't overflow
465 the comparison type, or that the IV hits a value above it before
466 wrapping around. We therefore adjust the limit down by one
467 IV step:
468
469 (NSCALARS_TOTAL +[infinite-prec] NSCALARS_SKIP)
470 -[infinite-prec] NSCALARS_STEP
471
472 and compare the IV against this limit _before_ incrementing it.
473 Since the comparison type is unsigned, we actually want the
474 subtraction to saturate at zero:
475
476 (NSCALARS_TOTAL +[infinite-prec] NSCALARS_SKIP)
477 -[sat] NSCALARS_STEP
478
479 And since NSCALARS_SKIP < NSCALARS_STEP, we can reassociate this as:
480
481 NSCALARS_TOTAL -[sat] (NSCALARS_STEP - NSCALARS_SKIP)
482
483 where the rightmost subtraction can be done directly in
484 COMPARE_TYPE. */
485 test_index = index_before_incr;
486 tree adjust = gimple_convert (preheader_seq, compare_type,
487 nscalars_step);
488 if (nscalars_skip)
489 adjust = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
490 adjust, nscalars_skip);
491 test_limit = gimple_build (preheader_seq, MAX_EXPR, compare_type,
492 nscalars_total, adjust);
493 test_limit = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
494 test_limit, adjust);
495 test_gsi = &incr_gsi;
496
497 /* Get a safe limit for the first iteration. */
498 if (nscalars_skip)
499 {
500 /* The first vector iteration can handle at most NSCALARS_STEP
501 scalars. NSCALARS_STEP <= CONST_LIMIT, and adding
502 NSCALARS_SKIP to that cannot overflow. */
503 tree const_limit = build_int_cst (compare_type,
504 LOOP_VINFO_VECT_FACTOR (loop_vinfo)
505 * nscalars_per_iter);
506 first_limit = gimple_build (preheader_seq, MIN_EXPR, compare_type,
507 nscalars_total, const_limit);
508 first_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
509 first_limit, nscalars_skip);
510 }
511 else
512 /* For the first iteration it doesn't matter whether the IV hits
513 a value above NSCALARS_TOTAL. That only matters for the latch
514 condition. */
515 first_limit = nscalars_total;
516 }
517 else
518 {
519 /* Test the incremented IV, which will always hit a value above
520 the bound before wrapping. */
521 test_index = index_after_incr;
522 test_limit = nscalars_total;
523 if (nscalars_skip)
524 test_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
525 test_limit, nscalars_skip);
526 test_gsi = &loop_cond_gsi;
527
528 first_limit = test_limit;
529 }
530
531 /* Convert the IV value to the comparison type (either a no-op or
532 a demotion). */
533 gimple_seq test_seq = NULL;
534 test_index = gimple_convert (&test_seq, compare_type, test_index);
535 gsi_insert_seq_before (test_gsi, test_seq, GSI_SAME_STMT);
536
537 /* Provide a definition of each mask in the group. */
538 tree next_mask = NULL_TREE;
539 tree mask;
540 unsigned int i;
541 FOR_EACH_VEC_ELT_REVERSE (rgm->masks, i, mask)
542 {
543 /* Previous masks will cover BIAS scalars. This mask covers the
544 next batch. */
545 poly_uint64 bias = nscalars_per_mask * i;
546 tree bias_tree = build_int_cst (compare_type, bias);
547 gimple *tmp_stmt;
548
549 /* See whether the first iteration of the vector loop is known
550 to have a full mask. */
551 poly_uint64 const_limit;
552 bool first_iteration_full
553 = (poly_int_tree_p (first_limit, &const_limit)
554 && known_ge (const_limit, (i + 1) * nscalars_per_mask));
555
556 /* Rather than have a new IV that starts at BIAS and goes up to
557 TEST_LIMIT, prefer to use the same 0-based IV for each mask
558 and adjust the bound down by BIAS. */
559 tree this_test_limit = test_limit;
560 if (i != 0)
561 {
562 this_test_limit = gimple_build (preheader_seq, MAX_EXPR,
563 compare_type, this_test_limit,
564 bias_tree);
565 this_test_limit = gimple_build (preheader_seq, MINUS_EXPR,
566 compare_type, this_test_limit,
567 bias_tree);
568 }
569
570 /* Create the initial mask. First include all scalars that
571 are within the loop limit. */
572 tree init_mask = NULL_TREE;
573 if (!first_iteration_full)
574 {
575 tree start, end;
576 if (first_limit == test_limit)
577 {
578 /* Use a natural test between zero (the initial IV value)
579 and the loop limit. The "else" block would be valid too,
580 but this choice can avoid the need to load BIAS_TREE into
581 a register. */
582 start = zero_index;
583 end = this_test_limit;
584 }
585 else
586 {
587 /* FIRST_LIMIT is the maximum number of scalars handled by the
588 first iteration of the vector loop. Test the portion
589 associated with this mask. */
590 start = bias_tree;
591 end = first_limit;
592 }
593
594 init_mask = make_temp_ssa_name (mask_type, NULL, "max_mask");
595 tmp_stmt = vect_gen_while (init_mask, start, end);
596 gimple_seq_add_stmt (preheader_seq, tmp_stmt);
597 }
598
599 /* Now AND out the bits that are within the number of skipped
600 scalars. */
601 poly_uint64 const_skip;
602 if (nscalars_skip
603 && !(poly_int_tree_p (nscalars_skip, &const_skip)
604 && known_le (const_skip, bias)))
605 {
606 tree unskipped_mask = vect_gen_while_not (preheader_seq, mask_type,
607 bias_tree, nscalars_skip);
608 if (init_mask)
609 init_mask = gimple_build (preheader_seq, BIT_AND_EXPR, mask_type,
610 init_mask, unskipped_mask);
611 else
612 init_mask = unskipped_mask;
613 }
614
615 if (!init_mask)
616 /* First iteration is full. */
617 init_mask = build_minus_one_cst (mask_type);
618
619 /* Get the mask value for the next iteration of the loop. */
620 next_mask = make_temp_ssa_name (mask_type, NULL, "next_mask");
621 gcall *call = vect_gen_while (next_mask, test_index, this_test_limit);
622 gsi_insert_before (test_gsi, call, GSI_SAME_STMT);
623
624 vect_set_loop_mask (loop, mask, init_mask, next_mask);
625 }
626 return next_mask;
627 }
628
629 /* Make LOOP iterate NITERS times using masking and WHILE_ULT calls.
630 LOOP_VINFO describes the vectorization of LOOP. NITERS is the
631 number of iterations of the original scalar loop that should be
632 handled by the vector loop. NITERS_MAYBE_ZERO and FINAL_IV are
633 as for vect_set_loop_condition.
634
635 Insert the branch-back condition before LOOP_COND_GSI and return the
636 final gcond. */
637
638 static gcond *
639 vect_set_loop_condition_masked (class loop *loop, loop_vec_info loop_vinfo,
640 tree niters, tree final_iv,
641 bool niters_maybe_zero,
642 gimple_stmt_iterator loop_cond_gsi)
643 {
644 gimple_seq preheader_seq = NULL;
645 gimple_seq header_seq = NULL;
646
647 tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
648 unsigned int compare_precision = TYPE_PRECISION (compare_type);
649 tree orig_niters = niters;
650
651 /* Type of the initial value of NITERS. */
652 tree ni_actual_type = TREE_TYPE (niters);
653 unsigned int ni_actual_precision = TYPE_PRECISION (ni_actual_type);
654 tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
655
656 /* Convert NITERS to the same size as the compare. */
657 if (compare_precision > ni_actual_precision
658 && niters_maybe_zero)
659 {
660 /* We know that there is always at least one iteration, so if the
661 count is zero then it must have wrapped. Cope with this by
662 subtracting 1 before the conversion and adding 1 to the result. */
663 gcc_assert (TYPE_UNSIGNED (ni_actual_type));
664 niters = gimple_build (&preheader_seq, PLUS_EXPR, ni_actual_type,
665 niters, build_minus_one_cst (ni_actual_type));
666 niters = gimple_convert (&preheader_seq, compare_type, niters);
667 niters = gimple_build (&preheader_seq, PLUS_EXPR, compare_type,
668 niters, build_one_cst (compare_type));
669 }
670 else
671 niters = gimple_convert (&preheader_seq, compare_type, niters);
672
673 widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo);
674
675 /* Iterate over all the rgroups and fill in their masks. We could use
676 the first mask from any rgroup for the loop condition; here we
677 arbitrarily pick the last. */
678 tree test_mask = NULL_TREE;
679 rgroup_masks *rgm;
680 unsigned int i;
681 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
682 FOR_EACH_VEC_ELT (*masks, i, rgm)
683 if (!rgm->masks.is_empty ())
684 {
685 /* First try using permutes. This adds a single vector
686 instruction to the loop for each mask, but needs no extra
687 loop invariants or IVs. */
688 unsigned int nmasks = i + 1;
689 if ((nmasks & 1) == 0)
690 {
691 rgroup_masks *half_rgm = &(*masks)[nmasks / 2 - 1];
692 if (!half_rgm->masks.is_empty ()
693 && vect_maybe_permute_loop_masks (loop_vinfo, &header_seq,
694 rgm, half_rgm))
695 continue;
696 }
697
698 /* See whether zero-based IV would ever generate all-false masks
699 before wrapping around. */
700 bool might_wrap_p
701 = (iv_limit == -1
702 || (wi::min_precision (iv_limit * rgm->max_nscalars_per_iter,
703 UNSIGNED)
704 > compare_precision));
705
706 /* Set up all masks for this group. */
707 test_mask = vect_set_loop_masks_directly (loop, loop_vinfo,
708 &preheader_seq,
709 loop_cond_gsi, rgm,
710 niters, niters_skip,
711 might_wrap_p);
712 }
713
714 /* Emit all accumulated statements. */
715 add_preheader_seq (loop, preheader_seq);
716 add_header_seq (loop, header_seq);
717
718 /* Get a boolean result that tells us whether to iterate. */
719 edge exit_edge = single_exit (loop);
720 tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
721 tree zero_mask = build_zero_cst (TREE_TYPE (test_mask));
722 gcond *cond_stmt = gimple_build_cond (code, test_mask, zero_mask,
723 NULL_TREE, NULL_TREE);
724 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
725
726 /* The loop iterates (NITERS - 1) / VF + 1 times.
727 Subtract one from this to get the latch count. */
728 tree step = build_int_cst (compare_type,
729 LOOP_VINFO_VECT_FACTOR (loop_vinfo));
730 tree niters_minus_one = fold_build2 (PLUS_EXPR, compare_type, niters,
731 build_minus_one_cst (compare_type));
732 loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, compare_type,
733 niters_minus_one, step);
734
735 if (final_iv)
736 {
737 gassign *assign = gimple_build_assign (final_iv, orig_niters);
738 gsi_insert_on_edge_immediate (single_exit (loop), assign);
739 }
740
741 return cond_stmt;
742 }
743
744 /* Like vect_set_loop_condition, but handle the case in which there
745 are no loop masks. */
746
747 static gcond *
748 vect_set_loop_condition_unmasked (class loop *loop, tree niters,
749 tree step, tree final_iv,
750 bool niters_maybe_zero,
751 gimple_stmt_iterator loop_cond_gsi)
752 {
753 tree indx_before_incr, indx_after_incr;
754 gcond *cond_stmt;
755 gcond *orig_cond;
756 edge pe = loop_preheader_edge (loop);
757 edge exit_edge = single_exit (loop);
758 gimple_stmt_iterator incr_gsi;
759 bool insert_after;
760 enum tree_code code;
761 tree niters_type = TREE_TYPE (niters);
762
763 orig_cond = get_loop_exit_condition (loop);
764 gcc_assert (orig_cond);
765 loop_cond_gsi = gsi_for_stmt (orig_cond);
766
767 tree init, limit;
768 if (!niters_maybe_zero && integer_onep (step))
769 {
770 /* In this case we can use a simple 0-based IV:
771
772 A:
773 x = 0;
774 do
775 {
776 ...
777 x += 1;
778 }
779 while (x < NITERS); */
780 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
781 init = build_zero_cst (niters_type);
782 limit = niters;
783 }
784 else
785 {
786 /* The following works for all values of NITERS except 0:
787
788 B:
789 x = 0;
790 do
791 {
792 ...
793 x += STEP;
794 }
795 while (x <= NITERS - STEP);
796
797 so that the loop continues to iterate if x + STEP - 1 < NITERS
798 but stops if x + STEP - 1 >= NITERS.
799
800 However, if NITERS is zero, x never hits a value above NITERS - STEP
801 before wrapping around. There are two obvious ways of dealing with
802 this:
803
804 - start at STEP - 1 and compare x before incrementing it
805 - start at -1 and compare x after incrementing it
806
807 The latter is simpler and is what we use. The loop in this case
808 looks like:
809
810 C:
811 x = -1;
812 do
813 {
814 ...
815 x += STEP;
816 }
817 while (x < NITERS - STEP);
818
819 In both cases the loop limit is NITERS - STEP. */
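      /* For example (numbers purely illustrative): NITERS == 10 and
	 STEP == 4 give a limit of 6, so case B executes its body for
	 x == 0 and x == 4, i.e. (NITERS - STEP) / STEP + 1 == 2 times.  */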
820 gimple_seq seq = NULL;
821 limit = force_gimple_operand (niters, &seq, true, NULL_TREE);
822 limit = gimple_build (&seq, MINUS_EXPR, TREE_TYPE (limit), limit, step);
823 if (seq)
824 {
825 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
826 gcc_assert (!new_bb);
827 }
828 if (niters_maybe_zero)
829 {
830 /* Case C. */
831 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
832 init = build_all_ones_cst (niters_type);
833 }
834 else
835 {
836 /* Case B. */
837 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GT_EXPR : LE_EXPR;
838 init = build_zero_cst (niters_type);
839 }
840 }
841
842 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
843 create_iv (init, step, NULL_TREE, loop,
844 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
845 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
846 true, NULL_TREE, true,
847 GSI_SAME_STMT);
848 limit = force_gimple_operand_gsi (&loop_cond_gsi, limit, true, NULL_TREE,
849 true, GSI_SAME_STMT);
850
851 cond_stmt = gimple_build_cond (code, indx_after_incr, limit, NULL_TREE,
852 NULL_TREE);
853
854 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
855
856 /* Record the number of latch iterations. */
857 if (limit == niters)
858 /* Case A: the loop iterates NITERS times. Subtract one to get the
859 latch count. */
860 loop->nb_iterations = fold_build2 (MINUS_EXPR, niters_type, niters,
861 build_int_cst (niters_type, 1));
862 else
863 /* Case B or C: the loop iterates (NITERS - STEP) / STEP + 1 times.
864 Subtract one from this to get the latch count. */
865 loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, niters_type,
866 limit, step);
867
868 if (final_iv)
869 {
870 gassign *assign = gimple_build_assign (final_iv, MINUS_EXPR,
871 indx_after_incr, init);
872 gsi_insert_on_edge_immediate (single_exit (loop), assign);
873 }
874
875 return cond_stmt;
876 }
877
878 /* If we're using fully-masked loops, make LOOP iterate:
879
880 N == (NITERS - 1) / STEP + 1
881
882 times. When NITERS is zero, this is equivalent to making the loop
883 execute (1 << M) / STEP times, where M is the precision of NITERS.
884 NITERS_MAYBE_ZERO is true if this last case might occur.
885
886 If we're not using fully-masked loops, make LOOP iterate:
887
888 N == (NITERS - STEP) / STEP + 1
889
890 times, where NITERS is known to be outside the range [1, STEP - 1].
891 This is equivalent to making the loop execute NITERS / STEP times
892 when NITERS is nonzero and (1 << M) / STEP times otherwise.
893 NITERS_MAYBE_ZERO again indicates whether this last case might occur.
894
895 If FINAL_IV is nonnull, it is an SSA name that should be set to
896 N * STEP on exit from the loop.
897
898 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
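/* A purely illustrative example of the two formulas: for NITERS == 10 and
   STEP == 4, the fully-masked form iterates (10 - 1) / 4 + 1 == 3 times,
   while the unmasked form iterates (10 - 4) / 4 + 1 == 2 times, matching
   NITERS / STEP.  */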
899
900 void
901 vect_set_loop_condition (class loop *loop, loop_vec_info loop_vinfo,
902 tree niters, tree step, tree final_iv,
903 bool niters_maybe_zero)
904 {
905 gcond *cond_stmt;
906 gcond *orig_cond = get_loop_exit_condition (loop);
907 gimple_stmt_iterator loop_cond_gsi = gsi_for_stmt (orig_cond);
908
909 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
910 cond_stmt = vect_set_loop_condition_masked (loop, loop_vinfo, niters,
911 final_iv, niters_maybe_zero,
912 loop_cond_gsi);
913 else
914 cond_stmt = vect_set_loop_condition_unmasked (loop, niters, step,
915 final_iv, niters_maybe_zero,
916 loop_cond_gsi);
917
918 /* Remove old loop exit test. */
919 stmt_vec_info orig_cond_info;
920 if (loop_vinfo
921 && (orig_cond_info = loop_vinfo->lookup_stmt (orig_cond)))
922 loop_vinfo->remove_stmt (orig_cond_info);
923 else
924 gsi_remove (&loop_cond_gsi, true);
925
926 if (dump_enabled_p ())
927 dump_printf_loc (MSG_NOTE, vect_location, "New loop exit condition: %G",
928 cond_stmt);
929 }
930
931 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
932 For each pair of PHI arguments taken from the FROM and TO edges,
933 ensure that the current_def of the TO->dest PHI argument is set
934 to the current_def of the corresponding FROM->dest argument. */
935
936 static void
937 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
938 {
939 gimple_stmt_iterator gsi_from, gsi_to;
940
941 for (gsi_from = gsi_start_phis (from->dest),
942 gsi_to = gsi_start_phis (to->dest);
943 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
944 {
945 gimple *from_phi = gsi_stmt (gsi_from);
946 gimple *to_phi = gsi_stmt (gsi_to);
947 tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
948 tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
949 if (virtual_operand_p (from_arg))
950 {
951 gsi_next (&gsi_from);
952 continue;
953 }
954 if (virtual_operand_p (to_arg))
955 {
956 gsi_next (&gsi_to);
957 continue;
958 }
959 if (TREE_CODE (from_arg) != SSA_NAME)
960 gcc_assert (operand_equal_p (from_arg, to_arg, 0));
961 else if (TREE_CODE (to_arg) == SSA_NAME
962 && from_arg != to_arg)
963 {
964 if (get_current_def (to_arg) == NULL_TREE)
965 {
966 gcc_assert (types_compatible_p (TREE_TYPE (to_arg),
967 TREE_TYPE (get_current_def
968 (from_arg))));
969 set_current_def (to_arg, get_current_def (from_arg));
970 }
971 }
972 gsi_next (&gsi_from);
973 gsi_next (&gsi_to);
974 }
975
976 gphi *from_phi = get_virtual_phi (from->dest);
977 gphi *to_phi = get_virtual_phi (to->dest);
978 if (from_phi)
979 set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
980 get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
981 }
982
983
984 /* Given LOOP this function generates a new copy of it and puts it
985 on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
986 non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
987 basic blocks from SCALAR_LOOP instead of LOOP, but to either the
988 entry or exit of LOOP. */
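/* Informally: when E is LOOP's single exit edge the copy is placed after
   LOOP (e.g. for an epilogue copy), and when E is the preheader edge the
   copy is placed before LOOP (e.g. for a prologue copy).  */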
989
990 class loop *
991 slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop,
992 class loop *scalar_loop, edge e)
993 {
994 class loop *new_loop;
995 basic_block *new_bbs, *bbs, *pbbs;
996 bool at_exit;
997 bool was_imm_dom;
998 basic_block exit_dest;
999 edge exit, new_exit;
1000 bool duplicate_outer_loop = false;
1001
1002 exit = single_exit (loop);
1003 at_exit = (e == exit);
1004 if (!at_exit && e != loop_preheader_edge (loop))
1005 return NULL;
1006
1007 if (scalar_loop == NULL)
1008 scalar_loop = loop;
1009
1010 bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
1011 pbbs = bbs + 1;
1012 get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
1013 /* Allow duplication of outer loops. */
1014 if (scalar_loop->inner)
1015 duplicate_outer_loop = true;
1016 /* Check whether duplication is possible. */
1017 if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
1018 {
1019 free (bbs);
1020 return NULL;
1021 }
1022
1023 /* Generate new loop structure. */
1024 new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
1025 duplicate_subloops (scalar_loop, new_loop);
1026
1027 exit_dest = exit->dest;
1028 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
1029 exit_dest) == loop->header ?
1030 true : false);
1031
1032 /* Also copy the pre-header, this avoids jumping through hoops to
1033 duplicate the loop entry PHI arguments. Create an empty
1034 pre-header unconditionally for this. */
1035 basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
1036 edge entry_e = single_pred_edge (preheader);
1037 bbs[0] = preheader;
1038 new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
1039
1040 exit = single_exit (scalar_loop);
1041 copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
1042 &exit, 1, &new_exit, NULL,
1043 at_exit ? loop->latch : e->src, true);
1044 exit = single_exit (loop);
1045 basic_block new_preheader = new_bbs[0];
1046
1047 add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
1048
1049 if (scalar_loop != loop)
1050 {
1051 /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
1052 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
1053 but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects
1054 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
1055 header) to have current_def set, so copy them over. */
1056 slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
1057 exit);
1058 slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
1059 0),
1060 EDGE_SUCC (loop->latch, 0));
1061 }
1062
1063 if (at_exit) /* Add the loop copy at exit. */
1064 {
1065 if (scalar_loop != loop)
1066 {
1067 gphi_iterator gsi;
1068 new_exit = redirect_edge_and_branch (new_exit, exit_dest);
1069
1070 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
1071 gsi_next (&gsi))
1072 {
1073 gphi *phi = gsi.phi ();
1074 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
1075 location_t orig_locus
1076 = gimple_phi_arg_location_from_edge (phi, e);
1077
1078 add_phi_arg (phi, orig_arg, new_exit, orig_locus);
1079 }
1080 }
1081 redirect_edge_and_branch_force (e, new_preheader);
1082 flush_pending_stmts (e);
1083 set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
1084 if (was_imm_dom || duplicate_outer_loop)
1085 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
1086
1087 /* And remove the unnecessary forwarder again. Keep the other
1088 one so we have a proper pre-header for the loop at the exit edge. */
1089 redirect_edge_pred (single_succ_edge (preheader),
1090 single_pred (preheader));
1091 delete_basic_block (preheader);
1092 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
1093 loop_preheader_edge (scalar_loop)->src);
1094 }
1095 else /* Add the copy at entry. */
1096 {
1097 if (scalar_loop != loop)
1098 {
1099 /* Remove the unnecessary forwarder of scalar_loop again. */
1100 redirect_edge_pred (single_succ_edge (preheader),
1101 single_pred (preheader));
1102 delete_basic_block (preheader);
1103 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
1104 loop_preheader_edge (scalar_loop)->src);
1105 preheader = split_edge (loop_preheader_edge (loop));
1106 entry_e = single_pred_edge (preheader);
1107 }
1108
1109 redirect_edge_and_branch_force (entry_e, new_preheader);
1110 flush_pending_stmts (entry_e);
1111 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
1112
1113 redirect_edge_and_branch_force (new_exit, preheader);
1114 flush_pending_stmts (new_exit);
1115 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
1116
1117 /* And remove the unnecessary forwarder again. Keep the other
1118 one so we have a proper pre-header for the loop at the exit edge. */
1119 redirect_edge_pred (single_succ_edge (new_preheader),
1120 single_pred (new_preheader));
1121 delete_basic_block (new_preheader);
1122 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
1123 loop_preheader_edge (new_loop)->src);
1124 }
1125
1126 /* Skip new preheader since it's deleted if copy loop is added at entry. */
1127 for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++)
1128 rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
1129
1130 if (scalar_loop != loop)
1131 {
1132 /* Update new_loop->header PHIs, so that on the preheader
1133 edge they are the ones from loop rather than scalar_loop. */
1134 gphi_iterator gsi_orig, gsi_new;
1135 edge orig_e = loop_preheader_edge (loop);
1136 edge new_e = loop_preheader_edge (new_loop);
1137
1138 for (gsi_orig = gsi_start_phis (loop->header),
1139 gsi_new = gsi_start_phis (new_loop->header);
1140 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
1141 gsi_next (&gsi_orig), gsi_next (&gsi_new))
1142 {
1143 gphi *orig_phi = gsi_orig.phi ();
1144 gphi *new_phi = gsi_new.phi ();
1145 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
1146 location_t orig_locus
1147 = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
1148
1149 add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
1150 }
1151 }
1152
1153 free (new_bbs);
1154 free (bbs);
1155
1156 checking_verify_dominators (CDI_DOMINATORS);
1157
1158 return new_loop;
1159 }
1160
1161
1162 /* Given the condition expression COND, put it as the last statement of
1163 GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
1164 DOM_BB; return the skip edge. GUARD_TO is the target basic block to
1165 skip the loop. PROBABILITY is the skip edge's probability. Mark the
1166 new edge as irreducible if IRREDUCIBLE_P is true. */
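/* A rough sketch of the result: GUARD_BB ends in "if (COND)"; the new
   true edge (with probability PROBABILITY) branches to GUARD_TO, skipping
   the guarded code, while GUARD_BB's original successor edge becomes the
   false edge and keeps the inverted probability.  */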
1167
1168 static edge
1169 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
1170 basic_block guard_to, basic_block dom_bb,
1171 profile_probability probability, bool irreducible_p)
1172 {
1173 gimple_stmt_iterator gsi;
1174 edge new_e, enter_e;
1175 gcond *cond_stmt;
1176 gimple_seq gimplify_stmt_list = NULL;
1177
1178 enter_e = EDGE_SUCC (guard_bb, 0);
1179 enter_e->flags &= ~EDGE_FALLTHRU;
1180 enter_e->flags |= EDGE_FALSE_VALUE;
1181 gsi = gsi_last_bb (guard_bb);
1182
1183 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
1184 NULL_TREE);
1185 if (gimplify_stmt_list)
1186 gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
1187
1188 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
1189 gsi = gsi_last_bb (guard_bb);
1190 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
1191
1192 /* Add new edge to connect guard block to the merge/loop-exit block. */
1193 new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
1194
1195 new_e->probability = probability;
1196 if (irreducible_p)
1197 new_e->flags |= EDGE_IRREDUCIBLE_LOOP;
1198
1199 enter_e->probability = probability.invert ();
1200 set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
1201
1202 /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS. */
1203 if (enter_e->dest->loop_father->header == enter_e->dest)
1204 split_edge (enter_e);
1205
1206 return new_e;
1207 }
1208
1209
1210 /* This function verifies that the following restrictions apply to LOOP:
1211 (1) it consists of exactly 2 basic blocks (a header and an empty latch)
1212 if it is an innermost loop, or 5 basic blocks if it is an outer loop;
1213 (2) it has a single entry and a single exit;
1214 (3) its exit condition is the last stmt in the header;
1215 (4) E is the entry or exit edge of LOOP.
1216 */
1217
1218 bool
1219 slpeel_can_duplicate_loop_p (const class loop *loop, const_edge e)
1220 {
1221 edge exit_e = single_exit (loop);
1222 edge entry_e = loop_preheader_edge (loop);
1223 gcond *orig_cond = get_loop_exit_condition (loop);
1224 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
1225 unsigned int num_bb = loop->inner? 5 : 2;
1226
1227 /* All loops have an outer scope; the only case in which loop_outer ()
1228 is NULL is for the function itself. */
1229 if (!loop_outer (loop)
1230 || loop->num_nodes != num_bb
1231 || !empty_block_p (loop->latch)
1232 || !single_exit (loop)
1233 /* Verify that new loop exit condition can be trivially modified. */
1234 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
1235 || (e != exit_e && e != entry_e))
1236 return false;
1237
1238 return true;
1239 }
1240
1241 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
1242 in the exit bb and rename all the uses after the loop. This simplifies
1243 the *guard[12] routines, which assume loop closed SSA form for all PHIs
1244 (but normally loop closed SSA form doesn't require virtual PHIs to be
1245 in the same form). Doing this early simplifies checking which
1246 uses should be renamed. */
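/* An illustrative sketch (SSA names hypothetical): if the loop header has
   .MEM_1 = PHI <.MEM_0 (preheader), .MEM_2 (latch)>, this creates
   .MEM_3 = PHI <.MEM_2 (exit edge)> in the exit block and rewrites uses of
   .MEM_2 that occur after the loop to use .MEM_3 instead.  */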
1247
1248 static void
1249 create_lcssa_for_virtual_phi (class loop *loop)
1250 {
1251 gphi_iterator gsi;
1252 edge exit_e = single_exit (loop);
1253
1254 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1255 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1256 {
1257 gphi *phi = gsi.phi ();
1258 for (gsi = gsi_start_phis (exit_e->dest);
1259 !gsi_end_p (gsi); gsi_next (&gsi))
1260 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1261 break;
1262 if (gsi_end_p (gsi))
1263 {
1264 tree new_vop = copy_ssa_name (PHI_RESULT (phi));
1265 gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
1266 tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
1267 imm_use_iterator imm_iter;
1268 gimple *stmt;
1269 use_operand_p use_p;
1270
1271 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_vop)
1272 = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (vop);
1273 add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
1274 gimple_phi_set_result (new_phi, new_vop);
1275 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
1276 if (stmt != new_phi
1277 && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1278 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1279 SET_USE (use_p, new_vop);
1280 }
1281 break;
1282 }
1283
1284 }
1285
1286 /* Function vect_get_loop_location.
1287
1288 Extract the location of the loop in the source code.
1289 If the loop is not well formed for vectorization, an estimated
1290 location is calculated.
1291 Return the loop location on success and an unknown location otherwise. */
1292
1293 dump_user_location_t
1294 find_loop_location (class loop *loop)
1295 {
1296 gimple *stmt = NULL;
1297 basic_block bb;
1298 gimple_stmt_iterator si;
1299
1300 if (!loop)
1301 return dump_user_location_t ();
1302
1303 stmt = get_loop_exit_condition (loop);
1304
1305 if (stmt
1306 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1307 return stmt;
1308
1309 /* If we got here the loop is probably not "well formed";
1310 try to estimate the loop location. */
1311
1312 if (!loop->header)
1313 return dump_user_location_t ();
1314
1315 bb = loop->header;
1316
1317 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1318 {
1319 stmt = gsi_stmt (si);
1320 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1321 return stmt;
1322 }
1323
1324 return dump_user_location_t ();
1325 }
1326
1327 /* Return true if the phi described by STMT_INFO defines an IV of the
1328 loop to be vectorized. */
1329
1330 static bool
1331 iv_phi_p (stmt_vec_info stmt_info)
1332 {
1333 gphi *phi = as_a <gphi *> (stmt_info->stmt);
1334 if (virtual_operand_p (PHI_RESULT (phi)))
1335 return false;
1336
1337 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
1338 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
1339 return false;
1340
1341 return true;
1342 }
1343
1344 /* Function vect_can_advance_ivs_p
1345
1346 In case the number of iterations that LOOP iterates is unknown at compile
1347 time, an epilog loop will be generated, and the loop induction variables
1348 (IVs) will be "advanced" to the value they are supposed to take just before
1349 the epilog loop. Here we check that the access function of the loop IVs
1350 and the expression that represents the loop bound are simple enough.
1351 These restrictions will be relaxed in the future. */
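/* For example, a simple IV such as i_1 = PHI <0, i_2> with i_2 = i_1 + 4
   passes these checks, whereas an IV whose step is itself variant in the
   loop, or whose evolution is a chrec of degree >= 2, is rejected.  */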
1352
1353 bool
1354 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1355 {
1356 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1357 basic_block bb = loop->header;
1358 gphi_iterator gsi;
1359
1360 /* Analyze phi functions of the loop header. */
1361
1362 if (dump_enabled_p ())
1363 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
1364 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1365 {
1366 tree evolution_part;
1367
1368 gphi *phi = gsi.phi ();
1369 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
1370 if (dump_enabled_p ())
1371 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G",
1372 phi_info->stmt);
1373
1374 /* Skip virtual phi's. The data dependences that are associated with
1375 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
1376
1377 Skip reduction phis. */
1378 if (!iv_phi_p (phi_info))
1379 {
1380 if (dump_enabled_p ())
1381 dump_printf_loc (MSG_NOTE, vect_location,
1382 "reduc or virtual phi. skip.\n");
1383 continue;
1384 }
1385
1386 /* Analyze the evolution function. */
1387
1388 evolution_part = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
1389 if (evolution_part == NULL_TREE)
1390 {
1391 if (dump_enabled_p ())
1392 dump_printf (MSG_MISSED_OPTIMIZATION,
1393 "No access function or evolution.\n");
1394 return false;
1395 }
1396
1397 /* FORNOW: We do not transform initial conditions of IVs
1398 whose evolution functions are not invariant in the loop. */
1399
1400 if (!expr_invariant_in_loop_p (loop, evolution_part))
1401 {
1402 if (dump_enabled_p ())
1403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1404 "evolution not invariant in loop.\n");
1405 return false;
1406 }
1407
1408 /* FORNOW: We do not transform initial conditions of IVs
1409 whose evolution functions are a polynomial of degree >= 2. */
1410
1411 if (tree_is_chrec (evolution_part))
1412 {
1413 if (dump_enabled_p ())
1414 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1415 "evolution is chrec.\n");
1416 return false;
1417 }
1418 }
1419
1420 return true;
1421 }
1422
1423
1424 /* Function vect_update_ivs_after_vectorizer.
1425
1426 "Advance" the induction variables of LOOP to the value they should take
1427 after the execution of LOOP. This is currently necessary because the
1428 vectorizer does not handle induction variables that are used after the
1429 loop. Such a situation occurs when the last iterations of LOOP are
1430 peeled, because:
1431 1. We introduced new uses after LOOP for IVs that were not originally used
1432 after LOOP: the IVs of LOOP are now used by an epilog loop.
1433 2. LOOP is going to be vectorized; this means that it will iterate N/VF
1434 times, whereas the loop IVs should be bumped N times.
1435
1436 Input:
1437 - LOOP - a loop that is going to be vectorized. The last few iterations
1438 of LOOP were peeled.
1439 - NITERS - the number of iterations that LOOP executes (before it is
1440 vectorized). i.e, the number of times the ivs should be bumped.
1441 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1442 coming out from LOOP on which there are uses of the LOOP ivs
1443 (this is the path from LOOP->exit to epilog_loop->preheader).
1444
1445 The new definitions of the ivs are placed in LOOP->exit.
1446 The phi args associated with the edge UPDATE_E in the bb
1447 UPDATE_E->dest are updated accordingly.
1448
1449 Assumption 1: Like the rest of the vectorizer, this function assumes
1450 a single loop exit that has a single predecessor.
1451
1452 Assumption 2: The phi nodes in the LOOP header and in update_bb are
1453 organized in the same order.
1454
1455 Assumption 3: The access function of the ivs is simple enough (see
1456 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
1457
1458 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1459 coming out of LOOP on which the ivs of LOOP are used (this is the path
1460 that leads to the epilog loop; other paths skip the epilog loop). This
1461 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1462 needs to have its phis updated.
1463 */
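/* A hedged sketch of the update: for an IV with initial value INIT and
   step STEP, the new definition inserted in the exit block is simply
   ni = INIT + NITERS * STEP (a POINTER_PLUS_EXPR for pointer IVs), and
   the PHI argument on UPDATE_E in UPDATE_E->dest is replaced by ni.  */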
1464
1465 static void
1466 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
1467 tree niters, edge update_e)
1468 {
1469 gphi_iterator gsi, gsi1;
1470 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1471 basic_block update_bb = update_e->dest;
1472 basic_block exit_bb = single_exit (loop)->dest;
1473
1474 /* Make sure there exists a single-predecessor exit bb: */
1475 gcc_assert (single_pred_p (exit_bb));
1476 gcc_assert (single_succ_edge (exit_bb) == update_e);
1477
1478 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1479 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1480 gsi_next (&gsi), gsi_next (&gsi1))
1481 {
1482 tree init_expr;
1483 tree step_expr, off;
1484 tree type;
1485 tree var, ni, ni_name;
1486 gimple_stmt_iterator last_gsi;
1487
1488 gphi *phi = gsi.phi ();
1489 gphi *phi1 = gsi1.phi ();
1490 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
1491 if (dump_enabled_p ())
1492 dump_printf_loc (MSG_NOTE, vect_location,
1493 "vect_update_ivs_after_vectorizer: phi: %G", phi);
1494
1495 /* Skip reduction and virtual phis. */
1496 if (!iv_phi_p (phi_info))
1497 {
1498 if (dump_enabled_p ())
1499 dump_printf_loc (MSG_NOTE, vect_location,
1500 "reduc or virtual phi. skip.\n");
1501 continue;
1502 }
1503
1504 type = TREE_TYPE (gimple_phi_result (phi));
1505 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
1506 step_expr = unshare_expr (step_expr);
1507
1508 /* FORNOW: We do not support IVs whose evolution function is a polynomial
1509 of degree >= 2 or exponential. */
1510 gcc_assert (!tree_is_chrec (step_expr));
1511
1512 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1513
1514 off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1515 fold_convert (TREE_TYPE (step_expr), niters),
1516 step_expr);
1517 if (POINTER_TYPE_P (type))
1518 ni = fold_build_pointer_plus (init_expr, off);
1519 else
1520 ni = fold_build2 (PLUS_EXPR, type,
1521 init_expr, fold_convert (type, off));
1522
1523 var = create_tmp_var (type, "tmp");
1524
1525 last_gsi = gsi_last_bb (exit_bb);
1526 gimple_seq new_stmts = NULL;
1527 ni_name = force_gimple_operand (ni, &new_stmts, false, var);
1528 /* Exit_bb shouldn't be empty. */
1529 if (!gsi_end_p (last_gsi))
1530 gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
1531 else
1532 gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
1533
1534 /* Fix phi expressions in the successor bb. */
1535 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
1536 }
1537 }
1538
1539 /* Return a gimple value containing the misalignment (measured in vector
1540 elements) for the loop described by LOOP_VINFO, i.e. how many elements
1541 it is away from a perfectly aligned address. Add any new statements
1542 to SEQ. */
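/* For instance (numbers illustrative only): with a 16-byte target
   alignment, 4-byte elements and a start address ending in 0xc,
   misalign_in_bytes is addr & 15 == 12 and the result is 12 >> 2 == 3
   elements.  */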
1543
1544 static tree
1545 get_misalign_in_elems (gimple **seq, loop_vec_info loop_vinfo)
1546 {
1547 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1548 stmt_vec_info stmt_info = dr_info->stmt;
1549 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1550
1551 poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
1552 unsigned HOST_WIDE_INT target_align_c;
1553 tree target_align_minus_1;
1554
1555 bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
1556 size_zero_node) < 0;
1557 tree offset = (negative
1558 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1)
1559 : size_zero_node);
1560 tree start_addr = vect_create_addr_base_for_vector_ref (stmt_info, seq,
1561 offset);
1562 tree type = unsigned_type_for (TREE_TYPE (start_addr));
1563 if (target_align.is_constant (&target_align_c))
1564 target_align_minus_1 = build_int_cst (type, target_align_c - 1);
1565 else
1566 {
1567 tree vla = build_int_cst (type, target_align);
1568 tree vla_align = fold_build2 (BIT_AND_EXPR, type, vla,
1569 fold_build2 (MINUS_EXPR, type,
1570 build_int_cst (type, 0), vla));
1571 target_align_minus_1 = fold_build2 (MINUS_EXPR, type, vla_align,
1572 build_int_cst (type, 1));
1573 }
1574
1575 HOST_WIDE_INT elem_size
1576 = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1577 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
1578
1579 /* Create: misalign_in_bytes = addr & (target_align - 1). */
1580 tree int_start_addr = fold_convert (type, start_addr);
1581 tree misalign_in_bytes = fold_build2 (BIT_AND_EXPR, type, int_start_addr,
1582 target_align_minus_1);
1583
1584 /* Create: misalign_in_elems = misalign_in_bytes / element_size. */
1585 tree misalign_in_elems = fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes,
1586 elem_size_log);
1587
1588 return misalign_in_elems;
1589 }
1590
1591 /* Function vect_gen_prolog_loop_niters
1592
1593 Generate the number of iterations which should be peeled as prolog for the
1594 loop represented by LOOP_VINFO. It is calculated as the misalignment of
1595 DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
1596 As a result, after the execution of this loop, the data reference DR will
1597 refer to an aligned location. The following computation is generated:
1598
1599 If the misalignment of DR is known at compile time:
1600 addr_mis = int mis = DR_MISALIGNMENT (dr);
1601 Else, compute address misalignment in bytes:
1602 addr_mis = addr & (target_align - 1)
1603
1604 prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
1605
1606 (elem_size = element type size; an element is the scalar element whose type
1607 is the inner type of the vectype)
1608
1609 The computations will be emitted at the end of BB. We also compute and
1610 store upper bound (included) of the result in BOUND.
1611
1612 When the step of the data-ref in the loop is not 1 (as in interleaved data
1613 and SLP), the number of iterations of the prolog must be divided by the step
1614 (which is equal to the size of interleaved group).
1615
1616 The above formulas assume that VF == number of elements in the vector. This
1617 may not hold when there are multiple-types in the loop.
1618 In this case, for some data-references in the loop the VF does not represent
1619 the number of elements that fit in the vector. Therefore, instead of VF we
1620 use TYPE_VECTOR_SUBPARTS. */
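/* A worked example (illustrative only): with a 16-byte target alignment
   and 4-byte elements, a misalignment of 3 elements gives
   prolog_niters = (4 - 3) & (4 - 1) == 1, after which DR is aligned.  */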
1621
1622 static tree
1623 vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
1624 basic_block bb, int *bound)
1625 {
1626 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1627 tree var;
1628 tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
1629 gimple_seq stmts = NULL, new_stmts = NULL;
1630 tree iters, iters_name;
1631 stmt_vec_info stmt_info = dr_info->stmt;
1632 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1633 poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
1634
1635 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1636 {
1637 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1638
1639 if (dump_enabled_p ())
1640 dump_printf_loc (MSG_NOTE, vect_location,
1641 "known peeling = %d.\n", npeel);
1642
1643 iters = build_int_cst (niters_type, npeel);
1644 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1645 }
1646 else
1647 {
1648 tree misalign_in_elems = get_misalign_in_elems (&stmts, loop_vinfo);
1649 tree type = TREE_TYPE (misalign_in_elems);
1650 HOST_WIDE_INT elem_size
1651 = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1652 /* We only do prolog peeling if the target alignment is known at compile
1653 time. */
1654 poly_uint64 align_in_elems =
1655 exact_div (target_align, elem_size);
1656 tree align_in_elems_minus_1 =
1657 build_int_cst (type, align_in_elems - 1);
1658 tree align_in_elems_tree = build_int_cst (type, align_in_elems);
1659
1660 /* Create: (niters_type) ((align_in_elems - misalign_in_elems)
1661 & (align_in_elems - 1)). */
1662 bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
1663 size_zero_node) < 0;
1664 if (negative)
1665 iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
1666 align_in_elems_tree);
1667 else
1668 iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
1669 misalign_in_elems);
1670 iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
1671 iters = fold_convert (niters_type, iters);
1672 unsigned HOST_WIDE_INT align_in_elems_c;
1673 if (align_in_elems.is_constant (&align_in_elems_c))
1674 *bound = align_in_elems_c - 1;
1675 else
1676 *bound = -1;
1677 }
1678
1679 if (dump_enabled_p ())
1680 dump_printf_loc (MSG_NOTE, vect_location,
1681 "niters for prolog loop: %T\n", iters);
1682
1683 var = create_tmp_var (niters_type, "prolog_loop_niters");
1684 iters_name = force_gimple_operand (iters, &new_stmts, false, var);
1685
1686 if (new_stmts)
1687 gimple_seq_add_seq (&stmts, new_stmts);
1688 if (stmts)
1689 {
1690 gcc_assert (single_succ_p (bb));
1691 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1692 if (gsi_end_p (gsi))
1693 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1694 else
1695 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1696 }
1697 return iters_name;
1698 }
1699
1700
1701 /* Function vect_update_init_of_dr
1702
1703 If CODE is PLUS, the vector loop starts NITERS iterations after the
1704 scalar one, otherwise CODE is MINUS and the vector loop starts NITERS
1705 iterations before the scalar one (using masking to skip inactive
1706 elements). This function updates the information recorded in DR to
1707 account for the difference. Specifically, it updates the OFFSET
1708 field of DR. */
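
/* For instance (an illustrative addition): peeling NITERS = 3 iterations of
 a data reference with DR_STEP = 4 bytes under CODE = PLUS_EXPR adds
 3 * 4 = 12 to DR_OFFSET, so the vector loop's first access starts 12 bytes
 further into the object. */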
1709
1710 static void
1711 vect_update_init_of_dr (struct data_reference *dr, tree niters, tree_code code)
1712 {
1713 tree offset = DR_OFFSET (dr);
1714
1715 niters = fold_build2 (MULT_EXPR, sizetype,
1716 fold_convert (sizetype, niters),
1717 fold_convert (sizetype, DR_STEP (dr)));
1718 offset = fold_build2 (code, sizetype,
1719 fold_convert (sizetype, offset), niters);
1720 DR_OFFSET (dr) = offset;
1721 }
1722
1723
1724 /* Function vect_update_inits_of_drs
1725
1726 Apply vect_update_inits_of_dr to all accesses in LOOP_VINFO.
1727 CODE and NITERS are as for vect_update_inits_of_dr. */
1728
1729 void
1730 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters,
1731 tree_code code)
1732 {
1733 unsigned int i;
1734 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1735 struct data_reference *dr;
1736
1737 DUMP_VECT_SCOPE ("vect_update_inits_of_dr");
1738
1739 /* Adjust niters to sizetype. We used to insert the stmts on loop preheader
1740 here, but since we might use these niters to update the epilogue's niters
1741 and data references, we can't insert them here as this definition might not
1742 always dominate its uses. */
1743 if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
1744 niters = fold_convert (sizetype, niters);
1745
1746 FOR_EACH_VEC_ELT (datarefs, i, dr)
1747 {
1748 dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
1749 if (!STMT_VINFO_GATHER_SCATTER_P (dr_info->stmt))
1750 vect_update_init_of_dr (dr, niters, code);
1751 }
1752 }
1753
1754 /* For the information recorded in LOOP_VINFO prepare the loop for peeling
1755 by masking. This involves calculating the number of iterations to
1756 be peeled and then aligning all memory references appropriately. */
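
/* Roughly, as an illustration (not from the original sources): with VF = 8
 and PEELING_FOR_ALIGNMENT = 3, MASK_SKIP_NITERS becomes 8 - 3 = 5, so the
 first 5 lanes of the first masked iteration are inactive and every data
 reference offset is moved back by 5 scalar iterations to compensate. */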
1757
1758 void
1759 vect_prepare_for_masked_peels (loop_vec_info loop_vinfo)
1760 {
1761 tree misalign_in_elems;
1762 tree type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
1763
1764 gcc_assert (vect_use_loop_mask_for_alignment_p (loop_vinfo));
1765
1766 /* From the information recorded in LOOP_VINFO get the number of iterations
1767 that need to be skipped via masking. */
1768 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1769 {
1770 poly_int64 misalign = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1771 - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo));
1772 misalign_in_elems = build_int_cst (type, misalign);
1773 }
1774 else
1775 {
1776 gimple_seq seq1 = NULL, seq2 = NULL;
1777 misalign_in_elems = get_misalign_in_elems (&seq1, loop_vinfo);
1778 misalign_in_elems = fold_convert (type, misalign_in_elems);
1779 misalign_in_elems = force_gimple_operand (misalign_in_elems,
1780 &seq2, true, NULL_TREE);
1781 gimple_seq_add_seq (&seq1, seq2);
1782 if (seq1)
1783 {
1784 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1785 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq1);
1786 gcc_assert (!new_bb);
1787 }
1788 }
1789
1790 if (dump_enabled_p ())
1791 dump_printf_loc (MSG_NOTE, vect_location,
1792 "misalignment for fully-masked loop: %T\n",
1793 misalign_in_elems);
1794
1795 LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo) = misalign_in_elems;
1796
1797 vect_update_inits_of_drs (loop_vinfo, misalign_in_elems, MINUS_EXPR);
1798 }
1799
1800 /* This function builds ni_name = number of iterations. Statements
1801 are emitted on the loop preheader edge. If NEW_VAR_P is not NULL, set
1802 it to TRUE if a new ssa_var is generated. */
1803
1804 tree
1805 vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)
1806 {
1807 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1808 if (TREE_CODE (ni) == INTEGER_CST)
1809 return ni;
1810 else
1811 {
1812 tree ni_name, var;
1813 gimple_seq stmts = NULL;
1814 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1815
1816 var = create_tmp_var (TREE_TYPE (ni), "niters");
1817 ni_name = force_gimple_operand (ni, &stmts, false, var);
1818 if (stmts)
1819 {
1820 gsi_insert_seq_on_edge_immediate (pe, stmts);
1821 if (new_var_p != NULL)
1822 *new_var_p = true;
1823 }
1824
1825 return ni_name;
1826 }
1827 }
1828
1829 /* Calculate the number of iterations above which the vectorized loop will be
1830 preferred over the scalar loop. NITERS_PROLOG is the number of iterations
1831 of the prolog loop. If it is an integer constant, the value is also passed
1832 in INT_NITERS_PROLOG. BOUND_PROLOG is the upper bound (inclusive) of the
1833 number of iterations of the prolog loop. BOUND_EPILOG is the corresponding
1834 value for the epilog loop. If CHECK_PROFITABILITY is true, TH is the
1835 threshold below which the scalar (rather than vectorized) loop will be
1836 executed. This function stores the upper bound (inclusive) of the result
1837 in BOUND_SCALAR. */
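
/* An illustrative example (an addition): with a compile-time prolog of
 INT_NITERS_PROLOG = 3, BOUND_EPILOG = 7 and a profitability threshold
 TH = 20, the scalar loop can run at most TH - 1 = 19 iterations, which
 exceeds 3 + 7, so *BOUND_SCALAR is set to 19 and the constant 19 is
 returned; without CHECK_PROFITABILITY the result is simply
 NITERS_PROLOG + BOUND_EPILOG. */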
1838
1839 static tree
1840 vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
1841 int bound_prolog, poly_int64 bound_epilog, int th,
1842 poly_uint64 *bound_scalar,
1843 bool check_profitability)
1844 {
1845 tree type = TREE_TYPE (niters_prolog);
1846 tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
1847 build_int_cst (type, bound_epilog));
1848
1849 *bound_scalar = bound_prolog + bound_epilog;
1850 if (check_profitability)
1851 {
1852 /* TH indicates the minimum niters of the vectorized loop, while we
1853 compute the maximum niters of the scalar loop. */
1854 th--;
1855 /* Peeling for constant times. */
1856 if (int_niters_prolog >= 0)
1857 {
1858 *bound_scalar = upper_bound (int_niters_prolog + bound_epilog, th);
1859 return build_int_cst (type, *bound_scalar);
1860 }
1861 /* Peeling an unknown number of times. Note that both BOUND_PROLOG
1862 and BOUND_EPILOG are inclusive upper bounds. */
1863 if (known_ge (th, bound_prolog + bound_epilog))
1864 {
1865 *bound_scalar = th;
1866 return build_int_cst (type, th);
1867 }
1868 /* Need to do runtime comparison. */
1869 else if (maybe_gt (th, bound_epilog))
1870 {
1871 *bound_scalar = upper_bound (*bound_scalar, th);
1872 return fold_build2 (MAX_EXPR, type,
1873 build_int_cst (type, th), niters);
1874 }
1875 }
1876 return niters;
1877 }
1878
1879 /* NITERS is the number of times that the original scalar loop executes
1880 after peeling. Work out the maximum number of iterations N that can
1881 be handled by the vectorized form of the loop and then either:
1882
1883 a) set *STEP_VECTOR_PTR to the vectorization factor and generate:
1884
1885 niters_vector = N
1886
1887 b) set *STEP_VECTOR_PTR to one and generate:
1888
1889 niters_vector = N / vf
1890
1891 In both cases, store niters_vector in *NITERS_VECTOR_PTR and add
1892 any new statements on the loop preheader edge. NITERS_NO_OVERFLOW
1893 is true if NITERS doesn't overflow (i.e. if NITERS is always nonzero). */
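
/* A small worked example (an addition, not from the original comment): for
 NITERS = 17 and a constant VF = 4 without full masking, case b) applies;
 if NITERS cannot overflow the code emits niters_vector = 17 >> 2 = 4,
 otherwise it emits ((17 - 4) >> 2) + 1 = 4, and *STEP_VECTOR_PTR is set
 to one. */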
1894
1895 void
1896 vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
1897 tree *niters_vector_ptr, tree *step_vector_ptr,
1898 bool niters_no_overflow)
1899 {
1900 tree ni_minus_gap, var;
1901 tree niters_vector, step_vector, type = TREE_TYPE (niters);
1902 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1903 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1904 tree log_vf = NULL_TREE;
1905
1906 /* If an epilogue loop is required because of data accesses with gaps, we
1907 subtract one iteration from the total number of iterations here for
1908 the correct calculation of RATIO. */
1909 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1910 {
1911 ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
1912 build_one_cst (type));
1913 if (!is_gimple_val (ni_minus_gap))
1914 {
1915 var = create_tmp_var (type, "ni_gap");
1916 gimple *stmts = NULL;
1917 ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
1918 true, var);
1919 gsi_insert_seq_on_edge_immediate (pe, stmts);
1920 }
1921 }
1922 else
1923 ni_minus_gap = niters;
1924
1925 unsigned HOST_WIDE_INT const_vf;
1926 if (vf.is_constant (&const_vf)
1927 && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
1928 {
1929 /* Create: niters >> log2(vf) */
1930 /* If it's known that niters == number of latch executions + 1 doesn't
1931 overflow, we can generate niters >> log2(vf); otherwise we generate
1932 (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
1933 will be at least one. */
1934 log_vf = build_int_cst (type, exact_log2 (const_vf));
1935 if (niters_no_overflow)
1936 niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf);
1937 else
1938 niters_vector
1939 = fold_build2 (PLUS_EXPR, type,
1940 fold_build2 (RSHIFT_EXPR, type,
1941 fold_build2 (MINUS_EXPR, type,
1942 ni_minus_gap,
1943 build_int_cst (type, vf)),
1944 log_vf),
1945 build_int_cst (type, 1));
1946 step_vector = build_one_cst (type);
1947 }
1948 else
1949 {
1950 niters_vector = ni_minus_gap;
1951 step_vector = build_int_cst (type, vf);
1952 }
1953
1954 if (!is_gimple_val (niters_vector))
1955 {
1956 var = create_tmp_var (type, "bnd");
1957 gimple_seq stmts = NULL;
1958 niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
1959 gsi_insert_seq_on_edge_immediate (pe, stmts);
1960 /* The peeling algorithm guarantees that the vector loop bound is at least
1961 ONE; we set range information to make the niters analyzer's life easier. */
1962 if (stmts != NULL && log_vf)
1963 set_range_info (niters_vector, VR_RANGE,
1964 wi::to_wide (build_int_cst (type, 1)),
1965 wi::to_wide (fold_build2 (RSHIFT_EXPR, type,
1966 TYPE_MAX_VALUE (type),
1967 log_vf)));
1968 }
1969 *niters_vector_ptr = niters_vector;
1970 *step_vector_ptr = step_vector;
1971
1972 return;
1973 }
1974
1975 /* Given NITERS_VECTOR which is the number of iterations for vectorized
1976 loop specified by LOOP_VINFO after vectorization, compute the number
1977 of iterations before vectorization (niters_vector * vf) and store it
1978 to NITERS_VECTOR_MULT_VF_PTR. */
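
/* E.g. (illustrative): with niters_vector = 4 and a constant VF = 8, this
 computes niters_vector_mult_vf = 4 << 3 = 32 and, if needed, emits the
 computation at the start of the loop's single exit block. */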
1979
1980 static void
1981 vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
1982 tree niters_vector,
1983 tree *niters_vector_mult_vf_ptr)
1984 {
1985 /* We should be using a step_vector of VF if VF is variable. */
1986 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ();
1987 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1988 tree type = TREE_TYPE (niters_vector);
1989 tree log_vf = build_int_cst (type, exact_log2 (vf));
1990 basic_block exit_bb = single_exit (loop)->dest;
1991
1992 gcc_assert (niters_vector_mult_vf_ptr != NULL);
1993 tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
1994 niters_vector, log_vf);
1995 if (!is_gimple_val (niters_vector_mult_vf))
1996 {
1997 tree var = create_tmp_var (type, "niters_vector_mult_vf");
1998 gimple_seq stmts = NULL;
1999 niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
2000 &stmts, true, var);
2001 gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
2002 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2003 }
2004 *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
2005 }
2006
2007 /* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
2008 this function searches for the corresponding lcssa phi node in exit
2009 bb of LOOP. If it is found, return the phi result; otherwise return
2010 NULL. */
2011
2012 static tree
2013 find_guard_arg (class loop *loop, class loop *epilog ATTRIBUTE_UNUSED,
2014 gphi *lcssa_phi)
2015 {
2016 gphi_iterator gsi;
2017 edge e = single_exit (loop);
2018
2019 gcc_assert (single_pred_p (e->dest));
2020 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2021 {
2022 gphi *phi = gsi.phi ();
2023 if (operand_equal_p (PHI_ARG_DEF (phi, 0),
2024 PHI_ARG_DEF (lcssa_phi, 0), 0))
2025 return PHI_RESULT (phi);
2026 }
2027 return NULL_TREE;
2028 }
2029
2030 /* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
2031 from SECOND/FIRST and puts it at the original loop's preheader/exit
2032 edge; the two loops are arranged as below:
2033
2034 preheader_a:
2035 first_loop:
2036 header_a:
2037 i_1 = PHI<i_0, i_2>;
2038 ...
2039 i_2 = i_1 + 1;
2040 if (cond_a)
2041 goto latch_a;
2042 else
2043 goto between_bb;
2044 latch_a:
2045 goto header_a;
2046
2047 between_bb:
2048 ;; i_x = PHI<i_2>; ;; LCSSA phi node to be created for FIRST,
2049
2050 second_loop:
2051 header_b:
2052 i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
2053 or with i_2 if no LCSSA phi is created
2054 under condition of CREATE_LCSSA_FOR_IV_PHIS.
2055 ...
2056 i_4 = i_3 + 1;
2057 if (cond_b)
2058 goto latch_b;
2059 else
2060 goto exit_bb;
2061 latch_b:
2062 goto header_b;
2063
2064 exit_bb:
2065
2066 This function creates loop-closed SSA for the first loop and updates the
2067 second loop's PHI nodes by replacing the argument on the incoming edge with
2068 the result of the newly created lcssa PHI nodes. If CREATE_LCSSA_FOR_IV_PHIS
2069 is false, loop-closed SSA PHIs will only be created for non-IV PHIs of
2070 the first loop.
2071
2072 This function assumes the exit bb of the first loop is the preheader bb of
2073 the second loop, i.e., between_bb in the example code. With PHIs updated,
2074 the second loop will execute the remaining iterations of the first. */
2075
2076 static void
2077 slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
2078 class loop *first, class loop *second,
2079 bool create_lcssa_for_iv_phis)
2080 {
2081 gphi_iterator gsi_update, gsi_orig;
2082 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2083
2084 edge first_latch_e = EDGE_SUCC (first->latch, 0);
2085 edge second_preheader_e = loop_preheader_edge (second);
2086 basic_block between_bb = single_exit (first)->dest;
2087
2088 gcc_assert (between_bb == second_preheader_e->src);
2089 gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
2090 /* Either the first loop or the second is the loop to be vectorized. */
2091 gcc_assert (loop == first || loop == second);
2092
2093 for (gsi_orig = gsi_start_phis (first->header),
2094 gsi_update = gsi_start_phis (second->header);
2095 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
2096 gsi_next (&gsi_orig), gsi_next (&gsi_update))
2097 {
2098 gphi *orig_phi = gsi_orig.phi ();
2099 gphi *update_phi = gsi_update.phi ();
2100
2101 tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
2102 /* Generate lcssa PHI node for the first loop. */
2103 gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
2104 stmt_vec_info vect_phi_info = loop_vinfo->lookup_stmt (vect_phi);
2105 if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi_info))
2106 {
2107 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2108 gphi *lcssa_phi = create_phi_node (new_res, between_bb);
2109 add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
2110 arg = new_res;
2111 }
2112
2113 /* Update PHI node in the second loop by replacing arg on the loop's
2114 incoming edge. */
2115 adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
2116 }
2117
2118 /* For epilogue peeling we have to make sure to copy all LC PHIs
2119 for correct vectorization of live stmts. */
2120 if (loop == first)
2121 {
2122 basic_block orig_exit = single_exit (second)->dest;
2123 for (gsi_orig = gsi_start_phis (orig_exit);
2124 !gsi_end_p (gsi_orig); gsi_next (&gsi_orig))
2125 {
2126 gphi *orig_phi = gsi_orig.phi ();
2127 tree orig_arg = PHI_ARG_DEF (orig_phi, 0);
2128 if (TREE_CODE (orig_arg) != SSA_NAME || virtual_operand_p (orig_arg))
2129 continue;
2130
2131 /* Already created in the above loop. */
2132 if (find_guard_arg (first, second, orig_phi))
2133 continue;
2134
2135 tree new_res = copy_ssa_name (orig_arg);
2136 gphi *lcphi = create_phi_node (new_res, between_bb);
2137 add_phi_arg (lcphi, orig_arg, single_exit (first), UNKNOWN_LOCATION);
2138 }
2139 }
2140 }
2141
2142 /* Function slpeel_add_loop_guard adds guard skipping from the beginning
2143 of SKIP_LOOP to the beginning of UPDATE_LOOP. GUARD_EDGE and MERGE_EDGE
2144 are two pred edges of the merge point before UPDATE_LOOP. The two loops
2145 appear like below:
2146
2147 guard_bb:
2148 if (cond)
2149 goto merge_bb;
2150 else
2151 goto skip_loop;
2152
2153 skip_loop:
2154 header_a:
2155 i_1 = PHI<i_0, i_2>;
2156 ...
2157 i_2 = i_1 + 1;
2158 if (cond_a)
2159 goto latch_a;
2160 else
2161 goto exit_a;
2162 latch_a:
2163 goto header_a;
2164
2165 exit_a:
2166 i_5 = PHI<i_2>;
2167
2168 merge_bb:
2169 ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
2170
2171 update_loop:
2172 header_b:
2173 i_3 = PHI<i_5, i_4>; ;; Use of i_5 to be replaced with i_x.
2174 ...
2175 i_4 = i_3 + 1;
2176 if (cond_b)
2177 goto latch_b;
2178 else
2179 goto exit_bb;
2180 latch_b:
2181 goto header_b;
2182
2183 exit_bb:
2184
2185 This function creates PHI nodes at merge_bb and replaces the use of i_5
2186 in the update_loop's PHI node with the newly created PHI result. */
2187
2188 static void
2189 slpeel_update_phi_nodes_for_guard1 (class loop *skip_loop,
2190 class loop *update_loop,
2191 edge guard_edge, edge merge_edge)
2192 {
2193 location_t merge_loc, guard_loc;
2194 edge orig_e = loop_preheader_edge (skip_loop);
2195 edge update_e = loop_preheader_edge (update_loop);
2196 gphi_iterator gsi_orig, gsi_update;
2197
2198 for ((gsi_orig = gsi_start_phis (skip_loop->header),
2199 gsi_update = gsi_start_phis (update_loop->header));
2200 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
2201 gsi_next (&gsi_orig), gsi_next (&gsi_update))
2202 {
2203 gphi *orig_phi = gsi_orig.phi ();
2204 gphi *update_phi = gsi_update.phi ();
2205
2206 /* Generate new phi node at merge bb of the guard. */
2207 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2208 gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
2209
2210 /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set the
2211 args in NEW_PHI for these edges. */
2212 tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
2213 tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
2214 merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
2215 guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
2216 add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
2217 add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
2218
2219 /* Update phi in UPDATE_PHI. */
2220 adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
2221 }
2222 }
2223
2224 /* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
2225 from LOOP. Function slpeel_add_loop_guard adds guard skipping from a
2226 point between the two loops to the end of EPILOG. Edges GUARD_EDGE
2227 and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
2228 The CFG looks like:
2229
2230 loop:
2231 header_a:
2232 i_1 = PHI<i_0, i_2>;
2233 ...
2234 i_2 = i_1 + 1;
2235 if (cond_a)
2236 goto latch_a;
2237 else
2238 goto exit_a;
2239 latch_a:
2240 goto header_a;
2241
2242 exit_a:
2243
2244 guard_bb:
2245 if (cond)
2246 goto merge_bb;
2247 else
2248 goto epilog_loop;
2249
2250 ;; fall_through_bb
2251
2252 epilog_loop:
2253 header_b:
2254 i_3 = PHI<i_2, i_4>;
2255 ...
2256 i_4 = i_3 + 1;
2257 if (cond_b)
2258 goto latch_b;
2259 else
2260 goto merge_bb;
2261 latch_b:
2262 goto header_b;
2263
2264 merge_bb:
2265 ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
2266
2267 exit_bb:
2268 i_x = PHI<i_4>; ;Use of i_4 to be replaced with i_y in merge_bb.
2269
2270 For each name used outside EPILOG (i.e. for each name that has a lcssa
2271 phi in exit_bb) we create a new PHI in merge_bb. The new PHI has two
2272 args corresponding to GUARD_EDGE and MERGE_EDGE. Arg for MERGE_EDGE is
2273 the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
2274 by LOOP and is found in the exit bb of LOOP. Arg of the original PHI
2275 in exit_bb will also be updated. */
2276
2277 static void
2278 slpeel_update_phi_nodes_for_guard2 (class loop *loop, class loop *epilog,
2279 edge guard_edge, edge merge_edge)
2280 {
2281 gphi_iterator gsi;
2282 basic_block merge_bb = guard_edge->dest;
2283
2284 gcc_assert (single_succ_p (merge_bb));
2285 edge e = single_succ_edge (merge_bb);
2286 basic_block exit_bb = e->dest;
2287 gcc_assert (single_pred_p (exit_bb));
2288 gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);
2289
2290 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2291 {
2292 gphi *update_phi = gsi.phi ();
2293 tree old_arg = PHI_ARG_DEF (update_phi, 0);
2294
2295 tree merge_arg = NULL_TREE;
2296
2297 /* If the old argument is a SSA_NAME use its current_def. */
2298 if (TREE_CODE (old_arg) == SSA_NAME)
2299 merge_arg = get_current_def (old_arg);
2300 /* If it's a constant or doesn't have a current_def, just use the old
2301 argument. */
2302 if (!merge_arg)
2303 merge_arg = old_arg;
2304
2305 tree guard_arg = find_guard_arg (loop, epilog, update_phi);
2306 /* If the var is live after loop but not a reduction, we simply
2307 use the old arg. */
2308 if (!guard_arg)
2309 guard_arg = old_arg;
2310
2311 /* Create new phi node in MERGE_BB: */
2312 tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
2313 gphi *merge_phi = create_phi_node (new_res, merge_bb);
2314
2315 /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE, Set
2316 the two PHI args in merge_phi for these edges. */
2317 add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
2318 add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
2319
2320 /* Update the original phi in exit_bb. */
2321 adjust_phi_and_debug_stmts (update_phi, e, new_res);
2322 }
2323 }
2324
2325 /* EPILOG loop is duplicated from the original loop for vectorizing,
2326 the arg of its loop closed ssa PHI needs to be updated. */
2327
2328 static void
2329 slpeel_update_phi_nodes_for_lcssa (class loop *epilog)
2330 {
2331 gphi_iterator gsi;
2332 basic_block exit_bb = single_exit (epilog)->dest;
2333
2334 gcc_assert (single_pred_p (exit_bb));
2335 edge e = EDGE_PRED (exit_bb, 0);
2336 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2337 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
2338 }
2339
2340 /* Function vect_do_peeling.
2341
2342 Input:
2343 - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
2344
2345 preheader:
2346 LOOP:
2347 header_bb:
2348 loop_body
2349 if (exit_loop_cond) goto exit_bb
2350 else goto header_bb
2351 exit_bb:
2352
2353 - NITERS: The number of iterations of the loop.
2354 - NITERSM1: The number of iterations of the loop's latch.
2355 - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
2356 - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
2357 CHECK_PROFITABILITY is true.
2358 Output:
2359 - *NITERS_VECTOR and *STEP_VECTOR describe how the main loop should
2360 iterate after vectorization; see vect_set_loop_condition for details.
2361 - *NITERS_VECTOR_MULT_VF_VAR is either null or an SSA name that
2362 should be set to the number of scalar iterations handled by the
2363 vector loop. The SSA name is only used on exit from the loop.
2364
2365 This function peels prolog and epilog from the loop, adds guards skipping
2366 PROLOG and EPILOG for various conditions. As a result, the changed CFG
2367 would look like:
2368
2369 guard_bb_1:
2370 if (prefer_scalar_loop) goto merge_bb_1
2371 else goto guard_bb_2
2372
2373 guard_bb_2:
2374 if (skip_prolog) goto merge_bb_2
2375 else goto prolog_preheader
2376
2377 prolog_preheader:
2378 PROLOG:
2379 prolog_header_bb:
2380 prolog_body
2381 if (exit_prolog_cond) goto prolog_exit_bb
2382 else goto prolog_header_bb
2383 prolog_exit_bb:
2384
2385 merge_bb_2:
2386
2387 vector_preheader:
2388 VECTOR LOOP:
2389 vector_header_bb:
2390 vector_body
2391 if (exit_vector_cond) goto vector_exit_bb
2392 else goto vector_header_bb
2393 vector_exit_bb:
2394
2395 guard_bb_3:
2396 if (skip_epilog) goto merge_bb_3
2397 else goto epilog_preheader
2398
2399 merge_bb_1:
2400
2401 epilog_preheader:
2402 EPILOG:
2403 epilog_header_bb:
2404 epilog_body
2405 if (exit_epilog_cond) goto merge_bb_3
2406 else goto epilog_header_bb
2407
2408 merge_bb_3:
2409
2410 Note this function peels the prolog and epilog, and adds the guards,
2411 only when necessary.
2412 This function returns the epilogue loop if a decision was made to vectorize
2413 it, otherwise NULL.
2414
2415 The analysis resulting in this epilogue loop's loop_vec_info was performed
2416 in the same vect_analyze_loop call as the main loop's. At that time
2417 vect_analyze_loop constructs a list of accepted loop_vec_info's for lower
2418 vectorization factors than the main loop. This list is stored in the main
2419 loop's loop_vec_info in the 'epilogue_vinfos' member. Every time we decide to
2420 vectorize the epilogue loop for a lower vectorization factor, the
2421 loop_vec_info sitting at the top of the epilogue_vinfos list is removed,
2422 updated and linked to the epilogue loop. This is later used to vectorize
2423 the epilogue. The reason the loop_vec_info needs updating is that it was
2424 constructed based on the original main loop, and the epilogue loop is a
2425 copy of this loop, so all links pointing to statements in the original loop
2426 need updating. Furthermore, these loop_vec_infos share the
2427 data_reference's records, which will also need to be updated.
2428
2429 TODO: Guard for prefer_scalar_loop should be emitted along with
2430 versioning conditions if loop versioning is needed. */
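
/* An illustrative peeling example (an addition, not in the original
 comment): with NITERS = 100, a prolog of 3 iterations for alignment and
 VF = 8, the prolog executes 3 scalar iterations, the vector loop covers
 floor (97 / 8) * 8 = 96 iterations and the epilog handles the remaining
 1 iteration; guard_bb_2 skips the prolog when its iteration count is zero
 and guard_bb_3 skips the epilog when no iterations remain. */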
2431
2432
2433 class loop *
2434 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
2435 tree *niters_vector, tree *step_vector,
2436 tree *niters_vector_mult_vf_var, int th,
2437 bool check_profitability, bool niters_no_overflow,
2438 tree *advance, drs_init_vec &orig_drs_init)
2439 {
2440 edge e, guard_e;
2441 tree type = TREE_TYPE (niters), guard_cond;
2442 basic_block guard_bb, guard_to;
2443 profile_probability prob_prolog, prob_vector, prob_epilog;
2444 int estimated_vf;
2445 int prolog_peeling = 0;
2446 bool vect_epilogues = loop_vinfo->epilogue_vinfos.length () > 0;
2447 /* We currently do not support prolog peeling if the target alignment is not
2448 known at compile time. 'vect_gen_prolog_loop_niters' depends on the
2449 target alignment being constant. */
2450 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2451 if (dr_info && !DR_TARGET_ALIGNMENT (dr_info).is_constant ())
2452 return NULL;
2453
2454 if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
2455 prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
2456
2457 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2458 poly_uint64 bound_epilog = 0;
2459 if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
2460 && LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
2461 bound_epilog += vf - 1;
2462 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
2463 bound_epilog += 1;
2464 bool epilog_peeling = maybe_ne (bound_epilog, 0U);
2465 poly_uint64 bound_scalar = bound_epilog;
2466
2467 if (!prolog_peeling && !epilog_peeling)
2468 return NULL;
2469
2470 prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
2471 estimated_vf = vect_vf_for_cost (loop_vinfo);
2472 if (estimated_vf == 2)
2473 estimated_vf = 3;
2474 prob_prolog = prob_epilog = profile_probability::guessed_always ()
2475 .apply_scale (estimated_vf - 1, estimated_vf);
2476
2477 class loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
2478 class loop *first_loop = loop;
2479 bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
2480 create_lcssa_for_virtual_phi (loop);
2481 update_ssa (TODO_update_ssa_only_virtuals);
2482
2483 if (MAY_HAVE_DEBUG_BIND_STMTS)
2484 {
2485 gcc_assert (!adjust_vec.exists ());
2486 adjust_vec.create (32);
2487 }
2488 initialize_original_copy_tables ();
2489
2490 /* Record the anchor bb at which the guard should be placed if the scalar
2491 loop might be preferred. */
2492 basic_block anchor = loop_preheader_edge (loop)->src;
2493
2494 /* Generate the number of iterations for the prolog loop. We do this here
2495 so that we can also get the upper bound on the number of iterations. */
2496 tree niters_prolog;
2497 int bound_prolog = 0;
2498 if (prolog_peeling)
2499 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
2500 &bound_prolog);
2501 else
2502 niters_prolog = build_int_cst (type, 0);
2503
2504 loop_vec_info epilogue_vinfo = NULL;
2505 if (vect_epilogues)
2506 {
2507 epilogue_vinfo = loop_vinfo->epilogue_vinfos[0];
2508 loop_vinfo->epilogue_vinfos.ordered_remove (0);
2509 }
2510
2511 tree niters_vector_mult_vf = NULL_TREE;
2512 /* Save NITERS before the loop, as it may be changed by the prologue. */
2513 tree before_loop_niters = LOOP_VINFO_NITERS (loop_vinfo);
2514 edge update_e = NULL, skip_e = NULL;
2515 unsigned int lowest_vf = constant_lower_bound (vf);
2516 /* If we know the number of scalar iterations for the main loop we should
2517 check whether after the main loop there are enough iterations left over
2518 for the epilogue. */
2519 if (vect_epilogues
2520 && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2521 && prolog_peeling >= 0
2522 && known_eq (vf, lowest_vf))
2523 {
2524 unsigned HOST_WIDE_INT eiters
2525 = (LOOP_VINFO_INT_NITERS (loop_vinfo)
2526 - LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
2527
2528 eiters -= prolog_peeling;
2529 eiters
2530 = eiters % lowest_vf + LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
2531
2532 unsigned int ratio;
2533 unsigned int epilogue_gaps
2534 = LOOP_VINFO_PEELING_FOR_GAPS (epilogue_vinfo);
2535 while (!(constant_multiple_p (loop_vinfo->vector_size,
2536 epilogue_vinfo->vector_size, &ratio)
2537 && eiters >= lowest_vf / ratio + epilogue_gaps))
2538 {
2539 delete epilogue_vinfo;
2540 epilogue_vinfo = NULL;
2541 if (loop_vinfo->epilogue_vinfos.length () == 0)
2542 {
2543 vect_epilogues = false;
2544 break;
2545 }
2546 epilogue_vinfo = loop_vinfo->epilogue_vinfos[0];
2547 loop_vinfo->epilogue_vinfos.ordered_remove (0);
2548 epilogue_gaps = LOOP_VINFO_PEELING_FOR_GAPS (epilogue_vinfo);
2549 }
2550 }
2551 /* Prolog loop may be skipped. */
2552 bool skip_prolog = (prolog_peeling != 0);
2553 /* The vectorized loop may be skipped, jumping straight to the epilog,
2554 when there are not enough iterations to enter it. If so, we perform
2555 runtime checks on NITERS to decide whether to skip the vectorized
2556 loop. If we know the number of scalar iterations, we add such a
2557 check only when that number may be smaller than the number of
2558 iterations required to enter the vectorized loop; for this we use
2559 the upper bounds on the prolog and epilog peeling. When we don't
2560 know the number of iterations and don't require versioning, it is
2561 because we have asserted that there are enough scalar iterations to
2562 enter the main loop, so this skip is not necessary. When we are
2563 versioning, we only add such a skip if we have chosen to vectorize
2564 the epilogue. */
2565 bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2566 ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
2567 bound_prolog + bound_epilog)
2568 : (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
2569 || vect_epilogues));
2570 /* Epilog loop must be executed if the number of iterations for epilog
2571 loop is known at compile time, otherwise we need to add a check at
2572 the end of vector loop and skip to the end of epilog loop. */
2573 bool skip_epilog = (prolog_peeling < 0
2574 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2575 || !vf.is_constant ());
2576 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
2577 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
2578 skip_epilog = false;
2579
2580 if (skip_vector)
2581 {
2582 split_edge (loop_preheader_edge (loop));
2583
2584 /* Due to the order in which we peel prolog and epilog, we first
2585 propagate probability to the whole loop. The purpose is to
2586 avoid adjusting probabilities of both prolog and vector loops
2587 separately. Note in this case, the probability of epilog loop
2588 needs to be scaled back later. */
2589 basic_block bb_before_loop = loop_preheader_edge (loop)->src;
2590 if (prob_vector.initialized_p ())
2591 {
2592 scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
2593 scale_loop_profile (loop, prob_vector, 0);
2594 }
2595 }
2596
2597 dump_user_location_t loop_loc = find_loop_location (loop);
2598 class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2599 if (vect_epilogues)
2600 /* Make sure to set the epilogue's scalar loop, such that we can use the
2601 original scalar loop as the remaining epilogue if necessary. */
2602 LOOP_VINFO_SCALAR_LOOP (epilogue_vinfo)
2603 = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2604
2605 if (prolog_peeling)
2606 {
2607 e = loop_preheader_edge (loop);
2608 if (!slpeel_can_duplicate_loop_p (loop, e))
2609 {
2610 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
2611 "loop can't be duplicated to preheader edge.\n");
2612 gcc_unreachable ();
2613 }
2614 /* Peel prolog and put it on preheader edge of loop. */
2615 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
2616 if (!prolog)
2617 {
2618 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
2619 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
2620 gcc_unreachable ();
2621 }
2622 prolog->force_vectorize = false;
2623 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
2624 first_loop = prolog;
2625 reset_original_copy_tables ();
2626
2627 /* Update the number of iterations for prolog loop. */
2628 tree step_prolog = build_one_cst (TREE_TYPE (niters_prolog));
2629 vect_set_loop_condition (prolog, NULL, niters_prolog,
2630 step_prolog, NULL_TREE, false);
2631
2632 /* Skip the prolog loop. */
2633 if (skip_prolog)
2634 {
2635 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
2636 niters_prolog, build_int_cst (type, 0));
2637 guard_bb = loop_preheader_edge (prolog)->src;
2638 basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
2639 guard_to = split_edge (loop_preheader_edge (loop));
2640 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
2641 guard_to, guard_bb,
2642 prob_prolog.invert (),
2643 irred_flag);
2644 e = EDGE_PRED (guard_to, 0);
2645 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
2646 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
2647
2648 scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
2649 scale_loop_profile (prolog, prob_prolog, bound_prolog);
2650 }
2651
2652 /* Save original inits for each data_reference before advancing them with
2653 NITERS_PROLOG. */
2654 unsigned int i;
2655 struct data_reference *dr;
2656 vec<data_reference_p> datarefs = loop_vinfo->shared->datarefs;
2657 FOR_EACH_VEC_ELT (datarefs, i, dr)
2658 orig_drs_init.safe_push (std::make_pair (dr, DR_OFFSET (dr)));
2659
2660 /* Update init address of DRs. */
2661 vect_update_inits_of_drs (loop_vinfo, niters_prolog, PLUS_EXPR);
2662 /* Update niters for vector loop. */
2663 LOOP_VINFO_NITERS (loop_vinfo)
2664 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
2665 LOOP_VINFO_NITERSM1 (loop_vinfo)
2666 = fold_build2 (MINUS_EXPR, type,
2667 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
2668 bool new_var_p = false;
2669 niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
2670 /* It's guaranteed that vector loop bound before vectorization is at
2671 least VF, so set range information for newly generated var. */
2672 if (new_var_p)
2673 set_range_info (niters, VR_RANGE,
2674 wi::to_wide (build_int_cst (type, vf)),
2675 wi::to_wide (TYPE_MAX_VALUE (type)));
2676
2677 /* Prolog iterates at most bound_prolog times, latch iterates at
2678 most bound_prolog - 1 times. */
2679 record_niter_bound (prolog, bound_prolog - 1, false, true);
2680 delete_update_ssa ();
2681 adjust_vec_debug_stmts ();
2682 scev_reset ();
2683 }
2684
2685 if (epilog_peeling)
2686 {
2687 e = single_exit (loop);
2688 if (!slpeel_can_duplicate_loop_p (loop, e))
2689 {
2690 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
2691 "loop can't be duplicated to exit edge.\n");
2692 gcc_unreachable ();
2693 }
2694 /* Peel epilog and put it on exit edge of loop. If we are vectorizing
2695 said epilog then we should use a copy of the main loop as a starting
2696 point. This loop may have already had some preliminary transformations
2697 to allow for more optimal vectorization, for example if-conversion.
2698 If we are not vectorizing the epilog then we should use the scalar loop
2699 as the transformations mentioned above make little or no sense when not
2700 vectorizing. */
2701 epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop;
2702 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, epilog, e);
2703 if (!epilog)
2704 {
2705 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
2706 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
2707 gcc_unreachable ();
2708 }
2709 epilog->force_vectorize = false;
2710 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
2711
2712 /* The scalar version of the loop may be preferred. In this case, add a
2713 guard and skip to the epilog. Note this only happens when the number of
2714 iterations of the loop is unknown at compile time, otherwise this
2715 won't be vectorized. */
2716 if (skip_vector)
2717 {
2718 /* Additional epilogue iteration is peeled if gap exists. */
2719 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
2720 bound_prolog, bound_epilog,
2721 th, &bound_scalar,
2722 check_profitability);
2723 /* Build guard against NITERSM1 since NITERS may overflow. */
2724 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
2725 guard_bb = anchor;
2726 guard_to = split_edge (loop_preheader_edge (epilog));
2727 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
2728 guard_to, guard_bb,
2729 prob_vector.invert (),
2730 irred_flag);
2731 skip_e = guard_e;
2732 e = EDGE_PRED (guard_to, 0);
2733 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
2734 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
2735
2736 /* Simply propagate profile info from guard_bb to guard_to which is
2737 a merge point of control flow. */
2738 guard_to->count = guard_bb->count;
2739
2740 /* Scale probability of epilog loop back.
2741 FIXME: We should avoid scaling down and back up. Profile may
2742 get lost if we scale down to 0. */
2743 basic_block *bbs = get_loop_body (epilog);
2744 for (unsigned int i = 0; i < epilog->num_nodes; i++)
2745 bbs[i]->count = bbs[i]->count.apply_scale
2746 (bbs[i]->count,
2747 bbs[i]->count.apply_probability
2748 (prob_vector));
2749 free (bbs);
2750 }
2751
2752 basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
2753 /* If the loop is peeled a non-zero constant number of times, niters now
2754 refers to orig_niters - prolog_peeling; it won't overflow even if
2755 orig_niters overflows. */
2756 niters_no_overflow |= (prolog_peeling > 0);
2757 vect_gen_vector_loop_niters (loop_vinfo, niters,
2758 niters_vector, step_vector,
2759 niters_no_overflow);
2760 if (!integer_onep (*step_vector))
2761 {
2762 /* On exit from the loop we will have an easy way of calculating
2763 NITERS_VECTOR / STEP * STEP. Install a dummy definition
2764 until then. */
2765 niters_vector_mult_vf = make_ssa_name (TREE_TYPE (*niters_vector));
2766 SSA_NAME_DEF_STMT (niters_vector_mult_vf) = gimple_build_nop ();
2767 *niters_vector_mult_vf_var = niters_vector_mult_vf;
2768 }
2769 else
2770 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
2771 &niters_vector_mult_vf);
2772 /* Update IVs of original loop as if they were advanced by
2773 niters_vector_mult_vf steps. */
2774 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
2775 update_e = skip_vector ? e : loop_preheader_edge (epilog);
2776 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
2777 update_e);
2778
2779 if (skip_epilog)
2780 {
2781 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
2782 niters, niters_vector_mult_vf);
2783 guard_bb = single_exit (loop)->dest;
2784 guard_to = split_edge (single_exit (epilog));
2785 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
2786 skip_vector ? anchor : guard_bb,
2787 prob_epilog.invert (),
2788 irred_flag);
2789 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
2790 single_exit (epilog));
2791 /* Only need to handle basic block before epilog loop if it's not
2792 the guard_bb, which is the case when skip_vector is true. */
2793 if (guard_bb != bb_before_epilog)
2794 {
2795 prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();
2796
2797 scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
2798 }
2799 scale_loop_profile (epilog, prob_epilog, 0);
2800 }
2801 else
2802 slpeel_update_phi_nodes_for_lcssa (epilog);
2803
2804 unsigned HOST_WIDE_INT bound;
2805 if (bound_scalar.is_constant (&bound))
2806 {
2807 gcc_assert (bound != 0);
2808 /* -1 to convert loop iterations to latch iterations. */
2809 record_niter_bound (epilog, bound - 1, false, true);
2810 }
2811
2812 delete_update_ssa ();
2813 adjust_vec_debug_stmts ();
2814 scev_reset ();
2815 }
2816
2817 if (vect_epilogues)
2818 {
2819 epilog->aux = epilogue_vinfo;
2820 LOOP_VINFO_LOOP (epilogue_vinfo) = epilog;
2821
2822 loop_constraint_clear (epilog, LOOP_C_INFINITE);
2823
2824 /* We must now calculate the number of iterations NITERS performed by the
2825 previous loop and EPILOGUE_NITERS to be performed by the epilogue. */
2826 tree niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters_vector_mult_vf),
2827 niters_prolog, niters_vector_mult_vf);
2828
2829 /* If skip_vector, the previous loop may have been skipped; insert a
2830 phi-node to determine whether we are coming from the previous vectorized
2831 loop via the update_e edge or from the skip_vector basic block via the
2832 skip_e edge. */
2833 if (skip_vector)
2834 {
2835 gcc_assert (update_e != NULL && skip_e != NULL);
2836 gphi *new_phi = create_phi_node (make_ssa_name (TREE_TYPE (niters)),
2837 update_e->dest);
2838 tree new_ssa = make_ssa_name (TREE_TYPE (niters));
2839 gimple *stmt = gimple_build_assign (new_ssa, niters);
2840 gimple_stmt_iterator gsi;
2841 if (TREE_CODE (niters_vector_mult_vf) == SSA_NAME
2842 && SSA_NAME_DEF_STMT (niters_vector_mult_vf)->bb != NULL)
2843 {
2844 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (niters_vector_mult_vf));
2845 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
2846 }
2847 else
2848 {
2849 gsi = gsi_last_bb (update_e->src);
2850 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
2851 }
2852
2853 niters = new_ssa;
2854 add_phi_arg (new_phi, niters, update_e, UNKNOWN_LOCATION);
2855 add_phi_arg (new_phi, build_zero_cst (TREE_TYPE (niters)), skip_e,
2856 UNKNOWN_LOCATION);
2857 niters = PHI_RESULT (new_phi);
2858 }
2859
2860 /* Subtract the number of iterations performed by the vectorized loop
2861 from the number of total iterations. */
2862 tree epilogue_niters = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
2863 before_loop_niters,
2864 niters);
2865
2866 LOOP_VINFO_NITERS (epilogue_vinfo) = epilogue_niters;
2867 LOOP_VINFO_NITERSM1 (epilogue_vinfo)
2868 = fold_build2 (MINUS_EXPR, TREE_TYPE (epilogue_niters),
2869 epilogue_niters,
2870 build_one_cst (TREE_TYPE (epilogue_niters)));
2871
2872 /* Set ADVANCE to the number of iterations performed by the previous
2873 loop and its prologue. */
2874 *advance = niters;
2875
2876 /* Redo the peeling for niter analysis as the NITERs and alignment
2877 may have been updated to take the main loop into account. */
2878 determine_peel_for_niter (epilogue_vinfo);
2879 }
2880
2881 adjust_vec.release ();
2882 free_original_copy_tables ();
2883
2884 return vect_epilogues ? epilog : NULL;
2885 }
2886
2887 /* Function vect_create_cond_for_niters_checks.
2888
2889 Create a conditional expression that represents the run-time checks for
2890 loop's niter. The loop is guaranteed to terminate if the run-time
2891 checks hold.
2892
2893 Input:
2894 COND_EXPR - input conditional expression. New conditions will be chained
2895 with logical AND operation. If it is NULL, the niter assumptions
2896 become the condition on their own.
2897 LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the niter
2898 assumptions to be checked.
2899
2900 Output:
2901 COND_EXPR - conditional expression.
2902
2903 The returned COND_EXPR is the conditional expression to be used in the
2904 if statement that controls which version of the loop gets executed at
2905 runtime. */
2906
2907 static void
2908 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
2909 {
2910 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
2911
2912 if (*cond_expr)
2913 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2914 *cond_expr, part_cond_expr);
2915 else
2916 *cond_expr = part_cond_expr;
2917 }
2918
2919 /* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
2920 and PART_COND_EXPR are true. Treat a null *COND_EXPR as "true". */
2921
2922 static void
2923 chain_cond_expr (tree *cond_expr, tree part_cond_expr)
2924 {
2925 if (*cond_expr)
2926 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2927 *cond_expr, part_cond_expr);
2928 else
2929 *cond_expr = part_cond_expr;
2930 }
2931
2932 /* Function vect_create_cond_for_align_checks.
2933
2934 Create a conditional expression that represents the alignment checks for
2935 all data references (array element references) whose alignment must be
2936 checked at runtime.
2937
2938 Input:
2939 COND_EXPR - input conditional expression. New conditions will be chained
2940 with logical AND operation.
2941 LOOP_VINFO - two fields of the loop information are used.
2942 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2943 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2944
2945 Output:
2946 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2947 expression.
2948 The returned value is the conditional expression to be used in the if
2949 statement that controls which version of the loop gets executed at runtime.
2950
2951 The algorithm makes two assumptions:
2952 1) The number of bytes "n" in a vector is a power of 2.
2953 2) An address "a" is aligned if a%n is zero, and this
2954 test can be done as a&(n-1) == 0. For example, for 16
2955 byte vectors the test is a&0xf == 0. */
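
/* For instance (an illustrative addition): two references whose first
 vectors start at 0x1000 and 0x1008 OR together to 0x1008; with 16-byte
 vectors the mask is 0xf and 0x1008 & 0xf = 8 != 0, so the condition is
 false and the scalar version of the loop is executed. */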
2956
2957 static void
2958 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2959 tree *cond_expr,
2960 gimple_seq *cond_expr_stmt_list)
2961 {
2962 vec<stmt_vec_info> may_misalign_stmts
2963 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2964 stmt_vec_info stmt_info;
2965 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2966 tree mask_cst;
2967 unsigned int i;
2968 tree int_ptrsize_type;
2969 char tmp_name[20];
2970 tree or_tmp_name = NULL_TREE;
2971 tree and_tmp_name;
2972 gimple *and_stmt;
2973 tree ptrsize_zero;
2974 tree part_cond_expr;
2975
2976 /* Check that mask is one less than a power of 2, i.e., mask is
2977 all zeros followed by all ones. */
2978 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2979
2980 int_ptrsize_type = signed_type_for (ptr_type_node);
2981
2982 /* Create the expression (mask & (dr_1 | ... | dr_n)) where dr_i is the
2983 address of the first vector of the i'th data reference. */
2984
2985 FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
2986 {
2987 gimple_seq new_stmt_list = NULL;
2988 tree addr_base;
2989 tree addr_tmp_name;
2990 tree new_or_tmp_name;
2991 gimple *addr_stmt, *or_stmt;
2992 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2993 bool negative = tree_int_cst_compare
2994 (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)), size_zero_node) < 0;
2995 tree offset = negative
2996 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
2997
2998 /* create: addr_tmp = (int)(address_of_first_vector) */
2999 addr_base =
3000 vect_create_addr_base_for_vector_ref (stmt_info, &new_stmt_list,
3001 offset);
3002 if (new_stmt_list != NULL)
3003 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
3004
3005 sprintf (tmp_name, "addr2int%d", i);
3006 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
3007 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
3008 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
3009
3010 /* The addresses are ORed together. */
3011
3012 if (or_tmp_name != NULL_TREE)
3013 {
3014 /* create: or_tmp = or_tmp | addr_tmp */
3015 sprintf (tmp_name, "orptrs%d", i);
3016 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
3017 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
3018 or_tmp_name, addr_tmp_name);
3019 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
3020 or_tmp_name = new_or_tmp_name;
3021 }
3022 else
3023 or_tmp_name = addr_tmp_name;
3024
3025 } /* end for i */
3026
3027 mask_cst = build_int_cst (int_ptrsize_type, mask);
3028
3029 /* create: and_tmp = or_tmp & mask */
3030 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
3031
3032 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
3033 or_tmp_name, mask_cst);
3034 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
3035
3036 /* Make and_tmp the left operand of the conditional test against zero.
3037 If and_tmp has a nonzero bit then some address is unaligned. */
3038 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
3039 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
3040 and_tmp_name, ptrsize_zero);
3041 chain_cond_expr (cond_expr, part_cond_expr);
3042 }
3043
3044 /* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
3045 create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).
3046 Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
3047 and this new condition are true. Treat a null *COND_EXPR as "true". */
3048
3049 static void
3050 vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
3051 {
3052 vec<vec_object_pair> pairs = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
3053 unsigned int i;
3054 vec_object_pair *pair;
3055 FOR_EACH_VEC_ELT (pairs, i, pair)
3056 {
3057 tree addr1 = build_fold_addr_expr (pair->first);
3058 tree addr2 = build_fold_addr_expr (pair->second);
3059 tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,
3060 addr1, addr2);
3061 chain_cond_expr (cond_expr, part_cond_expr);
3062 }
3063 }
3064
3065 /* Create an expression that is true when all lower-bound conditions for
3066 the vectorized loop are met. Chain this condition with *COND_EXPR. */
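
/* A worked example (an addition): for a signed bound with min_value = 8 the
 emitted check is (unsigned) (expr + 7) >= 15, which holds exactly when
 expr >= 8 or expr <= -8, i.e. when the magnitude of expr reaches the
 required lower bound. */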
3067
3068 static void
3069 vect_create_cond_for_lower_bounds (loop_vec_info loop_vinfo, tree *cond_expr)
3070 {
3071 vec<vec_lower_bound> lower_bounds = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
3072 for (unsigned int i = 0; i < lower_bounds.length (); ++i)
3073 {
3074 tree expr = lower_bounds[i].expr;
3075 tree type = unsigned_type_for (TREE_TYPE (expr));
3076 expr = fold_convert (type, expr);
3077 poly_uint64 bound = lower_bounds[i].min_value;
3078 if (!lower_bounds[i].unsigned_p)
3079 {
3080 expr = fold_build2 (PLUS_EXPR, type, expr,
3081 build_int_cstu (type, bound - 1));
3082 bound += bound - 1;
3083 }
3084 tree part_cond_expr = fold_build2 (GE_EXPR, boolean_type_node, expr,
3085 build_int_cstu (type, bound));
3086 chain_cond_expr (cond_expr, part_cond_expr);
3087 }
3088 }
3089
3090 /* Function vect_create_cond_for_alias_checks.
3091
3092 Create a conditional expression that represents the run-time checks for
3093 overlapping of address ranges represented by a list of data references
3094 relations passed as input.
3095
3096 Input:
3097 COND_EXPR - input conditional expression. New conditions will be chained
3098 with logical AND operation. If it is NULL, then the function
3099 is used to return the number of alias checks.
3100 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
3101 to be checked.
3102
3103 Output:
3104 COND_EXPR - conditional expression.
3105
3106 The returned COND_EXPR is the conditional expression to be used in the if
3107 statement that controls which version of the loop gets executed at runtime.
3108 */
3109
3110 void
3111 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
3112 {
3113 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
3114 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
3115
3116 if (comp_alias_ddrs.is_empty ())
3117 return;
3118
3119 create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
3120 &comp_alias_ddrs, cond_expr);
3121 if (dump_enabled_p ())
3122 dump_printf_loc (MSG_NOTE, vect_location,
3123 "created %u versioning for alias checks.\n",
3124 comp_alias_ddrs.length ());
3125 }
3126
3127
3128 /* Function vect_loop_versioning.
3129
3130 If the loop has data references that may or may not be aligned and/or
3131 has data reference relations whose independence was not proven, then
3132 two versions of the loop need to be generated, one which is vectorized
3133 and one which isn't. A test is then generated to control which of the
3134 loops is executed. The test checks for the alignment of all of the
3135 data references that may or may not be aligned. An additional
3136 sequence of runtime tests is generated for each pair of DDRs whose
3137 independence was not proven. The vectorized version of the loop is
3138 executed only if both alias and alignment tests are passed.
3139
3140 The test generated to check which version of loop is executed
3141 is modified to also check for profitability as indicated by the
3142 cost model threshold TH.
3143
3144 The versioning precondition(s) are placed in *COND_EXPR and
3145 *COND_EXPR_STMT_LIST. */
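
/* Schematically (an illustrative addition, not from the original comment),
 the versioned code has the shape:

     if (niters >= threshold && all_addresses_aligned && no_aliasing)
       vectorized loop;
     else
       scalar loop;

 where the individual conditions are only emitted when the corresponding
 versioning requirement is recorded in LOOP_VINFO. */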
3146
3147 class loop *
3148 vect_loop_versioning (loop_vec_info loop_vinfo)
3149 {
3150 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
3151 class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
3152 basic_block condition_bb;
3153 gphi_iterator gsi;
3154 gimple_stmt_iterator cond_exp_gsi;
3155 basic_block merge_bb;
3156 basic_block new_exit_bb;
3157 edge new_exit_e, e;
3158 gphi *orig_phi, *new_phi;
3159 tree cond_expr = NULL_TREE;
3160 gimple_seq cond_expr_stmt_list = NULL;
3161 tree arg;
3162 profile_probability prob = profile_probability::likely ();
3163 gimple_seq gimplify_stmt_list = NULL;
3164 tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
3165 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
3166 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
3167 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
3168 poly_uint64 versioning_threshold
3169 = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
3170 tree version_simd_if_cond
3171 = LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (loop_vinfo);
3172 unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
3173
3174 if (th >= vect_vf_for_cost (loop_vinfo)
3175 && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3176 && !ordered_p (th, versioning_threshold))
3177 cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
3178 build_int_cst (TREE_TYPE (scalar_loop_iters),
3179 th - 1));
3180 if (maybe_ne (versioning_threshold, 0U))
3181 {
3182 tree expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
3183 build_int_cst (TREE_TYPE (scalar_loop_iters),
3184 versioning_threshold - 1));
3185 if (cond_expr)
3186 cond_expr = fold_build2 (BIT_AND_EXPR, boolean_type_node,
3187 expr, cond_expr);
3188 else
3189 cond_expr = expr;
3190 }
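  /* Added note for clarity: SCALAR_LOOP_ITERS above is LOOP_VINFO_NITERSM1,
     i.e. the iteration count minus one, so comparing it against TH - 1 and
     VERSIONING_THRESHOLD - 1 with GE_EXPR is equivalent to requiring
     niters >= TH and niters >= VERSIONING_THRESHOLD respectively.  */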
3191
3192 if (version_niter)
3193 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
3194
3195 if (cond_expr)
3196 cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
3197 &cond_expr_stmt_list,
3198 is_gimple_condexpr, NULL_TREE);
3199
3200 if (version_align)
3201 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
3202 &cond_expr_stmt_list);
3203
3204 if (version_alias)
3205 {
3206 vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
3207 vect_create_cond_for_lower_bounds (loop_vinfo, &cond_expr);
3208 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
3209 }
3210
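  /* Added descriptive note: when VERSION_SIMD_IF_COND is set, the block
     below folds a VERSION_SIMD_IF_COND != 0 test into the versioning
     condition, so the vectorized version is only entered when that
     condition evaluates to true at runtime.  */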
3211 if (version_simd_if_cond)
3212 {
3213 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
3214 if (flag_checking)
3215 if (basic_block bb
3216 = gimple_bb (SSA_NAME_DEF_STMT (version_simd_if_cond)))
3217 gcc_assert (bb != loop->header
3218 && dominated_by_p (CDI_DOMINATORS, loop->header, bb)
3219 && (scalar_loop == NULL
3220 || (bb != scalar_loop->header
3221 && dominated_by_p (CDI_DOMINATORS,
3222 scalar_loop->header, bb))));
3223 tree zero = build_zero_cst (TREE_TYPE (version_simd_if_cond));
3224 tree c = fold_build2 (NE_EXPR, boolean_type_node,
3225 version_simd_if_cond, zero);
3226 if (cond_expr)
3227 cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
3228 c, cond_expr);
3229 else
3230 cond_expr = c;
3231 if (dump_enabled_p ())
3232 dump_printf_loc (MSG_NOTE, vect_location,
3233 "created versioning for simd if condition check.\n");
3234 }
3235
3236 cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
3237 &gimplify_stmt_list,
3238 is_gimple_condexpr, NULL_TREE);
3239 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
3240
3241 /* Compute the outermost loop that cond_expr and cond_expr_stmt_list are
3242 invariant in. */
3243 class loop *outermost = outermost_invariant_loop_for_expr (loop, cond_expr);
3244 for (gimple_stmt_iterator gsi = gsi_start (cond_expr_stmt_list);
3245 !gsi_end_p (gsi); gsi_next (&gsi))
3246 {
3247 gimple *stmt = gsi_stmt (gsi);
3248 update_stmt (stmt);
3249 ssa_op_iter iter;
3250 use_operand_p use_p;
3251 basic_block def_bb;
3252 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
3253 if ((def_bb = gimple_bb (SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p))))
3254 && flow_bb_inside_loop_p (outermost, def_bb))
3255 outermost = superloop_at_depth (loop, bb_loop_depth (def_bb) + 1);
3256 }
3257
3258 /* Search for the outermost loop we can version. Avoid versioning of
3259 non-perfect nests but allow if-conversion versioned loops inside. */
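  /* Added illustrative note (a sketch only): for a perfect nest such as

       for (i = 0; i < n; i++)     // outer loop with a single exit
         for (j = 0; j < m; j++)   // loop being vectorized
           a[i][j] = ...;

     the runtime checks can be hoisted by versioning the outer loop once
     rather than versioning the inner loop on every outer iteration.  */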
3260 class loop *loop_to_version = loop;
3261 if (flow_loop_nested_p (outermost, loop))
3262 {
3263 if (dump_enabled_p ())
3264 dump_printf_loc (MSG_NOTE, vect_location,
3265 "trying to apply versioning to outer loop %d\n",
3266 outermost->num);
3267 if (outermost->num == 0)
3268 outermost = superloop_at_depth (loop, 1);
3269 /* And avoid applying versioning on non-perfect nests. */
3270 while (loop_to_version != outermost
3271 && single_exit (loop_outer (loop_to_version))
3272 && (!loop_outer (loop_to_version)->inner->next
3273 || vect_loop_vectorized_call (loop_to_version))
3274 && (!loop_outer (loop_to_version)->inner->next
3275 || !loop_outer (loop_to_version)->inner->next->next))
3276 loop_to_version = loop_outer (loop_to_version);
3277 }
3278
3279 /* Apply versioning. If there is already a scalar version created by
3280 if-conversion, re-use that. Note we cannot re-use the copy of
3281 an if-converted outer loop when vectorizing the inner loop only. */
3282 gcond *cond;
3283 gimple *call;
3284 if ((!loop_to_version->inner || loop == loop_to_version)
3285 && (call = vect_loop_vectorized_call (loop_to_version, &cond)))
3286 {
3287 gcc_assert (scalar_loop);
3288 condition_bb = gimple_bb (cond);
3289 gimple_cond_set_condition_from_tree (cond, cond_expr);
3290 update_stmt (cond);
3291
3292 if (cond_expr_stmt_list)
3293 {
3294 cond_exp_gsi = gsi_for_stmt (call);
3295 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
3296 GSI_SAME_STMT);
3297 }
3298
3299 /* if-conversion uses profile_probability::always () for both paths,
3300 so reset the probabilities of both paths appropriately. */
3301 edge te, fe;
3302 extract_true_false_edges_from_block (condition_bb, &te, &fe);
3303 te->probability = prob;
3304 fe->probability = prob.invert ();
3305 /* We can scale the loop's counts immediately but have to postpone
3306 scaling the scalar loop because we re-use it during peeling. */
3307 scale_loop_frequencies (loop_to_version, te->probability);
3308 LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo) = fe->probability;
3309
3310 nloop = scalar_loop;
3311 if (dump_enabled_p ())
3312 dump_printf_loc (MSG_NOTE, vect_location,
3313 "reusing %sloop version created by if conversion\n",
3314 loop_to_version != loop ? "outer " : "");
3315 }
3316 else
3317 {
3318 if (loop_to_version != loop
3319 && dump_enabled_p ())
3320 dump_printf_loc (MSG_NOTE, vect_location,
3321 "applying loop versioning to outer loop %d\n",
3322 loop_to_version->num);
3323
3324 initialize_original_copy_tables ();
3325 nloop = loop_version (loop_to_version, cond_expr, &condition_bb,
3326 prob, prob.invert (), prob, prob.invert (), true);
3327 gcc_assert (nloop);
3328 nloop = get_loop_copy (loop);
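      /* Added descriptive note: loop_version above duplicated LOOP_TO_VERSION
	 together with any loops nested in it; get_loop_copy then retrieves
	 the copy corresponding to LOOP itself, i.e. the scalar version that
	 the vectorizer keeps track of.  */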
3329
3330 /* Kill off the IFN_LOOP_VECTORIZED calls in the copy; nobody will
3331 reap those otherwise, and they also refer to the original
3332 loops. */
3333 class loop *l = loop;
3334 while (gimple *call = vect_loop_vectorized_call (l))
3335 {
3336 call = SSA_NAME_DEF_STMT (get_current_def (gimple_call_lhs (call)));
3337 fold_loop_internal_call (call, boolean_false_node);
3338 l = loop_outer (l);
3339 }
3340 free_original_copy_tables ();
3341
3342 if (cond_expr_stmt_list)
3343 {
3344 cond_exp_gsi = gsi_last_bb (condition_bb);
3345 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
3346 GSI_SAME_STMT);
3347 }
3348
3349 /* Loop versioning violates an assumption we try to maintain during
3350 vectorization - that the loop exit block has a single predecessor.
3351 After versioning, the exit block of both loop versions is the same
3352 basic block (i.e. it has two predecessors). To simplify the
3353 following transformations in the vectorizer, we fix this situation
3354 here by adding a new (empty) block on the exit-edge of the loop,
3355 with the proper loop-exit phis to maintain loop-closed form.
3356 If loop versioning wasn't done from loop but from scalar_loop instead,
3357 merge_bb will already have just a single predecessor. */
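      /* Added descriptive note: after split_edge below, the vectorized
	 version exits into the new empty NEW_EXIT_BB whose single successor
	 is MERGE_BB, while the scalar version still branches to MERGE_BB
	 directly; each PHI in MERGE_BB then receives the vector-side value
	 via a fresh PHI created in NEW_EXIT_BB.  */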
3358
3359 merge_bb = single_exit (loop_to_version)->dest;
3360 if (EDGE_COUNT (merge_bb->preds) >= 2)
3361 {
3362 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
3363 new_exit_bb = split_edge (single_exit (loop_to_version));
3364 new_exit_e = single_exit (loop_to_version);
3365 e = EDGE_SUCC (new_exit_bb, 0);
3366
3367 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi);
3368 gsi_next (&gsi))
3369 {
3370 tree new_res;
3371 orig_phi = gsi.phi ();
3372 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
3373 new_phi = create_phi_node (new_res, new_exit_bb);
3374 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
3375 add_phi_arg (new_phi, arg, new_exit_e,
3376 gimple_phi_arg_location_from_edge (orig_phi, e));
3377 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
3378 }
3379 }
3380
3381 update_ssa (TODO_update_ssa);
3382 }
3383
3384 if (version_niter)
3385 {
3386 /* The versioned loop could be infinite; we need to clear the existing
3387 niter information, which is copied from the original loop. */
3388 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
3389 vect_free_loop_info_assumptions (nloop);
3390 /* And set constraint LOOP_C_INFINITE for niter analyzer. */
3391 loop_constraint_set (loop, LOOP_C_INFINITE);
3392 }
3393
3394 if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
3395 && dump_enabled_p ())
3396 {
3397 if (version_alias)
3398 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
3399 vect_location,
3400 "loop versioned for vectorization because of "
3401 "possible aliasing\n");
3402 if (version_align)
3403 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
3404 vect_location,
3405 "loop versioned for vectorization to enhance "
3406 "alignment\n");
3407
3408 }
3409
3410 return nloop;
3411 }