/* Vectorizer Specific Loop Manipulations
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "tree-pass.h"
#include "cfgloop.h"
#include "diagnostic-core.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "langhooks.h"

/*************************************************************************
  Simple Loop Peeling Utilities

  Utilities to support loop peeling for vectorization purposes.
 *************************************************************************/


/* Renames the use *OP_P.  */

static void
rename_use_op (use_operand_p op_p)
{
  tree new_name;

  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
    return;

  new_name = get_current_def (USE_FROM_PTR (op_p));

  /* Something defined outside of the loop.  */
  if (!new_name)
    return;

  /* An ordinary ssa name defined in the loop.  */

  SET_USE (op_p, new_name);
}


/* Renames the variables in basic block BB.  */

static void
rename_variables_in_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  gimple stmt;
  use_operand_p use_p;
  ssa_op_iter iter;
  edge e;
  edge_iterator ei;
  struct loop *loop = bb->loop_father;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
        rename_use_op (use_p);
    }

  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (!flow_bb_inside_loop_p (loop, e->src))
        continue;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
    }
}


typedef struct
{
  tree from, to;
  basic_block bb;
} adjust_info;

/* A stack of values to be adjusted in debug stmts.  We have to
   process them LIFO, so that the closest substitution applies.  If we
   processed them FIFO, without the stack, we might substitute uses
   with a PHI DEF that would soon become non-dominant, and when we got
   to the suitable one, it wouldn't have anything to substitute any
   more.  */
static vec<adjust_info, va_heap> adjust_vec;

/* Adjust any debug stmts that referenced AI->from values to use the
   loop-closed AI->to, if the references are dominated by AI->bb and
   not by the definition of AI->from.  */

static void
adjust_debug_stmts_now (adjust_info *ai)
{
  basic_block bbphi = ai->bb;
  tree orig_def = ai->from;
  tree new_def = ai->to;
  imm_use_iterator imm_iter;
  gimple stmt;
  basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));

  gcc_assert (dom_info_available_p (CDI_DOMINATORS));

  /* Adjust any debug stmts that held onto non-loop-closed
     references.  */
  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
    {
      use_operand_p use_p;
      basic_block bbuse;

      if (!is_gimple_debug (stmt))
        continue;

      gcc_assert (gimple_debug_bind_p (stmt));

      bbuse = gimple_bb (stmt);

      if ((bbuse == bbphi
           || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
          && !(bbuse == bbdef
               || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
        {
          if (new_def)
            FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
              SET_USE (use_p, new_def);
          else
            {
              gimple_debug_bind_reset_value (stmt);
              update_stmt (stmt);
            }
        }
    }
}

/* Adjust debug stmts as scheduled before.  */

static void
adjust_vec_debug_stmts (void)
{
  if (!MAY_HAVE_DEBUG_STMTS)
    return;

  gcc_assert (adjust_vec.exists ());

  while (!adjust_vec.is_empty ())
    {
      adjust_debug_stmts_now (&adjust_vec.last ());
      adjust_vec.pop ();
    }

  adjust_vec.release ();
}

/* Adjust any debug stmts that referenced FROM values to use the
   loop-closed TO, if the references are dominated by BB and not by
   the definition of FROM.  If adjust_vec is non-NULL, adjustments
   will be postponed until adjust_vec_debug_stmts is called.  */

static void
adjust_debug_stmts (tree from, tree to, basic_block bb)
{
  adjust_info ai;

  if (MAY_HAVE_DEBUG_STMTS
      && TREE_CODE (from) == SSA_NAME
      && ! SSA_NAME_IS_DEFAULT_DEF (from)
      && ! virtual_operand_p (from))
    {
      ai.from = from;
      ai.to = to;
      ai.bb = bb;

      if (adjust_vec.exists ())
        adjust_vec.safe_push (ai);
      else
        adjust_debug_stmts_now (&ai);
    }
}

/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
   to adjust any debug stmts that referenced the old phi arg,
   presumably non-loop-closed references left over from other
   transformations.  */

static void
adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
{
  tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);

  SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);

  if (MAY_HAVE_DEBUG_STMTS)
    adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
                        gimple_bb (update_phi));
}


/* Update PHI nodes for a guard of the LOOP.

   Input:
   - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
        controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
        originates from the guard-bb, skips LOOP and reaches the (unique) exit
        bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
        We denote this bb NEW_MERGE_BB because before the guard code was added
        it had a single predecessor (the LOOP header), and now it became a
        merge point of two paths - the path that ends with the LOOP exit-edge,
        and the path that ends with GUARD_EDGE.
   - NEW_EXIT_BB: New basic block that is added by this function between LOOP
        and NEW_MERGE_BB.  It is used to place loop-closed-ssa-form exit-phis.

   ===> The CFG before the guard-code was added:
        LOOP_header_bb:
          loop_body
          if (exit_loop) goto update_bb
          else           goto LOOP_header_bb
        update_bb:

   ==> The CFG after the guard-code was added:
        guard_bb:
          if (LOOP_guard_condition) goto new_merge_bb
          else                      goto LOOP_header_bb
        LOOP_header_bb:
          loop_body
          if (exit_loop_condition) goto new_merge_bb
          else                     goto LOOP_header_bb
        new_merge_bb:
          goto update_bb
        update_bb:

   ==> The CFG after this function:
        guard_bb:
          if (LOOP_guard_condition) goto new_merge_bb
          else                      goto LOOP_header_bb
        LOOP_header_bb:
          loop_body
          if (exit_loop_condition) goto new_exit_bb
          else                     goto LOOP_header_bb
        new_exit_bb:
        new_merge_bb:
          goto update_bb
        update_bb:

   This function:
   1. creates and updates the relevant phi nodes to account for the new
      incoming edge (GUARD_EDGE) into NEW_MERGE_BB.  This involves:
      1.1. Create phi nodes at NEW_MERGE_BB.
      1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
           UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
           was added.
   2. preserves loop-closed-ssa-form by creating the required phi nodes
      at the exit of LOOP (i.e., in NEW_EXIT_BB).

   There are two flavors to this function:

   slpeel_update_phi_nodes_for_guard1:
     Here the guard controls whether we enter or skip LOOP, where LOOP is a
     prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
     for variables that have phis in the loop header.

   slpeel_update_phi_nodes_for_guard2:
     Here the guard controls whether we enter or skip LOOP, where LOOP is an
     epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
     for variables that have phis in the loop exit.

   I.e., the overall structure is:

        loop1_preheader_bb:
                guard1 (goto loop1/merge1_bb)
        loop1
        loop1_exit_bb:
                guard2 (goto merge1_bb/merge2_bb)
        merge1_bb
        loop2
        loop2_exit_bb
        merge2_bb
        next_bb

   slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
   loop1_exit_bb and merge1_bb.  These are entry phis (phis for the vars
   that have phis in loop1->header).

   slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
   loop2_exit_bb and merge2_bb.  These are exit phis (phis for the vars
   that have phis in next_bb).  It also adds some of these phis to
   loop1_exit_bb.

   slpeel_update_phi_nodes_for_guard1 is always called before
   slpeel_update_phi_nodes_for_guard2.  They are both needed in order
   to create correct data-flow and loop-closed-ssa-form.

   Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
   that change between iterations of a loop (and therefore have a phi-node
   at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
   phis for variables that are used out of the loop (and therefore have
   loop-closed exit phis).  Some variables may be both updated between
   iterations and used after the loop.  This is why in loop1_exit_bb we
   may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
   and exit phis (created by slpeel_update_phi_nodes_for_guard2).

   - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
     an original loop.  I.e., we have:

        orig_loop
        guard_bb (goto LOOP/new_merge)
        new_loop <-- LOOP
        new_exit
        new_merge
        next_bb

     If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
     have:

        new_loop
        guard_bb (goto LOOP/new_merge)
        orig_loop <-- LOOP
        new_exit
        new_merge
        next_bb

     The SSA names defined in the original loop have a current
     reaching definition that records the corresponding new
     ssa-name used in the new duplicated loop copy.
 */

/* Function slpeel_update_phi_nodes_for_guard1

   Input:
   - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
   - DEFS - a bitmap of ssa names to mark new names for which we recorded
            information.

   In the context of the overall structure, we have:

        loop1_preheader_bb:
                guard1 (goto loop1/merge1_bb)
LOOP->  loop1
        loop1_exit_bb:
                guard2 (goto merge1_bb/merge2_bb)
        merge1_bb
        loop2
        loop2_exit_bb
        merge2_bb
        next_bb

   For each name updated between loop iterations (i.e., for each name that has
   an entry (loop-header) phi in LOOP) we create a new phi in:
   1. merge1_bb (to account for the edge from guard1)
   2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
*/

static void
slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
                                    bool is_new_loop, basic_block *new_exit_bb)
{
  gimple orig_phi, new_phi;
  gimple update_phi, update_phi2;
  tree guard_arg, loop_arg;
  basic_block new_merge_bb = guard_edge->dest;
  edge e = EDGE_SUCC (new_merge_bb, 0);
  basic_block update_bb = e->dest;
  basic_block orig_bb = loop->header;
  edge new_exit_e;
  tree current_new_name;
  gimple_stmt_iterator gsi_orig, gsi_update;

  /* Create new bb between loop and new_merge_bb.  */
  *new_exit_bb = split_edge (single_exit (loop));

  new_exit_e = EDGE_SUCC (*new_exit_bb, 0);

  for (gsi_orig = gsi_start_phis (orig_bb),
       gsi_update = gsi_start_phis (update_bb);
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      source_location loop_locus, guard_locus;
      tree new_res;
      orig_phi = gsi_stmt (gsi_orig);
      update_phi = gsi_stmt (gsi_update);

      /** 1. Handle new-merge-point phis  **/

      /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
      new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
      new_phi = create_phi_node (new_res, new_merge_bb);

      /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the
         exit-edge of LOOP.  Set the two phi args in NEW_PHI for these
         edges:  */
      loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
      loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
                                                      EDGE_SUCC (loop->latch,
                                                                 0));
      guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
      guard_locus
        = gimple_phi_arg_location_from_edge (orig_phi,
                                             loop_preheader_edge (loop));

      add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
      add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);

      /* 1.3. Update phi in successor block.  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
                  || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
      adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
      update_phi2 = new_phi;


      /** 2. Handle loop-closed-ssa-form phis  **/

      if (virtual_operand_p (PHI_RESULT (orig_phi)))
        continue;

      /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
      new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
      new_phi = create_phi_node (new_res, *new_exit_bb);

      /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of
         the loop.  */
      add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);

      /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
      adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
                                  PHI_RESULT (new_phi));

      /* 2.4. Record the newly created name with set_current_def.
         We want to find a name such that
            name = get_current_def (orig_loop_name)
         and to set its current definition as follows:
            set_current_def (name, new_phi_name)

         If LOOP is a new loop then loop_arg is already the name we're
         looking for.  If LOOP is the original loop, then loop_arg is
         the orig_loop_name and the relevant name is recorded in its
         current reaching definition.  */
      if (is_new_loop)
        current_new_name = loop_arg;
      else
        {
          current_new_name = get_current_def (loop_arg);
          /* current_def is not available only if the variable does not
             change inside the loop, in which case we also don't care
             about recording a current_def for it because we won't be
             trying to create loop-exit-phis for it.  */
          if (!current_new_name)
            continue;
        }
      gcc_assert (get_current_def (current_new_name) == NULL_TREE);

      set_current_def (current_new_name, PHI_RESULT (new_phi));
    }
}


/* Function slpeel_update_phi_nodes_for_guard2

   Input:
   - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.

   In the context of the overall structure, we have:

        loop1_preheader_bb:
                guard1 (goto loop1/merge1_bb)
        loop1
        loop1_exit_bb:
                guard2 (goto merge1_bb/merge2_bb)
        merge1_bb
LOOP->  loop2
        loop2_exit_bb
        merge2_bb
        next_bb

   For each name used outside the loop (i.e., for each name that has an exit
   phi in next_bb) we create a new phi in:
   1. merge2_bb (to account for the edge from guard_bb)
   2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
   3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
      if needed (if it wasn't handled by slpeel_update_phi_nodes_for_guard1).
*/

static void
slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
                                    bool is_new_loop, basic_block *new_exit_bb)
{
  gimple orig_phi, new_phi;
  gimple update_phi, update_phi2;
  tree guard_arg, loop_arg;
  basic_block new_merge_bb = guard_edge->dest;
  edge e = EDGE_SUCC (new_merge_bb, 0);
  basic_block update_bb = e->dest;
  edge new_exit_e;
  tree orig_def, orig_def_new_name;
  tree new_name, new_name2;
  tree arg;
  gimple_stmt_iterator gsi;

  /* Create new bb between loop and new_merge_bb.  */
  *new_exit_bb = split_edge (single_exit (loop));

  new_exit_e = EDGE_SUCC (*new_exit_bb, 0);

  for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree new_res;
      update_phi = gsi_stmt (gsi);
      orig_phi = update_phi;
      orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
      /* This loop-closed-phi actually doesn't represent a use
         out of the loop - the phi arg is a constant.  */
      if (TREE_CODE (orig_def) != SSA_NAME)
        continue;
      orig_def_new_name = get_current_def (orig_def);
      arg = NULL_TREE;

      /** 1. Handle new-merge-point phis  **/

      /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
      new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
      new_phi = create_phi_node (new_res, new_merge_bb);

      /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the
         exit-edge of LOOP.  Set the two PHI args in NEW_PHI for these
         edges:  */
      new_name = orig_def;
      new_name2 = NULL_TREE;
      if (orig_def_new_name)
        {
          new_name = orig_def_new_name;
          /* Some variables have both loop-entry-phis and loop-exit-phis.
             Such variables were given yet newer names by phis placed in
             guard_bb by slpeel_update_phi_nodes_for_guard1.  I.e:
             new_name2 = get_current_def (get_current_def (orig_name)).  */
          new_name2 = get_current_def (new_name);
        }

      if (is_new_loop)
        {
          guard_arg = orig_def;
          loop_arg = new_name;
        }
      else
        {
          guard_arg = new_name;
          loop_arg = orig_def;
        }
      if (new_name2)
        guard_arg = new_name2;

      add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
      add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);

      /* 1.3. Update phi in successor block.  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
      adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
      update_phi2 = new_phi;


      /** 2. Handle loop-closed-ssa-form phis  **/

      /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
      new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
      new_phi = create_phi_node (new_res, *new_exit_bb);

      /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of
         the loop.  */
      add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);

      /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
      adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
                                  PHI_RESULT (new_phi));


      /** 3. Handle loop-closed-ssa-form phis for first loop  **/

      /* 3.1. Find the relevant names that need an exit-phi in
         GUARD_BB, i.e. names for which
         slpeel_update_phi_nodes_for_guard1 had not already created a
         phi node.  This is the case for names that are used outside
         the loop (and therefore need an exit phi) but are not updated
         across loop iterations (and therefore don't have a
         loop-header-phi).

         slpeel_update_phi_nodes_for_guard1 is responsible for
         creating loop-exit phis in GUARD_BB for names that have a
         loop-header-phi.  When such a phi is created we also record
         the new name in its current definition.  If this new name
         exists, then guard_arg was set to this new name (see 1.2
         above).  Therefore, if guard_arg is not this new name, this
         is an indication that an exit-phi in GUARD_BB was not yet
         created, so we take care of it here.  */
      if (guard_arg == new_name2)
        continue;
      arg = guard_arg;

      /* 3.2. Generate new phi node in GUARD_BB:  */
      new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
      new_phi = create_phi_node (new_res, guard_edge->src);

      /* 3.3. GUARD_BB has one incoming edge:  */
      gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
      add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
                   UNKNOWN_LOCATION);

      /* 3.4. Update phi in successor of GUARD_BB:  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
                  == guard_arg);
      adjust_phi_and_debug_stmts (update_phi2, guard_edge,
                                  PHI_RESULT (new_phi));
    }
}


/* Make the LOOP iterate NITERS times.  This is done by adding a new IV
   that starts at zero, increases by one and its limit is NITERS.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */

void
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
{
  tree indx_before_incr, indx_after_incr;
  gimple cond_stmt;
  gimple orig_cond;
  edge exit_edge = single_exit (loop);
  gimple_stmt_iterator loop_cond_gsi;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  tree init = build_int_cst (TREE_TYPE (niters), 0);
  tree step = build_int_cst (TREE_TYPE (niters), 1);
  source_location loop_loc;
  enum tree_code code;

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  loop_cond_gsi = gsi_for_stmt (orig_cond);

  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  create_iv (init, step, NULL_TREE, loop,
             &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);

  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
                                              true, NULL_TREE, true,
                                              GSI_SAME_STMT);
  niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
                                     true, GSI_SAME_STMT);

  code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
  cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
                                 NULL_TREE);

  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);

  /* Remove old loop exit test:  */
  gsi_remove (&loop_cond_gsi, true);
  free_stmt_vec_info (orig_cond);

  loop_loc = find_loop_location (loop);
  if (dump_enabled_p ())
    {
      if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
        dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
                     LOCATION_LINE (loop_loc));
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }
  loop->nb_iterations = niters;
}
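
/* For illustration only (an editorial sketch, not part of the vectorizer):
   the effect of slpeel_make_loop_iterate_ntimes on a source loop, in
   C-like pseudocode.  The names n, niters and ivtmp are hypothetical.

     before:  for (i = init; i < n; i += step)   after:  ivtmp = 0;
                body;                                    do
                                                           {
                                                             body;
                                                             ivtmp++;
                                                           }
                                                         while (ivtmp < niters);

   The original exit test is removed and replaced by a test on a fresh
   zero-based, step-one IV, so the loop executes exactly NITERS times no
   matter what the old condition computed.  */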


/* Given LOOP this function generates a new copy of it and puts it
   on E, which is either the entry or exit of LOOP.  */

struct loop *
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
{
  struct loop *new_loop;
  basic_block *new_bbs, *bbs;
  bool at_exit;
  bool was_imm_dom;
  basic_block exit_dest;
  edge exit, new_exit;

  exit = single_exit (loop);
  at_exit = (e == exit);
  if (!at_exit && e != loop_preheader_edge (loop))
    return NULL;

  bbs = XNEWVEC (basic_block, loop->num_nodes + 1);
  get_loop_body_with_size (loop, bbs, loop->num_nodes);

  /* Check whether duplication is possible.  */
  if (!can_copy_bbs_p (bbs, loop->num_nodes))
    {
      free (bbs);
      return NULL;
    }

  /* Generate new loop structure.  */
  new_loop = duplicate_loop (loop, loop_outer (loop));
  duplicate_subloops (loop, new_loop);

  exit_dest = exit->dest;
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
                                          exit_dest) == loop->header ?
                 true : false);

  /* Also copy the pre-header, this avoids jumping through hoops to
     duplicate the loop entry PHI arguments.  Create an empty
     pre-header unconditionally for this.  */
  basic_block preheader = split_edge (loop_preheader_edge (loop));
  edge entry_e = single_pred_edge (preheader);
  bbs[loop->num_nodes] = preheader;
  new_bbs = XNEWVEC (basic_block, loop->num_nodes + 1);

  copy_bbs (bbs, loop->num_nodes + 1, new_bbs,
            &exit, 1, &new_exit, NULL,
            e->src, true);
  basic_block new_preheader = new_bbs[loop->num_nodes];

  add_phi_args_after_copy (new_bbs, loop->num_nodes + 1, NULL);

  if (at_exit) /* Add the loop copy at exit.  */
    {
      redirect_edge_and_branch_force (e, new_preheader);
      flush_pending_stmts (e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
      if (was_imm_dom)
        set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);

      /* And remove the non-necessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (preheader),
                          single_pred (preheader));
      delete_basic_block (preheader);
      set_immediate_dominator (CDI_DOMINATORS, loop->header,
                               loop_preheader_edge (loop)->src);
    }
  else /* Add the copy at entry.  */
    {
      redirect_edge_and_branch_force (entry_e, new_preheader);
      flush_pending_stmts (entry_e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);

      redirect_edge_and_branch_force (new_exit, preheader);
      flush_pending_stmts (new_exit);
      set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (new_preheader),
                          single_pred (new_preheader));
      delete_basic_block (new_preheader);
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
                               loop_preheader_edge (new_loop)->src);
    }

  for (unsigned i = 0; i < loop->num_nodes + 1; i++)
    rename_variables_in_bb (new_bbs[i]);

  free (new_bbs);
  free (bbs);

#ifdef ENABLE_CHECKING
  verify_dominators (CDI_DOMINATORS);
#endif

  return new_loop;
}


/* Given the condition statement COND, put it as the last statement of
   GUARD_BB; EXIT_BB is the basic block to skip the loop.
   Assumes that this is the single exit of the guarded loop.
   Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST.  */

static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond,
                       gimple_seq cond_expr_stmt_list,
                       basic_block exit_bb, basic_block dom_bb,
                       int probability)
{
  gimple_stmt_iterator gsi;
  edge new_e, enter_e;
  gimple cond_stmt;
  gimple_seq gimplify_stmt_list = NULL;

  enter_e = EDGE_SUCC (guard_bb, 0);
  enter_e->flags &= ~EDGE_FALLTHRU;
  enter_e->flags |= EDGE_FALSE_VALUE;
  gsi = gsi_last_bb (guard_bb);

  cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
                                 NULL_TREE);
  if (gimplify_stmt_list)
    gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
  cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
  if (cond_expr_stmt_list)
    gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);

  gsi = gsi_last_bb (guard_bb);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* Add new edge to connect guard block to the merge/loop-exit block.  */
  new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);

  new_e->count = guard_bb->count;
  new_e->probability = probability;
  new_e->count = apply_probability (enter_e->count, probability);
  enter_e->count -= new_e->count;
  enter_e->probability = inverse_probability (probability);
  set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
  return new_e;
}
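
/* For illustration only (editorial sketch): after slpeel_add_loop_guard,
   GUARD_BB ends in a conditional of roughly this shape, where COND and the
   destination blocks are the caller's:

     guard_bb:
       if (cond) goto exit_bb;      <- new_e, taken with PROBABILITY
       else goto <old successor>;   <- enter_e, the former fallthru edge

   Note that the first assignment to new_e->count above is immediately
   overwritten by the apply_probability result; only the scaled count
   survives.  */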


/* This function verifies that the following restrictions apply to LOOP:
   (1) it is innermost
   (2) it consists of exactly 2 basic blocks - header, and an empty latch.
   (3) it is single entry, single exit
   (4) its exit condition is the last stmt in the header
   (5) E is the entry/exit edge of LOOP.
 */

bool
slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
{
  edge exit_e = single_exit (loop);
  edge entry_e = loop_preheader_edge (loop);
  gimple orig_cond = get_loop_exit_condition (loop);
  gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);

  if (loop->inner
      /* All loops have an outer scope; the only case in which loop->outer
         is NULL is for the function itself.  */
      || !loop_outer (loop)
      || loop->num_nodes != 2
      || !empty_block_p (loop->latch)
      || !single_exit (loop)
      /* Verify that the new loop exit condition can be trivially
         modified.  */
      || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
      || (e != exit_e && e != entry_e))
    return false;

  return true;
}
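
/* For illustration only (editorial sketch): a source loop that satisfies
   these restrictions is a simple innermost counting loop such as

     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   which after gimplification has one header block ending in the exit
   condition plus an empty latch, a single entry edge from the preheader,
   and a single exit edge.  An inner loop, a second exit (e.g. a break),
   or statements in the latch all make it non-duplicatable here.  */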

#ifdef ENABLE_CHECKING
static void
slpeel_verify_cfg_after_peeling (struct loop *first_loop,
                                 struct loop *second_loop)
{
  basic_block loop1_exit_bb = single_exit (first_loop)->dest;
  basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
  basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;

  /* A guard that controls whether the second_loop is to be executed or
     skipped is placed in first_loop->exit.  first_loop->exit therefore has
     two successors - one is the preheader of second_loop, and the other is
     a bb after second_loop.  */
  gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);

  /* 1. Verify that one of the successors of first_loop->exit is the
     preheader of second_loop.  */

  /* The preheader of new_loop is expected to have two predecessors:
     first_loop->exit and the block that precedes first_loop.  */

  gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
              && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
                   && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
                  || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
                      && EDGE_PRED (loop2_entry_bb, 0)->src
                         == loop1_entry_bb)));

  /* Verify that the other successor of first_loop->exit is after the
     second_loop.  */
  /* TODO */
}
#endif

/* If the run-time cost model check determines that vectorization is not
   profitable, and hence a scalar loop should be generated, then set
   FIRST_NITERS to the number of prologue-peeled iterations.  This will
   allow all the iterations to be executed in the prologue peeled scalar
   loop.  */

static void
set_prologue_iterations (basic_block bb_before_first_loop,
                         tree *first_niters,
                         struct loop *loop,
                         unsigned int th,
                         int probability)
{
  edge e;
  basic_block cond_bb, then_bb;
  tree var, prologue_after_cost_adjust_name;
  gimple_stmt_iterator gsi;
  gimple newphi;
  edge e_true, e_false, e_fallthru;
  gimple cond_stmt;
  gimple_seq stmts = NULL;
  tree cost_pre_condition = NULL_TREE;
  tree scalar_loop_iters =
    unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));

  e = single_pred_edge (bb_before_first_loop);
  cond_bb = split_edge (e);

  e = single_pred_edge (bb_before_first_loop);
  then_bb = split_edge (e);
  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);

  e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
                                   EDGE_FALSE_VALUE);
  set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);

  e_true = EDGE_PRED (then_bb, 0);
  e_true->flags &= ~EDGE_FALLTHRU;
  e_true->flags |= EDGE_TRUE_VALUE;

  e_true->probability = probability;
  e_false->probability = inverse_probability (probability);
  e_true->count = apply_probability (cond_bb->count, probability);
  e_false->count = cond_bb->count - e_true->count;
  then_bb->frequency = EDGE_FREQUENCY (e_true);
  then_bb->count = e_true->count;

  e_fallthru = EDGE_SUCC (then_bb, 0);
  e_fallthru->count = then_bb->count;

  gsi = gsi_last_bb (cond_bb);
  cost_pre_condition =
    fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
                 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
  cost_pre_condition =
    force_gimple_operand_gsi_1 (&gsi, cost_pre_condition, is_gimple_condexpr,
                                NULL_TREE, false, GSI_CONTINUE_LINKING);
  cond_stmt = gimple_build_cond_from_tree (cost_pre_condition,
                                           NULL_TREE, NULL_TREE);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
                        "prologue_after_cost_adjust");
  prologue_after_cost_adjust_name =
    force_gimple_operand (scalar_loop_iters, &stmts, false, var);

  gsi = gsi_last_bb (then_bb);
  if (stmts)
    gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);

  newphi = create_phi_node (var, bb_before_first_loop);
  add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
               UNKNOWN_LOCATION);
  add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);

  *first_niters = PHI_RESULT (newphi);
}
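
/* For illustration only (editorial sketch, hypothetical names): the control
   flow built by set_prologue_iterations amounts to

     cond_bb:
       if (scalar_loop_iters <= th)
         goto then_bb;                              // not profitable
     then_bb:
       first_niters' = scalar_loop_iters;
     bb_before_first_loop:
       first_niters = PHI <first_niters' (then_bb),
                           first_niters (cond_bb)>

   so when vectorization turns out unprofitable at run time, the prologue
   loop absorbs every scalar iteration and the vector loop is skipped.  */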

/* Function slpeel_tree_peel_loop_to_edge.

   Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
   that is placed on the entry (exit) edge E of LOOP.  After this
   transformation we have two loops, one after the other - first-loop
   iterates FIRST_NITERS times, and second-loop iterates the remainder
   NITERS - FIRST_NITERS times.
   If the cost model indicates that it is profitable to emit a scalar
   loop instead of the vector one, then the prolog (epilog) loop will iterate
   for the entire unchanged scalar iterations of the loop.

   Input:
   - LOOP: the loop to be peeled.
   - E: the exit or entry edge of LOOP.
        If it is the entry edge, we peel the first iterations of LOOP.  In
        this case first-loop is LOOP, and second-loop is the newly created
        loop.
        If it is the exit edge, we peel the last iterations of LOOP.  In
        this case, first-loop is the newly created loop, and second-loop
        is LOOP.
   - NITERS: the number of iterations that LOOP iterates.
   - FIRST_NITERS: the number of iterations that the first-loop should
        iterate.
   - UPDATE_FIRST_LOOP_COUNT: specifies whether this function is responsible
        for updating the loop bound of the first-loop to FIRST_NITERS.  If it
        is false, the caller of this function may want to take care of this
        (this can be useful if we don't want new stmts added to first-loop).
   - TH: cost model profitability threshold of iterations for vectorization.
   - CHECK_PROFITABILITY: specifies whether the cost model check has not
        occurred during versioning and hence needs to occur during
        prologue generation, or whether the cost model check has not
        occurred during prologue generation and hence needs to occur
        during epilogue generation.
   - BOUND1 is the upper bound on the number of iterations of the first
        loop (if known)
   - BOUND2 is the upper bound on the number of iterations of the second
        loop (if known)


   Output:
   The function returns a pointer to the new loop-copy, or NULL if it failed
   to perform the transformation.

   The function generates two if-then-else guards: one before the first
   loop, and the other before the second loop:
   The first guard is:
     if (FIRST_NITERS == 0) then skip the first loop,
     and go directly to the second loop.
   The second guard is:
     if (FIRST_NITERS == NITERS) then skip the second loop.

   If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
   then the generated condition is combined with COND_EXPR and the
   statements in COND_EXPR_STMT_LIST are emitted together with it.

   FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
   FORNOW the resulting code will not be in loop-closed-ssa form.
*/

static struct loop *
slpeel_tree_peel_loop_to_edge (struct loop *loop,
                               edge e, tree *first_niters,
                               tree niters, bool update_first_loop_count,
                               unsigned int th, bool check_profitability,
                               tree cond_expr, gimple_seq cond_expr_stmt_list,
                               int bound1, int bound2)
{
  struct loop *new_loop = NULL, *first_loop, *second_loop;
  edge skip_e;
  tree pre_condition = NULL_TREE;
  basic_block bb_before_second_loop, bb_after_second_loop;
  basic_block bb_before_first_loop;
  basic_block bb_between_loops;
  basic_block new_exit_bb;
  gimple_stmt_iterator gsi;
  edge exit_e = single_exit (loop);
  source_location loop_loc;
  tree cost_pre_condition = NULL_TREE;
  /* There are many aspects to how likely the first loop is going to be
     executed.  Without a histogram we can't really do a good job.  Simply
     set it to 2/3, so the first loop is not reordered to the end of the
     function and the hot path through it stays short.  */
  int first_guard_probability = 2 * REG_BR_PROB_BASE / 3;
  int second_guard_probability = 2 * REG_BR_PROB_BASE / 3;
  int probability_of_second_loop;

  if (!slpeel_can_duplicate_loop_p (loop, e))
    return NULL;

  /* We might have a queued need to update virtual SSA form.  As we
     delete the update SSA machinery below after doing a regular
     incremental SSA update during loop copying, make sure we don't
     lose that fact.
     ??? Needing to update virtual SSA form by renaming is unfortunate
     but not all of the vectorizer code inserting new loads / stores
     properly assigns virtual operands to those statements.  */
  update_ssa (TODO_update_ssa_only_virtuals);

  /* If the loop has a virtual PHI, but the exit bb doesn't, create a
     virtual PHI in the exit bb and rename all the uses after the loop.
     This simplifies the *guard[12] routines, which assume loop closed SSA
     form for all PHIs (but normally loop closed SSA form doesn't require
     virtual PHIs to be in the same form).  Doing this early simplifies
     checking which uses should be renamed.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
      {
        gimple phi = gsi_stmt (gsi);
        for (gsi = gsi_start_phis (exit_e->dest);
             !gsi_end_p (gsi); gsi_next (&gsi))
          if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
            break;
        if (gsi_end_p (gsi))
          {
            tree new_vop = copy_ssa_name (PHI_RESULT (phi), NULL);
            gimple new_phi = create_phi_node (new_vop, exit_e->dest);
            tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
            imm_use_iterator imm_iter;
            gimple stmt;
            use_operand_p use_p;

            add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
            gimple_phi_set_result (new_phi, new_vop);
            FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
              if (stmt != new_phi && gimple_bb (stmt) != loop->header)
                FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                  SET_USE (use_p, new_vop);
          }
        break;
      }

  /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of
        LOOP).  Resulting CFG would be:

        first_loop:
        do {
        } while ...

        second_loop:
        do {
        } while ...

        orig_exit_bb:
   */

  if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
    {
      loop_loc = find_loop_location (loop);
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
                       "tree_duplicate_loop_to_edge_cfg failed.\n");
      return NULL;
    }

  if (MAY_HAVE_DEBUG_STMTS)
    {
      gcc_assert (!adjust_vec.exists ());
      adjust_vec.create (32);
    }

  if (e == exit_e)
    {
      /* NEW_LOOP was placed after LOOP.  */
      first_loop = loop;
      second_loop = new_loop;
    }
  else
    {
      /* NEW_LOOP was placed before LOOP.  */
      first_loop = new_loop;
      second_loop = loop;
    }

  /* 2.  Add the guard code in one of the following ways:

     2.a Add the guard that controls whether the first loop is executed.
         This occurs when this function is invoked for prologue or epilogue
         generation and when the cost model check can be done at compile
         time.

         Resulting CFG would be:

         bb_before_first_loop:
         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
                                GOTO first-loop

         first_loop:
         do {
         } while ...

         bb_before_second_loop:

         second_loop:
         do {
         } while ...

         orig_exit_bb:

     2.b Add the cost model check that allows the prologue
         to iterate for the entire unchanged scalar
         iterations of the loop in the event that the cost
         model indicates that the scalar loop is more
         profitable than the vector one.  This occurs when
         this function is invoked for prologue generation
         and the cost model check needs to be done at run
         time.

         Resulting CFG after prologue peeling would be:

         if (scalar_loop_iterations <= th)
           FIRST_NITERS = scalar_loop_iterations

         bb_before_first_loop:
         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
                                GOTO first-loop

         first_loop:
         do {
         } while ...

         bb_before_second_loop:

         second_loop:
         do {
         } while ...

         orig_exit_bb:

     2.c Add the cost model check that allows the epilogue
         to iterate for the entire unchanged scalar
         iterations of the loop in the event that the cost
         model indicates that the scalar loop is more
         profitable than the vector one.  This occurs when
         this function is invoked for epilogue generation
         and the cost model check needs to be done at run
         time.  This check is combined with any pre-existing
         check in COND_EXPR to avoid versioning.

         Resulting CFG after prologue peeling would be:

         bb_before_first_loop:
         if ((scalar_loop_iterations <= th)
             ||
             FIRST_NITERS == 0) GOTO bb_before_second_loop
                                GOTO first-loop

         first_loop:
         do {
         } while ...

         bb_before_second_loop:

         second_loop:
         do {
         } while ...

         orig_exit_bb:
   */

  bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
  /* Loop copying inserted a forwarder block for us here.  */
  bb_before_second_loop = single_exit (first_loop)->dest;

  probability_of_second_loop = (inverse_probability (first_guard_probability)
                                + combine_probabilities
                                    (second_guard_probability,
                                     first_guard_probability));
  /* Theoretically the preheader edge of the first loop and the exit edge
     should have the same frequency.  Loop exit probabilities are however
     easy to get wrong.  It is safer to copy the value from the original
     loop entry.  */
  bb_before_second_loop->frequency
    = combine_probabilities (bb_before_first_loop->frequency,
                             probability_of_second_loop);
  bb_before_second_loop->count
    = apply_probability (bb_before_first_loop->count,
                         probability_of_second_loop);
  single_succ_edge (bb_before_second_loop)->count
    = bb_before_second_loop->count;

  /* Epilogue peeling.  */
  if (!update_first_loop_count)
    {
      pre_condition =
        fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
                     build_int_cst (TREE_TYPE (*first_niters), 0));
      if (check_profitability)
        {
          tree scalar_loop_iters
            = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
                            (loop_vec_info_for_loop (loop)));
          cost_pre_condition =
            fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
                         build_int_cst (TREE_TYPE (scalar_loop_iters), th));

          pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
                                       cost_pre_condition, pre_condition);
        }
      if (cond_expr)
        {
          pre_condition =
            fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
                         pre_condition,
                         fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
                                      cond_expr));
        }
    }

  /* Prologue peeling.  */
  else
    {
      if (check_profitability)
        set_prologue_iterations (bb_before_first_loop, first_niters,
                                 loop, th, first_guard_probability);

      pre_condition =
        fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
                     build_int_cst (TREE_TYPE (*first_niters), 0));
    }

  skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
                                  cond_expr_stmt_list,
                                  bb_before_second_loop,
                                  bb_before_first_loop,
                                  inverse_probability
                                    (first_guard_probability));
  scale_loop_profile (first_loop, first_guard_probability,
                      check_profitability && (int) th > bound1 ? th : bound1);
  slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
                                      first_loop == new_loop,
                                      &new_exit_bb);


  /* 3. Add the guard that controls whether the second loop is executed.
        Resulting CFG would be:

        bb_before_first_loop:
        if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
                               GOTO first-loop

        first_loop:
        do {
        } while ...

        bb_between_loops:
        if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop
                                         (skip second loop)
                                    GOTO bb_before_second_loop

        bb_before_second_loop:

        second_loop:
        do {
        } while ...

        bb_after_second_loop:

        orig_exit_bb:
   */

  bb_between_loops = new_exit_bb;
  bb_after_second_loop = split_edge (single_exit (second_loop));

  pre_condition =
    fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
  skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
                                  bb_after_second_loop,
                                  bb_before_first_loop,
                                  inverse_probability
                                    (second_guard_probability));
  scale_loop_profile (second_loop, probability_of_second_loop, bound2);
  slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
                                      second_loop == new_loop, &new_exit_bb);

  /* 4. Make first-loop iterate FIRST_NITERS times, if requested.  */
  if (update_first_loop_count)
    slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);

  delete_update_ssa ();

  adjust_vec_debug_stmts ();

  return new_loop;
}

/* Function find_loop_location.

   Extract the location of the loop in the source code.
   If the loop is not well formed for vectorization, an estimated
   location is calculated.
   Return the loop location if found, and UNKNOWN_LOCATION otherwise.  */

source_location
find_loop_location (struct loop *loop)
{
  gimple stmt = NULL;
  basic_block bb;
  gimple_stmt_iterator si;

  if (!loop)
    return UNKNOWN_LOCATION;

  stmt = get_loop_exit_condition (loop);

  if (stmt
      && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
    return gimple_location (stmt);

  /* If we got here, the loop is probably not "well formed";
     try to estimate the loop location.  */

  if (!loop->header)
    return UNKNOWN_LOCATION;

  bb = loop->header;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOCATION;
}


/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just
   before the epilog loop.  Here we check that the access function of the
   loop IVs and the expression that represents the loop bound are simple
   enough.  These restrictions will be relaxed in the future.  */

bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  gimple phi;
  gimple_stmt_iterator gsi;

  /* Analyze phi functions of the loop header.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree evolution_part;

      phi = gsi_stmt (gsi);
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (virtual_operand_p (PHI_RESULT (phi)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "virtual phi. skip.\n");
          continue;
        }

      /* Skip reduction phis.  */

      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "reduc phi. skip.\n");
          continue;
        }

      /* Analyze the evolution function.  */

      evolution_part
        = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      if (evolution_part == NULL_TREE)
        {
          if (dump_enabled_p ())
            dump_printf (MSG_MISSED_OPTIMIZATION,
                         "No access function or evolution.\n");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         whose evolution functions are polynomials of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
        return false;
    }

  return true;
}
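
/* For illustration only (editorial sketch): an IV such as

     i_1 = PHI <0 (preheader), i_2 (latch)>
     i_2 = i_1 + 4;

   has the linear evolution {0, +, 4}; its evolution part is the constant 4,
   so it can be advanced by simply adding niters * 4 to its initial value.
   An IV whose step itself varies, e.g. the chrec {0, +, {1, +, 1}}, has a
   chrec as its evolution part and is rejected by the tree_is_chrec check
   above.  */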


/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP.  This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop.  Such a situation occurs when the last iterations of LOOP are
   peeled, because:
   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   Input:
   - LOOP - a loop that is going to be vectorized.  The last few iterations
     of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
     vectorized).  I.e., the number of times the ivs should be bumped.
   - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
     coming out from LOOP on which there are uses of the LOOP ivs
     (this is the path from LOOP->exit to epilog_loop->preheader).

   The new definitions of the ivs are placed in LOOP->exit.
   The phi args associated with the edge UPDATE_E in the bb
   UPDATE_E->dest are updated accordingly.

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

   Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
   coming out of LOOP on which the ivs of LOOP are used (this is the path
   that leads to the epilog loop; other paths skip the epilog loop).  This
   path starts with the edge UPDATE_E, and its destination (denoted
   update_bb) needs to have its phis updated.
 */

static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
                                  edge update_e)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb = single_exit (loop)->dest;
  gimple phi, phi1;
  gimple_stmt_iterator gsi, gsi1;
  basic_block update_bb = update_e->dest;

  gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));

  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
       gsi_next (&gsi), gsi_next (&gsi1))
    {
      tree init_expr;
      tree step_expr, off;
      tree type;
      tree var, ni, ni_name;
      gimple_stmt_iterator last_gsi;
      stmt_vec_info stmt_info;

      phi = gsi_stmt (gsi);
      phi1 = gsi_stmt (gsi1);
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_update_ivs_after_vectorizer: phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      /* Skip virtual phi's.  */
      if (virtual_operand_p (PHI_RESULT (phi)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "virtual phi. skip.\n");
          continue;
        }

      /* Skip reduction phis.  */
      stmt_info = vinfo_for_stmt (phi);
      if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "reduc phi. skip.\n");
          continue;
        }

      type = TREE_TYPE (gimple_phi_result (phi));
      step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
      step_expr = unshare_expr (step_expr);

      /* FORNOW: We do not support IVs whose evolution function is a
         polynomial of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (step_expr));

      init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));

      off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
                         fold_convert (TREE_TYPE (step_expr), niters),
                         step_expr);
      if (POINTER_TYPE_P (type))
        ni = fold_build_pointer_plus (init_expr, off);
      else
        ni = fold_build2 (PLUS_EXPR, type,
                          init_expr, fold_convert (type, off));

      var = create_tmp_var (type, "tmp");

      last_gsi = gsi_last_bb (exit_bb);
      ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
                                          true, GSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
    }
}
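
/* For illustration only, a worked instance of the computation above: for
   an IV with initial value 7 and step 3 in a loop with niters = 100, the
   value the IV would have held after the scalar loop is

     ni = init + niters * step = 7 + 100 * 3 = 307

   and 307 becomes the epilog loop's starting value for that IV, however
   many vector iterations the transformed loop actually executes.  */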
1605
1606 /* Function vect_do_peeling_for_loop_bound
1607
1608 Peel the last iterations of the loop represented by LOOP_VINFO.
1609 The peeled iterations form a new epilog loop. Given that the loop now
1610 iterates NITERS times, the new epilog loop iterates
1611 NITERS % VECTORIZATION_FACTOR times.
1612
1613 The original loop will later be made to iterate
1614 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1615
1616 COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1617 test. */
1618
1619 void
1620 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo,
1621 tree ni_name, tree ratio_mult_vf_name,
1622 unsigned int th, bool check_profitability)
1623 {
1624 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1625 struct loop *new_loop;
1626 edge update_e;
1627 basic_block preheader;
1628 int loop_num;
1629 int max_iter;
1630 tree cond_expr = NULL_TREE;
1631 gimple_seq cond_expr_stmt_list = NULL;
1632
1633 if (dump_enabled_p ())
1634 dump_printf_loc (MSG_NOTE, vect_location,
1635 "=== vect_do_peeling_for_loop_bound ===\n");
1636
1637 initialize_original_copy_tables ();
1638
1639 loop_num = loop->num;
1640
1641 new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
1642 &ratio_mult_vf_name, ni_name, false,
1643 th, check_profitability,
1644 cond_expr, cond_expr_stmt_list,
1645 0, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1646 gcc_assert (new_loop);
1647 gcc_assert (loop_num == loop->num);
1648 #ifdef ENABLE_CHECKING
1649 slpeel_verify_cfg_after_peeling (loop, new_loop);
1650 #endif
1651
1652 /* A guard that controls whether the new_loop is to be executed or skipped
1653 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
1654 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
1655 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
1656 is on the path where the LOOP IVs are used and need to be updated. */
1657
1658 preheader = loop_preheader_edge (new_loop)->src;
1659 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1660 update_e = EDGE_PRED (preheader, 0);
1661 else
1662 update_e = EDGE_PRED (preheader, 1);
1663
1664 /* Update the IVs of the original loop as if they were advanced by
1665 ratio_mult_vf_name steps. */
1666 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1667
1668 /* For vectorization factor N, we need to copy the last N-1 values in the
1669 epilogue, which means N-2 loopback edge executions.
1670
1671 PEELING_FOR_GAPS works by subtracting the last iteration, and thus the
1672 epilogue will execute at least LOOP_VINFO_VECT_FACTOR times. */
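/* Illustrative check of the formula below: with VF = 4 and no
   PEELING_FOR_GAPS, the epilogue runs at most VF - 1 = 3 iterations,
   i.e. max_iter = 4 - 2 = 2 latch executions; with PEELING_FOR_GAPS,
   max_iter = 4 * 2 - 2 = 6.  */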
1673 max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1674 ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
1675 : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
1676 if (check_profitability)
1677 max_iter = MAX (max_iter, (int) th - 1);
1678 record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true);
1679 dump_printf (MSG_NOTE,
1680 "Setting upper bound of number of iterations for epilogue "
1681 "loop to %d\n", max_iter);
1682
1683 /* After peeling we have to reset scalar evolution analyzer. */
1684 scev_reset ();
1685
1686 free_original_copy_tables ();
1687 }
1688
1689
1690 /* Function vect_gen_niters_for_prolog_loop
1691
1692 Set the number of iterations for the loop represented by LOOP_VINFO to the
1693 minimum between LOOP_NITERS (the original iteration count of the loop) and
1694 the number of iterations needed to resolve the misalignment of DR - the
1695 data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result,
1696 after executing this loop, the data reference DR will refer to an aligned location.
1697
1698 The following computation is generated:
1699
1700 If the misalignment of DR is known at compile time:
1701 addr_mis = DR_MISALIGNMENT (dr);
1702 Else, compute address misalignment in bytes:
1703 addr_mis = addr & (vectype_align - 1)
1704
1705 prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1706
1707 (elem_size = element type size; an element is the scalar element whose type
1708 is the inner type of the vectype)
1709
1710 When the step of the data-ref in the loop is not 1 (as in interleaved data
1711 and SLP), the number of iterations of the prolog must be divided by the step
1712 (which is equal to the size of the interleaved group).
1713
1714 The above formulas assume that VF == number of elements in the vector. This
1715 may not hold when there are multiple types in the loop.
1716 In this case, for some data-references in the loop the VF does not represent
1717 the number of elements that fit in the vector. Therefore, instead of VF we
1718 use TYPE_VECTOR_SUBPARTS. */
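
/* Worked instance of the formula above (illustrative values): for a V4SI
   access (VF = 4, elem_size = 4, vectype_align = 16) whose address is 8
   bytes past an aligned boundary, addr_mis = 8 and
   prolog_niters = ((4 - 8/4) & 3) / 1 = 2; two scalar iterations advance
   the access by 8 bytes, onto a 16-byte boundary.  */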
1719
1720 static tree
1721 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound)
1722 {
1723 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1724 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1725 tree var;
1726 gimple_seq stmts;
1727 tree iters, iters_name;
1728 edge pe;
1729 basic_block new_bb;
1730 gimple dr_stmt = DR_STMT (dr);
1731 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1732 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1733 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1734 tree niters_type = TREE_TYPE (loop_niters);
1735 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1736
1737 pe = loop_preheader_edge (loop);
1738
1739 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1740 {
1741 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1742
1743 if (dump_enabled_p ())
1744 dump_printf_loc (MSG_NOTE, vect_location,
1745 "known peeling = %d.\n", npeel);
1746
1747 iters = build_int_cst (niters_type, npeel);
1748 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1749 }
1750 else
1751 {
1752 gimple_seq new_stmts = NULL;
1753 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1754 tree offset = negative
1755 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
1756 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
1757 &new_stmts, offset, loop);
1758 tree type = unsigned_type_for (TREE_TYPE (start_addr));
1759 tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
1760 HOST_WIDE_INT elem_size =
1761 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1762 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
1763 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
1764 tree nelements_tree = build_int_cst (type, nelements);
1765 tree byte_misalign;
1766 tree elem_misalign;
1767
1768 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
1769 gcc_assert (!new_bb);
1770
1771 /* Create: byte_misalign = addr & (vectype_align - 1) */
1772 byte_misalign =
1773 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
1774 vectype_align_minus_1);
1775
1776 /* Create: elem_misalign = byte_misalign / element_size */
1777 elem_misalign =
1778 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
1779
1780 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
1781 if (negative)
1782 iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
1783 else
1784 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
1785 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1786 iters = fold_convert (niters_type, iters);
1787 *bound = nelements;
1788 }
1789
1790 /* Create: prolog_loop_niters = min (iters, loop_niters) */
1791 /* If the loop bound is known at compile time we already verified that it is
1792 greater than vf; since the misalignment ('iters') is at most vf, there's
1793 no need to generate the MIN_EXPR in this case. */
1794 if (TREE_CODE (loop_niters) != INTEGER_CST)
1795 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
1796
1797 if (dump_enabled_p ())
1798 {
1799 dump_printf_loc (MSG_NOTE, vect_location,
1800 "niters for prolog loop: ");
1801 dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
1802 dump_printf (MSG_NOTE, "\n");
1803 }
1804
1805 var = create_tmp_var (niters_type, "prolog_loop_niters");
1806 stmts = NULL;
1807 iters_name = force_gimple_operand (iters, &stmts, false, var);
1808
1809 /* Insert stmt on loop preheader edge. */
1810 if (stmts)
1811 {
1812 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1813 gcc_assert (!new_bb);
1814 }
1815
1816 return iters_name;
1817 }
1818
1819
1820 /* Function vect_update_init_of_dr
1821
1822 NITERS iterations were peeled from LOOP. DR represents a data reference
1823 in LOOP. This function updates the information recorded in DR to
1824 account for the fact that the first NITERS iterations had already been
1825 executed. Specifically, it updates the OFFSET field of DR. */
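
/* For instance (illustrative values): if NITERS = 3 iterations were peeled
   and DR_STEP is 4 bytes, DR_OFFSET grows by 3 * 4 = 12 bytes, so DR now
   describes the access as seen from the first unpeeled iteration.  */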
1826
1827 static void
1828 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1829 {
1830 tree offset = DR_OFFSET (dr);
1831
1832 niters = fold_build2 (MULT_EXPR, sizetype,
1833 fold_convert (sizetype, niters),
1834 fold_convert (sizetype, DR_STEP (dr)));
1835 offset = fold_build2 (PLUS_EXPR, sizetype,
1836 fold_convert (sizetype, offset), niters);
1837 DR_OFFSET (dr) = offset;
1838 }
1839
1840
1841 /* Function vect_update_inits_of_drs
1842
1843 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1844 This function updates the information recorded for the data references in
1845 the loop to account for the fact that the first NITERS iterations had
1846 already been executed. Specifically, it updates the initial_condition of
1847 the access_function of all the data_references in the loop. */
1848
1849 static void
1850 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1851 {
1852 unsigned int i;
1853 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1854 struct data_reference *dr;
1855
1856 if (dump_enabled_p ())
1857 dump_printf_loc (MSG_NOTE, vect_location,
1858 "=== vect_update_inits_of_dr ===\n");
1859
1860 FOR_EACH_VEC_ELT (datarefs, i, dr)
1861 vect_update_init_of_dr (dr, niters);
1862 }
1863
1864
1865 /* Function vect_do_peeling_for_alignment
1866
1867 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
1868 'niters' is set to the misalignment of one of the data references in the
1869 loop, thereby forcing it to refer to an aligned location at the beginning
1870 of the execution of this loop. The data reference for which we are
1871 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
1872
1873 void
1874 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name,
1875 unsigned int th, bool check_profitability)
1876 {
1877 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1878 tree niters_of_prolog_loop;
1879 tree wide_prolog_niters;
1880 struct loop *new_loop;
1881 int max_iter;
1882 int bound = 0;
1883
1884 if (dump_enabled_p ())
1885 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1886 "loop peeled for vectorization to enhance"
1887 " alignment\n");
1888
1889 initialize_original_copy_tables ();
1890
1891 gimple_seq stmts = NULL;
1892 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1893 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
1894 ni_name,
1895 &bound);
1896
1897 /* Peel the prolog loop and iterate it niters_of_prolog_loop times. */
1898 new_loop =
1899 slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
1900 &niters_of_prolog_loop, ni_name, true,
1901 th, check_profitability, NULL_TREE, NULL,
1902 bound,
1903 0);
1904
1905 gcc_assert (new_loop);
1906 #ifdef ENABLE_CHECKING
1907 slpeel_verify_cfg_after_peeling (new_loop, loop);
1908 #endif
1909 /* For vectorization factor N, we need to copy at most N-1 values
1910 for alignment, which means N-2 loopback edge executions. */
1911 max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
1912 if (check_profitability)
1913 max_iter = MAX (max_iter, (int) th - 1);
1914 record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true);
1915 dump_printf (MSG_NOTE,
1916 "Setting upper bound of number of iterations for prologue "
1917 "loop to %d\n", max_iter);
1918
1919 /* Update number of times loop executes. */
1920 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
1921 TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop);
1922
1923 if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
1924 wide_prolog_niters = niters_of_prolog_loop;
1925 else
1926 {
1927 gimple_seq seq = NULL;
1928 edge pe = loop_preheader_edge (loop);
1929 tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
1930 tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
1931 wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
1932 var);
1933 if (seq)
1934 {
1935 /* Insert stmt on loop preheader edge. */
1936 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1937 gcc_assert (!new_bb);
1938 }
1939 }
1940
1941 /* Update the init conditions of the access functions of all data refs. */
1942 vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
1943
1944 /* After peeling we have to reset scalar evolution analyzer. */
1945 scev_reset ();
1946
1947 free_original_copy_tables ();
1948 }
1949
1950
1951 /* Function vect_create_cond_for_align_checks.
1952
1953 Create a conditional expression that represents the alignment checks for
1954 all data references (array element references) whose alignment must be
1955 checked at runtime.
1956
1957 Input:
1958 COND_EXPR - input conditional expression. New conditions will be chained
1959 with a logical AND operation.
1960 LOOP_VINFO - two fields of the loop information are used.
1961 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
1962 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
1963
1964 Output:
1965 COND_EXPR_STMT_LIST - statements needed to construct the conditional
1966 expression.
1967 The returned value is the conditional expression to be used in the if
1968 statement that controls which version of the loop gets executed at runtime.
1969
1970 The algorithm makes two assumptions:
1971 1) The number of bytes "n" in a vector is a power of 2.
1972 2) An address "a" is aligned if a%n is zero, and this
1973 test can be done as a&(n-1) == 0. For example, for 16
1974 byte vectors the test is a&0xf == 0. */
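
/* Illustrative instance for 16-byte vectors and two data references at
   addresses A1 and A2: the code below builds ((A1 | A2) & 0xf) == 0,
   which holds iff both addresses are 16-byte aligned, since OR-ing the
   addresses can only set low-order bits, never clear them.  */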
1975
1976 static void
1977 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
1978 tree *cond_expr,
1979 gimple_seq *cond_expr_stmt_list)
1980 {
1981 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1982 vec<gimple> may_misalign_stmts
1983 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1984 gimple ref_stmt;
1985 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
1986 tree mask_cst;
1987 unsigned int i;
1988 tree int_ptrsize_type;
1989 char tmp_name[20];
1990 tree or_tmp_name = NULL_TREE;
1991 tree and_tmp_name;
1992 gimple and_stmt;
1993 tree ptrsize_zero;
1994 tree part_cond_expr;
1995
1996 /* Check that mask is one less than a power of 2, i.e., mask is
1997 all zeros followed by all ones. */
1998 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
1999
2000 int_ptrsize_type = signed_type_for (ptr_type_node);
2001
2002 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
2003 of the first vector of the i'th data reference. */
2004
2005 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
2006 {
2007 gimple_seq new_stmt_list = NULL;
2008 tree addr_base;
2009 tree addr_tmp_name;
2010 tree new_or_tmp_name;
2011 gimple addr_stmt, or_stmt;
2012 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2013 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2014 bool negative = tree_int_cst_compare
2015 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2016 tree offset = negative
2017 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
2018
2019 /* create: addr_tmp = (int)(address_of_first_vector) */
2020 addr_base =
2021 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2022 offset, loop);
2023 if (new_stmt_list != NULL)
2024 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2025
2026 sprintf (tmp_name, "addr2int%d", i);
2027 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2028 addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
2029 addr_base, NULL_TREE);
2030 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2031
2032 /* The addresses are ORed together. */
2033
2034 if (or_tmp_name != NULL_TREE)
2035 {
2036 /* create: or_tmp = or_tmp | addr_tmp */
2037 sprintf (tmp_name, "orptrs%d", i);
2038 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2039 or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2040 new_or_tmp_name,
2041 or_tmp_name, addr_tmp_name);
2042 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2043 or_tmp_name = new_or_tmp_name;
2044 }
2045 else
2046 or_tmp_name = addr_tmp_name;
2047
2048 } /* end for i */
2049
2050 mask_cst = build_int_cst (int_ptrsize_type, mask);
2051
2052 /* create: and_tmp = or_tmp & mask */
2053 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2054
2055 and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
2056 or_tmp_name, mask_cst);
2057 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2058
2059 /* Make and_tmp the left operand of the conditional test against zero.
2060 If and_tmp has a nonzero bit, then some address is unaligned. */
2061 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2062 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2063 and_tmp_name, ptrsize_zero);
2064 if (*cond_expr)
2065 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2066 *cond_expr, part_cond_expr);
2067 else
2068 *cond_expr = part_cond_expr;
2069 }
2070
2071 /* Function vect_create_cond_for_alias_checks.
2072
2073 Create a conditional expression that represents the run-time checks for
2074 overlapping of address ranges represented by a list of data reference
2075 relations passed as input.
2076
2077 Input:
2078 COND_EXPR - input conditional expression. New conditions will be chained
2079 with a logical AND operation. If it is NULL, then the function
2080 is used to return the number of alias checks.
2081 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2082 to be checked.
2083
2084 Output:
2085 COND_EXPR - conditional expression.
2086
2087 The resulting COND_EXPR is the conditional expression to be used in the if
2088 statement that controls which version of the loop gets executed at runtime.
2089 */
2090
2091 void
2092 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2093 {
2094 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2095 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2096 tree part_cond_expr;
2097
2098 /* Create expression
2099 (((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2100 || ((load_ptr_0 + load_segment_length_0) <= store_ptr_0))
2101 &&
2102 ...
2103 &&
2104 (((store_ptr_n + store_segment_length_n) <= load_ptr_n)
2105 || ((load_ptr_n + load_segment_length_n) <= store_ptr_n)) */
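/* E.g. (illustrative lengths): for a store segment [S, S + 64) and a load
   segment [L, L + 64), the runtime check passes when
   S + 64 <= L || L + 64 <= S, i.e. the two segments do not overlap.  */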
2106
2107 if (comp_alias_ddrs.is_empty ())
2108 return;
2109
2110 for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
2111 {
2112 const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2113 const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
2114 tree segment_length_a = dr_a.seg_len;
2115 tree segment_length_b = dr_b.seg_len;
2116
2117 tree addr_base_a
2118 = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr), dr_a.offset);
2119 tree addr_base_b
2120 = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr), dr_b.offset);
2121
2122 if (dump_enabled_p ())
2123 {
2124 dump_printf_loc (MSG_NOTE, vect_location,
2125 "create runtime check for data references ");
2126 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
2127 dump_printf (MSG_NOTE, " and ");
2128 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2129 dump_printf (MSG_NOTE, "\n");
2130 }
2131
2132 tree seg_a_min = addr_base_a;
2133 tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2134 if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
2135 seg_a_min = seg_a_max, seg_a_max = addr_base_a;
2136
2137 tree seg_b_min = addr_base_b;
2138 tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2139 if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
2140 seg_b_min = seg_b_max, seg_b_max = addr_base_b;
2141
2142 part_cond_expr =
2143 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2144 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2145 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2146
2147 if (*cond_expr)
2148 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2149 *cond_expr, part_cond_expr);
2150 else
2151 *cond_expr = part_cond_expr;
2152 }
2153
2154 if (dump_enabled_p ())
2155 dump_printf_loc (MSG_NOTE, vect_location,
2156 "created %u versioning for alias checks.\n",
2157 comp_alias_ddrs.length ());
2158
2159 comp_alias_ddrs.release ();
2160 }
2161
2162
2163 /* Function vect_loop_versioning.
2164
2165 If the loop has data references that may or may not be aligned and/or
2166 has data reference relations whose independence was not proven, then
2167 two versions of the loop need to be generated, one which is vectorized
2168 and one which isn't. A test is then generated to control which of the
2169 loops is executed. The test checks for the alignment of all of the
2170 data references that may or may not be aligned. An additional
2171 sequence of runtime tests is generated for each pair of DDRs whose
2172 independence was not proven. The vectorized version of the loop is
2173 executed only if both the alias and the alignment tests pass.
2174
2175 The test generated to check which version of the loop is executed
2176 is extended to first check for profitability, as indicated by the
2177 cost model.
2178
2179 The versioning precondition(s) are placed in *COND_EXPR and
2180 *COND_EXPR_STMT_LIST. */
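
/* Schematically (illustrative pseudo-CFG), the transformation produces:

     if (cond_expr)   <-- profitability && alignment && alias checks
       <vectorizable version of LOOP>
     else
       <scalar version of LOOP>  */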
2181
2182 void
2183 vect_loop_versioning (loop_vec_info loop_vinfo,
2184 unsigned int th, bool check_profitability)
2185 {
2186 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2187 basic_block condition_bb;
2188 gimple_stmt_iterator gsi, cond_exp_gsi;
2189 basic_block merge_bb;
2190 basic_block new_exit_bb;
2191 edge new_exit_e, e;
2192 gimple orig_phi, new_phi;
2193 tree cond_expr = NULL_TREE;
2194 gimple_seq cond_expr_stmt_list = NULL;
2195 tree arg;
2196 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2197 gimple_seq gimplify_stmt_list = NULL;
2198 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2199 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2200 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2201
2202 if (check_profitability)
2203 {
2204 cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2205 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2206 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2207 is_gimple_condexpr, NULL_TREE);
2208 }
2209
2210 if (version_align)
2211 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2212 &cond_expr_stmt_list);
2213
2214 if (version_alias)
2215 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2216
2217 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2218 is_gimple_condexpr, NULL_TREE);
2219 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2220
2221 initialize_original_copy_tables ();
2222 loop_version (loop, cond_expr, &condition_bb,
2223 prob, prob, REG_BR_PROB_BASE - prob, true);
2224
2225 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2226 && dump_enabled_p ())
2227 {
2228 if (version_alias)
2229 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2230 "loop versioned for vectorization because of "
2231 "possible aliasing\n");
2232 if (version_align)
2233 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2234 "loop versioned for vectorization to enhance "
2235 "alignment\n");
2236
2237 }
2238 free_original_copy_tables ();
2239
2240 /* Loop versioning violates an assumption we try to maintain during
2241 vectorization - that the loop exit block has a single predecessor.
2242 After versioning, the exit block of both loop versions is the same
2243 basic block (i.e. it has two predecessors). In order to simplify the
2244 following transformations in the vectorizer, we fix this situation
2245 here by adding a new (empty) block on the exit-edge of the loop,
2246 with the proper loop-exit phis to maintain loop-closed-form. */
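/* Schematically (illustrative): MERGE_BB had both loop versions as
   predecessors; after split_edge, the vectorized loop exits into the new
   single-predecessor NEW_EXIT_BB, whose phis forward the loop-closed
   values on to MERGE_BB.  */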
2247
2248 merge_bb = single_exit (loop)->dest;
2249 gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
2250 new_exit_bb = split_edge (single_exit (loop));
2251 new_exit_e = single_exit (loop);
2252 e = EDGE_SUCC (new_exit_bb, 0);
2253
2254 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2255 {
2256 tree new_res;
2257 orig_phi = gsi_stmt (gsi);
2258 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
2259 new_phi = create_phi_node (new_res, new_exit_bb);
2260 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2261 add_phi_arg (new_phi, arg, new_exit_e,
2262 gimple_phi_arg_location_from_edge (orig_phi, e));
2263 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2264 }
2265
2266
2267 /* Hoist loop-invariant load statements on memrefs with zero-stride accesses. */
2268
2269 if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2270 {
2271 /* In the loop body, we iterate over each statement to check if it is a
2272 load. Then we check the DR_STEP of the data reference. If DR_STEP is
2273 zero, then we will hoist the load statement to the loop preheader. */
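/* E.g. (illustrative): once versioning has established that *p_2 cannot
   alias the vectorized stores, an invariant load such as
     x_1 = *p_2;   <-- DR_STEP == 0
   can be executed once in the preheader instead of in every iteration.  */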
2274
2275 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2276 int nbbs = loop->num_nodes;
2277
2278 for (int i = 0; i < nbbs; ++i)
2279 {
2280 for (gimple_stmt_iterator si = gsi_start_bb (bbs[i]);
2281 !gsi_end_p (si);)
2282 {
2283 gimple stmt = gsi_stmt (si);
2284 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2285 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2286
2287 if (is_gimple_assign (stmt)
2288 && (!dr
2289 || (DR_IS_READ (dr) && integer_zerop (DR_STEP (dr)))))
2290 {
2291 bool hoist = true;
2292 ssa_op_iter iter;
2293 tree var;
2294
2295 /* We hoist a statement if all SSA uses in it are defined
2296 outside of the loop. */
2297 FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_USE)
2298 {
2299 gimple def = SSA_NAME_DEF_STMT (var);
2300 if (!gimple_nop_p (def)
2301 && flow_bb_inside_loop_p (loop, gimple_bb (def)))
2302 {
2303 hoist = false;
2304 break;
2305 }
2306 }
2307
2308 if (hoist)
2309 {
2310 if (dr)
2311 gimple_set_vuse (stmt, NULL);
2312
2313 gsi_remove (&si, false);
2314 gsi_insert_on_edge_immediate (loop_preheader_edge (loop),
2315 stmt);
2316
2317 if (dump_enabled_p ())
2318 {
2319 dump_printf_loc
2320 (MSG_NOTE, vect_location,
2321 "hoisting out of the vectorized loop: ");
2322 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2323 dump_printf (MSG_NOTE, "\n");
2324 }
2325 continue;
2326 }
2327 }
2328 gsi_next (&si);
2329 }
2330 }
2331 }
2332
2333 /* End loop-exit-fixes after versioning. */
2334
2335 if (cond_expr_stmt_list)
2336 {
2337 cond_exp_gsi = gsi_last_bb (condition_bb);
2338 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2339 GSI_SAME_STMT);
2340 }
2341 update_ssa (TODO_update_ssa);
2342 }