real.h (struct real_format): Split the signbit field into two two fields, signbit_ro...
[gcc.git] / gcc / tree-vectorizer.c
1 /* Loop Vectorization
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
21
22 /* Loop Vectorization Pass.
23
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
28
29 For example, the vectorizer transforms the following simple loop:
30
31 short a[N]; short b[N]; short c[N]; int i;
32
33 for (i=0; i<N; i++){
34 a[i] = b[i] + c[i];
35 }
36
37 as if it was manually vectorized by rewriting the source code into:
38
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
42 v8hi va, vb, vc;
43
44 for (i=0; i<N/8; i++){
45 vb = pb[i];
46 vc = pc[i];
47 va = vb + vc;
48 pa[i] = va;
49 }
50
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
55
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
63
64 Analysis phase:
65 ===============
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
69
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
74
75 Transformation phase:
76 =====================
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It insert the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
85
86 For example, say stmt S1 was vectorized into stmt VS1:
87
88 VS1: vb = px[i];
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
90 S2: a = b;
91
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
96
97 VS1: vb = px[i];
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
99 VS2: va = vb;
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
101
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
104
105 Target modeling:
106 =================
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
111
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
118
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
121 */
122
123 #include "config.h"
124 #include "system.h"
125 #include "coretypes.h"
126 #include "tm.h"
127 #include "errors.h"
128 #include "ggc.h"
129 #include "tree.h"
130 #include "target.h"
131 #include "rtl.h"
132 #include "basic-block.h"
133 #include "diagnostic.h"
134 #include "tree-flow.h"
135 #include "tree-dump.h"
136 #include "timevar.h"
137 #include "cfgloop.h"
138 #include "cfglayout.h"
139 #include "expr.h"
140 #include "optabs.h"
141 #include "toplev.h"
142 #include "tree-chrec.h"
143 #include "tree-data-ref.h"
144 #include "tree-scalar-evolution.h"
145 #include "input.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
148
149 /*************************************************************************
150 Simple Loop Peeling Utilities
151 *************************************************************************/
152 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
153 (struct loop *, struct loops *, edge);
154 static void slpeel_update_phis_for_duplicate_loop
155 (struct loop *, struct loop *, bool after);
156 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
157 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
158
159 static void allocate_new_names (bitmap);
160 static void rename_use_op (use_operand_p);
161 static void rename_def_op (def_operand_p, tree);
162 static void rename_variables_in_bb (basic_block);
163 static void free_new_names (bitmap);
164 static void rename_variables_in_loop (struct loop *);
165
166 /*************************************************************************
167 General Vectorization Utilities
168 *************************************************************************/
169 static void vect_set_dump_settings (void);
170 static bool need_imm_uses_for (tree);
171
172 /* vect_dump will be set to stderr or dump_file if exist. */
173 FILE *vect_dump;
174
175 /* vect_verbosity_level set to an invalid value
176 to mark that it's uninitialized. */
177 enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
178
179
180 \f
181 /*************************************************************************
182 Simple Loop Peeling Utilities
183
184 Utilities to support loop peeling for vectorization purposes.
185 *************************************************************************/
186
187
188 /* For each definition in DEFINITIONS this function allocates
189 new ssa name. */
190
191 static void
192 allocate_new_names (bitmap definitions)
193 {
194 unsigned ver;
195 bitmap_iterator bi;
196
197 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
198 {
199 tree def = ssa_name (ver);
200 tree *new_name_ptr = xmalloc (sizeof (tree));
201
202 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
203
204 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
205 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
206
207 SSA_NAME_AUX (def) = new_name_ptr;
208 }
209 }
210
211
212 /* Renames the use *OP_P. */
213
214 static void
215 rename_use_op (use_operand_p op_p)
216 {
217 tree *new_name_ptr;
218
219 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
220 return;
221
222 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
223
224 /* Something defined outside of the loop. */
225 if (!new_name_ptr)
226 return;
227
228 /* An ordinary ssa name defined in the loop. */
229
230 SET_USE (op_p, *new_name_ptr);
231 }
232
233
234 /* Renames the def *OP_P in statement STMT. */
235
236 static void
237 rename_def_op (def_operand_p op_p, tree stmt)
238 {
239 tree *new_name_ptr;
240
241 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
242 return;
243
244 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
245
246 /* Something defined outside of the loop. */
247 if (!new_name_ptr)
248 return;
249
250 /* An ordinary ssa name defined in the loop. */
251
252 SET_DEF (op_p, *new_name_ptr);
253 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
254 }
255
256
257 /* Renames the variables in basic block BB. */
258
259 static void
260 rename_variables_in_bb (basic_block bb)
261 {
262 tree phi;
263 block_stmt_iterator bsi;
264 tree stmt;
265 stmt_ann_t ann;
266 use_optype uses;
267 vuse_optype vuses;
268 def_optype defs;
269 v_may_def_optype v_may_defs;
270 v_must_def_optype v_must_defs;
271 unsigned i;
272 edge e;
273 edge_iterator ei;
274 struct loop *loop = bb->loop_father;
275
276 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
277 rename_def_op (PHI_RESULT_PTR (phi), phi);
278
279 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
280 {
281 stmt = bsi_stmt (bsi);
282 get_stmt_operands (stmt);
283 ann = stmt_ann (stmt);
284
285 uses = USE_OPS (ann);
286 for (i = 0; i < NUM_USES (uses); i++)
287 rename_use_op (USE_OP_PTR (uses, i));
288
289 defs = DEF_OPS (ann);
290 for (i = 0; i < NUM_DEFS (defs); i++)
291 rename_def_op (DEF_OP_PTR (defs, i), stmt);
292
293 vuses = VUSE_OPS (ann);
294 for (i = 0; i < NUM_VUSES (vuses); i++)
295 rename_use_op (VUSE_OP_PTR (vuses, i));
296
297 v_may_defs = V_MAY_DEF_OPS (ann);
298 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
299 {
300 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
301 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
302 }
303
304 v_must_defs = V_MUST_DEF_OPS (ann);
305 for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
306 {
307 rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
308 rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
309 }
310 }
311
312 FOR_EACH_EDGE (e, ei, bb->succs)
313 {
314 if (!flow_bb_inside_loop_p (loop, e->dest))
315 continue;
316 for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
317 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
318 }
319 }
320
321
322 /* Releases the structures holding the new ssa names. */
323
324 static void
325 free_new_names (bitmap definitions)
326 {
327 unsigned ver;
328 bitmap_iterator bi;
329
330 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
331 {
332 tree def = ssa_name (ver);
333
334 if (SSA_NAME_AUX (def))
335 {
336 free (SSA_NAME_AUX (def));
337 SSA_NAME_AUX (def) = NULL;
338 }
339 }
340 }
341
342
343 /* Renames variables in new generated LOOP. */
344
345 static void
346 rename_variables_in_loop (struct loop *loop)
347 {
348 unsigned i;
349 basic_block *bbs;
350
351 bbs = get_loop_body (loop);
352
353 for (i = 0; i < loop->num_nodes; i++)
354 rename_variables_in_bb (bbs[i]);
355
356 free (bbs);
357 }
358
359
360 /* Update the PHI nodes of NEW_LOOP.
361
362 NEW_LOOP is a duplicate of ORIG_LOOP.
363 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
364 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
365 executes before it. */
366
367 static void
368 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
369 struct loop *new_loop, bool after)
370 {
371 tree *new_name_ptr, new_ssa_name;
372 tree phi_new, phi_orig;
373 tree def;
374 edge orig_loop_latch = loop_latch_edge (orig_loop);
375 edge orig_entry_e = loop_preheader_edge (orig_loop);
376 edge new_loop_exit_e = new_loop->single_exit;
377 edge new_loop_entry_e = loop_preheader_edge (new_loop);
378 edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
379
380 /*
381 step 1. For each loop-header-phi:
382 Add the first phi argument for the phi in NEW_LOOP
383 (the one associated with the entry of NEW_LOOP)
384
385 step 2. For each loop-header-phi:
386 Add the second phi argument for the phi in NEW_LOOP
387 (the one associated with the latch of NEW_LOOP)
388
389 step 3. Update the phis in the successor block of NEW_LOOP.
390
391 case 1: NEW_LOOP was placed before ORIG_LOOP:
392 The successor block of NEW_LOOP is the header of ORIG_LOOP.
393 Updating the phis in the successor block can therefore be done
394 along with the scanning of the loop header phis, because the
395 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
396 phi nodes, organized in the same order.
397
398 case 2: NEW_LOOP was placed after ORIG_LOOP:
399 The successor block of NEW_LOOP is the original exit block of
400 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
401 We postpone updating these phis to a later stage (when
402 loop guards are added).
403 */
404
405
406 /* Scan the phis in the headers of the old and new loops
407 (they are organized in exactly the same order). */
408
409 for (phi_new = phi_nodes (new_loop->header),
410 phi_orig = phi_nodes (orig_loop->header);
411 phi_new && phi_orig;
412 phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
413 {
414 /* step 1. */
415 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
416 add_phi_arg (phi_new, def, new_loop_entry_e);
417
418 /* step 2. */
419 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
420 if (TREE_CODE (def) != SSA_NAME)
421 continue;
422
423 new_name_ptr = SSA_NAME_AUX (def);
424 if (!new_name_ptr)
425 /* Something defined outside of the loop. */
426 continue;
427
428 /* An ordinary ssa name defined in the loop. */
429 new_ssa_name = *new_name_ptr;
430 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
431
432 /* step 3 (case 1). */
433 if (!after)
434 {
435 gcc_assert (new_loop_exit_e == orig_entry_e);
436 SET_PHI_ARG_DEF (phi_orig,
437 new_loop_exit_e->dest_idx,
438 new_ssa_name);
439 }
440 }
441 }
442
443
444 /* Update PHI nodes for a guard of the LOOP.
445
446 Input:
447 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
448 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
449 originates from the guard-bb, skips LOOP and reaches the (unique) exit
450 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
451 We denote this bb NEW_MERGE_BB because it had a single predecessor (the
452 LOOP header) before the guard code was added, and now it became a merge
453 point of two paths - the path that ends with the LOOP exit-edge, and
454 the path that ends with GUARD_EDGE.
455
456 This function creates and updates the relevant phi nodes to account for
457 the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
458 1. Create phi nodes at NEW_MERGE_BB.
459 2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
460 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
461 was added:
462
463 ===> The CFG before the guard-code was added:
464 LOOP_header_bb:
465 if (exit_loop) goto update_bb : LOOP_header_bb
466 update_bb:
467
468 ==> The CFG after the guard-code was added:
469 guard_bb:
470 if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
471 LOOP_header_bb:
472 if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
473 new_merge_bb:
474 goto update_bb
475 update_bb:
476
477 - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
478 UPDATE_BB are loop entry phis, like the phis in the LOOP header,
479 organized in the same order.
480 If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
481 loop exit phis.
482
483 - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
484 "original" loop). FALSE if LOOP is an original loop (not a newly
485 created copy). The SSA_NAME_AUX fields of the defs in the original
486 loop are the corresponding new ssa-names used in the new duplicated
487 loop copy. IS_NEW_LOOP indicates which of the two args of the phi
488 nodes in UPDATE_BB takes the original ssa-name, and which takes the
489 new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
490 the LOOP-exit-edge takes the new-name, and the phi-arg that is
491 associated with GUARD_EDGE takes the original name. If IS_NEW_LOOP is
492 FALSE, it's the other way around.
493 */
494
495 static void
496 slpeel_update_phi_nodes_for_guard (edge guard_edge,
497 struct loop *loop,
498 bool entry_phis,
499 bool is_new_loop)
500 {
501 tree orig_phi, new_phi, update_phi;
502 tree guard_arg, loop_arg;
503 basic_block new_merge_bb = guard_edge->dest;
504 edge e = single_succ_edge (new_merge_bb);
505 basic_block update_bb = e->dest;
506 basic_block orig_bb = (entry_phis ? loop->header : update_bb);
507
508 for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
509 orig_phi && update_phi;
510 orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
511 {
512 /* 1. Generate new phi node in NEW_MERGE_BB: */
513 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
514 new_merge_bb);
515
516 /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
517 of LOOP. Set the two phi args in NEW_PHI for these edges: */
518 if (entry_phis)
519 {
520 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
521 loop_latch_edge (loop));
522 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
523 loop_preheader_edge (loop));
524 }
525 else /* exit phis */
526 {
527 tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
528 tree *new_name_ptr = SSA_NAME_AUX (orig_def);
529 tree new_name;
530
531 if (new_name_ptr)
532 new_name = *new_name_ptr;
533 else
534 /* Something defined outside of the loop */
535 new_name = orig_def;
536
537 if (is_new_loop)
538 {
539 guard_arg = orig_def;
540 loop_arg = new_name;
541 }
542 else
543 {
544 guard_arg = new_name;
545 loop_arg = orig_def;
546 }
547 }
548 add_phi_arg (new_phi, loop_arg, loop->single_exit);
549 add_phi_arg (new_phi, guard_arg, guard_edge);
550
551 /* 3. Update phi in successor block. */
552 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
553 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
554 SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
555 }
556
557 set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
558 }
559
560
561 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
562 that starts at zero, increases by one and its limit is NITERS.
563
564 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
565
566 void
567 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
568 {
569 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
570 tree orig_cond;
571 edge exit_edge = loop->single_exit;
572 block_stmt_iterator loop_cond_bsi;
573 block_stmt_iterator incr_bsi;
574 bool insert_after;
575 tree begin_label = tree_block_label (loop->latch);
576 tree exit_label = tree_block_label (loop->single_exit->dest);
577 tree init = build_int_cst (TREE_TYPE (niters), 0);
578 tree step = build_int_cst (TREE_TYPE (niters), 1);
579 tree then_label;
580 tree else_label;
581 LOC loop_loc;
582
583 orig_cond = get_loop_exit_condition (loop);
584 #ifdef ENABLE_CHECKING
585 gcc_assert (orig_cond);
586 #endif
587 loop_cond_bsi = bsi_for_stmt (orig_cond);
588
589 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
590 create_iv (init, step, NULL_TREE, loop,
591 &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
592
593 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
594 {
595 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
596 then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
597 else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
598 }
599 else /* 'then' edge loops back. */
600 {
601 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
602 then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
603 else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
604 }
605
606 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
607 then_label, else_label);
608 bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
609
610 /* Remove old loop exit test: */
611 bsi_remove (&loop_cond_bsi);
612
613 loop_loc = find_loop_location (loop);
614 if (dump_file && (dump_flags & TDF_DETAILS))
615 {
616 if (loop_loc != UNKNOWN_LOC)
617 fprintf (dump_file, "\nloop at %s:%d: ",
618 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
619 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
620 }
621
622 loop->nb_iterations = niters;
623 }
624
625
626 /* Given LOOP this function generates a new copy of it and puts it
627 on E which is either the entry or exit of LOOP. */
628
629 static struct loop *
630 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
631 edge e)
632 {
633 struct loop *new_loop;
634 basic_block *new_bbs, *bbs;
635 bool at_exit;
636 bool was_imm_dom;
637 basic_block exit_dest;
638 tree phi, phi_arg;
639
640 at_exit = (e == loop->single_exit);
641 if (!at_exit && e != loop_preheader_edge (loop))
642 return NULL;
643
644 bbs = get_loop_body (loop);
645
646 /* Check whether duplication is possible. */
647 if (!can_copy_bbs_p (bbs, loop->num_nodes))
648 {
649 free (bbs);
650 return NULL;
651 }
652
653 /* Generate new loop structure. */
654 new_loop = duplicate_loop (loops, loop, loop->outer);
655 if (!new_loop)
656 {
657 free (bbs);
658 return NULL;
659 }
660
661 exit_dest = loop->single_exit->dest;
662 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
663 exit_dest) == loop->header ?
664 true : false);
665
666 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
667
668 copy_bbs (bbs, loop->num_nodes, new_bbs,
669 &loop->single_exit, 1, &new_loop->single_exit, NULL);
670
671 /* Duplicating phi args at exit bbs as coming
672 also from exit of duplicated loop. */
673 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
674 {
675 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->single_exit);
676 if (phi_arg)
677 {
678 edge new_loop_exit_edge;
679
680 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
681 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
682 else
683 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
684
685 add_phi_arg (phi, phi_arg, new_loop_exit_edge);
686 }
687 }
688
689 if (at_exit) /* Add the loop copy at exit. */
690 {
691 redirect_edge_and_branch_force (e, new_loop->header);
692 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
693 if (was_imm_dom)
694 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
695 }
696 else /* Add the copy at entry. */
697 {
698 edge new_exit_e;
699 edge entry_e = loop_preheader_edge (loop);
700 basic_block preheader = entry_e->src;
701
702 if (!flow_bb_inside_loop_p (new_loop,
703 EDGE_SUCC (new_loop->header, 0)->dest))
704 new_exit_e = EDGE_SUCC (new_loop->header, 0);
705 else
706 new_exit_e = EDGE_SUCC (new_loop->header, 1);
707
708 redirect_edge_and_branch_force (new_exit_e, loop->header);
709 set_immediate_dominator (CDI_DOMINATORS, loop->header,
710 new_exit_e->src);
711
712 /* We have to add phi args to the loop->header here as coming
713 from new_exit_e edge. */
714 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
715 {
716 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
717 if (phi_arg)
718 add_phi_arg (phi, phi_arg, new_exit_e);
719 }
720
721 redirect_edge_and_branch_force (entry_e, new_loop->header);
722 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
723 }
724
725 free (new_bbs);
726 free (bbs);
727
728 return new_loop;
729 }
730
731
732 /* Given the condition statement COND, put it as the last statement
733 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
734 Assumes that this is the single exit of the guarded loop.
735 Returns the skip edge. */
736
737 static edge
738 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
739 basic_block dom_bb)
740 {
741 block_stmt_iterator bsi;
742 edge new_e, enter_e;
743 tree cond_stmt, then_label, else_label;
744
745 enter_e = single_succ_edge (guard_bb);
746 enter_e->flags &= ~EDGE_FALLTHRU;
747 enter_e->flags |= EDGE_FALSE_VALUE;
748 bsi = bsi_last (guard_bb);
749
750 then_label = build1 (GOTO_EXPR, void_type_node,
751 tree_block_label (exit_bb));
752 else_label = build1 (GOTO_EXPR, void_type_node,
753 tree_block_label (enter_e->dest));
754 cond_stmt = build3 (COND_EXPR, void_type_node, cond,
755 then_label, else_label);
756 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
757 /* Add new edge to connect entry block to the second loop. */
758 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
759 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
760 return new_e;
761 }
762
763
764 /* This function verifies that the following restrictions apply to LOOP:
765 (1) it is innermost
766 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
767 (3) it is single entry, single exit
768 (4) its exit condition is the last stmt in the header
769 (5) E is the entry/exit edge of LOOP.
770 */
771
772 bool
773 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
774 {
775 edge exit_e = loop->single_exit;
776 edge entry_e = loop_preheader_edge (loop);
777 tree orig_cond = get_loop_exit_condition (loop);
778 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
779
780 if (any_marked_for_rewrite_p ())
781 return false;
782
783 if (loop->inner
784 /* All loops have an outer scope; the only case loop->outer is NULL is for
785 the function itself. */
786 || !loop->outer
787 || loop->num_nodes != 2
788 || !empty_block_p (loop->latch)
789 || !loop->single_exit
790 /* Verify that new loop exit condition can be trivially modified. */
791 || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
792 || (e != exit_e && e != entry_e))
793 return false;
794
795 return true;
796 }
797
798 #ifdef ENABLE_CHECKING
799 void
800 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
801 struct loop *second_loop)
802 {
803 basic_block loop1_exit_bb = first_loop->single_exit->dest;
804 basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
805 basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
806
807 /* A guard that controls whether the second_loop is to be executed or skipped
808 is placed in first_loop->exit. first_loopt->exit therefore has two
809 successors - one is the preheader of second_loop, and the other is a bb
810 after second_loop.
811 */
812 gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
813
814 /* 1. Verify that one of the successors of first_loopt->exit is the preheader
815 of second_loop. */
816
817 /* The preheader of new_loop is expected to have two predessors:
818 first_loop->exit and the block that precedes first_loop. */
819
820 gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
821 && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
822 && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
823 || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
824 && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
825
826 /* Verify that the other successor of first_loopt->exit is after the
827 second_loop. */
828 /* TODO */
829 }
830 #endif
831
832 /* Function slpeel_tree_peel_loop_to_edge.
833
834 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
835 that is placed on the entry (exit) edge E of LOOP. After this transformation
836 we have two loops one after the other - first-loop iterates FIRST_NITERS
837 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
838
839 Input:
840 - LOOP: the loop to be peeled.
841 - E: the exit or entry edge of LOOP.
842 If it is the entry edge, we peel the first iterations of LOOP. In this
843 case first-loop is LOOP, and second-loop is the newly created loop.
844 If it is the exit edge, we peel the last iterations of LOOP. In this
845 case, first-loop is the newly created loop, and second-loop is LOOP.
846 - NITERS: the number of iterations that LOOP iterates.
847 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
848 - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible
849 for updating the loop bound of the first-loop to FIRST_NITERS. If it
850 is false, the caller of this function may want to take care of this
851 (this can be useful if we don't want new stmts added to first-loop).
852
853 Output:
854 The function returns a pointer to the new loop-copy, or NULL if it failed
855 to perform the transformation.
856
857 The function generates two if-then-else guards: one before the first loop,
858 and the other before the second loop:
859 The first guard is:
860 if (FIRST_NITERS == 0) then skip the first loop,
861 and go directly to the second loop.
862 The second guard is:
863 if (FIRST_NITERS == NITERS) then skip the second loop.
864
865 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
866 FORNOW the resulting code will not be in loop-closed-ssa form.
867 */
868
869 struct loop*
870 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
871 edge e, tree first_niters,
872 tree niters, bool update_first_loop_count)
873 {
874 struct loop *new_loop = NULL, *first_loop, *second_loop;
875 edge skip_e;
876 tree pre_condition;
877 bitmap definitions;
878 basic_block bb_before_second_loop, bb_after_second_loop;
879 basic_block bb_before_first_loop;
880 basic_block bb_between_loops;
881 edge exit_e = loop->single_exit;
882 LOC loop_loc;
883
884 if (!slpeel_can_duplicate_loop_p (loop, e))
885 return NULL;
886
887 /* We have to initialize cfg_hooks. Then, when calling
888 cfg_hooks->split_edge, the function tree_split_edge
889 is actually called and, when calling cfg_hooks->duplicate_block,
890 the function tree_duplicate_bb is called. */
891 tree_register_cfg_hooks ();
892
893
894 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
895 Resulting CFG would be:
896
897 first_loop:
898 do {
899 } while ...
900
901 second_loop:
902 do {
903 } while ...
904
905 orig_exit_bb:
906 */
907
908 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
909 {
910 loop_loc = find_loop_location (loop);
911 if (dump_file && (dump_flags & TDF_DETAILS))
912 {
913 if (loop_loc != UNKNOWN_LOC)
914 fprintf (dump_file, "\n%s:%d: note: ",
915 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
916 fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
917 }
918 return NULL;
919 }
920
921 if (e == exit_e)
922 {
923 /* NEW_LOOP was placed after LOOP. */
924 first_loop = loop;
925 second_loop = new_loop;
926 }
927 else
928 {
929 /* NEW_LOOP was placed before LOOP. */
930 first_loop = new_loop;
931 second_loop = loop;
932 }
933
934 definitions = marked_ssa_names ();
935 allocate_new_names (definitions);
936 slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
937 rename_variables_in_loop (new_loop);
938
939
940 /* 2. Add the guard that controls whether the first loop is executed.
941 Resulting CFG would be:
942
943 bb_before_first_loop:
944 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
945 GOTO first-loop
946
947 first_loop:
948 do {
949 } while ...
950
951 bb_before_second_loop:
952
953 second_loop:
954 do {
955 } while ...
956
957 orig_exit_bb:
958 */
959
960 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
961 add_bb_to_loop (bb_before_first_loop, first_loop->outer);
962 bb_before_second_loop = split_edge (first_loop->single_exit);
963 add_bb_to_loop (bb_before_second_loop, first_loop->outer);
964
965 pre_condition =
966 build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
967 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
968 bb_before_second_loop, bb_before_first_loop);
969 slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
970 first_loop == new_loop);
971
972
973 /* 3. Add the guard that controls whether the second loop is executed.
974 Resulting CFG would be:
975
976 bb_before_first_loop:
977 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
978 GOTO first-loop
979
980 first_loop:
981 do {
982 } while ...
983
984 bb_between_loops:
985 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
986 GOTO bb_before_second_loop
987
988 bb_before_second_loop:
989
990 second_loop:
991 do {
992 } while ...
993
994 bb_after_second_loop:
995
996 orig_exit_bb:
997 */
998
999 bb_between_loops = split_edge (first_loop->single_exit);
1000 add_bb_to_loop (bb_between_loops, first_loop->outer);
1001 bb_after_second_loop = split_edge (second_loop->single_exit);
1002 add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1003
1004 pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1005 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1006 bb_after_second_loop, bb_before_first_loop);
1007 slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
1008 second_loop == new_loop);
1009
1010 /* Flow loop scan does not update loop->single_exit field. */
1011 first_loop->single_exit = first_loop->single_exit;
1012 second_loop->single_exit = second_loop->single_exit;
1013
1014 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1015 */
1016 if (update_first_loop_count)
1017 slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1018
1019 free_new_names (definitions);
1020 BITMAP_FREE (definitions);
1021 unmark_all_for_rewrite ();
1022
1023 return new_loop;
1024 }
1025
1026 /* Function vect_get_loop_location.
1027
1028 Extract the location of the loop in the source code.
1029 If the loop is not well formed for vectorization, an estimated
1030 location is calculated.
1031 Return the loop location if succeed and NULL if not. */
1032
1033 LOC
1034 find_loop_location (struct loop *loop)
1035 {
1036 tree node = NULL_TREE;
1037 basic_block bb;
1038 block_stmt_iterator si;
1039
1040 if (!loop)
1041 return UNKNOWN_LOC;
1042
1043 node = get_loop_exit_condition (loop);
1044
1045 if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
1046 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1047 return EXPR_LOC (node);
1048
1049 /* If we got here the loop is probably not "well formed",
1050 try to estimate the loop location */
1051
1052 if (!loop->header)
1053 return UNKNOWN_LOC;
1054
1055 bb = loop->header;
1056
1057 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1058 {
1059 node = bsi_stmt (si);
1060 if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
1061 return EXPR_LOC (node);
1062 }
1063
1064 return UNKNOWN_LOC;
1065 }
1066
1067
1068 /*************************************************************************
1069 Vectorization Debug Information.
1070 *************************************************************************/
1071
1072 /* Function vect_set_verbosity_level.
1073
1074 Called from toplev.c upon detection of the
1075 -ftree-vectorizer-verbose=N option. */
1076
1077 void
1078 vect_set_verbosity_level (const char *val)
1079 {
1080 unsigned int vl;
1081
1082 vl = atoi (val);
1083 if (vl < MAX_VERBOSITY_LEVEL)
1084 vect_verbosity_level = vl;
1085 else
1086 vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1;
1087 }
1088
1089
1090 /* Function vect_set_dump_settings.
1091
1092 Fix the verbosity level of the vectorizer if the
1093 requested level was not set explicitly using the flag
1094 -ftree-vectorizer-verbose=N.
1095 Decide where to print the debugging information (dump_file/stderr).
1096 If the user defined the verbosity level, but there is no dump file,
1097 print to stderr, otherwise print to the dump file. */
1098
1099 static void
1100 vect_set_dump_settings (void)
1101 {
1102 vect_dump = dump_file;
1103
1104 /* Check if the verbosity level was defined by the user: */
1105 if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
1106 {
1107 /* If there is no dump file, print to stderr. */
1108 if (!dump_file)
1109 vect_dump = stderr;
1110 return;
1111 }
1112
1113 /* User didn't specify verbosity level: */
1114 if (dump_file && (dump_flags & TDF_DETAILS))
1115 vect_verbosity_level = REPORT_DETAILS;
1116 else if (dump_file && (dump_flags & TDF_STATS))
1117 vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
1118 else
1119 vect_verbosity_level = REPORT_NONE;
1120
1121 gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
1122 }
1123
1124
1125 /* Function debug_loop_details.
1126
1127 For vectorization debug dumps. */
1128
1129 bool
1130 vect_print_dump_info (enum verbosity_levels vl, LOC loc)
1131 {
1132 if (vl > vect_verbosity_level)
1133 return false;
1134
1135 if (loc == UNKNOWN_LOC)
1136 fprintf (vect_dump, "\n%s:%d: note: ",
1137 DECL_SOURCE_FILE (current_function_decl),
1138 DECL_SOURCE_LINE (current_function_decl));
1139 else
1140 fprintf (vect_dump, "\n%s:%d: note: ", LOC_FILE (loc), LOC_LINE (loc));
1141
1142
1143 return true;
1144 }
1145
1146
1147 /*************************************************************************
1148 Vectorization Utilities.
1149 *************************************************************************/
1150
1151 /* Function new_stmt_vec_info.
1152
1153 Create and initialize a new stmt_vec_info struct for STMT. */
1154
1155 stmt_vec_info
1156 new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
1157 {
1158 stmt_vec_info res;
1159 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1160
1161 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1162 STMT_VINFO_STMT (res) = stmt;
1163 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
1164 STMT_VINFO_RELEVANT_P (res) = 0;
1165 STMT_VINFO_VECTYPE (res) = NULL;
1166 STMT_VINFO_VEC_STMT (res) = NULL;
1167 STMT_VINFO_DATA_REF (res) = NULL;
1168 STMT_VINFO_MEMTAG (res) = NULL;
1169 STMT_VINFO_VECT_DR_BASE_ADDRESS (res) = NULL;
1170 STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE;
1171 STMT_VINFO_VECT_STEP (res) = NULL_TREE;
1172 STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
1173 STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;
1174
1175 return res;
1176 }
1177
1178
1179 /* Function new_loop_vec_info.
1180
1181 Create and initialize a new loop_vec_info struct for LOOP, as well as
1182 stmt_vec_info structs for all the stmts in LOOP. */
1183
1184 loop_vec_info
1185 new_loop_vec_info (struct loop *loop)
1186 {
1187 loop_vec_info res;
1188 basic_block *bbs;
1189 block_stmt_iterator si;
1190 unsigned int i;
1191
1192 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1193
1194 bbs = get_loop_body (loop);
1195
1196 /* Create stmt_info for all stmts in the loop. */
1197 for (i = 0; i < loop->num_nodes; i++)
1198 {
1199 basic_block bb = bbs[i];
1200 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1201 {
1202 tree stmt = bsi_stmt (si);
1203 stmt_ann_t ann;
1204
1205 get_stmt_operands (stmt);
1206 ann = stmt_ann (stmt);
1207 set_stmt_info (ann, new_stmt_vec_info (stmt, res));
1208 }
1209 }
1210
1211 LOOP_VINFO_LOOP (res) = loop;
1212 LOOP_VINFO_BBS (res) = bbs;
1213 LOOP_VINFO_EXIT_COND (res) = NULL;
1214 LOOP_VINFO_NITERS (res) = NULL;
1215 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1216 LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1217 LOOP_VINFO_VECT_FACTOR (res) = 0;
1218 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1219 "loop_write_datarefs");
1220 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1221 "loop_read_datarefs");
1222 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1223 LOOP_VINFO_LOC (res) = UNKNOWN_LOC;
1224
1225 return res;
1226 }
1227
1228
1229 /* Function destroy_loop_vec_info.
1230
1231 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1232 stmts in the loop. */
1233
1234 void
1235 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1236 {
1237 struct loop *loop;
1238 basic_block *bbs;
1239 int nbbs;
1240 block_stmt_iterator si;
1241 int j;
1242
1243 if (!loop_vinfo)
1244 return;
1245
1246 loop = LOOP_VINFO_LOOP (loop_vinfo);
1247
1248 bbs = LOOP_VINFO_BBS (loop_vinfo);
1249 nbbs = loop->num_nodes;
1250
1251 for (j = 0; j < nbbs; j++)
1252 {
1253 basic_block bb = bbs[j];
1254 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1255 {
1256 tree stmt = bsi_stmt (si);
1257 stmt_ann_t ann = stmt_ann (stmt);
1258 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1259 free (stmt_info);
1260 set_stmt_info (ann, NULL);
1261 }
1262 }
1263
1264 free (LOOP_VINFO_BBS (loop_vinfo));
1265 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1266 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1267
1268 free (loop_vinfo);
1269 }
1270
1271
1272 /* Function vect_strip_conversions
1273
1274 Strip conversions that don't narrow the mode. */
1275
1276 tree
1277 vect_strip_conversion (tree expr)
1278 {
1279 tree to, ti, oprnd0;
1280
1281 while (TREE_CODE (expr) == NOP_EXPR || TREE_CODE (expr) == CONVERT_EXPR)
1282 {
1283 to = TREE_TYPE (expr);
1284 oprnd0 = TREE_OPERAND (expr, 0);
1285 ti = TREE_TYPE (oprnd0);
1286
1287 if (!INTEGRAL_TYPE_P (to) || !INTEGRAL_TYPE_P (ti))
1288 return NULL_TREE;
1289 if (GET_MODE_SIZE (TYPE_MODE (to)) < GET_MODE_SIZE (TYPE_MODE (ti)))
1290 return NULL_TREE;
1291
1292 expr = oprnd0;
1293 }
1294 return expr;
1295 }
1296
1297
1298 /* Function vect_force_dr_alignment_p.
1299
1300 Returns whether the alignment of a DECL can be forced to be aligned
1301 on ALIGNMENT bit boundary. */
1302
1303 bool
1304 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1305 {
1306 if (TREE_CODE (decl) != VAR_DECL)
1307 return false;
1308
1309 if (DECL_EXTERNAL (decl))
1310 return false;
1311
1312 if (TREE_ASM_WRITTEN (decl))
1313 return false;
1314
1315 if (TREE_STATIC (decl))
1316 return (alignment <= MAX_OFILE_ALIGNMENT);
1317 else
1318 /* This is not 100% correct. The absolute correct stack alignment
1319 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1320 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1321 However, until someone implements forced stack alignment, SSE
1322 isn't really usable without this. */
1323 return (alignment <= PREFERRED_STACK_BOUNDARY);
1324 }
1325
1326
1327 /* Function get_vectype_for_scalar_type.
1328
1329 Returns the vector type corresponding to SCALAR_TYPE as supported
1330 by the target. */
1331
1332 tree
1333 get_vectype_for_scalar_type (tree scalar_type)
1334 {
1335 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1336 int nbytes = GET_MODE_SIZE (inner_mode);
1337 int nunits;
1338 tree vectype;
1339
1340 if (nbytes == 0)
1341 return NULL_TREE;
1342
1343 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1344 is expected. */
1345 nunits = UNITS_PER_SIMD_WORD / nbytes;
1346
1347 vectype = build_vector_type (scalar_type, nunits);
1348 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1349 {
1350 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
1351 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1352 }
1353
1354 if (!vectype)
1355 return NULL_TREE;
1356
1357 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1358 {
1359 fprintf (vect_dump, "vectype: ");
1360 print_generic_expr (vect_dump, vectype, TDF_SLIM);
1361 }
1362
1363 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1364 {
1365 /* TODO: tree-complex.c sometimes can parallelize operations
1366 on generic vectors. We can vectorize the loop in that case,
1367 but then we should re-run the lowering pass. */
1368 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1369 fprintf (vect_dump, "mode not supported by target.");
1370 return NULL_TREE;
1371 }
1372
1373 return vectype;
1374 }
1375
1376
1377 /* Function vect_supportable_dr_alignment
1378
1379 Return whether the data reference DR is supported with respect to its
1380 alignment. */
1381
1382 enum dr_alignment_support
1383 vect_supportable_dr_alignment (struct data_reference *dr)
1384 {
1385 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
1386 enum machine_mode mode = (int) TYPE_MODE (vectype);
1387
1388 if (aligned_access_p (dr))
1389 return dr_aligned;
1390
1391 /* Possibly unaligned access. */
1392
1393 if (DR_IS_READ (dr))
1394 {
1395 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
1396 && (!targetm.vectorize.builtin_mask_for_load
1397 || targetm.vectorize.builtin_mask_for_load ()))
1398 return dr_unaligned_software_pipeline;
1399
1400 if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1401 /* Can't software pipeline the loads, but can at least do them. */
1402 return dr_unaligned_supported;
1403 }
1404
1405 /* Unsupported. */
1406 return dr_unaligned_unsupported;
1407 }
1408
1409
1410 /* Function vect_is_simple_use.
1411
1412 Input:
1413 LOOP - the loop that is being vectorized.
1414 OPERAND - operand of a stmt in LOOP.
1415 DEF - the defining stmt in case OPERAND is an SSA_NAME.
1416
1417 Returns whether a stmt with OPERAND can be vectorized.
1418 Supportable operands are constants, loop invariants, and operands that are
1419 defined by the current iteration of the loop. Unsupportable operands are
1420 those that are defined by a previous iteration of the loop (as is the case
1421 in reduction/induction computations). */
1422
1423 bool
1424 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def)
1425 {
1426 tree def_stmt;
1427 basic_block bb;
1428 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1429
1430 if (def)
1431 *def = NULL_TREE;
1432
1433 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1434 return true;
1435
1436 if (TREE_CODE (operand) != SSA_NAME)
1437 return false;
1438
1439 def_stmt = SSA_NAME_DEF_STMT (operand);
1440 if (def_stmt == NULL_TREE )
1441 {
1442 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1443 fprintf (vect_dump, "no def_stmt.");
1444 return false;
1445 }
1446
1447 /* empty stmt is expected only in case of a function argument.
1448 (Otherwise - we expect a phi_node or a modify_expr). */
1449 if (IS_EMPTY_STMT (def_stmt))
1450 {
1451 tree arg = TREE_OPERAND (def_stmt, 0);
1452 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
1453 return true;
1454 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1455 {
1456 fprintf (vect_dump, "Unexpected empty stmt: ");
1457 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1458 }
1459 return false;
1460 }
1461
1462 /* phi_node inside the loop indicates an induction/reduction pattern.
1463 This is not supported yet. */
1464 bb = bb_for_stmt (def_stmt);
1465 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
1466 {
1467 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1468 fprintf (vect_dump, "reduction/induction - unsupported.");
1469 return false; /* FORNOW: not supported yet. */
1470 }
1471
1472 /* Expecting a modify_expr or a phi_node. */
1473 if (TREE_CODE (def_stmt) == MODIFY_EXPR
1474 || TREE_CODE (def_stmt) == PHI_NODE)
1475 {
1476 if (def)
1477 *def = def_stmt;
1478 return true;
1479 }
1480
1481 return false;
1482 }
1483
1484
1485 /* Function vect_is_simple_iv_evolution.
1486
1487 FORNOW: A simple evolution of an induction variables in the loop is
1488 considered a polynomial evolution with constant step. */
1489
1490 bool
1491 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
1492 tree * step)
1493 {
1494 tree init_expr;
1495 tree step_expr;
1496
1497 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
1498
1499 /* When there is no evolution in this loop, the evolution function
1500 is not "simple". */
1501 if (evolution_part == NULL_TREE)
1502 return false;
1503
1504 /* When the evolution is a polynomial of degree >= 2
1505 the evolution function is not "simple". */
1506 if (tree_is_chrec (evolution_part))
1507 return false;
1508
1509 step_expr = evolution_part;
1510 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1511 loop_nb));
1512
1513 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1514 {
1515 fprintf (vect_dump, "step: ");
1516 print_generic_expr (vect_dump, step_expr, TDF_SLIM);
1517 fprintf (vect_dump, ", init: ");
1518 print_generic_expr (vect_dump, init_expr, TDF_SLIM);
1519 }
1520
1521 *init = init_expr;
1522 *step = step_expr;
1523
1524 if (TREE_CODE (step_expr) != INTEGER_CST)
1525 {
1526 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1527 fprintf (vect_dump, "step unknown.");
1528 return false;
1529 }
1530
1531 return true;
1532 }
1533
1534
1535 /* Function need_imm_uses_for.
1536
1537 Return whether we ought to include information for 'var'
1538 when calculating immediate uses. For this pass we only want use
1539 information for non-virtual variables. */
1540
1541 static bool
1542 need_imm_uses_for (tree var)
1543 {
1544 return is_gimple_reg (var);
1545 }
1546
1547
1548 /* Function vectorize_loops.
1549
1550 Entry Point to loop vectorization phase. */
1551
1552 void
1553 vectorize_loops (struct loops *loops)
1554 {
1555 unsigned int i, loops_num;
1556 unsigned int num_vectorized_loops = 0;
1557
1558 /* Fix the verbosity level if not defined explicitly by the user. */
1559 vect_set_dump_settings ();
1560
1561 /* Does the target support SIMD? */
1562 /* FORNOW: until more sophisticated machine modelling is in place. */
1563 if (!UNITS_PER_SIMD_WORD)
1564 {
1565 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1566 fprintf (vect_dump, "vectorizer: target vector size is not defined.");
1567 return;
1568 }
1569
1570 #ifdef ENABLE_CHECKING
1571 verify_loop_closed_ssa ();
1572 #endif
1573
1574 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
1575
1576 /* ----------- Analyze loops. ----------- */
1577
1578 /* If some loop was duplicated, it gets bigger number
1579 than all previously defined loops. This fact allows us to run
1580 only over initial loops skipping newly generated ones. */
1581 loops_num = loops->num;
1582 for (i = 1; i < loops_num; i++)
1583 {
1584 loop_vec_info loop_vinfo;
1585 struct loop *loop = loops->parray[i];
1586
1587 if (!loop)
1588 continue;
1589
1590 loop_vinfo = vect_analyze_loop (loop);
1591 loop->aux = loop_vinfo;
1592
1593 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
1594 continue;
1595
1596 vect_transform_loop (loop_vinfo, loops);
1597 num_vectorized_loops++;
1598 }
1599
1600 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, UNKNOWN_LOC))
1601 fprintf (vect_dump, "vectorized %u loops in function.\n",
1602 num_vectorized_loops);
1603
1604 /* ----------- Finalize. ----------- */
1605
1606 free_df ();
1607 for (i = 1; i < loops_num; i++)
1608 {
1609 struct loop *loop = loops->parray[i];
1610 loop_vec_info loop_vinfo;
1611
1612 if (!loop)
1613 continue;
1614 loop_vinfo = loop->aux;
1615 destroy_loop_vec_info (loop_vinfo);
1616 loop->aux = NULL;
1617 }
1618
1619 rewrite_into_ssa (false);
1620 rewrite_into_loop_closed_ssa (NULL); /* FORNOW */
1621 bitmap_clear (vars_to_rename);
1622 }