gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software
3 Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "cfgloop.h"
35 #include "cfglayout.h"
36 #include "expr.h"
37 #include "recog.h"
38 #include "optabs.h"
39 #include "toplev.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
42
43
44 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
45
46 /* Function vect_mark_relevant.
47
48 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
49
50 static void
51 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
52 enum vect_relevant relevant, bool live_p)
53 {
54 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
55 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
56 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
57
58 if (vect_print_dump_info (REPORT_DETAILS))
59 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
60
61 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
62 {
63 gimple pattern_stmt;
64
65 /* This is the last stmt in a sequence that was detected as a
66 pattern that can potentially be vectorized. Don't mark the stmt
67 as relevant/live because it's not going to be vectorized.
68 Instead mark the pattern-stmt that replaces it. */
69
70 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
71
72 if (vect_print_dump_info (REPORT_DETAILS))
73 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
74 stmt_info = vinfo_for_stmt (pattern_stmt);
75 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
76 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
77 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
78 stmt = pattern_stmt;
79 }
80
81 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
82 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
83 STMT_VINFO_RELEVANT (stmt_info) = relevant;
84
85 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
86 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
87 {
88 if (vect_print_dump_info (REPORT_DETAILS))
89 fprintf (vect_dump, "already marked relevant/live.");
90 return;
91 }
92
93 VEC_safe_push (gimple, heap, *worklist, stmt);
94 }
95
96
97 /* Function vect_stmt_relevant_p.
98
  99    Return true if STMT in the loop represented by LOOP_VINFO is
100 "relevant for vectorization".
101
102 A stmt is considered "relevant for vectorization" if:
103 - it has uses outside the loop.
104 - it has vdefs (it alters memory).
 105      - it is a control stmt in the loop (except for the exit condition).
106
107 CHECKME: what other side effects would the vectorizer allow? */
108
109 static bool
110 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
111 enum vect_relevant *relevant, bool *live_p)
112 {
113 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
114 ssa_op_iter op_iter;
115 imm_use_iterator imm_iter;
116 use_operand_p use_p;
117 def_operand_p def_p;
118
119 *relevant = vect_unused_in_scope;
120 *live_p = false;
121
122 /* cond stmt other than loop exit cond. */
123 if (is_ctrl_stmt (stmt)
124 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
125 != loop_exit_ctrl_vec_info_type)
126 *relevant = vect_used_in_scope;
127
128 /* changing memory. */
129 if (gimple_code (stmt) != GIMPLE_PHI)
130 if (gimple_vdef (stmt))
131 {
132 if (vect_print_dump_info (REPORT_DETAILS))
133 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
134 *relevant = vect_used_in_scope;
135 }
136
137 /* uses outside the loop. */
138 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
139 {
140 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
141 {
142 basic_block bb = gimple_bb (USE_STMT (use_p));
143 if (!flow_bb_inside_loop_p (loop, bb))
144 {
145 if (vect_print_dump_info (REPORT_DETAILS))
146 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
147
148 /* We expect all such uses to be in the loop exit phis
 149                  (because of loop-closed-SSA form).  */
150 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
151 gcc_assert (bb == single_exit (loop)->dest);
152
153 *live_p = true;
154 }
155 }
156 }
157
158 return (*live_p || *relevant);
159 }
160
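/* For illustration (variable names are only illustrative), in a scalar loop
   such as

       for (i = 0; i < n; i++)
         {
           a[i] = b[i] + 1;     <-- has a vdef (alters memory):
                                    *relevant = vect_used_in_scope
           sum = sum + b[i];    <-- sum is used in the loop-exit phi:
                                    *live_p = true
           t = i * 4;           <-- neither relevant nor live here; it may
                                    still be marked later through its uses
         }  */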
161
162 /* Function exist_non_indexing_operands_for_use_p
163
164 USE is one of the uses attached to STMT. Check if USE is
165 used in STMT for anything other than indexing an array. */
166
167 static bool
168 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
169 {
170 tree operand;
171 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
172
173 /* USE corresponds to some operand in STMT. If there is no data
174 reference in STMT, then any operand that corresponds to USE
175 is not indexing an array. */
176 if (!STMT_VINFO_DATA_REF (stmt_info))
177 return true;
178
 179   /* STMT has a data_ref. FORNOW this means that it's one of
180 the following forms:
181 -1- ARRAY_REF = var
182 -2- var = ARRAY_REF
183 (This should have been verified in analyze_data_refs).
184
185 'var' in the second case corresponds to a def, not a use,
186 so USE cannot correspond to any operands that are not used
187 for array indexing.
188
189 Therefore, all we need to check is if STMT falls into the
190 first case, and whether var corresponds to USE. */
191
192 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
193 return false;
194
195 if (!gimple_assign_copy_p (stmt))
196 return false;
197 operand = gimple_assign_rhs1 (stmt);
198
199 if (TREE_CODE (operand) != SSA_NAME)
200 return false;
201
202 if (operand == use)
203 return true;
204
205 return false;
206 }
207
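/* For illustration:

       a[i_1] = x_2;    USE == x_2  -> true  (x_2 is the stored value)
                        USE == i_1  -> false (i_1 only indexes the array)
       y_3 = a[i_1];    USE == i_1  -> false (the lhs is a def, and i_1
                                              only indexes the array).  */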
208
209 /*
210 Function process_use.
211
212 Inputs:
213 - a USE in STMT in a loop represented by LOOP_VINFO
214 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
215 that defined USE. This is done by calling mark_relevant and passing it
216 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
217
218 Outputs:
219 Generally, LIVE_P and RELEVANT are used to define the liveness and
220 relevance info of the DEF_STMT of this USE:
221 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
222 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
223 Exceptions:
224 - case 1: If USE is used only for address computations (e.g. array indexing),
225 which does not need to be directly vectorized, then the liveness/relevance
226 of the respective DEF_STMT is left unchanged.
227 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
 228      skip DEF_STMT because it has already been processed.
229 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
230 be modified accordingly.
231
232 Return true if everything is as expected. Return false otherwise. */
233
234 static bool
235 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
236 enum vect_relevant relevant, VEC(gimple,heap) **worklist)
237 {
238 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
239 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
240 stmt_vec_info dstmt_vinfo;
241 basic_block bb, def_bb;
242 tree def;
243 gimple def_stmt;
244 enum vect_def_type dt;
245
246 /* case 1: we are only interested in uses that need to be vectorized. Uses
247 that are used for address computation are not considered relevant. */
248 if (!exist_non_indexing_operands_for_use_p (use, stmt))
249 return true;
250
251 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
252 {
253 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
254 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
255 return false;
256 }
257
258 if (!def_stmt || gimple_nop_p (def_stmt))
259 return true;
260
261 def_bb = gimple_bb (def_stmt);
262 if (!flow_bb_inside_loop_p (loop, def_bb))
263 {
264 if (vect_print_dump_info (REPORT_DETAILS))
265 fprintf (vect_dump, "def_stmt is out of loop.");
266 return true;
267 }
268
269 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
270 DEF_STMT must have already been processed, because this should be the
271 only way that STMT, which is a reduction-phi, was put in the worklist,
272 as there should be no other uses for DEF_STMT in the loop. So we just
273 check that everything is as expected, and we are done. */
274 dstmt_vinfo = vinfo_for_stmt (def_stmt);
275 bb = gimple_bb (stmt);
276 if (gimple_code (stmt) == GIMPLE_PHI
277 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
278 && gimple_code (def_stmt) != GIMPLE_PHI
279 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
280 && bb->loop_father == def_bb->loop_father)
281 {
282 if (vect_print_dump_info (REPORT_DETAILS))
283 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
284 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
285 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
286 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
287 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
288 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
289 return true;
290 }
291
292 /* case 3a: outer-loop stmt defining an inner-loop stmt:
293 outer-loop-header-bb:
294 d = def_stmt
295 inner-loop:
296 stmt # use (d)
297 outer-loop-tail-bb:
298 ... */
299 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
300 {
301 if (vect_print_dump_info (REPORT_DETAILS))
302 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
303 switch (relevant)
304 {
305 case vect_unused_in_scope:
306 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
307 vect_used_by_reduction : vect_unused_in_scope;
308 break;
309 case vect_used_in_outer_by_reduction:
310 relevant = vect_used_by_reduction;
311 break;
312 case vect_used_in_outer:
313 relevant = vect_used_in_scope;
314 break;
315 case vect_used_by_reduction:
316 case vect_used_in_scope:
317 break;
318
319 default:
320 gcc_unreachable ();
321 }
322 }
323
324 /* case 3b: inner-loop stmt defining an outer-loop stmt:
325 outer-loop-header-bb:
326 ...
327 inner-loop:
328 d = def_stmt
329 outer-loop-tail-bb:
330 stmt # use (d) */
331 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
332 {
333 if (vect_print_dump_info (REPORT_DETAILS))
334 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
335 switch (relevant)
336 {
337 case vect_unused_in_scope:
338 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
339 vect_used_in_outer_by_reduction : vect_unused_in_scope;
340 break;
341
342 case vect_used_in_outer_by_reduction:
343 case vect_used_in_outer:
344 break;
345
346 case vect_used_by_reduction:
347 relevant = vect_used_in_outer_by_reduction;
348 break;
349
350 case vect_used_in_scope:
351 relevant = vect_used_in_outer;
352 break;
353
354 default:
355 gcc_unreachable ();
356 }
357 }
358
359 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
360 return true;
361 }
362
363
364 /* Function vect_mark_stmts_to_be_vectorized.
365
366 Not all stmts in the loop need to be vectorized. For example:
367
368 for i...
369 for j...
370 1. T0 = i + j
371 2. T1 = a[T0]
372
373 3. j = j + 1
374
 375    Stmts 1 and 3 do not need to be vectorized, because loop control and
376 addressing of vectorized data-refs are handled differently.
377
378 This pass detects such stmts. */
379
380 bool
381 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
382 {
383 VEC(gimple,heap) *worklist;
384 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
385 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
386 unsigned int nbbs = loop->num_nodes;
387 gimple_stmt_iterator si;
388 gimple stmt;
389 unsigned int i;
390 stmt_vec_info stmt_vinfo;
391 basic_block bb;
392 gimple phi;
393 bool live_p;
394 enum vect_relevant relevant;
395
396 if (vect_print_dump_info (REPORT_DETAILS))
397 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
398
399 worklist = VEC_alloc (gimple, heap, 64);
400
401 /* 1. Init worklist. */
402 for (i = 0; i < nbbs; i++)
403 {
404 bb = bbs[i];
405 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
406 {
407 phi = gsi_stmt (si);
408 if (vect_print_dump_info (REPORT_DETAILS))
409 {
410 fprintf (vect_dump, "init: phi relevant? ");
411 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
412 }
413
414 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
415 vect_mark_relevant (&worklist, phi, relevant, live_p);
416 }
417 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
418 {
419 stmt = gsi_stmt (si);
420 if (vect_print_dump_info (REPORT_DETAILS))
421 {
422 fprintf (vect_dump, "init: stmt relevant? ");
423 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
424 }
425
426 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
427 vect_mark_relevant (&worklist, stmt, relevant, live_p);
428 }
429 }
430
 431   /* 2. Process worklist.  */
432 while (VEC_length (gimple, worklist) > 0)
433 {
434 use_operand_p use_p;
435 ssa_op_iter iter;
436
437 stmt = VEC_pop (gimple, worklist);
438 if (vect_print_dump_info (REPORT_DETAILS))
439 {
440 fprintf (vect_dump, "worklist: examine stmt: ");
441 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
442 }
443
444 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
445 (DEF_STMT) as relevant/irrelevant and live/dead according to the
446 liveness and relevance properties of STMT. */
447 stmt_vinfo = vinfo_for_stmt (stmt);
448 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
449 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
450
451 /* Generally, the liveness and relevance properties of STMT are
452 propagated as is to the DEF_STMTs of its USEs:
453 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
454 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
455
456 One exception is when STMT has been identified as defining a reduction
457 variable; in this case we set the liveness/relevance as follows:
458 live_p = false
459 relevant = vect_used_by_reduction
460 This is because we distinguish between two kinds of relevant stmts -
461 those that are used by a reduction computation, and those that are
462 (also) used by a regular computation. This allows us later on to
463 identify stmts that are used solely by a reduction, and therefore the
464 order of the results that they produce does not have to be kept.
465
 466          Reduction phis are expected to be used by a reduction stmt or by a
 467          stmt in an outer loop; other reduction stmts are expected to be used
 468          in the loop, and possibly by a stmt in an outer loop.
469 Here are the expected values of "relevant" for reduction phis/stmts:
470
 471          relevance:                              phi        stmt
 472          vect_unused_in_scope                               ok
 473          vect_used_in_outer_by_reduction         ok         ok
 474          vect_used_in_outer                      ok         ok
 475          vect_used_by_reduction                  ok
 476          vect_used_in_scope                                       */
477
478 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
479 {
480 enum vect_relevant tmp_relevant = relevant;
481 switch (tmp_relevant)
482 {
483 case vect_unused_in_scope:
484 gcc_assert (gimple_code (stmt) != GIMPLE_PHI);
485 relevant = vect_used_by_reduction;
486 break;
487
488 case vect_used_in_outer_by_reduction:
489 case vect_used_in_outer:
490 gcc_assert (gimple_code (stmt) != GIMPLE_ASSIGN
491 || (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR
492 && (gimple_assign_rhs_code (stmt)
493 != DOT_PROD_EXPR)));
494 break;
495
496 case vect_used_by_reduction:
497 if (gimple_code (stmt) == GIMPLE_PHI)
498 break;
499 /* fall through */
500 case vect_used_in_scope:
501 default:
502 if (vect_print_dump_info (REPORT_DETAILS))
503 fprintf (vect_dump, "unsupported use of reduction.");
504 VEC_free (gimple, heap, worklist);
505 return false;
506 }
507 live_p = false;
508 }
509
510 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
511 {
512 tree op = USE_FROM_PTR (use_p);
513 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
514 {
515 VEC_free (gimple, heap, worklist);
516 return false;
517 }
518 }
519 } /* while worklist */
520
521 VEC_free (gimple, heap, worklist);
522 return true;
523 }
524
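/* For illustration, given a loop like

       for (i = 0; i < n; i++)
         {
           1. T0 = i * 4
           2. T1 = a[T0]
           3. s  = s + T1     <-- s is used after the loop
         }

   phase 1 adds only stmt 3 to the worklist (its def is live).  Processing
   stmt 3 then marks stmt 2 as relevant, because T1 is a real use; stmt 1
   is never marked, since its only use (T0) serves purely as an array index
   in stmt 2 and is therefore skipped by process_use.  */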
525
526 int
527 cost_for_stmt (gimple stmt)
528 {
529 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
530
531 switch (STMT_VINFO_TYPE (stmt_info))
532 {
533 case load_vec_info_type:
534 return TARG_SCALAR_LOAD_COST;
535 case store_vec_info_type:
536 return TARG_SCALAR_STORE_COST;
537 case op_vec_info_type:
538 case condition_vec_info_type:
539 case assignment_vec_info_type:
540 case reduc_vec_info_type:
541 case induc_vec_info_type:
542 case type_promotion_vec_info_type:
543 case type_demotion_vec_info_type:
544 case type_conversion_vec_info_type:
545 case call_vec_info_type:
546 return TARG_SCALAR_STMT_COST;
547 case undef_vec_info_type:
548 default:
549 gcc_unreachable ();
550 }
551 }
552
553 /* Function vect_model_simple_cost.
554
555 Models cost for simple operations, i.e. those that only emit ncopies of a
556 single op. Right now, this does not account for multiple insns that could
557 be generated for the single vector op. We will handle that shortly. */
558
559 void
560 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
561 enum vect_def_type *dt, slp_tree slp_node)
562 {
563 int i;
564 int inside_cost = 0, outside_cost = 0;
565
566 /* The SLP costs were already calculated during SLP tree build. */
567 if (PURE_SLP_STMT (stmt_info))
568 return;
569
570 inside_cost = ncopies * TARG_VEC_STMT_COST;
571
 572   /* FORNOW: Assuming maximum 2 args per stmt.  */
573 for (i = 0; i < 2; i++)
574 {
575 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
576 outside_cost += TARG_SCALAR_TO_VEC_COST;
577 }
578
579 if (vect_print_dump_info (REPORT_COST))
580 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
581 "outside_cost = %d .", inside_cost, outside_cost);
582
 583   /* Set the costs either in STMT_INFO or SLP_NODE (if it exists).  */
584 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
585 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
586 }
587
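/* For illustration, vectorizing  x = y + 3  with ncopies == 2 is modeled as

       inside_cost  = 2 * TARG_VEC_STMT_COST     (two vector additions)
       outside_cost = TARG_SCALAR_TO_VEC_COST    (the constant 3 is splatted
                                                  once, outside the loop)

   where the TARG_* values are provided by the target.  */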
588
589 /* Function vect_cost_strided_group_size
590
591 For strided load or store, return the group_size only if it is the first
592 load or store of a group, else return 1. This ensures that group size is
593 only returned once per group. */
594
595 static int
596 vect_cost_strided_group_size (stmt_vec_info stmt_info)
597 {
598 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
599
600 if (first_stmt == STMT_VINFO_STMT (stmt_info))
601 return DR_GROUP_SIZE (stmt_info);
602
603 return 1;
604 }
605
606
607 /* Function vect_model_store_cost
608
609 Models cost for stores. In the case of strided accesses, one access
610 has the overhead of the strided access attributed to it. */
611
612 void
613 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
614 enum vect_def_type dt, slp_tree slp_node)
615 {
616 int group_size;
617 int inside_cost = 0, outside_cost = 0;
618
619 /* The SLP costs were already calculated during SLP tree build. */
620 if (PURE_SLP_STMT (stmt_info))
621 return;
622
623 if (dt == vect_constant_def || dt == vect_external_def)
624 outside_cost = TARG_SCALAR_TO_VEC_COST;
625
626 /* Strided access? */
627 if (DR_GROUP_FIRST_DR (stmt_info) && !slp_node)
628 group_size = vect_cost_strided_group_size (stmt_info);
629 /* Not a strided access. */
630 else
631 group_size = 1;
632
 633   /* Is this an access in a group of stores providing strided access?
634 If so, add in the cost of the permutes. */
635 if (group_size > 1)
636 {
637 /* Uses a high and low interleave operation for each needed permute. */
638 inside_cost = ncopies * exact_log2(group_size) * group_size
639 * TARG_VEC_STMT_COST;
640
641 if (vect_print_dump_info (REPORT_COST))
642 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
643 group_size);
644
645 }
646
647 /* Costs of the stores. */
648 inside_cost += ncopies * TARG_VEC_STORE_COST;
649
650 if (vect_print_dump_info (REPORT_COST))
651 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
652 "outside_cost = %d .", inside_cost, outside_cost);
653
 654   /* Set the costs either in STMT_INFO or SLP_NODE (if it exists).  */
655 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
656 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
657 }
658
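/* For illustration, an interleaved group of 4 stores (group_size == 4)
   with ncopies == 1 is charged, on the first store of the group,

       1 * exact_log2 (4) * 4 * TARG_VEC_STMT_COST   (8 vector interleave stmts)
     + 1 * TARG_VEC_STORE_COST                       (the store itself);

   the remaining stores of the group see group_size == 1 and are charged
   only their own TARG_VEC_STORE_COST.  */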
659
660 /* Function vect_model_load_cost
661
662 Models cost for loads. In the case of strided accesses, the last access
663 has the overhead of the strided access attributed to it. Since unaligned
664 accesses are supported for loads, we also account for the costs of the
665 access scheme chosen. */
666
667 void
668 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
669
670 {
671 int group_size;
 672   int alignment_support_scheme;
673 gimple first_stmt;
674 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
675 int inside_cost = 0, outside_cost = 0;
676
677 /* The SLP costs were already calculated during SLP tree build. */
678 if (PURE_SLP_STMT (stmt_info))
679 return;
680
681 /* Strided accesses? */
682 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
683 if (first_stmt && !slp_node)
684 {
685 group_size = vect_cost_strided_group_size (stmt_info);
686 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
687 }
688 /* Not a strided access. */
689 else
690 {
691 group_size = 1;
692 first_dr = dr;
693 }
694
 695   alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
696
697 /* Is this an access in a group of loads providing strided access?
698 If so, add in the cost of the permutes. */
699 if (group_size > 1)
700 {
 701       /* Uses even and odd extract operations for each needed permute.  */
702 inside_cost = ncopies * exact_log2(group_size) * group_size
703 * TARG_VEC_STMT_COST;
704
705 if (vect_print_dump_info (REPORT_COST))
706 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
707 group_size);
708
709 }
710
711 /* The loads themselves. */
 712   switch (alignment_support_scheme)
713 {
714 case dr_aligned:
715 {
716 inside_cost += ncopies * TARG_VEC_LOAD_COST;
717
718 if (vect_print_dump_info (REPORT_COST))
719 fprintf (vect_dump, "vect_model_load_cost: aligned.");
720
721 break;
722 }
723 case dr_unaligned_supported:
724 {
725 /* Here, we assign an additional cost for the unaligned load. */
726 inside_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
727
728 if (vect_print_dump_info (REPORT_COST))
729 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
730 "hardware.");
731
732 break;
733 }
734 case dr_explicit_realign:
735 {
736 inside_cost += ncopies * (2*TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
737
738 /* FIXME: If the misalignment remains fixed across the iterations of
739 the containing loop, the following cost should be added to the
740 outside costs. */
741 if (targetm.vectorize.builtin_mask_for_load)
742 inside_cost += TARG_VEC_STMT_COST;
743
744 break;
745 }
746 case dr_explicit_realign_optimized:
747 {
748 if (vect_print_dump_info (REPORT_COST))
749 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
750 "pipelined.");
751
752 /* Unaligned software pipeline has a load of an address, an initial
753 load, and possibly a mask operation to "prime" the loop. However,
754 if this is an access in a group of loads, which provide strided
755 access, then the above cost should only be considered for one
756 access in the group. Inside the loop, there is a load op
757 and a realignment op. */
758
759 if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node)
760 {
761 outside_cost = 2*TARG_VEC_STMT_COST;
762 if (targetm.vectorize.builtin_mask_for_load)
763 outside_cost += TARG_VEC_STMT_COST;
764 }
765
766 inside_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
767
768 break;
769 }
770
771 default:
772 gcc_unreachable ();
773 }
774
775 if (vect_print_dump_info (REPORT_COST))
776 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
777 "outside_cost = %d .", inside_cost, outside_cost);
778
 779   /* Set the costs either in STMT_INFO or SLP_NODE (if it exists).  */
780 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
781 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
782 }
783
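/* For illustration, with ncopies == 1 and no interleaving, the load is
   modeled per alignment scheme roughly as

       dr_aligned                     TARG_VEC_LOAD_COST
       dr_unaligned_supported         TARG_VEC_UNALIGNED_LOAD_COST
       dr_explicit_realign            2 * TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST
                                      (plus TARG_VEC_STMT_COST if a
                                      mask-for-load builtin is used)
       dr_explicit_realign_optimized  TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST
                                      inside the loop, plus 2 * TARG_VEC_STMT_COST
                                      (and possibly the mask stmt) of setup
                                      outside the loop.  */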
784
785 /* Function vect_init_vector.
786
787 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
788 the vector elements of VECTOR_VAR. Place the initialization at BSI if it
789 is not NULL. Otherwise, place the initialization at the loop preheader.
790 Return the DEF of INIT_STMT.
791 It will be used in the vectorization of STMT. */
792
793 tree
794 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
795 gimple_stmt_iterator *gsi)
796 {
797 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
798 tree new_var;
799 gimple init_stmt;
800 tree vec_oprnd;
801 edge pe;
802 tree new_temp;
803 basic_block new_bb;
804
805 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
806 add_referenced_var (new_var);
807 init_stmt = gimple_build_assign (new_var, vector_var);
808 new_temp = make_ssa_name (new_var, init_stmt);
809 gimple_assign_set_lhs (init_stmt, new_temp);
810
811 if (gsi)
812 vect_finish_stmt_generation (stmt, init_stmt, gsi);
813 else
814 {
815 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
816 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
817
818 if (nested_in_vect_loop_p (loop, stmt))
819 loop = loop->inner;
820 pe = loop_preheader_edge (loop);
821 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
822 gcc_assert (!new_bb);
823 }
824
825 if (vect_print_dump_info (REPORT_DETAILS))
826 {
827 fprintf (vect_dump, "created new init_stmt: ");
828 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
829 }
830
831 vec_oprnd = gimple_assign_lhs (init_stmt);
832 return vec_oprnd;
833 }
834
835 /* Function vect_get_vec_def_for_operand.
836
837 OP is an operand in STMT. This function returns a (vector) def that will be
838 used in the vectorized stmt for STMT.
839
840 In the case that OP is an SSA_NAME which is defined in the loop, then
841 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
842
843 In case OP is an invariant or constant, a new stmt that creates a vector def
844 needs to be introduced. */
845
846 tree
847 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
848 {
849 tree vec_oprnd;
850 gimple vec_stmt;
851 gimple def_stmt;
852 stmt_vec_info def_stmt_info = NULL;
853 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
854 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
855 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
856 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
857 tree vec_inv;
858 tree vec_cst;
859 tree t = NULL_TREE;
860 tree def;
861 int i;
862 enum vect_def_type dt;
863 bool is_simple_use;
864 tree vector_type;
865
866 if (vect_print_dump_info (REPORT_DETAILS))
867 {
868 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
869 print_generic_expr (vect_dump, op, TDF_SLIM);
870 }
871
872 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
873 gcc_assert (is_simple_use);
874 if (vect_print_dump_info (REPORT_DETAILS))
875 {
876 if (def)
877 {
878 fprintf (vect_dump, "def = ");
879 print_generic_expr (vect_dump, def, TDF_SLIM);
880 }
881 if (def_stmt)
882 {
883 fprintf (vect_dump, " def_stmt = ");
884 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
885 }
886 }
887
888 switch (dt)
889 {
890 /* Case 1: operand is a constant. */
891 case vect_constant_def:
892 {
893 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
894 gcc_assert (vector_type);
895
896 if (scalar_def)
897 *scalar_def = op;
898
899 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
900 if (vect_print_dump_info (REPORT_DETAILS))
901 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
902
903 for (i = nunits - 1; i >= 0; --i)
904 {
905 t = tree_cons (NULL_TREE, op, t);
906 }
907 vec_cst = build_vector (vector_type, t);
908 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
909 }
910
911 /* Case 2: operand is defined outside the loop - loop invariant. */
912 case vect_external_def:
913 {
914 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
915 gcc_assert (vector_type);
916 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
917
918 if (scalar_def)
919 *scalar_def = def;
920
921 /* Create 'vec_inv = {inv,inv,..,inv}' */
922 if (vect_print_dump_info (REPORT_DETAILS))
923 fprintf (vect_dump, "Create vector_inv.");
924
925 for (i = nunits - 1; i >= 0; --i)
926 {
927 t = tree_cons (NULL_TREE, def, t);
928 }
929
930 /* FIXME: use build_constructor directly. */
931 vec_inv = build_constructor_from_list (vector_type, t);
932 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
933 }
934
935 /* Case 3: operand is defined inside the loop. */
936 case vect_internal_def:
937 {
938 if (scalar_def)
939 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
940
941 /* Get the def from the vectorized stmt. */
942 def_stmt_info = vinfo_for_stmt (def_stmt);
943 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
944 gcc_assert (vec_stmt);
945 if (gimple_code (vec_stmt) == GIMPLE_PHI)
946 vec_oprnd = PHI_RESULT (vec_stmt);
947 else if (is_gimple_call (vec_stmt))
948 vec_oprnd = gimple_call_lhs (vec_stmt);
949 else
950 vec_oprnd = gimple_assign_lhs (vec_stmt);
951 return vec_oprnd;
952 }
953
954 /* Case 4: operand is defined by a loop header phi - reduction */
955 case vect_reduction_def:
956 {
957 struct loop *loop;
958
959 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
960 loop = (gimple_bb (def_stmt))->loop_father;
961
962 /* Get the def before the loop */
963 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
964 return get_initial_def_for_reduction (stmt, op, scalar_def);
965 }
966
967 /* Case 5: operand is defined by loop-header phi - induction. */
968 case vect_induction_def:
969 {
970 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
971
972 /* Get the def from the vectorized stmt. */
973 def_stmt_info = vinfo_for_stmt (def_stmt);
974 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
975 gcc_assert (vec_stmt && gimple_code (vec_stmt) == GIMPLE_PHI);
976 vec_oprnd = PHI_RESULT (vec_stmt);
977 return vec_oprnd;
978 }
979
980 default:
981 gcc_unreachable ();
982 }
983 }
984
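/* For illustration (assuming four-element integer vectors):

     - a constant operand, e.g. the 3 in  z_3 = x_1 + 3, yields a new
       preheader stmt  vect_cst_ = {3,3,3,3}  whose lhs is returned;
     - a loop-invariant SSA name defined before the loop yields an
       analogous  vec_inv = {inv,inv,inv,inv}  constructor stmt;
     - an operand defined inside the loop simply returns the lhs of the
       STMT_VINFO_VEC_STMT already recorded for its defining stmt.  */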
985
986 /* Function vect_get_vec_def_for_stmt_copy
987
988 Return a vector-def for an operand. This function is used when the
989 vectorized stmt to be created (by the caller to this function) is a "copy"
990 created in case the vectorized result cannot fit in one vector, and several
991 copies of the vector-stmt are required. In this case the vector-def is
992 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
993 of the stmt that defines VEC_OPRND.
994 DT is the type of the vector def VEC_OPRND.
995
996 Context:
997 In case the vectorization factor (VF) is bigger than the number
998 of elements that can fit in a vectype (nunits), we have to generate
999 more than one vector stmt to vectorize the scalar stmt. This situation
1000 arises when there are multiple data-types operated upon in the loop; the
1001 smallest data-type determines the VF, and as a result, when vectorizing
1002 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1003 vector stmt (each computing a vector of 'nunits' results, and together
1004 computing 'VF' results in each iteration). This function is called when
1005 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1006 which VF=16 and nunits=4, so the number of copies required is 4):
1007
1008 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1009
1010 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1011 VS1.1: vx.1 = memref1 VS1.2
1012 VS1.2: vx.2 = memref2 VS1.3
1013 VS1.3: vx.3 = memref3
1014
1015 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1016 VSnew.1: vz1 = vx.1 + ... VSnew.2
1017 VSnew.2: vz2 = vx.2 + ... VSnew.3
1018 VSnew.3: vz3 = vx.3 + ...
1019
1020 The vectorization of S1 is explained in vectorizable_load.
1021 The vectorization of S2:
1022 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1023 the function 'vect_get_vec_def_for_operand' is called to
1024 get the relevant vector-def for each operand of S2. For operand x it
1025 returns the vector-def 'vx.0'.
1026
1027 To create the remaining copies of the vector-stmt (VSnew.j), this
1028 function is called to get the relevant vector-def for each operand. It is
1029 obtained from the respective VS1.j stmt, which is recorded in the
1030 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1031
1032 For example, to obtain the vector-def 'vx.1' in order to create the
1033 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1034 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1035 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1036 and return its def ('vx.1').
1037 Overall, to create the above sequence this function will be called 3 times:
1038 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1039 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1040 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1041
1042 tree
1043 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1044 {
1045 gimple vec_stmt_for_operand;
1046 stmt_vec_info def_stmt_info;
1047
1048 /* Do nothing; can reuse same def. */
1049   if (dt == vect_external_def || dt == vect_constant_def)
1050 return vec_oprnd;
1051
1052 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1053 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1054 gcc_assert (def_stmt_info);
1055 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1056 gcc_assert (vec_stmt_for_operand);
1057 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1058 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1059 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1060 else
1061 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1062 return vec_oprnd;
1063 }
1064
1065
1066 /* Get vectorized definitions for the operands to create a copy of an original
1067 stmt. See vect_get_vec_def_for_stmt_copy() for details. */
1068
1069 static void
1070 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1071 VEC(tree,heap) **vec_oprnds0,
1072 VEC(tree,heap) **vec_oprnds1)
1073 {
1074 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1075
1076 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1077 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1078
1079 if (vec_oprnds1 && *vec_oprnds1)
1080 {
1081 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1082 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1083 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1084 }
1085 }
1086
1087
1088 /* Get vectorized definitions for OP0 and OP1, or from SLP_NODE if it is not NULL.  */
1089
1090 static void
1091 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1092 VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
1093 slp_tree slp_node)
1094 {
1095 if (slp_node)
1096 vect_get_slp_defs (slp_node, vec_oprnds0, vec_oprnds1);
1097 else
1098 {
1099 tree vec_oprnd;
1100
1101 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1102 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1103 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1104
1105 if (op1)
1106 {
1107 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1108 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1109 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1110 }
1111 }
1112 }
1113
1114
1115 /* Function vect_finish_stmt_generation.
1116
1117 Insert a new stmt. */
1118
1119 void
1120 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1121 gimple_stmt_iterator *gsi)
1122 {
1123 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1124 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1125
1126 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1127
1128 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1129
1130 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo));
1131
1132 if (vect_print_dump_info (REPORT_DETAILS))
1133 {
1134 fprintf (vect_dump, "add new stmt: ");
1135 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1136 }
1137
1138 gimple_set_location (vec_stmt, gimple_location (gsi_stmt (*gsi)));
1139 }
1140
1141 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1142 a function declaration if the target has a vectorized version
1143 of the function, or NULL_TREE if the function cannot be vectorized. */
1144
1145 tree
1146 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1147 {
1148 tree fndecl = gimple_call_fndecl (call);
1149 enum built_in_function code;
1150
1151 /* We only handle functions that do not read or clobber memory -- i.e.
1152 const or novops ones. */
1153 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1154 return NULL_TREE;
1155
1156 if (!fndecl
1157 || TREE_CODE (fndecl) != FUNCTION_DECL
1158 || !DECL_BUILT_IN (fndecl))
1159 return NULL_TREE;
1160
1161 code = DECL_FUNCTION_CODE (fndecl);
1162 return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
1163 vectype_in);
1164 }
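
/* For illustration, a loop such as

       for (i = 0; i < n; i++)
         a[i] = __builtin_sqrtf (b[i]);

   (compiled with -fno-math-errno, so that the call is ECF_CONST) can be
   vectorized only if targetm.vectorize.builtin_vectorized_function returns
   a vector variant of sqrtf for the requested vector types; otherwise
   vectorizable_call below rejects the stmt.  */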
1165
1166 /* Function vectorizable_call.
1167
1168 Check if STMT performs a function call that can be vectorized.
1169 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1170 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1171 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1172
1173 static bool
1174 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
1175 {
1176 tree vec_dest;
1177 tree scalar_dest;
1178 tree op, type;
1179 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1180 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1181 tree vectype_out, vectype_in;
1182 int nunits_in;
1183 int nunits_out;
1184 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1185 tree fndecl, new_temp, def, rhs_type, lhs_type;
1186 gimple def_stmt;
1187 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1188 gimple new_stmt;
1189 int ncopies, j;
1190 VEC(tree, heap) *vargs = NULL;
1191 enum { NARROW, NONE, WIDEN } modifier;
1192 size_t i, nargs;
1193
1194 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1195 return false;
1196
1197 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1198 return false;
1199
1200 /* FORNOW: SLP not supported. */
1201 if (STMT_SLP_TYPE (stmt_info))
1202 return false;
1203
1204 /* Is STMT a vectorizable call? */
1205 if (!is_gimple_call (stmt))
1206 return false;
1207
1208 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1209 return false;
1210
1211 /* Process function arguments. */
1212 rhs_type = NULL_TREE;
1213 nargs = gimple_call_num_args (stmt);
1214
1215   /* Bail out if the function has more than two arguments; we do not
1216      have interesting builtin functions to vectorize with more than
1217      two arguments.  Calls with no arguments are not handled either.  */
1218 if (nargs == 0 || nargs > 2)
1219 return false;
1220
1221 for (i = 0; i < nargs; i++)
1222 {
1223 op = gimple_call_arg (stmt, i);
1224
1225 /* We can only handle calls with arguments of the same type. */
1226 if (rhs_type
1227 && rhs_type != TREE_TYPE (op))
1228 {
1229 if (vect_print_dump_info (REPORT_DETAILS))
1230 fprintf (vect_dump, "argument types differ.");
1231 return false;
1232 }
1233 rhs_type = TREE_TYPE (op);
1234
1235 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[i]))
1236 {
1237 if (vect_print_dump_info (REPORT_DETAILS))
1238 fprintf (vect_dump, "use not simple.");
1239 return false;
1240 }
1241 }
1242
1243 vectype_in = get_vectype_for_scalar_type (rhs_type);
1244 if (!vectype_in)
1245 return false;
1246 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1247
1248 lhs_type = TREE_TYPE (gimple_call_lhs (stmt));
1249 vectype_out = get_vectype_for_scalar_type (lhs_type);
1250 if (!vectype_out)
1251 return false;
1252 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1253
1254 /* FORNOW */
1255 if (nunits_in == nunits_out / 2)
1256 modifier = NARROW;
1257 else if (nunits_out == nunits_in)
1258 modifier = NONE;
1259 else if (nunits_out == nunits_in / 2)
1260 modifier = WIDEN;
1261 else
1262 return false;
1263
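  /* For illustration, with 16-byte vectors: a call taking doubles
     (nunits_in == 2) and returning floats (nunits_out == 4) is NARROW,
     same-width element types give NONE, and a call taking floats and
     returning doubles would be WIDEN (rejected below, since no target
     implements that case yet).  */
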
1264 /* For now, we only vectorize functions if a target specific builtin
1265 is available. TODO -- in some cases, it might be profitable to
1266 insert the calls for pieces of the vector, in order to be able
1267 to vectorize other operations in the loop. */
1268 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1269 if (fndecl == NULL_TREE)
1270 {
1271 if (vect_print_dump_info (REPORT_DETAILS))
1272 fprintf (vect_dump, "function is not vectorizable.");
1273
1274 return false;
1275 }
1276
1277 gcc_assert (!gimple_vuse (stmt));
1278
1279 if (modifier == NARROW)
1280 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1281 else
1282 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1283
1284 /* Sanity check: make sure that at least one copy of the vectorized stmt
1285 needs to be generated. */
1286 gcc_assert (ncopies >= 1);
1287
1288 if (!vec_stmt) /* transformation not required. */
1289 {
1290 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1291 if (vect_print_dump_info (REPORT_DETAILS))
1292 fprintf (vect_dump, "=== vectorizable_call ===");
1293 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1294 return true;
1295 }
1296
1297 /** Transform. **/
1298
1299 if (vect_print_dump_info (REPORT_DETAILS))
1300 fprintf (vect_dump, "transform operation.");
1301
1302 /* Handle def. */
1303 scalar_dest = gimple_call_lhs (stmt);
1304 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1305
1306 prev_stmt_info = NULL;
1307 switch (modifier)
1308 {
1309 case NONE:
1310 for (j = 0; j < ncopies; ++j)
1311 {
1312 /* Build argument list for the vectorized call. */
1313 if (j == 0)
1314 vargs = VEC_alloc (tree, heap, nargs);
1315 else
1316 VEC_truncate (tree, vargs, 0);
1317
1318 for (i = 0; i < nargs; i++)
1319 {
1320 op = gimple_call_arg (stmt, i);
1321 if (j == 0)
1322 vec_oprnd0
1323 = vect_get_vec_def_for_operand (op, stmt, NULL);
1324 else
1325 vec_oprnd0
1326 	      = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1327
1328 VEC_quick_push (tree, vargs, vec_oprnd0);
1329 }
1330
1331 new_stmt = gimple_build_call_vec (fndecl, vargs);
1332 new_temp = make_ssa_name (vec_dest, new_stmt);
1333 gimple_call_set_lhs (new_stmt, new_temp);
1334
1335 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1336
1337 if (j == 0)
1338 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1339 else
1340 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1341
1342 prev_stmt_info = vinfo_for_stmt (new_stmt);
1343 }
1344
1345 break;
1346
1347 case NARROW:
1348 for (j = 0; j < ncopies; ++j)
1349 {
1350 /* Build argument list for the vectorized call. */
1351 if (j == 0)
1352 vargs = VEC_alloc (tree, heap, nargs * 2);
1353 else
1354 VEC_truncate (tree, vargs, 0);
1355
1356 for (i = 0; i < nargs; i++)
1357 {
1358 op = gimple_call_arg (stmt, i);
1359 if (j == 0)
1360 {
1361 vec_oprnd0
1362 = vect_get_vec_def_for_operand (op, stmt, NULL);
1363 vec_oprnd1
1364 		  = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1365 }
1366 else
1367 {
1368 vec_oprnd0
1369 		  = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1370 		vec_oprnd1
1371 		  = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1372 }
1373
1374 VEC_quick_push (tree, vargs, vec_oprnd0);
1375 VEC_quick_push (tree, vargs, vec_oprnd1);
1376 }
1377
1378 new_stmt = gimple_build_call_vec (fndecl, vargs);
1379 new_temp = make_ssa_name (vec_dest, new_stmt);
1380 gimple_call_set_lhs (new_stmt, new_temp);
1381
1382 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1383
1384 if (j == 0)
1385 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1386 else
1387 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1388
1389 prev_stmt_info = vinfo_for_stmt (new_stmt);
1390 }
1391
1392 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1393
1394 break;
1395
1396 case WIDEN:
1397 /* No current target implements this case. */
1398 return false;
1399 }
1400
1401 VEC_free (tree, heap, vargs);
1402
1403 /* Update the exception handling table with the vector stmt if necessary. */
1404 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1405 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1406
1407 /* The call in STMT might prevent it from being removed in dce.
1408 We however cannot remove it here, due to the way the ssa name
1409 it defines is mapped to the new definition. So just replace
1410      the rhs of the statement with something harmless.  */
1411
1412 type = TREE_TYPE (scalar_dest);
1413 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
1414 fold_convert (type, integer_zero_node));
1415 set_vinfo_for_stmt (new_stmt, stmt_info);
1416 set_vinfo_for_stmt (stmt, NULL);
1417 STMT_VINFO_STMT (stmt_info) = new_stmt;
1418 gsi_replace (gsi, new_stmt, false);
1419 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1420
1421 return true;
1422 }
1423
1424
1425 /* Function vect_gen_widened_results_half
1426
1427    Create a vector stmt whose code, number of arguments, and result
1428    variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
1429 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
1430 In the case that CODE is a CALL_EXPR, this means that a call to DECL
1431 needs to be created (DECL is a function-decl of a target-builtin).
1432 STMT is the original scalar stmt that we are vectorizing. */
1433
1434 static gimple
1435 vect_gen_widened_results_half (enum tree_code code,
1436 tree decl,
1437 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1438 tree vec_dest, gimple_stmt_iterator *gsi,
1439 gimple stmt)
1440 {
1441 gimple new_stmt;
1442 tree new_temp;
1443
1444 /* Generate half of the widened result: */
1445 if (code == CALL_EXPR)
1446 {
1447 /* Target specific support */
1448 if (op_type == binary_op)
1449 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1450 else
1451 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1452 new_temp = make_ssa_name (vec_dest, new_stmt);
1453 gimple_call_set_lhs (new_stmt, new_temp);
1454 }
1455 else
1456 {
1457 /* Generic support */
1458 gcc_assert (op_type == TREE_CODE_LENGTH (code));
1459 if (op_type != binary_op)
1460 vec_oprnd1 = NULL;
1461 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1462 vec_oprnd1);
1463 new_temp = make_ssa_name (vec_dest, new_stmt);
1464 gimple_assign_set_lhs (new_stmt, new_temp);
1465 }
1466 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1467
1468 return new_stmt;
1469 }
1470
1471
1472 /* Check if STMT performs a conversion operation that can be vectorized.
1473 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1474 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1475 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1476
1477 static bool
1478 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
1479 gimple *vec_stmt, slp_tree slp_node)
1480 {
1481 tree vec_dest;
1482 tree scalar_dest;
1483 tree op0;
1484 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1485 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1486 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1487 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
1488 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
1489 tree new_temp;
1490 tree def;
1491 gimple def_stmt;
1492 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1493 gimple new_stmt = NULL;
1494 stmt_vec_info prev_stmt_info;
1495 int nunits_in;
1496 int nunits_out;
1497 tree vectype_out, vectype_in;
1498 int ncopies, j;
1499 tree expr;
1500 tree rhs_type, lhs_type;
1501 tree builtin_decl;
1502 enum { NARROW, NONE, WIDEN } modifier;
1503 int i;
1504 VEC(tree,heap) *vec_oprnds0 = NULL;
1505 tree vop0;
1506 tree integral_type;
1507 VEC(tree,heap) *dummy = NULL;
1508 int dummy_int;
1509
1510 /* Is STMT a vectorizable conversion? */
1511
1512 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1513 return false;
1514
1515 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1516 return false;
1517
1518 if (!is_gimple_assign (stmt))
1519 return false;
1520
1521 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1522 return false;
1523
1524 code = gimple_assign_rhs_code (stmt);
1525 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
1526 return false;
1527
1528 /* Check types of lhs and rhs. */
1529 op0 = gimple_assign_rhs1 (stmt);
1530 rhs_type = TREE_TYPE (op0);
1531 vectype_in = get_vectype_for_scalar_type (rhs_type);
1532 if (!vectype_in)
1533 return false;
1534 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1535
1536 scalar_dest = gimple_assign_lhs (stmt);
1537 lhs_type = TREE_TYPE (scalar_dest);
1538 vectype_out = get_vectype_for_scalar_type (lhs_type);
1539 if (!vectype_out)
1540 return false;
1541 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1542
1543 /* FORNOW */
1544 if (nunits_in == nunits_out / 2)
1545 modifier = NARROW;
1546 else if (nunits_out == nunits_in)
1547 modifier = NONE;
1548 else if (nunits_out == nunits_in / 2)
1549 modifier = WIDEN;
1550 else
1551 return false;
1552
1553 if (modifier == NONE)
1554 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);
1555
1556   /* Bail out if the types are both integral or both non-integral.  */
1557 if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
1558 || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
1559 return false;
1560
1561 integral_type = INTEGRAL_TYPE_P (rhs_type) ? vectype_in : vectype_out;
1562
1563 if (modifier == NARROW)
1564 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1565 else
1566 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1567
1568 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
1569 this, so we can safely override NCOPIES with 1 here. */
1570 if (slp_node)
1571 ncopies = 1;
1572
1573 /* Sanity check: make sure that at least one copy of the vectorized stmt
1574 needs to be generated. */
1575 gcc_assert (ncopies >= 1);
1576
1577 /* Check the operands of the operation. */
1578 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
1579 {
1580 if (vect_print_dump_info (REPORT_DETAILS))
1581 fprintf (vect_dump, "use not simple.");
1582 return false;
1583 }
1584
1585 /* Supportable by target? */
1586 if ((modifier == NONE
1587 && !targetm.vectorize.builtin_conversion (code, integral_type))
1588 || (modifier == WIDEN
1589 && !supportable_widening_operation (code, stmt, vectype_in,
1590 &decl1, &decl2,
1591 &code1, &code2,
1592 &dummy_int, &dummy))
1593 || (modifier == NARROW
1594 && !supportable_narrowing_operation (code, stmt, vectype_in,
1595 &code1, &dummy_int, &dummy)))
1596 {
1597 if (vect_print_dump_info (REPORT_DETAILS))
1598 fprintf (vect_dump, "conversion not supported by target.");
1599 return false;
1600 }
1601
1602 if (modifier != NONE)
1603 {
1604 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
1605 /* FORNOW: SLP not supported. */
1606 if (STMT_SLP_TYPE (stmt_info))
1607 return false;
1608 }
1609
1610 if (!vec_stmt) /* transformation not required. */
1611 {
1612 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
1613 return true;
1614 }
1615
1616 /** Transform. **/
1617 if (vect_print_dump_info (REPORT_DETAILS))
1618 fprintf (vect_dump, "transform conversion.");
1619
1620 /* Handle def. */
1621 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1622
1623 if (modifier == NONE && !slp_node)
1624 vec_oprnds0 = VEC_alloc (tree, heap, 1);
1625
1626 prev_stmt_info = NULL;
1627 switch (modifier)
1628 {
1629 case NONE:
1630 for (j = 0; j < ncopies; j++)
1631 {
1632 if (j == 0)
1633 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
1634 else
1635 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
1636
1637 builtin_decl =
1638 targetm.vectorize.builtin_conversion (code, integral_type);
1639 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
1640 {
1641 	      /* Arguments are ready.  Create the new vector stmt.  */
1642 new_stmt = gimple_build_call (builtin_decl, 1, vop0);
1643 new_temp = make_ssa_name (vec_dest, new_stmt);
1644 gimple_call_set_lhs (new_stmt, new_temp);
1645 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1646 if (slp_node)
1647 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
1648 }
1649
1650 if (j == 0)
1651 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1652 else
1653 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1654 prev_stmt_info = vinfo_for_stmt (new_stmt);
1655 }
1656 break;
1657
1658 case WIDEN:
1659 /* In case the vectorization factor (VF) is bigger than the number
1660 of elements that we can fit in a vectype (nunits), we have to
1661 generate more than one vector stmt - i.e - we need to "unroll"
1662 the vector stmt by a factor VF/nunits. */
1663 for (j = 0; j < ncopies; j++)
1664 {
1665 if (j == 0)
1666 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1667 else
1668 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1669
1670 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
1671
1672 /* Generate first half of the widened result: */
1673 new_stmt
1674 = vect_gen_widened_results_half (code1, decl1,
1675 vec_oprnd0, vec_oprnd1,
1676 unary_op, vec_dest, gsi, stmt);
1677 if (j == 0)
1678 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1679 else
1680 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1681 prev_stmt_info = vinfo_for_stmt (new_stmt);
1682
1683 /* Generate second half of the widened result: */
1684 new_stmt
1685 = vect_gen_widened_results_half (code2, decl2,
1686 vec_oprnd0, vec_oprnd1,
1687 unary_op, vec_dest, gsi, stmt);
1688 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1689 prev_stmt_info = vinfo_for_stmt (new_stmt);
1690 }
1691 break;
1692
1693 case NARROW:
1694 /* In case the vectorization factor (VF) is bigger than the number
1695 of elements that we can fit in a vectype (nunits), we have to
1696 generate more than one vector stmt - i.e - we need to "unroll"
1697 the vector stmt by a factor VF/nunits. */
1698 for (j = 0; j < ncopies; j++)
1699 {
1700 /* Handle uses. */
1701 if (j == 0)
1702 {
1703 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1704 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1705 }
1706 else
1707 {
1708 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
1709 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1710 }
1711
1712 /* Arguments are ready. Create the new vector stmt. */
1713 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
1714 new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
1715 vec_oprnd1);
1716 new_temp = make_ssa_name (vec_dest, new_stmt);
1717 gimple_assign_set_lhs (new_stmt, new_temp);
1718 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1719
1720 if (j == 0)
1721 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1722 else
1723 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1724
1725 prev_stmt_info = vinfo_for_stmt (new_stmt);
1726 }
1727
1728 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1729 }
1730
1731 if (vec_oprnds0)
1732 VEC_free (tree, heap, vec_oprnds0);
1733
1734 return true;
1735 }
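
/* For illustration, a conversion loop such as

       int a[N]; float b[N];
       for (i = 0; i < n; i++)
         b[i] = (float) a[i];

   is a FLOAT_EXPR between equally wide element types (assuming 16-byte
   vectors, nunits_in == nunits_out == 4), so modifier == NONE and,
   provided the target's builtin_conversion hook returns a builtin for it,
   the transformation emits one call to that builtin per copy.  Converting
   int to double instead would give nunits_out == nunits_in / 2, i.e. a
   WIDEN conversion handled through supportable_widening_operation.  */
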
1736 /* Function vectorizable_assignment.
1737
1738 Check if STMT performs an assignment (copy) that can be vectorized.
1739 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1740 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1741 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1742
1743 static bool
1744 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
1745 gimple *vec_stmt, slp_tree slp_node)
1746 {
1747 tree vec_dest;
1748 tree scalar_dest;
1749 tree op;
1750 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1751 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1752 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1753 tree new_temp;
1754 tree def;
1755 gimple def_stmt;
1756 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1757 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1758 int ncopies;
1759 int i;
1760 VEC(tree,heap) *vec_oprnds = NULL;
1761 tree vop;
1762
1763 /* Multiple types in SLP are handled by creating the appropriate number of
1764 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1765 case of SLP. */
1766 if (slp_node)
1767 ncopies = 1;
1768 else
1769 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1770
1771 gcc_assert (ncopies >= 1);
1772 if (ncopies > 1)
1773 return false; /* FORNOW */
1774
1775 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1776 return false;
1777
1778 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1779 return false;
1780
1781 /* Is vectorizable assignment? */
1782 if (!is_gimple_assign (stmt))
1783 return false;
1784
1785 scalar_dest = gimple_assign_lhs (stmt);
1786 if (TREE_CODE (scalar_dest) != SSA_NAME)
1787 return false;
1788
1789 if (gimple_assign_single_p (stmt)
1790 || gimple_assign_rhs_code (stmt) == PAREN_EXPR)
1791 op = gimple_assign_rhs1 (stmt);
1792 else
1793 return false;
1794
1795 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[0]))
1796 {
1797 if (vect_print_dump_info (REPORT_DETAILS))
1798 fprintf (vect_dump, "use not simple.");
1799 return false;
1800 }
1801
1802 if (!vec_stmt) /* transformation not required. */
1803 {
1804 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1805 if (vect_print_dump_info (REPORT_DETAILS))
1806 fprintf (vect_dump, "=== vectorizable_assignment ===");
1807 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1808 return true;
1809 }
1810
1811 /** Transform. **/
1812 if (vect_print_dump_info (REPORT_DETAILS))
1813 fprintf (vect_dump, "transform assignment.");
1814
1815 /* Handle def. */
1816 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1817
1818 /* Handle use. */
1819 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
1820
1821 /* Arguments are ready. Create the new vector stmt. */
1822 for (i = 0; VEC_iterate (tree, vec_oprnds, i, vop); i++)
1823 {
1824 *vec_stmt = gimple_build_assign (vec_dest, vop);
1825 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1826 gimple_assign_set_lhs (*vec_stmt, new_temp);
1827 vect_finish_stmt_generation (stmt, *vec_stmt, gsi);
1828 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt;
1829
1830 if (slp_node)
1831 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), *vec_stmt);
1832 }
1833
1834 VEC_free (tree, heap, vec_oprnds);
1835 return true;
1836 }
1837
1838 /* Function vectorizable_operation.
1839
1840 Check if STMT performs a binary or unary operation that can be vectorized.
1841 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1842 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1843 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1844
1845 static bool
1846 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
1847 gimple *vec_stmt, slp_tree slp_node)
1848 {
1849 tree vec_dest;
1850 tree scalar_dest;
1851 tree op0, op1 = NULL;
1852 tree vec_oprnd1 = NULL_TREE;
1853 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1854 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1855 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1856 enum tree_code code;
1857 enum machine_mode vec_mode;
1858 tree new_temp;
1859 int op_type;
1860 optab optab;
1861 int icode;
1862 enum machine_mode optab_op2_mode;
1863 tree def;
1864 gimple def_stmt;
1865 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1866 gimple new_stmt = NULL;
1867 stmt_vec_info prev_stmt_info;
1868 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
1869 int nunits_out;
1870 tree vectype_out;
1871 int ncopies;
1872 int j, i;
1873 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
1874 tree vop0, vop1;
1875 unsigned int k;
1876 bool shift_p = false;
1877 bool scalar_shift_arg = false;
1878
1879 /* Multiple types in SLP are handled by creating the appropriate number of
1880 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1881 case of SLP. */
1882 if (slp_node)
1883 ncopies = 1;
1884 else
1885 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1886
1887 gcc_assert (ncopies >= 1);
1888
1889 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1890 return false;
1891
1892 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1893 return false;
1894
1895 /* Is STMT a vectorizable binary/unary operation? */
1896 if (!is_gimple_assign (stmt))
1897 return false;
1898
1899 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1900 return false;
1901
1902 scalar_dest = gimple_assign_lhs (stmt);
1903 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
1904 if (!vectype_out)
1905 return false;
1906 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1907 if (nunits_out != nunits_in)
1908 return false;
1909
1910 code = gimple_assign_rhs_code (stmt);
1911
1912 /* For pointer addition, we should use the normal plus for
1913 the vector addition. */
1914 if (code == POINTER_PLUS_EXPR)
1915 code = PLUS_EXPR;
1916
1917 /* Support only unary or binary operations. */
1918 op_type = TREE_CODE_LENGTH (code);
1919 if (op_type != unary_op && op_type != binary_op)
1920 {
1921 if (vect_print_dump_info (REPORT_DETAILS))
1922 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
1923 return false;
1924 }
1925
1926 op0 = gimple_assign_rhs1 (stmt);
1927 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
1928 {
1929 if (vect_print_dump_info (REPORT_DETAILS))
1930 fprintf (vect_dump, "use not simple.");
1931 return false;
1932 }
1933
1934 if (op_type == binary_op)
1935 {
1936 op1 = gimple_assign_rhs2 (stmt);
1937 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
1938 {
1939 if (vect_print_dump_info (REPORT_DETAILS))
1940 fprintf (vect_dump, "use not simple.");
1941 return false;
1942 }
1943 }
1944
1945 /* If this is a shift/rotate, determine whether the shift amount is a vector,
1946 or scalar. If the shift/rotate amount is a vector, use the vector/vector
1947 shift optabs. */
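  /* An illustrative sketch (not specific to any target): for
       a[i] = b[i] << c[i]
     the shift amount is loop-variant (vect_internal_def), so the
     vector/vector optab (optab_vector, e.g. vashl_optab) is needed,
     whereas for
       a[i] = b[i] << 3
     the amount is invariant and the vector/scalar optab (optab_scalar,
     e.g. ashl_optab on the vector mode) is tried first.  */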
1948 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
1949 || code == RROTATE_EXPR)
1950 {
1951 shift_p = true;
1952
1953 /* Vector shifted by vector. */
1954 if (dt[1] == vect_internal_def)
1955 {
1956 optab = optab_for_tree_code (code, vectype, optab_vector);
1957 if (vect_print_dump_info (REPORT_DETAILS))
1958 fprintf (vect_dump, "vector/vector shift/rotate found.");
1959 }
1960
1961 /* See if the machine has a vector-by-scalar shift insn, and if not,
1962 then see if it has a vector-by-vector shift insn. */
1963 else if (dt[1] == vect_constant_def || dt[1] == vect_external_def)
1964 {
1965 optab = optab_for_tree_code (code, vectype, optab_scalar);
1966 if (optab
1967 && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
1968 != CODE_FOR_nothing))
1969 {
1970 scalar_shift_arg = true;
1971 if (vect_print_dump_info (REPORT_DETAILS))
1972 fprintf (vect_dump, "vector/scalar shift/rotate found.");
1973 }
1974 else
1975 {
1976 optab = optab_for_tree_code (code, vectype, optab_vector);
1977 if (optab
1978 && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
1979 != CODE_FOR_nothing))
1980 {
1981 if (vect_print_dump_info (REPORT_DETAILS))
1982 fprintf (vect_dump, "vector/vector shift/rotate found.");
1983
1984 /* Unlike the other binary operators, shifts/rotates have
1985 the rhs being int, instead of the same type as the lhs,
1986 so make sure the scalar is the right type if we are
1987 dealing with vectors of short/char. */
1988 if (dt[1] == vect_constant_def)
1989 op1 = fold_convert (TREE_TYPE (vectype), op1);
1990 }
1991 }
1992 }
1993
1994 else
1995 {
1996 if (vect_print_dump_info (REPORT_DETAILS))
1997 fprintf (vect_dump, "operand mode requires invariant argument.");
1998 return false;
1999 }
2000 }
2001 else
2002 optab = optab_for_tree_code (code, vectype, optab_default);
2003
2004 /* Supportable by target? */
2005 if (!optab)
2006 {
2007 if (vect_print_dump_info (REPORT_DETAILS))
2008 fprintf (vect_dump, "no optab.");
2009 return false;
2010 }
2011 vec_mode = TYPE_MODE (vectype);
2012 icode = (int) optab_handler (optab, vec_mode)->insn_code;
2013 if (icode == CODE_FOR_nothing)
2014 {
2015 if (vect_print_dump_info (REPORT_DETAILS))
2016 fprintf (vect_dump, "op not supported by target.");
2017 /* Check only during analysis. */
2018 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2019 || (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2020 < vect_min_worthwhile_factor (code)
2021 && !vec_stmt))
2022 return false;
2023 if (vect_print_dump_info (REPORT_DETAILS))
2024 fprintf (vect_dump, "proceeding using word mode.");
2025 }
2026
2027 /* Worthwhile without SIMD support? Check only during analysis. */
2028 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2029 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2030 < vect_min_worthwhile_factor (code)
2031 && !vec_stmt)
2032 {
2033 if (vect_print_dump_info (REPORT_DETAILS))
2034 fprintf (vect_dump, "not worthwhile without SIMD support.");
2035 return false;
2036 }
2037
2038 if (!vec_stmt) /* transformation not required. */
2039 {
2040 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2041 if (vect_print_dump_info (REPORT_DETAILS))
2042 fprintf (vect_dump, "=== vectorizable_operation ===");
2043 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2044 return true;
2045 }
2046
2047 /** Transform. **/
2048
2049 if (vect_print_dump_info (REPORT_DETAILS))
2050 fprintf (vect_dump, "transform binary/unary operation.");
2051
2052 /* Handle def. */
2053 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2054
2055 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2056 created in the previous stages of the recursion, so no allocation is
2057 needed, except for the case of shift with scalar shift argument. In that
2058 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2059 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2060 In case of loop-based vectorization we allocate VECs of size 1. We
2061 allocate VEC_OPRNDS1 only in case of binary operation. */
2062 if (!slp_node)
2063 {
2064 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2065 if (op_type == binary_op)
2066 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2067 }
2068 else if (scalar_shift_arg)
2069 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2070
2071 /* In case the vectorization factor (VF) is bigger than the number
2072 of elements that we can fit in a vectype (nunits), we have to generate
2073 more than one vector stmt - i.e - we need to "unroll" the
2074 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2075 from one copy of the vector stmt to the next, in the field
2076 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2077 stages to find the correct vector defs to be used when vectorizing
2078 stmts that use the defs of the current stmt. The example below illustrates
2079 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
2080 4 vectorized stmts):
2081
2082 before vectorization:
2083 RELATED_STMT VEC_STMT
2084 S1: x = memref - -
2085 S2: z = x + 1 - -
2086
2087 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2088 there):
2089 RELATED_STMT VEC_STMT
2090 VS1_0: vx0 = memref0 VS1_1 -
2091 VS1_1: vx1 = memref1 VS1_2 -
2092 VS1_2: vx2 = memref2 VS1_3 -
2093 VS1_3: vx3 = memref3 - -
2094 S1: x = load - VS1_0
2095 S2: z = x + 1 - -
2096
2097 step2: vectorize stmt S2 (done here):
2098 To vectorize stmt S2 we first need to find the relevant vector
2099 def for the first operand 'x'. This is, as usual, obtained from
2100 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2101 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2102 relevant vector def 'vx0'. Having found 'vx0' we can generate
2103 the vector stmt VS2_0, and as usual, record it in the
2104 STMT_VINFO_VEC_STMT of stmt S2.
2105 When creating the second copy (VS2_1), we obtain the relevant vector
2106 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2107 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2108 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2109 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2110 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2111 chain of stmts and pointers:
2112 RELATED_STMT VEC_STMT
2113 VS1_0: vx0 = memref0 VS1_1 -
2114 VS1_1: vx1 = memref1 VS1_2 -
2115 VS1_2: vx2 = memref2 VS1_3 -
2116 VS1_3: vx3 = memref3 - -
2117 S1: x = load - VS1_0
2118 VS2_0: vz0 = vx0 + v1 VS2_1 -
2119 VS2_1: vz1 = vx1 + v1 VS2_2 -
2120 VS2_2: vz2 = vx2 + v1 VS2_3 -
2121 VS2_3: vz3 = vx3 + v1 - -
2122 S2: z = x + 1 - VS2_0 */
2123
2124 prev_stmt_info = NULL;
2125 for (j = 0; j < ncopies; j++)
2126 {
2127 /* Handle uses. */
2128 if (j == 0)
2129 {
2130 if (op_type == binary_op && scalar_shift_arg)
2131 {
2132 /* Vector shl and shr insn patterns can be defined with scalar
2133 operand 2 (shift operand). In this case, use constant or loop
2134 invariant op1 directly, without extending it to vector mode
2135 first. */
2136 optab_op2_mode = insn_data[icode].operand[2].mode;
2137 if (!VECTOR_MODE_P (optab_op2_mode))
2138 {
2139 if (vect_print_dump_info (REPORT_DETAILS))
2140 fprintf (vect_dump, "operand 1 using scalar mode.");
2141 vec_oprnd1 = op1;
2142 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2143 if (slp_node)
2144 {
2145 /* Store vec_oprnd1 for every vector stmt to be created
2146 for SLP_NODE. We check during the analysis that all the
2147 shift arguments are the same.
2148 TODO: Allow different constants for different vector
2149 stmts generated for an SLP instance. */
2150 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2151 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2152 }
2153 }
2154 }
2155
2156 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
2157 (a special case for certain kinds of vector shifts); otherwise,
2158 operand 1 should be of a vector type (the usual case). */
2159 if (op_type == binary_op && !vec_oprnd1)
2160 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2161 slp_node);
2162 else
2163 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2164 slp_node);
2165 }
2166 else
2167 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2168
2169 /* Arguments are ready. Create the new vector stmt. */
2170 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
2171 {
2172 vop1 = ((op_type == binary_op)
2173 ? VEC_index (tree, vec_oprnds1, i) : NULL);
2174 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2175 new_temp = make_ssa_name (vec_dest, new_stmt);
2176 gimple_assign_set_lhs (new_stmt, new_temp);
2177 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2178 if (slp_node)
2179 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2180 }
2181
2182 if (slp_node)
2183 continue;
2184
2185 if (j == 0)
2186 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2187 else
2188 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2189 prev_stmt_info = vinfo_for_stmt (new_stmt);
2190 }
2191
2192 VEC_free (tree, heap, vec_oprnds0);
2193 if (vec_oprnds1)
2194 VEC_free (tree, heap, vec_oprnds1);
2195
2196 return true;
2197 }
2198
2199
2200 /* Get vectorized definitions for loop-based vectorization. For the first
2201 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2202 scalar operand), and for the rest we get a copy with
2203 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2204 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2205 The vectors are collected into VEC_OPRNDS. */
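/* A small worked example (illustrative only): called with
   MULTI_STEP_CVT == 1 the function runs twice - the initial call plus one
   recursive call - and pushes four vector defs into VEC_OPRNDS,
   2 * (MULTI_STEP_CVT + 1) in general.  */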
2206
2207 static void
2208 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2209 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2210 {
2211 tree vec_oprnd;
2212
2213 /* Get first vector operand. */
2214 /* All the vector operands except the very first one (that is scalar oprnd)
2215 are stmt copies. */
2216 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2217 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2218 else
2219 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2220
2221 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2222
2223 /* Get second vector operand. */
2224 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2225 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2226
2227 *oprnd = vec_oprnd;
2228
2229 /* For conversion in multiple steps, continue to get operands
2230 recursively. */
2231 if (multi_step_cvt)
2232 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2233 }
2234
2235
2236 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2237 For multi-step conversions store the resulting vectors and call the function
2238 recursively. */
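/* An illustrative sketch only: at each level every pair of operands
   (VEC_OPRNDS[2i], VEC_OPRNDS[2i+1]) is combined into a single narrower
   vector, e.g.
     new_tmp = VEC_PACK_TRUNC_EXPR <vop0, vop1>;
   so the number of live operands is halved before the recursive call.  */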
2239
2240 static void
2241 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2242 int multi_step_cvt, gimple stmt,
2243 VEC (tree, heap) *vec_dsts,
2244 gimple_stmt_iterator *gsi,
2245 slp_tree slp_node, enum tree_code code,
2246 stmt_vec_info *prev_stmt_info)
2247 {
2248 unsigned int i;
2249 tree vop0, vop1, new_tmp, vec_dest;
2250 gimple new_stmt;
2251 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2252
2253 vec_dest = VEC_pop (tree, vec_dsts);
2254
2255 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2256 {
2257 /* Create demotion operation. */
2258 vop0 = VEC_index (tree, *vec_oprnds, i);
2259 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2260 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2261 new_tmp = make_ssa_name (vec_dest, new_stmt);
2262 gimple_assign_set_lhs (new_stmt, new_tmp);
2263 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2264
2265 if (multi_step_cvt)
2266 /* Store the resulting vector for next recursive call. */
2267 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2268 else
2269 {
2270 /* This is the last step of the conversion sequence. Store the
2271 vectors in SLP_NODE or in vector info of the scalar statement
2272 (or in STMT_VINFO_RELATED_STMT chain). */
2273 if (slp_node)
2274 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2275 else
2276 {
2277 if (!*prev_stmt_info)
2278 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2279 else
2280 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2281
2282 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2283 }
2284 }
2285 }
2286
2287 /* For multi-step demotion operations we first generate demotion operations
2288 from the source type to the intermediate types, and then combine the
2289 results (stored in VEC_OPRNDS) in demotion operation to the destination
2290 type. */
2291 if (multi_step_cvt)
2292 {
2293 /* At each level of recursion we have half of the operands we had at the
2294 previous level. */
2295 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2296 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2297 stmt, vec_dsts, gsi, slp_node,
2298 code, prev_stmt_info);
2299 }
2300 }
2301
2302
2303 /* Function vectorizable_type_demotion
2304
2305 Check if STMT performs a binary or unary operation that involves
2306 type demotion, and if it can be vectorized.
2307 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2308 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2309 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2310
2311 static bool
2312 vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
2313 gimple *vec_stmt, slp_tree slp_node)
2314 {
2315 tree vec_dest;
2316 tree scalar_dest;
2317 tree op0;
2318 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2319 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2320 enum tree_code code, code1 = ERROR_MARK;
2321 tree def;
2322 gimple def_stmt;
2323 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2324 stmt_vec_info prev_stmt_info;
2325 int nunits_in;
2326 int nunits_out;
2327 tree vectype_out;
2328 int ncopies;
2329 int j, i;
2330 tree vectype_in;
2331 int multi_step_cvt = 0;
2332 VEC (tree, heap) *vec_oprnds0 = NULL;
2333 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
2334 tree last_oprnd, intermediate_type;
2335
2336 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2337 return false;
2338
2339 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2340 return false;
2341
2342 /* Is STMT a vectorizable type-demotion operation? */
2343 if (!is_gimple_assign (stmt))
2344 return false;
2345
2346 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2347 return false;
2348
2349 code = gimple_assign_rhs_code (stmt);
2350 if (!CONVERT_EXPR_CODE_P (code))
2351 return false;
2352
2353 op0 = gimple_assign_rhs1 (stmt);
2354 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2355 if (!vectype_in)
2356 return false;
2357 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2358
2359 scalar_dest = gimple_assign_lhs (stmt);
2360 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2361 if (!vectype_out)
2362 return false;
2363 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2364 if (nunits_in >= nunits_out)
2365 return false;
2366
2367 /* Multiple types in SLP are handled by creating the appropriate number of
2368 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2369 case of SLP. */
2370 if (slp_node)
2371 ncopies = 1;
2372 else
2373 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2374
2375 gcc_assert (ncopies >= 1);
2376
2377 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2378 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2379 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2380 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2381 && CONVERT_EXPR_CODE_P (code))))
2382 return false;
2383
2384 /* Check the operands of the operation. */
2385 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
2386 {
2387 if (vect_print_dump_info (REPORT_DETAILS))
2388 fprintf (vect_dump, "use not simple.");
2389 return false;
2390 }
2391
2392 /* Supportable by target? */
2393 if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1,
2394 &multi_step_cvt, &interm_types))
2395 return false;
2396
2397 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2398
2399 if (!vec_stmt) /* transformation not required. */
2400 {
2401 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2402 if (vect_print_dump_info (REPORT_DETAILS))
2403 fprintf (vect_dump, "=== vectorizable_demotion ===");
2404 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2405 return true;
2406 }
2407
2408 /** Transform. **/
2409 if (vect_print_dump_info (REPORT_DETAILS))
2410 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
2411 ncopies);
2412
2413 /* In case of multi-step demotion, we first generate demotion operations to
2414 the intermediate types, and then from those types to the final one.
2415 We create vector destinations for the intermediate types (TYPES) received
2416 from supportable_narrowing_operation, and store them in the correct order
2417 for future use in vect_create_vectorized_demotion_stmts(). */
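  /* A minimal sketch, assuming VEC_PACK_TRUNC_EXPR is the narrowing code
     chosen by supportable_narrowing_operation: an int -> char conversion
     is done in two steps, int -> short -> char:
       vshort_0 = VEC_PACK_TRUNC_EXPR <vint_0, vint_1>;
       vshort_1 = VEC_PACK_TRUNC_EXPR <vint_2, vint_3>;
       vchar_0  = VEC_PACK_TRUNC_EXPR <vshort_0, vshort_1>;
     so the final 'char' destination is pushed first and the intermediate
     'short' destination last, to be popped first.  */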
2418 if (multi_step_cvt)
2419 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2420 else
2421 vec_dsts = VEC_alloc (tree, heap, 1);
2422
2423 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2424 VEC_quick_push (tree, vec_dsts, vec_dest);
2425
2426 if (multi_step_cvt)
2427 {
2428 for (i = VEC_length (tree, interm_types) - 1;
2429 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2430 {
2431 vec_dest = vect_create_destination_var (scalar_dest,
2432 intermediate_type);
2433 VEC_quick_push (tree, vec_dsts, vec_dest);
2434 }
2435 }
2436
2437 /* In case the vectorization factor (VF) is bigger than the number
2438 of elements that we can fit in a vectype (nunits), we have to generate
2439 more than one vector stmt - i.e - we need to "unroll" the
2440 vector stmt by a factor VF/nunits. */
2441 last_oprnd = op0;
2442 prev_stmt_info = NULL;
2443 for (j = 0; j < ncopies; j++)
2444 {
2445 /* Handle uses. */
2446 if (slp_node)
2447 vect_get_slp_defs (slp_node, &vec_oprnds0, NULL);
2448 else
2449 {
2450 VEC_free (tree, heap, vec_oprnds0);
2451 vec_oprnds0 = VEC_alloc (tree, heap,
2452 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
2453 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2454 vect_pow2 (multi_step_cvt) - 1);
2455 }
2456
2457 /* Arguments are ready. Create the new vector stmts. */
2458 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
2459 vect_create_vectorized_demotion_stmts (&vec_oprnds0,
2460 multi_step_cvt, stmt, tmp_vec_dsts,
2461 gsi, slp_node, code1,
2462 &prev_stmt_info);
2463 }
2464
2465 VEC_free (tree, heap, vec_oprnds0);
2466 VEC_free (tree, heap, vec_dsts);
2467 VEC_free (tree, heap, tmp_vec_dsts);
2468 VEC_free (tree, heap, interm_types);
2469
2470 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2471 return true;
2472 }
2473
2474
2475 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2476 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2477 the resulting vectors and call the function recursively. */
2478
2479 static void
2480 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2481 VEC (tree, heap) **vec_oprnds1,
2482 int multi_step_cvt, gimple stmt,
2483 VEC (tree, heap) *vec_dsts,
2484 gimple_stmt_iterator *gsi,
2485 slp_tree slp_node, enum tree_code code1,
2486 enum tree_code code2, tree decl1,
2487 tree decl2, int op_type,
2488 stmt_vec_info *prev_stmt_info)
2489 {
2490 int i;
2491 tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
2492 gimple new_stmt1, new_stmt2;
2493 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2494 VEC (tree, heap) *vec_tmp;
2495
2496 vec_dest = VEC_pop (tree, vec_dsts);
2497 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2498
2499 for (i = 0; VEC_iterate (tree, *vec_oprnds0, i, vop0); i++)
2500 {
2501 if (op_type == binary_op)
2502 vop1 = VEC_index (tree, *vec_oprnds1, i);
2503 else
2504 vop1 = NULL_TREE;
2505
2506 /* Generate the two halves of promotion operation. */
2507 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2508 op_type, vec_dest, gsi, stmt);
2509 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2510 op_type, vec_dest, gsi, stmt);
2511 if (is_gimple_call (new_stmt1))
2512 {
2513 new_tmp1 = gimple_call_lhs (new_stmt1);
2514 new_tmp2 = gimple_call_lhs (new_stmt2);
2515 }
2516 else
2517 {
2518 new_tmp1 = gimple_assign_lhs (new_stmt1);
2519 new_tmp2 = gimple_assign_lhs (new_stmt2);
2520 }
2521
2522 if (multi_step_cvt)
2523 {
2524 /* Store the results for the recursive call. */
2525 VEC_quick_push (tree, vec_tmp, new_tmp1);
2526 VEC_quick_push (tree, vec_tmp, new_tmp2);
2527 }
2528 else
2529 {
2530 /* Last step of the promotion sequence - store the results. */
2531 if (slp_node)
2532 {
2533 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
2534 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
2535 }
2536 else
2537 {
2538 if (!*prev_stmt_info)
2539 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
2540 else
2541 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
2542
2543 *prev_stmt_info = vinfo_for_stmt (new_stmt1);
2544 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
2545 *prev_stmt_info = vinfo_for_stmt (new_stmt2);
2546 }
2547 }
2548 }
2549
2550 if (multi_step_cvt)
2551 {
2552 /* For a multi-step promotion operation we call the function recursively
2553 for every stage. We start from the input type, create promotion
2554 operations to the intermediate types, and then create promotions to the
2555 output type. */
2556 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
2557 VEC_free (tree, heap, vec_tmp);
2558 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
2559 multi_step_cvt - 1, stmt,
2560 vec_dsts, gsi, slp_node, code1,
2561 code2, decl1, decl2, op_type,
2562 prev_stmt_info);
2563 }
2564 }
2565
2566
2567 /* Function vectorizable_type_promotion
2568
2569 Check if STMT performs a binary or unary operation that involves
2570 type promotion, and if it can be vectorized.
2571 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2572 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2573 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2574
2575 static bool
2576 vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
2577 gimple *vec_stmt, slp_tree slp_node)
2578 {
2579 tree vec_dest;
2580 tree scalar_dest;
2581 tree op0, op1 = NULL;
2582 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
2583 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2584 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2585 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2586 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2587 int op_type;
2588 tree def;
2589 gimple def_stmt;
2590 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2591 stmt_vec_info prev_stmt_info;
2592 int nunits_in;
2593 int nunits_out;
2594 tree vectype_out;
2595 int ncopies;
2596 int j, i;
2597 tree vectype_in;
2598 tree intermediate_type = NULL_TREE;
2599 int multi_step_cvt = 0;
2600 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2601 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
2602
2603 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2604 return false;
2605
2606 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2607 return false;
2608
2609 /* Is STMT a vectorizable type-promotion operation? */
2610 if (!is_gimple_assign (stmt))
2611 return false;
2612
2613 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2614 return false;
2615
2616 code = gimple_assign_rhs_code (stmt);
2617 if (!CONVERT_EXPR_CODE_P (code)
2618 && code != WIDEN_MULT_EXPR)
2619 return false;
2620
2621 op0 = gimple_assign_rhs1 (stmt);
2622 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2623 if (!vectype_in)
2624 return false;
2625 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2626
2627 scalar_dest = gimple_assign_lhs (stmt);
2628 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2629 if (!vectype_out)
2630 return false;
2631 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2632 if (nunits_in <= nunits_out)
2633 return false;
2634
2635 /* Multiple types in SLP are handled by creating the appropriate number of
2636 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2637 case of SLP. */
2638 if (slp_node)
2639 ncopies = 1;
2640 else
2641 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2642
2643 gcc_assert (ncopies >= 1);
2644
2645 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2646 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2647 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2648 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2649 && CONVERT_EXPR_CODE_P (code))))
2650 return false;
2651
2652 /* Check the operands of the operation. */
2653 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
2654 {
2655 if (vect_print_dump_info (REPORT_DETAILS))
2656 fprintf (vect_dump, "use not simple.");
2657 return false;
2658 }
2659
2660 op_type = TREE_CODE_LENGTH (code);
2661 if (op_type == binary_op)
2662 {
2663 op1 = gimple_assign_rhs2 (stmt);
2664 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
2665 {
2666 if (vect_print_dump_info (REPORT_DETAILS))
2667 fprintf (vect_dump, "use not simple.");
2668 return false;
2669 }
2670 }
2671
2672 /* Supportable by target? */
2673 if (!supportable_widening_operation (code, stmt, vectype_in,
2674 &decl1, &decl2, &code1, &code2,
2675 &multi_step_cvt, &interm_types))
2676 return false;
2677
2678 /* Binary widening operation can only be supported directly by the
2679 architecture. */
2680 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2681
2682 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2683
2684 if (!vec_stmt) /* transformation not required. */
2685 {
2686 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2687 if (vect_print_dump_info (REPORT_DETAILS))
2688 fprintf (vect_dump, "=== vectorizable_promotion ===");
2689 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
2690 return true;
2691 }
2692
2693 /** Transform. **/
2694
2695 if (vect_print_dump_info (REPORT_DETAILS))
2696 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
2697 ncopies);
2698
2699 /* Handle def. */
2700 /* In case of multi-step promotion, we first generate promotion operations
2701 to the intermediate types, and then from those types to the final one.
2702 We store the vector destinations in VEC_DSTS in the correct order for
2703 recursive creation of promotion operations in
2704 vect_create_vectorized_promotion_stmts(). Vector destinations are created
2705 according to TYPES received from supportable_widening_operation(). */
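  /* A minimal sketch, assuming VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR are
     the codes chosen by supportable_widening_operation: a char -> int
     conversion is done in two steps, char -> short -> int:
       vshort_lo = VEC_UNPACK_LO_EXPR <vchar_0>;
       vshort_hi = VEC_UNPACK_HI_EXPR <vchar_0>;
       vint_0    = VEC_UNPACK_LO_EXPR <vshort_lo>;
       ...
     so the final 'int' destination is pushed first and the intermediate
     'short' destination last, to be popped first.  */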
2706 if (multi_step_cvt)
2707 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2708 else
2709 vec_dsts = VEC_alloc (tree, heap, 1);
2710
2711 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2712 VEC_quick_push (tree, vec_dsts, vec_dest);
2713
2714 if (multi_step_cvt)
2715 {
2716 for (i = VEC_length (tree, interm_types) - 1;
2717 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2718 {
2719 vec_dest = vect_create_destination_var (scalar_dest,
2720 intermediate_type);
2721 VEC_quick_push (tree, vec_dsts, vec_dest);
2722 }
2723 }
2724
2725 if (!slp_node)
2726 {
2727 vec_oprnds0 = VEC_alloc (tree, heap,
2728 (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
2729 if (op_type == binary_op)
2730 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2731 }
2732
2733 /* In case the vectorization factor (VF) is bigger than the number
2734 of elements that we can fit in a vectype (nunits), we have to generate
2735 more than one vector stmt - i.e - we need to "unroll" the
2736 vector stmt by a factor VF/nunits. */
2737
2738 prev_stmt_info = NULL;
2739 for (j = 0; j < ncopies; j++)
2740 {
2741 /* Handle uses. */
2742 if (j == 0)
2743 {
2744 if (slp_node)
2745 vect_get_slp_defs (slp_node, &vec_oprnds0, &vec_oprnds1);
2746 else
2747 {
2748 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2749 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2750 if (op_type == binary_op)
2751 {
2752 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
2753 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2754 }
2755 }
2756 }
2757 else
2758 {
2759 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2760 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
2761 if (op_type == binary_op)
2762 {
2763 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
2764 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
2765 }
2766 }
2767
2768 /* Arguments are ready. Create the new vector stmts. */
2769 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
2770 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
2771 multi_step_cvt, stmt,
2772 tmp_vec_dsts,
2773 gsi, slp_node, code1, code2,
2774 decl1, decl2, op_type,
2775 &prev_stmt_info);
2776 }
2777
2778 VEC_free (tree, heap, vec_dsts);
2779 VEC_free (tree, heap, tmp_vec_dsts);
2780 VEC_free (tree, heap, interm_types);
2781 VEC_free (tree, heap, vec_oprnds0);
2782 VEC_free (tree, heap, vec_oprnds1);
2783
2784 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2785 return true;
2786 }
2787
2788
2789 /* Function vectorizable_store.
2790
2791 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2792 can be vectorized.
2793 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2794 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2795 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2796
2797 static bool
2798 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2799 slp_tree slp_node)
2800 {
2801 tree scalar_dest;
2802 tree data_ref;
2803 tree op;
2804 tree vec_oprnd = NULL_TREE;
2805 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2806 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
2807 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2808 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2809 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2810 enum machine_mode vec_mode;
2811 tree dummy;
2812 enum dr_alignment_support alignment_support_scheme;
2813 tree def;
2814 gimple def_stmt;
2815 enum vect_def_type dt;
2816 stmt_vec_info prev_stmt_info = NULL;
2817 tree dataref_ptr = NULL_TREE;
2818 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2819 int ncopies;
2820 int j;
2821 gimple next_stmt, first_stmt = NULL;
2822 bool strided_store = false;
2823 unsigned int group_size, i;
2824 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
2825 bool inv_p;
2826 VEC(tree,heap) *vec_oprnds = NULL;
2827 bool slp = (slp_node != NULL);
2828 stmt_vec_info first_stmt_vinfo;
2829 unsigned int vec_num;
2830
2831 /* Multiple types in SLP are handled by creating the appropriate number of
2832 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2833 case of SLP. */
2834 if (slp)
2835 ncopies = 1;
2836 else
2837 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2838
2839 gcc_assert (ncopies >= 1);
2840
2841 /* FORNOW. This restriction should be relaxed. */
2842 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
2843 {
2844 if (vect_print_dump_info (REPORT_DETAILS))
2845 fprintf (vect_dump, "multiple types in nested loop.");
2846 return false;
2847 }
2848
2849 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2850 return false;
2851
2852 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2853 return false;
2854
2855 /* Is vectorizable store? */
2856
2857 if (!is_gimple_assign (stmt))
2858 return false;
2859
2860 scalar_dest = gimple_assign_lhs (stmt);
2861 if (TREE_CODE (scalar_dest) != ARRAY_REF
2862 && TREE_CODE (scalar_dest) != INDIRECT_REF
2863 && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
2864 return false;
2865
2866 gcc_assert (gimple_assign_single_p (stmt));
2867 op = gimple_assign_rhs1 (stmt);
2868 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2869 {
2870 if (vect_print_dump_info (REPORT_DETAILS))
2871 fprintf (vect_dump, "use not simple.");
2872 return false;
2873 }
2874
2875 /* The scalar rhs type needs to be trivially convertible to the vector
2876 component type. This should always be the case. */
2877 if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op)))
2878 {
2879 if (vect_print_dump_info (REPORT_DETAILS))
2880 fprintf (vect_dump, "??? operands of different types");
2881 return false;
2882 }
2883
2884 vec_mode = TYPE_MODE (vectype);
2885 /* FORNOW. In some cases can vectorize even if data-type not supported
2886 (e.g. - array initialization with 0). */
2887 if (optab_handler (mov_optab, (int)vec_mode)->insn_code == CODE_FOR_nothing)
2888 return false;
2889
2890 if (!STMT_VINFO_DATA_REF (stmt_info))
2891 return false;
2892
2893 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
2894 {
2895 strided_store = true;
2896 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
2897 if (!vect_strided_store_supported (vectype)
2898 && !PURE_SLP_STMT (stmt_info) && !slp)
2899 return false;
2900
2901 if (first_stmt == stmt)
2902 {
2903 /* STMT is the leader of the group. Check the operands of all the
2904 stmts of the group. */
2905 next_stmt = DR_GROUP_NEXT_DR (stmt_info);
2906 while (next_stmt)
2907 {
2908 gcc_assert (gimple_assign_single_p (next_stmt));
2909 op = gimple_assign_rhs1 (next_stmt);
2910 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2911 {
2912 if (vect_print_dump_info (REPORT_DETAILS))
2913 fprintf (vect_dump, "use not simple.");
2914 return false;
2915 }
2916 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
2917 }
2918 }
2919 }
2920
2921 if (!vec_stmt) /* transformation not required. */
2922 {
2923 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2924 vect_model_store_cost (stmt_info, ncopies, dt, NULL);
2925 return true;
2926 }
2927
2928 /** Transform. **/
2929
2930 if (strided_store)
2931 {
2932 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2933 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
2934
2935 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
2936
2937 /* FORNOW */
2938 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
2939
2940 /* We vectorize all the stmts of the interleaving group when we
2941 reach the last stmt in the group. */
2942 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
2943 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
2944 && !slp)
2945 {
2946 *vec_stmt = NULL;
2947 return true;
2948 }
2949
2950 if (slp)
2951 strided_store = false;
2952
2953 /* VEC_NUM is the number of vect stmts to be created for this group. */
2954 if (slp)
2955 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2956 else
2957 vec_num = group_size;
2958 }
2959 else
2960 {
2961 first_stmt = stmt;
2962 first_dr = dr;
2963 group_size = vec_num = 1;
2964 first_stmt_vinfo = stmt_info;
2965 }
2966
2967 if (vect_print_dump_info (REPORT_DETAILS))
2968 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
2969
2970 dr_chain = VEC_alloc (tree, heap, group_size);
2971 oprnds = VEC_alloc (tree, heap, group_size);
2972
2973 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
2974 gcc_assert (alignment_support_scheme);
2975 gcc_assert (alignment_support_scheme == dr_aligned); /* FORNOW */
2976
2977 /* In case the vectorization factor (VF) is bigger than the number
2978 of elements that we can fit in a vectype (nunits), we have to generate
2979 more than one vector stmt - i.e - we need to "unroll" the
2980 vector stmt by a factor VF/nunits. For more details see documentation in
2981 vect_get_vec_def_for_stmt_copy. */
2982
2983 /* In case of interleaving (non-unit strided access):
2984
2985 S1: &base + 2 = x2
2986 S2: &base = x0
2987 S3: &base + 1 = x1
2988 S4: &base + 3 = x3
2989
2990 We create vectorized stores starting from base address (the access of the
2991 first stmt in the chain (S2 in the above example), when the last store stmt
2992 of the chain (S4) is reached:
2993
2994 VS1: &base = vx2
2995 VS2: &base + vec_size*1 = vx0
2996 VS3: &base + vec_size*2 = vx1
2997 VS4: &base + vec_size*3 = vx3
2998
2999 Then permutation statements are generated:
3000
3001 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3002 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3003 ...
3004
3005 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3006 (the order of the data-refs in the output of vect_permute_store_chain
3007 corresponds to the order of scalar stmts in the interleaving chain - see
3008 the documentation of vect_permute_store_chain()).
3009
3010 In case of both multiple types and interleaving, above vector stores and
3011 permutation stmts are created for every copy. The result vector stmts are
3012 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3013 STMT_VINFO_RELATED_STMT for the next copies.
3014 */
3015
3016 prev_stmt_info = NULL;
3017 for (j = 0; j < ncopies; j++)
3018 {
3019 gimple new_stmt;
3020 gimple ptr_incr;
3021
3022 if (j == 0)
3023 {
3024 if (slp)
3025 {
3026 /* Get vectorized arguments for SLP_NODE. */
3027 vect_get_slp_defs (slp_node, &vec_oprnds, NULL);
3028
3029 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3030 }
3031 else
3032 {
3033 /* For interleaved stores we collect vectorized defs for all the
3034 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3035 used as an input to vect_permute_store_chain(), and OPRNDS as
3036 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3037
3038 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3039 OPRNDS are of size 1. */
3040 next_stmt = first_stmt;
3041 for (i = 0; i < group_size; i++)
3042 {
3043 /* Since gaps are not supported for interleaved stores,
3044 GROUP_SIZE is the exact number of stmts in the chain.
3045 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3046 there is no interleaving, GROUP_SIZE is 1, and only one
3047 iteration of the loop will be executed. */
3048 gcc_assert (next_stmt
3049 && gimple_assign_single_p (next_stmt));
3050 op = gimple_assign_rhs1 (next_stmt);
3051
3052 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3053 NULL);
3054 VEC_quick_push(tree, dr_chain, vec_oprnd);
3055 VEC_quick_push(tree, oprnds, vec_oprnd);
3056 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3057 }
3058 }
3059
3060 /* We should have caught mismatched types earlier. */
3061 gcc_assert (useless_type_conversion_p (vectype,
3062 TREE_TYPE (vec_oprnd)));
3063 dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE,
3064 &dummy, &ptr_incr, false,
3065 &inv_p);
3066 gcc_assert (!inv_p);
3067 }
3068 else
3069 {
3070 /* For interleaved stores we created vectorized defs for all the
3071 defs stored in OPRNDS in the previous iteration (previous copy).
3072 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3073 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3074 next copy.
3075 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3076 OPRNDS are of size 1. */
3077 for (i = 0; i < group_size; i++)
3078 {
3079 op = VEC_index (tree, oprnds, i);
3080 vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
3081 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3082 VEC_replace(tree, dr_chain, i, vec_oprnd);
3083 VEC_replace(tree, oprnds, i, vec_oprnd);
3084 }
3085 dataref_ptr =
3086 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
3087 }
3088
3089 if (strided_store)
3090 {
3091 result_chain = VEC_alloc (tree, heap, group_size);
3092 /* Permute. */
3093 if (!vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
3094 &result_chain))
3095 return false;
3096 }
3097
3098 next_stmt = first_stmt;
3099 for (i = 0; i < vec_num; i++)
3100 {
3101 if (i > 0)
3102 /* Bump the vector pointer. */
3103 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3104 NULL_TREE);
3105
3106 if (slp)
3107 vec_oprnd = VEC_index (tree, vec_oprnds, i);
3108 else if (strided_store)
3109 /* For strided stores vectorized defs are interleaved in
3110 vect_permute_store_chain(). */
3111 vec_oprnd = VEC_index (tree, result_chain, i);
3112
3113 data_ref = build_fold_indirect_ref (dataref_ptr);
3114 /* If accesses through a pointer to vectype do not alias the original
3115 memory reference we have a problem. This should never happen. */
3116 gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref),
3117 get_alias_set (gimple_assign_lhs (stmt))));
3118
3119 /* Arguments are ready. Create the new vector stmt. */
3120 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
3121 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3122 mark_symbols_for_renaming (new_stmt);
3123
3124 if (slp)
3125 continue;
3126
3127 if (j == 0)
3128 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3129 else
3130 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3131
3132 prev_stmt_info = vinfo_for_stmt (new_stmt);
3133 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3134 if (!next_stmt)
3135 break;
3136 }
3137 }
3138
3139 VEC_free (tree, heap, dr_chain);
3140 VEC_free (tree, heap, oprnds);
3141 if (result_chain)
3142 VEC_free (tree, heap, result_chain);
3143
3144 return true;
3145 }
3146
3147 /* Function vectorizable_load.
3148
3149 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
3150 can be vectorized.
3151 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3152 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3153 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3154
3155 static bool
3156 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3157 slp_tree slp_node, slp_instance slp_node_instance)
3158 {
3159 tree scalar_dest;
3160 tree vec_dest = NULL;
3161 tree data_ref = NULL;
3162 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3163 stmt_vec_info prev_stmt_info;
3164 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3165 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3166 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3167 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3168 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
3169 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3170 tree new_temp;
3171 int mode;
3172 gimple new_stmt = NULL;
3173 tree dummy;
3174 enum dr_alignment_support alignment_support_scheme;
3175 tree dataref_ptr = NULL_TREE;
3176 gimple ptr_incr;
3177 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3178 int ncopies;
3179 int i, j, group_size;
3180 tree msq = NULL_TREE, lsq;
3181 tree offset = NULL_TREE;
3182 tree realignment_token = NULL_TREE;
3183 gimple phi = NULL;
3184 VEC(tree,heap) *dr_chain = NULL;
3185 bool strided_load = false;
3186 gimple first_stmt;
3187 tree scalar_type;
3188 bool inv_p;
3189 bool compute_in_loop = false;
3190 struct loop *at_loop;
3191 int vec_num;
3192 bool slp = (slp_node != NULL);
3193 bool slp_perm = false;
3194 enum tree_code code;
3195
3196 /* Multiple types in SLP are handled by creating the appropriate number of
3197 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3198 case of SLP. */
3199 if (slp)
3200 ncopies = 1;
3201 else
3202 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3203
3204 gcc_assert (ncopies >= 1);
3205
3206 /* FORNOW. This restriction should be relaxed. */
3207 if (nested_in_vect_loop && ncopies > 1)
3208 {
3209 if (vect_print_dump_info (REPORT_DETAILS))
3210 fprintf (vect_dump, "multiple types in nested loop.");
3211 return false;
3212 }
3213
3214 if (slp && SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
3215 slp_perm = true;
3216
3217 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3218 return false;
3219
3220 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3221 return false;
3222
3223 /* Is vectorizable load? */
3224 if (!is_gimple_assign (stmt))
3225 return false;
3226
3227 scalar_dest = gimple_assign_lhs (stmt);
3228 if (TREE_CODE (scalar_dest) != SSA_NAME)
3229 return false;
3230
3231 code = gimple_assign_rhs_code (stmt);
3232 if (code != ARRAY_REF
3233 && code != INDIRECT_REF
3234 && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
3235 return false;
3236
3237 if (!STMT_VINFO_DATA_REF (stmt_info))
3238 return false;
3239
3240 scalar_type = TREE_TYPE (DR_REF (dr));
3241 mode = (int) TYPE_MODE (vectype);
3242
3243 /* FORNOW. In some cases can vectorize even if data-type not supported
3244 (e.g. - data copies). */
3245 if (optab_handler (mov_optab, mode)->insn_code == CODE_FOR_nothing)
3246 {
3247 if (vect_print_dump_info (REPORT_DETAILS))
3248 fprintf (vect_dump, "Aligned load, but unsupported type.");
3249 return false;
3250 }
3251
3252 /* The vector component type needs to be trivially convertible to the
3253 scalar lhs. This should always be the case. */
3254 if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), TREE_TYPE (vectype)))
3255 {
3256 if (vect_print_dump_info (REPORT_DETAILS))
3257 fprintf (vect_dump, "??? operands of different types");
3258 return false;
3259 }
3260
3261 /* Check if the load is a part of an interleaving chain. */
3262 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3263 {
3264 strided_load = true;
3265 /* FORNOW */
3266 gcc_assert (! nested_in_vect_loop);
3267
3268 /* Check if interleaving is supported. */
3269 if (!vect_strided_load_supported (vectype)
3270 && !PURE_SLP_STMT (stmt_info) && !slp)
3271 return false;
3272 }
3273
3274 if (!vec_stmt) /* transformation not required. */
3275 {
3276 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
3277 vect_model_load_cost (stmt_info, ncopies, NULL);
3278 return true;
3279 }
3280
3281 if (vect_print_dump_info (REPORT_DETAILS))
3282 fprintf (vect_dump, "transform load.");
3283
3284 /** Transform. **/
3285
3286 if (strided_load)
3287 {
3288 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3289 /* Check if the chain of loads is already vectorized. */
3290 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
3291 {
3292 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3293 return true;
3294 }
3295 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3296 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3297
3298 /* VEC_NUM is the number of vect stmts to be created for this group. */
3299 if (slp)
3300 {
3301 strided_load = false;
3302 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3303 }
3304 else
3305 vec_num = group_size;
3306
3307 dr_chain = VEC_alloc (tree, heap, vec_num);
3308 }
3309 else
3310 {
3311 first_stmt = stmt;
3312 first_dr = dr;
3313 group_size = vec_num = 1;
3314 }
3315
3316 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
3317 gcc_assert (alignment_support_scheme);
3318
3319 /* In case the vectorization factor (VF) is bigger than the number
3320 of elements that we can fit in a vectype (nunits), we have to generate
3321 more than one vector stmt - i.e - we need to "unroll" the
3322 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3323 from one copy of the vector stmt to the next, in the field
3324 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3325 stages to find the correct vector defs to be used when vectorizing
3326 stmts that use the defs of the current stmt. The example below illustrates
3327 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3328 4 vectorized stmts):
3329
3330 before vectorization:
3331 RELATED_STMT VEC_STMT
3332 S1: x = memref - -
3333 S2: z = x + 1 - -
3334
3335 step 1: vectorize stmt S1:
3336 We first create the vector stmt VS1_0, and, as usual, record a
3337 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
3338 Next, we create the vector stmt VS1_1, and record a pointer to
3339 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
3340 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
3341 stmts and pointers:
3342 RELATED_STMT VEC_STMT
3343 VS1_0: vx0 = memref0 VS1_1 -
3344 VS1_1: vx1 = memref1 VS1_2 -
3345 VS1_2: vx2 = memref2 VS1_3 -
3346 VS1_3: vx3 = memref3 - -
3347 S1: x = load - VS1_0
3348 S2: z = x + 1 - -
3349
3350 See in documentation in vect_get_vec_def_for_stmt_copy for how the
3351 information we recorded in RELATED_STMT field is used to vectorize
3352 stmt S2. */
3353
3354 /* In case of interleaving (non-unit strided access):
3355
3356 S1: x2 = &base + 2
3357 S2: x0 = &base
3358 S3: x1 = &base + 1
3359 S4: x3 = &base + 3
3360
3361 Vectorized loads are created in the order of memory accesses
3362 starting from the access of the first stmt of the chain:
3363
3364 VS1: vx0 = &base
3365 VS2: vx1 = &base + vec_size*1
3366 VS3: vx3 = &base + vec_size*2
3367 VS4: vx4 = &base + vec_size*3
3368
3369 Then permutation statements are generated:
3370
3371 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
3372 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
3373 ...
3374
3375 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3376 (the order of the data-refs in the output of vect_permute_load_chain
3377 corresponds to the order of scalar stmts in the interleaving chain - see
3378 the documentation of vect_permute_load_chain()).
3379 The generation of permutation stmts and recording them in
3380 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
3381
3382 In case of both multiple types and interleaving, the vector loads and
3383 permutation stmts above are created for every copy. The result vector stmts
3384 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3385 STMT_VINFO_RELATED_STMT for the next copies. */
3386
3387 /* If the data reference is aligned (dr_aligned) or potentially unaligned
3388 on a target that supports unaligned accesses (dr_unaligned_supported)
3389 we generate the following code:
3390 p = initial_addr;
3391 indx = 0;
3392 loop {
3393 p = p + indx * vectype_size;
3394 vec_dest = *(p);
3395 indx = indx + 1;
3396 }
3397
3398 Otherwise, the data reference is potentially unaligned on a target that
3399 does not support unaligned accesses (dr_explicit_realign_optimized) -
3400 then generate the following code, in which the data in each iteration is
3401 obtained by two vector loads, one from the previous iteration, and one
3402 from the current iteration:
3403 p1 = initial_addr;
3404 msq_init = *(floor(p1))
3405 p2 = initial_addr + VS - 1;
3406 realignment_token = call target_builtin;
3407 indx = 0;
3408 loop {
3409 p2 = p2 + indx * vectype_size
3410 lsq = *(floor(p2))
3411 vec_dest = realign_load (msq, lsq, realignment_token)
3412 indx = indx + 1;
3413 msq = lsq;
3414 } */
3415
3416 /* If the misalignment remains the same throughout the execution of the
3417 loop, we can create the init_addr and permutation mask at the loop
3418 preheader. Otherwise, they need to be created inside the loop. The
3419 misalignment can change only when vectorizing memory accesses in an
3420 inner-loop nested within an outer-loop that is being vectorized. */
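
/* For example (a sketch): with 4-element vectors, an inner-loop access
   that advances by 5 elements per iteration of the (vectorized) outer
   loop starts 5 mod 4 = 1 element further into a vector on each outer
   iteration, so its misalignment is not invariant and the realignment
   data must be computed inside the loop - which is what the check below
   detects via DR_STEP. */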
3421
3422 if (nested_in_vect_loop_p (loop, stmt)
3423 && (TREE_INT_CST_LOW (DR_STEP (dr))
3424 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
3425 {
3426 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
3427 compute_in_loop = true;
3428 }
3429
3430 if ((alignment_support_scheme == dr_explicit_realign_optimized
3431 || alignment_support_scheme == dr_explicit_realign)
3432 && !compute_in_loop)
3433 {
3434 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
3435 alignment_support_scheme, NULL_TREE,
3436 &at_loop);
3437 if (alignment_support_scheme == dr_explicit_realign_optimized)
3438 {
3439 phi = SSA_NAME_DEF_STMT (msq);
3440 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
3441 }
3442 }
3443 else
3444 at_loop = loop;
3445
3446 prev_stmt_info = NULL;
3447 for (j = 0; j < ncopies; j++)
3448 {
3449 /* 1. Create the vector pointer update chain. */
3450 if (j == 0)
3451 dataref_ptr = vect_create_data_ref_ptr (first_stmt,
3452 at_loop, offset,
3453 &dummy, &ptr_incr, false,
3454 &inv_p);
3455 else
3456 dataref_ptr =
3457 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
3458
3459 for (i = 0; i < vec_num; i++)
3460 {
3461 if (i > 0)
3462 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3463 NULL_TREE);
3464
3465 /* 2. Create the vector-load in the loop. */
3466 switch (alignment_support_scheme)
3467 {
3468 case dr_aligned:
3469 gcc_assert (aligned_access_p (first_dr));
3470 data_ref = build_fold_indirect_ref (dataref_ptr);
3471 break;
3472 case dr_unaligned_supported:
3473 {
3474 int mis = DR_MISALIGNMENT (first_dr);
3475 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
3476
3477 tmis = size_binop (MULT_EXPR, tmis, size_int (BITS_PER_UNIT));
3478 data_ref =
3479 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
3480 break;
3481 }
3482 case dr_explicit_realign:
3483 {
3484 tree ptr, bump;
3485 tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
3486
3487 if (compute_in_loop)
3488 msq = vect_setup_realignment (first_stmt, gsi,
3489 &realignment_token,
3490 dr_explicit_realign,
3491 dataref_ptr, NULL);
3492
3493 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
3494 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3495 new_stmt = gimple_build_assign (vec_dest, data_ref);
3496 new_temp = make_ssa_name (vec_dest, new_stmt);
3497 gimple_assign_set_lhs (new_stmt, new_temp);
3498 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
3499 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
3500 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3501 msq = new_temp;
3502
3503 bump = size_binop (MULT_EXPR, vs_minus_1,
3504 TYPE_SIZE_UNIT (scalar_type));
3505 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
3506 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
3507 break;
3508 }
3509 case dr_explicit_realign_optimized:
3510 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
3511 break;
3512 default:
3513 gcc_unreachable ();
3514 }
3515 /* If accesses through a pointer to vectype do not alias the original
3516 memory reference we have a problem. This should never happen. */
3517 gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref),
3518 get_alias_set (gimple_assign_rhs1 (stmt))));
3519 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3520 new_stmt = gimple_build_assign (vec_dest, data_ref);
3521 new_temp = make_ssa_name (vec_dest, new_stmt);
3522 gimple_assign_set_lhs (new_stmt, new_temp);
3523 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3524 mark_symbols_for_renaming (new_stmt);
3525
3526 /* 3. Handle explicit realignment if necessary/supported. Create in
3527 loop: vec_dest = realign_load (msq, lsq, realignment_token) */
3528 if (alignment_support_scheme == dr_explicit_realign_optimized
3529 || alignment_support_scheme == dr_explicit_realign)
3530 {
3531 tree tmp;
3532
3533 lsq = gimple_assign_lhs (new_stmt);
3534 if (!realignment_token)
3535 realignment_token = dataref_ptr;
3536 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3537 tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq,
3538 realignment_token);
3539 new_stmt = gimple_build_assign (vec_dest, tmp);
3540 new_temp = make_ssa_name (vec_dest, new_stmt);
3541 gimple_assign_set_lhs (new_stmt, new_temp);
3542 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3543
3544 if (alignment_support_scheme == dr_explicit_realign_optimized)
3545 {
3546 gcc_assert (phi);
3547 if (i == vec_num - 1 && j == ncopies - 1)
3548 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop));
3549 msq = lsq;
3550 }
3551 }
3552
3553 /* 4. Handle invariant-load. */
3554 if (inv_p)
3555 {
3556 gcc_assert (!strided_load);
3557 gcc_assert (nested_in_vect_loop_p (loop, stmt));
3558 if (j == 0)
3559 {
3560 int k;
3561 tree t = NULL_TREE;
3562 tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
3563
3564 /* CHECKME: bitpos depends on endianness? */
3565 bitpos = bitsize_zero_node;
3566 vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
3567 bitsize, bitpos);
3568 vec_dest =
3569 vect_create_destination_var (scalar_dest, NULL_TREE);
3570 new_stmt = gimple_build_assign (vec_dest, vec_inv);
3571 new_temp = make_ssa_name (vec_dest, new_stmt);
3572 gimple_assign_set_lhs (new_stmt, new_temp);
3573 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3574
3575 for (k = nunits - 1; k >= 0; --k)
3576 t = tree_cons (NULL_TREE, new_temp, t);
3577 /* FIXME: use build_constructor directly. */
3578 vec_inv = build_constructor_from_list (vectype, t);
3579 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
3580 new_stmt = SSA_NAME_DEF_STMT (new_temp);
3581 }
3582 else
3583 gcc_unreachable (); /* FORNOW. */
3584 }
3585
3586 /* Collect vector loads and later create their permutation in
3587 vect_transform_strided_load (). */
3588 if (strided_load || slp_perm)
3589 VEC_quick_push (tree, dr_chain, new_temp);
3590
3591 /* Store vector loads in the corresponding SLP_NODE. */
3592 if (slp && !slp_perm)
3593 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3594 }
3595
3596 if (slp && !slp_perm)
3597 continue;
3598
3599 if (slp_perm)
3600 {
3601 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi,
3602 LOOP_VINFO_VECT_FACTOR (loop_vinfo),
3603 slp_node_instance, false))
3604 {
3605 VEC_free (tree, heap, dr_chain);
3606 return false;
3607 }
3608 }
3609 else
3610 {
3611 if (strided_load)
3612 {
3613 if (!vect_transform_strided_load (stmt, dr_chain, group_size, gsi))
3614 return false;
3615
3616 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3617 VEC_free (tree, heap, dr_chain);
3618 dr_chain = VEC_alloc (tree, heap, group_size);
3619 }
3620 else
3621 {
3622 if (j == 0)
3623 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3624 else
3625 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3626 prev_stmt_info = vinfo_for_stmt (new_stmt);
3627 }
3628 }
3629 }
3630
3631 if (dr_chain)
3632 VEC_free (tree, heap, dr_chain);
3633
3634 return true;
3635 }
3636
3637 /* Function vect_is_simple_cond.
3638
3639 Input:
3640 LOOP - the loop that is being vectorized.
3641 COND - Condition that is checked for simple use.
3642
3643 Returns whether a COND can be vectorized. Checks whether
3644 the condition operands are supportable using vect_is_simple_use. */
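
/* For example (a sketch, with illustrative SSA names): conditions such as
   "x_1 < y_2" or "x_1 <= 10", where x_1 and y_2 have vectorizable defs,
   pass this check; an expression that is not a comparison (e.g. a plain
   SSA name used as a boolean) does not. */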
3645
3646 static bool
3647 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
3648 {
3649 tree lhs, rhs;
3650 tree def;
3651 enum vect_def_type dt;
3652
3653 if (!COMPARISON_CLASS_P (cond))
3654 return false;
3655
3656 lhs = TREE_OPERAND (cond, 0);
3657 rhs = TREE_OPERAND (cond, 1);
3658
3659 if (TREE_CODE (lhs) == SSA_NAME)
3660 {
3661 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
3662 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
3663 return false;
3664 }
3665 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
3666 && TREE_CODE (lhs) != FIXED_CST)
3667 return false;
3668
3669 if (TREE_CODE (rhs) == SSA_NAME)
3670 {
3671 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
3672 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
3673 return false;
3674 }
3675 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
3676 && TREE_CODE (rhs) != FIXED_CST)
3677 return false;
3678
3679 return true;
3680 }
3681
3682 /* vectorizable_condition.
3683
3684 Check if STMT is a conditional modify expression that can be vectorized.
3685 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3686 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
3687 at BSI.
3688
3689 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
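
/* For example (a sketch, with illustrative names): the scalar stmt

   c_1 = a_2 < b_3 ? x_4 : y_5;

   is replaced by a vector stmt of the form

   vc_1 = VEC_COND_EXPR <va_2 < vb_3, vx_4, vy_5>;

   where va_2, vb_3, vx_4 and vy_5 are the vector defs created below for
   the corresponding scalar operands. */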
3690
3691 static bool
3692 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
3693 gimple *vec_stmt)
3694 {
3695 tree scalar_dest = NULL_TREE;
3696 tree vec_dest = NULL_TREE;
3697 tree op = NULL_TREE;
3698 tree cond_expr, then_clause, else_clause;
3699 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3700 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3701 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
3702 tree vec_compare, vec_cond_expr;
3703 tree new_temp;
3704 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3705 enum machine_mode vec_mode;
3706 tree def;
3707 enum vect_def_type dt;
3708 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3709 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3710 enum tree_code code;
3711
3712 gcc_assert (ncopies >= 1);
3713 if (ncopies > 1)
3714 return false; /* FORNOW */
3715
3716 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3717 return false;
3718
3719 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3720 return false;
3721
3722 /* FORNOW: SLP not supported. */
3723 if (STMT_SLP_TYPE (stmt_info))
3724 return false;
3725
3726 /* FORNOW: not yet supported. */
3727 if (STMT_VINFO_LIVE_P (stmt_info))
3728 {
3729 if (vect_print_dump_info (REPORT_DETAILS))
3730 fprintf (vect_dump, "value used after loop.");
3731 return false;
3732 }
3733
3734 /* Is this a vectorizable conditional operation? */
3735 if (!is_gimple_assign (stmt))
3736 return false;
3737
3738 code = gimple_assign_rhs_code (stmt);
3739
3740 if (code != COND_EXPR)
3741 return false;
3742
3743 gcc_assert (gimple_assign_single_p (stmt));
3744 op = gimple_assign_rhs1 (stmt);
3745 cond_expr = TREE_OPERAND (op, 0);
3746 then_clause = TREE_OPERAND (op, 1);
3747 else_clause = TREE_OPERAND (op, 2);
3748
3749 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
3750 return false;
3751
3752 /* We do not handle two different vector types for the condition
3753 and the values. */
3754 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
3755 return false;
3756
3757 if (TREE_CODE (then_clause) == SSA_NAME)
3758 {
3759 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
3760 if (!vect_is_simple_use (then_clause, loop_vinfo,
3761 &then_def_stmt, &def, &dt))
3762 return false;
3763 }
3764 else if (TREE_CODE (then_clause) != INTEGER_CST
3765 && TREE_CODE (then_clause) != REAL_CST
3766 && TREE_CODE (then_clause) != FIXED_CST)
3767 return false;
3768
3769 if (TREE_CODE (else_clause) == SSA_NAME)
3770 {
3771 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
3772 if (!vect_is_simple_use (else_clause, loop_vinfo,
3773 &else_def_stmt, &def, &dt))
3774 return false;
3775 }
3776 else if (TREE_CODE (else_clause) != INTEGER_CST
3777 && TREE_CODE (else_clause) != REAL_CST
3778 && TREE_CODE (else_clause) != FIXED_CST)
3779 return false;
3780
3781
3782 vec_mode = TYPE_MODE (vectype);
3783
3784 if (!vec_stmt)
3785 {
3786 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
3787 return expand_vec_cond_expr_p (op, vec_mode);
3788 }
3789
3790 /* Transform */
3791
3792 /* Handle def. */
3793 scalar_dest = gimple_assign_lhs (stmt);
3794 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3795
3796 /* Handle cond expr. */
3797 vec_cond_lhs =
3798 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
3799 vec_cond_rhs =
3800 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
3801 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
3802 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
3803
3804 /* Arguments are ready. Create the new vector stmt. */
3805 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
3806 vec_cond_lhs, vec_cond_rhs);
3807 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
3808 vec_compare, vec_then_clause, vec_else_clause);
3809
3810 *vec_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
3811 new_temp = make_ssa_name (vec_dest, *vec_stmt);
3812 gimple_assign_set_lhs (*vec_stmt, new_temp);
3813 vect_finish_stmt_generation (stmt, *vec_stmt, gsi);
3814
3815 return true;
3816 }
3817
3818
3819 /* Make sure the statement is vectorizable. */
3820
3821 bool
3822 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize)
3823 {
3824 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3825 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
3826 bool ok;
3827
3828 if (vect_print_dump_info (REPORT_DETAILS))
3829 {
3830 fprintf (vect_dump, "==> examining statement: ");
3831 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3832 }
3833
3834 /* Skip stmts that do not need to be vectorized. In loops this is expected
3835 to include:
3836 - the COND_EXPR which is the loop exit condition
3837 - any LABEL_EXPRs in the loop
3838 - computations that are used only for array indexing or loop control.
3839 In basic blocks we only analyze statements that are a part of some SLP
3840 instance; therefore, all the statements are relevant. */
3841
3842 if (!STMT_VINFO_RELEVANT_P (stmt_info)
3843 && !STMT_VINFO_LIVE_P (stmt_info))
3844 {
3845 if (vect_print_dump_info (REPORT_DETAILS))
3846 fprintf (vect_dump, "irrelevant.");
3847
3848 return true;
3849 }
3850
3851 switch (STMT_VINFO_DEF_TYPE (stmt_info))
3852 {
3853 case vect_internal_def:
3854 break;
3855
3856 case vect_reduction_def:
3857 gcc_assert (relevance == vect_used_in_outer
3858 || relevance == vect_used_in_outer_by_reduction
3859 || relevance == vect_unused_in_scope);
3860 break;
3861
3862 case vect_induction_def:
3863 case vect_constant_def:
3864 case vect_external_def:
3865 case vect_unknown_def_type:
3866 default:
3867 gcc_unreachable ();
3868 }
3869
3870 if (STMT_VINFO_RELEVANT_P (stmt_info))
3871 {
3872 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
3873 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
3874 *need_to_vectorize = true;
3875 }
3876
3877 ok = true;
3878 if (STMT_VINFO_RELEVANT_P (stmt_info)
3879 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
3880 ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
3881 || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
3882 || vectorizable_conversion (stmt, NULL, NULL, NULL)
3883 || vectorizable_operation (stmt, NULL, NULL, NULL)
3884 || vectorizable_assignment (stmt, NULL, NULL, NULL)
3885 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
3886 || vectorizable_call (stmt, NULL, NULL)
3887 || vectorizable_store (stmt, NULL, NULL, NULL)
3888 || vectorizable_condition (stmt, NULL, NULL)
3889 || vectorizable_reduction (stmt, NULL, NULL));
3890
3891 if (!ok)
3892 {
3893 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3894 {
3895 fprintf (vect_dump, "not vectorized: relevant stmt not ");
3896 fprintf (vect_dump, "supported: ");
3897 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3898 }
3899
3900 return false;
3901 }
3902
3903 /* Stmts that are (also) "live" (i.e., used outside the loop)
3904 need extra handling, except for vectorizable reductions. */
3905 if (STMT_VINFO_LIVE_P (stmt_info)
3906 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
3907 ok = vectorizable_live_operation (stmt, NULL, NULL);
3908
3909 if (!ok)
3910 {
3911 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3912 {
3913 fprintf (vect_dump, "not vectorized: live stmt not ");
3914 fprintf (vect_dump, "supported: ");
3915 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3916 }
3917
3918 return false;
3919 }
3920
3921 if (!PURE_SLP_STMT (stmt_info))
3922 {
3923 /* Groups of strided accesses whose size is not a power of 2 are not
3924 vectorizable yet using loop-vectorization. Therefore, if this stmt
3925 feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and
3926 loop-based vectorized), the loop cannot be vectorized. */
3927 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
3928 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
3929 DR_GROUP_FIRST_DR (stmt_info)))) == -1)
3930 {
3931 if (vect_print_dump_info (REPORT_DETAILS))
3932 {
3933 fprintf (vect_dump, "not vectorized: the size of group "
3934 "of strided accesses is not a power of 2");
3935 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3936 }
3937
3938 return false;
3939 }
3940 }
3941
3942 return true;
3943 }
3944
3945
3946 /* Function vect_transform_stmt.
3947
3948 Create a vectorized stmt to replace STMT, and insert it at BSI. */
3949
3950 bool
3951 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
3952 bool *strided_store, slp_tree slp_node,
3953 slp_instance slp_node_instance)
3954 {
3955 bool is_store = false;
3956 gimple vec_stmt = NULL;
3957 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3958 gimple orig_stmt_in_pattern;
3959 bool done;
3960 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3961 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3962
3963 switch (STMT_VINFO_TYPE (stmt_info))
3964 {
3965 case type_demotion_vec_info_type:
3966 done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
3967 gcc_assert (done);
3968 break;
3969
3970 case type_promotion_vec_info_type:
3971 done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
3972 gcc_assert (done);
3973 break;
3974
3975 case type_conversion_vec_info_type:
3976 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
3977 gcc_assert (done);
3978 break;
3979
3980 case induc_vec_info_type:
3981 gcc_assert (!slp_node);
3982 done = vectorizable_induction (stmt, gsi, &vec_stmt);
3983 gcc_assert (done);
3984 break;
3985
3986 case op_vec_info_type:
3987 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
3988 gcc_assert (done);
3989 break;
3990
3991 case assignment_vec_info_type:
3992 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
3993 gcc_assert (done);
3994 break;
3995
3996 case load_vec_info_type:
3997 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
3998 slp_node_instance);
3999 gcc_assert (done);
4000 break;
4001
4002 case store_vec_info_type:
4003 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
4004 gcc_assert (done);
4005 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
4006 {
4007 /* In case of interleaving, the whole chain is vectorized when the
4008 last store in the chain is reached. Store stmts before the last
4009 one are skipped, and their vec_stmt_info shouldn't be freed
4010 meanwhile. */
4011 *strided_store = true;
4012 if (STMT_VINFO_VEC_STMT (stmt_info))
4013 is_store = true;
4014 }
4015 else
4016 is_store = true;
4017 break;
4018
4019 case condition_vec_info_type:
4020 gcc_assert (!slp_node);
4021 done = vectorizable_condition (stmt, gsi, &vec_stmt);
4022 gcc_assert (done);
4023 break;
4024
4025 case call_vec_info_type:
4026 gcc_assert (!slp_node);
4027 done = vectorizable_call (stmt, gsi, &vec_stmt);
4028 break;
4029
4030 case reduc_vec_info_type:
4031 gcc_assert (!slp_node);
4032 done = vectorizable_reduction (stmt, gsi, &vec_stmt);
4033 gcc_assert (done);
4034 break;
4035
4036 default:
4037 if (!STMT_VINFO_LIVE_P (stmt_info))
4038 {
4039 if (vect_print_dump_info (REPORT_DETAILS))
4040 fprintf (vect_dump, "stmt not supported.");
4041 gcc_unreachable ();
4042 }
4043 }
4044
4045 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
4046 is being vectorized, but outside the immediately enclosing loop. */
4047 if (vec_stmt
4048 && nested_in_vect_loop_p (loop, stmt)
4049 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
4050 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
4051 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer_by_reduction))
4052 {
4053 struct loop *innerloop = loop->inner;
4054 imm_use_iterator imm_iter;
4055 use_operand_p use_p;
4056 tree scalar_dest;
4057 gimple exit_phi;
4058
4059 if (vect_print_dump_info (REPORT_DETAILS))
4060 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
4061
4062 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
4063 (to be used when vectorizing outer-loop stmts that use the DEF of
4064 STMT). */
4065 if (gimple_code (stmt) == GIMPLE_PHI)
4066 scalar_dest = PHI_RESULT (stmt);
4067 else
4068 scalar_dest = gimple_assign_lhs (stmt);
4069
4070 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4071 {
4072 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
4073 {
4074 exit_phi = USE_STMT (use_p);
4075 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
4076 }
4077 }
4078 }
4079
4080 /* Handle stmts whose DEF is used outside the loop-nest that is
4081 being vectorized. */
4082 if (STMT_VINFO_LIVE_P (stmt_info)
4083 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
4084 {
4085 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
4086 gcc_assert (done);
4087 }
4088
4089 if (vec_stmt)
4090 {
4091 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
4092 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
4093 if (orig_stmt_in_pattern)
4094 {
4095 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
4096 /* STMT was inserted by the vectorizer to replace a computation idiom.
4097 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
4098 computed this idiom. We need to record a pointer to VEC_STMT in
4099 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
4100 documentation of vect_pattern_recog. */
4101 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
4102 {
4103 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4104 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
4105 }
4106 }
4107 }
4108
4109 return is_store;
4110 }
4111
4112
4113 /* Remove a group of stores (for SLP or interleaving), free their
4114 stmt_vec_info. */
4115
4116 void
4117 vect_remove_stores (gimple first_stmt)
4118 {
4119 gimple next = first_stmt;
4120 gimple tmp;
4121 gimple_stmt_iterator next_si;
4122
4123 while (next)
4124 {
4125 /* Free the attached stmt_vec_info and remove the stmt. */
4126 next_si = gsi_for_stmt (next);
4127 gsi_remove (&next_si, true);
4128 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
4129 free_stmt_vec_info (next);
4130 next = tmp;
4131 }
4132 }
4133
4134
4135 /* Function new_stmt_vec_info.
4136
4137 Create and initialize a new stmt_vec_info struct for STMT. */
4138
4139 stmt_vec_info
4140 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo)
4141 {
4142 stmt_vec_info res;
4143 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
4144
4145 STMT_VINFO_TYPE (res) = undef_vec_info_type;
4146 STMT_VINFO_STMT (res) = stmt;
4147 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
4148 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
4149 STMT_VINFO_LIVE_P (res) = false;
4150 STMT_VINFO_VECTYPE (res) = NULL;
4151 STMT_VINFO_VEC_STMT (res) = NULL;
4152 STMT_VINFO_IN_PATTERN_P (res) = false;
4153 STMT_VINFO_RELATED_STMT (res) = NULL;
4154 STMT_VINFO_DATA_REF (res) = NULL;
4155
4156 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
4157 STMT_VINFO_DR_OFFSET (res) = NULL;
4158 STMT_VINFO_DR_INIT (res) = NULL;
4159 STMT_VINFO_DR_STEP (res) = NULL;
4160 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
4161
4162 if (gimple_code (stmt) == GIMPLE_PHI
4163 && is_loop_header_bb_p (gimple_bb (stmt)))
4164 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
4165 else
4166 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
4167
4168 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
4169 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
4170 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
4171 STMT_SLP_TYPE (res) = loop_vect;
4172 DR_GROUP_FIRST_DR (res) = NULL;
4173 DR_GROUP_NEXT_DR (res) = NULL;
4174 DR_GROUP_SIZE (res) = 0;
4175 DR_GROUP_STORE_COUNT (res) = 0;
4176 DR_GROUP_GAP (res) = 0;
4177 DR_GROUP_SAME_DR_STMT (res) = NULL;
4178 DR_GROUP_READ_WRITE_DEPENDENCE (res) = false;
4179
4180 return res;
4181 }
4182
4183
4184 /* Create a vector for stmt_vec_info structs. */
4185
4186 void
4187 init_stmt_vec_info_vec (void)
4188 {
4189 gcc_assert (!stmt_vec_info_vec);
4190 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
4191 }
4192
4193
4194 /* Free the vector of stmt_vec_info structs. */
4195
4196 void
4197 free_stmt_vec_info_vec (void)
4198 {
4199 gcc_assert (stmt_vec_info_vec);
4200 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
4201 }
4202
4203
4204 /* Free stmt vectorization related info. */
4205
4206 void
4207 free_stmt_vec_info (gimple stmt)
4208 {
4209 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4210
4211 if (!stmt_info)
4212 return;
4213
4214 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
4215 set_vinfo_for_stmt (stmt, NULL);
4216 free (stmt_info);
4217 }
4218
4219
4220 /* Function get_vectype_for_scalar_type.
4221
4222 Returns the vector type corresponding to SCALAR_TYPE as supported
4223 by the target. */
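
/* For example (a sketch): with a 16-byte UNITS_PER_SIMD_WORD, a 4-byte
   "int" gives a vector type with 16/4 = 4 units (e.g. V4SI where the
   target supports it), while an 8-byte "double" gives a 2-unit vector. */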
4224
4225 tree
4226 get_vectype_for_scalar_type (tree scalar_type)
4227 {
4228 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
4229 int nbytes = GET_MODE_SIZE (inner_mode);
4230 int nunits;
4231 tree vectype;
4232
4233 if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD (inner_mode))
4234 return NULL_TREE;
4235
4236 /* FORNOW: Only a single vector size per mode (UNITS_PER_SIMD_WORD)
4237 is expected. */
4238 nunits = UNITS_PER_SIMD_WORD (inner_mode) / nbytes;
4239
4240 vectype = build_vector_type (scalar_type, nunits);
4241 if (vect_print_dump_info (REPORT_DETAILS))
4242 {
4243 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
4244 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
4245 }
4246
4247 if (!vectype)
4248 return NULL_TREE;
4249
4250 if (vect_print_dump_info (REPORT_DETAILS))
4251 {
4252 fprintf (vect_dump, "vectype: ");
4253 print_generic_expr (vect_dump, vectype, TDF_SLIM);
4254 }
4255
4256 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4257 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
4258 {
4259 if (vect_print_dump_info (REPORT_DETAILS))
4260 fprintf (vect_dump, "mode not supported by target.");
4261 return NULL_TREE;
4262 }
4263
4264 return vectype;
4265 }
4266
4267 /* Function vect_is_simple_use.
4268
4269 Input:
4270 LOOP - the loop that is being vectorized.
4271 OPERAND - operand of a stmt in LOOP.
4272 DEF - the defining stmt in case OPERAND is an SSA_NAME.
4273
4274 Returns whether a stmt with OPERAND can be vectorized.
4275 Supportable operands are constants, loop invariants, and operands that are
4276 defined by the current iteration of the loop. Unsupportable operands are
4277 those that are defined by a previous iteration of the loop (as is the case
4278 in reduction/induction computations). */
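
/* For example (a sketch, with illustrative names): in

   for (i = 0; i < n; i++)
     a[i] = b[i] * c;

   the operand "c" is defined outside the loop (vect_external_def), the
   value loaded from b[i] is defined inside the loop (vect_internal_def),
   and a literal constant operand would be vect_constant_def. */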
4279
4280 bool
4281 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, gimple *def_stmt,
4282 tree *def, enum vect_def_type *dt)
4283 {
4284 basic_block bb;
4285 stmt_vec_info stmt_vinfo;
4286 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4287
4288 *def_stmt = NULL;
4289 *def = NULL_TREE;
4290
4291 if (vect_print_dump_info (REPORT_DETAILS))
4292 {
4293 fprintf (vect_dump, "vect_is_simple_use: operand ");
4294 print_generic_expr (vect_dump, operand, TDF_SLIM);
4295 }
4296
4297 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
4298 {
4299 *dt = vect_constant_def;
4300 return true;
4301 }
4302 if (is_gimple_min_invariant (operand))
4303 {
4304 *def = operand;
4305 *dt = vect_external_def;
4306 return true;
4307 }
4308
4309 if (TREE_CODE (operand) == PAREN_EXPR)
4310 {
4311 if (vect_print_dump_info (REPORT_DETAILS))
4312 fprintf (vect_dump, "non-associatable copy.");
4313 operand = TREE_OPERAND (operand, 0);
4314 }
4315 if (TREE_CODE (operand) != SSA_NAME)
4316 {
4317 if (vect_print_dump_info (REPORT_DETAILS))
4318 fprintf (vect_dump, "not ssa-name.");
4319 return false;
4320 }
4321
4322 *def_stmt = SSA_NAME_DEF_STMT (operand);
4323 if (*def_stmt == NULL)
4324 {
4325 if (vect_print_dump_info (REPORT_DETAILS))
4326 fprintf (vect_dump, "no def_stmt.");
4327 return false;
4328 }
4329
4330 if (vect_print_dump_info (REPORT_DETAILS))
4331 {
4332 fprintf (vect_dump, "def_stmt: ");
4333 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
4334 }
4335
4336 /* An empty stmt is expected only in the case of a function argument
4337 (otherwise we expect a PHI node or a GIMPLE_ASSIGN). */
4338 if (gimple_nop_p (*def_stmt))
4339 {
4340 *def = operand;
4341 *dt = vect_external_def;
4342 return true;
4343 }
4344
4345 bb = gimple_bb (*def_stmt);
4346 if (!flow_bb_inside_loop_p (loop, bb))
4347 *dt = vect_external_def;
4348 else
4349 {
4350 stmt_vinfo = vinfo_for_stmt (*def_stmt);
4351 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
4352 }
4353
4354 if (*dt == vect_unknown_def_type)
4355 {
4356 if (vect_print_dump_info (REPORT_DETAILS))
4357 fprintf (vect_dump, "Unsupported pattern.");
4358 return false;
4359 }
4360
4361 if (vect_print_dump_info (REPORT_DETAILS))
4362 fprintf (vect_dump, "type of def: %d.",*dt);
4363
4364 switch (gimple_code (*def_stmt))
4365 {
4366 case GIMPLE_PHI:
4367 *def = gimple_phi_result (*def_stmt);
4368 break;
4369
4370 case GIMPLE_ASSIGN:
4371 *def = gimple_assign_lhs (*def_stmt);
4372 break;
4373
4374 case GIMPLE_CALL:
4375 *def = gimple_call_lhs (*def_stmt);
4376 if (*def != NULL)
4377 break;
4378 /* FALLTHRU */
4379 default:
4380 if (vect_print_dump_info (REPORT_DETAILS))
4381 fprintf (vect_dump, "unsupported defining stmt: ");
4382 return false;
4383 }
4384
4385 return true;
4386 }
4387
4388
4389 /* Function supportable_widening_operation
4390
4391 Check whether an operation represented by the code CODE is a
4392 widening operation that is supported by the target platform in
4393 vector form (i.e., when operating on arguments of type VECTYPE).
4394
4395 Widening operations we currently support are NOP (CONVERT), FLOAT
4396 and WIDEN_MULT. This function checks if these operations are supported
4397 by the target platform either directly (via vector tree-codes), or via
4398 target builtins.
4399
4400 Output:
4401 - CODE1 and CODE2 are codes of vector operations to be used when
4402 vectorizing the operation, if available.
4403 - DECL1 and DECL2 are decls of target builtin functions to be used
4404 when vectorizing the operation, if available. In this case,
4405 CODE1 and CODE2 are CALL_EXPR.
4406 - MULTI_STEP_CVT determines the number of required intermediate steps in
4407 case of multi-step conversion (like char->short->int - in that case
4408 MULTI_STEP_CVT will be 1).
4409 - INTERM_TYPES contains the intermediate type required to perform the
4410 widening operation (short in the above example). */
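
/* For example (a sketch): widening chars to ints with 16-byte vectors
   cannot be done in a single step, so the conversion is split as
   char -> short -> int: VEC_UNPACK_{HI,LO}_EXPR applied to a char vector
   produce two short vectors (the intermediate type recorded in
   INTERM_TYPES), and applying the same codes again produces int vectors;
   MULTI_STEP_CVT is 1 in that case. */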
4411
4412 bool
4413 supportable_widening_operation (enum tree_code code, gimple stmt, tree vectype,
4414 tree *decl1, tree *decl2,
4415 enum tree_code *code1, enum tree_code *code2,
4416 int *multi_step_cvt,
4417 VEC (tree, heap) **interm_types)
4418 {
4419 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4420 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4421 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
4422 bool ordered_p;
4423 enum machine_mode vec_mode;
4424 enum insn_code icode1, icode2;
4425 optab optab1, optab2;
4426 tree type = gimple_expr_type (stmt);
4427 tree wide_vectype = get_vectype_for_scalar_type (type);
4428 enum tree_code c1, c2;
4429
4430 /* The result of a vectorized widening operation usually requires two vectors
4431 (because the widened results do not fit into one vector). The vector
4432 results are normally expected to be generated in the same order as in
4433 the original scalar computation, i.e. if 8 results are generated in
4434 each vector iteration, they are to be organized as follows:
4435 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
4436
4437 However, in the special case that the result of the widening operation is
4438 used in a reduction computation only, the order doesn't matter (because
4439 when vectorizing a reduction we change the order of the computation).
4440 Some targets can take advantage of this and generate more efficient code.
4441 For example, targets like Altivec, that support widen_mult using a sequence
4442 of {mult_even,mult_odd} generate the following vectors:
4443 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
4444
4445 When vectorizing outer-loops, we execute the inner-loop sequentially
4446 (each vectorized inner-loop iteration contributes to VF outer-loop
4447 iterations in parallel). We therefore don't allow changing the order
4448 of the computation in the inner-loop during outer-loop vectorization. */
4449
4450 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
4451 && !nested_in_vect_loop_p (vect_loop, stmt))
4452 ordered_p = false;
4453 else
4454 ordered_p = true;
4455
4456 if (!ordered_p
4457 && code == WIDEN_MULT_EXPR
4458 && targetm.vectorize.builtin_mul_widen_even
4459 && targetm.vectorize.builtin_mul_widen_even (vectype)
4460 && targetm.vectorize.builtin_mul_widen_odd
4461 && targetm.vectorize.builtin_mul_widen_odd (vectype))
4462 {
4463 if (vect_print_dump_info (REPORT_DETAILS))
4464 fprintf (vect_dump, "Unordered widening operation detected.");
4465
4466 *code1 = *code2 = CALL_EXPR;
4467 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
4468 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
4469 return true;
4470 }
4471
4472 switch (code)
4473 {
4474 case WIDEN_MULT_EXPR:
4475 if (BYTES_BIG_ENDIAN)
4476 {
4477 c1 = VEC_WIDEN_MULT_HI_EXPR;
4478 c2 = VEC_WIDEN_MULT_LO_EXPR;
4479 }
4480 else
4481 {
4482 c2 = VEC_WIDEN_MULT_HI_EXPR;
4483 c1 = VEC_WIDEN_MULT_LO_EXPR;
4484 }
4485 break;
4486
4487 CASE_CONVERT:
4488 if (BYTES_BIG_ENDIAN)
4489 {
4490 c1 = VEC_UNPACK_HI_EXPR;
4491 c2 = VEC_UNPACK_LO_EXPR;
4492 }
4493 else
4494 {
4495 c2 = VEC_UNPACK_HI_EXPR;
4496 c1 = VEC_UNPACK_LO_EXPR;
4497 }
4498 break;
4499
4500 case FLOAT_EXPR:
4501 if (BYTES_BIG_ENDIAN)
4502 {
4503 c1 = VEC_UNPACK_FLOAT_HI_EXPR;
4504 c2 = VEC_UNPACK_FLOAT_LO_EXPR;
4505 }
4506 else
4507 {
4508 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
4509 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
4510 }
4511 break;
4512
4513 case FIX_TRUNC_EXPR:
4514 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
4515 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
4516 computing the operation. */
4517 return false;
4518
4519 default:
4520 gcc_unreachable ();
4521 }
4522
4523 if (code == FIX_TRUNC_EXPR)
4524 {
4525 /* The signedness is determined from the output operand. */
4526 optab1 = optab_for_tree_code (c1, type, optab_default);
4527 optab2 = optab_for_tree_code (c2, type, optab_default);
4528 }
4529 else
4530 {
4531 optab1 = optab_for_tree_code (c1, vectype, optab_default);
4532 optab2 = optab_for_tree_code (c2, vectype, optab_default);
4533 }
4534
4535 if (!optab1 || !optab2)
4536 return false;
4537
4538 vec_mode = TYPE_MODE (vectype);
4539 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) == CODE_FOR_nothing
4540 || (icode2 = optab_handler (optab2, vec_mode)->insn_code)
4541 == CODE_FOR_nothing)
4542 return false;
4543
4544 /* Check if it's a multi-step conversion that can be done using intermediate
4545 types. */
4546 if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
4547 || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
4548 {
4549 int i;
4550 tree prev_type = vectype, intermediate_type;
4551 enum machine_mode intermediate_mode, prev_mode = vec_mode;
4552 optab optab3, optab4;
4553
4554 if (!CONVERT_EXPR_CODE_P (code))
4555 return false;
4556
4557 *code1 = c1;
4558 *code2 = c2;
4559
4560 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
4561 intermediate steps in the promotion sequence. We try MAX_INTERM_CVT_STEPS
4562 to get to WIDE_VECTYPE, and fail if we do not. */
4563 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
4564 for (i = 0; i < 3; i++)
4565 {
4566 intermediate_mode = insn_data[icode1].operand[0].mode;
4567 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
4568 TYPE_UNSIGNED (prev_type));
4569 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
4570 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
4571
4572 if (!optab3 || !optab4
4573 || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
4574 == CODE_FOR_nothing
4575 || insn_data[icode1].operand[0].mode != intermediate_mode
4576 || (icode2 = optab2->handlers[(int) prev_mode].insn_code)
4577 == CODE_FOR_nothing
4578 || insn_data[icode2].operand[0].mode != intermediate_mode
4579 || (icode1 = optab3->handlers[(int) intermediate_mode].insn_code)
4580 == CODE_FOR_nothing
4581 || (icode2 = optab4->handlers[(int) intermediate_mode].insn_code)
4582 == CODE_FOR_nothing)
4583 return false;
4584
4585 VEC_quick_push (tree, *interm_types, intermediate_type);
4586 (*multi_step_cvt)++;
4587
4588 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
4589 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
4590 return true;
4591
4592 prev_type = intermediate_type;
4593 prev_mode = intermediate_mode;
4594 }
4595
4596 return false;
4597 }
4598
4599 *code1 = c1;
4600 *code2 = c2;
4601 return true;
4602 }
4603
4604
4605 /* Function supportable_narrowing_operation
4606
4607 Check whether an operation represented by the code CODE is a
4608 narrowing operation that is supported by the target platform in
4609 vector form (i.e., when operating on arguments of type VECTYPE).
4610
4611 Narrowing operations we currently support are NOP (CONVERT) and
4612 FIX_TRUNC. This function checks if these operations are supported by
4613 the target platform directly via vector tree-codes.
4614
4615 Output:
4616 - CODE1 is the code of a vector operation to be used when
4617 vectorizing the operation, if available.
4618 - MULTI_STEP_CVT determines the number of required intermediate steps in
4619 case of multi-step conversion (like int->short->char - in that case
4620 MULTI_STEP_CVT will be 1).
4621 - INTERM_TYPES contains the intermediate type required to perform the
4622 narrowing operation (short in the above example). */
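
/* For example (a sketch): narrowing ints to chars with 16-byte vectors
   goes int -> short -> char: a first VEC_PACK_TRUNC_EXPR packs two int
   vectors into one short vector (the intermediate type recorded in
   INTERM_TYPES), and a second VEC_PACK_TRUNC_EXPR packs two short
   vectors into one char vector; MULTI_STEP_CVT is 1 in that case. */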
4623
4624 bool
4625 supportable_narrowing_operation (enum tree_code code,
4626 const_gimple stmt, tree vectype,
4627 enum tree_code *code1, int *multi_step_cvt,
4628 VEC (tree, heap) **interm_types)
4629 {
4630 enum machine_mode vec_mode;
4631 enum insn_code icode1;
4632 optab optab1, interm_optab;
4633 tree type = gimple_expr_type (stmt);
4634 tree narrow_vectype = get_vectype_for_scalar_type (type);
4635 enum tree_code c1;
4636 tree intermediate_type, prev_type;
4637 int i;
4638
4639 switch (code)
4640 {
4641 CASE_CONVERT:
4642 c1 = VEC_PACK_TRUNC_EXPR;
4643 break;
4644
4645 case FIX_TRUNC_EXPR:
4646 c1 = VEC_PACK_FIX_TRUNC_EXPR;
4647 break;
4648
4649 case FLOAT_EXPR:
4650 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
4651 tree code and optabs used for computing the operation. */
4652 return false;
4653
4654 default:
4655 gcc_unreachable ();
4656 }
4657
4658 if (code == FIX_TRUNC_EXPR)
4659 /* The signedness is determined from the output operand. */
4660 optab1 = optab_for_tree_code (c1, type, optab_default);
4661 else
4662 optab1 = optab_for_tree_code (c1, vectype, optab_default);
4663
4664 if (!optab1)
4665 return false;
4666
4667 vec_mode = TYPE_MODE (vectype);
4668 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code)
4669 == CODE_FOR_nothing)
4670 return false;
4671
4672 /* Check if it's a multi-step conversion that can be done using intermediate
4673 types. */
4674 if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
4675 {
4676 enum machine_mode intermediate_mode, prev_mode = vec_mode;
4677
4678 *code1 = c1;
4679 prev_type = vectype;
4680 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
4681 intermediate steps in the demotion sequence. We try MAX_INTERM_CVT_STEPS
4682 to get to NARROW_VECTYPE, and fail if we do not. */
4683 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
4684 for (i = 0; i < 3; i++)
4685 {
4686 intermediate_mode = insn_data[icode1].operand[0].mode;
4687 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
4688 TYPE_UNSIGNED (prev_type));
4689 interm_optab = optab_for_tree_code (c1, intermediate_type,
4690 optab_default);
4691 if (!interm_optab
4692 || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
4693 == CODE_FOR_nothing
4694 || insn_data[icode1].operand[0].mode != intermediate_mode
4695 || (icode1
4696 = interm_optab->handlers[(int) intermediate_mode].insn_code)
4697 == CODE_FOR_nothing)
4698 return false;
4699
4700 VEC_quick_push (tree, *interm_types, intermediate_type);
4701 (*multi_step_cvt)++;
4702
4703 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
4704 return true;
4705
4706 prev_type = intermediate_type;
4707 prev_mode = intermediate_mode;
4708 }
4709
4710 return false;
4711 }
4712
4713 *code1 = c1;
4714 return true;
4715 }