gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software
3 Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "cfgloop.h"
35 #include "cfglayout.h"
36 #include "expr.h"
37 #include "recog.h"
38 #include "optabs.h"
39 #include "toplev.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
42
43
44 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
45
46 /* Function vect_mark_relevant.
47
48 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
49
50 static void
51 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
52 enum vect_relevant relevant, bool live_p)
53 {
54 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
55 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
56 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
57
58 if (vect_print_dump_info (REPORT_DETAILS))
59 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
60
61 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
62 {
63 gimple pattern_stmt;
64
65 /* This is the last stmt in a sequence that was detected as a
66 pattern that can potentially be vectorized. Don't mark the stmt
67 as relevant/live because it's not going to be vectorized.
68 Instead mark the pattern-stmt that replaces it. */
69
70 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
71
72 if (vect_print_dump_info (REPORT_DETAILS))
73 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
74 stmt_info = vinfo_for_stmt (pattern_stmt);
75 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
76 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
77 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
78 stmt = pattern_stmt;
79 }
80
81 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
82 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
83 STMT_VINFO_RELEVANT (stmt_info) = relevant;
84
85 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
86 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
87 {
88 if (vect_print_dump_info (REPORT_DETAILS))
89 fprintf (vect_dump, "already marked relevant/live.");
90 return;
91 }
92
93 VEC_safe_push (gimple, heap, *worklist, stmt);
94 }
95
96
97 /* Function vect_stmt_relevant_p.
98
99 Return true if STMT, in the loop represented by LOOP_VINFO, is
100 "relevant for vectorization".
101
102 A stmt is considered "relevant for vectorization" if:
103 - it has uses outside the loop.
104 - it has vdefs (it alters memory).
105 - it is a control stmt in the loop (other than the loop exit condition).
106
107 CHECKME: what other side effects would the vectorizer allow? */
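/* For illustration only, a hypothetical GIMPLE fragment (not from a real
   testcase) showing the two common cases handled below:

     loop:
       s_1 = s_0 + a[i_2];   <-- s_1 is used after the loop: *live_p = true.
       b[i_2] = x_3;         <-- the stmt has a vdef: *relevant = vect_used_in_scope.
     loop exit:
       sum_4 = PHI <s_1>     <-- the out-of-loop use, in a loop-closed exit phi.  */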
108
109 static bool
110 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
111 enum vect_relevant *relevant, bool *live_p)
112 {
113 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
114 ssa_op_iter op_iter;
115 imm_use_iterator imm_iter;
116 use_operand_p use_p;
117 def_operand_p def_p;
118
119 *relevant = vect_unused_in_scope;
120 *live_p = false;
121
122 /* cond stmt other than loop exit cond. */
123 if (is_ctrl_stmt (stmt)
124 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
125 != loop_exit_ctrl_vec_info_type)
126 *relevant = vect_used_in_scope;
127
128 /* changing memory. */
129 if (gimple_code (stmt) != GIMPLE_PHI)
130 if (gimple_vdef (stmt))
131 {
132 if (vect_print_dump_info (REPORT_DETAILS))
133 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
134 *relevant = vect_used_in_scope;
135 }
136
137 /* uses outside the loop. */
138 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
139 {
140 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
141 {
142 basic_block bb = gimple_bb (USE_STMT (use_p));
143 if (!flow_bb_inside_loop_p (loop, bb))
144 {
145 if (vect_print_dump_info (REPORT_DETAILS))
146 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
147
148 /* We expect all such uses to be in the loop exit phis
149 (because the loop is in loop-closed SSA form).  */
150 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
151 gcc_assert (bb == single_exit (loop)->dest);
152
153 *live_p = true;
154 }
155 }
156 }
157
158 return (*live_p || *relevant);
159 }
160
161
162 /* Function exist_non_indexing_operands_for_use_p
163
164 USE is one of the uses attached to STMT. Check if USE is
165 used in STMT for anything other than indexing an array. */
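/* For illustration only (hypothetical GIMPLE, names invented here):

     x_2 = a[i_1];    <-- for USE i_1 the answer is false (indexing only);
                          the lhs x_2 is a def, not a use.
     a[i_1] = x_2;    <-- for USE x_2 the answer is true (the stored value);
                          for USE i_1 it is still false.  */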
166
167 static bool
168 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
169 {
170 tree operand;
171 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
172
173 /* USE corresponds to some operand in STMT. If there is no data
174 reference in STMT, then any operand that corresponds to USE
175 is not indexing an array. */
176 if (!STMT_VINFO_DATA_REF (stmt_info))
177 return true;
178
179 /* STMT has a data_ref. FORNOW this means that it is of one of
180 the following forms:
181 -1- ARRAY_REF = var
182 -2- var = ARRAY_REF
183 (This should have been verified in analyze_data_refs).
184
185 'var' in the second case corresponds to a def, not a use,
186 so USE cannot correspond to any operands that are not used
187 for array indexing.
188
189 Therefore, all we need to check is if STMT falls into the
190 first case, and whether var corresponds to USE. */
191
192 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
193 return false;
194
195 if (!gimple_assign_copy_p (stmt))
196 return false;
197 operand = gimple_assign_rhs1 (stmt);
198
199 if (TREE_CODE (operand) != SSA_NAME)
200 return false;
201
202 if (operand == use)
203 return true;
204
205 return false;
206 }
207
208
209 /*
210 Function process_use.
211
212 Inputs:
213 - a USE in STMT in a loop represented by LOOP_VINFO
214 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
215 that defined USE. This is done by calling mark_relevant and passing it
216 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
217
218 Outputs:
219 Generally, LIVE_P and RELEVANT are used to define the liveness and
220 relevance info of the DEF_STMT of this USE:
221 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
222 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
223 Exceptions:
224 - case 1: If USE is used only for address computations (e.g. array indexing),
225 which does not need to be directly vectorized, then the liveness/relevance
226 of the respective DEF_STMT is left unchanged.
227 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
228 skip DEF_STMT because it has already been processed.
229 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
230 be modified accordingly.
231
232 Return true if everything is as expected. Return false otherwise. */
233
234 static bool
235 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
236 enum vect_relevant relevant, VEC(gimple,heap) **worklist)
237 {
238 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
239 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
240 stmt_vec_info dstmt_vinfo;
241 basic_block bb, def_bb;
242 tree def;
243 gimple def_stmt;
244 enum vect_def_type dt;
245
246 /* case 1: we are only interested in uses that need to be vectorized. Uses
247 that are used for address computation are not considered relevant. */
248 if (!exist_non_indexing_operands_for_use_p (use, stmt))
249 return true;
250
251 if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
252 {
253 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
254 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
255 return false;
256 }
257
258 if (!def_stmt || gimple_nop_p (def_stmt))
259 return true;
260
261 def_bb = gimple_bb (def_stmt);
262 if (!flow_bb_inside_loop_p (loop, def_bb))
263 {
264 if (vect_print_dump_info (REPORT_DETAILS))
265 fprintf (vect_dump, "def_stmt is out of loop.");
266 return true;
267 }
268
269 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
270 DEF_STMT must have already been processed, because this should be the
271 only way that STMT, which is a reduction-phi, was put in the worklist,
272 as there should be no other uses for DEF_STMT in the loop. So we just
273 check that everything is as expected, and we are done. */
274 dstmt_vinfo = vinfo_for_stmt (def_stmt);
275 bb = gimple_bb (stmt);
276 if (gimple_code (stmt) == GIMPLE_PHI
277 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
278 && gimple_code (def_stmt) != GIMPLE_PHI
279 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
280 && bb->loop_father == def_bb->loop_father)
281 {
282 if (vect_print_dump_info (REPORT_DETAILS))
283 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
284 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
285 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
286 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
287 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
288 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
289 return true;
290 }
291
292 /* case 3a: outer-loop stmt defining an inner-loop stmt:
293 outer-loop-header-bb:
294 d = def_stmt
295 inner-loop:
296 stmt # use (d)
297 outer-loop-tail-bb:
298 ... */
299 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
300 {
301 if (vect_print_dump_info (REPORT_DETAILS))
302 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
303 switch (relevant)
304 {
305 case vect_unused_in_scope:
306 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
307 vect_used_by_reduction : vect_unused_in_scope;
308 break;
309 case vect_used_in_outer_by_reduction:
310 relevant = vect_used_by_reduction;
311 break;
312 case vect_used_in_outer:
313 relevant = vect_used_in_scope;
314 break;
315 case vect_used_by_reduction:
316 case vect_used_in_scope:
317 break;
318
319 default:
320 gcc_unreachable ();
321 }
322 }
323
324 /* case 3b: inner-loop stmt defining an outer-loop stmt:
325 outer-loop-header-bb:
326 ...
327 inner-loop:
328 d = def_stmt
329 outer-loop-tail-bb:
330 stmt # use (d) */
331 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
332 {
333 if (vect_print_dump_info (REPORT_DETAILS))
334 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
335 switch (relevant)
336 {
337 case vect_unused_in_scope:
338 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
339 vect_used_in_outer_by_reduction : vect_unused_in_scope;
340 break;
341
342 case vect_used_in_outer_by_reduction:
343 case vect_used_in_outer:
344 break;
345
346 case vect_used_by_reduction:
347 relevant = vect_used_in_outer_by_reduction;
348 break;
349
350 case vect_used_in_scope:
351 relevant = vect_used_in_outer;
352 break;
353
354 default:
355 gcc_unreachable ();
356 }
357 }
358
359 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
360 return true;
361 }
362
363
364 /* Function vect_mark_stmts_to_be_vectorized.
365
366 Not all stmts in the loop need to be vectorized. For example:
367
368 for i...
369 for j...
370 1. T0 = i + j
371 2. T1 = a[T0]
372
373 3. j = j + 1
374
375 Stmts 1 and 3 do not need to be vectorized, because loop control and
376 addressing of vectorized data-refs are handled differently.
377
378 This pass detects such stmts. */
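/* As a rough sketch (the details are in the code below and in process_use),
   the overall flow of this function is:

     worklist = { every stmt for which vect_stmt_relevant_p returns true };
     while (worklist is not empty)
       {
         S = pop (worklist);
         for each USE operand of S:
           propagate S's relevance/liveness to the stmt that defines USE
           and, if its marking changed, push that stmt onto the worklist;
       }  */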
379
380 bool
381 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
382 {
383 VEC(gimple,heap) *worklist;
384 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
385 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
386 unsigned int nbbs = loop->num_nodes;
387 gimple_stmt_iterator si;
388 gimple stmt;
389 unsigned int i;
390 stmt_vec_info stmt_vinfo;
391 basic_block bb;
392 gimple phi;
393 bool live_p;
394 enum vect_relevant relevant;
395
396 if (vect_print_dump_info (REPORT_DETAILS))
397 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
398
399 worklist = VEC_alloc (gimple, heap, 64);
400
401 /* 1. Init worklist. */
402 for (i = 0; i < nbbs; i++)
403 {
404 bb = bbs[i];
405 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
406 {
407 phi = gsi_stmt (si);
408 if (vect_print_dump_info (REPORT_DETAILS))
409 {
410 fprintf (vect_dump, "init: phi relevant? ");
411 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
412 }
413
414 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
415 vect_mark_relevant (&worklist, phi, relevant, live_p);
416 }
417 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
418 {
419 stmt = gsi_stmt (si);
420 if (vect_print_dump_info (REPORT_DETAILS))
421 {
422 fprintf (vect_dump, "init: stmt relevant? ");
423 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
424 }
425
426 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
427 vect_mark_relevant (&worklist, stmt, relevant, live_p);
428 }
429 }
430
431 /* 2. Process_worklist */
432 while (VEC_length (gimple, worklist) > 0)
433 {
434 use_operand_p use_p;
435 ssa_op_iter iter;
436
437 stmt = VEC_pop (gimple, worklist);
438 if (vect_print_dump_info (REPORT_DETAILS))
439 {
440 fprintf (vect_dump, "worklist: examine stmt: ");
441 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
442 }
443
444 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
445 (DEF_STMT) as relevant/irrelevant and live/dead according to the
446 liveness and relevance properties of STMT. */
447 stmt_vinfo = vinfo_for_stmt (stmt);
448 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
449 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
450
451 /* Generally, the liveness and relevance properties of STMT are
452 propagated as is to the DEF_STMTs of its USEs:
453 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
454 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
455
456 One exception is when STMT has been identified as defining a reduction
457 variable; in this case we set the liveness/relevance as follows:
458 live_p = false
459 relevant = vect_used_by_reduction
460 This is because we distinguish between two kinds of relevant stmts -
461 those that are used by a reduction computation, and those that are
462 (also) used by a regular computation. This allows us later on to
463 identify stmts that are used solely by a reduction, and therefore the
464 order of the results that they produce does not have to be kept.
465
466 Reduction phis are expected to be used by a reduction stmt, or by
467 a stmt in an outer loop; other reduction stmts are expected to be
468 in the loop, and possibly used by a stmt in an outer loop.
469 Here are the expected values of "relevant" for reduction phis/stmts:
470
471 relevance:                        phi    stmt
472 vect_unused_in_scope                     ok
473 vect_used_in_outer_by_reduction   ok     ok
474 vect_used_in_outer                ok     ok
475 vect_used_by_reduction            ok
476 vect_used_in_scope                              */
477
478 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
479 {
480 enum vect_relevant tmp_relevant = relevant;
481 switch (tmp_relevant)
482 {
483 case vect_unused_in_scope:
484 gcc_assert (gimple_code (stmt) != GIMPLE_PHI);
485 relevant = vect_used_by_reduction;
486 break;
487
488 case vect_used_in_outer_by_reduction:
489 case vect_used_in_outer:
490 gcc_assert (gimple_code (stmt) != GIMPLE_ASSIGN
491 || (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR
492 && (gimple_assign_rhs_code (stmt)
493 != DOT_PROD_EXPR)));
494 break;
495
496 case vect_used_by_reduction:
497 if (gimple_code (stmt) == GIMPLE_PHI)
498 break;
499 /* fall through */
500 case vect_used_in_scope:
501 default:
502 if (vect_print_dump_info (REPORT_DETAILS))
503 fprintf (vect_dump, "unsupported use of reduction.");
504 VEC_free (gimple, heap, worklist);
505 return false;
506 }
507 live_p = false;
508 }
509
510 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
511 {
512 tree op = USE_FROM_PTR (use_p);
513 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
514 {
515 VEC_free (gimple, heap, worklist);
516 return false;
517 }
518 }
519 } /* while worklist */
520
521 VEC_free (gimple, heap, worklist);
522 return true;
523 }
524
525
526 int
527 cost_for_stmt (gimple stmt)
528 {
529 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
530
531 switch (STMT_VINFO_TYPE (stmt_info))
532 {
533 case load_vec_info_type:
534 return TARG_SCALAR_LOAD_COST;
535 case store_vec_info_type:
536 return TARG_SCALAR_STORE_COST;
537 case op_vec_info_type:
538 case condition_vec_info_type:
539 case assignment_vec_info_type:
540 case reduc_vec_info_type:
541 case induc_vec_info_type:
542 case type_promotion_vec_info_type:
543 case type_demotion_vec_info_type:
544 case type_conversion_vec_info_type:
545 case call_vec_info_type:
546 return TARG_SCALAR_STMT_COST;
547 case undef_vec_info_type:
548 default:
549 gcc_unreachable ();
550 }
551 }
552
553 /* Function vect_model_simple_cost.
554
555 Models cost for simple operations, i.e. those that only emit ncopies of a
556 single op. Right now, this does not account for multiple insns that could
557 be generated for the single vector op. We will handle that shortly. */
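/* A worked example with illustrative values: for ncopies = 2 and two
   operands, one of which is a loop invariant (vect_external_def), the
   formulas below give

     inside_cost  = 2 * TARG_VEC_STMT_COST
     outside_cost = 1 * TARG_SCALAR_TO_VEC_COST.  */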
558
559 void
560 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
561 enum vect_def_type *dt, slp_tree slp_node)
562 {
563 int i;
564 int inside_cost = 0, outside_cost = 0;
565
566 /* The SLP costs were already calculated during SLP tree build. */
567 if (PURE_SLP_STMT (stmt_info))
568 return;
569
570 inside_cost = ncopies * TARG_VEC_STMT_COST;
571
572 /* FORNOW: Assuming maximum 2 args per stmts. */
573 for (i = 0; i < 2; i++)
574 {
575 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
576 outside_cost += TARG_SCALAR_TO_VEC_COST;
577 }
578
579 if (vect_print_dump_info (REPORT_COST))
580 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
581 "outside_cost = %d .", inside_cost, outside_cost);
582
583 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
584 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
585 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
586 }
587
588
589 /* Function vect_cost_strided_group_size
590
591 For strided load or store, return the group_size only if it is the first
592 load or store of a group, else return 1. This ensures that group size is
593 only returned once per group. */
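/* For example: in an interleaved group of 4 loads, the call for the first
   load of the group returns 4, and the calls for the remaining three loads
   return 1.  */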
594
595 static int
596 vect_cost_strided_group_size (stmt_vec_info stmt_info)
597 {
598 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
599
600 if (first_stmt == STMT_VINFO_STMT (stmt_info))
601 return DR_GROUP_SIZE (stmt_info);
602
603 return 1;
604 }
605
606
607 /* Function vect_model_store_cost
608
609 Models cost for stores. In the case of strided accesses, one access
610 has the overhead of the strided access attributed to it. */
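/* A worked example with illustrative values: for an interleaved group of 4
   stores and ncopies = 1, the first stmt of the group is charged

     inside_cost = 1 * log2 (4) * 4 * TARG_VEC_STMT_COST   (interleaving)
                 + 1 * TARG_VEC_STORE_COST                 (the store itself)

   while the other stmts of the group are charged only the store cost.  */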
611
612 void
613 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
614 enum vect_def_type dt, slp_tree slp_node)
615 {
616 int group_size;
617 int inside_cost = 0, outside_cost = 0;
618
619 /* The SLP costs were already calculated during SLP tree build. */
620 if (PURE_SLP_STMT (stmt_info))
621 return;
622
623 if (dt == vect_constant_def || dt == vect_external_def)
624 outside_cost = TARG_SCALAR_TO_VEC_COST;
625
626 /* Strided access? */
627 if (DR_GROUP_FIRST_DR (stmt_info) && !slp_node)
628 group_size = vect_cost_strided_group_size (stmt_info);
629 /* Not a strided access. */
630 else
631 group_size = 1;
632
633 /* Is this an access in a group of stores, which provide strided access?
634 If so, add in the cost of the permutes. */
635 if (group_size > 1)
636 {
637 /* Uses a high and low interleave operation for each needed permute. */
638 inside_cost = ncopies * exact_log2(group_size) * group_size
639 * TARG_VEC_STMT_COST;
640
641 if (vect_print_dump_info (REPORT_COST))
642 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
643 group_size);
644
645 }
646
647 /* Costs of the stores. */
648 inside_cost += ncopies * TARG_VEC_STORE_COST;
649
650 if (vect_print_dump_info (REPORT_COST))
651 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
652 "outside_cost = %d .", inside_cost, outside_cost);
653
654 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
655 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
656 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
657 }
658
659
660 /* Function vect_model_load_cost
661
662 Models cost for loads. In the case of strided accesses, the last access
663 has the overhead of the strided access attributed to it. Since unaligned
664 accesses are supported for loads, we also account for the costs of the
665 access scheme chosen. */
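/* For example (illustrative values): a load whose misalignment is handled
   directly by the hardware (dr_unaligned_supported) with ncopies = 2 is
   charged inside_cost = 2 * TARG_VEC_UNALIGNED_LOAD_COST, whereas an aligned
   load would be charged 2 * TARG_VEC_LOAD_COST.  */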
666
667 void
668 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
669
670 {
671 int group_size;
672 int alignment_support_scheme;
673 gimple first_stmt;
674 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
675 int inside_cost = 0, outside_cost = 0;
676
677 /* The SLP costs were already calculated during SLP tree build. */
678 if (PURE_SLP_STMT (stmt_info))
679 return;
680
681 /* Strided accesses? */
682 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
683 if (first_stmt && !slp_node)
684 {
685 group_size = vect_cost_strided_group_size (stmt_info);
686 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
687 }
688 /* Not a strided access. */
689 else
690 {
691 group_size = 1;
692 first_dr = dr;
693 }
694
695 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
696
697 /* Is this an access in a group of loads providing strided access?
698 If so, add in the cost of the permutes. */
699 if (group_size > 1)
700 {
701 /* Uses even and odd extract operations for each needed permute. */
702 inside_cost = ncopies * exact_log2(group_size) * group_size
703 * TARG_VEC_STMT_COST;
704
705 if (vect_print_dump_info (REPORT_COST))
706 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
707 group_size);
708
709 }
710
711 /* The loads themselves. */
712 switch (alignment_support_scheme)
713 {
714 case dr_aligned:
715 {
716 inside_cost += ncopies * TARG_VEC_LOAD_COST;
717
718 if (vect_print_dump_info (REPORT_COST))
719 fprintf (vect_dump, "vect_model_load_cost: aligned.");
720
721 break;
722 }
723 case dr_unaligned_supported:
724 {
725 /* Here, we assign an additional cost for the unaligned load. */
726 inside_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
727
728 if (vect_print_dump_info (REPORT_COST))
729 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
730 "hardware.");
731
732 break;
733 }
734 case dr_explicit_realign:
735 {
736 inside_cost += ncopies * (2*TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
737
738 /* FIXME: If the misalignment remains fixed across the iterations of
739 the containing loop, the following cost should be added to the
740 outside costs. */
741 if (targetm.vectorize.builtin_mask_for_load)
742 inside_cost += TARG_VEC_STMT_COST;
743
744 break;
745 }
746 case dr_explicit_realign_optimized:
747 {
748 if (vect_print_dump_info (REPORT_COST))
749 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
750 "pipelined.");
751
752 /* Unaligned software pipeline has a load of an address, an initial
753 load, and possibly a mask operation to "prime" the loop. However,
754 if this is an access in a group of loads, which provide strided
755 access, then the above cost should only be considered for one
756 access in the group. Inside the loop, there is a load op
757 and a realignment op. */
758
759 if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node)
760 {
761 outside_cost = 2*TARG_VEC_STMT_COST;
762 if (targetm.vectorize.builtin_mask_for_load)
763 outside_cost += TARG_VEC_STMT_COST;
764 }
765
766 inside_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
767
768 break;
769 }
770
771 default:
772 gcc_unreachable ();
773 }
774
775 if (vect_print_dump_info (REPORT_COST))
776 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
777 "outside_cost = %d .", inside_cost, outside_cost);
778
779 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
780 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
781 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
782 }
783
784
785 /* Function vect_init_vector.
786
787 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
788 the vector elements of VECTOR_VAR. Place the initialization at BSI if it
789 is not NULL. Otherwise, place the initialization at the loop preheader.
790 Return the DEF of INIT_STMT.
791 It will be used in the vectorization of STMT. */
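/* For illustration only (hypothetical GIMPLE, names invented here): with
   VECTOR_TYPE V4SI, VECTOR_VAR {3, 3, 3, 3} and a NULL GSI, the loop
   preheader receives a new stmt along the lines of

     cst__1 = { 3, 3, 3, 3 };

   and cst__1 is returned as the def of INIT_STMT.  */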
792
793 tree
794 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
795 gimple_stmt_iterator *gsi)
796 {
797 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
798 tree new_var;
799 gimple init_stmt;
800 tree vec_oprnd;
801 edge pe;
802 tree new_temp;
803 basic_block new_bb;
804
805 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
806 add_referenced_var (new_var);
807 init_stmt = gimple_build_assign (new_var, vector_var);
808 new_temp = make_ssa_name (new_var, init_stmt);
809 gimple_assign_set_lhs (init_stmt, new_temp);
810
811 if (gsi)
812 vect_finish_stmt_generation (stmt, init_stmt, gsi);
813 else
814 {
815 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
816
817 if (loop_vinfo)
818 {
819 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
820
821 if (nested_in_vect_loop_p (loop, stmt))
822 loop = loop->inner;
823
824 pe = loop_preheader_edge (loop);
825 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
826 gcc_assert (!new_bb);
827 }
828 else
829 {
830 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
831 basic_block bb;
832 gimple_stmt_iterator gsi_bb_start;
833
834 gcc_assert (bb_vinfo);
835 bb = BB_VINFO_BB (bb_vinfo);
836 gsi_bb_start = gsi_start_bb (bb);
837 gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
838 }
839 }
840
841 if (vect_print_dump_info (REPORT_DETAILS))
842 {
843 fprintf (vect_dump, "created new init_stmt: ");
844 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
845 }
846
847 vec_oprnd = gimple_assign_lhs (init_stmt);
848 return vec_oprnd;
849 }
850
851
852 /* Function vect_get_vec_def_for_operand.
853
854 OP is an operand in STMT. This function returns a (vector) def that will be
855 used in the vectorized stmt for STMT.
856
857 In the case that OP is an SSA_NAME which is defined in the loop, then
858 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
859
860 In case OP is an invariant or constant, a new stmt that creates a vector def
861 needs to be introduced. */
862
863 tree
864 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
865 {
866 tree vec_oprnd;
867 gimple vec_stmt;
868 gimple def_stmt;
869 stmt_vec_info def_stmt_info = NULL;
870 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
871 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
872 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
873 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
874 tree vec_inv;
875 tree vec_cst;
876 tree t = NULL_TREE;
877 tree def;
878 int i;
879 enum vect_def_type dt;
880 bool is_simple_use;
881 tree vector_type;
882
883 if (vect_print_dump_info (REPORT_DETAILS))
884 {
885 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
886 print_generic_expr (vect_dump, op, TDF_SLIM);
887 }
888
889 is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
890 &dt);
891 gcc_assert (is_simple_use);
892 if (vect_print_dump_info (REPORT_DETAILS))
893 {
894 if (def)
895 {
896 fprintf (vect_dump, "def = ");
897 print_generic_expr (vect_dump, def, TDF_SLIM);
898 }
899 if (def_stmt)
900 {
901 fprintf (vect_dump, " def_stmt = ");
902 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
903 }
904 }
905
906 switch (dt)
907 {
908 /* Case 1: operand is a constant. */
909 case vect_constant_def:
910 {
911 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
912 gcc_assert (vector_type);
913
914 if (scalar_def)
915 *scalar_def = op;
916
917 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
918 if (vect_print_dump_info (REPORT_DETAILS))
919 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
920
921 for (i = nunits - 1; i >= 0; --i)
922 {
923 t = tree_cons (NULL_TREE, op, t);
924 }
925 vec_cst = build_vector (vector_type, t);
926 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
927 }
928
929 /* Case 2: operand is defined outside the loop - loop invariant. */
930 case vect_external_def:
931 {
932 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
933 gcc_assert (vector_type);
934 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
935
936 if (scalar_def)
937 *scalar_def = def;
938
939 /* Create 'vec_inv = {inv,inv,..,inv}' */
940 if (vect_print_dump_info (REPORT_DETAILS))
941 fprintf (vect_dump, "Create vector_inv.");
942
943 for (i = nunits - 1; i >= 0; --i)
944 {
945 t = tree_cons (NULL_TREE, def, t);
946 }
947
948 /* FIXME: use build_constructor directly. */
949 vec_inv = build_constructor_from_list (vector_type, t);
950 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
951 }
952
953 /* Case 3: operand is defined inside the loop. */
954 case vect_internal_def:
955 {
956 if (scalar_def)
957 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
958
959 /* Get the def from the vectorized stmt. */
960 def_stmt_info = vinfo_for_stmt (def_stmt);
961 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
962 gcc_assert (vec_stmt);
963 if (gimple_code (vec_stmt) == GIMPLE_PHI)
964 vec_oprnd = PHI_RESULT (vec_stmt);
965 else if (is_gimple_call (vec_stmt))
966 vec_oprnd = gimple_call_lhs (vec_stmt);
967 else
968 vec_oprnd = gimple_assign_lhs (vec_stmt);
969 return vec_oprnd;
970 }
971
972 /* Case 4: operand is defined by a loop header phi - reduction */
973 case vect_reduction_def:
974 {
975 struct loop *loop;
976
977 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
978 loop = (gimple_bb (def_stmt))->loop_father;
979
980 /* Get the def before the loop */
981 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
982 return get_initial_def_for_reduction (stmt, op, scalar_def);
983 }
984
985 /* Case 5: operand is defined by loop-header phi - induction. */
986 case vect_induction_def:
987 {
988 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
989
990 /* Get the def from the vectorized stmt. */
991 def_stmt_info = vinfo_for_stmt (def_stmt);
992 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
993 gcc_assert (vec_stmt && gimple_code (vec_stmt) == GIMPLE_PHI);
994 vec_oprnd = PHI_RESULT (vec_stmt);
995 return vec_oprnd;
996 }
997
998 default:
999 gcc_unreachable ();
1000 }
1001 }
1002
1003
1004 /* Function vect_get_vec_def_for_stmt_copy
1005
1006 Return a vector-def for an operand. This function is used when the
1007 vectorized stmt to be created (by the caller to this function) is a "copy"
1008 created in case the vectorized result cannot fit in one vector, and several
1009 copies of the vector-stmt are required. In this case the vector-def is
1010 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1011 of the stmt that defines VEC_OPRND.
1012 DT is the type of the vector def VEC_OPRND.
1013
1014 Context:
1015 In case the vectorization factor (VF) is bigger than the number
1016 of elements that can fit in a vectype (nunits), we have to generate
1017 more than one vector stmt to vectorize the scalar stmt. This situation
1018 arises when there are multiple data-types operated upon in the loop; the
1019 smallest data-type determines the VF, and as a result, when vectorizing
1020 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1021 vector stmt (each computing a vector of 'nunits' results, and together
1022 computing 'VF' results in each iteration). This function is called when
1023 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1024 which VF=16 and nunits=4, so the number of copies required is 4):
1025
1026 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1027
1028 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1029 VS1.1: vx.1 = memref1 VS1.2
1030 VS1.2: vx.2 = memref2 VS1.3
1031 VS1.3: vx.3 = memref3
1032
1033 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1034 VSnew.1: vz1 = vx.1 + ... VSnew.2
1035 VSnew.2: vz2 = vx.2 + ... VSnew.3
1036 VSnew.3: vz3 = vx.3 + ...
1037
1038 The vectorization of S1 is explained in vectorizable_load.
1039 The vectorization of S2:
1040 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1041 the function 'vect_get_vec_def_for_operand' is called to
1042 get the relevant vector-def for each operand of S2. For operand x it
1043 returns the vector-def 'vx.0'.
1044
1045 To create the remaining copies of the vector-stmt (VSnew.j), this
1046 function is called to get the relevant vector-def for each operand. It is
1047 obtained from the respective VS1.j stmt, which is recorded in the
1048 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1049
1050 For example, to obtain the vector-def 'vx.1' in order to create the
1051 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1052 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1053 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1054 and return its def ('vx.1').
1055 Overall, to create the above sequence this function will be called 3 times:
1056 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1057 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1058 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1059
1060 tree
1061 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1062 {
1063 gimple vec_stmt_for_operand;
1064 stmt_vec_info def_stmt_info;
1065
1066 /* Do nothing; can reuse same def. */
1067 if (dt == vect_external_def || dt == vect_constant_def )
1068 return vec_oprnd;
1069
1070 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1071 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1072 gcc_assert (def_stmt_info);
1073 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1074 gcc_assert (vec_stmt_for_operand);
1075 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1076 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1077 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1078 else
1079 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1080 return vec_oprnd;
1081 }
1082
1083
1084 /* Get vectorized definitions for the operands to create a copy of an original
1085 stmt. See vect_get_vec_def_for_stmt_copy() for details. */
1086
1087 static void
1088 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1089 VEC(tree,heap) **vec_oprnds0,
1090 VEC(tree,heap) **vec_oprnds1)
1091 {
1092 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1093
1094 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1095 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1096
1097 if (vec_oprnds1 && *vec_oprnds1)
1098 {
1099 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1100 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1101 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1102 }
1103 }
1104
1105
1106 /* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not NULL. */
1107
1108 static void
1109 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1110 VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
1111 slp_tree slp_node)
1112 {
1113 if (slp_node)
1114 vect_get_slp_defs (slp_node, vec_oprnds0, vec_oprnds1);
1115 else
1116 {
1117 tree vec_oprnd;
1118
1119 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1120 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1121 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1122
1123 if (op1)
1124 {
1125 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1126 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1127 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1128 }
1129 }
1130 }
1131
1132
1133 /* Function vect_finish_stmt_generation.
1134
1135 Insert a new stmt. */
1136
1137 void
1138 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1139 gimple_stmt_iterator *gsi)
1140 {
1141 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1142 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1143 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1144
1145 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1146
1147 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1148
1149 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1150 bb_vinfo));
1151
1152 if (vect_print_dump_info (REPORT_DETAILS))
1153 {
1154 fprintf (vect_dump, "add new stmt: ");
1155 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1156 }
1157
1158 gimple_set_location (vec_stmt, gimple_location (gsi_stmt (*gsi)));
1159 }
1160
1161 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1162 a function declaration if the target has a vectorized version
1163 of the function, or NULL_TREE if the function cannot be vectorized. */
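/* For example (target dependent, purely illustrative): for a call
   'y_1 = __builtin_sqrtf (x_2)' with V4SF as both VECTYPE_OUT and
   VECTYPE_IN, a target whose builtin_vectorized_function hook provides a
   vector sqrtf variant returns that decl; otherwise NULL_TREE is returned.  */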
1164
1165 tree
1166 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1167 {
1168 tree fndecl = gimple_call_fndecl (call);
1169 enum built_in_function code;
1170
1171 /* We only handle functions that do not read or clobber memory -- i.e.
1172 const or novops ones. */
1173 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1174 return NULL_TREE;
1175
1176 if (!fndecl
1177 || TREE_CODE (fndecl) != FUNCTION_DECL
1178 || !DECL_BUILT_IN (fndecl))
1179 return NULL_TREE;
1180
1181 code = DECL_FUNCTION_CODE (fndecl);
1182 return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
1183 vectype_in);
1184 }
1185
1186 /* Function vectorizable_call.
1187
1188 Check if STMT performs a function call that can be vectorized.
1189 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1190 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1191 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1192
1193 static bool
1194 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
1195 {
1196 tree vec_dest;
1197 tree scalar_dest;
1198 tree op, type;
1199 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1200 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1201 tree vectype_out, vectype_in;
1202 int nunits_in;
1203 int nunits_out;
1204 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1205 tree fndecl, new_temp, def, rhs_type, lhs_type;
1206 gimple def_stmt;
1207 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1208 gimple new_stmt;
1209 int ncopies, j;
1210 VEC(tree, heap) *vargs = NULL;
1211 enum { NARROW, NONE, WIDEN } modifier;
1212 size_t i, nargs;
1213
1214 /* FORNOW: unsupported in basic block SLP. */
1215 gcc_assert (loop_vinfo);
1216
1217 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1218 return false;
1219
1220 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1221 return false;
1222
1223 /* FORNOW: SLP not supported. */
1224 if (STMT_SLP_TYPE (stmt_info))
1225 return false;
1226
1227 /* Is STMT a vectorizable call? */
1228 if (!is_gimple_call (stmt))
1229 return false;
1230
1231 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1232 return false;
1233
1234 /* Process function arguments. */
1235 rhs_type = NULL_TREE;
1236 nargs = gimple_call_num_args (stmt);
1237
1238 /* Bail out if the function has more than two arguments; we do not
1239 have interesting builtin functions to vectorize with more than two
1240 arguments. Calls with no arguments are not handled either. */
1241 if (nargs == 0 || nargs > 2)
1242 return false;
1243
1244 for (i = 0; i < nargs; i++)
1245 {
1246 op = gimple_call_arg (stmt, i);
1247
1248 /* We can only handle calls with arguments of the same type. */
1249 if (rhs_type
1250 && rhs_type != TREE_TYPE (op))
1251 {
1252 if (vect_print_dump_info (REPORT_DETAILS))
1253 fprintf (vect_dump, "argument types differ.");
1254 return false;
1255 }
1256 rhs_type = TREE_TYPE (op);
1257
1258 if (!vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def, &dt[i]))
1259 {
1260 if (vect_print_dump_info (REPORT_DETAILS))
1261 fprintf (vect_dump, "use not simple.");
1262 return false;
1263 }
1264 }
1265
1266 vectype_in = get_vectype_for_scalar_type (rhs_type);
1267 if (!vectype_in)
1268 return false;
1269 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1270
1271 lhs_type = TREE_TYPE (gimple_call_lhs (stmt));
1272 vectype_out = get_vectype_for_scalar_type (lhs_type);
1273 if (!vectype_out)
1274 return false;
1275 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1276
1277 /* FORNOW */
1278 if (nunits_in == nunits_out / 2)
1279 modifier = NARROW;
1280 else if (nunits_out == nunits_in)
1281 modifier = NONE;
1282 else if (nunits_out == nunits_in / 2)
1283 modifier = WIDEN;
1284 else
1285 return false;
1286
1287 /* For now, we only vectorize functions if a target specific builtin
1288 is available. TODO -- in some cases, it might be profitable to
1289 insert the calls for pieces of the vector, in order to be able
1290 to vectorize other operations in the loop. */
1291 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1292 if (fndecl == NULL_TREE)
1293 {
1294 if (vect_print_dump_info (REPORT_DETAILS))
1295 fprintf (vect_dump, "function is not vectorizable.");
1296
1297 return false;
1298 }
1299
1300 gcc_assert (!gimple_vuse (stmt));
1301
1302 if (modifier == NARROW)
1303 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1304 else
1305 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1306
1307 /* Sanity check: make sure that at least one copy of the vectorized stmt
1308 needs to be generated. */
1309 gcc_assert (ncopies >= 1);
1310
1311 if (!vec_stmt) /* transformation not required. */
1312 {
1313 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1314 if (vect_print_dump_info (REPORT_DETAILS))
1315 fprintf (vect_dump, "=== vectorizable_call ===");
1316 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1317 return true;
1318 }
1319
1320 /** Transform. **/
1321
1322 if (vect_print_dump_info (REPORT_DETAILS))
1323 fprintf (vect_dump, "transform operation.");
1324
1325 /* Handle def. */
1326 scalar_dest = gimple_call_lhs (stmt);
1327 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1328
1329 prev_stmt_info = NULL;
1330 switch (modifier)
1331 {
1332 case NONE:
1333 for (j = 0; j < ncopies; ++j)
1334 {
1335 /* Build argument list for the vectorized call. */
1336 if (j == 0)
1337 vargs = VEC_alloc (tree, heap, nargs);
1338 else
1339 VEC_truncate (tree, vargs, 0);
1340
1341 for (i = 0; i < nargs; i++)
1342 {
1343 op = gimple_call_arg (stmt, i);
1344 if (j == 0)
1345 vec_oprnd0
1346 = vect_get_vec_def_for_operand (op, stmt, NULL);
1347 else
1348 vec_oprnd0
1349 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1350
1351 VEC_quick_push (tree, vargs, vec_oprnd0);
1352 }
1353
1354 new_stmt = gimple_build_call_vec (fndecl, vargs);
1355 new_temp = make_ssa_name (vec_dest, new_stmt);
1356 gimple_call_set_lhs (new_stmt, new_temp);
1357
1358 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1359
1360 if (j == 0)
1361 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1362 else
1363 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1364
1365 prev_stmt_info = vinfo_for_stmt (new_stmt);
1366 }
1367
1368 break;
1369
1370 case NARROW:
1371 for (j = 0; j < ncopies; ++j)
1372 {
1373 /* Build argument list for the vectorized call. */
1374 if (j == 0)
1375 vargs = VEC_alloc (tree, heap, nargs * 2);
1376 else
1377 VEC_truncate (tree, vargs, 0);
1378
1379 for (i = 0; i < nargs; i++)
1380 {
1381 op = gimple_call_arg (stmt, i);
1382 if (j == 0)
1383 {
1384 vec_oprnd0
1385 = vect_get_vec_def_for_operand (op, stmt, NULL);
1386 vec_oprnd1
1387 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1388 }
1389 else
1390 {
1391 vec_oprnd0
1392 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1393 vec_oprnd1
1394 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1395 }
1396
1397 VEC_quick_push (tree, vargs, vec_oprnd0);
1398 VEC_quick_push (tree, vargs, vec_oprnd1);
1399 }
1400
1401 new_stmt = gimple_build_call_vec (fndecl, vargs);
1402 new_temp = make_ssa_name (vec_dest, new_stmt);
1403 gimple_call_set_lhs (new_stmt, new_temp);
1404
1405 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1406
1407 if (j == 0)
1408 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1409 else
1410 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1411
1412 prev_stmt_info = vinfo_for_stmt (new_stmt);
1413 }
1414
1415 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1416
1417 break;
1418
1419 case WIDEN:
1420 /* No current target implements this case. */
1421 return false;
1422 }
1423
1424 VEC_free (tree, heap, vargs);
1425
1426 /* Update the exception handling table with the vector stmt if necessary. */
1427 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1428 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1429
1430 /* The call in STMT might prevent it from being removed in dce.
1431 We however cannot remove it here, due to the way the ssa name
1432 it defines is mapped to the new definition. So just replace
1433 rhs of the statement with something harmless. */
1434
1435 type = TREE_TYPE (scalar_dest);
1436 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
1437 fold_convert (type, integer_zero_node));
1438 set_vinfo_for_stmt (new_stmt, stmt_info);
1439 set_vinfo_for_stmt (stmt, NULL);
1440 STMT_VINFO_STMT (stmt_info) = new_stmt;
1441 gsi_replace (gsi, new_stmt, false);
1442 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1443
1444 return true;
1445 }
1446
1447
1448 /* Function vect_gen_widened_results_half
1449
1450 Create a vector stmt whose code, number of arguments, and result
1451 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
1452 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
1453 In the case that CODE is a CALL_EXPR, this means that a call to DECL
1454 needs to be created (DECL is a function-decl of a target-builtin).
1455 STMT is the original scalar stmt that we are vectorizing. */
1456
1457 static gimple
1458 vect_gen_widened_results_half (enum tree_code code,
1459 tree decl,
1460 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1461 tree vec_dest, gimple_stmt_iterator *gsi,
1462 gimple stmt)
1463 {
1464 gimple new_stmt;
1465 tree new_temp;
1466
1467 /* Generate half of the widened result: */
1468 if (code == CALL_EXPR)
1469 {
1470 /* Target specific support */
1471 if (op_type == binary_op)
1472 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1473 else
1474 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1475 new_temp = make_ssa_name (vec_dest, new_stmt);
1476 gimple_call_set_lhs (new_stmt, new_temp);
1477 }
1478 else
1479 {
1480 /* Generic support */
1481 gcc_assert (op_type == TREE_CODE_LENGTH (code));
1482 if (op_type != binary_op)
1483 vec_oprnd1 = NULL;
1484 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1485 vec_oprnd1);
1486 new_temp = make_ssa_name (vec_dest, new_stmt);
1487 gimple_assign_set_lhs (new_stmt, new_temp);
1488 }
1489 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1490
1491 return new_stmt;
1492 }
1493
1494
1495 /* Check if STMT performs a conversion operation that can be vectorized.
1496 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1497 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1498 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
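/* For illustration (hypothetical): for 'f_1 = (float) i_2' with V4SI in and
   V4SF out, nunits_in == nunits_out and the modifier computed below is NONE,
   so one target conversion builtin is emitted per copy.  For an int -> double
   conversion with V4SI in and V2DF out, nunits_out == nunits_in / 2 and the
   modifier is WIDEN: two vector stmts are generated per input vector.  */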
1499
1500 static bool
1501 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
1502 gimple *vec_stmt, slp_tree slp_node)
1503 {
1504 tree vec_dest;
1505 tree scalar_dest;
1506 tree op0;
1507 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1508 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1509 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1510 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
1511 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
1512 tree new_temp;
1513 tree def;
1514 gimple def_stmt;
1515 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1516 gimple new_stmt = NULL;
1517 stmt_vec_info prev_stmt_info;
1518 int nunits_in;
1519 int nunits_out;
1520 tree vectype_out, vectype_in;
1521 int ncopies, j;
1522 tree expr;
1523 tree rhs_type, lhs_type;
1524 tree builtin_decl;
1525 enum { NARROW, NONE, WIDEN } modifier;
1526 int i;
1527 VEC(tree,heap) *vec_oprnds0 = NULL;
1528 tree vop0;
1529 tree integral_type;
1530 VEC(tree,heap) *dummy = NULL;
1531 int dummy_int;
1532
1533 /* Is STMT a vectorizable conversion? */
1534
1535 /* FORNOW: unsupported in basic block SLP. */
1536 gcc_assert (loop_vinfo);
1537
1538 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1539 return false;
1540
1541 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1542 return false;
1543
1544 if (!is_gimple_assign (stmt))
1545 return false;
1546
1547 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1548 return false;
1549
1550 code = gimple_assign_rhs_code (stmt);
1551 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
1552 return false;
1553
1554 /* Check types of lhs and rhs. */
1555 op0 = gimple_assign_rhs1 (stmt);
1556 rhs_type = TREE_TYPE (op0);
1557 vectype_in = get_vectype_for_scalar_type (rhs_type);
1558 if (!vectype_in)
1559 return false;
1560 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1561
1562 scalar_dest = gimple_assign_lhs (stmt);
1563 lhs_type = TREE_TYPE (scalar_dest);
1564 vectype_out = get_vectype_for_scalar_type (lhs_type);
1565 if (!vectype_out)
1566 return false;
1567 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1568
1569 /* FORNOW */
1570 if (nunits_in == nunits_out / 2)
1571 modifier = NARROW;
1572 else if (nunits_out == nunits_in)
1573 modifier = NONE;
1574 else if (nunits_out == nunits_in / 2)
1575 modifier = WIDEN;
1576 else
1577 return false;
1578
1579 if (modifier == NONE)
1580 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);
1581
1582 /* Bail out if the types are both integral or non-integral. */
1583 if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
1584 || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
1585 return false;
1586
1587 integral_type = INTEGRAL_TYPE_P (rhs_type) ? vectype_in : vectype_out;
1588
1589 if (modifier == NARROW)
1590 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1591 else
1592 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1593
1594 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
1595 this, so we can safely override NCOPIES with 1 here. */
1596 if (slp_node)
1597 ncopies = 1;
1598
1599 /* Sanity check: make sure that at least one copy of the vectorized stmt
1600 needs to be generated. */
1601 gcc_assert (ncopies >= 1);
1602
1603 /* Check the operands of the operation. */
1604 if (!vect_is_simple_use (op0, loop_vinfo, NULL, &def_stmt, &def, &dt[0]))
1605 {
1606 if (vect_print_dump_info (REPORT_DETAILS))
1607 fprintf (vect_dump, "use not simple.");
1608 return false;
1609 }
1610
1611 /* Supportable by target? */
1612 if ((modifier == NONE
1613 && !targetm.vectorize.builtin_conversion (code, integral_type))
1614 || (modifier == WIDEN
1615 && !supportable_widening_operation (code, stmt, vectype_in,
1616 &decl1, &decl2,
1617 &code1, &code2,
1618 &dummy_int, &dummy))
1619 || (modifier == NARROW
1620 && !supportable_narrowing_operation (code, stmt, vectype_in,
1621 &code1, &dummy_int, &dummy)))
1622 {
1623 if (vect_print_dump_info (REPORT_DETAILS))
1624 fprintf (vect_dump, "conversion not supported by target.");
1625 return false;
1626 }
1627
1628 if (modifier != NONE)
1629 {
1630 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
1631 /* FORNOW: SLP not supported. */
1632 if (STMT_SLP_TYPE (stmt_info))
1633 return false;
1634 }
1635
1636 if (!vec_stmt) /* transformation not required. */
1637 {
1638 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
1639 return true;
1640 }
1641
1642 /** Transform. **/
1643 if (vect_print_dump_info (REPORT_DETAILS))
1644 fprintf (vect_dump, "transform conversion.");
1645
1646 /* Handle def. */
1647 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1648
1649 if (modifier == NONE && !slp_node)
1650 vec_oprnds0 = VEC_alloc (tree, heap, 1);
1651
1652 prev_stmt_info = NULL;
1653 switch (modifier)
1654 {
1655 case NONE:
1656 for (j = 0; j < ncopies; j++)
1657 {
1658 if (j == 0)
1659 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
1660 else
1661 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
1662
1663 builtin_decl =
1664 targetm.vectorize.builtin_conversion (code, integral_type);
1665 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
1666 {
1667 /* Arguments are ready. Create the new vector stmt. */
1668 new_stmt = gimple_build_call (builtin_decl, 1, vop0);
1669 new_temp = make_ssa_name (vec_dest, new_stmt);
1670 gimple_call_set_lhs (new_stmt, new_temp);
1671 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1672 if (slp_node)
1673 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
1674 }
1675
1676 if (j == 0)
1677 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1678 else
1679 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1680 prev_stmt_info = vinfo_for_stmt (new_stmt);
1681 }
1682 break;
1683
1684 case WIDEN:
1685 /* In case the vectorization factor (VF) is bigger than the number
1686 of elements that we can fit in a vectype (nunits), we have to
1687 generate more than one vector stmt - i.e - we need to "unroll"
1688 the vector stmt by a factor VF/nunits. */
1689 for (j = 0; j < ncopies; j++)
1690 {
1691 if (j == 0)
1692 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1693 else
1694 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1695
1696 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
1697
1698 /* Generate first half of the widened result: */
1699 new_stmt
1700 = vect_gen_widened_results_half (code1, decl1,
1701 vec_oprnd0, vec_oprnd1,
1702 unary_op, vec_dest, gsi, stmt);
1703 if (j == 0)
1704 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1705 else
1706 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1707 prev_stmt_info = vinfo_for_stmt (new_stmt);
1708
1709 /* Generate second half of the widened result: */
1710 new_stmt
1711 = vect_gen_widened_results_half (code2, decl2,
1712 vec_oprnd0, vec_oprnd1,
1713 unary_op, vec_dest, gsi, stmt);
1714 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1715 prev_stmt_info = vinfo_for_stmt (new_stmt);
1716 }
1717 break;
1718
1719 case NARROW:
1720 /* In case the vectorization factor (VF) is bigger than the number
1721 of elements that we can fit in a vectype (nunits), we have to
1722 generate more than one vector stmt - i.e - we need to "unroll"
1723 the vector stmt by a factor VF/nunits. */
1724 for (j = 0; j < ncopies; j++)
1725 {
1726 /* Handle uses. */
1727 if (j == 0)
1728 {
1729 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1730 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1731 }
1732 else
1733 {
1734 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
1735 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1736 }
1737
1738 /* Arguments are ready. Create the new vector stmt. */
1739 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
1740 new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
1741 vec_oprnd1);
1742 new_temp = make_ssa_name (vec_dest, new_stmt);
1743 gimple_assign_set_lhs (new_stmt, new_temp);
1744 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1745
1746 if (j == 0)
1747 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1748 else
1749 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1750
1751 prev_stmt_info = vinfo_for_stmt (new_stmt);
1752 }
1753
1754 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1755 }
1756
1757 if (vec_oprnds0)
1758 VEC_free (tree, heap, vec_oprnds0);
1759
1760 return true;
1761 }
1762 /* Function vectorizable_assignment.
1763
1764 Check if STMT performs an assignment (copy) that can be vectorized.
1765 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1766 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1767 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1768
1769 static bool
1770 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
1771 gimple *vec_stmt, slp_tree slp_node)
1772 {
1773 tree vec_dest;
1774 tree scalar_dest;
1775 tree op;
1776 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1777 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1778 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1779 tree new_temp;
1780 tree def;
1781 gimple def_stmt;
1782 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1783 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1784 int ncopies;
1785 int i;
1786 VEC(tree,heap) *vec_oprnds = NULL;
1787 tree vop;
1788 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1789
1790 /* Multiple types in SLP are handled by creating the appropriate number of
1791 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1792 case of SLP. */
1793 if (slp_node)
1794 ncopies = 1;
1795 else
1796 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1797
1798 gcc_assert (ncopies >= 1);
1799 if (ncopies > 1)
1800 return false; /* FORNOW */
1801
1802 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1803 return false;
1804
1805 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1806 return false;
1807
1808 /* Is vectorizable assignment? */
1809 if (!is_gimple_assign (stmt))
1810 return false;
1811
1812 scalar_dest = gimple_assign_lhs (stmt);
1813 if (TREE_CODE (scalar_dest) != SSA_NAME)
1814 return false;
1815
1816 if (gimple_assign_single_p (stmt)
1817 || gimple_assign_rhs_code (stmt) == PAREN_EXPR)
1818 op = gimple_assign_rhs1 (stmt);
1819 else
1820 return false;
1821
1822 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[0]))
1823 {
1824 if (vect_print_dump_info (REPORT_DETAILS))
1825 fprintf (vect_dump, "use not simple.");
1826 return false;
1827 }
1828
1829 if (!vec_stmt) /* transformation not required. */
1830 {
1831 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1832 if (vect_print_dump_info (REPORT_DETAILS))
1833 fprintf (vect_dump, "=== vectorizable_assignment ===");
1834 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1835 return true;
1836 }
1837
1838 /** Transform. **/
1839 if (vect_print_dump_info (REPORT_DETAILS))
1840 fprintf (vect_dump, "transform assignment.");
1841
1842 /* Handle def. */
1843 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1844
1845 /* Handle use. */
1846 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
1847
1848 /* Arguments are ready. Create the new vector stmt. */
1849 for (i = 0; VEC_iterate (tree, vec_oprnds, i, vop); i++)
1850 {
1851 *vec_stmt = gimple_build_assign (vec_dest, vop);
1852 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1853 gimple_assign_set_lhs (*vec_stmt, new_temp);
1854 vect_finish_stmt_generation (stmt, *vec_stmt, gsi);
1855 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt;
1856
1857 if (slp_node)
1858 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), *vec_stmt);
1859 }
1860
1861 VEC_free (tree, heap, vec_oprnds);
1862 return true;
1863 }
1864
1865 /* Function vectorizable_operation.
1866
1867 Check if STMT performs a binary or unary operation that can be vectorized.
1868 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1869 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1870 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1871
1872 static bool
1873 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
1874 gimple *vec_stmt, slp_tree slp_node)
1875 {
1876 tree vec_dest;
1877 tree scalar_dest;
1878 tree op0, op1 = NULL;
1879 tree vec_oprnd1 = NULL_TREE;
1880 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1881 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1882 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1883 enum tree_code code;
1884 enum machine_mode vec_mode;
1885 tree new_temp;
1886 int op_type;
1887 optab optab;
1888 int icode;
1889 enum machine_mode optab_op2_mode;
1890 tree def;
1891 gimple def_stmt;
1892 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1893 gimple new_stmt = NULL;
1894 stmt_vec_info prev_stmt_info;
1895 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
1896 int nunits_out;
1897 tree vectype_out;
1898 int ncopies;
1899 int j, i;
1900 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
1901 tree vop0, vop1;
1902 unsigned int k;
1903 bool shift_p = false;
1904 bool scalar_shift_arg = false;
1905 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1906 int vf;
1907
1908 if (loop_vinfo)
1909 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1910 else
1911 /* FORNOW: multiple types are not supported in basic block SLP. */
1912 vf = nunits_in;
1913
1914 /* Multiple types in SLP are handled by creating the appropriate number of
1915 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1916 case of SLP. */
1917 if (slp_node)
1918 ncopies = 1;
1919 else
1920 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1921
1922 gcc_assert (ncopies >= 1);
1923
1924 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1925 return false;
1926
1927 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1928 return false;
1929
1930 /* Is STMT a vectorizable binary/unary operation? */
1931 if (!is_gimple_assign (stmt))
1932 return false;
1933
1934 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1935 return false;
1936
1937 scalar_dest = gimple_assign_lhs (stmt);
1938 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
1939 if (!vectype_out)
1940 return false;
1941 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1942 if (nunits_out != nunits_in)
1943 return false;
1944
1945 code = gimple_assign_rhs_code (stmt);
1946
1947 /* For pointer addition, we should use the normal plus for
1948 the vector addition. */
1949 if (code == POINTER_PLUS_EXPR)
1950 code = PLUS_EXPR;
1951
1952 /* Support only unary or binary operations. */
1953 op_type = TREE_CODE_LENGTH (code);
1954 if (op_type != unary_op && op_type != binary_op)
1955 {
1956 if (vect_print_dump_info (REPORT_DETAILS))
1957 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
1958 return false;
1959 }
1960
1961 op0 = gimple_assign_rhs1 (stmt);
1962 if (!vect_is_simple_use (op0, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[0]))
1963 {
1964 if (vect_print_dump_info (REPORT_DETAILS))
1965 fprintf (vect_dump, "use not simple.");
1966 return false;
1967 }
1968
1969 if (op_type == binary_op)
1970 {
1971 op1 = gimple_assign_rhs2 (stmt);
1972 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
1973 &dt[1]))
1974 {
1975 if (vect_print_dump_info (REPORT_DETAILS))
1976 fprintf (vect_dump, "use not simple.");
1977 return false;
1978 }
1979 }
1980
1981 /* If this is a shift/rotate, determine whether the shift amount is a vector,
1982 or scalar. If the shift/rotate amount is a vector, use the vector/vector
1983 shift optabs. */
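/* For example (hypothetical stmts): in "a_1 = b_2 << c_3" with c_3 defined
   inside the loop, the shift amount is a vector and the vector/vector
   optab is needed; in "a_1 = b_2 << 3", or when the amount is
   loop-invariant, a single scalar amount may be used for the whole vector
   if the target provides a vector/scalar shift. */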
1984 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
1985 || code == RROTATE_EXPR)
1986 {
1987 shift_p = true;
1988
1989 /* vector shifted by vector */
1990 if (dt[1] == vect_internal_def)
1991 {
1992 optab = optab_for_tree_code (code, vectype, optab_vector);
1993 if (vect_print_dump_info (REPORT_DETAILS))
1994 fprintf (vect_dump, "vector/vector shift/rotate found.");
1995 }
1996
1997 /* See if the machine has a vector shifted by scalar insn and if not
1998 then see if it has a vector shifted by vector insn */
1999 else if (dt[1] == vect_constant_def || dt[1] == vect_external_def)
2000 {
2001 optab = optab_for_tree_code (code, vectype, optab_scalar);
2002 if (optab
2003 && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
2004 != CODE_FOR_nothing))
2005 {
2006 scalar_shift_arg = true;
2007 if (vect_print_dump_info (REPORT_DETAILS))
2008 fprintf (vect_dump, "vector/scalar shift/rotate found.");
2009 }
2010 else
2011 {
2012 optab = optab_for_tree_code (code, vectype, optab_vector);
2013 if (optab
2014 && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
2015 != CODE_FOR_nothing))
2016 {
2017 if (vect_print_dump_info (REPORT_DETAILS))
2018 fprintf (vect_dump, "vector/vector shift/rotate found.");
2019
2020 /* Unlike the other binary operators, shifts/rotates have
2021 the rhs being int, instead of the same type as the lhs,
2022 so make sure the scalar is the right type if we are
2023 dealing with vectors of short/char. */
2024 if (dt[1] == vect_constant_def)
2025 op1 = fold_convert (TREE_TYPE (vectype), op1);
2026 }
2027 }
2028 }
2029
2030 else
2031 {
2032 if (vect_print_dump_info (REPORT_DETAILS))
2033 fprintf (vect_dump, "operand mode requires invariant argument.");
2034 return false;
2035 }
2036 }
2037 else
2038 optab = optab_for_tree_code (code, vectype, optab_default);
2039
2040 /* Supportable by target? */
2041 if (!optab)
2042 {
2043 if (vect_print_dump_info (REPORT_DETAILS))
2044 fprintf (vect_dump, "no optab.");
2045 return false;
2046 }
2047 vec_mode = TYPE_MODE (vectype);
2048 icode = (int) optab_handler (optab, vec_mode)->insn_code;
2049 if (icode == CODE_FOR_nothing)
2050 {
2051 if (vect_print_dump_info (REPORT_DETAILS))
2052 fprintf (vect_dump, "op not supported by target.");
2053 /* Check only during analysis. */
2054 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2055 || (vf < vect_min_worthwhile_factor (code)
2056 && !vec_stmt))
2057 return false;
2058 if (vect_print_dump_info (REPORT_DETAILS))
2059 fprintf (vect_dump, "proceeding using word mode.");
2060 }
2061
2062 /* Worthwhile without SIMD support? Check only during analysis. */
2063 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2064 && vf < vect_min_worthwhile_factor (code)
2065 && !vec_stmt)
2066 {
2067 if (vect_print_dump_info (REPORT_DETAILS))
2068 fprintf (vect_dump, "not worthwhile without SIMD support.");
2069 return false;
2070 }
2071
2072 if (!vec_stmt) /* transformation not required. */
2073 {
2074 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2075 if (vect_print_dump_info (REPORT_DETAILS))
2076 fprintf (vect_dump, "=== vectorizable_operation ===");
2077 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2078 return true;
2079 }
2080
2081 /** Transform. **/
2082
2083 if (vect_print_dump_info (REPORT_DETAILS))
2084 fprintf (vect_dump, "transform binary/unary operation.");
2085
2086 /* Handle def. */
2087 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2088
2089 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2090 created in the previous stages of the recursion, so no allocation is
2091 needed, except for the case of shift with scalar shift argument. In that
2092 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2093 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2094 In case of loop-based vectorization we allocate VECs of size 1. We
2095 allocate VEC_OPRNDS1 only in case of binary operation. */
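/* E.g. (sizes are hypothetical): for loop-based vectorization VEC_OPRNDS0
   and VEC_OPRNDS1 each hold one def per copy, whereas for an SLP node with
   VEC_STMTS_SIZE == 4 and a scalar shift amount VEC_OPRNDS1 is filled with
   the same scalar def four times, one entry per vector stmt of the group. */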
2096 if (!slp_node)
2097 {
2098 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2099 if (op_type == binary_op)
2100 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2101 }
2102 else if (scalar_shift_arg)
2103 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2104
2105 /* In case the vectorization factor (VF) is bigger than the number
2106 of elements that we can fit in a vectype (nunits), we have to generate
2107 more than one vector stmt - i.e - we need to "unroll" the
2108 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2109 from one copy of the vector stmt to the next, in the field
2110 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2111 stages to find the correct vector defs to be used when vectorizing
2112 stmts that use the defs of the current stmt. The example below illustrates
2113 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
2114 4 vectorized stmts):
2115
2116 before vectorization:
2117 RELATED_STMT VEC_STMT
2118 S1: x = memref - -
2119 S2: z = x + 1 - -
2120
2121 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2122 there):
2123 RELATED_STMT VEC_STMT
2124 VS1_0: vx0 = memref0 VS1_1 -
2125 VS1_1: vx1 = memref1 VS1_2 -
2126 VS1_2: vx2 = memref2 VS1_3 -
2127 VS1_3: vx3 = memref3 - -
2128 S1: x = load - VS1_0
2129 S2: z = x + 1 - -
2130
2131 step2: vectorize stmt S2 (done here):
2132 To vectorize stmt S2 we first need to find the relevant vector
2133 def for the first operand 'x'. This is, as usual, obtained from
2134 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2135 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2136 relevant vector def 'vx0'. Having found 'vx0' we can generate
2137 the vector stmt VS2_0, and as usual, record it in the
2138 STMT_VINFO_VEC_STMT of stmt S2.
2139 When creating the second copy (VS2_1), we obtain the relevant vector
2140 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2141 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2142 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2143 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2144 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2145 chain of stmts and pointers:
2146 RELATED_STMT VEC_STMT
2147 VS1_0: vx0 = memref0 VS1_1 -
2148 VS1_1: vx1 = memref1 VS1_2 -
2149 VS1_2: vx2 = memref2 VS1_3 -
2150 VS1_3: vx3 = memref3 - -
2151 S1: x = load - VS1_0
2152 VS2_0: vz0 = vx0 + v1 VS2_1 -
2153 VS2_1: vz1 = vx1 + v1 VS2_2 -
2154 VS2_2: vz2 = vx2 + v1 VS2_3 -
2155 VS2_3: vz3 = vx3 + v1 - -
2156 S2: z = x + 1 - VS2_0 */
2157
2158 prev_stmt_info = NULL;
2159 for (j = 0; j < ncopies; j++)
2160 {
2161 /* Handle uses. */
2162 if (j == 0)
2163 {
2164 if (op_type == binary_op && scalar_shift_arg)
2165 {
2166 /* Vector shl and shr insn patterns can be defined with scalar
2167 operand 2 (shift operand). In this case, use constant or loop
2168 invariant op1 directly, without extending it to vector mode
2169 first. */
2170 optab_op2_mode = insn_data[icode].operand[2].mode;
2171 if (!VECTOR_MODE_P (optab_op2_mode))
2172 {
2173 if (vect_print_dump_info (REPORT_DETAILS))
2174 fprintf (vect_dump, "operand 1 using scalar mode.");
2175 vec_oprnd1 = op1;
2176 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2177 if (slp_node)
2178 {
2179 /* Store vec_oprnd1 for every vector stmt to be created
2180 for SLP_NODE. We check during the analysis that all the
2181 shift arguments are the same.
2182 TODO: Allow different constants for different vector
2183 stmts generated for an SLP instance. */
2184 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2185 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2186 }
2187 }
2188 }
2189
2190 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
2191 (a special case for certain kind of vector shifts); otherwise,
2192 operand 1 should be of a vector type (the usual case). */
2193 if (op_type == binary_op && !vec_oprnd1)
2194 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2195 slp_node);
2196 else
2197 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2198 slp_node);
2199 }
2200 else
2201 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2202
2203 /* Arguments are ready. Create the new vector stmt. */
2204 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
2205 {
2206 vop1 = ((op_type == binary_op)
2207 ? VEC_index (tree, vec_oprnds1, i) : NULL);
2208 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2209 new_temp = make_ssa_name (vec_dest, new_stmt);
2210 gimple_assign_set_lhs (new_stmt, new_temp);
2211 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2212 if (slp_node)
2213 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2214 }
2215
2216 if (slp_node)
2217 continue;
2218
2219 if (j == 0)
2220 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2221 else
2222 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2223 prev_stmt_info = vinfo_for_stmt (new_stmt);
2224 }
2225
2226 VEC_free (tree, heap, vec_oprnds0);
2227 if (vec_oprnds1)
2228 VEC_free (tree, heap, vec_oprnds1);
2229
2230 return true;
2231 }
2232
2233
2234 /* Get vectorized definitions for loop-based vectorization. For the first
2235 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2236 scalar operand), and for the rest we get a copy with
2237 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2238 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2239 The vectors are collected into VEC_OPRNDS. */
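/* E.g. (hypothetical count): when called with MULTI_STEP_CVT == 1 the
   function collects four vector defs in total (two per invocation), which
   is enough input for a two-step narrowing: four inputs -> two
   intermediates -> one result. */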
2240
2241 static void
2242 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2243 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2244 {
2245 tree vec_oprnd;
2246
2247 /* Get first vector operand. */
2248 /* All the vector operands except the very first one (that is scalar oprnd)
2249 are stmt copies. */
2250 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2251 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2252 else
2253 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2254
2255 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2256
2257 /* Get second vector operand. */
2258 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2259 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2260
2261 *oprnd = vec_oprnd;
2262
2263 /* For conversion in multiple steps, continue to get operands
2264 recursively. */
2265 if (multi_step_cvt)
2266 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2267 }
2268
2269
2270 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2271 For multi-step conversions store the resulting vectors and call the function
2272 recursively. */
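/* Illustrative flow (types are hypothetical): demoting int to char with
   V4SI/V16QI vectors takes two steps - four V4SI operands are packed
   pairwise into two intermediate V8HI vectors, which are then packed into
   one V16QI vector. Only the last step records its stmts in the SLP node
   or in the STMT_VINFO_RELATED_STMT chain. */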
2273
2274 static void
2275 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2276 int multi_step_cvt, gimple stmt,
2277 VEC (tree, heap) *vec_dsts,
2278 gimple_stmt_iterator *gsi,
2279 slp_tree slp_node, enum tree_code code,
2280 stmt_vec_info *prev_stmt_info)
2281 {
2282 unsigned int i;
2283 tree vop0, vop1, new_tmp, vec_dest;
2284 gimple new_stmt;
2285 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2286
2287 vec_dest = VEC_pop (tree, vec_dsts);
2288
2289 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2290 {
2291 /* Create demotion operation. */
2292 vop0 = VEC_index (tree, *vec_oprnds, i);
2293 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2294 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2295 new_tmp = make_ssa_name (vec_dest, new_stmt);
2296 gimple_assign_set_lhs (new_stmt, new_tmp);
2297 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2298
2299 if (multi_step_cvt)
2300 /* Store the resulting vector for next recursive call. */
2301 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2302 else
2303 {
2304 /* This is the last step of the conversion sequence. Store the
2305 vectors in SLP_NODE or in vector info of the scalar statement
2306 (or in STMT_VINFO_RELATED_STMT chain). */
2307 if (slp_node)
2308 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2309 else
2310 {
2311 if (!*prev_stmt_info)
2312 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2313 else
2314 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2315
2316 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2317 }
2318 }
2319 }
2320
2321 /* For multi-step demotion operations we first generate demotion operations
2322 from the source type to the intermediate types, and then combine the
2323 results (stored in VEC_OPRNDS) in demotion operation to the destination
2324 type. */
2325 if (multi_step_cvt)
2326 {
2327 /* At each level of recursion we have half of the operands we had at the
2328 previous level. */
2329 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2330 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2331 stmt, vec_dsts, gsi, slp_node,
2332 code, prev_stmt_info);
2333 }
2334 }
2335
2336
2337 /* Function vectorizable_type_demotion
2338
2339 Check if STMT performs a binary or unary operation that involves
2340 type demotion, and if it can be vectorized.
2341 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2342 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2343 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2344
2345 static bool
2346 vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
2347 gimple *vec_stmt, slp_tree slp_node)
2348 {
2349 tree vec_dest;
2350 tree scalar_dest;
2351 tree op0;
2352 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2353 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2354 enum tree_code code, code1 = ERROR_MARK;
2355 tree def;
2356 gimple def_stmt;
2357 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2358 stmt_vec_info prev_stmt_info;
2359 int nunits_in;
2360 int nunits_out;
2361 tree vectype_out;
2362 int ncopies;
2363 int j, i;
2364 tree vectype_in;
2365 int multi_step_cvt = 0;
2366 VEC (tree, heap) *vec_oprnds0 = NULL;
2367 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
2368 tree last_oprnd, intermediate_type;
2369
2370 /* FORNOW: not supported by basic block SLP vectorization. */
2371 gcc_assert (loop_vinfo);
2372
2373 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2374 return false;
2375
2376 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2377 return false;
2378
2379 /* Is STMT a vectorizable type-demotion operation? */
2380 if (!is_gimple_assign (stmt))
2381 return false;
2382
2383 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2384 return false;
2385
2386 code = gimple_assign_rhs_code (stmt);
2387 if (!CONVERT_EXPR_CODE_P (code))
2388 return false;
2389
2390 op0 = gimple_assign_rhs1 (stmt);
2391 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2392 if (!vectype_in)
2393 return false;
2394 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2395
2396 scalar_dest = gimple_assign_lhs (stmt);
2397 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2398 if (!vectype_out)
2399 return false;
2400 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2401 if (nunits_in >= nunits_out)
2402 return false;
2403
2404 /* Multiple types in SLP are handled by creating the appropriate number of
2405 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2406 case of SLP. */
2407 if (slp_node)
2408 ncopies = 1;
2409 else
2410 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2411 gcc_assert (ncopies >= 1);
2412
2413 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2414 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2415 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2416 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2417 && CONVERT_EXPR_CODE_P (code))))
2418 return false;
2419
2420 /* Check the operands of the operation. */
2421 if (!vect_is_simple_use (op0, loop_vinfo, NULL, &def_stmt, &def, &dt[0]))
2422 {
2423 if (vect_print_dump_info (REPORT_DETAILS))
2424 fprintf (vect_dump, "use not simple.");
2425 return false;
2426 }
2427
2428 /* Supportable by target? */
2429 if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1,
2430 &multi_step_cvt, &interm_types))
2431 return false;
2432
2433 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2434
2435 if (!vec_stmt) /* transformation not required. */
2436 {
2437 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2438 if (vect_print_dump_info (REPORT_DETAILS))
2439 fprintf (vect_dump, "=== vectorizable_demotion ===");
2440 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2441 return true;
2442 }
2443
2444 /** Transform. **/
2445 if (vect_print_dump_info (REPORT_DETAILS))
2446 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
2447 ncopies);
2448
2449 /* In case of multi-step demotion, we first generate demotion operations to
2450 the intermediate types, and then from those types to the final one.
2451 We create vector destinations for the intermediate types (TYPES) received
2452 from supportable_narrowing_operation, and store them in the correct order
2453 for future use in vect_create_vectorized_demotion_stmts(). */
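/* Note on the order: the destination for the final type is pushed first,
   so it is popped last by vect_create_vectorized_demotion_stmts() and used
   for the last demotion step, while the intermediate destinations are
   popped (and used) in the earlier steps. */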
2454 if (multi_step_cvt)
2455 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2456 else
2457 vec_dsts = VEC_alloc (tree, heap, 1);
2458
2459 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2460 VEC_quick_push (tree, vec_dsts, vec_dest);
2461
2462 if (multi_step_cvt)
2463 {
2464 for (i = VEC_length (tree, interm_types) - 1;
2465 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2466 {
2467 vec_dest = vect_create_destination_var (scalar_dest,
2468 intermediate_type);
2469 VEC_quick_push (tree, vec_dsts, vec_dest);
2470 }
2471 }
2472
2473 /* In case the vectorization factor (VF) is bigger than the number
2474 of elements that we can fit in a vectype (nunits), we have to generate
2475 more than one vector stmt - i.e - we need to "unroll" the
2476 vector stmt by a factor VF/nunits. */
2477 last_oprnd = op0;
2478 prev_stmt_info = NULL;
2479 for (j = 0; j < ncopies; j++)
2480 {
2481 /* Handle uses. */
2482 if (slp_node)
2483 vect_get_slp_defs (slp_node, &vec_oprnds0, NULL);
2484 else
2485 {
2486 VEC_free (tree, heap, vec_oprnds0);
2487 vec_oprnds0 = VEC_alloc (tree, heap,
2488 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
2489 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2490 vect_pow2 (multi_step_cvt) - 1);
2491 }
2492
2493 /* Arguments are ready. Create the new vector stmts. */
2494 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
2495 vect_create_vectorized_demotion_stmts (&vec_oprnds0,
2496 multi_step_cvt, stmt, tmp_vec_dsts,
2497 gsi, slp_node, code1,
2498 &prev_stmt_info);
2499 }
2500
2501 VEC_free (tree, heap, vec_oprnds0);
2502 VEC_free (tree, heap, vec_dsts);
2503 VEC_free (tree, heap, tmp_vec_dsts);
2504 VEC_free (tree, heap, interm_types);
2505
2506 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2507 return true;
2508 }
2509
2510
2511 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2512 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2513 the resulting vectors and call the function recursively. */
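/* Illustrative example (types are hypothetical): promoting char to int
   with V16QI/V4SI vectors is a two-step widening - each V16QI operand is
   first unpacked into two V8HI halves, and each of those into two V4SI
   halves, so one input vector yields four result vectors. Intermediate
   results are carried in VEC_TMP between the recursive calls. */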
2514
2515 static void
2516 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2517 VEC (tree, heap) **vec_oprnds1,
2518 int multi_step_cvt, gimple stmt,
2519 VEC (tree, heap) *vec_dsts,
2520 gimple_stmt_iterator *gsi,
2521 slp_tree slp_node, enum tree_code code1,
2522 enum tree_code code2, tree decl1,
2523 tree decl2, int op_type,
2524 stmt_vec_info *prev_stmt_info)
2525 {
2526 int i;
2527 tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
2528 gimple new_stmt1, new_stmt2;
2529 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2530 VEC (tree, heap) *vec_tmp;
2531
2532 vec_dest = VEC_pop (tree, vec_dsts);
2533 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2534
2535 for (i = 0; VEC_iterate (tree, *vec_oprnds0, i, vop0); i++)
2536 {
2537 if (op_type == binary_op)
2538 vop1 = VEC_index (tree, *vec_oprnds1, i);
2539 else
2540 vop1 = NULL_TREE;
2541
2542 /* Generate the two halves of the promotion operation. */
2543 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2544 op_type, vec_dest, gsi, stmt);
2545 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2546 op_type, vec_dest, gsi, stmt);
2547 if (is_gimple_call (new_stmt1))
2548 {
2549 new_tmp1 = gimple_call_lhs (new_stmt1);
2550 new_tmp2 = gimple_call_lhs (new_stmt2);
2551 }
2552 else
2553 {
2554 new_tmp1 = gimple_assign_lhs (new_stmt1);
2555 new_tmp2 = gimple_assign_lhs (new_stmt2);
2556 }
2557
2558 if (multi_step_cvt)
2559 {
2560 /* Store the results for the recursive call. */
2561 VEC_quick_push (tree, vec_tmp, new_tmp1);
2562 VEC_quick_push (tree, vec_tmp, new_tmp2);
2563 }
2564 else
2565 {
2566 /* Last step of the promotion sequence - store the results. */
2567 if (slp_node)
2568 {
2569 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
2570 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
2571 }
2572 else
2573 {
2574 if (!*prev_stmt_info)
2575 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
2576 else
2577 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
2578
2579 *prev_stmt_info = vinfo_for_stmt (new_stmt1);
2580 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
2581 *prev_stmt_info = vinfo_for_stmt (new_stmt2);
2582 }
2583 }
2584 }
2585
2586 if (multi_step_cvt)
2587 {
2588 /* For a multi-step promotion operation we call the function recursively
2589 for every stage. We start from the input type,
2590 create promotion operations to the intermediate types, and then
2591 create promotions to the output type. */
2592 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
2593 VEC_free (tree, heap, vec_tmp);
2594 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
2595 multi_step_cvt - 1, stmt,
2596 vec_dsts, gsi, slp_node, code1,
2597 code2, decl1, decl2, op_type,
2598 prev_stmt_info);
2599 }
2600 }
2601
2602
2603 /* Function vectorizable_type_promotion
2604
2605 Check if STMT performs a binary or unary operation that involves
2606 type promotion, and if it can be vectorized.
2607 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2608 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2609 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2610
2611 static bool
2612 vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
2613 gimple *vec_stmt, slp_tree slp_node)
2614 {
2615 tree vec_dest;
2616 tree scalar_dest;
2617 tree op0, op1 = NULL;
2618 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
2619 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2620 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2621 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2622 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2623 int op_type;
2624 tree def;
2625 gimple def_stmt;
2626 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2627 stmt_vec_info prev_stmt_info;
2628 int nunits_in;
2629 int nunits_out;
2630 tree vectype_out;
2631 int ncopies;
2632 int j, i;
2633 tree vectype_in;
2634 tree intermediate_type = NULL_TREE;
2635 int multi_step_cvt = 0;
2636 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2637 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
2638
2639 /* FORNOW: not supported by basic block SLP vectorization. */
2640 gcc_assert (loop_vinfo);
2641
2642 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2643 return false;
2644
2645 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2646 return false;
2647
2648 /* Is STMT a vectorizable type-promotion operation? */
2649 if (!is_gimple_assign (stmt))
2650 return false;
2651
2652 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2653 return false;
2654
2655 code = gimple_assign_rhs_code (stmt);
2656 if (!CONVERT_EXPR_CODE_P (code)
2657 && code != WIDEN_MULT_EXPR)
2658 return false;
2659
2660 op0 = gimple_assign_rhs1 (stmt);
2661 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2662 if (!vectype_in)
2663 return false;
2664 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2665
2666 scalar_dest = gimple_assign_lhs (stmt);
2667 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2668 if (!vectype_out)
2669 return false;
2670 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2671 if (nunits_in <= nunits_out)
2672 return false;
2673
2674 /* Multiple types in SLP are handled by creating the appropriate number of
2675 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2676 case of SLP. */
2677 if (slp_node)
2678 ncopies = 1;
2679 else
2680 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2681
2682 gcc_assert (ncopies >= 1);
2683
2684 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2685 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2686 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2687 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2688 && CONVERT_EXPR_CODE_P (code))))
2689 return false;
2690
2691 /* Check the operands of the operation. */
2692 if (!vect_is_simple_use (op0, loop_vinfo, NULL, &def_stmt, &def, &dt[0]))
2693 {
2694 if (vect_print_dump_info (REPORT_DETAILS))
2695 fprintf (vect_dump, "use not simple.");
2696 return false;
2697 }
2698
2699 op_type = TREE_CODE_LENGTH (code);
2700 if (op_type == binary_op)
2701 {
2702 op1 = gimple_assign_rhs2 (stmt);
2703 if (!vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, &dt[1]))
2704 {
2705 if (vect_print_dump_info (REPORT_DETAILS))
2706 fprintf (vect_dump, "use not simple.");
2707 return false;
2708 }
2709 }
2710
2711 /* Supportable by target? */
2712 if (!supportable_widening_operation (code, stmt, vectype_in,
2713 &decl1, &decl2, &code1, &code2,
2714 &multi_step_cvt, &interm_types))
2715 return false;
2716
2717 /* Binary widening operation can only be supported directly by the
2718 architecture. */
2719 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2720
2721 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2722
2723 if (!vec_stmt) /* transformation not required. */
2724 {
2725 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2726 if (vect_print_dump_info (REPORT_DETAILS))
2727 fprintf (vect_dump, "=== vectorizable_promotion ===");
2728 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
2729 return true;
2730 }
2731
2732 /** Transform. **/
2733
2734 if (vect_print_dump_info (REPORT_DETAILS))
2735 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
2736 ncopies);
2737
2738 /* Handle def. */
2739 /* In case of multi-step promotion, we first generate promotion operations
2740 to the intermediate types, and then from those types to the final one.
2741 We store vector destination in VEC_DSTS in the correct order for
2742 recursive creation of promotion operations in
2743 vect_create_vectorized_promotion_stmts(). Vector destinations are created
2744 according to TYPES received from supportable_widening_operation(). */
2745 if (multi_step_cvt)
2746 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2747 else
2748 vec_dsts = VEC_alloc (tree, heap, 1);
2749
2750 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2751 VEC_quick_push (tree, vec_dsts, vec_dest);
2752
2753 if (multi_step_cvt)
2754 {
2755 for (i = VEC_length (tree, interm_types) - 1;
2756 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2757 {
2758 vec_dest = vect_create_destination_var (scalar_dest,
2759 intermediate_type);
2760 VEC_quick_push (tree, vec_dsts, vec_dest);
2761 }
2762 }
2763
2764 if (!slp_node)
2765 {
2766 vec_oprnds0 = VEC_alloc (tree, heap,
2767 (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
2768 if (op_type == binary_op)
2769 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2770 }
2771
2772 /* In case the vectorization factor (VF) is bigger than the number
2773 of elements that we can fit in a vectype (nunits), we have to generate
2774 more than one vector stmt - i.e - we need to "unroll" the
2775 vector stmt by a factor VF/nunits. */
2776
2777 prev_stmt_info = NULL;
2778 for (j = 0; j < ncopies; j++)
2779 {
2780 /* Handle uses. */
2781 if (j == 0)
2782 {
2783 if (slp_node)
2784 vect_get_slp_defs (slp_node, &vec_oprnds0, &vec_oprnds1);
2785 else
2786 {
2787 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2788 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2789 if (op_type == binary_op)
2790 {
2791 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
2792 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2793 }
2794 }
2795 }
2796 else
2797 {
2798 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2799 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
2800 if (op_type == binary_op)
2801 {
2802 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
2803 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
2804 }
2805 }
2806
2807 /* Arguments are ready. Create the new vector stmts. */
2808 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
2809 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
2810 multi_step_cvt, stmt,
2811 tmp_vec_dsts,
2812 gsi, slp_node, code1, code2,
2813 decl1, decl2, op_type,
2814 &prev_stmt_info);
2815 }
2816
2817 VEC_free (tree, heap, vec_dsts);
2818 VEC_free (tree, heap, tmp_vec_dsts);
2819 VEC_free (tree, heap, interm_types);
2820 VEC_free (tree, heap, vec_oprnds0);
2821 VEC_free (tree, heap, vec_oprnds1);
2822
2823 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2824 return true;
2825 }
2826
2827
2828 /* Function vectorizable_store.
2829
2830 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2831 can be vectorized.
2832 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2833 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2834 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2835
2836 static bool
2837 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2838 slp_tree slp_node)
2839 {
2840 tree scalar_dest;
2841 tree data_ref;
2842 tree op;
2843 tree vec_oprnd = NULL_TREE;
2844 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2845 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
2846 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2847 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2848 struct loop *loop = NULL;
2849 enum machine_mode vec_mode;
2850 tree dummy;
2851 enum dr_alignment_support alignment_support_scheme;
2852 tree def;
2853 gimple def_stmt;
2854 enum vect_def_type dt;
2855 stmt_vec_info prev_stmt_info = NULL;
2856 tree dataref_ptr = NULL_TREE;
2857 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2858 int ncopies;
2859 int j;
2860 gimple next_stmt, first_stmt = NULL;
2861 bool strided_store = false;
2862 unsigned int group_size, i;
2863 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
2864 bool inv_p;
2865 VEC(tree,heap) *vec_oprnds = NULL;
2866 bool slp = (slp_node != NULL);
2867 stmt_vec_info first_stmt_vinfo;
2868 unsigned int vec_num;
2869 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2870
2871 if (loop_vinfo)
2872 loop = LOOP_VINFO_LOOP (loop_vinfo);
2873
2874 /* Multiple types in SLP are handled by creating the appropriate number of
2875 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2876 case of SLP. */
2877 if (slp)
2878 ncopies = 1;
2879 else
2880 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2881
2882 gcc_assert (ncopies >= 1);
2883
2884 /* FORNOW. This restriction should be relaxed. */
2885 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
2886 {
2887 if (vect_print_dump_info (REPORT_DETAILS))
2888 fprintf (vect_dump, "multiple types in nested loop.");
2889 return false;
2890 }
2891
2892 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2893 return false;
2894
2895 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2896 return false;
2897
2898 /* Is vectorizable store? */
2899
2900 if (!is_gimple_assign (stmt))
2901 return false;
2902
2903 scalar_dest = gimple_assign_lhs (stmt);
2904 if (TREE_CODE (scalar_dest) != ARRAY_REF
2905 && TREE_CODE (scalar_dest) != INDIRECT_REF
2906 && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
2907 return false;
2908
2909 gcc_assert (gimple_assign_single_p (stmt));
2910 op = gimple_assign_rhs1 (stmt);
2911 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
2912 {
2913 if (vect_print_dump_info (REPORT_DETAILS))
2914 fprintf (vect_dump, "use not simple.");
2915 return false;
2916 }
2917
2918 /* The scalar rhs type needs to be trivially convertible to the vector
2919 component type. This should always be the case. */
2920 if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op)))
2921 {
2922 if (vect_print_dump_info (REPORT_DETAILS))
2923 fprintf (vect_dump, "??? operands of different types");
2924 return false;
2925 }
2926
2927 vec_mode = TYPE_MODE (vectype);
2928 /* FORNOW. In some cases can vectorize even if data-type not supported
2929 (e.g. - array initialization with 0). */
2930 if (optab_handler (mov_optab, (int)vec_mode)->insn_code == CODE_FOR_nothing)
2931 return false;
2932
2933 if (!STMT_VINFO_DATA_REF (stmt_info))
2934 return false;
2935
2936 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
2937 {
2938 strided_store = true;
2939 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
2940 if (!vect_strided_store_supported (vectype)
2941 && !PURE_SLP_STMT (stmt_info) && !slp)
2942 return false;
2943
2944 if (first_stmt == stmt)
2945 {
2946 /* STMT is the leader of the group. Check the operands of all the
2947 stmts of the group. */
2948 next_stmt = DR_GROUP_NEXT_DR (stmt_info);
2949 while (next_stmt)
2950 {
2951 gcc_assert (gimple_assign_single_p (next_stmt));
2952 op = gimple_assign_rhs1 (next_stmt);
2953 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
2954 &def, &dt))
2955 {
2956 if (vect_print_dump_info (REPORT_DETAILS))
2957 fprintf (vect_dump, "use not simple.");
2958 return false;
2959 }
2960 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
2961 }
2962 }
2963 }
2964
2965 if (!vec_stmt) /* transformation not required. */
2966 {
2967 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2968 vect_model_store_cost (stmt_info, ncopies, dt, NULL);
2969 return true;
2970 }
2971
2972 /** Transform. **/
2973
2974 if (strided_store)
2975 {
2976 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2977 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
2978
2979 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
2980
2981 /* FORNOW */
2982 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
2983
2984 /* We vectorize all the stmts of the interleaving group when we
2985 reach the last stmt in the group. */
2986 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
2987 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
2988 && !slp)
2989 {
2990 *vec_stmt = NULL;
2991 return true;
2992 }
2993
2994 if (slp)
2995 strided_store = false;
2996
2997 /* VEC_NUM is the number of vect stmts to be created for this group. */
2998 if (slp)
2999 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3000 else
3001 vec_num = group_size;
3002 }
3003 else
3004 {
3005 first_stmt = stmt;
3006 first_dr = dr;
3007 group_size = vec_num = 1;
3008 first_stmt_vinfo = stmt_info;
3009 }
3010
3011 if (vect_print_dump_info (REPORT_DETAILS))
3012 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3013
3014 dr_chain = VEC_alloc (tree, heap, group_size);
3015 oprnds = VEC_alloc (tree, heap, group_size);
3016
3017 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
3018 gcc_assert (alignment_support_scheme);
3019 gcc_assert (alignment_support_scheme == dr_aligned); /* FORNOW */
3020
3021 /* In case the vectorization factor (VF) is bigger than the number
3022 of elements that we can fit in a vectype (nunits), we have to generate
3023 more than one vector stmt - i.e - we need to "unroll" the
3024 vector stmt by a factor VF/nunits. For more details see documentation in
3025 vect_get_vec_def_for_copy_stmt. */
3026
3027 /* In case of interleaving (non-unit strided access):
3028
3029 S1: &base + 2 = x2
3030 S2: &base = x0
3031 S3: &base + 1 = x1
3032 S4: &base + 3 = x3
3033
3034 We create vectorized stores starting from base address (the access of the
3035 first stmt in the chain (S2 in the above example), when the last store stmt
3036 of the chain (S4) is reached:
3037
3038 VS1: &base = vx2
3039 VS2: &base + vec_size*1 = vx0
3040 VS3: &base + vec_size*2 = vx1
3041 VS4: &base + vec_size*3 = vx3
3042
3043 Then permutation statements are generated:
3044
3045 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3046 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3047 ...
3048
3049 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3050 (the order of the data-refs in the output of vect_permute_store_chain
3051 corresponds to the order of scalar stmts in the interleaving chain - see
3052 the documentation of vect_permute_store_chain()).
3053
3054 In case of both multiple types and interleaving, above vector stores and
3055 permutation stmts are created for every copy. The result vector stmts are
3056 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3057 STMT_VINFO_RELATED_STMT for the next copies.
3058 */
3059
3060 prev_stmt_info = NULL;
3061 for (j = 0; j < ncopies; j++)
3062 {
3063 gimple new_stmt;
3064 gimple ptr_incr;
3065
3066 if (j == 0)
3067 {
3068 if (slp)
3069 {
3070 /* Get vectorized arguments for SLP_NODE. */
3071 vect_get_slp_defs (slp_node, &vec_oprnds, NULL);
3072
3073 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3074 }
3075 else
3076 {
3077 /* For interleaved stores we collect vectorized defs for all the
3078 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3079 used as an input to vect_permute_store_chain(), and OPRNDS as
3080 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3081
3082 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3083 OPRNDS are of size 1. */
3084 next_stmt = first_stmt;
3085 for (i = 0; i < group_size; i++)
3086 {
3087 /* Since gaps are not supported for interleaved stores,
3088 GROUP_SIZE is the exact number of stmts in the chain.
3089 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3090 there is no interleaving, GROUP_SIZE is 1, and only one
3091 iteration of the loop will be executed. */
3092 gcc_assert (next_stmt
3093 && gimple_assign_single_p (next_stmt));
3094 op = gimple_assign_rhs1 (next_stmt);
3095
3096 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3097 NULL);
3098 VEC_quick_push(tree, dr_chain, vec_oprnd);
3099 VEC_quick_push(tree, oprnds, vec_oprnd);
3100 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3101 }
3102 }
3103
3104 /* We should have caught mismatched types earlier. */
3105 gcc_assert (useless_type_conversion_p (vectype,
3106 TREE_TYPE (vec_oprnd)));
3107 dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE,
3108 &dummy, &ptr_incr, false,
3109 &inv_p);
3110 gcc_assert (bb_vinfo || !inv_p);
3111 }
3112 else
3113 {
3114 /* For interleaved stores we created vectorized defs for all the
3115 defs stored in OPRNDS in the previous iteration (previous copy).
3116 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3117 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3118 next copy.
3119 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3120 OPRNDS are of size 1. */
3121 for (i = 0; i < group_size; i++)
3122 {
3123 op = VEC_index (tree, oprnds, i);
3124 vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
3125 &dt);
3126 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3127 VEC_replace(tree, dr_chain, i, vec_oprnd);
3128 VEC_replace(tree, oprnds, i, vec_oprnd);
3129 }
3130 dataref_ptr =
3131 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
3132 }
3133
3134 if (strided_store)
3135 {
3136 result_chain = VEC_alloc (tree, heap, group_size);
3137 /* Permute. */
3138 if (!vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
3139 &result_chain))
3140 return false;
3141 }
3142
3143 next_stmt = first_stmt;
3144 for (i = 0; i < vec_num; i++)
3145 {
3146 if (i > 0)
3147 /* Bump the vector pointer. */
3148 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3149 NULL_TREE);
3150
3151 if (slp)
3152 vec_oprnd = VEC_index (tree, vec_oprnds, i);
3153 else if (strided_store)
3154 /* For strided stores vectorized defs are interleaved in
3155 vect_permute_store_chain(). */
3156 vec_oprnd = VEC_index (tree, result_chain, i);
3157
3158 data_ref = build_fold_indirect_ref (dataref_ptr);
3159 /* If accesses through a pointer to vectype do not alias the original
3160 memory reference we have a problem. This should never happen. */
3161 gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref),
3162 get_alias_set (gimple_assign_lhs (stmt))));
3163
3164 /* Arguments are ready. Create the new vector stmt. */
3165 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
3166 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3167 mark_symbols_for_renaming (new_stmt);
3168
3169 if (slp)
3170 continue;
3171
3172 if (j == 0)
3173 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3174 else
3175 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3176
3177 prev_stmt_info = vinfo_for_stmt (new_stmt);
3178 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3179 if (!next_stmt)
3180 break;
3181 }
3182 }
3183
3184 VEC_free (tree, heap, dr_chain);
3185 VEC_free (tree, heap, oprnds);
3186 if (result_chain)
3187 VEC_free (tree, heap, result_chain);
3188
3189 return true;
3190 }
3191
3192 /* vectorizable_load.
3193
3194 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
3195 can be vectorized.
3196 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3197 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3198 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3199
3200 static bool
3201 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3202 slp_tree slp_node, slp_instance slp_node_instance)
3203 {
3204 tree scalar_dest;
3205 tree vec_dest = NULL;
3206 tree data_ref = NULL;
3207 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3208 stmt_vec_info prev_stmt_info;
3209 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3210 struct loop *loop = NULL;
3211 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3212 bool nested_in_vect_loop = false;
3213 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
3214 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3215 tree new_temp;
3216 int mode;
3217 gimple new_stmt = NULL;
3218 tree dummy;
3219 enum dr_alignment_support alignment_support_scheme;
3220 tree dataref_ptr = NULL_TREE;
3221 gimple ptr_incr;
3222 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3223 int ncopies;
3224 int i, j, group_size;
3225 tree msq = NULL_TREE, lsq;
3226 tree offset = NULL_TREE;
3227 tree realignment_token = NULL_TREE;
3228 gimple phi = NULL;
3229 VEC(tree,heap) *dr_chain = NULL;
3230 bool strided_load = false;
3231 gimple first_stmt;
3232 tree scalar_type;
3233 bool inv_p;
3234 bool compute_in_loop = false;
3235 struct loop *at_loop;
3236 int vec_num;
3237 bool slp = (slp_node != NULL);
3238 bool slp_perm = false;
3239 enum tree_code code;
3240 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3241 int vf;
3242
3243 if (loop_vinfo)
3244 {
3245 loop = LOOP_VINFO_LOOP (loop_vinfo);
3246 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3247 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3248 }
3249 else
3250 /* FORNOW: multiple types are not supported in basic block SLP. */
3251 vf = nunits;
3252
3253 /* Multiple types in SLP are handled by creating the appropriate number of
3254 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3255 case of SLP. */
3256 if (slp)
3257 ncopies = 1;
3258 else
3259 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3260
3261 gcc_assert (ncopies >= 1);
3262
3263 /* FORNOW. This restriction should be relaxed. */
3264 if (nested_in_vect_loop && ncopies > 1)
3265 {
3266 if (vect_print_dump_info (REPORT_DETAILS))
3267 fprintf (vect_dump, "multiple types in nested loop.");
3268 return false;
3269 }
3270
3271 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3272 return false;
3273
3274 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3275 return false;
3276
3277 /* Is vectorizable load? */
3278 if (!is_gimple_assign (stmt))
3279 return false;
3280
3281 scalar_dest = gimple_assign_lhs (stmt);
3282 if (TREE_CODE (scalar_dest) != SSA_NAME)
3283 return false;
3284
3285 code = gimple_assign_rhs_code (stmt);
3286 if (code != ARRAY_REF
3287 && code != INDIRECT_REF
3288 && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
3289 return false;
3290
3291 if (!STMT_VINFO_DATA_REF (stmt_info))
3292 return false;
3293
3294 scalar_type = TREE_TYPE (DR_REF (dr));
3295 mode = (int) TYPE_MODE (vectype);
3296
3297 /* FORNOW. In some cases can vectorize even if data-type not supported
3298 (e.g. - data copies). */
3299 if (optab_handler (mov_optab, mode)->insn_code == CODE_FOR_nothing)
3300 {
3301 if (vect_print_dump_info (REPORT_DETAILS))
3302 fprintf (vect_dump, "Aligned load, but unsupported type.");
3303 return false;
3304 }
3305
3306 /* The vector component type needs to be trivially convertible to the
3307 scalar lhs. This should always be the case. */
3308 if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), TREE_TYPE (vectype)))
3309 {
3310 if (vect_print_dump_info (REPORT_DETAILS))
3311 fprintf (vect_dump, "??? operands of different types");
3312 return false;
3313 }
3314
3315 /* Check if the load is a part of an interleaving chain. */
3316 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3317 {
3318 strided_load = true;
3319 /* FORNOW */
3320 gcc_assert (! nested_in_vect_loop);
3321
3322 /* Check if interleaving is supported. */
3323 if (!vect_strided_load_supported (vectype)
3324 && !PURE_SLP_STMT (stmt_info) && !slp)
3325 return false;
3326 }
3327
3328 if (!vec_stmt) /* transformation not required. */
3329 {
3330 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
3331 vect_model_load_cost (stmt_info, ncopies, NULL);
3332 return true;
3333 }
3334
3335 if (vect_print_dump_info (REPORT_DETAILS))
3336 fprintf (vect_dump, "transform load.");
3337
3338 /** Transform. **/
3339
3340 if (strided_load)
3341 {
3342 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3343 /* Check if the chain of loads is already vectorized. */
3344 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
3345 {
3346 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3347 return true;
3348 }
3349 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3350 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3351
3352 /* VEC_NUM is the number of vect stmts to be created for this group. */
3353 if (slp)
3354 {
3355 strided_load = false;
3356 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3357 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
3358 slp_perm = true;
3359 }
3360 else
3361 vec_num = group_size;
3362
3363 dr_chain = VEC_alloc (tree, heap, vec_num);
3364 }
3365 else
3366 {
3367 first_stmt = stmt;
3368 first_dr = dr;
3369 group_size = vec_num = 1;
3370 }
3371
3372 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
3373 gcc_assert (alignment_support_scheme);
3374
3375 /* In case the vectorization factor (VF) is bigger than the number
3376 of elements that we can fit in a vectype (nunits), we have to generate
3377 more than one vector stmt - i.e - we need to "unroll" the
3378 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3379 from one copy of the vector stmt to the next, in the field
3380 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3381 stages to find the correct vector defs to be used when vectorizing
3382 stmts that use the defs of the current stmt. The example below illustrates
3383 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3384 4 vectorized stmts):
3385
3386 before vectorization:
3387 RELATED_STMT VEC_STMT
3388 S1: x = memref - -
3389 S2: z = x + 1 - -
3390
3391 step 1: vectorize stmt S1:
3392 We first create the vector stmt VS1_0, and, as usual, record a
3393 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
3394 Next, we create the vector stmt VS1_1, and record a pointer to
3395 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
3396 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
3397 stmts and pointers:
3398 RELATED_STMT VEC_STMT
3399 VS1_0: vx0 = memref0 VS1_1 -
3400 VS1_1: vx1 = memref1 VS1_2 -
3401 VS1_2: vx2 = memref2 VS1_3 -
3402 VS1_3: vx3 = memref3 - -
3403 S1: x = load - VS1_0
3404 S2: z = x + 1 - -
3405
3406 See in documentation in vect_get_vec_def_for_stmt_copy for how the
3407 information we recorded in RELATED_STMT field is used to vectorize
3408 stmt S2. */
3409
3410 /* In case of interleaving (non-unit strided access):
3411
3412 S1: x2 = &base + 2
3413 S2: x0 = &base
3414 S3: x1 = &base + 1
3415 S4: x3 = &base + 3
3416
3417 Vectorized loads are created in the order of memory accesses
3418 starting from the access of the first stmt of the chain:
3419
3420 VS1: vx0 = &base
3421 VS2: vx1 = &base + vec_size*1
3422 VS3: vx3 = &base + vec_size*2
3423 VS4: vx4 = &base + vec_size*3
3424
3425 Then permutation statements are generated:
3426
3427 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
3428 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
3429 ...
3430
3431 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3432 (the order of the data-refs in the output of vect_permute_load_chain
3433 corresponds to the order of scalar stmts in the interleaving chain - see
3434 the documentation of vect_permute_load_chain()).
3435 The generation of permutation stmts and recording them in
3436 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
3437
3438 In case of both multiple types and interleaving, the vector loads and
3439 permutation stmts above are created for every copy. The result vector stmts
3440 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3441 STMT_VINFO_RELATED_STMT for the next copies. */
3442
3443 /* If the data reference is aligned (dr_aligned) or potentially unaligned
3444 on a target that supports unaligned accesses (dr_unaligned_supported)
3445 we generate the following code:
3446 p = initial_addr;
3447 indx = 0;
3448 loop {
3449 p = p + indx * vectype_size;
3450 vec_dest = *(p);
3451 indx = indx + 1;
3452 }
3453
3454 Otherwise, the data reference is potentially unaligned on a target that
3455 does not support unaligned accesses (dr_explicit_realign_optimized) -
3456 then generate the following code, in which the data in each iteration is
3457 obtained by two vector loads, one from the previous iteration, and one
3458 from the current iteration:
3459 p1 = initial_addr;
3460 msq_init = *(floor(p1))
3461 p2 = initial_addr + VS - 1;
3462 realignment_token = call target_builtin;
3463 indx = 0;
3464 loop {
3465 p2 = p2 + indx * vectype_size
3466 lsq = *(floor(p2))
3467 vec_dest = realign_load (msq, lsq, realignment_token)
3468 indx = indx + 1;
3469 msq = lsq;
3470 } */
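/* In the scheme above, realign_load combines the two aligned vectors MSQ
   and LSQ, using REALIGNMENT_TOKEN (a target-specific shift/permute control
   derived from the misalignment) to extract the unaligned vector that
   starts inside MSQ and ends inside LSQ.  In the optimized scheme the load
   of the initial MSQ is hoisted to the preheader, and each iteration reuses
   the previous LSQ as the next MSQ, so only one vector load per iteration
   remains inside the loop.  */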
3471
3472 /* If the misalignment remains the same throughout the execution of the
3473 loop, we can create the init_addr and permutation mask at the loop
3474 preheader. Otherwise, it needs to be created inside the loop.
3475 This can only occur when vectorizing memory accesses in the inner-loop
3476 nested within an outer-loop that is being vectorized. */
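/* For example (hypothetical numbers): with a 16-byte vectype, an inner-loop
   data-ref whose step per outer-loop iteration (DR_STEP) is 4 bytes starts
   at a different offset modulo 16 in every outer-loop iteration, so a
   realignment token computed once in the preheader would be wrong for later
   iterations and has to be computed inside the loop instead.  */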
3477
3478 if (loop && nested_in_vect_loop_p (loop, stmt)
3479 && (TREE_INT_CST_LOW (DR_STEP (dr))
3480 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
3481 {
3482 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
3483 compute_in_loop = true;
3484 }
3485
3486 if ((alignment_support_scheme == dr_explicit_realign_optimized
3487 || alignment_support_scheme == dr_explicit_realign)
3488 && !compute_in_loop)
3489 {
3490 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
3491 alignment_support_scheme, NULL_TREE,
3492 &at_loop);
3493 if (alignment_support_scheme == dr_explicit_realign_optimized)
3494 {
3495 phi = SSA_NAME_DEF_STMT (msq);
3496 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
3497 }
3498 }
3499 else
3500 at_loop = loop;
3501
3502 prev_stmt_info = NULL;
3503 for (j = 0; j < ncopies; j++)
3504 {
3505 /* 1. Create the vector pointer update chain. */
3506 if (j == 0)
3507 dataref_ptr = vect_create_data_ref_ptr (first_stmt,
3508 at_loop, offset,
3509 &dummy, &ptr_incr, false,
3510 &inv_p);
3511 else
3512 dataref_ptr =
3513 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
3514
3515 for (i = 0; i < vec_num; i++)
3516 {
3517 if (i > 0)
3518 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3519 NULL_TREE);
3520
3521 /* 2. Create the vector-load in the loop. */
3522 switch (alignment_support_scheme)
3523 {
3524 case dr_aligned:
3525 gcc_assert (aligned_access_p (first_dr));
3526 data_ref = build_fold_indirect_ref (dataref_ptr);
3527 break;
3528 case dr_unaligned_supported:
3529 {
3530 int mis = DR_MISALIGNMENT (first_dr);
3531 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
3532
3533 tmis = size_binop (MULT_EXPR, tmis, size_int (BITS_PER_UNIT));
3534 data_ref =
3535 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
3536 break;
3537 }
3538 case dr_explicit_realign:
3539 {
3540 tree ptr, bump;
3541 tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
3542
3543 if (compute_in_loop)
3544 msq = vect_setup_realignment (first_stmt, gsi,
3545 &realignment_token,
3546 dr_explicit_realign,
3547 dataref_ptr, NULL);
3548
3549 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
3550 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3551 new_stmt = gimple_build_assign (vec_dest, data_ref);
3552 new_temp = make_ssa_name (vec_dest, new_stmt);
3553 gimple_assign_set_lhs (new_stmt, new_temp);
3554 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
3555 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
3556 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3557 msq = new_temp;
3558
3559 bump = size_binop (MULT_EXPR, vs_minus_1,
3560 TYPE_SIZE_UNIT (scalar_type));
3561 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
3562 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
3563 break;
3564 }
3565 case dr_explicit_realign_optimized:
3566 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
3567 break;
3568 default:
3569 gcc_unreachable ();
3570 }
3571 /* If accesses through a pointer to vectype do not alias the original
3572 memory reference we have a problem. This should never happen. */
3573 gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref),
3574 get_alias_set (gimple_assign_rhs1 (stmt))));
3575 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3576 new_stmt = gimple_build_assign (vec_dest, data_ref);
3577 new_temp = make_ssa_name (vec_dest, new_stmt);
3578 gimple_assign_set_lhs (new_stmt, new_temp);
3579 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3580 mark_symbols_for_renaming (new_stmt);
3581
3582 /* 3. Handle explicit realignment if necessary/supported. Create in
3583 loop: vec_dest = realign_load (msq, lsq, realignment_token) */
3584 if (alignment_support_scheme == dr_explicit_realign_optimized
3585 || alignment_support_scheme == dr_explicit_realign)
3586 {
3587 tree tmp;
3588
3589 lsq = gimple_assign_lhs (new_stmt);
3590 if (!realignment_token)
3591 realignment_token = dataref_ptr;
3592 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3593 tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq,
3594 realignment_token);
3595 new_stmt = gimple_build_assign (vec_dest, tmp);
3596 new_temp = make_ssa_name (vec_dest, new_stmt);
3597 gimple_assign_set_lhs (new_stmt, new_temp);
3598 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3599
3600 if (alignment_support_scheme == dr_explicit_realign_optimized)
3601 {
3602 gcc_assert (phi);
3603 if (i == vec_num - 1 && j == ncopies - 1)
3604 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop));
3605 msq = lsq;
3606 }
3607 }
3608
3609 /* 4. Handle invariant-load. */
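/* The value loaded here is invariant in the vectorized loop, so instead of
   loading NUNITS distinct elements we extract the scalar with a
   BIT_FIELD_REF and replicate it into every element of a vector
   CONSTRUCTOR (see the asserts below: this only happens for loads in an
   inner loop during outer-loop vectorization).  */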
3610 if (inv_p && !bb_vinfo)
3611 {
3612 gcc_assert (!strided_load);
3613 gcc_assert (nested_in_vect_loop_p (loop, stmt));
3614 if (j == 0)
3615 {
3616 int k;
3617 tree t = NULL_TREE;
3618 tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
3619
3620 /* CHECKME: bitpos depends on endianness? */
3621 bitpos = bitsize_zero_node;
3622 vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
3623 bitsize, bitpos);
3624 vec_dest =
3625 vect_create_destination_var (scalar_dest, NULL_TREE);
3626 new_stmt = gimple_build_assign (vec_dest, vec_inv);
3627 new_temp = make_ssa_name (vec_dest, new_stmt);
3628 gimple_assign_set_lhs (new_stmt, new_temp);
3629 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3630
3631 for (k = nunits - 1; k >= 0; --k)
3632 t = tree_cons (NULL_TREE, new_temp, t);
3633 /* FIXME: use build_constructor directly. */
3634 vec_inv = build_constructor_from_list (vectype, t);
3635 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
3636 new_stmt = SSA_NAME_DEF_STMT (new_temp);
3637 }
3638 else
3639 gcc_unreachable (); /* FORNOW. */
3640 }
3641
3642 /* Collect vector loads and later create their permutation in
3643 vect_transform_strided_load (). */
3644 if (strided_load || slp_perm)
3645 VEC_quick_push (tree, dr_chain, new_temp);
3646
3647 /* Store vector loads in the corresponding SLP_NODE. */
3648 if (slp && !slp_perm)
3649 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3650 }
3651
3652 if (slp && !slp_perm)
3653 continue;
3654
3655 if (slp_perm)
3656 {
3657 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
3658 slp_node_instance, false))
3659 {
3660 VEC_free (tree, heap, dr_chain);
3661 return false;
3662 }
3663 }
3664 else
3665 {
3666 if (strided_load)
3667 {
3668 if (!vect_transform_strided_load (stmt, dr_chain, group_size, gsi))
3669 return false;
3670
3671 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3672 VEC_free (tree, heap, dr_chain);
3673 dr_chain = VEC_alloc (tree, heap, group_size);
3674 }
3675 else
3676 {
3677 if (j == 0)
3678 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3679 else
3680 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3681 prev_stmt_info = vinfo_for_stmt (new_stmt);
3682 }
3683 }
3684 }
3685
3686 if (dr_chain)
3687 VEC_free (tree, heap, dr_chain);
3688
3689 return true;
3690 }
3691
3692 /* Function vect_is_simple_cond.
3693
3694 Input:
3695 LOOP - the loop that is being vectorized.
3696 COND - Condition that is checked for simple use.
3697
3698 Returns whether COND can be vectorized. Checks whether the
3699 condition operands are supportable using vect_is_simple_use. */
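/* For example (illustrative only), a condition such as a_1 < b_2 is simple
   when each operand is a constant (INTEGER_CST, REAL_CST or FIXED_CST) or
   an SSA name whose definition is accepted by vect_is_simple_use; any other
   operand form is rejected.  */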
3700
3701 static bool
3702 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
3703 {
3704 tree lhs, rhs;
3705 tree def;
3706 enum vect_def_type dt;
3707
3708 if (!COMPARISON_CLASS_P (cond))
3709 return false;
3710
3711 lhs = TREE_OPERAND (cond, 0);
3712 rhs = TREE_OPERAND (cond, 1);
3713
3714 if (TREE_CODE (lhs) == SSA_NAME)
3715 {
3716 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
3717 if (!vect_is_simple_use (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
3718 &dt))
3719 return false;
3720 }
3721 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
3722 && TREE_CODE (lhs) != FIXED_CST)
3723 return false;
3724
3725 if (TREE_CODE (rhs) == SSA_NAME)
3726 {
3727 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
3728 if (!vect_is_simple_use (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
3729 &dt))
3730 return false;
3731 }
3732 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
3733 && TREE_CODE (rhs) != FIXED_CST)
3734 return false;
3735
3736 return true;
3737 }
3738
3739 /* vectorizable_condition.
3740
3741 Check if STMT is a conditional modify expression that can be vectorized.
3742 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3743 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
3744 at BSI.
3745
3746 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
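/* For example (illustrative only), a scalar statement

     x = a < b ? c : d

   is replaced by a single vector statement of the form

     vx = VEC_COND_EXPR <va < vb, vc, vd>

   where va, vb, vc and vd are the vector defs of the corresponding scalar
   operands.  */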
3747
3748 static bool
3749 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
3750 gimple *vec_stmt)
3751 {
3752 tree scalar_dest = NULL_TREE;
3753 tree vec_dest = NULL_TREE;
3754 tree op = NULL_TREE;
3755 tree cond_expr, then_clause, else_clause;
3756 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3757 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3758 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
3759 tree vec_compare, vec_cond_expr;
3760 tree new_temp;
3761 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3762 enum machine_mode vec_mode;
3763 tree def;
3764 enum vect_def_type dt;
3765 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3766 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3767 enum tree_code code;
3768
3769 /* FORNOW: unsupported in basic block SLP. */
3770 gcc_assert (loop_vinfo);
3771
3772 gcc_assert (ncopies >= 1);
3773 if (ncopies > 1)
3774 return false; /* FORNOW */
3775
3776 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3777 return false;
3778
3779 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3780 return false;
3781
3782 /* FORNOW: SLP not supported. */
3783 if (STMT_SLP_TYPE (stmt_info))
3784 return false;
3785
3786 /* FORNOW: not yet supported. */
3787 if (STMT_VINFO_LIVE_P (stmt_info))
3788 {
3789 if (vect_print_dump_info (REPORT_DETAILS))
3790 fprintf (vect_dump, "value used after loop.");
3791 return false;
3792 }
3793
3794 /* Is this a vectorizable conditional operation? */
3795 if (!is_gimple_assign (stmt))
3796 return false;
3797
3798 code = gimple_assign_rhs_code (stmt);
3799
3800 if (code != COND_EXPR)
3801 return false;
3802
3803 gcc_assert (gimple_assign_single_p (stmt));
3804 op = gimple_assign_rhs1 (stmt);
3805 cond_expr = TREE_OPERAND (op, 0);
3806 then_clause = TREE_OPERAND (op, 1);
3807 else_clause = TREE_OPERAND (op, 2);
3808
3809 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
3810 return false;
3811
3812 /* We do not handle two different vector types for the condition
3813 and the values. */
3814 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
3815 return false;
3816
3817 if (TREE_CODE (then_clause) == SSA_NAME)
3818 {
3819 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
3820 if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
3821 &then_def_stmt, &def, &dt))
3822 return false;
3823 }
3824 else if (TREE_CODE (then_clause) != INTEGER_CST
3825 && TREE_CODE (then_clause) != REAL_CST
3826 && TREE_CODE (then_clause) != FIXED_CST)
3827 return false;
3828
3829 if (TREE_CODE (else_clause) == SSA_NAME)
3830 {
3831 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
3832 if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
3833 &else_def_stmt, &def, &dt))
3834 return false;
3835 }
3836 else if (TREE_CODE (else_clause) != INTEGER_CST
3837 && TREE_CODE (else_clause) != REAL_CST
3838 && TREE_CODE (else_clause) != FIXED_CST)
3839 return false;
3840
3841
3842 vec_mode = TYPE_MODE (vectype);
3843
3844 if (!vec_stmt)
3845 {
3846 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
3847 return expand_vec_cond_expr_p (op, vec_mode);
3848 }
3849
3850 /* Transform */
3851
3852 /* Handle def. */
3853 scalar_dest = gimple_assign_lhs (stmt);
3854 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3855
3856 /* Handle cond expr. */
3857 vec_cond_lhs =
3858 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
3859 vec_cond_rhs =
3860 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
3861 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
3862 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
3863
3864 /* Arguments are ready. Create the new vector stmt. */
3865 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
3866 vec_cond_lhs, vec_cond_rhs);
3867 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
3868 vec_compare, vec_then_clause, vec_else_clause);
3869
3870 *vec_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
3871 new_temp = make_ssa_name (vec_dest, *vec_stmt);
3872 gimple_assign_set_lhs (*vec_stmt, new_temp);
3873 vect_finish_stmt_generation (stmt, *vec_stmt, gsi);
3874
3875 return true;
3876 }
3877
3878
3879 /* Make sure the statement is vectorizable. */
3880
3881 bool
3882 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
3883 {
3884 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3885 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3886 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
3887 bool ok;
3888 HOST_WIDE_INT dummy;
3889 tree scalar_type, vectype;
3890
3891 if (vect_print_dump_info (REPORT_DETAILS))
3892 {
3893 fprintf (vect_dump, "==> examining statement: ");
3894 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3895 }
3896
3897 /* Skip stmts that do not need to be vectorized. In loops this is expected
3898 to include:
3899 - the COND_EXPR which is the loop exit condition
3900 - any LABEL_EXPRs in the loop
3901 - computations that are used only for array indexing or loop control.
3902 In basic blocks we only analyze statements that are a part of some SLP
3903 instance, therefore, all the statements are relevant. */
3904
3905 if (!STMT_VINFO_RELEVANT_P (stmt_info)
3906 && !STMT_VINFO_LIVE_P (stmt_info))
3907 {
3908 if (vect_print_dump_info (REPORT_DETAILS))
3909 fprintf (vect_dump, "irrelevant.");
3910
3911 return true;
3912 }
3913
3914 switch (STMT_VINFO_DEF_TYPE (stmt_info))
3915 {
3916 case vect_internal_def:
3917 break;
3918
3919 case vect_reduction_def:
3920 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
3921 || relevance == vect_used_in_outer_by_reduction
3922 || relevance == vect_unused_in_scope));
3923 break;
3924
3925 case vect_induction_def:
3926 case vect_constant_def:
3927 case vect_external_def:
3928 case vect_unknown_def_type:
3929 default:
3930 gcc_unreachable ();
3931 }
3932
3933 if (bb_vinfo)
3934 {
3935 gcc_assert (PURE_SLP_STMT (stmt_info));
3936
3937 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
3938 if (vect_print_dump_info (REPORT_DETAILS))
3939 {
3940 fprintf (vect_dump, "get vectype for scalar type: ");
3941 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
3942 }
3943
3944 vectype = get_vectype_for_scalar_type (scalar_type);
3945 if (!vectype)
3946 {
3947 if (vect_print_dump_info (REPORT_DETAILS))
3948 {
3949 fprintf (vect_dump, "not SLPed: unsupported data-type ");
3950 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
3951 }
3952 return false;
3953 }
3954
3955 if (vect_print_dump_info (REPORT_DETAILS))
3956 {
3957 fprintf (vect_dump, "vectype: ");
3958 print_generic_expr (vect_dump, vectype, TDF_SLIM);
3959 }
3960
3961 STMT_VINFO_VECTYPE (stmt_info) = vectype;
3962 }
3963
3964 if (STMT_VINFO_RELEVANT_P (stmt_info))
3965 {
3966 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
3967 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
3968 *need_to_vectorize = true;
3969 }
3970
3971 ok = true;
3972 if (!bb_vinfo
3973 && (STMT_VINFO_RELEVANT_P (stmt_info)
3974 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
3975 ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
3976 || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
3977 || vectorizable_conversion (stmt, NULL, NULL, NULL)
3978 || vectorizable_operation (stmt, NULL, NULL, NULL)
3979 || vectorizable_assignment (stmt, NULL, NULL, NULL)
3980 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
3981 || vectorizable_call (stmt, NULL, NULL)
3982 || vectorizable_store (stmt, NULL, NULL, NULL)
3983 || vectorizable_condition (stmt, NULL, NULL)
3984 || vectorizable_reduction (stmt, NULL, NULL));
3985 else
3986 {
3987 if (bb_vinfo)
3988 ok = (vectorizable_operation (stmt, NULL, NULL, node)
3989 || vectorizable_assignment (stmt, NULL, NULL, node)
3990 || vectorizable_load (stmt, NULL, NULL, node, NULL)
3991 || vectorizable_store (stmt, NULL, NULL, node));
3992 }
3993
3994 if (!ok)
3995 {
3996 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3997 {
3998 fprintf (vect_dump, "not vectorized: relevant stmt not ");
3999 fprintf (vect_dump, "supported: ");
4000 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4001 }
4002
4003 return false;
4004 }
4005
4006 if (bb_vinfo)
4007 return true;
4008
4009 /* Stmts that are (also) "live" (i.e., that are used outside the loop)
4010 need extra handling, except for vectorizable reductions. */
4011 if (STMT_VINFO_LIVE_P (stmt_info)
4012 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
4013 ok = vectorizable_live_operation (stmt, NULL, NULL);
4014
4015 if (!ok)
4016 {
4017 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
4018 {
4019 fprintf (vect_dump, "not vectorized: live stmt not ");
4020 fprintf (vect_dump, "supported: ");
4021 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4022 }
4023
4024 return false;
4025 }
4026
4027 if (!PURE_SLP_STMT (stmt_info))
4028 {
4029 /* Groups of strided accesses whose size is not a power of 2 are not
4030 vectorizable yet using loop-vectorization. Therefore, if this stmt
4031 feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and
4032 loop-based vectorized), the loop cannot be vectorized. */
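/* For example, a group of 3 interleaved accesses (exact_log2 (3) == -1)
   can only be handled via SLP, because the loop-based interleaving scheme
   uses log2 (group size) permutation steps and therefore needs a
   power-of-2 group size.  */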
4033 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
4034 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
4035 DR_GROUP_FIRST_DR (stmt_info)))) == -1)
4036 {
4037 if (vect_print_dump_info (REPORT_DETAILS))
4038 {
4039 fprintf (vect_dump, "not vectorized: the size of group "
4040 "of strided accesses is not a power of 2");
4041 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4042 }
4043
4044 return false;
4045 }
4046 }
4047
4048 return true;
4049 }
4050
4051
4052 /* Function vect_transform_stmt.
4053
4054 Create a vectorized stmt to replace STMT, and insert it at BSI. */
4055
4056 bool
4057 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
4058 bool *strided_store, slp_tree slp_node,
4059 slp_instance slp_node_instance)
4060 {
4061 bool is_store = false;
4062 gimple vec_stmt = NULL;
4063 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4064 gimple orig_stmt_in_pattern;
4065 bool done;
4066
4067 switch (STMT_VINFO_TYPE (stmt_info))
4068 {
4069 case type_demotion_vec_info_type:
4070 done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
4071 gcc_assert (done);
4072 break;
4073
4074 case type_promotion_vec_info_type:
4075 done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
4076 gcc_assert (done);
4077 break;
4078
4079 case type_conversion_vec_info_type:
4080 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
4081 gcc_assert (done);
4082 break;
4083
4084 case induc_vec_info_type:
4085 gcc_assert (!slp_node);
4086 done = vectorizable_induction (stmt, gsi, &vec_stmt);
4087 gcc_assert (done);
4088 break;
4089
4090 case op_vec_info_type:
4091 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
4092 gcc_assert (done);
4093 break;
4094
4095 case assignment_vec_info_type:
4096 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
4097 gcc_assert (done);
4098 break;
4099
4100 case load_vec_info_type:
4101 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
4102 slp_node_instance);
4103 gcc_assert (done);
4104 break;
4105
4106 case store_vec_info_type:
4107 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
4108 gcc_assert (done);
4109 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
4110 {
4111 /* In case of interleaving, the whole chain is vectorized when the
4112 last store in the chain is reached. Store stmts before the last
4113 one are skipped, and their stmt_vec_info shouldn't be freed
4114 meanwhile. */
4115 *strided_store = true;
4116 if (STMT_VINFO_VEC_STMT (stmt_info))
4117 is_store = true;
4118 }
4119 else
4120 is_store = true;
4121 break;
4122
4123 case condition_vec_info_type:
4124 gcc_assert (!slp_node);
4125 done = vectorizable_condition (stmt, gsi, &vec_stmt);
4126 gcc_assert (done);
4127 break;
4128
4129 case call_vec_info_type:
4130 gcc_assert (!slp_node);
4131 done = vectorizable_call (stmt, gsi, &vec_stmt);
4132 break;
4133
4134 case reduc_vec_info_type:
4135 gcc_assert (!slp_node);
4136 done = vectorizable_reduction (stmt, gsi, &vec_stmt);
4137 gcc_assert (done);
4138 break;
4139
4140 default:
4141 if (!STMT_VINFO_LIVE_P (stmt_info))
4142 {
4143 if (vect_print_dump_info (REPORT_DETAILS))
4144 fprintf (vect_dump, "stmt not supported.");
4145 gcc_unreachable ();
4146 }
4147 }
4148
4149 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
4150 is being vectorized, but outside the immediately enclosing loop. */
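/* A sketch of the situation (hypothetical SSA names): when vectorizing an
   outer loop,

     inner-loop:       s_1 = ...
     inner-loop exit:  s_2 = PHI <s_1>
     outer-loop:       ... = s_2

   the vectorized def of s_1 is recorded on the exit phi defining s_2, so
   that the outer-loop statement using s_2 can later find it.  */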
4151 if (vec_stmt
4152 && STMT_VINFO_LOOP_VINFO (stmt_info)
4153 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
4154 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
4155 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
4156 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
4157 || STMT_VINFO_RELEVANT (stmt_info) ==
4158 vect_used_in_outer_by_reduction))
4159 {
4160 struct loop *innerloop = LOOP_VINFO_LOOP (
4161 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
4162 imm_use_iterator imm_iter;
4163 use_operand_p use_p;
4164 tree scalar_dest;
4165 gimple exit_phi;
4166
4167 if (vect_print_dump_info (REPORT_DETAILS))
4168 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
4169
4170 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
4171 (to be used when vectorizing outer-loop stmts that use the DEF of
4172 STMT). */
4173 if (gimple_code (stmt) == GIMPLE_PHI)
4174 scalar_dest = PHI_RESULT (stmt);
4175 else
4176 scalar_dest = gimple_assign_lhs (stmt);
4177
4178 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4179 {
4180 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
4181 {
4182 exit_phi = USE_STMT (use_p);
4183 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
4184 }
4185 }
4186 }
4187
4188 /* Handle stmts whose DEF is used outside the loop-nest that is
4189 being vectorized. */
4190 if (STMT_VINFO_LIVE_P (stmt_info)
4191 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
4192 {
4193 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
4194 gcc_assert (done);
4195 }
4196
4197 if (vec_stmt)
4198 {
4199 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
4200 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
4201 if (orig_stmt_in_pattern)
4202 {
4203 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
4204 /* STMT was inserted by the vectorizer to replace a computation idiom.
4205 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
4206 computed this idiom. We need to record a pointer to VEC_STMT in
4207 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
4208 documentation of vect_pattern_recog. */
4209 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
4210 {
4211 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4212 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
4213 }
4214 }
4215 }
4216
4217 return is_store;
4218 }
4219
4220
4221 /* Remove a group of stores (for SLP or interleaving), free their
4222 stmt_vec_info. */
4223
4224 void
4225 vect_remove_stores (gimple first_stmt)
4226 {
4227 gimple next = first_stmt;
4228 gimple tmp;
4229 gimple_stmt_iterator next_si;
4230
4231 while (next)
4232 {
4233 /* Free the attached stmt_vec_info and remove the stmt. */
4234 next_si = gsi_for_stmt (next);
4235 gsi_remove (&next_si, true);
4236 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
4237 free_stmt_vec_info (next);
4238 next = tmp;
4239 }
4240 }
4241
4242
4243 /* Function new_stmt_vec_info.
4244
4245 Create and initialize a new stmt_vec_info struct for STMT. */
4246
4247 stmt_vec_info
4248 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
4249 bb_vec_info bb_vinfo)
4250 {
4251 stmt_vec_info res;
4252 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
4253
4254 STMT_VINFO_TYPE (res) = undef_vec_info_type;
4255 STMT_VINFO_STMT (res) = stmt;
4256 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
4257 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
4258 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
4259 STMT_VINFO_LIVE_P (res) = false;
4260 STMT_VINFO_VECTYPE (res) = NULL;
4261 STMT_VINFO_VEC_STMT (res) = NULL;
4262 STMT_VINFO_IN_PATTERN_P (res) = false;
4263 STMT_VINFO_RELATED_STMT (res) = NULL;
4264 STMT_VINFO_DATA_REF (res) = NULL;
4265
4266 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
4267 STMT_VINFO_DR_OFFSET (res) = NULL;
4268 STMT_VINFO_DR_INIT (res) = NULL;
4269 STMT_VINFO_DR_STEP (res) = NULL;
4270 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
4271
4272 if (gimple_code (stmt) == GIMPLE_PHI
4273 && is_loop_header_bb_p (gimple_bb (stmt)))
4274 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
4275 else
4276 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
4277
4278 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
4279 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
4280 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
4281 STMT_SLP_TYPE (res) = loop_vect;
4282 DR_GROUP_FIRST_DR (res) = NULL;
4283 DR_GROUP_NEXT_DR (res) = NULL;
4284 DR_GROUP_SIZE (res) = 0;
4285 DR_GROUP_STORE_COUNT (res) = 0;
4286 DR_GROUP_GAP (res) = 0;
4287 DR_GROUP_SAME_DR_STMT (res) = NULL;
4288 DR_GROUP_READ_WRITE_DEPENDENCE (res) = false;
4289
4290 return res;
4291 }
4292
4293
4294 /* Create a vector for storing stmt_vec_info structures. */
4295
4296 void
4297 init_stmt_vec_info_vec (void)
4298 {
4299 gcc_assert (!stmt_vec_info_vec);
4300 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
4301 }
4302
4303
4304 /* Free the vector of stmt_vec_info structures. */
4305
4306 void
4307 free_stmt_vec_info_vec (void)
4308 {
4309 gcc_assert (stmt_vec_info_vec);
4310 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
4311 }
4312
4313
4314 /* Free stmt vectorization related info. */
4315
4316 void
4317 free_stmt_vec_info (gimple stmt)
4318 {
4319 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4320
4321 if (!stmt_info)
4322 return;
4323
4324 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
4325 set_vinfo_for_stmt (stmt, NULL);
4326 free (stmt_info);
4327 }
4328
4329
4330 /* Function get_vectype_for_scalar_type.
4331
4332 Returns the vector type corresponding to SCALAR_TYPE as supported
4333 by the target. */
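/* For instance (illustrative numbers), on a target whose SIMD word is
   16 bytes a 4-byte 'int' yields a vector type with 16/4 = 4 units,
   whereas a scalar type at least as wide as the SIMD word gets no vector
   type and NULL_TREE is returned.  */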
4334
4335 tree
4336 get_vectype_for_scalar_type (tree scalar_type)
4337 {
4338 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
4339 int nbytes = GET_MODE_SIZE (inner_mode);
4340 int nunits;
4341 tree vectype;
4342
4343 if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD (inner_mode))
4344 return NULL_TREE;
4345
4346 /* FORNOW: Only a single vector size per mode (UNITS_PER_SIMD_WORD)
4347 is expected. */
4348 nunits = UNITS_PER_SIMD_WORD (inner_mode) / nbytes;
4349
4350 vectype = build_vector_type (scalar_type, nunits);
4351 if (vect_print_dump_info (REPORT_DETAILS))
4352 {
4353 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
4354 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
4355 }
4356
4357 if (!vectype)
4358 return NULL_TREE;
4359
4360 if (vect_print_dump_info (REPORT_DETAILS))
4361 {
4362 fprintf (vect_dump, "vectype: ");
4363 print_generic_expr (vect_dump, vectype, TDF_SLIM);
4364 }
4365
4366 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4367 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
4368 {
4369 if (vect_print_dump_info (REPORT_DETAILS))
4370 fprintf (vect_dump, "mode not supported by target.");
4371 return NULL_TREE;
4372 }
4373
4374 return vectype;
4375 }
4376
4377 /* Function vect_is_simple_use.
4378
4379 Input:
4380 LOOP_VINFO - the vect info of the loop that is being vectorized.
4381 BB_VINFO - the vect info of the basic block that is being vectorized.
4382 OPERAND - operand of a stmt in the loop or bb.
4383 DEF - the defining stmt in case OPERAND is an SSA_NAME.
4384
4385 Returns whether a stmt with OPERAND can be vectorized.
4386 For loops, supportable operands are constants, loop invariants, and operands
4387 that are defined by the current iteration of the loop. Unsupportable
4388 operands are those that are defined by a previous iteration of the loop (as
4389 is the case in reduction/induction computations).
4390 For basic blocks, supportable operands are constants and bb invariants.
4391 For now, operands defined outside the basic block are not supported. */
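/* For example (illustrative only), for a loop statement b_1 = a_2 + x_3:
   a constant operand yields vect_constant_def, an operand defined before
   the loop (or a function argument) yields vect_external_def, and an
   operand defined by a statement inside the loop yields the def-type
   recorded for that statement, e.g. vect_internal_def.  */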
4392
4393 bool
4394 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
4395 bb_vec_info bb_vinfo, gimple *def_stmt,
4396 tree *def, enum vect_def_type *dt)
4397 {
4398 basic_block bb;
4399 stmt_vec_info stmt_vinfo;
4400 struct loop *loop = NULL;
4401
4402 if (loop_vinfo)
4403 loop = LOOP_VINFO_LOOP (loop_vinfo);
4404
4405 *def_stmt = NULL;
4406 *def = NULL_TREE;
4407
4408 if (vect_print_dump_info (REPORT_DETAILS))
4409 {
4410 fprintf (vect_dump, "vect_is_simple_use: operand ");
4411 print_generic_expr (vect_dump, operand, TDF_SLIM);
4412 }
4413
4414 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
4415 {
4416 *dt = vect_constant_def;
4417 return true;
4418 }
4419
4420 if (is_gimple_min_invariant (operand))
4421 {
4422 *def = operand;
4423 *dt = vect_external_def;
4424 return true;
4425 }
4426
4427 if (TREE_CODE (operand) == PAREN_EXPR)
4428 {
4429 if (vect_print_dump_info (REPORT_DETAILS))
4430 fprintf (vect_dump, "non-associatable copy.");
4431 operand = TREE_OPERAND (operand, 0);
4432 }
4433
4434 if (TREE_CODE (operand) != SSA_NAME)
4435 {
4436 if (vect_print_dump_info (REPORT_DETAILS))
4437 fprintf (vect_dump, "not ssa-name.");
4438 return false;
4439 }
4440
4441 *def_stmt = SSA_NAME_DEF_STMT (operand);
4442 if (*def_stmt == NULL)
4443 {
4444 if (vect_print_dump_info (REPORT_DETAILS))
4445 fprintf (vect_dump, "no def_stmt.");
4446 return false;
4447 }
4448
4449 if (vect_print_dump_info (REPORT_DETAILS))
4450 {
4451 fprintf (vect_dump, "def_stmt: ");
4452 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
4453 }
4454
4455 /* Empty stmt is expected only in case of a function argument.
4456 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
4457 if (gimple_nop_p (*def_stmt))
4458 {
4459 *def = operand;
4460 *dt = vect_external_def;
4461 return true;
4462 }
4463
4464 bb = gimple_bb (*def_stmt);
4465
4466 if ((loop && !flow_bb_inside_loop_p (loop, bb))
4467 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
4468 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
4469 *dt = vect_external_def;
4470 else
4471 {
4472 stmt_vinfo = vinfo_for_stmt (*def_stmt);
4473 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
4474 }
4475
4476 if (*dt == vect_unknown_def_type)
4477 {
4478 if (vect_print_dump_info (REPORT_DETAILS))
4479 fprintf (vect_dump, "Unsupported pattern.");
4480 return false;
4481 }
4482
4483 if (vect_print_dump_info (REPORT_DETAILS))
4484 fprintf (vect_dump, "type of def: %d.",*dt);
4485
4486 switch (gimple_code (*def_stmt))
4487 {
4488 case GIMPLE_PHI:
4489 *def = gimple_phi_result (*def_stmt);
4490 break;
4491
4492 case GIMPLE_ASSIGN:
4493 *def = gimple_assign_lhs (*def_stmt);
4494 break;
4495
4496 case GIMPLE_CALL:
4497 *def = gimple_call_lhs (*def_stmt);
4498 if (*def != NULL)
4499 break;
4500 /* FALLTHRU */
4501 default:
4502 if (vect_print_dump_info (REPORT_DETAILS))
4503 fprintf (vect_dump, "unsupported defining stmt: ");
4504 return false;
4505 }
4506
4507 return true;
4508 }
4509
4510
4511 /* Function supportable_widening_operation
4512
4513 Check whether an operation represented by the code CODE is a
4514 widening operation that is supported by the target platform in
4515 vector form (i.e., when operating on arguments of type VECTYPE).
4516
4517 Widening operations we currently support are NOP (CONVERT), FLOAT
4518 and WIDEN_MULT. This function checks if these operations are supported
4519 by the target platform either directly (via vector tree-codes), or via
4520 target builtins.
4521
4522 Output:
4523 - CODE1 and CODE2 are codes of vector operations to be used when
4524 vectorizing the operation, if available.
4525 - DECL1 and DECL2 are decls of target builtin functions to be used
4526 when vectorizing the operation, if available. In this case,
4527 CODE1 and CODE2 are CALL_EXPR.
4528 - MULTI_STEP_CVT determines the number of required intermediate steps in
4529 case of multi-step conversion (like char->short->int - in that case
4530 MULTI_STEP_CVT will be 1).
4531 - INTERM_TYPES contains the intermediate type required to perform the
4532 widening operation (short in the above example). */
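/* For example (illustrative only): for a NOP conversion from 'short' to
   'int' with 8-element short vectors, CODE1/CODE2 would typically be
   VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR, and each input vector of 8 shorts
   yields two result vectors of 4 ints.  For char->int the conversion goes
   through an intermediate short vector type, so MULTI_STEP_CVT is 1 and
   INTERM_TYPES holds that intermediate type.  */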
4533
4534 bool
4535 supportable_widening_operation (enum tree_code code, gimple stmt, tree vectype,
4536 tree *decl1, tree *decl2,
4537 enum tree_code *code1, enum tree_code *code2,
4538 int *multi_step_cvt,
4539 VEC (tree, heap) **interm_types)
4540 {
4541 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4542 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4543 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
4544 bool ordered_p;
4545 enum machine_mode vec_mode;
4546 enum insn_code icode1, icode2;
4547 optab optab1, optab2;
4548 tree type = gimple_expr_type (stmt);
4549 tree wide_vectype = get_vectype_for_scalar_type (type);
4550 enum tree_code c1, c2;
4551
4552 /* The result of a vectorized widening operation usually requires two vectors
4553 (because the widened results do not fit in one vector). The generated
4554 vector results would normally be expected to appear in the same
4555 order as in the original scalar computation, i.e. if 8 results are
4556 generated in each vector iteration, they are to be organized as follows:
4557 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
4558
4559 However, in the special case that the result of the widening operation is
4560 used in a reduction computation only, the order doesn't matter (because
4561 when vectorizing a reduction we change the order of the computation).
4562 Some targets can take advantage of this and generate more efficient code.
4563 For example, targets like Altivec, that support widen_mult using a sequence
4564 of {mult_even,mult_odd} generate the following vectors:
4565 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
4566
4567 When vectorizing outer-loops, we execute the inner-loop sequentially
4568 (each vectorized inner-loop iteration contributes to VF outer-loop
4569 iterations in parallel). We therefore don't allow changing the order
4570 of the computation in the inner-loop during outer-loop vectorization. */
4571
4572 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
4573 && !nested_in_vect_loop_p (vect_loop, stmt))
4574 ordered_p = false;
4575 else
4576 ordered_p = true;
4577
4578 if (!ordered_p
4579 && code == WIDEN_MULT_EXPR
4580 && targetm.vectorize.builtin_mul_widen_even
4581 && targetm.vectorize.builtin_mul_widen_even (vectype)
4582 && targetm.vectorize.builtin_mul_widen_odd
4583 && targetm.vectorize.builtin_mul_widen_odd (vectype))
4584 {
4585 if (vect_print_dump_info (REPORT_DETAILS))
4586 fprintf (vect_dump, "Unordered widening operation detected.");
4587
4588 *code1 = *code2 = CALL_EXPR;
4589 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
4590 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
4591 return true;
4592 }
4593
4594 switch (code)
4595 {
4596 case WIDEN_MULT_EXPR:
4597 if (BYTES_BIG_ENDIAN)
4598 {
4599 c1 = VEC_WIDEN_MULT_HI_EXPR;
4600 c2 = VEC_WIDEN_MULT_LO_EXPR;
4601 }
4602 else
4603 {
4604 c2 = VEC_WIDEN_MULT_HI_EXPR;
4605 c1 = VEC_WIDEN_MULT_LO_EXPR;
4606 }
4607 break;
4608
4609 CASE_CONVERT:
4610 if (BYTES_BIG_ENDIAN)
4611 {
4612 c1 = VEC_UNPACK_HI_EXPR;
4613 c2 = VEC_UNPACK_LO_EXPR;
4614 }
4615 else
4616 {
4617 c2 = VEC_UNPACK_HI_EXPR;
4618 c1 = VEC_UNPACK_LO_EXPR;
4619 }
4620 break;
4621
4622 case FLOAT_EXPR:
4623 if (BYTES_BIG_ENDIAN)
4624 {
4625 c1 = VEC_UNPACK_FLOAT_HI_EXPR;
4626 c2 = VEC_UNPACK_FLOAT_LO_EXPR;
4627 }
4628 else
4629 {
4630 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
4631 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
4632 }
4633 break;
4634
4635 case FIX_TRUNC_EXPR:
4636 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
4637 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
4638 computing the operation. */
4639 return false;
4640
4641 default:
4642 gcc_unreachable ();
4643 }
4644
4645 if (code == FIX_TRUNC_EXPR)
4646 {
4647 /* The signedness is determined from output operand. */
4648 optab1 = optab_for_tree_code (c1, type, optab_default);
4649 optab2 = optab_for_tree_code (c2, type, optab_default);
4650 }
4651 else
4652 {
4653 optab1 = optab_for_tree_code (c1, vectype, optab_default);
4654 optab2 = optab_for_tree_code (c2, vectype, optab_default);
4655 }
4656
4657 if (!optab1 || !optab2)
4658 return false;
4659
4660 vec_mode = TYPE_MODE (vectype);
4661 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) == CODE_FOR_nothing
4662 || (icode2 = optab_handler (optab2, vec_mode)->insn_code)
4663 == CODE_FOR_nothing)
4664 return false;
4665
4666 /* Check if it's a multi-step conversion that can be done using intermediate
4667 types. */
4668 if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
4669 || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
4670 {
4671 int i;
4672 tree prev_type = vectype, intermediate_type;
4673 enum machine_mode intermediate_mode, prev_mode = vec_mode;
4674 optab optab3, optab4;
4675
4676 if (!CONVERT_EXPR_CODE_P (code))
4677 return false;
4678
4679 *code1 = c1;
4680 *code2 = c2;
4681
4682 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
4683 intermediate steps in the promotion sequence. We try MAX_INTERM_CVT_STEPS
4684 to get to WIDE_VECTYPE, and fail if we do not. */
4685 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
4686 for (i = 0; i < 3; i++)
4687 {
4688 intermediate_mode = insn_data[icode1].operand[0].mode;
4689 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
4690 TYPE_UNSIGNED (prev_type));
4691 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
4692 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
4693
4694 if (!optab3 || !optab4
4695 || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
4696 == CODE_FOR_nothing
4697 || insn_data[icode1].operand[0].mode != intermediate_mode
4698 || (icode2 = optab2->handlers[(int) prev_mode].insn_code)
4699 == CODE_FOR_nothing
4700 || insn_data[icode2].operand[0].mode != intermediate_mode
4701 || (icode1 = optab3->handlers[(int) intermediate_mode].insn_code)
4702 == CODE_FOR_nothing
4703 || (icode2 = optab4->handlers[(int) intermediate_mode].insn_code)
4704 == CODE_FOR_nothing)
4705 return false;
4706
4707 VEC_quick_push (tree, *interm_types, intermediate_type);
4708 (*multi_step_cvt)++;
4709
4710 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
4711 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
4712 return true;
4713
4714 prev_type = intermediate_type;
4715 prev_mode = intermediate_mode;
4716 }
4717
4718 return false;
4719 }
4720
4721 *code1 = c1;
4722 *code2 = c2;
4723 return true;
4724 }
4725
4726
4727 /* Function supportable_narrowing_operation
4728
4729 Check whether an operation represented by the code CODE is a
4730 narrowing operation that is supported by the target platform in
4731 vector form (i.e., when operating on arguments of type VECTYPE).
4732
4733 Narrowing operations we currently support are NOP (CONVERT) and
4734 FIX_TRUNC. This function checks if these operations are supported by
4735 the target platform directly via vector tree-codes.
4736
4737 Output:
4738 - CODE1 is the code of a vector operation to be used when
4739 vectorizing the operation, if available.
4740 - MULTI_STEP_CVT determines the number of required intermediate steps in
4741 case of multi-step conversion (like int->short->char - in that case
4742 MULTI_STEP_CVT will be 1).
4743 - INTERM_TYPES contains the intermediate type required to perform the
4744 narrowing operation (short in the above example). */
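/* For example (illustrative only): for a NOP conversion from 'int' to
   'short', CODE1 is VEC_PACK_TRUNC_EXPR and every two input vectors of
   4 ints are packed into one result vector of 8 shorts.  For int->char
   the packing goes through an intermediate short vector type, so
   MULTI_STEP_CVT is 1 and INTERM_TYPES holds that intermediate type.  */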
4745
4746 bool
4747 supportable_narrowing_operation (enum tree_code code,
4748 const_gimple stmt, tree vectype,
4749 enum tree_code *code1, int *multi_step_cvt,
4750 VEC (tree, heap) **interm_types)
4751 {
4752 enum machine_mode vec_mode;
4753 enum insn_code icode1;
4754 optab optab1, interm_optab;
4755 tree type = gimple_expr_type (stmt);
4756 tree narrow_vectype = get_vectype_for_scalar_type (type);
4757 enum tree_code c1;
4758 tree intermediate_type, prev_type;
4759 int i;
4760
4761 switch (code)
4762 {
4763 CASE_CONVERT:
4764 c1 = VEC_PACK_TRUNC_EXPR;
4765 break;
4766
4767 case FIX_TRUNC_EXPR:
4768 c1 = VEC_PACK_FIX_TRUNC_EXPR;
4769 break;
4770
4771 case FLOAT_EXPR:
4772 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
4773 tree code and optabs used for computing the operation. */
4774 return false;
4775
4776 default:
4777 gcc_unreachable ();
4778 }
4779
4780 if (code == FIX_TRUNC_EXPR)
4781 /* The signedness is determined from output operand. */
4782 optab1 = optab_for_tree_code (c1, type, optab_default);
4783 else
4784 optab1 = optab_for_tree_code (c1, vectype, optab_default);
4785
4786 if (!optab1)
4787 return false;
4788
4789 vec_mode = TYPE_MODE (vectype);
4790 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code)
4791 == CODE_FOR_nothing)
4792 return false;
4793
4794 /* Check if it's a multi-step conversion that can be done using intermediate
4795 types. */
4796 if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
4797 {
4798 enum machine_mode intermediate_mode, prev_mode = vec_mode;
4799
4800 *code1 = c1;
4801 prev_type = vectype;
4802 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
4803 intermediate steps in the narrowing sequence. We try MAX_INTERM_CVT_STEPS
4804 to get to NARROW_VECTYPE, and fail if we do not. */
4805 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
4806 for (i = 0; i < 3; i++)
4807 {
4808 intermediate_mode = insn_data[icode1].operand[0].mode;
4809 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
4810 TYPE_UNSIGNED (prev_type));
4811 interm_optab = optab_for_tree_code (c1, intermediate_type,
4812 optab_default);
4813 if (!interm_optab
4814 || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
4815 == CODE_FOR_nothing
4816 || insn_data[icode1].operand[0].mode != intermediate_mode
4817 || (icode1
4818 = interm_optab->handlers[(int) intermediate_mode].insn_code)
4819 == CODE_FOR_nothing)
4820 return false;
4821
4822 VEC_quick_push (tree, *interm_types, intermediate_type);
4823 (*multi_step_cvt)++;
4824
4825 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
4826 return true;
4827
4828 prev_type = intermediate_type;
4829 prev_mode = intermediate_mode;
4830 }
4831
4832 return false;
4833 }
4834
4835 *code1 = c1;
4836 return true;
4837 }