gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software
3 Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "cfgloop.h"
35 #include "cfglayout.h"
36 #include "expr.h"
37 #include "recog.h"
38 #include "optabs.h"
39 #include "toplev.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
42
43
44 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
45
46 /* Function vect_mark_relevant.
47
48 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
49
50 static void
51 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
52 enum vect_relevant relevant, bool live_p)
53 {
54 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
55 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
56 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
57
58 if (vect_print_dump_info (REPORT_DETAILS))
59 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
60
61 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
62 {
63 gimple pattern_stmt;
64
65 /* This is the last stmt in a sequence that was detected as a
66 pattern that can potentially be vectorized. Don't mark the stmt
67 as relevant/live because it's not going to be vectorized.
68 Instead mark the pattern-stmt that replaces it. */
69
70 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
71
72 if (vect_print_dump_info (REPORT_DETAILS))
73 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
74 stmt_info = vinfo_for_stmt (pattern_stmt);
75 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
76 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
77 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
78 stmt = pattern_stmt;
79 }
80
81 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
82 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
83 STMT_VINFO_RELEVANT (stmt_info) = relevant;
84
85 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
86 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
87 {
88 if (vect_print_dump_info (REPORT_DETAILS))
89 fprintf (vect_dump, "already marked relevant/live.");
90 return;
91 }
92
93 VEC_safe_push (gimple, heap, *worklist, stmt);
94 }
95
96
97 /* Function vect_stmt_relevant_p.
98
99 Return true if STMT in loop that is represented by LOOP_VINFO is
100 "relevant for vectorization".
101
102 A stmt is considered "relevant for vectorization" if:
103 - it has uses outside the loop.
104 - it has vdefs (it alters memory).
105 - it is a control stmt in the loop (other than the loop exit condition).
106
107 CHECKME: what other side effects would the vectorizer allow? */
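/* Illustrative sketch (added for exposition; the variable names below are
   made up):  in a loop such as

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + c[i];      <-- has a vdef (alters memory): relevant
         s = s + b[i];            <-- s is used after the loop: live
       }
     ... = s;

   the store is marked relevant, and the stmt computing s is marked live
   because its value escapes through the loop-exit phi.  */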
108
109 static bool
110 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
111 enum vect_relevant *relevant, bool *live_p)
112 {
113 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
114 ssa_op_iter op_iter;
115 imm_use_iterator imm_iter;
116 use_operand_p use_p;
117 def_operand_p def_p;
118
119 *relevant = vect_unused_in_scope;
120 *live_p = false;
121
122 /* cond stmt other than loop exit cond. */
123 if (is_ctrl_stmt (stmt)
124 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
125 != loop_exit_ctrl_vec_info_type)
126 *relevant = vect_used_in_scope;
127
128 /* changing memory. */
129 if (gimple_code (stmt) != GIMPLE_PHI)
130 if (gimple_vdef (stmt))
131 {
132 if (vect_print_dump_info (REPORT_DETAILS))
133 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
134 *relevant = vect_used_in_scope;
135 }
136
137 /* uses outside the loop. */
138 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
139 {
140 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
141 {
142 basic_block bb = gimple_bb (USE_STMT (use_p));
143 if (!flow_bb_inside_loop_p (loop, bb))
144 {
145 if (vect_print_dump_info (REPORT_DETAILS))
146 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
147
148 /* We expect all such uses to be in the loop exit phis
149 (because of loop-closed SSA form).  */
150 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
151 gcc_assert (bb == single_exit (loop)->dest);
152
153 *live_p = true;
154 }
155 }
156 }
157
158 return (*live_p || *relevant);
159 }
160
161
162 /* Function exist_non_indexing_operands_for_use_p
163
164 USE is one of the uses attached to STMT. Check if USE is
165 used in STMT for anything other than indexing an array. */
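/* Illustrative sketch (added for exposition): for a store such as

     a[i_5] = x_7;

   the use 'i_5' only feeds the address computation, so this function
   returns false for it, whereas 'x_7' is the stored value itself and
   the function returns true for that use.  */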
166
167 static bool
168 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
169 {
170 tree operand;
171 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
172
173 /* USE corresponds to some operand in STMT. If there is no data
174 reference in STMT, then any operand that corresponds to USE
175 is not indexing an array. */
176 if (!STMT_VINFO_DATA_REF (stmt_info))
177 return true;
178
179 /* STMT has a data_ref. FORNOW this means that it is one of
180 the following forms:
181 -1- ARRAY_REF = var
182 -2- var = ARRAY_REF
183 (This should have been verified in analyze_data_refs).
184
185 'var' in the second case corresponds to a def, not a use,
186 so USE cannot correspond to any operands that are not used
187 for array indexing.
188
189 Therefore, all we need to check is if STMT falls into the
190 first case, and whether var corresponds to USE. */
191
192 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
193 return false;
194
195 if (!gimple_assign_copy_p (stmt))
196 return false;
197 operand = gimple_assign_rhs1 (stmt);
198
199 if (TREE_CODE (operand) != SSA_NAME)
200 return false;
201
202 if (operand == use)
203 return true;
204
205 return false;
206 }
207
208
209 /*
210 Function process_use.
211
212 Inputs:
213 - a USE in STMT in a loop represented by LOOP_VINFO
214 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
215 that defined USE. This is done by calling mark_relevant and passing it
216 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
217
218 Outputs:
219 Generally, LIVE_P and RELEVANT are used to define the liveness and
220 relevance info of the DEF_STMT of this USE:
221 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
222 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
223 Exceptions:
224 - case 1: If USE is used only for address computations (e.g. array indexing),
225 which does not need to be directly vectorized, then the liveness/relevance
226 of the respective DEF_STMT is left unchanged.
227 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
228 skip DEF_STMT because it has already been processed.
229 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
230 be modified accordingly.
231
232 Return true if everything is as expected. Return false otherwise. */
233
234 static bool
235 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
236 enum vect_relevant relevant, VEC(gimple,heap) **worklist)
237 {
238 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
239 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
240 stmt_vec_info dstmt_vinfo;
241 basic_block bb, def_bb;
242 tree def;
243 gimple def_stmt;
244 enum vect_def_type dt;
245
246 /* case 1: we are only interested in uses that need to be vectorized. Uses
247 that are used for address computation are not considered relevant. */
248 if (!exist_non_indexing_operands_for_use_p (use, stmt))
249 return true;
250
251 if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
252 {
253 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
254 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
255 return false;
256 }
257
258 if (!def_stmt || gimple_nop_p (def_stmt))
259 return true;
260
261 def_bb = gimple_bb (def_stmt);
262 if (!flow_bb_inside_loop_p (loop, def_bb))
263 {
264 if (vect_print_dump_info (REPORT_DETAILS))
265 fprintf (vect_dump, "def_stmt is out of loop.");
266 return true;
267 }
268
269 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
270 DEF_STMT must have already been processed, because this should be the
271 only way that STMT, which is a reduction-phi, was put in the worklist,
272 as there should be no other uses for DEF_STMT in the loop. So we just
273 check that everything is as expected, and we are done. */
274 dstmt_vinfo = vinfo_for_stmt (def_stmt);
275 bb = gimple_bb (stmt);
276 if (gimple_code (stmt) == GIMPLE_PHI
277 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
278 && gimple_code (def_stmt) != GIMPLE_PHI
279 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
280 && bb->loop_father == def_bb->loop_father)
281 {
282 if (vect_print_dump_info (REPORT_DETAILS))
283 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
284 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
285 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
286 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
287 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
288 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
289 return true;
290 }
291
292 /* case 3a: outer-loop stmt defining an inner-loop stmt:
293 outer-loop-header-bb:
294 d = def_stmt
295 inner-loop:
296 stmt # use (d)
297 outer-loop-tail-bb:
298 ... */
299 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
300 {
301 if (vect_print_dump_info (REPORT_DETAILS))
302 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
303
304 switch (relevant)
305 {
306 case vect_unused_in_scope:
307 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
308 vect_used_in_scope : vect_unused_in_scope;
309 break;
310
311 case vect_used_in_outer_by_reduction:
312 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
313 relevant = vect_used_by_reduction;
314 break;
315
316 case vect_used_in_outer:
317 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
318 relevant = vect_used_in_scope;
319 break;
320
321 case vect_used_in_scope:
322 break;
323
324 default:
325 gcc_unreachable ();
326 }
327 }
328
329 /* case 3b: inner-loop stmt defining an outer-loop stmt:
330 outer-loop-header-bb:
331 ...
332 inner-loop:
333 d = def_stmt
334 outer-loop-tail-bb:
335 stmt # use (d) */
336 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
337 {
338 if (vect_print_dump_info (REPORT_DETAILS))
339 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
340
341 switch (relevant)
342 {
343 case vect_unused_in_scope:
344 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
345 vect_used_in_outer_by_reduction : vect_unused_in_scope;
346 break;
347
348 case vect_used_by_reduction:
349 relevant = vect_used_in_outer_by_reduction;
350 break;
351
352 case vect_used_in_scope:
353 relevant = vect_used_in_outer;
354 break;
355
356 default:
357 gcc_unreachable ();
358 }
359 }
360
361 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
362 return true;
363 }
364
365
366 /* Function vect_mark_stmts_to_be_vectorized.
367
368 Not all stmts in the loop need to be vectorized. For example:
369
370 for i...
371 for j...
372 1. T0 = i + j
373 2. T1 = a[T0]
374
375 3. j = j + 1
376
377 Stmts 1 and 3 do not need to be vectorized, because loop control and
378 addressing of vectorized data-refs are handled differently.
379
380 This pass detects such stmts. */
381
382 bool
383 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
384 {
385 VEC(gimple,heap) *worklist;
386 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
387 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
388 unsigned int nbbs = loop->num_nodes;
389 gimple_stmt_iterator si;
390 gimple stmt;
391 unsigned int i;
392 stmt_vec_info stmt_vinfo;
393 basic_block bb;
394 gimple phi;
395 bool live_p;
396 enum vect_relevant relevant;
397
398 if (vect_print_dump_info (REPORT_DETAILS))
399 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
400
401 worklist = VEC_alloc (gimple, heap, 64);
402
403 /* 1. Init worklist. */
404 for (i = 0; i < nbbs; i++)
405 {
406 bb = bbs[i];
407 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
408 {
409 phi = gsi_stmt (si);
410 if (vect_print_dump_info (REPORT_DETAILS))
411 {
412 fprintf (vect_dump, "init: phi relevant? ");
413 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
414 }
415
416 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
417 vect_mark_relevant (&worklist, phi, relevant, live_p);
418 }
419 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
420 {
421 stmt = gsi_stmt (si);
422 if (vect_print_dump_info (REPORT_DETAILS))
423 {
424 fprintf (vect_dump, "init: stmt relevant? ");
425 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
426 }
427
428 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
429 vect_mark_relevant (&worklist, stmt, relevant, live_p);
430 }
431 }
432
433 /* 2. Process_worklist */
434 while (VEC_length (gimple, worklist) > 0)
435 {
436 use_operand_p use_p;
437 ssa_op_iter iter;
438
439 stmt = VEC_pop (gimple, worklist);
440 if (vect_print_dump_info (REPORT_DETAILS))
441 {
442 fprintf (vect_dump, "worklist: examine stmt: ");
443 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
444 }
445
446 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
447 (DEF_STMT) as relevant/irrelevant and live/dead according to the
448 liveness and relevance properties of STMT. */
449 stmt_vinfo = vinfo_for_stmt (stmt);
450 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
451 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
452
453 /* Generally, the liveness and relevance properties of STMT are
454 propagated as is to the DEF_STMTs of its USEs:
455 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
456 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
457
458 One exception is when STMT has been identified as defining a reduction
459 variable; in this case we set the liveness/relevance as follows:
460 live_p = false
461 relevant = vect_used_by_reduction
462 This is because we distinguish between two kinds of relevant stmts -
463 those that are used by a reduction computation, and those that are
464 (also) used by a regular computation. This allows us later on to
465 identify stmts that are used solely by a reduction, and therefore the
466 order of the results that they produce does not have to be kept. */
467
468 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
469 {
470 enum vect_relevant tmp_relevant = relevant;
471 switch (tmp_relevant)
472 {
473 case vect_unused_in_scope:
474 gcc_assert (gimple_code (stmt) != GIMPLE_PHI);
475 relevant = vect_used_by_reduction;
476 break;
477
478 case vect_used_by_reduction:
479 if (gimple_code (stmt) == GIMPLE_PHI)
480 break;
481 /* fall through */
482
483 default:
484 if (vect_print_dump_info (REPORT_DETAILS))
485 fprintf (vect_dump, "unsupported use of reduction.");
486 VEC_free (gimple, heap, worklist);
487 return false;
488 }
489
490 live_p = false;
491 }
492 else if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle)
493 {
494 enum vect_relevant tmp_relevant = relevant;
495 switch (tmp_relevant)
496 {
497 case vect_unused_in_scope:
498 case vect_used_in_outer_by_reduction:
499 case vect_used_in_outer:
500 break;
501
502 default:
503 if (vect_print_dump_info (REPORT_DETAILS))
504 fprintf (vect_dump, "unsupported use of nested cycle.");
505
506 VEC_free (gimple, heap, worklist);
507 return false;
508 }
509
510 live_p = false;
511 }
512
513 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
514 {
515 tree op = USE_FROM_PTR (use_p);
516 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
517 {
518 VEC_free (gimple, heap, worklist);
519 return false;
520 }
521 }
522 } /* while worklist */
523
524 VEC_free (gimple, heap, worklist);
525 return true;
526 }
527
528
529 int
530 cost_for_stmt (gimple stmt)
531 {
532 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
533
534 switch (STMT_VINFO_TYPE (stmt_info))
535 {
536 case load_vec_info_type:
537 return TARG_SCALAR_LOAD_COST;
538 case store_vec_info_type:
539 return TARG_SCALAR_STORE_COST;
540 case op_vec_info_type:
541 case condition_vec_info_type:
542 case assignment_vec_info_type:
543 case reduc_vec_info_type:
544 case induc_vec_info_type:
545 case type_promotion_vec_info_type:
546 case type_demotion_vec_info_type:
547 case type_conversion_vec_info_type:
548 case call_vec_info_type:
549 return TARG_SCALAR_STMT_COST;
550 case undef_vec_info_type:
551 default:
552 gcc_unreachable ();
553 }
554 }
555
556 /* Function vect_model_simple_cost.
557
558 Models cost for simple operations, i.e. those that only emit ncopies of a
559 single op. Right now, this does not account for multiple insns that could
560 be generated for the single vector op. We will handle that shortly. */
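/* Worked example (illustrative, with hypothetical operand kinds): for a
   stmt vectorized with ncopies == 2 whose two operands are one SSA name
   defined in the loop and one constant, the code below computes

     inside_cost  = 2 * TARG_VEC_STMT_COST
     outside_cost = 1 * TARG_SCALAR_TO_VEC_COST

   i.e. the constant operand only pays a one-time cost for building the
   vector of constants outside the loop.  */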
561
562 void
563 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
564 enum vect_def_type *dt, slp_tree slp_node)
565 {
566 int i;
567 int inside_cost = 0, outside_cost = 0;
568
569 /* The SLP costs were already calculated during SLP tree build. */
570 if (PURE_SLP_STMT (stmt_info))
571 return;
572
573 inside_cost = ncopies * TARG_VEC_STMT_COST;
574
575 /* FORNOW: Assuming maximum 2 args per stmts. */
576 for (i = 0; i < 2; i++)
577 {
578 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
579 outside_cost += TARG_SCALAR_TO_VEC_COST;
580 }
581
582 if (vect_print_dump_info (REPORT_COST))
583 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
584 "outside_cost = %d .", inside_cost, outside_cost);
585
586 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
587 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
588 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
589 }
590
591
592 /* Function vect_cost_strided_group_size
593
594 For strided load or store, return the group_size only if it is the first
595 load or store of a group, else return 1. This ensures that group size is
596 only returned once per group. */
597
598 static int
599 vect_cost_strided_group_size (stmt_vec_info stmt_info)
600 {
601 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
602
603 if (first_stmt == STMT_VINFO_STMT (stmt_info))
604 return DR_GROUP_SIZE (stmt_info);
605
606 return 1;
607 }
608
609
610 /* Function vect_model_store_cost
611
612 Models cost for stores. In the case of strided accesses, one access
613 has the overhead of the strided access attributed to it. */
614
615 void
616 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
617 enum vect_def_type dt, slp_tree slp_node)
618 {
619 int group_size;
620 int inside_cost = 0, outside_cost = 0;
621
622 /* The SLP costs were already calculated during SLP tree build. */
623 if (PURE_SLP_STMT (stmt_info))
624 return;
625
626 if (dt == vect_constant_def || dt == vect_external_def)
627 outside_cost = TARG_SCALAR_TO_VEC_COST;
628
629 /* Strided access? */
630 if (DR_GROUP_FIRST_DR (stmt_info) && !slp_node)
631 group_size = vect_cost_strided_group_size (stmt_info);
632 /* Not a strided access. */
633 else
634 group_size = 1;
635
636 /* Is this an access in a group of stores, which provide strided access?
637 If so, add in the cost of the permutes. */
638 if (group_size > 1)
639 {
640 /* Uses a high and low interleave operation for each needed permute. */
641 inside_cost = ncopies * exact_log2(group_size) * group_size
642 * TARG_VEC_STMT_COST;
643
644 if (vect_print_dump_info (REPORT_COST))
645 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
646 group_size);
647
648 }
649
650 /* Costs of the stores. */
651 inside_cost += ncopies * TARG_VEC_STORE_COST;
652
653 if (vect_print_dump_info (REPORT_COST))
654 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
655 "outside_cost = %d .", inside_cost, outside_cost);
656
657 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
658 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
659 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
660 }
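/* Worked example (illustrative): for an interleaved group of 4 stores
   vectorized with ncopies == 1, the function above accounts for

     1 * exact_log2 (4) * 4 * TARG_VEC_STMT_COST    (8 interleave stmts)
     + 1 * TARG_VEC_STORE_COST                      (the vector store)

   with the permute cost charged to the first store of the group; the
   remaining stores of the group see group_size == 1 and only add their
   own store cost.  */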
661
662
663 /* Function vect_model_load_cost
664
665 Models cost for loads. In the case of strided accesses, one access (the first in the group)
666 has the overhead of the strided access attributed to it. Since unaligned
667 accesses are supported for loads, we also account for the costs of the
668 access scheme chosen. */
669
670 void
671 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
672
673 {
674 int group_size;
675 int alignment_support_scheme;
676 gimple first_stmt;
677 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
678 int inside_cost = 0, outside_cost = 0;
679
680 /* The SLP costs were already calculated during SLP tree build. */
681 if (PURE_SLP_STMT (stmt_info))
682 return;
683
684 /* Strided accesses? */
685 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
686 if (first_stmt && !slp_node)
687 {
688 group_size = vect_cost_strided_group_size (stmt_info);
689 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
690 }
691 /* Not a strided access. */
692 else
693 {
694 group_size = 1;
695 first_dr = dr;
696 }
697
698 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
699
700 /* Is this an access in a group of loads providing strided access?
701 If so, add in the cost of the permutes. */
702 if (group_size > 1)
703 {
704 /* Uses even and odd extract operations for each needed permute. */
705 inside_cost = ncopies * exact_log2(group_size) * group_size
706 * TARG_VEC_STMT_COST;
707
708 if (vect_print_dump_info (REPORT_COST))
709 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
710 group_size);
711
712 }
713
714 /* The loads themselves. */
715 switch (alignment_support_scheme)
716 {
717 case dr_aligned:
718 {
719 inside_cost += ncopies * TARG_VEC_LOAD_COST;
720
721 if (vect_print_dump_info (REPORT_COST))
722 fprintf (vect_dump, "vect_model_load_cost: aligned.");
723
724 break;
725 }
726 case dr_unaligned_supported:
727 {
728 /* Here, we assign an additional cost for the unaligned load. */
729 inside_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
730
731 if (vect_print_dump_info (REPORT_COST))
732 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
733 "hardware.");
734
735 break;
736 }
737 case dr_explicit_realign:
738 {
739 inside_cost += ncopies * (2*TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
740
741 /* FIXME: If the misalignment remains fixed across the iterations of
742 the containing loop, the following cost should be added to the
743 outside costs. */
744 if (targetm.vectorize.builtin_mask_for_load)
745 inside_cost += TARG_VEC_STMT_COST;
746
747 break;
748 }
749 case dr_explicit_realign_optimized:
750 {
751 if (vect_print_dump_info (REPORT_COST))
752 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
753 "pipelined.");
754
755 /* Unaligned software pipeline has a load of an address, an initial
756 load, and possibly a mask operation to "prime" the loop. However,
757 if this is an access in a group of loads, which provide strided
758 access, then the above cost should only be considered for one
759 access in the group. Inside the loop, there is a load op
760 and a realignment op. */
761
762 if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node)
763 {
764 outside_cost = 2*TARG_VEC_STMT_COST;
765 if (targetm.vectorize.builtin_mask_for_load)
766 outside_cost += TARG_VEC_STMT_COST;
767 }
768
769 inside_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
770
771 break;
772 }
773
774 default:
775 gcc_unreachable ();
776 }
777
778 if (vect_print_dump_info (REPORT_COST))
779 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
780 "outside_cost = %d .", inside_cost, outside_cost);
781
782 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
783 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
784 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
785 }
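/* Worked example (illustrative): for an interleaved group of 4 loads
   vectorized with ncopies == 1, where the target supports misaligned
   vector loads (dr_unaligned_supported), the first load of the group is
   charged

     1 * exact_log2 (4) * 4 * TARG_VEC_STMT_COST     (extract/permute stmts)
     + 1 * TARG_VEC_UNALIGNED_LOAD_COST              (the vector load)

   while the other loads of the group only add their own load cost.  */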
786
787
788 /* Function vect_init_vector.
789
790 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
791 the vector elements of VECTOR_VAR. Place the initialization at BSI if it
792 is not NULL. Otherwise, place the initialization at the loop preheader.
793 Return the DEF of INIT_STMT.
794 It will be used in the vectorization of STMT. */
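/* Usage sketch (illustrative): when called with a V4SI constant vector
   {3, 3, 3, 3} and GSI == NULL from within a loop, the function emits

     vect_cst_ = { 3, 3, 3, 3 };

   on the loop preheader edge and returns the SSA name holding the result,
   which the caller then uses as a vector operand.  */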
795
796 tree
797 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
798 gimple_stmt_iterator *gsi)
799 {
800 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
801 tree new_var;
802 gimple init_stmt;
803 tree vec_oprnd;
804 edge pe;
805 tree new_temp;
806 basic_block new_bb;
807
808 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
809 add_referenced_var (new_var);
810 init_stmt = gimple_build_assign (new_var, vector_var);
811 new_temp = make_ssa_name (new_var, init_stmt);
812 gimple_assign_set_lhs (init_stmt, new_temp);
813
814 if (gsi)
815 vect_finish_stmt_generation (stmt, init_stmt, gsi);
816 else
817 {
818 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
819
820 if (loop_vinfo)
821 {
822 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
823
824 if (nested_in_vect_loop_p (loop, stmt))
825 loop = loop->inner;
826
827 pe = loop_preheader_edge (loop);
828 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
829 gcc_assert (!new_bb);
830 }
831 else
832 {
833 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
834 basic_block bb;
835 gimple_stmt_iterator gsi_bb_start;
836
837 gcc_assert (bb_vinfo);
838 bb = BB_VINFO_BB (bb_vinfo);
839 gsi_bb_start = gsi_after_labels (bb);
840 gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
841 }
842 }
843
844 if (vect_print_dump_info (REPORT_DETAILS))
845 {
846 fprintf (vect_dump, "created new init_stmt: ");
847 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
848 }
849
850 vec_oprnd = gimple_assign_lhs (init_stmt);
851 return vec_oprnd;
852 }
853
854
855 /* Function vect_get_vec_def_for_operand.
856
857 OP is an operand in STMT. This function returns a (vector) def that will be
858 used in the vectorized stmt for STMT.
859
860 In the case that OP is an SSA_NAME which is defined in the loop, then
861 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
862
863 In case OP is an invariant or constant, a new stmt that creates a vector def
864 needs to be introduced. */
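/* Illustrative sketch (hypothetical stmt): for 'x_3 = a_7 * 4', asking
   for the vector def of a_7 (defined inside the loop) simply returns the
   lhs of the already vectorized stmt recorded in STMT_VINFO_VEC_STMT,
   while asking for the constant 4 builds a new '{4, 4, ..., 4}' vector
   via vect_init_vector.  */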
865
866 tree
867 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
868 {
869 tree vec_oprnd;
870 gimple vec_stmt;
871 gimple def_stmt;
872 stmt_vec_info def_stmt_info = NULL;
873 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
874 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
875 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
876 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
877 tree vec_inv;
878 tree vec_cst;
879 tree t = NULL_TREE;
880 tree def;
881 int i;
882 enum vect_def_type dt;
883 bool is_simple_use;
884 tree vector_type;
885
886 if (vect_print_dump_info (REPORT_DETAILS))
887 {
888 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
889 print_generic_expr (vect_dump, op, TDF_SLIM);
890 }
891
892 is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
893 &dt);
894 gcc_assert (is_simple_use);
895 if (vect_print_dump_info (REPORT_DETAILS))
896 {
897 if (def)
898 {
899 fprintf (vect_dump, "def = ");
900 print_generic_expr (vect_dump, def, TDF_SLIM);
901 }
902 if (def_stmt)
903 {
904 fprintf (vect_dump, " def_stmt = ");
905 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
906 }
907 }
908
909 switch (dt)
910 {
911 /* Case 1: operand is a constant. */
912 case vect_constant_def:
913 {
914 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
915 gcc_assert (vector_type);
916
917 if (scalar_def)
918 *scalar_def = op;
919
920 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
921 if (vect_print_dump_info (REPORT_DETAILS))
922 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
923
924 for (i = nunits - 1; i >= 0; --i)
925 {
926 t = tree_cons (NULL_TREE, op, t);
927 }
928 vec_cst = build_vector (vector_type, t);
929 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
930 }
931
932 /* Case 2: operand is defined outside the loop - loop invariant. */
933 case vect_external_def:
934 {
935 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
936 gcc_assert (vector_type);
937 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
938
939 if (scalar_def)
940 *scalar_def = def;
941
942 /* Create 'vec_inv = {inv,inv,..,inv}' */
943 if (vect_print_dump_info (REPORT_DETAILS))
944 fprintf (vect_dump, "Create vector_inv.");
945
946 for (i = nunits - 1; i >= 0; --i)
947 {
948 t = tree_cons (NULL_TREE, def, t);
949 }
950
951 /* FIXME: use build_constructor directly. */
952 vec_inv = build_constructor_from_list (vector_type, t);
953 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
954 }
955
956 /* Case 3: operand is defined inside the loop. */
957 case vect_internal_def:
958 {
959 if (scalar_def)
960 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
961
962 /* Get the def from the vectorized stmt. */
963 def_stmt_info = vinfo_for_stmt (def_stmt);
964 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
965 gcc_assert (vec_stmt);
966 if (gimple_code (vec_stmt) == GIMPLE_PHI)
967 vec_oprnd = PHI_RESULT (vec_stmt);
968 else if (is_gimple_call (vec_stmt))
969 vec_oprnd = gimple_call_lhs (vec_stmt);
970 else
971 vec_oprnd = gimple_assign_lhs (vec_stmt);
972 return vec_oprnd;
973 }
974
975 /* Case 4: operand is defined by a loop header phi - reduction */
976 case vect_reduction_def:
977 case vect_nested_cycle:
978 {
979 struct loop *loop;
980
981 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
982 loop = (gimple_bb (def_stmt))->loop_father;
983
984 /* Get the def before the loop */
985 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
986 return get_initial_def_for_reduction (stmt, op, scalar_def);
987 }
988
989 /* Case 5: operand is defined by loop-header phi - induction. */
990 case vect_induction_def:
991 {
992 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
993
994 /* Get the def from the vectorized stmt. */
995 def_stmt_info = vinfo_for_stmt (def_stmt);
996 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
997 gcc_assert (vec_stmt && gimple_code (vec_stmt) == GIMPLE_PHI);
998 vec_oprnd = PHI_RESULT (vec_stmt);
999 return vec_oprnd;
1000 }
1001
1002 default:
1003 gcc_unreachable ();
1004 }
1005 }
1006
1007
1008 /* Function vect_get_vec_def_for_stmt_copy
1009
1010 Return a vector-def for an operand. This function is used when the
1011 vectorized stmt to be created (by the caller to this function) is a "copy"
1012 created in case the vectorized result cannot fit in one vector, and several
1013 copies of the vector-stmt are required. In this case the vector-def is
1014 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1015 of the stmt that defines VEC_OPRND.
1016 DT is the type of the vector def VEC_OPRND.
1017
1018 Context:
1019 In case the vectorization factor (VF) is bigger than the number
1020 of elements that can fit in a vectype (nunits), we have to generate
1021 more than one vector stmt to vectorize the scalar stmt. This situation
1022 arises when there are multiple data-types operated upon in the loop; the
1023 smallest data-type determines the VF, and as a result, when vectorizing
1024 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1025 vector stmt (each computing a vector of 'nunits' results, and together
1026 computing 'VF' results in each iteration). This function is called when
1027 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1028 which VF=16 and nunits=4, so the number of copies required is 4):
1029
1030 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1031
1032 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1033 VS1.1: vx.1 = memref1 VS1.2
1034 VS1.2: vx.2 = memref2 VS1.3
1035 VS1.3: vx.3 = memref3
1036
1037 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1038 VSnew.1: vz1 = vx.1 + ... VSnew.2
1039 VSnew.2: vz2 = vx.2 + ... VSnew.3
1040 VSnew.3: vz3 = vx.3 + ...
1041
1042 The vectorization of S1 is explained in vectorizable_load.
1043 The vectorization of S2:
1044 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1045 the function 'vect_get_vec_def_for_operand' is called to
1046 get the relevant vector-def for each operand of S2. For operand x it
1047 returns the vector-def 'vx.0'.
1048
1049 To create the remaining copies of the vector-stmt (VSnew.j), this
1050 function is called to get the relevant vector-def for each operand. It is
1051 obtained from the respective VS1.j stmt, which is recorded in the
1052 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1053
1054 For example, to obtain the vector-def 'vx.1' in order to create the
1055 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1056 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1057 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1058 and return its def ('vx.1').
1059 Overall, to create the above sequence this function will be called 3 times:
1060 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1061 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1062 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1063
1064 tree
1065 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1066 {
1067 gimple vec_stmt_for_operand;
1068 stmt_vec_info def_stmt_info;
1069
1070 /* Do nothing; can reuse same def. */
1071 if (dt == vect_external_def || dt == vect_constant_def )
1072 return vec_oprnd;
1073
1074 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1075 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1076 gcc_assert (def_stmt_info);
1077 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1078 gcc_assert (vec_stmt_for_operand);
1080 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1081 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1082 else
1083 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1084 return vec_oprnd;
1085 }
1086
1087
1088 /* Get vectorized definitions for the operands to create a copy of an original
1089 stmt. See vect_get_vec_def_for_stmt_copy() for details. */
1090
1091 static void
1092 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1093 VEC(tree,heap) **vec_oprnds0,
1094 VEC(tree,heap) **vec_oprnds1)
1095 {
1096 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1097
1098 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1099 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1100
1101 if (vec_oprnds1 && *vec_oprnds1)
1102 {
1103 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1104 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1105 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1106 }
1107 }
1108
1109
1110 /* Get vectorized definitions for OP0 and OP1 (from SLP_NODE, if it is not NULL). */
1111
1112 static void
1113 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1114 VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
1115 slp_tree slp_node)
1116 {
1117 if (slp_node)
1118 vect_get_slp_defs (slp_node, vec_oprnds0, vec_oprnds1);
1119 else
1120 {
1121 tree vec_oprnd;
1122
1123 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1124 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1125 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1126
1127 if (op1)
1128 {
1129 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1130 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1131 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1132 }
1133 }
1134 }
1135
1136
1137 /* Function vect_finish_stmt_generation.
1138
1139 Insert a new stmt. */
1140
1141 void
1142 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1143 gimple_stmt_iterator *gsi)
1144 {
1145 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1146 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1147 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1148
1149 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1150
1151 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1152
1153 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1154 bb_vinfo));
1155
1156 if (vect_print_dump_info (REPORT_DETAILS))
1157 {
1158 fprintf (vect_dump, "add new stmt: ");
1159 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1160 }
1161
1162 gimple_set_location (vec_stmt, gimple_location (gsi_stmt (*gsi)));
1163 }
1164
1165 /* Checks if CALL can be vectorized with vector types VECTYPE_OUT and VECTYPE_IN. Returns
1166 a function declaration if the target has a vectorized version
1167 of the function, or NULL_TREE if the function cannot be vectorized. */
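/* For example (illustrative): given a call to a const math builtin such
   as sqrt, the target hook may return the decl of a vector sqrt builtin
   that consumes VECTYPE_IN and produces VECTYPE_OUT; if the target has
   no such builtin, NULL_TREE is returned and the call cannot be
   vectorized.  */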
1168
1169 tree
1170 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1171 {
1172 tree fndecl = gimple_call_fndecl (call);
1173 enum built_in_function code;
1174
1175 /* We only handle functions that do not read or clobber memory -- i.e.
1176 const or novops ones. */
1177 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1178 return NULL_TREE;
1179
1180 if (!fndecl
1181 || TREE_CODE (fndecl) != FUNCTION_DECL
1182 || !DECL_BUILT_IN (fndecl))
1183 return NULL_TREE;
1184
1185 code = DECL_FUNCTION_CODE (fndecl);
1186 return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
1187 vectype_in);
1188 }
1189
1190 /* Function vectorizable_call.
1191
1192 Check if STMT performs a function call that can be vectorized.
1193 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1194 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1195 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1196
1197 static bool
1198 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
1199 {
1200 tree vec_dest;
1201 tree scalar_dest;
1202 tree op, type;
1203 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1204 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1205 tree vectype_out, vectype_in;
1206 int nunits_in;
1207 int nunits_out;
1208 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1209 tree fndecl, new_temp, def, rhs_type, lhs_type;
1210 gimple def_stmt;
1211 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1212 gimple new_stmt;
1213 int ncopies, j;
1214 VEC(tree, heap) *vargs = NULL;
1215 enum { NARROW, NONE, WIDEN } modifier;
1216 size_t i, nargs;
1217
1218 /* FORNOW: unsupported in basic block SLP. */
1219 gcc_assert (loop_vinfo);
1220
1221 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1222 return false;
1223
1224 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1225 return false;
1226
1227 /* FORNOW: SLP not supported. */
1228 if (STMT_SLP_TYPE (stmt_info))
1229 return false;
1230
1231 /* Is STMT a vectorizable call? */
1232 if (!is_gimple_call (stmt))
1233 return false;
1234
1235 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1236 return false;
1237
1238 /* Process function arguments. */
1239 rhs_type = NULL_TREE;
1240 nargs = gimple_call_num_args (stmt);
1241
1242 /* Bail out if the function has more than two arguments; we do not
1243 have interesting builtin functions to vectorize with more than two
1244 arguments. Calls with no arguments are not handled either. */
1245 if (nargs == 0 || nargs > 2)
1246 return false;
1247
1248 for (i = 0; i < nargs; i++)
1249 {
1250 op = gimple_call_arg (stmt, i);
1251
1252 /* We can only handle calls with arguments of the same type. */
1253 if (rhs_type
1254 && rhs_type != TREE_TYPE (op))
1255 {
1256 if (vect_print_dump_info (REPORT_DETAILS))
1257 fprintf (vect_dump, "argument types differ.");
1258 return false;
1259 }
1260 rhs_type = TREE_TYPE (op);
1261
1262 if (!vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def, &dt[i]))
1263 {
1264 if (vect_print_dump_info (REPORT_DETAILS))
1265 fprintf (vect_dump, "use not simple.");
1266 return false;
1267 }
1268 }
1269
1270 vectype_in = get_vectype_for_scalar_type (rhs_type);
1271 if (!vectype_in)
1272 return false;
1273 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1274
1275 lhs_type = TREE_TYPE (gimple_call_lhs (stmt));
1276 vectype_out = get_vectype_for_scalar_type (lhs_type);
1277 if (!vectype_out)
1278 return false;
1279 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1280
1281 /* FORNOW */
1282 if (nunits_in == nunits_out / 2)
1283 modifier = NARROW;
1284 else if (nunits_out == nunits_in)
1285 modifier = NONE;
1286 else if (nunits_out == nunits_in / 2)
1287 modifier = WIDEN;
1288 else
1289 return false;
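  /* For instance (illustrative): a call taking a V2DF argument and
     producing a V4SF result has nunits_in == 2 and nunits_out == 4, so
     the modifier is NARROW and the vectorized call consumes two vector
     defs per scalar argument; the symmetric case selects WIDEN, which no
     target currently implements here.  */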
1290
1291 /* For now, we only vectorize functions if a target specific builtin
1292 is available. TODO -- in some cases, it might be profitable to
1293 insert the calls for pieces of the vector, in order to be able
1294 to vectorize other operations in the loop. */
1295 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1296 if (fndecl == NULL_TREE)
1297 {
1298 if (vect_print_dump_info (REPORT_DETAILS))
1299 fprintf (vect_dump, "function is not vectorizable.");
1300
1301 return false;
1302 }
1303
1304 gcc_assert (!gimple_vuse (stmt));
1305
1306 if (modifier == NARROW)
1307 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1308 else
1309 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1310
1311 /* Sanity check: make sure that at least one copy of the vectorized stmt
1312 needs to be generated. */
1313 gcc_assert (ncopies >= 1);
1314
1315 if (!vec_stmt) /* transformation not required. */
1316 {
1317 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1318 if (vect_print_dump_info (REPORT_DETAILS))
1319 fprintf (vect_dump, "=== vectorizable_call ===");
1320 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1321 return true;
1322 }
1323
1324 /** Transform. **/
1325
1326 if (vect_print_dump_info (REPORT_DETAILS))
1327 fprintf (vect_dump, "transform operation.");
1328
1329 /* Handle def. */
1330 scalar_dest = gimple_call_lhs (stmt);
1331 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1332
1333 prev_stmt_info = NULL;
1334 switch (modifier)
1335 {
1336 case NONE:
1337 for (j = 0; j < ncopies; ++j)
1338 {
1339 /* Build argument list for the vectorized call. */
1340 if (j == 0)
1341 vargs = VEC_alloc (tree, heap, nargs);
1342 else
1343 VEC_truncate (tree, vargs, 0);
1344
1345 for (i = 0; i < nargs; i++)
1346 {
1347 op = gimple_call_arg (stmt, i);
1348 if (j == 0)
1349 vec_oprnd0
1350 = vect_get_vec_def_for_operand (op, stmt, NULL);
1351 else
1352 vec_oprnd0
1353 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
1354
1355 VEC_quick_push (tree, vargs, vec_oprnd0);
1356 }
1357
1358 new_stmt = gimple_build_call_vec (fndecl, vargs);
1359 new_temp = make_ssa_name (vec_dest, new_stmt);
1360 gimple_call_set_lhs (new_stmt, new_temp);
1361
1362 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1363
1364 if (j == 0)
1365 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1366 else
1367 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1368
1369 prev_stmt_info = vinfo_for_stmt (new_stmt);
1370 }
1371
1372 break;
1373
1374 case NARROW:
1375 for (j = 0; j < ncopies; ++j)
1376 {
1377 /* Build argument list for the vectorized call. */
1378 if (j == 0)
1379 vargs = VEC_alloc (tree, heap, nargs * 2);
1380 else
1381 VEC_truncate (tree, vargs, 0);
1382
1383 for (i = 0; i < nargs; i++)
1384 {
1385 op = gimple_call_arg (stmt, i);
1386 if (j == 0)
1387 {
1388 vec_oprnd0
1389 = vect_get_vec_def_for_operand (op, stmt, NULL);
1390 vec_oprnd1
1391 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
1392 }
1393 else
1394 {
1395 vec_oprnd0
1396 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd1);
1397 vec_oprnd1
1398 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
1399 }
1400
1401 VEC_quick_push (tree, vargs, vec_oprnd0);
1402 VEC_quick_push (tree, vargs, vec_oprnd1);
1403 }
1404
1405 new_stmt = gimple_build_call_vec (fndecl, vargs);
1406 new_temp = make_ssa_name (vec_dest, new_stmt);
1407 gimple_call_set_lhs (new_stmt, new_temp);
1408
1409 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1410
1411 if (j == 0)
1412 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1413 else
1414 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1415
1416 prev_stmt_info = vinfo_for_stmt (new_stmt);
1417 }
1418
1419 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1420
1421 break;
1422
1423 case WIDEN:
1424 /* No current target implements this case. */
1425 return false;
1426 }
1427
1428 VEC_free (tree, heap, vargs);
1429
1430 /* Update the exception handling table with the vector stmt if necessary. */
1431 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1432 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1433
1434 /* The call in STMT might prevent it from being removed in DCE.
1435 We however cannot remove it here, due to the way the SSA name
1436 it defines is mapped to the new definition. So just replace the
1437 rhs of the statement with something harmless. */
1438
1439 type = TREE_TYPE (scalar_dest);
1440 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
1441 fold_convert (type, integer_zero_node));
1442 set_vinfo_for_stmt (new_stmt, stmt_info);
1443 set_vinfo_for_stmt (stmt, NULL);
1444 STMT_VINFO_STMT (stmt_info) = new_stmt;
1445 gsi_replace (gsi, new_stmt, false);
1446 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1447
1448 return true;
1449 }
1450
1451
1452 /* Function vect_gen_widened_results_half
1453
1454 Create a vector stmt whose code, type, number of arguments, and result
1455 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
1456 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
1457 In the case that CODE is a CALL_EXPR, this means that a call to DECL
1458 needs to be created (DECL is a function-decl of a target-builtin).
1459 STMT is the original scalar stmt that we are vectorizing. */
1460
1461 static gimple
1462 vect_gen_widened_results_half (enum tree_code code,
1463 tree decl,
1464 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1465 tree vec_dest, gimple_stmt_iterator *gsi,
1466 gimple stmt)
1467 {
1468 gimple new_stmt;
1469 tree new_temp;
1470
1471 /* Generate half of the widened result: */
1472 if (code == CALL_EXPR)
1473 {
1474 /* Target specific support */
1475 if (op_type == binary_op)
1476 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1477 else
1478 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1479 new_temp = make_ssa_name (vec_dest, new_stmt);
1480 gimple_call_set_lhs (new_stmt, new_temp);
1481 }
1482 else
1483 {
1484 /* Generic support */
1485 gcc_assert (op_type == TREE_CODE_LENGTH (code));
1486 if (op_type != binary_op)
1487 vec_oprnd1 = NULL;
1488 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1489 vec_oprnd1);
1490 new_temp = make_ssa_name (vec_dest, new_stmt);
1491 gimple_assign_set_lhs (new_stmt, new_temp);
1492 }
1493 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1494
1495 return new_stmt;
1496 }
1497
1498
1499 /* Check if STMT performs a conversion operation, that can be vectorized.
1500 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1501 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1502 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1503
1504 static bool
1505 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
1506 gimple *vec_stmt, slp_tree slp_node)
1507 {
1508 tree vec_dest;
1509 tree scalar_dest;
1510 tree op0;
1511 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1512 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1513 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1514 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
1515 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
1516 tree new_temp;
1517 tree def;
1518 gimple def_stmt;
1519 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1520 gimple new_stmt = NULL;
1521 stmt_vec_info prev_stmt_info;
1522 int nunits_in;
1523 int nunits_out;
1524 tree vectype_out, vectype_in;
1525 int ncopies, j;
1527 tree rhs_type, lhs_type;
1528 tree builtin_decl;
1529 enum { NARROW, NONE, WIDEN } modifier;
1530 int i;
1531 VEC(tree,heap) *vec_oprnds0 = NULL;
1532 tree vop0;
1533 tree integral_type;
1534 VEC(tree,heap) *dummy = NULL;
1535 int dummy_int;
1536
1537 /* Is STMT a vectorizable conversion? */
1538
1539 /* FORNOW: unsupported in basic block SLP. */
1540 gcc_assert (loop_vinfo);
1541
1542 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1543 return false;
1544
1545 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1546 return false;
1547
1548 if (!is_gimple_assign (stmt))
1549 return false;
1550
1551 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1552 return false;
1553
1554 code = gimple_assign_rhs_code (stmt);
1555 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
1556 return false;
1557
1558 /* Check types of lhs and rhs. */
1559 op0 = gimple_assign_rhs1 (stmt);
1560 rhs_type = TREE_TYPE (op0);
1561 vectype_in = get_vectype_for_scalar_type (rhs_type);
1562 if (!vectype_in)
1563 return false;
1564 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1565
1566 scalar_dest = gimple_assign_lhs (stmt);
1567 lhs_type = TREE_TYPE (scalar_dest);
1568 vectype_out = get_vectype_for_scalar_type (lhs_type);
1569 if (!vectype_out)
1570 return false;
1571 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1572
1573 /* FORNOW */
1574 if (nunits_in == nunits_out / 2)
1575 modifier = NARROW;
1576 else if (nunits_out == nunits_in)
1577 modifier = NONE;
1578 else if (nunits_out == nunits_in / 2)
1579 modifier = WIDEN;
1580 else
1581 return false;
1582
1583 if (modifier == NONE)
1584 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);
1585
1586 /* Bail out if the types are either both integral or both non-integral. */
1587 if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
1588 || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
1589 return false;
1590
1591 integral_type = INTEGRAL_TYPE_P (rhs_type) ? vectype_in : vectype_out;
1592
1593 if (modifier == NARROW)
1594 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1595 else
1596 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1597
1598 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
1599 this, so we can safely override NCOPIES with 1 here. */
1600 if (slp_node)
1601 ncopies = 1;
1602
1603 /* Sanity check: make sure that at least one copy of the vectorized stmt
1604 needs to be generated. */
1605 gcc_assert (ncopies >= 1);
1606
1607 /* Check the operands of the operation. */
1608 if (!vect_is_simple_use (op0, loop_vinfo, NULL, &def_stmt, &def, &dt[0]))
1609 {
1610 if (vect_print_dump_info (REPORT_DETAILS))
1611 fprintf (vect_dump, "use not simple.");
1612 return false;
1613 }
1614
1615 /* Supportable by target? */
1616 if ((modifier == NONE
1617 && !targetm.vectorize.builtin_conversion (code, integral_type))
1618 || (modifier == WIDEN
1619 && !supportable_widening_operation (code, stmt, vectype_in,
1620 &decl1, &decl2,
1621 &code1, &code2,
1622 &dummy_int, &dummy))
1623 || (modifier == NARROW
1624 && !supportable_narrowing_operation (code, stmt, vectype_in,
1625 &code1, &dummy_int, &dummy)))
1626 {
1627 if (vect_print_dump_info (REPORT_DETAILS))
1628 fprintf (vect_dump, "conversion not supported by target.");
1629 return false;
1630 }
1631
1632 if (modifier != NONE)
1633 {
1634 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
1635 /* FORNOW: SLP not supported. */
1636 if (STMT_SLP_TYPE (stmt_info))
1637 return false;
1638 }
1639
1640 if (!vec_stmt) /* transformation not required. */
1641 {
1642 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
1643 return true;
1644 }
1645
1646 /** Transform. **/
1647 if (vect_print_dump_info (REPORT_DETAILS))
1648 fprintf (vect_dump, "transform conversion.");
1649
1650 /* Handle def. */
1651 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1652
1653 if (modifier == NONE && !slp_node)
1654 vec_oprnds0 = VEC_alloc (tree, heap, 1);
1655
1656 prev_stmt_info = NULL;
1657 switch (modifier)
1658 {
1659 case NONE:
1660 for (j = 0; j < ncopies; j++)
1661 {
1662 if (j == 0)
1663 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
1664 else
1665 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
1666
1667 builtin_decl =
1668 targetm.vectorize.builtin_conversion (code, integral_type);
1669 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
1670 {
1671 /* Arguments are ready. Create the new vector stmt. */
1672 new_stmt = gimple_build_call (builtin_decl, 1, vop0);
1673 new_temp = make_ssa_name (vec_dest, new_stmt);
1674 gimple_call_set_lhs (new_stmt, new_temp);
1675 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1676 if (slp_node)
1677 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
1678 }
1679
1680 if (j == 0)
1681 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1682 else
1683 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1684 prev_stmt_info = vinfo_for_stmt (new_stmt);
1685 }
1686 break;
1687
1688 case WIDEN:
1689 /* In case the vectorization factor (VF) is bigger than the number
1690 of elements that we can fit in a vectype (nunits), we have to
1691 generate more than one vector stmt - i.e - we need to "unroll"
1692 the vector stmt by a factor VF/nunits. */
1693 for (j = 0; j < ncopies; j++)
1694 {
1695 if (j == 0)
1696 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1697 else
1698 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1699
1700 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
1701
1702 /* Generate first half of the widened result: */
1703 new_stmt
1704 = vect_gen_widened_results_half (code1, decl1,
1705 vec_oprnd0, vec_oprnd1,
1706 unary_op, vec_dest, gsi, stmt);
1707 if (j == 0)
1708 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1709 else
1710 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1711 prev_stmt_info = vinfo_for_stmt (new_stmt);
1712
1713 /* Generate second half of the widened result: */
1714 new_stmt
1715 = vect_gen_widened_results_half (code2, decl2,
1716 vec_oprnd0, vec_oprnd1,
1717 unary_op, vec_dest, gsi, stmt);
1718 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1719 prev_stmt_info = vinfo_for_stmt (new_stmt);
1720 }
1721 break;
1722
1723 case NARROW:
1724 /* In case the vectorization factor (VF) is bigger than the number
1725 of elements that we can fit in a vectype (nunits), we have to
1726 generate more than one vector stmt - i.e - we need to "unroll"
1727 the vector stmt by a factor VF/nunits. */
1728 for (j = 0; j < ncopies; j++)
1729 {
1730 /* Handle uses. */
1731 if (j == 0)
1732 {
1733 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1734 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1735 }
1736 else
1737 {
1738 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
1739 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1740 }
1741
1742 /* Arguments are ready. Create the new vector stmt. */
1744 new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
1745 vec_oprnd1);
1746 new_temp = make_ssa_name (vec_dest, new_stmt);
1747 gimple_assign_set_lhs (new_stmt, new_temp);
1748 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1749
1750 if (j == 0)
1751 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1752 else
1753 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1754
1755 prev_stmt_info = vinfo_for_stmt (new_stmt);
1756 }
1757
1758 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1759 }
1760
1761 if (vec_oprnds0)
1762 VEC_free (tree, heap, vec_oprnds0);
1763
1764 return true;
1765 }
1766 /* Function vectorizable_assignment.
1767
1768 Check if STMT performs an assignment (copy) that can be vectorized.
1769 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1770 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1771 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1772
1773 static bool
1774 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
1775 gimple *vec_stmt, slp_tree slp_node)
1776 {
1777 tree vec_dest;
1778 tree scalar_dest;
1779 tree op;
1780 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1781 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1782 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1783 tree new_temp;
1784 tree def;
1785 gimple def_stmt;
1786 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1787 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1788 int ncopies;
1789 int i;
1790 VEC(tree,heap) *vec_oprnds = NULL;
1791 tree vop;
1792 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1793
1794 /* Multiple types in SLP are handled by creating the appropriate number of
1795 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1796 case of SLP. */
1797 if (slp_node)
1798 ncopies = 1;
1799 else
1800 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
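/* For example, with a vectorization factor of 8 and a V4SI vectype
   (assuming a 128-bit vector unit) this gives ncopies == 2; such
   multi-copy assignments are still rejected below (FORNOW).  */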
1801
1802 gcc_assert (ncopies >= 1);
1803 if (ncopies > 1)
1804 return false; /* FORNOW */
1805
1806 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1807 return false;
1808
1809 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1810 return false;
1811
1812 /* Is vectorizable assignment? */
1813 if (!is_gimple_assign (stmt))
1814 return false;
1815
1816 scalar_dest = gimple_assign_lhs (stmt);
1817 if (TREE_CODE (scalar_dest) != SSA_NAME)
1818 return false;
1819
1820 if (gimple_assign_single_p (stmt)
1821 || gimple_assign_rhs_code (stmt) == PAREN_EXPR)
1822 op = gimple_assign_rhs1 (stmt);
1823 else
1824 return false;
1825
1826 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[0]))
1827 {
1828 if (vect_print_dump_info (REPORT_DETAILS))
1829 fprintf (vect_dump, "use not simple.");
1830 return false;
1831 }
1832
1833 if (!vec_stmt) /* transformation not required. */
1834 {
1835 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1836 if (vect_print_dump_info (REPORT_DETAILS))
1837 fprintf (vect_dump, "=== vectorizable_assignment ===");
1838 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1839 return true;
1840 }
1841
1842 /** Transform. **/
1843 if (vect_print_dump_info (REPORT_DETAILS))
1844 fprintf (vect_dump, "transform assignment.");
1845
1846 /* Handle def. */
1847 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1848
1849 /* Handle use. */
1850 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
1851
1852 /* Arguments are ready. Create the new vector stmt. */
1853 for (i = 0; VEC_iterate (tree, vec_oprnds, i, vop); i++)
1854 {
1855 *vec_stmt = gimple_build_assign (vec_dest, vop);
1856 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1857 gimple_assign_set_lhs (*vec_stmt, new_temp);
1858 vect_finish_stmt_generation (stmt, *vec_stmt, gsi);
1859 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt;
1860
1861 if (slp_node)
1862 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), *vec_stmt);
1863 }
1864
1865 VEC_free (tree, heap, vec_oprnds);
1866 return true;
1867 }
1868
1869 /* Function vectorizable_operation.
1870
1871 Check if STMT performs a binary or unary operation that can be vectorized.
1872 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1873 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1874 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1875
1876 static bool
1877 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
1878 gimple *vec_stmt, slp_tree slp_node)
1879 {
1880 tree vec_dest;
1881 tree scalar_dest;
1882 tree op0, op1 = NULL;
1883 tree vec_oprnd1 = NULL_TREE;
1884 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1885 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1886 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1887 enum tree_code code;
1888 enum machine_mode vec_mode;
1889 tree new_temp;
1890 int op_type;
1891 optab optab;
1892 int icode;
1893 enum machine_mode optab_op2_mode;
1894 tree def;
1895 gimple def_stmt;
1896 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1897 gimple new_stmt = NULL;
1898 stmt_vec_info prev_stmt_info;
1899 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
1900 int nunits_out;
1901 tree vectype_out;
1902 int ncopies;
1903 int j, i;
1904 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
1905 tree vop0, vop1;
1906 unsigned int k;
1907 bool shift_p = false;
1908 bool scalar_shift_arg = false;
1909 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1910 int vf;
1911
1912 if (loop_vinfo)
1913 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1914 else
1915 /* FORNOW: multiple types are not supported in basic block SLP. */
1916 vf = nunits_in;
1917
1918 /* Multiple types in SLP are handled by creating the appropriate number of
1919 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1920 case of SLP. */
1921 if (slp_node)
1922 ncopies = 1;
1923 else
1924 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1925
1926 gcc_assert (ncopies >= 1);
1927
1928 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1929 return false;
1930
1931 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1932 return false;
1933
1934 /* Is STMT a vectorizable binary/unary operation? */
1935 if (!is_gimple_assign (stmt))
1936 return false;
1937
1938 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1939 return false;
1940
1941 scalar_dest = gimple_assign_lhs (stmt);
1942 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
1943 if (!vectype_out)
1944 return false;
1945 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1946 if (nunits_out != nunits_in)
1947 return false;
1948
1949 code = gimple_assign_rhs_code (stmt);
1950
1951 /* For pointer addition, we should use the normal plus for
1952 the vector addition. */
1953 if (code == POINTER_PLUS_EXPR)
1954 code = PLUS_EXPR;
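/* For example, a pointer increment such as p_1 = q_2 + 4 is vectorized as
   an ordinary element-wise PLUS on the vector of pointer values.  */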
1955
1956 /* Support only unary or binary operations. */
1957 op_type = TREE_CODE_LENGTH (code);
1958 if (op_type != unary_op && op_type != binary_op)
1959 {
1960 if (vect_print_dump_info (REPORT_DETAILS))
1961 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
1962 return false;
1963 }
1964
1965 op0 = gimple_assign_rhs1 (stmt);
1966 if (!vect_is_simple_use (op0, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[0]))
1967 {
1968 if (vect_print_dump_info (REPORT_DETAILS))
1969 fprintf (vect_dump, "use not simple.");
1970 return false;
1971 }
1972
1973 if (op_type == binary_op)
1974 {
1975 op1 = gimple_assign_rhs2 (stmt);
1976 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
1977 &dt[1]))
1978 {
1979 if (vect_print_dump_info (REPORT_DETAILS))
1980 fprintf (vect_dump, "use not simple.");
1981 return false;
1982 }
1983 }
1984
1985 /* If this is a shift/rotate, determine whether the shift amount is a vector,
1986 or scalar. If the shift/rotate amount is a vector, use the vector/vector
1987 shift optabs. */
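/* For example, x_1 = y_2 << z_3 with a loop-variant shift amount requires
   the vector/vector form, whereas x_1 = y_2 << 3 (or a loop-invariant
   amount) can use the vector/scalar form when the target provides it.  */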
1988 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
1989 || code == RROTATE_EXPR)
1990 {
1991 shift_p = true;
1992
1993 /* vector shifted by vector */
1994 if (dt[1] == vect_internal_def)
1995 {
1996 optab = optab_for_tree_code (code, vectype, optab_vector);
1997 if (vect_print_dump_info (REPORT_DETAILS))
1998 fprintf (vect_dump, "vector/vector shift/rotate found.");
1999 }
2000
2001 /* See if the machine has a vector shifted by scalar insn and if not
2002 then see if it has a vector shifted by vector insn. */
2003 else if (dt[1] == vect_constant_def || dt[1] == vect_external_def)
2004 {
2005 optab = optab_for_tree_code (code, vectype, optab_scalar);
2006 if (optab
2007 && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
2008 != CODE_FOR_nothing))
2009 {
2010 scalar_shift_arg = true;
2011 if (vect_print_dump_info (REPORT_DETAILS))
2012 fprintf (vect_dump, "vector/scalar shift/rotate found.");
2013 }
2014 else
2015 {
2016 optab = optab_for_tree_code (code, vectype, optab_vector);
2017 if (optab
2018 && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
2019 != CODE_FOR_nothing))
2020 {
2021 if (vect_print_dump_info (REPORT_DETAILS))
2022 fprintf (vect_dump, "vector/vector shift/rotate found.");
2023
2024 /* Unlike the other binary operators, shifts/rotates have
2025 the rhs being int, instead of the same type as the lhs,
2026 so make sure the scalar is the right type if we are
2027 dealing with vectors of short/char. */
2028 if (dt[1] == vect_constant_def)
2029 op1 = fold_convert (TREE_TYPE (vectype), op1);
2030 }
2031 }
2032 }
2033
2034 else
2035 {
2036 if (vect_print_dump_info (REPORT_DETAILS))
2037 fprintf (vect_dump, "operand mode requires invariant argument.");
2038 return false;
2039 }
2040 }
2041 else
2042 optab = optab_for_tree_code (code, vectype, optab_default);
2043
2044 /* Supportable by target? */
2045 if (!optab)
2046 {
2047 if (vect_print_dump_info (REPORT_DETAILS))
2048 fprintf (vect_dump, "no optab.");
2049 return false;
2050 }
2051 vec_mode = TYPE_MODE (vectype);
2052 icode = (int) optab_handler (optab, vec_mode)->insn_code;
2053 if (icode == CODE_FOR_nothing)
2054 {
2055 if (vect_print_dump_info (REPORT_DETAILS))
2056 fprintf (vect_dump, "op not supported by target.");
2057 /* Check only during analysis. */
2058 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2059 || (vf < vect_min_worthwhile_factor (code)
2060 && !vec_stmt))
2061 return false;
2062 if (vect_print_dump_info (REPORT_DETAILS))
2063 fprintf (vect_dump, "proceeding using word mode.");
2064 }
2065
2066 /* Worthwhile without SIMD support? Check only during analysis. */
2067 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2068 && vf < vect_min_worthwhile_factor (code)
2069 && !vec_stmt)
2070 {
2071 if (vect_print_dump_info (REPORT_DETAILS))
2072 fprintf (vect_dump, "not worthwhile without SIMD support.");
2073 return false;
2074 }
2075
2076 if (!vec_stmt) /* transformation not required. */
2077 {
2078 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2079 if (vect_print_dump_info (REPORT_DETAILS))
2080 fprintf (vect_dump, "=== vectorizable_operation ===");
2081 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2082 return true;
2083 }
2084
2085 /** Transform. **/
2086
2087 if (vect_print_dump_info (REPORT_DETAILS))
2088 fprintf (vect_dump, "transform binary/unary operation.");
2089
2090 /* Handle def. */
2091 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2092
2093 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2094 created in the previous stages of the recursion, so no allocation is
2095 needed, except for the case of shift with scalar shift argument. In that
2096 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2097 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2098 In case of loop-based vectorization we allocate VECs of size 1. We
2099 allocate VEC_OPRNDS1 only in case of binary operation. */
2100 if (!slp_node)
2101 {
2102 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2103 if (op_type == binary_op)
2104 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2105 }
2106 else if (scalar_shift_arg)
2107 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2108
2109 /* In case the vectorization factor (VF) is bigger than the number
2110 of elements that we can fit in a vectype (nunits), we have to generate
2111 more than one vector stmt - i.e - we need to "unroll" the
2112 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2113 from one copy of the vector stmt to the next, in the field
2114 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2115 stages to find the correct vector defs to be used when vectorizing
2116 stmts that use the defs of the current stmt. The example below illustrates
2117 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
2118 4 vectorized stmts):
2119
2120 before vectorization:
2121 RELATED_STMT VEC_STMT
2122 S1: x = memref - -
2123 S2: z = x + 1 - -
2124
2125 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2126 there):
2127 RELATED_STMT VEC_STMT
2128 VS1_0: vx0 = memref0 VS1_1 -
2129 VS1_1: vx1 = memref1 VS1_2 -
2130 VS1_2: vx2 = memref2 VS1_3 -
2131 VS1_3: vx3 = memref3 - -
2132 S1: x = load - VS1_0
2133 S2: z = x + 1 - -
2134
2135 step2: vectorize stmt S2 (done here):
2136 To vectorize stmt S2 we first need to find the relevant vector
2137 def for the first operand 'x'. This is, as usual, obtained from
2138 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2139 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2140 relevant vector def 'vx0'. Having found 'vx0' we can generate
2141 the vector stmt VS2_0, and as usual, record it in the
2142 STMT_VINFO_VEC_STMT of stmt S2.
2143 When creating the second copy (VS2_1), we obtain the relevant vector
2144 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2145 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2146 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2147 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2148 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2149 chain of stmts and pointers:
2150 RELATED_STMT VEC_STMT
2151 VS1_0: vx0 = memref0 VS1_1 -
2152 VS1_1: vx1 = memref1 VS1_2 -
2153 VS1_2: vx2 = memref2 VS1_3 -
2154 VS1_3: vx3 = memref3 - -
2155 S1: x = load - VS1_0
2156 VS2_0: vz0 = vx0 + v1 VS2_1 -
2157 VS2_1: vz1 = vx1 + v1 VS2_2 -
2158 VS2_2: vz2 = vx2 + v1 VS2_3 -
2159 VS2_3: vz3 = vx3 + v1 - -
2160 S2: z = x + 1 - VS2_0 */
2161
2162 prev_stmt_info = NULL;
2163 for (j = 0; j < ncopies; j++)
2164 {
2165 /* Handle uses. */
2166 if (j == 0)
2167 {
2168 if (op_type == binary_op && scalar_shift_arg)
2169 {
2170 /* Vector shl and shr insn patterns can be defined with scalar
2171 operand 2 (shift operand). In this case, use constant or loop
2172 invariant op1 directly, without extending it to vector mode
2173 first. */
2174 optab_op2_mode = insn_data[icode].operand[2].mode;
2175 if (!VECTOR_MODE_P (optab_op2_mode))
2176 {
2177 if (vect_print_dump_info (REPORT_DETAILS))
2178 fprintf (vect_dump, "operand 1 using scalar mode.");
2179 vec_oprnd1 = op1;
2180 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2181 if (slp_node)
2182 {
2183 /* Store vec_oprnd1 for every vector stmt to be created
2184 for SLP_NODE. We check during the analysis that all the
2185 shift arguments are the same.
2186 TODO: Allow different constants for different vector
2187 stmts generated for an SLP instance. */
2188 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2189 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2190 }
2191 }
2192 }
2193
2194 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
2195 (a special case for certain kind of vector shifts); otherwise,
2196 operand 1 should be of a vector type (the usual case). */
2197 if (op_type == binary_op && !vec_oprnd1)
2198 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2199 slp_node);
2200 else
2201 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2202 slp_node);
2203 }
2204 else
2205 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2206
2207 /* Arguments are ready. Create the new vector stmt. */
2208 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
2209 {
2210 vop1 = ((op_type == binary_op)
2211 ? VEC_index (tree, vec_oprnds1, i) : NULL);
2212 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2213 new_temp = make_ssa_name (vec_dest, new_stmt);
2214 gimple_assign_set_lhs (new_stmt, new_temp);
2215 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2216 if (slp_node)
2217 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2218 }
2219
2220 if (slp_node)
2221 continue;
2222
2223 if (j == 0)
2224 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2225 else
2226 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2227 prev_stmt_info = vinfo_for_stmt (new_stmt);
2228 }
2229
2230 VEC_free (tree, heap, vec_oprnds0);
2231 if (vec_oprnds1)
2232 VEC_free (tree, heap, vec_oprnds1);
2233
2234 return true;
2235 }
2236
2237
2238 /* Get vectorized definitions for loop-based vectorization. For the first
2239 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2240 scalar operand), and for the rest we get a copy with
2241 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2242 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2243 The vectors are collected into VEC_OPRNDS. */
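/* For example (assuming a 128-bit vector unit), a two-step demotion from
   int to char needs four V4SI input vectors per V16QI result; the caller
   passes vect_pow2 (multi_step_cvt) - 1 as MULTI_STEP_CVT here, and each
   level of the recursion collects two more defs.  */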
2244
2245 static void
2246 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2247 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2248 {
2249 tree vec_oprnd;
2250
2251 /* Get first vector operand. */
2252 /* All the vector operands except the very first one (which is the scalar oprnd)
2253 are stmt copies. */
2254 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2255 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2256 else
2257 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2258
2259 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2260
2261 /* Get second vector operand. */
2262 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2263 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2264
2265 *oprnd = vec_oprnd;
2266
2267 /* For conversion in multiple steps, continue to get operands
2268 recursively. */
2269 if (multi_step_cvt)
2270 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2271 }
2272
2273
2274 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2275 For multi-step conversions store the resulting vectors and call the function
2276 recursively. */
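/* For example (assuming a 128-bit vector unit), demoting int to char in
   two steps first packs pairs of V4SI vectors into V8HI vectors, and then
   packs pairs of those V8HI vectors into the final V16QI vectors; only
   the vectors produced by the last step are recorded as vectorized defs.  */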
2277
2278 static void
2279 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2280 int multi_step_cvt, gimple stmt,
2281 VEC (tree, heap) *vec_dsts,
2282 gimple_stmt_iterator *gsi,
2283 slp_tree slp_node, enum tree_code code,
2284 stmt_vec_info *prev_stmt_info)
2285 {
2286 unsigned int i;
2287 tree vop0, vop1, new_tmp, vec_dest;
2288 gimple new_stmt;
2289 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2290
2291 vec_dest = VEC_pop (tree, vec_dsts);
2292
2293 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2294 {
2295 /* Create demotion operation. */
2296 vop0 = VEC_index (tree, *vec_oprnds, i);
2297 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2298 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2299 new_tmp = make_ssa_name (vec_dest, new_stmt);
2300 gimple_assign_set_lhs (new_stmt, new_tmp);
2301 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2302
2303 if (multi_step_cvt)
2304 /* Store the resulting vector for next recursive call. */
2305 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2306 else
2307 {
2308 /* This is the last step of the conversion sequence. Store the
2309 vectors in SLP_NODE or in vector info of the scalar statement
2310 (or in STMT_VINFO_RELATED_STMT chain). */
2311 if (slp_node)
2312 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2313 else
2314 {
2315 if (!*prev_stmt_info)
2316 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2317 else
2318 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2319
2320 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2321 }
2322 }
2323 }
2324
2325 /* For multi-step demotion operations we first generate demotion operations
2326 from the source type to the intermediate types, and then combine the
2327 results (stored in VEC_OPRNDS) in demotion operation to the destination
2328 type. */
2329 if (multi_step_cvt)
2330 {
2331 /* At each level of recursion we have half of the operands we had at the
2332 previous level. */
2333 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2334 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2335 stmt, vec_dsts, gsi, slp_node,
2336 code, prev_stmt_info);
2337 }
2338 }
2339
2340
2341 /* Function vectorizable_type_demotion
2342
2343 Check if STMT performs a binary or unary operation that involves
2344 type demotion, and if it can be vectorized.
2345 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2346 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2347 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
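/* A typical candidate (assuming a 128-bit vector unit) is a narrowing
   conversion such as
     s_1 = (short) i_2;
   where the source vectype V4SI holds fewer elements than the destination
   vectype V8HI, so two input vectors are packed into each result vector.  */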
2348
2349 static bool
2350 vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
2351 gimple *vec_stmt, slp_tree slp_node)
2352 {
2353 tree vec_dest;
2354 tree scalar_dest;
2355 tree op0;
2356 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2357 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2358 enum tree_code code, code1 = ERROR_MARK;
2359 tree def;
2360 gimple def_stmt;
2361 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2362 stmt_vec_info prev_stmt_info;
2363 int nunits_in;
2364 int nunits_out;
2365 tree vectype_out;
2366 int ncopies;
2367 int j, i;
2368 tree vectype_in;
2369 int multi_step_cvt = 0;
2370 VEC (tree, heap) *vec_oprnds0 = NULL;
2371 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
2372 tree last_oprnd, intermediate_type;
2373
2374 /* FORNOW: not supported by basic block SLP vectorization. */
2375 gcc_assert (loop_vinfo);
2376
2377 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2378 return false;
2379
2380 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2381 return false;
2382
2383 /* Is STMT a vectorizable type-demotion operation? */
2384 if (!is_gimple_assign (stmt))
2385 return false;
2386
2387 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2388 return false;
2389
2390 code = gimple_assign_rhs_code (stmt);
2391 if (!CONVERT_EXPR_CODE_P (code))
2392 return false;
2393
2394 op0 = gimple_assign_rhs1 (stmt);
2395 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2396 if (!vectype_in)
2397 return false;
2398 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2399
2400 scalar_dest = gimple_assign_lhs (stmt);
2401 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2402 if (!vectype_out)
2403 return false;
2404 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2405 if (nunits_in >= nunits_out)
2406 return false;
2407
2408 /* Multiple types in SLP are handled by creating the appropriate number of
2409 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2410 case of SLP. */
2411 if (slp_node)
2412 ncopies = 1;
2413 else
2414 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2415 gcc_assert (ncopies >= 1);
2416
2417 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2418 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2419 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2420 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2421 && CONVERT_EXPR_CODE_P (code))))
2422 return false;
2423
2424 /* Check the operands of the operation. */
2425 if (!vect_is_simple_use (op0, loop_vinfo, NULL, &def_stmt, &def, &dt[0]))
2426 {
2427 if (vect_print_dump_info (REPORT_DETAILS))
2428 fprintf (vect_dump, "use not simple.");
2429 return false;
2430 }
2431
2432 /* Supportable by target? */
2433 if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1,
2434 &multi_step_cvt, &interm_types))
2435 return false;
2436
2437 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2438
2439 if (!vec_stmt) /* transformation not required. */
2440 {
2441 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2442 if (vect_print_dump_info (REPORT_DETAILS))
2443 fprintf (vect_dump, "=== vectorizable_demotion ===");
2444 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2445 return true;
2446 }
2447
2448 /** Transform. **/
2449 if (vect_print_dump_info (REPORT_DETAILS))
2450 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
2451 ncopies);
2452
2453 /* In case of multi-step demotion, we first generate demotion operations to
2454 the intermediate types, and then from those types to the final one.
2455 We create vector destinations for the intermediate types (TYPES) received
2456 from supportable_narrowing_operation, and store them in the correct order
2457 for future use in vect_create_vectorized_demotion_stmts(). */
2458 if (multi_step_cvt)
2459 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2460 else
2461 vec_dsts = VEC_alloc (tree, heap, 1);
2462
2463 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2464 VEC_quick_push (tree, vec_dsts, vec_dest);
2465
2466 if (multi_step_cvt)
2467 {
2468 for (i = VEC_length (tree, interm_types) - 1;
2469 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2470 {
2471 vec_dest = vect_create_destination_var (scalar_dest,
2472 intermediate_type);
2473 VEC_quick_push (tree, vec_dsts, vec_dest);
2474 }
2475 }
2476
2477 /* In case the vectorization factor (VF) is bigger than the number
2478 of elements that we can fit in a vectype (nunits), we have to generate
2479 more than one vector stmt - i.e - we need to "unroll" the
2480 vector stmt by a factor VF/nunits. */
2481 last_oprnd = op0;
2482 prev_stmt_info = NULL;
2483 for (j = 0; j < ncopies; j++)
2484 {
2485 /* Handle uses. */
2486 if (slp_node)
2487 vect_get_slp_defs (slp_node, &vec_oprnds0, NULL);
2488 else
2489 {
2490 VEC_free (tree, heap, vec_oprnds0);
2491 vec_oprnds0 = VEC_alloc (tree, heap,
2492 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
2493 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2494 vect_pow2 (multi_step_cvt) - 1);
2495 }
2496
2497 /* Arguments are ready. Create the new vector stmts. */
2498 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
2499 vect_create_vectorized_demotion_stmts (&vec_oprnds0,
2500 multi_step_cvt, stmt, tmp_vec_dsts,
2501 gsi, slp_node, code1,
2502 &prev_stmt_info);
2503 }
2504
2505 VEC_free (tree, heap, vec_oprnds0);
2506 VEC_free (tree, heap, vec_dsts);
2507 VEC_free (tree, heap, tmp_vec_dsts);
2508 VEC_free (tree, heap, interm_types);
2509
2510 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2511 return true;
2512 }
2513
2514
2515 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2516 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2517 the resulting vectors and call the function recursively. */
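/* For example (assuming a 128-bit vector unit), promoting char to int in
   two steps first unpacks each V16QI vector into two V8HI vectors, and
   then unpacks each of those into two V4SI vectors, so every input vector
   eventually yields four result vectors.  */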
2518
2519 static void
2520 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2521 VEC (tree, heap) **vec_oprnds1,
2522 int multi_step_cvt, gimple stmt,
2523 VEC (tree, heap) *vec_dsts,
2524 gimple_stmt_iterator *gsi,
2525 slp_tree slp_node, enum tree_code code1,
2526 enum tree_code code2, tree decl1,
2527 tree decl2, int op_type,
2528 stmt_vec_info *prev_stmt_info)
2529 {
2530 int i;
2531 tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
2532 gimple new_stmt1, new_stmt2;
2533 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2534 VEC (tree, heap) *vec_tmp;
2535
2536 vec_dest = VEC_pop (tree, vec_dsts);
2537 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2538
2539 for (i = 0; VEC_iterate (tree, *vec_oprnds0, i, vop0); i++)
2540 {
2541 if (op_type == binary_op)
2542 vop1 = VEC_index (tree, *vec_oprnds1, i);
2543 else
2544 vop1 = NULL_TREE;
2545
2546 /* Generate the two halves of promotion operation. */
2547 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2548 op_type, vec_dest, gsi, stmt);
2549 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2550 op_type, vec_dest, gsi, stmt);
2551 if (is_gimple_call (new_stmt1))
2552 {
2553 new_tmp1 = gimple_call_lhs (new_stmt1);
2554 new_tmp2 = gimple_call_lhs (new_stmt2);
2555 }
2556 else
2557 {
2558 new_tmp1 = gimple_assign_lhs (new_stmt1);
2559 new_tmp2 = gimple_assign_lhs (new_stmt2);
2560 }
2561
2562 if (multi_step_cvt)
2563 {
2564 /* Store the results for the recursive call. */
2565 VEC_quick_push (tree, vec_tmp, new_tmp1);
2566 VEC_quick_push (tree, vec_tmp, new_tmp2);
2567 }
2568 else
2569 {
2570 /* Last step of the promotion sequence - store the results. */
2571 if (slp_node)
2572 {
2573 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
2574 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
2575 }
2576 else
2577 {
2578 if (!*prev_stmt_info)
2579 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
2580 else
2581 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
2582
2583 *prev_stmt_info = vinfo_for_stmt (new_stmt1);
2584 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
2585 *prev_stmt_info = vinfo_for_stmt (new_stmt2);
2586 }
2587 }
2588 }
2589
2590 if (multi_step_cvt)
2591 {
2592 /* For a multi-step promotion operation we call the function
2593 recursively for every stage. We start from the input type,
2594 create promotion operations to the intermediate types, and then
2595 create promotions to the output type. */
2596 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
2597 VEC_free (tree, heap, vec_tmp);
2598 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
2599 multi_step_cvt - 1, stmt,
2600 vec_dsts, gsi, slp_node, code1,
2601 code2, decl1, decl2, op_type,
2602 prev_stmt_info);
2603 }
2604 }
2605
2606
2607 /* Function vectorizable_type_promotion
2608
2609 Check if STMT performs a binary or unary operation that involves
2610 type promotion, and if it can be vectorized.
2611 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2612 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2613 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
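/* Typical candidates (assuming a 128-bit vector unit) are a widening
   conversion such as
     i_1 = (int) s_2;
   or a widening multiplication recognized as WIDEN_MULT_EXPR.  The source
   vectype V8HI holds more elements than the destination vectype V4SI, so
   each input vector expands into two result vectors.  */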
2614
2615 static bool
2616 vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
2617 gimple *vec_stmt, slp_tree slp_node)
2618 {
2619 tree vec_dest;
2620 tree scalar_dest;
2621 tree op0, op1 = NULL;
2622 tree vec_oprnd0 = NULL, vec_oprnd1 = NULL;
2623 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2624 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2625 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2626 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2627 int op_type;
2628 tree def;
2629 gimple def_stmt;
2630 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2631 stmt_vec_info prev_stmt_info;
2632 int nunits_in;
2633 int nunits_out;
2634 tree vectype_out;
2635 int ncopies;
2636 int j, i;
2637 tree vectype_in;
2638 tree intermediate_type = NULL_TREE;
2639 int multi_step_cvt = 0;
2640 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2641 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
2642
2643 /* FORNOW: not supported by basic block SLP vectorization. */
2644 gcc_assert (loop_vinfo);
2645
2646 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2647 return false;
2648
2649 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2650 return false;
2651
2652 /* Is STMT a vectorizable type-promotion operation? */
2653 if (!is_gimple_assign (stmt))
2654 return false;
2655
2656 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2657 return false;
2658
2659 code = gimple_assign_rhs_code (stmt);
2660 if (!CONVERT_EXPR_CODE_P (code)
2661 && code != WIDEN_MULT_EXPR)
2662 return false;
2663
2664 op0 = gimple_assign_rhs1 (stmt);
2665 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2666 if (!vectype_in)
2667 return false;
2668 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2669
2670 scalar_dest = gimple_assign_lhs (stmt);
2671 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2672 if (!vectype_out)
2673 return false;
2674 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2675 if (nunits_in <= nunits_out)
2676 return false;
2677
2678 /* Multiple types in SLP are handled by creating the appropriate number of
2679 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2680 case of SLP. */
2681 if (slp_node)
2682 ncopies = 1;
2683 else
2684 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2685
2686 gcc_assert (ncopies >= 1);
2687
2688 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2689 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2690 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2691 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2692 && CONVERT_EXPR_CODE_P (code))))
2693 return false;
2694
2695 /* Check the operands of the operation. */
2696 if (!vect_is_simple_use (op0, loop_vinfo, NULL, &def_stmt, &def, &dt[0]))
2697 {
2698 if (vect_print_dump_info (REPORT_DETAILS))
2699 fprintf (vect_dump, "use not simple.");
2700 return false;
2701 }
2702
2703 op_type = TREE_CODE_LENGTH (code);
2704 if (op_type == binary_op)
2705 {
2706 op1 = gimple_assign_rhs2 (stmt);
2707 if (!vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, &dt[1]))
2708 {
2709 if (vect_print_dump_info (REPORT_DETAILS))
2710 fprintf (vect_dump, "use not simple.");
2711 return false;
2712 }
2713 }
2714
2715 /* Supportable by target? */
2716 if (!supportable_widening_operation (code, stmt, vectype_in,
2717 &decl1, &decl2, &code1, &code2,
2718 &multi_step_cvt, &interm_types))
2719 return false;
2720
2721 /* Binary widening operation can only be supported directly by the
2722 architecture. */
2723 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2724
2725 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2726
2727 if (!vec_stmt) /* transformation not required. */
2728 {
2729 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2730 if (vect_print_dump_info (REPORT_DETAILS))
2731 fprintf (vect_dump, "=== vectorizable_promotion ===");
2732 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
2733 return true;
2734 }
2735
2736 /** Transform. **/
2737
2738 if (vect_print_dump_info (REPORT_DETAILS))
2739 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
2740 ncopies);
2741
2742 /* Handle def. */
2743 /* In case of multi-step promotion, we first generate promotion operations
2744 to the intermediate types, and then from those types to the final one.
2745 We store the vector destinations in VEC_DSTS in the correct order for
2746 recursive creation of promotion operations in
2747 vect_create_vectorized_promotion_stmts(). Vector destinations are created
2748 according to TYPES received from supportable_widening_operation(). */
2749 if (multi_step_cvt)
2750 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2751 else
2752 vec_dsts = VEC_alloc (tree, heap, 1);
2753
2754 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2755 VEC_quick_push (tree, vec_dsts, vec_dest);
2756
2757 if (multi_step_cvt)
2758 {
2759 for (i = VEC_length (tree, interm_types) - 1;
2760 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2761 {
2762 vec_dest = vect_create_destination_var (scalar_dest,
2763 intermediate_type);
2764 VEC_quick_push (tree, vec_dsts, vec_dest);
2765 }
2766 }
2767
2768 if (!slp_node)
2769 {
2770 vec_oprnds0 = VEC_alloc (tree, heap,
2771 (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
2772 if (op_type == binary_op)
2773 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2774 }
2775
2776 /* In case the vectorization factor (VF) is bigger than the number
2777 of elements that we can fit in a vectype (nunits), we have to generate
2778 more than one vector stmt - i.e - we need to "unroll" the
2779 vector stmt by a factor VF/nunits. */
2780
2781 prev_stmt_info = NULL;
2782 for (j = 0; j < ncopies; j++)
2783 {
2784 /* Handle uses. */
2785 if (j == 0)
2786 {
2787 if (slp_node)
2788 vect_get_slp_defs (slp_node, &vec_oprnds0, &vec_oprnds1);
2789 else
2790 {
2791 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2792 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2793 if (op_type == binary_op)
2794 {
2795 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
2796 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2797 }
2798 }
2799 }
2800 else
2801 {
2802 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2803 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
2804 if (op_type == binary_op)
2805 {
2806 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
2807 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
2808 }
2809 }
2810
2811 /* Arguments are ready. Create the new vector stmts. */
2812 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
2813 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
2814 multi_step_cvt, stmt,
2815 tmp_vec_dsts,
2816 gsi, slp_node, code1, code2,
2817 decl1, decl2, op_type,
2818 &prev_stmt_info);
2819 }
2820
2821 VEC_free (tree, heap, vec_dsts);
2822 VEC_free (tree, heap, tmp_vec_dsts);
2823 VEC_free (tree, heap, interm_types);
2824 VEC_free (tree, heap, vec_oprnds0);
2825 VEC_free (tree, heap, vec_oprnds1);
2826
2827 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2828 return true;
2829 }
2830
2831
2832 /* Function vectorizable_store.
2833
2834 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
2835 can be vectorized.
2836 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2837 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2838 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
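/* For example, a store such as
     *p_1 = x_2;
   (or an ARRAY_REF / COMPONENT_REF store) is replaced by stores of whole
   vectors of x through a pointer to the vector type.  In the interleaved
   (strided) case the vectorized defs of the whole group are permuted
   first; see the example further below.  */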
2839
2840 static bool
2841 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2842 slp_tree slp_node)
2843 {
2844 tree scalar_dest;
2845 tree data_ref;
2846 tree op;
2847 tree vec_oprnd = NULL_TREE;
2848 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2849 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
2850 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2851 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2852 struct loop *loop = NULL;
2853 enum machine_mode vec_mode;
2854 tree dummy;
2855 enum dr_alignment_support alignment_support_scheme;
2856 tree def;
2857 gimple def_stmt;
2858 enum vect_def_type dt;
2859 stmt_vec_info prev_stmt_info = NULL;
2860 tree dataref_ptr = NULL_TREE;
2861 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2862 int ncopies;
2863 int j;
2864 gimple next_stmt, first_stmt = NULL;
2865 bool strided_store = false;
2866 unsigned int group_size, i;
2867 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
2868 bool inv_p;
2869 VEC(tree,heap) *vec_oprnds = NULL;
2870 bool slp = (slp_node != NULL);
2871 stmt_vec_info first_stmt_vinfo;
2872 unsigned int vec_num;
2873 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2874
2875 if (loop_vinfo)
2876 loop = LOOP_VINFO_LOOP (loop_vinfo);
2877
2878 /* Multiple types in SLP are handled by creating the appropriate number of
2879 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2880 case of SLP. */
2881 if (slp)
2882 ncopies = 1;
2883 else
2884 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2885
2886 gcc_assert (ncopies >= 1);
2887
2888 /* FORNOW. This restriction should be relaxed. */
2889 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
2890 {
2891 if (vect_print_dump_info (REPORT_DETAILS))
2892 fprintf (vect_dump, "multiple types in nested loop.");
2893 return false;
2894 }
2895
2896 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2897 return false;
2898
2899 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2900 return false;
2901
2902 /* Is vectorizable store? */
2903
2904 if (!is_gimple_assign (stmt))
2905 return false;
2906
2907 scalar_dest = gimple_assign_lhs (stmt);
2908 if (TREE_CODE (scalar_dest) != ARRAY_REF
2909 && TREE_CODE (scalar_dest) != INDIRECT_REF
2910 && TREE_CODE (scalar_dest) != COMPONENT_REF
2911 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
2912 && TREE_CODE (scalar_dest) != REALPART_EXPR)
2913 return false;
2914
2915 gcc_assert (gimple_assign_single_p (stmt));
2916 op = gimple_assign_rhs1 (stmt);
2917 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
2918 {
2919 if (vect_print_dump_info (REPORT_DETAILS))
2920 fprintf (vect_dump, "use not simple.");
2921 return false;
2922 }
2923
2924 /* The scalar rhs type needs to be trivially convertible to the vector
2925 component type. This should always be the case. */
2926 if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op)))
2927 {
2928 if (vect_print_dump_info (REPORT_DETAILS))
2929 fprintf (vect_dump, "??? operands of different types");
2930 return false;
2931 }
2932
2933 vec_mode = TYPE_MODE (vectype);
2934 /* FORNOW. In some cases can vectorize even if data-type not supported
2935 (e.g. - array initialization with 0). */
2936 if (optab_handler (mov_optab, (int) vec_mode)->insn_code == CODE_FOR_nothing)
2937 return false;
2938
2939 if (!STMT_VINFO_DATA_REF (stmt_info))
2940 return false;
2941
2942 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
2943 {
2944 strided_store = true;
2945 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
2946 if (!vect_strided_store_supported (vectype)
2947 && !PURE_SLP_STMT (stmt_info) && !slp)
2948 return false;
2949
2950 if (first_stmt == stmt)
2951 {
2952 /* STMT is the leader of the group. Check the operands of all the
2953 stmts of the group. */
2954 next_stmt = DR_GROUP_NEXT_DR (stmt_info);
2955 while (next_stmt)
2956 {
2957 gcc_assert (gimple_assign_single_p (next_stmt));
2958 op = gimple_assign_rhs1 (next_stmt);
2959 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
2960 &def, &dt))
2961 {
2962 if (vect_print_dump_info (REPORT_DETAILS))
2963 fprintf (vect_dump, "use not simple.");
2964 return false;
2965 }
2966 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
2967 }
2968 }
2969 }
2970
2971 if (!vec_stmt) /* transformation not required. */
2972 {
2973 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2974 vect_model_store_cost (stmt_info, ncopies, dt, NULL);
2975 return true;
2976 }
2977
2978 /** Transform. **/
2979
2980 if (strided_store)
2981 {
2982 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2983 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
2984
2985 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
2986
2987 /* FORNOW */
2988 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
2989
2990 /* We vectorize all the stmts of the interleaving group when we
2991 reach the last stmt in the group. */
2992 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
2993 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
2994 && !slp)
2995 {
2996 *vec_stmt = NULL;
2997 return true;
2998 }
2999
3000 if (slp)
3001 strided_store = false;
3002
3003 /* VEC_NUM is the number of vect stmts to be created for this group. */
3004 if (slp)
3005 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3006 else
3007 vec_num = group_size;
3008 }
3009 else
3010 {
3011 first_stmt = stmt;
3012 first_dr = dr;
3013 group_size = vec_num = 1;
3014 first_stmt_vinfo = stmt_info;
3015 }
3016
3017 if (vect_print_dump_info (REPORT_DETAILS))
3018 fprintf (vect_dump, "transform store. ncopies = %d", ncopies);
3019
3020 dr_chain = VEC_alloc (tree, heap, group_size);
3021 oprnds = VEC_alloc (tree, heap, group_size);
3022
3023 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
3024 gcc_assert (alignment_support_scheme);
3025
3026 /* In case the vectorization factor (VF) is bigger than the number
3027 of elements that we can fit in a vectype (nunits), we have to generate
3028 more than one vector stmt - i.e - we need to "unroll" the
3029 vector stmt by a factor VF/nunits. For more details see documentation in
3030 vect_get_vec_def_for_copy_stmt. */
3031
3032 /* In case of interleaving (non-unit strided access):
3033
3034 S1: &base + 2 = x2
3035 S2: &base = x0
3036 S3: &base + 1 = x1
3037 S4: &base + 3 = x3
3038
3039 We create vectorized stores starting from base address (the access of the
3040 first stmt in the chain (S2 in the above example), when the last store stmt
3041 of the chain (S4) is reached:
3042
3043 VS1: &base = vx2
3044 VS2: &base + vec_size*1 = vx0
3045 VS3: &base + vec_size*2 = vx1
3046 VS4: &base + vec_size*3 = vx3
3047
3048 Then permutation statements are generated:
3049
3050 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3051 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3052 ...
3053
3054 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3055 (the order of the data-refs in the output of vect_permute_store_chain
3056 corresponds to the order of scalar stmts in the interleaving chain - see
3057 the documentation of vect_permute_store_chain()).
3058
3059 In case of both multiple types and interleaving, above vector stores and
3060 permutation stmts are created for every copy. The result vector stmts are
3061 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3062 STMT_VINFO_RELATED_STMT for the next copies.
3063 */
3064
3065 prev_stmt_info = NULL;
3066 for (j = 0; j < ncopies; j++)
3067 {
3068 gimple new_stmt;
3069 gimple ptr_incr;
3070
3071 if (j == 0)
3072 {
3073 if (slp)
3074 {
3075 /* Get vectorized arguments for SLP_NODE. */
3076 vect_get_slp_defs (slp_node, &vec_oprnds, NULL);
3077
3078 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3079 }
3080 else
3081 {
3082 /* For interleaved stores we collect vectorized defs for all the
3083 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3084 used as an input to vect_permute_store_chain(), and OPRNDS as
3085 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3086
3087 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3088 OPRNDS are of size 1. */
3089 next_stmt = first_stmt;
3090 for (i = 0; i < group_size; i++)
3091 {
3092 /* Since gaps are not supported for interleaved stores,
3093 GROUP_SIZE is the exact number of stmts in the chain.
3094 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3095 there is no interleaving, GROUP_SIZE is 1, and only one
3096 iteration of the loop will be executed. */
3097 gcc_assert (next_stmt
3098 && gimple_assign_single_p (next_stmt));
3099 op = gimple_assign_rhs1 (next_stmt);
3100
3101 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3102 NULL);
3103 VEC_quick_push (tree, dr_chain, vec_oprnd);
3104 VEC_quick_push (tree, oprnds, vec_oprnd);
3105 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3106 }
3107 }
3108
3109 /* We should have caught mismatched types earlier. */
3110 gcc_assert (useless_type_conversion_p (vectype,
3111 TREE_TYPE (vec_oprnd)));
3112 dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE,
3113 &dummy, &ptr_incr, false,
3114 &inv_p);
3115 gcc_assert (bb_vinfo || !inv_p);
3116 }
3117 else
3118 {
3119 /* For interleaved stores we created vectorized defs for all the
3120 defs stored in OPRNDS in the previous iteration (previous copy).
3121 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3122 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3123 next copy.
3124 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3125 OPRNDS are of size 1. */
3126 for (i = 0; i < group_size; i++)
3127 {
3128 op = VEC_index (tree, oprnds, i);
3129 vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
3130 &dt);
3131 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3132 VEC_replace (tree, dr_chain, i, vec_oprnd);
3133 VEC_replace (tree, oprnds, i, vec_oprnd);
3134 }
3135 dataref_ptr =
3136 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
3137 }
3138
3139 if (strided_store)
3140 {
3141 result_chain = VEC_alloc (tree, heap, group_size);
3142 /* Permute. */
3143 if (!vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
3144 &result_chain))
3145 return false;
3146 }
3147
3148 next_stmt = first_stmt;
3149 for (i = 0; i < vec_num; i++)
3150 {
3151 if (i > 0)
3152 /* Bump the vector pointer. */
3153 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3154 NULL_TREE);
3155
3156 if (slp)
3157 vec_oprnd = VEC_index (tree, vec_oprnds, i);
3158 else if (strided_store)
3159 /* For strided stores vectorized defs are interleaved in
3160 vect_permute_store_chain(). */
3161 vec_oprnd = VEC_index (tree, result_chain, i);
3162
3163 if (aligned_access_p (first_dr))
3164 data_ref = build_fold_indirect_ref (dataref_ptr);
3165 else
3166 {
3167 int mis = DR_MISALIGNMENT (first_dr);
3168 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
3169 tmis = size_binop (MULT_EXPR, tmis, size_int (BITS_PER_UNIT));
3170 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
3171 }
3172
3173 /* If accesses through a pointer to vectype do not alias the original
3174 memory reference we have a problem. This should never happen. */
3175 gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref),
3176 get_alias_set (gimple_assign_lhs (stmt))));
3177
3178 /* Arguments are ready. Create the new vector stmt. */
3179 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
3180 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3181 mark_symbols_for_renaming (new_stmt);
3182
3183 if (slp)
3184 continue;
3185
3186 if (j == 0)
3187 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3188 else
3189 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3190
3191 prev_stmt_info = vinfo_for_stmt (new_stmt);
3192 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3193 if (!next_stmt)
3194 break;
3195 }
3196 }
3197
3198 VEC_free (tree, heap, dr_chain);
3199 VEC_free (tree, heap, oprnds);
3200 if (result_chain)
3201 VEC_free (tree, heap, result_chain);
3202
3203 return true;
3204 }
3205
3206 /* vectorizable_load.
3207
3208 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
3209 can be vectorized.
3210 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3211 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3212 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
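/* For example, a load such as
     x_1 = *p_2;
   (or an ARRAY_REF / COMPONENT_REF load) is replaced by loads of whole
   vectors through a pointer to the vector type.  Interleaved (strided)
   loads additionally go through the extract-even/odd permutation shown
   in the example further below.  */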
3213
3214 static bool
3215 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3216 slp_tree slp_node, slp_instance slp_node_instance)
3217 {
3218 tree scalar_dest;
3219 tree vec_dest = NULL;
3220 tree data_ref = NULL;
3221 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3222 stmt_vec_info prev_stmt_info;
3223 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3224 struct loop *loop = NULL;
3225 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3226 bool nested_in_vect_loop = false;
3227 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
3228 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3229 tree new_temp;
3230 int mode;
3231 gimple new_stmt = NULL;
3232 tree dummy;
3233 enum dr_alignment_support alignment_support_scheme;
3234 tree dataref_ptr = NULL_TREE;
3235 gimple ptr_incr;
3236 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3237 int ncopies;
3238 int i, j, group_size;
3239 tree msq = NULL_TREE, lsq;
3240 tree offset = NULL_TREE;
3241 tree realignment_token = NULL_TREE;
3242 gimple phi = NULL;
3243 VEC(tree,heap) *dr_chain = NULL;
3244 bool strided_load = false;
3245 gimple first_stmt;
3246 tree scalar_type;
3247 bool inv_p;
3248 bool compute_in_loop = false;
3249 struct loop *at_loop;
3250 int vec_num;
3251 bool slp = (slp_node != NULL);
3252 bool slp_perm = false;
3253 enum tree_code code;
3254 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3255 int vf;
3256
3257 if (loop_vinfo)
3258 {
3259 loop = LOOP_VINFO_LOOP (loop_vinfo);
3260 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3261 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3262 }
3263 else
3264 /* FORNOW: multiple types are not supported in basic block SLP. */
3265 vf = nunits;
3266
3267 /* Multiple types in SLP are handled by creating the appropriate number of
3268 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3269 case of SLP. */
3270 if (slp)
3271 ncopies = 1;
3272 else
3273 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3274
3275 gcc_assert (ncopies >= 1);
3276
3277 /* FORNOW. This restriction should be relaxed. */
3278 if (nested_in_vect_loop && ncopies > 1)
3279 {
3280 if (vect_print_dump_info (REPORT_DETAILS))
3281 fprintf (vect_dump, "multiple types in nested loop.");
3282 return false;
3283 }
3284
3285 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3286 return false;
3287
3288 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3289 return false;
3290
3291 /* Is vectorizable load? */
3292 if (!is_gimple_assign (stmt))
3293 return false;
3294
3295 scalar_dest = gimple_assign_lhs (stmt);
3296 if (TREE_CODE (scalar_dest) != SSA_NAME)
3297 return false;
3298
3299 code = gimple_assign_rhs_code (stmt);
3300 if (code != ARRAY_REF
3301 && code != INDIRECT_REF
3302 && code != COMPONENT_REF
3303 && code != IMAGPART_EXPR
3304 && code != REALPART_EXPR)
3305 return false;
3306
3307 if (!STMT_VINFO_DATA_REF (stmt_info))
3308 return false;
3309
3310 scalar_type = TREE_TYPE (DR_REF (dr));
3311 mode = (int) TYPE_MODE (vectype);
3312
3313 /* FORNOW. In some cases can vectorize even if data-type not supported
3314 (e.g. - data copies). */
3315 if (optab_handler (mov_optab, mode)->insn_code == CODE_FOR_nothing)
3316 {
3317 if (vect_print_dump_info (REPORT_DETAILS))
3318 fprintf (vect_dump, "Aligned load, but unsupported type.");
3319 return false;
3320 }
3321
3322 /* The vector component type needs to be trivially convertible to the
3323 scalar lhs. This should always be the case. */
3324 if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), TREE_TYPE (vectype)))
3325 {
3326 if (vect_print_dump_info (REPORT_DETAILS))
3327 fprintf (vect_dump, "??? operands of different types");
3328 return false;
3329 }
3330
3331 /* Check if the load is a part of an interleaving chain. */
3332 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3333 {
3334 strided_load = true;
3335 /* FORNOW */
3336 gcc_assert (! nested_in_vect_loop);
3337
3338 /* Check if interleaving is supported. */
3339 if (!vect_strided_load_supported (vectype)
3340 && !PURE_SLP_STMT (stmt_info) && !slp)
3341 return false;
3342 }
3343
3344 if (!vec_stmt) /* transformation not required. */
3345 {
3346 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
3347 vect_model_load_cost (stmt_info, ncopies, NULL);
3348 return true;
3349 }
3350
3351 if (vect_print_dump_info (REPORT_DETAILS))
3352 fprintf (vect_dump, "transform load.");
3353
3354 /** Transform. **/
3355
3356 if (strided_load)
3357 {
3358 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3359 /* Check if the chain of loads is already vectorized. */
3360 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
3361 {
3362 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3363 return true;
3364 }
3365 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3366 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3367
3368 /* VEC_NUM is the number of vect stmts to be created for this group. */
3369 if (slp)
3370 {
3371 strided_load = false;
3372 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3373 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
3374 slp_perm = true;
3375 }
3376 else
3377 vec_num = group_size;
3378
3379 dr_chain = VEC_alloc (tree, heap, vec_num);
3380 }
3381 else
3382 {
3383 first_stmt = stmt;
3384 first_dr = dr;
3385 group_size = vec_num = 1;
3386 }
3387
3388 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
3389 gcc_assert (alignment_support_scheme);
3390
3391 /* In case the vectorization factor (VF) is bigger than the number
3392 of elements that we can fit in a vectype (nunits), we have to generate
3393 more than one vector stmt - i.e - we need to "unroll" the
3394 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3395 from one copy of the vector stmt to the next, in the field
3396 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3397 stages to find the correct vector defs to be used when vectorizing
3398 stmts that use the defs of the current stmt. The example below illustrates
3399 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3400 4 vectorized stmts):
3401
3402 before vectorization:
3403 RELATED_STMT VEC_STMT
3404 S1: x = memref - -
3405 S2: z = x + 1 - -
3406
3407 step 1: vectorize stmt S1:
3408 We first create the vector stmt VS1_0, and, as usual, record a
3409 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
3410 Next, we create the vector stmt VS1_1, and record a pointer to
3411 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
3412 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
3413 stmts and pointers:
3414 RELATED_STMT VEC_STMT
3415 VS1_0: vx0 = memref0 VS1_1 -
3416 VS1_1: vx1 = memref1 VS1_2 -
3417 VS1_2: vx2 = memref2 VS1_3 -
3418 VS1_3: vx3 = memref3 - -
3419 S1: x = load - VS1_0
3420 S2: z = x + 1 - -
3421
3422 See in documentation in vect_get_vec_def_for_stmt_copy for how the
3423 information we recorded in RELATED_STMT field is used to vectorize
3424 stmt S2. */
3425
3426 /* In case of interleaving (non-unit strided access):
3427
3428 S1: x2 = &base + 2
3429 S2: x0 = &base
3430 S3: x1 = &base + 1
3431 S4: x3 = &base + 3
3432
3433 Vectorized loads are created in the order of memory accesses
3434 starting from the access of the first stmt of the chain:
3435
3436 VS1: vx0 = &base
3437 VS2: vx1 = &base + vec_size*1
3438 VS3: vx3 = &base + vec_size*2
3439 VS4: vx4 = &base + vec_size*3
3440
3441 Then permutation statements are generated:
3442
3443 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
3444 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
3445 ...
3446
3447 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3448 (the order of the data-refs in the output of vect_permute_load_chain
3449 corresponds to the order of scalar stmts in the interleaving chain - see
3450 the documentation of vect_permute_load_chain()).
3451 The generation of permutation stmts and recording them in
3452 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
3453
3454 In case of both multiple types and interleaving, the vector loads and
3455 permutation stmts above are created for every copy. The result vector stmts
3456 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3457 STMT_VINFO_RELATED_STMT for the next copies. */
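/* For illustration, consider a simpler group of just two interleaved loads
   (say x0 and x1) with four-element vectors.  The two vector loads fetch
   the interleaved data

        vx0 = { x0_0, x1_0, x0_1, x1_1 }
        vx1 = { x0_2, x1_2, x0_3, x1_3 }

   and the extract-even/extract-odd permutations then produce

        vx5 = VEC_EXTRACT_EVEN_EXPR <vx0, vx1> = { x0_0, x0_1, x0_2, x0_3 }
        vx6 = VEC_EXTRACT_ODD_EXPR  <vx0, vx1> = { x1_0, x1_1, x1_2, x1_3 }

   i.e. one vector per scalar stmt of the chain, in the order described
   above.  */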
3458
3459 /* If the data reference is aligned (dr_aligned) or potentially unaligned
3460 on a target that supports unaligned accesses (dr_unaligned_supported)
3461 we generate the following code:
3462 p = initial_addr;
3463 indx = 0;
3464 loop {
3465 p = p + indx * vectype_size;
3466 vec_dest = *(p);
3467 indx = indx + 1;
3468 }
3469
3470 Otherwise, the data reference is potentially unaligned on a target that
3471 does not support unaligned accesses (dr_explicit_realign_optimized) -
3472 then generate the following code, in which the data in each iteration is
3473 obtained by two vector loads, one from the previous iteration, and one
3474 from the current iteration:
3475 p1 = initial_addr;
3476 msq_init = *(floor(p1))
3477 p2 = initial_addr + VS - 1;
3478 realignment_token = call target_builtin;
3479 indx = 0;
3480 loop {
3481 p2 = p2 + indx * vectype_size
3482 lsq = *(floor(p2))
3483 vec_dest = realign_load (msq, lsq, realignment_token)
3484 indx = indx + 1;
3485 msq = lsq;
3486 } */
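/* Roughly: floor(p) denotes p rounded down to a vector-aligned address, so
   MSQ and LSQ are the two aligned vectors that together cover the unaligned
   access, and REALIGN_LOAD shifts/merges them (steered by the
   REALIGNMENT_TOKEN computed for the address) into the desired value.  In
   the optimized scheme above only one new aligned load (LSQ) is needed per
   iteration, since MSQ is carried over from the previous iteration.  */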
3487
3488 /* If the misalignment remains the same throughout the execution of the
3489 loop, we can create the init_addr and permutation mask at the loop
3490 preheader. Otherwise, they need to be created inside the loop.
3491 A varying misalignment can only occur when vectorizing memory accesses
3492 in the inner-loop nested within an outer-loop that is being vectorized. */
3493
3494 if (loop && nested_in_vect_loop_p (loop, stmt)
3495 && (TREE_INT_CST_LOW (DR_STEP (dr))
3496 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
3497 {
3498 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
3499 compute_in_loop = true;
3500 }
3501
3502 if ((alignment_support_scheme == dr_explicit_realign_optimized
3503 || alignment_support_scheme == dr_explicit_realign)
3504 && !compute_in_loop)
3505 {
3506 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
3507 alignment_support_scheme, NULL_TREE,
3508 &at_loop);
3509 if (alignment_support_scheme == dr_explicit_realign_optimized)
3510 {
3511 phi = SSA_NAME_DEF_STMT (msq);
3512 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
3513 }
3514 }
3515 else
3516 at_loop = loop;
3517
3518 prev_stmt_info = NULL;
3519 for (j = 0; j < ncopies; j++)
3520 {
3521 /* 1. Create the vector pointer update chain. */
3522 if (j == 0)
3523 dataref_ptr = vect_create_data_ref_ptr (first_stmt,
3524 at_loop, offset,
3525 &dummy, &ptr_incr, false,
3526 &inv_p);
3527 else
3528 dataref_ptr =
3529 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
3530
3531 for (i = 0; i < vec_num; i++)
3532 {
3533 if (i > 0)
3534 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3535 NULL_TREE);
3536
3537 /* 2. Create the vector-load in the loop. */
3538 switch (alignment_support_scheme)
3539 {
3540 case dr_aligned:
3541 gcc_assert (aligned_access_p (first_dr));
3542 data_ref = build_fold_indirect_ref (dataref_ptr);
3543 break;
3544 case dr_unaligned_supported:
3545 {
3546 int mis = DR_MISALIGNMENT (first_dr);
3547 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
3548
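/* TMIS is the data-ref's known misalignment converted from bytes to bits
   (zero when the misalignment is unknown, i.e. -1); it is recorded on the
   MISALIGNED_INDIRECT_REF built below.  */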
3549 tmis = size_binop (MULT_EXPR, tmis, size_int (BITS_PER_UNIT));
3550 data_ref =
3551 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
3552 break;
3553 }
3554 case dr_explicit_realign:
3555 {
3556 tree ptr, bump;
3557 tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
3558
3559 if (compute_in_loop)
3560 msq = vect_setup_realignment (first_stmt, gsi,
3561 &realignment_token,
3562 dr_explicit_realign,
3563 dataref_ptr, NULL);
3564
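/* Emit the aligned load of MSQ here, from DATAREF_PTR rounded down to the
   vector alignment, then build the data-ref for LSQ at an address
   (NUNITS - 1) scalar elements further on.  The load emitted after this
   switch yields LSQ, and the REALIGN_LOAD built in step 3 below combines
   MSQ and LSQ into the unaligned value.  */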
3565 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
3566 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3567 new_stmt = gimple_build_assign (vec_dest, data_ref);
3568 new_temp = make_ssa_name (vec_dest, new_stmt);
3569 gimple_assign_set_lhs (new_stmt, new_temp);
3570 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
3571 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
3572 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3573 msq = new_temp;
3574
3575 bump = size_binop (MULT_EXPR, vs_minus_1,
3576 TYPE_SIZE_UNIT (scalar_type));
3577 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
3578 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
3579 break;
3580 }
3581 case dr_explicit_realign_optimized:
3582 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
3583 break;
3584 default:
3585 gcc_unreachable ();
3586 }
3587 /* If accesses through a pointer to vectype do not alias the original
3588 memory reference, we have a problem. This should never happen. */
3589 gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref),
3590 get_alias_set (gimple_assign_rhs1 (stmt))));
3591 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3592 new_stmt = gimple_build_assign (vec_dest, data_ref);
3593 new_temp = make_ssa_name (vec_dest, new_stmt);
3594 gimple_assign_set_lhs (new_stmt, new_temp);
3595 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3596 mark_symbols_for_renaming (new_stmt);
3597
3598 /* 3. Handle explicit realignment if necessary/supported. Create in
3599 loop: vec_dest = realign_load (msq, lsq, realignment_token) */
3600 if (alignment_support_scheme == dr_explicit_realign_optimized
3601 || alignment_support_scheme == dr_explicit_realign)
3602 {
3603 tree tmp;
3604
3605 lsq = gimple_assign_lhs (new_stmt);
3606 if (!realignment_token)
3607 realignment_token = dataref_ptr;
3608 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3609 tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq,
3610 realignment_token);
3611 new_stmt = gimple_build_assign (vec_dest, tmp);
3612 new_temp = make_ssa_name (vec_dest, new_stmt);
3613 gimple_assign_set_lhs (new_stmt, new_temp);
3614 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3615
3616 if (alignment_support_scheme == dr_explicit_realign_optimized)
3617 {
3618 gcc_assert (phi);
3619 if (i == vec_num - 1 && j == ncopies - 1)
3620 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop));
3621 msq = lsq;
3622 }
3623 }
3624
3625 /* 4. Handle invariant-load. */
3626 if (inv_p && !bb_vinfo)
3627 {
3628 gcc_assert (!strided_load);
3629 gcc_assert (nested_in_vect_loop_p (loop, stmt));
3630 if (j == 0)
3631 {
3632 int k;
3633 tree t = NULL_TREE;
3634 tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
3635
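/* The loaded value is invariant, so it is enough to extract the first
   scalar element of the vector just loaded and replicate it NUNITS times
   (via the CONSTRUCTOR built below) to form the vector def.  */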
3636 /* CHECKME: bitpos depends on endianness? */
3637 bitpos = bitsize_zero_node;
3638 vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
3639 bitsize, bitpos);
3640 vec_dest =
3641 vect_create_destination_var (scalar_dest, NULL_TREE);
3642 new_stmt = gimple_build_assign (vec_dest, vec_inv);
3643 new_temp = make_ssa_name (vec_dest, new_stmt);
3644 gimple_assign_set_lhs (new_stmt, new_temp);
3645 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3646
3647 for (k = nunits - 1; k >= 0; --k)
3648 t = tree_cons (NULL_TREE, new_temp, t);
3649 /* FIXME: use build_constructor directly. */
3650 vec_inv = build_constructor_from_list (vectype, t);
3651 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
3652 new_stmt = SSA_NAME_DEF_STMT (new_temp);
3653 }
3654 else
3655 gcc_unreachable (); /* FORNOW. */
3656 }
3657
3658 /* Collect vector loads and later create their permutation in
3659 vect_transform_strided_load (). */
3660 if (strided_load || slp_perm)
3661 VEC_quick_push (tree, dr_chain, new_temp);
3662
3663 /* Store vector loads in the corresponding SLP_NODE. */
3664 if (slp && !slp_perm)
3665 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3666 }
3667
3668 if (slp && !slp_perm)
3669 continue;
3670
3671 if (slp_perm)
3672 {
3673 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
3674 slp_node_instance, false))
3675 {
3676 VEC_free (tree, heap, dr_chain);
3677 return false;
3678 }
3679 }
3680 else
3681 {
3682 if (strided_load)
3683 {
3684 if (!vect_transform_strided_load (stmt, dr_chain, group_size, gsi))
3685 return false;
3686
3687 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3688 VEC_free (tree, heap, dr_chain);
3689 dr_chain = VEC_alloc (tree, heap, group_size);
3690 }
3691 else
3692 {
3693 if (j == 0)
3694 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3695 else
3696 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3697 prev_stmt_info = vinfo_for_stmt (new_stmt);
3698 }
3699 }
3700 }
3701
3702 if (dr_chain)
3703 VEC_free (tree, heap, dr_chain);
3704
3705 return true;
3706 }
3707
3708 /* Function vect_is_simple_cond.
3709
3710 Input:
3711 LOOP - the loop that is being vectorized.
3712 COND - Condition that is checked for simple use.
3713
3714 Returns whether a COND can be vectorized. Checks whether the
3715 condition operands are supportable using vect_is_simple_use. */
3716
3717 static bool
3718 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
3719 {
3720 tree lhs, rhs;
3721 tree def;
3722 enum vect_def_type dt;
3723
3724 if (!COMPARISON_CLASS_P (cond))
3725 return false;
3726
3727 lhs = TREE_OPERAND (cond, 0);
3728 rhs = TREE_OPERAND (cond, 1);
3729
3730 if (TREE_CODE (lhs) == SSA_NAME)
3731 {
3732 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
3733 if (!vect_is_simple_use (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
3734 &dt))
3735 return false;
3736 }
3737 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
3738 && TREE_CODE (lhs) != FIXED_CST)
3739 return false;
3740
3741 if (TREE_CODE (rhs) == SSA_NAME)
3742 {
3743 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
3744 if (!vect_is_simple_use (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
3745 &dt))
3746 return false;
3747 }
3748 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
3749 && TREE_CODE (rhs) != FIXED_CST)
3750 return false;
3751
3752 return true;
3753 }
3754
3755 /* vectorizable_condition.
3756
3757 Check if STMT is a conditional modify expression that can be vectorized.
3758 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3759 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
3760 at GSI.
3761
3762 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
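/* For illustration: a scalar statement such as

        a = (x > y) ? b : c;

   is vectorized here into

        va = VEC_COND_EXPR <vx > vy, vb, vc>;

   where vx, vy, vb and vc are the vector defs of the corresponding
   operands.  */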
3763
3764 static bool
3765 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
3766 gimple *vec_stmt)
3767 {
3768 tree scalar_dest = NULL_TREE;
3769 tree vec_dest = NULL_TREE;
3770 tree op = NULL_TREE;
3771 tree cond_expr, then_clause, else_clause;
3772 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3773 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3774 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
3775 tree vec_compare, vec_cond_expr;
3776 tree new_temp;
3777 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3778 enum machine_mode vec_mode;
3779 tree def;
3780 enum vect_def_type dt;
3781 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3782 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3783 enum tree_code code;
3784
3785 /* FORNOW: unsupported in basic block SLP. */
3786 gcc_assert (loop_vinfo);
3787
3788 gcc_assert (ncopies >= 1);
3789 if (ncopies > 1)
3790 return false; /* FORNOW */
3791
3792 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3793 return false;
3794
3795 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3796 return false;
3797
3798 /* FORNOW: SLP not supported. */
3799 if (STMT_SLP_TYPE (stmt_info))
3800 return false;
3801
3802 /* FORNOW: not yet supported. */
3803 if (STMT_VINFO_LIVE_P (stmt_info))
3804 {
3805 if (vect_print_dump_info (REPORT_DETAILS))
3806 fprintf (vect_dump, "value used after loop.");
3807 return false;
3808 }
3809
3810 /* Is this a vectorizable conditional operation? */
3811 if (!is_gimple_assign (stmt))
3812 return false;
3813
3814 code = gimple_assign_rhs_code (stmt);
3815
3816 if (code != COND_EXPR)
3817 return false;
3818
3819 gcc_assert (gimple_assign_single_p (stmt));
3820 op = gimple_assign_rhs1 (stmt);
3821 cond_expr = TREE_OPERAND (op, 0);
3822 then_clause = TREE_OPERAND (op, 1);
3823 else_clause = TREE_OPERAND (op, 2);
3824
3825 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
3826 return false;
3827
3828 /* We do not handle two different vector types for the condition
3829 and the values. */
3830 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
3831 return false;
3832
3833 if (TREE_CODE (then_clause) == SSA_NAME)
3834 {
3835 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
3836 if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
3837 &then_def_stmt, &def, &dt))
3838 return false;
3839 }
3840 else if (TREE_CODE (then_clause) != INTEGER_CST
3841 && TREE_CODE (then_clause) != REAL_CST
3842 && TREE_CODE (then_clause) != FIXED_CST)
3843 return false;
3844
3845 if (TREE_CODE (else_clause) == SSA_NAME)
3846 {
3847 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
3848 if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
3849 &else_def_stmt, &def, &dt))
3850 return false;
3851 }
3852 else if (TREE_CODE (else_clause) != INTEGER_CST
3853 && TREE_CODE (else_clause) != REAL_CST
3854 && TREE_CODE (else_clause) != FIXED_CST)
3855 return false;
3856
3857
3858 vec_mode = TYPE_MODE (vectype);
3859
3860 if (!vec_stmt)
3861 {
3862 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
3863 return expand_vec_cond_expr_p (op, vec_mode);
3864 }
3865
3866 /* Transform */
3867
3868 /* Handle def. */
3869 scalar_dest = gimple_assign_lhs (stmt);
3870 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3871
3872 /* Handle cond expr. */
3873 vec_cond_lhs =
3874 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
3875 vec_cond_rhs =
3876 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
3877 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
3878 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
3879
3880 /* Arguments are ready. Create the new vector stmt. */
3881 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
3882 vec_cond_lhs, vec_cond_rhs);
3883 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
3884 vec_compare, vec_then_clause, vec_else_clause);
3885
3886 *vec_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
3887 new_temp = make_ssa_name (vec_dest, *vec_stmt);
3888 gimple_assign_set_lhs (*vec_stmt, new_temp);
3889 vect_finish_stmt_generation (stmt, *vec_stmt, gsi);
3890
3891 return true;
3892 }
3893
3894
3895 /* Make sure the statement is vectorizable. */
3896
3897 bool
3898 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
3899 {
3900 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3901 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3902 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
3903 bool ok;
3904 HOST_WIDE_INT dummy;
3905 tree scalar_type, vectype;
3906
3907 if (vect_print_dump_info (REPORT_DETAILS))
3908 {
3909 fprintf (vect_dump, "==> examining statement: ");
3910 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3911 }
3912
3913 if (gimple_has_volatile_ops (stmt))
3914 {
3915 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3916 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
3917
3918 return false;
3919 }
3920
3921 /* Skip stmts that do not need to be vectorized. In loops this is expected
3922 to include:
3923 - the COND_EXPR which is the loop exit condition
3924 - any LABEL_EXPRs in the loop
3925 - computations that are used only for array indexing or loop control.
3926 In basic blocks we only analyze statements that are a part of some SLP
3927 instance, therefore, all the statements are relevant. */
3928
3929 if (!STMT_VINFO_RELEVANT_P (stmt_info)
3930 && !STMT_VINFO_LIVE_P (stmt_info))
3931 {
3932 if (vect_print_dump_info (REPORT_DETAILS))
3933 fprintf (vect_dump, "irrelevant.");
3934
3935 return true;
3936 }
3937
3938 switch (STMT_VINFO_DEF_TYPE (stmt_info))
3939 {
3940 case vect_internal_def:
3941 break;
3942
3943 case vect_reduction_def:
3944 case vect_nested_cycle:
3945 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
3946 || relevance == vect_used_in_outer_by_reduction
3947 || relevance == vect_unused_in_scope));
3948 break;
3949
3950 case vect_induction_def:
3951 case vect_constant_def:
3952 case vect_external_def:
3953 case vect_unknown_def_type:
3954 default:
3955 gcc_unreachable ();
3956 }
3957
3958 if (bb_vinfo)
3959 {
3960 gcc_assert (PURE_SLP_STMT (stmt_info));
3961
3962 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
3963 if (vect_print_dump_info (REPORT_DETAILS))
3964 {
3965 fprintf (vect_dump, "get vectype for scalar type: ");
3966 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
3967 }
3968
3969 vectype = get_vectype_for_scalar_type (scalar_type);
3970 if (!vectype)
3971 {
3972 if (vect_print_dump_info (REPORT_DETAILS))
3973 {
3974 fprintf (vect_dump, "not SLPed: unsupported data-type ");
3975 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
3976 }
3977 return false;
3978 }
3979
3980 if (vect_print_dump_info (REPORT_DETAILS))
3981 {
3982 fprintf (vect_dump, "vectype: ");
3983 print_generic_expr (vect_dump, vectype, TDF_SLIM);
3984 }
3985
3986 STMT_VINFO_VECTYPE (stmt_info) = vectype;
3987 }
3988
3989 if (STMT_VINFO_RELEVANT_P (stmt_info))
3990 {
3991 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
3992 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
3993 *need_to_vectorize = true;
3994 }
3995
3996 ok = true;
3997 if (!bb_vinfo
3998 && (STMT_VINFO_RELEVANT_P (stmt_info)
3999 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
4000 ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
4001 || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
4002 || vectorizable_conversion (stmt, NULL, NULL, NULL)
4003 || vectorizable_operation (stmt, NULL, NULL, NULL)
4004 || vectorizable_assignment (stmt, NULL, NULL, NULL)
4005 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
4006 || vectorizable_call (stmt, NULL, NULL)
4007 || vectorizable_store (stmt, NULL, NULL, NULL)
4008 || vectorizable_condition (stmt, NULL, NULL)
4009 || vectorizable_reduction (stmt, NULL, NULL));
4010 else
4011 {
4012 if (bb_vinfo)
4013 ok = (vectorizable_operation (stmt, NULL, NULL, node)
4014 || vectorizable_assignment (stmt, NULL, NULL, node)
4015 || vectorizable_load (stmt, NULL, NULL, node, NULL)
4016 || vectorizable_store (stmt, NULL, NULL, node));
4017 }
4018
4019 if (!ok)
4020 {
4021 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
4022 {
4023 fprintf (vect_dump, "not vectorized: relevant stmt not ");
4024 fprintf (vect_dump, "supported: ");
4025 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4026 }
4027
4028 return false;
4029 }
4030
4031 if (bb_vinfo)
4032 return true;
4033
4034 /* Stmts that are (also) "live" (i.e., that are used outside the loop)
4035 need extra handling, except for vectorizable reductions. */
4036 if (STMT_VINFO_LIVE_P (stmt_info)
4037 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
4038 ok = vectorizable_live_operation (stmt, NULL, NULL);
4039
4040 if (!ok)
4041 {
4042 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
4043 {
4044 fprintf (vect_dump, "not vectorized: live stmt not ");
4045 fprintf (vect_dump, "supported: ");
4046 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4047 }
4048
4049 return false;
4050 }
4051
4052 if (!PURE_SLP_STMT (stmt_info))
4053 {
4054 /* Groups of strided accesses whose size is not a power of 2 are not
4055 vectorizable yet using loop-vectorization. Therefore, if this stmt
4056 feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and
4057 loop-based vectorized), the loop cannot be vectorized. */
4058 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
4059 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
4060 DR_GROUP_FIRST_DR (stmt_info)))) == -1)
4061 {
4062 if (vect_print_dump_info (REPORT_DETAILS))
4063 {
4064 fprintf (vect_dump, "not vectorized: the size of group "
4065 "of strided accesses is not a power of 2");
4066 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4067 }
4068
4069 return false;
4070 }
4071 }
4072
4073 return true;
4074 }
4075
4076
4077 /* Function vect_transform_stmt.
4078
4079 Create a vectorized stmt to replace STMT, and insert it at GSI. */
4080
4081 bool
4082 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
4083 bool *strided_store, slp_tree slp_node,
4084 slp_instance slp_node_instance)
4085 {
4086 bool is_store = false;
4087 gimple vec_stmt = NULL;
4088 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4089 gimple orig_stmt_in_pattern;
4090 bool done;
4091
4092 switch (STMT_VINFO_TYPE (stmt_info))
4093 {
4094 case type_demotion_vec_info_type:
4095 done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
4096 gcc_assert (done);
4097 break;
4098
4099 case type_promotion_vec_info_type:
4100 done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
4101 gcc_assert (done);
4102 break;
4103
4104 case type_conversion_vec_info_type:
4105 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
4106 gcc_assert (done);
4107 break;
4108
4109 case induc_vec_info_type:
4110 gcc_assert (!slp_node);
4111 done = vectorizable_induction (stmt, gsi, &vec_stmt);
4112 gcc_assert (done);
4113 break;
4114
4115 case op_vec_info_type:
4116 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
4117 gcc_assert (done);
4118 break;
4119
4120 case assignment_vec_info_type:
4121 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
4122 gcc_assert (done);
4123 break;
4124
4125 case load_vec_info_type:
4126 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
4127 slp_node_instance);
4128 gcc_assert (done);
4129 break;
4130
4131 case store_vec_info_type:
4132 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
4133 gcc_assert (done);
4134 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
4135 {
4136 /* In case of interleaving, the whole chain is vectorized when the
4137 last store in the chain is reached. Store stmts before the last
4138 one are skipped, and their stmt_vec_info shouldn't be freed
4139 meanwhile. */
4140 *strided_store = true;
4141 if (STMT_VINFO_VEC_STMT (stmt_info))
4142 is_store = true;
4143 }
4144 else
4145 is_store = true;
4146 break;
4147
4148 case condition_vec_info_type:
4149 gcc_assert (!slp_node);
4150 done = vectorizable_condition (stmt, gsi, &vec_stmt);
4151 gcc_assert (done);
4152 break;
4153
4154 case call_vec_info_type:
4155 gcc_assert (!slp_node);
4156 done = vectorizable_call (stmt, gsi, &vec_stmt);
4157 break;
4158
4159 case reduc_vec_info_type:
4160 gcc_assert (!slp_node);
4161 done = vectorizable_reduction (stmt, gsi, &vec_stmt);
4162 gcc_assert (done);
4163 break;
4164
4165 default:
4166 if (!STMT_VINFO_LIVE_P (stmt_info))
4167 {
4168 if (vect_print_dump_info (REPORT_DETAILS))
4169 fprintf (vect_dump, "stmt not supported.");
4170 gcc_unreachable ();
4171 }
4172 }
4173
4174 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
4175 is being vectorized, but outside the immediately enclosing loop. */
4176 if (vec_stmt
4177 && STMT_VINFO_LOOP_VINFO (stmt_info)
4178 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
4179 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
4180 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
4181 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
4182 || STMT_VINFO_RELEVANT (stmt_info) ==
4183 vect_used_in_outer_by_reduction))
4184 {
4185 struct loop *innerloop = LOOP_VINFO_LOOP (
4186 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
4187 imm_use_iterator imm_iter;
4188 use_operand_p use_p;
4189 tree scalar_dest;
4190 gimple exit_phi;
4191
4192 if (vect_print_dump_info (REPORT_DETAILS))
4193 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
4194
4195 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
4196 (to be used when vectorizing outer-loop stmts that use the DEF of
4197 STMT). */
4198 if (gimple_code (stmt) == GIMPLE_PHI)
4199 scalar_dest = PHI_RESULT (stmt);
4200 else
4201 scalar_dest = gimple_assign_lhs (stmt);
4202
4203 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4204 {
4205 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
4206 {
4207 exit_phi = USE_STMT (use_p);
4208 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
4209 }
4210 }
4211 }
4212
4213 /* Handle stmts whose DEF is used outside the loop-nest that is
4214 being vectorized. */
4215 if (STMT_VINFO_LIVE_P (stmt_info)
4216 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
4217 {
4218 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
4219 gcc_assert (done);
4220 }
4221
4222 if (vec_stmt)
4223 {
4224 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
4225 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
4226 if (orig_stmt_in_pattern)
4227 {
4228 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
4229 /* STMT was inserted by the vectorizer to replace a computation idiom.
4230 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
4231 computed this idiom. We need to record a pointer to VEC_STMT in
4232 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
4233 documentation of vect_pattern_recog. */
4234 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
4235 {
4236 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4237 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
4238 }
4239 }
4240 }
4241
4242 return is_store;
4243 }
4244
4245
4246 /* Remove a group of stores (for SLP or interleaving), free their
4247 stmt_vec_info. */
4248
4249 void
4250 vect_remove_stores (gimple first_stmt)
4251 {
4252 gimple next = first_stmt;
4253 gimple tmp;
4254 gimple_stmt_iterator next_si;
4255
4256 while (next)
4257 {
4258 /* Free the attached stmt_vec_info and remove the stmt. */
4259 next_si = gsi_for_stmt (next);
4260 gsi_remove (&next_si, true);
4261 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
4262 free_stmt_vec_info (next);
4263 next = tmp;
4264 }
4265 }
4266
4267
4268 /* Function new_stmt_vec_info.
4269
4270 Create and initialize a new stmt_vec_info struct for STMT. */
4271
4272 stmt_vec_info
4273 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
4274 bb_vec_info bb_vinfo)
4275 {
4276 stmt_vec_info res;
4277 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
4278
4279 STMT_VINFO_TYPE (res) = undef_vec_info_type;
4280 STMT_VINFO_STMT (res) = stmt;
4281 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
4282 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
4283 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
4284 STMT_VINFO_LIVE_P (res) = false;
4285 STMT_VINFO_VECTYPE (res) = NULL;
4286 STMT_VINFO_VEC_STMT (res) = NULL;
4287 STMT_VINFO_IN_PATTERN_P (res) = false;
4288 STMT_VINFO_RELATED_STMT (res) = NULL;
4289 STMT_VINFO_DATA_REF (res) = NULL;
4290
4291 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
4292 STMT_VINFO_DR_OFFSET (res) = NULL;
4293 STMT_VINFO_DR_INIT (res) = NULL;
4294 STMT_VINFO_DR_STEP (res) = NULL;
4295 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
4296
4297 if (gimple_code (stmt) == GIMPLE_PHI
4298 && is_loop_header_bb_p (gimple_bb (stmt)))
4299 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
4300 else
4301 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
4302
4303 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
4304 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
4305 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
4306 STMT_SLP_TYPE (res) = loop_vect;
4307 DR_GROUP_FIRST_DR (res) = NULL;
4308 DR_GROUP_NEXT_DR (res) = NULL;
4309 DR_GROUP_SIZE (res) = 0;
4310 DR_GROUP_STORE_COUNT (res) = 0;
4311 DR_GROUP_GAP (res) = 0;
4312 DR_GROUP_SAME_DR_STMT (res) = NULL;
4313 DR_GROUP_READ_WRITE_DEPENDENCE (res) = false;
4314
4315 return res;
4316 }
4317
4318
4319 /* Create a vector for stmt_vec_info structs. */
4320
4321 void
4322 init_stmt_vec_info_vec (void)
4323 {
4324 gcc_assert (!stmt_vec_info_vec);
4325 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
4326 }
4327
4328
4329 /* Free the vector of stmt_vec_info structs. */
4330
4331 void
4332 free_stmt_vec_info_vec (void)
4333 {
4334 gcc_assert (stmt_vec_info_vec);
4335 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
4336 }
4337
4338
4339 /* Free stmt vectorization related info. */
4340
4341 void
4342 free_stmt_vec_info (gimple stmt)
4343 {
4344 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4345
4346 if (!stmt_info)
4347 return;
4348
4349 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
4350 set_vinfo_for_stmt (stmt, NULL);
4351 free (stmt_info);
4352 }
4353
4354
4355 /* Function get_vectype_for_scalar_type.
4356
4357 Returns the vector type corresponding to SCALAR_TYPE as supported
4358 by the target. */
4359
4360 tree
4361 get_vectype_for_scalar_type (tree scalar_type)
4362 {
4363 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
4364 int nbytes = GET_MODE_SIZE (inner_mode);
4365 int nunits;
4366 tree vectype;
4367
4368 if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD (inner_mode))
4369 return NULL_TREE;
4370
4371 /* FORNOW: Only a single vector size per mode (UNITS_PER_SIMD_WORD)
4372 is expected. */
4373 nunits = UNITS_PER_SIMD_WORD (inner_mode) / nbytes;
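/* For example, on a target whose SIMD word is 16 bytes, a 4-byte 'int'
   gives nunits == 4 (a V4SI vector), and an 8-byte 'double' gives a
   2-element vector.  */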
4374
4375 vectype = build_vector_type (scalar_type, nunits);
4376 if (vect_print_dump_info (REPORT_DETAILS))
4377 {
4378 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
4379 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
4380 }
4381
4382 if (!vectype)
4383 return NULL_TREE;
4384
4385 if (vect_print_dump_info (REPORT_DETAILS))
4386 {
4387 fprintf (vect_dump, "vectype: ");
4388 print_generic_expr (vect_dump, vectype, TDF_SLIM);
4389 }
4390
4391 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4392 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
4393 {
4394 if (vect_print_dump_info (REPORT_DETAILS))
4395 fprintf (vect_dump, "mode not supported by target.");
4396 return NULL_TREE;
4397 }
4398
4399 return vectype;
4400 }
4401
4402 /* Function vect_is_simple_use.
4403
4404 Input:
4405 LOOP_VINFO - the vect info of the loop that is being vectorized.
4406 BB_VINFO - the vect info of the basic block that is being vectorized.
4407 OPERAND - operand of a stmt in the loop or bb.
4408 DEF - the defining stmt in case OPERAND is an SSA_NAME.
4409
4410 Returns whether a stmt with OPERAND can be vectorized.
4411 For loops, supportable operands are constants, loop invariants, and operands
4412 that are defined by the current iteration of the loop. Unsupportable
4413 operands are those that are defined by a previous iteration of the loop (as
4414 is the case in reduction/induction computations).
4415 For basic blocks, supportable operands are constants and bb invariants.
4416 For now, operands defined outside the basic block are not supported. */
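/* For example, in

        for (i = 0; i < n; i++)
          a[i] = b[i] + x;

   the operand X (defined before the loop) is a vect_external_def, a
   constant operand would be a vect_constant_def, and the value loaded from
   b[i] (defined by a stmt of the current iteration) is a
   vect_internal_def.  */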
4417
4418 bool
4419 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
4420 bb_vec_info bb_vinfo, gimple *def_stmt,
4421 tree *def, enum vect_def_type *dt)
4422 {
4423 basic_block bb;
4424 stmt_vec_info stmt_vinfo;
4425 struct loop *loop = NULL;
4426
4427 if (loop_vinfo)
4428 loop = LOOP_VINFO_LOOP (loop_vinfo);
4429
4430 *def_stmt = NULL;
4431 *def = NULL_TREE;
4432
4433 if (vect_print_dump_info (REPORT_DETAILS))
4434 {
4435 fprintf (vect_dump, "vect_is_simple_use: operand ");
4436 print_generic_expr (vect_dump, operand, TDF_SLIM);
4437 }
4438
4439 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
4440 {
4441 *dt = vect_constant_def;
4442 return true;
4443 }
4444
4445 if (is_gimple_min_invariant (operand))
4446 {
4447 *def = operand;
4448 *dt = vect_external_def;
4449 return true;
4450 }
4451
4452 if (TREE_CODE (operand) == PAREN_EXPR)
4453 {
4454 if (vect_print_dump_info (REPORT_DETAILS))
4455 fprintf (vect_dump, "non-associatable copy.");
4456 operand = TREE_OPERAND (operand, 0);
4457 }
4458
4459 if (TREE_CODE (operand) != SSA_NAME)
4460 {
4461 if (vect_print_dump_info (REPORT_DETAILS))
4462 fprintf (vect_dump, "not ssa-name.");
4463 return false;
4464 }
4465
4466 *def_stmt = SSA_NAME_DEF_STMT (operand);
4467 if (*def_stmt == NULL)
4468 {
4469 if (vect_print_dump_info (REPORT_DETAILS))
4470 fprintf (vect_dump, "no def_stmt.");
4471 return false;
4472 }
4473
4474 if (vect_print_dump_info (REPORT_DETAILS))
4475 {
4476 fprintf (vect_dump, "def_stmt: ");
4477 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
4478 }
4479
4480 /* An empty stmt is expected only in the case of a function argument
4481 (otherwise we expect a GIMPLE_PHI or a GIMPLE_ASSIGN). */
4482 if (gimple_nop_p (*def_stmt))
4483 {
4484 *def = operand;
4485 *dt = vect_external_def;
4486 return true;
4487 }
4488
4489 bb = gimple_bb (*def_stmt);
4490
4491 if ((loop && !flow_bb_inside_loop_p (loop, bb))
4492 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
4493 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
4494 *dt = vect_external_def;
4495 else
4496 {
4497 stmt_vinfo = vinfo_for_stmt (*def_stmt);
4498 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
4499 }
4500
4501 if (*dt == vect_unknown_def_type)
4502 {
4503 if (vect_print_dump_info (REPORT_DETAILS))
4504 fprintf (vect_dump, "Unsupported pattern.");
4505 return false;
4506 }
4507
4508 if (vect_print_dump_info (REPORT_DETAILS))
4509 fprintf (vect_dump, "type of def: %d.",*dt);
4510
4511 switch (gimple_code (*def_stmt))
4512 {
4513 case GIMPLE_PHI:
4514 *def = gimple_phi_result (*def_stmt);
4515 break;
4516
4517 case GIMPLE_ASSIGN:
4518 *def = gimple_assign_lhs (*def_stmt);
4519 break;
4520
4521 case GIMPLE_CALL:
4522 *def = gimple_call_lhs (*def_stmt);
4523 if (*def != NULL)
4524 break;
4525 /* FALLTHRU */
4526 default:
4527 if (vect_print_dump_info (REPORT_DETAILS))
4528 fprintf (vect_dump, "unsupported defining stmt: ");
4529 return false;
4530 }
4531
4532 return true;
4533 }
4534
4535
4536 /* Function supportable_widening_operation
4537
4538 Check whether an operation represented by the code CODE is a
4539 widening operation that is supported by the target platform in
4540 vector form (i.e., when operating on arguments of type VECTYPE).
4541
4542 Widening operations we currently support are NOP (CONVERT), FLOAT
4543 and WIDEN_MULT. This function checks if these operations are supported
4544 by the target platform either directly (via vector tree-codes), or via
4545 target builtins.
4546
4547 Output:
4548 - CODE1 and CODE2 are codes of vector operations to be used when
4549 vectorizing the operation, if available.
4550 - DECL1 and DECL2 are decls of target builtin functions to be used
4551 when vectorizing the operation, if available. In this case,
4552 CODE1 and CODE2 are CALL_EXPR.
4553 - MULTI_STEP_CVT determines the number of required intermediate steps in
4554 case of multi-step conversion (like char->short->int - in that case
4555 MULTI_STEP_CVT will be 1).
4556 - INTERM_TYPES contains the intermediate type required to perform the
4557 widening operation (short in the above example). */
4558
4559 bool
4560 supportable_widening_operation (enum tree_code code, gimple stmt, tree vectype,
4561 tree *decl1, tree *decl2,
4562 enum tree_code *code1, enum tree_code *code2,
4563 int *multi_step_cvt,
4564 VEC (tree, heap) **interm_types)
4565 {
4566 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4567 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4568 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
4569 bool ordered_p;
4570 enum machine_mode vec_mode;
4571 enum insn_code icode1, icode2;
4572 optab optab1, optab2;
4573 tree type = gimple_expr_type (stmt);
4574 tree wide_vectype = get_vectype_for_scalar_type (type);
4575 enum tree_code c1, c2;
4576
4577 /* The result of a vectorized widening operation usually requires two vectors
4578 (because the widened results do not fit into one vector). The generated
4579 vector results would normally be expected to be generated in the same
4580 order as in the original scalar computation, i.e. if 8 results are
4581 generated in each vector iteration, they are to be organized as follows:
4582 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
4583
4584 However, in the special case that the result of the widening operation is
4585 used in a reduction computation only, the order doesn't matter (because
4586 when vectorizing a reduction we change the order of the computation).
4587 Some targets can take advantage of this and generate more efficient code.
4588 For example, targets like Altivec, that support widen_mult using a sequence
4589 of {mult_even,mult_odd} generate the following vectors:
4590 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
4591
4592 When vectorizing outer-loops, we execute the inner-loop sequentially
4593 (each vectorized inner-loop iteration contributes to VF outer-loop
4594 iterations in parallel). We therefore do not allow the order of the
4595 computation in the inner-loop to be changed during outer-loop vectorization. */
4596
4597 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
4598 && !nested_in_vect_loop_p (vect_loop, stmt))
4599 ordered_p = false;
4600 else
4601 ordered_p = true;
4602
4603 if (!ordered_p
4604 && code == WIDEN_MULT_EXPR
4605 && targetm.vectorize.builtin_mul_widen_even
4606 && targetm.vectorize.builtin_mul_widen_even (vectype)
4607 && targetm.vectorize.builtin_mul_widen_odd
4608 && targetm.vectorize.builtin_mul_widen_odd (vectype))
4609 {
4610 if (vect_print_dump_info (REPORT_DETAILS))
4611 fprintf (vect_dump, "Unordered widening operation detected.");
4612
4613 *code1 = *code2 = CALL_EXPR;
4614 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
4615 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
4616 return true;
4617 }
4618
4619 switch (code)
4620 {
4621 case WIDEN_MULT_EXPR:
4622 if (BYTES_BIG_ENDIAN)
4623 {
4624 c1 = VEC_WIDEN_MULT_HI_EXPR;
4625 c2 = VEC_WIDEN_MULT_LO_EXPR;
4626 }
4627 else
4628 {
4629 c2 = VEC_WIDEN_MULT_HI_EXPR;
4630 c1 = VEC_WIDEN_MULT_LO_EXPR;
4631 }
4632 break;
4633
4634 CASE_CONVERT:
4635 if (BYTES_BIG_ENDIAN)
4636 {
4637 c1 = VEC_UNPACK_HI_EXPR;
4638 c2 = VEC_UNPACK_LO_EXPR;
4639 }
4640 else
4641 {
4642 c2 = VEC_UNPACK_HI_EXPR;
4643 c1 = VEC_UNPACK_LO_EXPR;
4644 }
4645 break;
4646
4647 case FLOAT_EXPR:
4648 if (BYTES_BIG_ENDIAN)
4649 {
4650 c1 = VEC_UNPACK_FLOAT_HI_EXPR;
4651 c2 = VEC_UNPACK_FLOAT_LO_EXPR;
4652 }
4653 else
4654 {
4655 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
4656 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
4657 }
4658 break;
4659
4660 case FIX_TRUNC_EXPR:
4661 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
4662 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
4663 computing the operation. */
4664 return false;
4665
4666 default:
4667 gcc_unreachable ();
4668 }
4669
4670 if (code == FIX_TRUNC_EXPR)
4671 {
4672 /* The signedness is determined from output operand. */
4673 optab1 = optab_for_tree_code (c1, type, optab_default);
4674 optab2 = optab_for_tree_code (c2, type, optab_default);
4675 }
4676 else
4677 {
4678 optab1 = optab_for_tree_code (c1, vectype, optab_default);
4679 optab2 = optab_for_tree_code (c2, vectype, optab_default);
4680 }
4681
4682 if (!optab1 || !optab2)
4683 return false;
4684
4685 vec_mode = TYPE_MODE (vectype);
4686 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) == CODE_FOR_nothing
4687 || (icode2 = optab_handler (optab2, vec_mode)->insn_code)
4688 == CODE_FOR_nothing)
4689 return false;
4690
4691 /* Check if it's a multi-step conversion that can be done using intermediate
4692 types. */
4693 if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
4694 || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
4695 {
4696 int i;
4697 tree prev_type = vectype, intermediate_type;
4698 enum machine_mode intermediate_mode, prev_mode = vec_mode;
4699 optab optab3, optab4;
4700
4701 if (!CONVERT_EXPR_CODE_P (code))
4702 return false;
4703
4704 *code1 = c1;
4705 *code2 = c2;
4706
4707 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
4708 intermediate steps in the promotion sequence. We try MAX_INTERM_CVT_STEPS
4709 steps to get to WIDE_VECTYPE, and fail if we do not. */
4710 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
4711 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
4712 {
4713 intermediate_mode = insn_data[icode1].operand[0].mode;
4714 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
4715 TYPE_UNSIGNED (prev_type));
4716 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
4717 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
4718
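/* This step is usable only if OPTAB1/OPTAB2 have handlers for PREV_MODE
   whose result mode is INTERMEDIATE_MODE, and if the intermediate type's
   optabs OPTAB3/OPTAB4 in turn have handlers for INTERMEDIATE_MODE, so
   that the next (or final) step can be taken.  */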
4719 if (!optab3 || !optab4
4720 || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
4721 == CODE_FOR_nothing
4722 || insn_data[icode1].operand[0].mode != intermediate_mode
4723 || (icode2 = optab2->handlers[(int) prev_mode].insn_code)
4724 == CODE_FOR_nothing
4725 || insn_data[icode2].operand[0].mode != intermediate_mode
4726 || (icode1 = optab3->handlers[(int) intermediate_mode].insn_code)
4727 == CODE_FOR_nothing
4728 || (icode2 = optab4->handlers[(int) intermediate_mode].insn_code)
4729 == CODE_FOR_nothing)
4730 return false;
4731
4732 VEC_quick_push (tree, *interm_types, intermediate_type);
4733 (*multi_step_cvt)++;
4734
4735 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
4736 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
4737 return true;
4738
4739 prev_type = intermediate_type;
4740 prev_mode = intermediate_mode;
4741 }
4742
4743 return false;
4744 }
4745
4746 *code1 = c1;
4747 *code2 = c2;
4748 return true;
4749 }
4750
4751
4752 /* Function supportable_narrowing_operation
4753
4754 Check whether an operation represented by the code CODE is a
4755 narrowing operation that is supported by the target platform in
4756 vector form (i.e., when operating on arguments of type VECTYPE).
4757
4758 Narrowing operations we currently support are NOP (CONVERT) and
4759 FIX_TRUNC. This function checks if these operations are supported by
4760 the target platform directly via vector tree-codes.
4761
4762 Output:
4763 - CODE1 is the code of a vector operation to be used when
4764 vectorizing the operation, if available.
4765 - MULTI_STEP_CVT determines the number of required intermediate steps in
4766 case of multi-step conversion (like int->short->char - in that case
4767 MULTI_STEP_CVT will be 1).
4768 - INTERM_TYPES contains the intermediate type required to perform the
4769 narrowing operation (short in the above example). */
4770
4771 bool
4772 supportable_narrowing_operation (enum tree_code code,
4773 const_gimple stmt, tree vectype,
4774 enum tree_code *code1, int *multi_step_cvt,
4775 VEC (tree, heap) **interm_types)
4776 {
4777 enum machine_mode vec_mode;
4778 enum insn_code icode1;
4779 optab optab1, interm_optab;
4780 tree type = gimple_expr_type (stmt);
4781 tree narrow_vectype = get_vectype_for_scalar_type (type);
4782 enum tree_code c1;
4783 tree intermediate_type, prev_type;
4784 int i;
4785
4786 switch (code)
4787 {
4788 CASE_CONVERT:
4789 c1 = VEC_PACK_TRUNC_EXPR;
4790 break;
4791
4792 case FIX_TRUNC_EXPR:
4793 c1 = VEC_PACK_FIX_TRUNC_EXPR;
4794 break;
4795
4796 case FLOAT_EXPR:
4797 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
4798 tree code and optabs used for computing the operation. */
4799 return false;
4800
4801 default:
4802 gcc_unreachable ();
4803 }
4804
4805 if (code == FIX_TRUNC_EXPR)
4806 /* The signedness is determined from output operand. */
4807 optab1 = optab_for_tree_code (c1, type, optab_default);
4808 else
4809 optab1 = optab_for_tree_code (c1, vectype, optab_default);
4810
4811 if (!optab1)
4812 return false;
4813
4814 vec_mode = TYPE_MODE (vectype);
4815 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code)
4816 == CODE_FOR_nothing)
4817 return false;
4818
4819 /* Check if it's a multi-step conversion that can be done using intermediate
4820 types. */
4821 if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
4822 {
4823 enum machine_mode intermediate_mode, prev_mode = vec_mode;
4824
4825 *code1 = c1;
4826 prev_type = vectype;
4827 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
4828 intermediate steps in the narrowing sequence. We try MAX_INTERM_CVT_STEPS
4829 steps to get to NARROW_VECTYPE, and fail if we do not. */
4830 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
4831 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
4832 {
4833 intermediate_mode = insn_data[icode1].operand[0].mode;
4834 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
4835 TYPE_UNSIGNED (prev_type));
4836 interm_optab = optab_for_tree_code (c1, intermediate_type,
4837 optab_default);
4838 if (!interm_optab
4839 || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
4840 == CODE_FOR_nothing
4841 || insn_data[icode1].operand[0].mode != intermediate_mode
4842 || (icode1
4843 = interm_optab->handlers[(int) intermediate_mode].insn_code)
4844 == CODE_FOR_nothing)
4845 return false;
4846
4847 VEC_quick_push (tree, *interm_types, intermediate_type);
4848 (*multi_step_cvt)++;
4849
4850 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
4851 return true;
4852
4853 prev_type = intermediate_type;
4854 prev_mode = intermediate_mode;
4855 }
4856
4857 return false;
4858 }
4859
4860 *code1 = c1;
4861 return true;
4862 }