1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "cfgloop.h"
36 #include "cfglayout.h"
37 #include "expr.h"
38 #include "recog.h"
39 #include "optabs.h"
40 #include "diagnostic-core.h"
41 #include "tree-vectorizer.h"
42 #include "langhooks.h"
43
44
45 /* Return a variable of type ELEM_TYPE[NELEMS]. */
46
47 static tree
48 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
49 {
50 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
51 "vect_array");
52 }
53
54 /* ARRAY is an array of vectors created by create_vector_array.
55 Return an SSA_NAME for the vector in index N. The reference
56 is part of the vectorization of STMT and the vector is associated
57 with scalar destination SCALAR_DEST. */
58
59 static tree
60 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
61 tree array, unsigned HOST_WIDE_INT n)
62 {
63 tree vect_type, vect, vect_name, array_ref;
64 gimple new_stmt;
65
66 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
67 vect_type = TREE_TYPE (TREE_TYPE (array));
68 vect = vect_create_destination_var (scalar_dest, vect_type);
69 array_ref = build4 (ARRAY_REF, vect_type, array,
70 build_int_cst (size_type_node, n),
71 NULL_TREE, NULL_TREE);
72
73 new_stmt = gimple_build_assign (vect, array_ref);
74 vect_name = make_ssa_name (vect, new_stmt);
75 gimple_assign_set_lhs (new_stmt, vect_name);
76 vect_finish_stmt_generation (stmt, new_stmt, gsi);
77 mark_symbols_for_renaming (new_stmt);
78
79 return vect_name;
80 }
81
82 /* ARRAY is an array of vectors created by create_vector_array.
83 Emit code to store SSA_NAME VECT in index N of the array.
84 The store is part of the vectorization of STMT. */
85
86 static void
87 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
88 tree array, unsigned HOST_WIDE_INT n)
89 {
90 tree array_ref;
91 gimple new_stmt;
92
93 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
94 build_int_cst (size_type_node, n),
95 NULL_TREE, NULL_TREE);
96
97 new_stmt = gimple_build_assign (array_ref, vect);
98 vect_finish_stmt_generation (stmt, new_stmt, gsi);
99 mark_symbols_for_renaming (new_stmt);
100 }
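/* A hedged usage sketch, not code from this file: the three helpers above
   are meant to combine when an array of vectors serves as a temporary,
   e.g. for load-lanes/store-lanes style accesses.  Names are illustrative:

     array = create_vector_array (vectype, nelems);
     write_vector_array (stmt, gsi, vec_i, array, i);
     vec_j = read_vector_array (stmt, gsi, scalar_dest, array, j);

   Each access is an ARRAY_REF of the temporary and is emitted through
   vect_finish_stmt_generation.  */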
101
102 /* PTR is a pointer to an array of type TYPE. Return a representation
103 of *PTR. The memory reference replaces those in FIRST_DR
104 (and its group). */
105
106 static tree
107 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
108 {
109 struct ptr_info_def *pi;
110 tree mem_ref, alias_ptr_type;
111
112 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
113 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
114 /* Arrays have the same alignment as their type. */
115 pi = get_ptr_info (ptr);
116 pi->align = TYPE_ALIGN_UNIT (type);
117 pi->misalign = 0;
118 return mem_ref;
119 }
120
121 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
122
123 /* Function vect_mark_relevant.
124
125 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
126
127 static void
128 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
129 enum vect_relevant relevant, bool live_p,
130 bool used_in_pattern)
131 {
132 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
133 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
134 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
135 gimple pattern_stmt;
136
137 if (vect_print_dump_info (REPORT_DETAILS))
138 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
139
140 /* If this stmt is an original stmt in a pattern, we might need to mark its
141 related pattern stmt instead of the original stmt. However, such stmts
142 may have their own uses that are not in any pattern; in such cases the
143 stmt itself should be marked. */
144 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
145 {
146 bool found = false;
147 if (!used_in_pattern)
148 {
149 imm_use_iterator imm_iter;
150 use_operand_p use_p;
151 gimple use_stmt;
152 tree lhs;
153
154 if (is_gimple_assign (stmt))
155 lhs = gimple_assign_lhs (stmt);
156 else
157 lhs = gimple_call_lhs (stmt);
158
159 /* This use is an out-of-pattern use. If LHS has other uses that are
160 pattern uses, we should mark the stmt itself, and not the pattern
161 stmt. */
162 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
163 {
164 if (is_gimple_debug (USE_STMT (use_p)))
165 continue;
166 use_stmt = USE_STMT (use_p);
167
168 if (vinfo_for_stmt (use_stmt)
169 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
170 {
171 found = true;
172 break;
173 }
174 }
175 }
176
177 if (!found)
178 {
179 /* This is the last stmt in a sequence that was detected as a
180 pattern that can potentially be vectorized. Don't mark the stmt
181 as relevant/live because it's not going to be vectorized.
182 Instead mark the pattern-stmt that replaces it. */
183
184 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
185
186 if (vect_print_dump_info (REPORT_DETAILS))
187 fprintf (vect_dump, "last stmt in pattern. don't mark"
188 " relevant/live.");
189 stmt_info = vinfo_for_stmt (pattern_stmt);
190 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
191 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
192 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
193 stmt = pattern_stmt;
194 }
195 }
196
197 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
198 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
199 STMT_VINFO_RELEVANT (stmt_info) = relevant;
200
201 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
202 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
203 {
204 if (vect_print_dump_info (REPORT_DETAILS))
205 fprintf (vect_dump, "already marked relevant/live.");
206 return;
207 }
208
209 VEC_safe_push (gimple, heap, *worklist, stmt);
210 }
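/* Illustrative example for the pattern handling above (stmt names are
   hypothetical).  Suppose the pattern recognizer detected

     S1: t_1 = (int) a_2;
     S2: x_3 = t_1 * c_4;

   and replaced S2 by a pattern stmt S2': x_3' = WIDEN_MULT_EXPR <a_2, c_4>.
   When S2 is passed to this function and none of the uses of x_3 are
   themselves pattern uses, S2' (the related pattern stmt) is the one marked
   relevant/live, since S2 itself will not be vectorized.  */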
211
212
213 /* Function vect_stmt_relevant_p.
214
215 Return true if STMT, in the loop that is represented by LOOP_VINFO, is
216 "relevant for vectorization".
217
218 A stmt is considered "relevant for vectorization" if:
219 - it has uses outside the loop.
220 - it has vdefs (it alters memory).
221 - it is a control stmt in the loop (other than the exit condition).
222
223 CHECKME: what other side effects would the vectorizer allow? */
224
225 static bool
226 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
227 enum vect_relevant *relevant, bool *live_p)
228 {
229 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
230 ssa_op_iter op_iter;
231 imm_use_iterator imm_iter;
232 use_operand_p use_p;
233 def_operand_p def_p;
234
235 *relevant = vect_unused_in_scope;
236 *live_p = false;
237
238 /* cond stmt other than loop exit cond. */
239 if (is_ctrl_stmt (stmt)
240 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
241 != loop_exit_ctrl_vec_info_type)
242 *relevant = vect_used_in_scope;
243
244 /* changing memory. */
245 if (gimple_code (stmt) != GIMPLE_PHI)
246 if (gimple_vdef (stmt))
247 {
248 if (vect_print_dump_info (REPORT_DETAILS))
249 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
250 *relevant = vect_used_in_scope;
251 }
252
253 /* uses outside the loop. */
254 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
255 {
256 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
257 {
258 basic_block bb = gimple_bb (USE_STMT (use_p));
259 if (!flow_bb_inside_loop_p (loop, bb))
260 {
261 if (vect_print_dump_info (REPORT_DETAILS))
262 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
263
264 if (is_gimple_debug (USE_STMT (use_p)))
265 continue;
266
267 /* We expect all such uses to be in the loop exit phis
268 (because of loop-closed SSA form). */
269 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
270 gcc_assert (bb == single_exit (loop)->dest);
271
272 *live_p = true;
273 }
274 }
275 }
276
277 return (*live_p || *relevant);
278 }
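/* Hedged illustration for vect_stmt_relevant_p (hypothetical gimple):

     loop body:
       a[i_1] = x_2 + 1;      has a vdef  -> *relevant = vect_used_in_scope
       s_3 = s_4 + b[i_1];    s_3 used in a loop-exit phi -> *live_p = true

   A stmt whose result is only consumed inside the loop and that neither
   writes memory nor controls the loop stays unused-in-scope here.  */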
279
280
281 /* Function exist_non_indexing_operands_for_use_p
282
283 USE is one of the uses attached to STMT. Check if USE is
284 used in STMT for anything other than indexing an array. */
285
286 static bool
287 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
288 {
289 tree operand;
290 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
291
292 /* USE corresponds to some operand in STMT. If there is no data
293 reference in STMT, then any operand that corresponds to USE
294 is not indexing an array. */
295 if (!STMT_VINFO_DATA_REF (stmt_info))
296 return true;
297
298 /* STMT has a data_ref. FORNOW this means that it's in one of
299 the following forms:
300 -1- ARRAY_REF = var
301 -2- var = ARRAY_REF
302 (This should have been verified in analyze_data_refs).
303
304 'var' in the second case corresponds to a def, not a use,
305 so USE cannot correspond to any operands that are not used
306 for array indexing.
307
308 Therefore, all we need to check is if STMT falls into the
309 first case, and whether var corresponds to USE. */
310
311 if (!gimple_assign_copy_p (stmt))
312 return false;
313 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
314 return false;
315 operand = gimple_assign_rhs1 (stmt);
316 if (TREE_CODE (operand) != SSA_NAME)
317 return false;
318
319 if (operand == use)
320 return true;
321
322 return false;
323 }
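/* Hedged illustration of the two forms discussed above:

     -1- a[i_1] = x_2;   for USE == x_2 the function returns true (the
                         stored value is a non-indexing operand); for
                         USE == i_1 it returns false.
     -2- y_3 = a[i_1];   the lhs is an SSA_NAME, so the function returns
                         false for every USE (i_1 only indexes the array).  */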
324
325
326 /*
327 Function process_use.
328
329 Inputs:
330 - a USE in STMT in a loop represented by LOOP_VINFO
331 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
332 that defined USE. This is done by calling mark_relevant and passing it
333 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
334
335 Outputs:
336 Generally, LIVE_P and RELEVANT are used to define the liveness and
337 relevance info of the DEF_STMT of this USE:
338 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
339 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
340 Exceptions:
341 - case 1: If USE is used only for address computations (e.g. array indexing),
342 which does not need to be directly vectorized, then the liveness/relevance
343 of the respective DEF_STMT is left unchanged.
344 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
345 skip DEF_STMT because it has already been processed.
346 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
347 be modified accordingly.
348
349 Return true if everything is as expected. Return false otherwise. */
350
351 static bool
352 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
353 enum vect_relevant relevant, VEC(gimple,heap) **worklist)
354 {
355 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
356 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
357 stmt_vec_info dstmt_vinfo;
358 basic_block bb, def_bb;
359 tree def;
360 gimple def_stmt;
361 enum vect_def_type dt;
362
363 /* case 1: we are only interested in uses that need to be vectorized. Uses
364 that are used for address computation are not considered relevant. */
365 if (!exist_non_indexing_operands_for_use_p (use, stmt))
366 return true;
367
368 if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
369 {
370 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
371 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
372 return false;
373 }
374
375 if (!def_stmt || gimple_nop_p (def_stmt))
376 return true;
377
378 def_bb = gimple_bb (def_stmt);
379 if (!flow_bb_inside_loop_p (loop, def_bb))
380 {
381 if (vect_print_dump_info (REPORT_DETAILS))
382 fprintf (vect_dump, "def_stmt is out of loop.");
383 return true;
384 }
385
386 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
387 DEF_STMT must have already been processed, because this should be the
388 only way that STMT, which is a reduction-phi, was put in the worklist,
389 as there should be no other uses for DEF_STMT in the loop. So we just
390 check that everything is as expected, and we are done. */
391 dstmt_vinfo = vinfo_for_stmt (def_stmt);
392 bb = gimple_bb (stmt);
393 if (gimple_code (stmt) == GIMPLE_PHI
394 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
395 && gimple_code (def_stmt) != GIMPLE_PHI
396 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
397 && bb->loop_father == def_bb->loop_father)
398 {
399 if (vect_print_dump_info (REPORT_DETAILS))
400 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
401 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
402 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
403 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
404 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
405 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
406 return true;
407 }
408
409 /* case 3a: outer-loop stmt defining an inner-loop stmt:
410 outer-loop-header-bb:
411 d = def_stmt
412 inner-loop:
413 stmt # use (d)
414 outer-loop-tail-bb:
415 ... */
416 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
417 {
418 if (vect_print_dump_info (REPORT_DETAILS))
419 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
420
421 switch (relevant)
422 {
423 case vect_unused_in_scope:
424 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
425 vect_used_in_scope : vect_unused_in_scope;
426 break;
427
428 case vect_used_in_outer_by_reduction:
429 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
430 relevant = vect_used_by_reduction;
431 break;
432
433 case vect_used_in_outer:
434 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
435 relevant = vect_used_in_scope;
436 break;
437
438 case vect_used_in_scope:
439 break;
440
441 default:
442 gcc_unreachable ();
443 }
444 }
445
446 /* case 3b: inner-loop stmt defining an outer-loop stmt:
447 outer-loop-header-bb:
448 ...
449 inner-loop:
450 d = def_stmt
451 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
452 stmt # use (d) */
453 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
454 {
455 if (vect_print_dump_info (REPORT_DETAILS))
456 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
457
458 switch (relevant)
459 {
460 case vect_unused_in_scope:
461 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
462 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
463 vect_used_in_outer_by_reduction : vect_unused_in_scope;
464 break;
465
466 case vect_used_by_reduction:
467 relevant = vect_used_in_outer_by_reduction;
468 break;
469
470 case vect_used_in_scope:
471 relevant = vect_used_in_outer;
472 break;
473
474 default:
475 gcc_unreachable ();
476 }
477 }
478
479 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
480 is_pattern_stmt_p (stmt_vinfo));
481 return true;
482 }
483
484
485 /* Function vect_mark_stmts_to_be_vectorized.
486
487 Not all stmts in the loop need to be vectorized. For example:
488
489 for i...
490 for j...
491 1. T0 = i + j
492 2. T1 = a[T0]
493
494 3. j = j + 1
495
496 Stmts 1 and 3 do not need to be vectorized, because loop control and
497 addressing of vectorized data-refs are handled differently.
498
499 This pass detects such stmts. */
500
501 bool
502 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
503 {
504 VEC(gimple,heap) *worklist;
505 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
506 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
507 unsigned int nbbs = loop->num_nodes;
508 gimple_stmt_iterator si;
509 gimple stmt;
510 unsigned int i;
511 stmt_vec_info stmt_vinfo;
512 basic_block bb;
513 gimple phi;
514 bool live_p;
515 enum vect_relevant relevant, tmp_relevant;
516 enum vect_def_type def_type;
517
518 if (vect_print_dump_info (REPORT_DETAILS))
519 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
520
521 worklist = VEC_alloc (gimple, heap, 64);
522
523 /* 1. Init worklist. */
524 for (i = 0; i < nbbs; i++)
525 {
526 bb = bbs[i];
527 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
528 {
529 phi = gsi_stmt (si);
530 if (vect_print_dump_info (REPORT_DETAILS))
531 {
532 fprintf (vect_dump, "init: phi relevant? ");
533 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
534 }
535
536 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
537 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
538 }
539 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
540 {
541 stmt = gsi_stmt (si);
542 if (vect_print_dump_info (REPORT_DETAILS))
543 {
544 fprintf (vect_dump, "init: stmt relevant? ");
545 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
546 }
547
548 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
549 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
550 }
551 }
552
553 /* 2. Process_worklist */
554 while (VEC_length (gimple, worklist) > 0)
555 {
556 use_operand_p use_p;
557 ssa_op_iter iter;
558
559 stmt = VEC_pop (gimple, worklist);
560 if (vect_print_dump_info (REPORT_DETAILS))
561 {
562 fprintf (vect_dump, "worklist: examine stmt: ");
563 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
564 }
565
566 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
567 (DEF_STMT) as relevant/irrelevant and live/dead according to the
568 liveness and relevance properties of STMT. */
569 stmt_vinfo = vinfo_for_stmt (stmt);
570 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
571 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
572
573 /* Generally, the liveness and relevance properties of STMT are
574 propagated as is to the DEF_STMTs of its USEs:
575 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
576 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
577
578 One exception is when STMT has been identified as defining a reduction
579 variable; in this case we set the liveness/relevance as follows:
580 live_p = false
581 relevant = vect_used_by_reduction
582 This is because we distinguish between two kinds of relevant stmts -
583 those that are used by a reduction computation, and those that are
584 (also) used by a regular computation. This allows us later on to
585 identify stmts that are used solely by a reduction, and therefore the
586 order of the results that they produce does not have to be kept. */
587
588 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
589 tmp_relevant = relevant;
590 switch (def_type)
591 {
592 case vect_reduction_def:
593 switch (tmp_relevant)
594 {
595 case vect_unused_in_scope:
596 relevant = vect_used_by_reduction;
597 break;
598
599 case vect_used_by_reduction:
600 if (gimple_code (stmt) == GIMPLE_PHI)
601 break;
602 /* fall through */
603
604 default:
605 if (vect_print_dump_info (REPORT_DETAILS))
606 fprintf (vect_dump, "unsupported use of reduction.");
607
608 VEC_free (gimple, heap, worklist);
609 return false;
610 }
611
612 live_p = false;
613 break;
614
615 case vect_nested_cycle:
616 if (tmp_relevant != vect_unused_in_scope
617 && tmp_relevant != vect_used_in_outer_by_reduction
618 && tmp_relevant != vect_used_in_outer)
619 {
620 if (vect_print_dump_info (REPORT_DETAILS))
621 fprintf (vect_dump, "unsupported use of nested cycle.");
622
623 VEC_free (gimple, heap, worklist);
624 return false;
625 }
626
627 live_p = false;
628 break;
629
630 case vect_double_reduction_def:
631 if (tmp_relevant != vect_unused_in_scope
632 && tmp_relevant != vect_used_by_reduction)
633 {
634 if (vect_print_dump_info (REPORT_DETAILS))
635 fprintf (vect_dump, "unsupported use of double reduction.");
636
637 VEC_free (gimple, heap, worklist);
638 return false;
639 }
640
641 live_p = false;
642 break;
643
644 default:
645 break;
646 }
647
648 if (is_pattern_stmt_p (vinfo_for_stmt (stmt)))
649 {
650 /* Pattern statements are not inserted into the code, so
651 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
652 have to scan the RHS or function arguments instead. */
653 if (is_gimple_assign (stmt))
654 {
655 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
656 tree op = gimple_assign_rhs1 (stmt);
657
658 i = 1;
659 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
660 {
661 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
662 live_p, relevant, &worklist)
663 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
664 live_p, relevant, &worklist))
665 {
666 VEC_free (gimple, heap, worklist);
667 return false;
668 }
669 i = 2;
670 }
671 for (; i < gimple_num_ops (stmt); i++)
672 {
673 op = gimple_op (stmt, i);
674 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
675 &worklist))
676 {
677 VEC_free (gimple, heap, worklist);
678 return false;
679 }
680 }
681 }
682 else if (is_gimple_call (stmt))
683 {
684 for (i = 0; i < gimple_call_num_args (stmt); i++)
685 {
686 tree arg = gimple_call_arg (stmt, i);
687 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
688 &worklist))
689 {
690 VEC_free (gimple, heap, worklist);
691 return false;
692 }
693 }
694 }
695 }
696 else
697 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
698 {
699 tree op = USE_FROM_PTR (use_p);
700 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
701 &worklist))
702 {
703 VEC_free (gimple, heap, worklist);
704 return false;
705 }
706 }
707 } /* while worklist */
708
709 VEC_free (gimple, heap, worklist);
710 return true;
711 }
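/* Hedged example of the reduction handling in the worklist loop above
   (hypothetical stmt names):

     sum_1 = PHI <sum_0, sum_2>      def_type vect_reduction_def
     t_3 = a[i_4] * b[i_4];
     sum_2 = sum_1 + t_3;

   When the reduction phi comes off the worklist, the defs feeding it
   (sum_2 and, transitively, t_3) are marked vect_used_by_reduction with
   live_p = false, so later stages know the order of their partial results
   need not be preserved.  */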
712
713
714 /* Get the cost for TYPE_OF_COST by calling the target's cost builtin. */
715
716 static inline
717 int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
718 {
719 tree dummy_type = NULL;
720 int dummy = 0;
721
722 return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
723 dummy_type, dummy);
724 }
725
726
727 /* Get cost for STMT. */
728
729 int
730 cost_for_stmt (gimple stmt)
731 {
732 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
733
734 switch (STMT_VINFO_TYPE (stmt_info))
735 {
736 case load_vec_info_type:
737 return vect_get_stmt_cost (scalar_load);
738 case store_vec_info_type:
739 return vect_get_stmt_cost (scalar_store);
740 case op_vec_info_type:
741 case condition_vec_info_type:
742 case assignment_vec_info_type:
743 case reduc_vec_info_type:
744 case induc_vec_info_type:
745 case type_promotion_vec_info_type:
746 case type_demotion_vec_info_type:
747 case type_conversion_vec_info_type:
748 case call_vec_info_type:
749 return vect_get_stmt_cost (scalar_stmt);
750 case undef_vec_info_type:
751 default:
752 gcc_unreachable ();
753 }
754 }
755
756 /* Function vect_model_simple_cost.
757
758 Models cost for simple operations, i.e. those that only emit ncopies of a
759 single op. Right now, this does not account for multiple insns that could
760 be generated for the single vector op. We will handle that shortly. */
761
762 void
763 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
764 enum vect_def_type *dt, slp_tree slp_node)
765 {
766 int i;
767 int inside_cost = 0, outside_cost = 0;
768
769 /* The SLP costs were already calculated during SLP tree build. */
770 if (PURE_SLP_STMT (stmt_info))
771 return;
772
773 inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);
774
775 /* FORNOW: Assuming maximum 2 args per stmts. */
776 for (i = 0; i < 2; i++)
777 {
778 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
779 outside_cost += vect_get_stmt_cost (vector_stmt);
780 }
781
782 if (vect_print_dump_info (REPORT_COST))
783 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
784 "outside_cost = %d .", inside_cost, outside_cost);
785
786 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
787 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
788 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
789 }
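/* Worked example for vect_model_simple_cost (illustrative; the actual
   numbers come from the target's cost hook): with ncopies == 2 and one
   constant operand,

     inside_cost  = 2 * vect_get_stmt_cost (vector_stmt)
     outside_cost = 1 * vect_get_stmt_cost (vector_stmt)

   since the constant operand is materialized once outside the loop.  */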
790
791
792 /* Function vect_cost_strided_group_size
793
794 For strided load or store, return the group_size only if it is the first
795 load or store of a group, else return 1. This ensures that group size is
796 only returned once per group. */
797
798 static int
799 vect_cost_strided_group_size (stmt_vec_info stmt_info)
800 {
801 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
802
803 if (first_stmt == STMT_VINFO_STMT (stmt_info))
804 return GROUP_SIZE (stmt_info);
805
806 return 1;
807 }
808
809
810 /* Function vect_model_store_cost
811
812 Models cost for stores. In the case of strided accesses, one access
813 has the overhead of the strided access attributed to it. */
814
815 void
816 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
817 bool store_lanes_p, enum vect_def_type dt,
818 slp_tree slp_node)
819 {
820 int group_size;
821 unsigned int inside_cost = 0, outside_cost = 0;
822 struct data_reference *first_dr;
823 gimple first_stmt;
824
825 /* The SLP costs were already calculated during SLP tree build. */
826 if (PURE_SLP_STMT (stmt_info))
827 return;
828
829 if (dt == vect_constant_def || dt == vect_external_def)
830 outside_cost = vect_get_stmt_cost (scalar_to_vec);
831
832 /* Strided access? */
833 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
834 {
835 if (slp_node)
836 {
837 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
838 group_size = 1;
839 }
840 else
841 {
842 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
843 group_size = vect_cost_strided_group_size (stmt_info);
844 }
845
846 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
847 }
848 /* Not a strided access. */
849 else
850 {
851 group_size = 1;
852 first_dr = STMT_VINFO_DATA_REF (stmt_info);
853 }
854
855 /* We assume that the cost of a single store-lanes instruction is
856 equivalent to the cost of GROUP_SIZE separate stores. If a strided
857 access is instead being provided by a permute-and-store operation,
858 include the cost of the permutes. */
859 if (!store_lanes_p && group_size > 1)
860 {
861 /* Uses high and low interleave operations for each needed permute. */
862 inside_cost = ncopies * exact_log2(group_size) * group_size
863 * vect_get_stmt_cost (vector_stmt);
864
865 if (vect_print_dump_info (REPORT_COST))
866 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
867 group_size);
868
869 }
870
871 /* Costs of the stores. */
872 vect_get_store_cost (first_dr, ncopies, &inside_cost);
873
874 if (vect_print_dump_info (REPORT_COST))
875 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
876 "outside_cost = %d .", inside_cost, outside_cost);
877
878 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
879 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
880 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
881 }
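/* Worked example for vect_model_store_cost (illustrative only): a strided
   group of 4 stores, ncopies == 1, no store-lanes support.  The permute
   part contributes

     ncopies * log2 (group_size) * group_size = 1 * 2 * 4 = 8

   vector_stmt costs, and vect_get_store_cost then adds the cost of the
   vector stores themselves (aligned or unaligned, as detected).  */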
882
883
884 /* Calculate cost of DR's memory access. */
885 void
886 vect_get_store_cost (struct data_reference *dr, int ncopies,
887 unsigned int *inside_cost)
888 {
889 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
890
891 switch (alignment_support_scheme)
892 {
893 case dr_aligned:
894 {
895 *inside_cost += ncopies * vect_get_stmt_cost (vector_store);
896
897 if (vect_print_dump_info (REPORT_COST))
898 fprintf (vect_dump, "vect_model_store_cost: aligned.");
899
900 break;
901 }
902
903 case dr_unaligned_supported:
904 {
905 gimple stmt = DR_STMT (dr);
906 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
907 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
908
909 /* Here, we assign an additional cost for the unaligned store. */
910 *inside_cost += ncopies
911 * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
912 vectype, DR_MISALIGNMENT (dr));
913
914 if (vect_print_dump_info (REPORT_COST))
915 fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
916 "hardware.");
917
918 break;
919 }
920
921 default:
922 gcc_unreachable ();
923 }
924 }
925
926
927 /* Function vect_model_load_cost
928
929 Models cost for loads. In the case of strided accesses, the last access
930 has the overhead of the strided access attributed to it. Since unaligned
931 accesses are supported for loads, we also account for the costs of the
932 access scheme chosen. */
933
934 void
935 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
936 slp_tree slp_node)
937 {
938 int group_size;
939 gimple first_stmt;
940 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
941 unsigned int inside_cost = 0, outside_cost = 0;
942
943 /* The SLP costs were already calculated during SLP tree build. */
944 if (PURE_SLP_STMT (stmt_info))
945 return;
946
947 /* Strided accesses? */
948 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
949 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
950 {
951 group_size = vect_cost_strided_group_size (stmt_info);
952 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
953 }
954 /* Not a strided access. */
955 else
956 {
957 group_size = 1;
958 first_dr = dr;
959 }
960
961 /* We assume that the cost of a single load-lanes instruction is
962 equivalent to the cost of GROUP_SIZE separate loads. If a strided
963 access is instead being provided by a load-and-permute operation,
964 include the cost of the permutes. */
965 if (!load_lanes_p && group_size > 1)
966 {
967 /* Uses even and odd extract operations for each needed permute. */
968 inside_cost = ncopies * exact_log2(group_size) * group_size
969 * vect_get_stmt_cost (vector_stmt);
970
971 if (vect_print_dump_info (REPORT_COST))
972 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
973 group_size);
974 }
975
976 /* The loads themselves. */
977 vect_get_load_cost (first_dr, ncopies,
978 ((!STMT_VINFO_STRIDED_ACCESS (stmt_info)) || group_size > 1
979 || slp_node),
980 &inside_cost, &outside_cost);
981
982 if (vect_print_dump_info (REPORT_COST))
983 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
984 "outside_cost = %d .", inside_cost, outside_cost);
985
986 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
987 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
988 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
989 }
990
991
992 /* Calculate cost of DR's memory access. */
993 void
994 vect_get_load_cost (struct data_reference *dr, int ncopies,
995 bool add_realign_cost, unsigned int *inside_cost,
996 unsigned int *outside_cost)
997 {
998 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
999
1000 switch (alignment_support_scheme)
1001 {
1002 case dr_aligned:
1003 {
1004 *inside_cost += ncopies * vect_get_stmt_cost (vector_load);
1005
1006 if (vect_print_dump_info (REPORT_COST))
1007 fprintf (vect_dump, "vect_model_load_cost: aligned.");
1008
1009 break;
1010 }
1011 case dr_unaligned_supported:
1012 {
1013 gimple stmt = DR_STMT (dr);
1014 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1015 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1016
1017 /* Here, we assign an additional cost for the unaligned load. */
1018 *inside_cost += ncopies
1019 * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
1020 vectype, DR_MISALIGNMENT (dr));
1021 if (vect_print_dump_info (REPORT_COST))
1022 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
1023 "hardware.");
1024
1025 break;
1026 }
1027 case dr_explicit_realign:
1028 {
1029 *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
1030 + vect_get_stmt_cost (vector_stmt));
1031
1032 /* FIXME: If the misalignment remains fixed across the iterations of
1033 the containing loop, the following cost should be added to the
1034 outside costs. */
1035 if (targetm.vectorize.builtin_mask_for_load)
1036 *inside_cost += vect_get_stmt_cost (vector_stmt);
1037
1038 break;
1039 }
1040 case dr_explicit_realign_optimized:
1041 {
1042 if (vect_print_dump_info (REPORT_COST))
1043 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
1044 "pipelined.");
1045
1046 /* Unaligned software pipeline has a load of an address, an initial
1047 load, and possibly a mask operation to "prime" the loop. However,
1048 if this is an access in a group of loads, which provide strided
1049 access, then the above cost should only be considered for one
1050 access in the group. Inside the loop, there is a load op
1051 and a realignment op. */
1052
1053 if (add_realign_cost)
1054 {
1055 *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
1056 if (targetm.vectorize.builtin_mask_for_load)
1057 *outside_cost += vect_get_stmt_cost (vector_stmt);
1058 }
1059
1060 *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
1061 + vect_get_stmt_cost (vector_stmt));
1062 break;
1063 }
1064
1065 default:
1066 gcc_unreachable ();
1067 }
1068 }
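/* Hedged worked example for the dr_explicit_realign_optimized case above:
   with ncopies == 2 and ADD_REALIGN_COST true,

     *outside_cost = 2 * vector_stmt   (+1 more if the target has a
                                        mask-for-load builtin)
     *inside_cost += 2 * (vector_load + vector_stmt)

   in units of the target's vect_get_stmt_cost values.  */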
1069
1070
1071 /* Function vect_init_vector.
1072
1073 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1074 the vector elements of VECTOR_VAR. Place the initialization at GSI if it
1075 is not NULL. Otherwise, place the initialization at the loop preheader.
1076 Return the DEF of INIT_STMT.
1077 It will be used in the vectorization of STMT. */
1078
1079 tree
1080 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
1081 gimple_stmt_iterator *gsi)
1082 {
1083 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1084 tree new_var;
1085 gimple init_stmt;
1086 tree vec_oprnd;
1087 edge pe;
1088 tree new_temp;
1089 basic_block new_bb;
1090
1091 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
1092 add_referenced_var (new_var);
1093 init_stmt = gimple_build_assign (new_var, vector_var);
1094 new_temp = make_ssa_name (new_var, init_stmt);
1095 gimple_assign_set_lhs (init_stmt, new_temp);
1096
1097 if (gsi)
1098 vect_finish_stmt_generation (stmt, init_stmt, gsi);
1099 else
1100 {
1101 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1102
1103 if (loop_vinfo)
1104 {
1105 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1106
1107 if (nested_in_vect_loop_p (loop, stmt))
1108 loop = loop->inner;
1109
1110 pe = loop_preheader_edge (loop);
1111 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
1112 gcc_assert (!new_bb);
1113 }
1114 else
1115 {
1116 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1117 basic_block bb;
1118 gimple_stmt_iterator gsi_bb_start;
1119
1120 gcc_assert (bb_vinfo);
1121 bb = BB_VINFO_BB (bb_vinfo);
1122 gsi_bb_start = gsi_after_labels (bb);
1123 gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
1124 }
1125 }
1126
1127 if (vect_print_dump_info (REPORT_DETAILS))
1128 {
1129 fprintf (vect_dump, "created new init_stmt: ");
1130 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
1131 }
1132
1133 vec_oprnd = gimple_assign_lhs (init_stmt);
1134 return vec_oprnd;
1135 }
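/* Hedged usage sketch for vect_init_vector, mirroring how case 1 of
   vect_get_vec_def_for_operand below uses it to splat a constant:

     vec_cst = build_vector_from_val (vector_type, op);
     vec_def = vect_init_vector (stmt, vec_cst, vector_type, NULL);

   With a NULL gsi the init stmt is placed on the loop preheader edge (or
   after the labels of the basic block in the basic-block SLP case).  */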
1136
1137
1138 /* Function vect_get_vec_def_for_operand.
1139
1140 OP is an operand in STMT. This function returns a (vector) def that will be
1141 used in the vectorized stmt for STMT.
1142
1143 In the case that OP is an SSA_NAME which is defined in the loop, then
1144 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1145
1146 In case OP is an invariant or constant, a new stmt that creates a vector def
1147 needs to be introduced. */
1148
1149 tree
1150 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1151 {
1152 tree vec_oprnd;
1153 gimple vec_stmt;
1154 gimple def_stmt;
1155 stmt_vec_info def_stmt_info = NULL;
1156 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1157 unsigned int nunits;
1158 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1159 tree vec_inv;
1160 tree vec_cst;
1161 tree t = NULL_TREE;
1162 tree def;
1163 int i;
1164 enum vect_def_type dt;
1165 bool is_simple_use;
1166 tree vector_type;
1167
1168 if (vect_print_dump_info (REPORT_DETAILS))
1169 {
1170 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1171 print_generic_expr (vect_dump, op, TDF_SLIM);
1172 }
1173
1174 is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
1175 &dt);
1176 gcc_assert (is_simple_use);
1177 if (vect_print_dump_info (REPORT_DETAILS))
1178 {
1179 if (def)
1180 {
1181 fprintf (vect_dump, "def = ");
1182 print_generic_expr (vect_dump, def, TDF_SLIM);
1183 }
1184 if (def_stmt)
1185 {
1186 fprintf (vect_dump, " def_stmt = ");
1187 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1188 }
1189 }
1190
1191 switch (dt)
1192 {
1193 /* Case 1: operand is a constant. */
1194 case vect_constant_def:
1195 {
1196 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1197 gcc_assert (vector_type);
1198 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1199
1200 if (scalar_def)
1201 *scalar_def = op;
1202
1203 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1204 if (vect_print_dump_info (REPORT_DETAILS))
1205 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1206
1207 vec_cst = build_vector_from_val (vector_type, op);
1208 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
1209 }
1210
1211 /* Case 2: operand is defined outside the loop - loop invariant. */
1212 case vect_external_def:
1213 {
1214 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1215 gcc_assert (vector_type);
1216 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1217
1218 if (scalar_def)
1219 *scalar_def = def;
1220
1221 /* Create 'vec_inv = {inv,inv,..,inv}' */
1222 if (vect_print_dump_info (REPORT_DETAILS))
1223 fprintf (vect_dump, "Create vector_inv.");
1224
1225 for (i = nunits - 1; i >= 0; --i)
1226 {
1227 t = tree_cons (NULL_TREE, def, t);
1228 }
1229
1230 /* FIXME: use build_constructor directly. */
1231 vec_inv = build_constructor_from_list (vector_type, t);
1232 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
1233 }
1234
1235 /* Case 3: operand is defined inside the loop. */
1236 case vect_internal_def:
1237 {
1238 if (scalar_def)
1239 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1240
1241 /* Get the def from the vectorized stmt. */
1242 def_stmt_info = vinfo_for_stmt (def_stmt);
1243
1244 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1245 /* Get vectorized pattern statement. */
1246 if (!vec_stmt
1247 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1248 && !STMT_VINFO_RELEVANT (def_stmt_info))
1249 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1250 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1251 gcc_assert (vec_stmt);
1252 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1253 vec_oprnd = PHI_RESULT (vec_stmt);
1254 else if (is_gimple_call (vec_stmt))
1255 vec_oprnd = gimple_call_lhs (vec_stmt);
1256 else
1257 vec_oprnd = gimple_assign_lhs (vec_stmt);
1258 return vec_oprnd;
1259 }
1260
1261 /* Case 4: operand is defined by a loop header phi - reduction */
1262 case vect_reduction_def:
1263 case vect_double_reduction_def:
1264 case vect_nested_cycle:
1265 {
1266 struct loop *loop;
1267
1268 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1269 loop = (gimple_bb (def_stmt))->loop_father;
1270
1271 /* Get the def before the loop */
1272 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1273 return get_initial_def_for_reduction (stmt, op, scalar_def);
1274 }
1275
1276 /* Case 5: operand is defined by loop-header phi - induction. */
1277 case vect_induction_def:
1278 {
1279 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1280
1281 /* Get the def from the vectorized stmt. */
1282 def_stmt_info = vinfo_for_stmt (def_stmt);
1283 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1284 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1285 vec_oprnd = PHI_RESULT (vec_stmt);
1286 else
1287 vec_oprnd = gimple_get_lhs (vec_stmt);
1288 return vec_oprnd;
1289 }
1290
1291 default:
1292 gcc_unreachable ();
1293 }
1294 }
1295
1296
1297 /* Function vect_get_vec_def_for_stmt_copy
1298
1299 Return a vector-def for an operand. This function is used when the
1300 vectorized stmt to be created (by the caller to this function) is a "copy"
1301 created in case the vectorized result cannot fit in one vector, and several
1302 copies of the vector-stmt are required. In this case the vector-def is
1303 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1304 of the stmt that defines VEC_OPRND.
1305 DT is the type of the vector def VEC_OPRND.
1306
1307 Context:
1308 In case the vectorization factor (VF) is bigger than the number
1309 of elements that can fit in a vectype (nunits), we have to generate
1310 more than one vector stmt to vectorize the scalar stmt. This situation
1311 arises when there are multiple data-types operated upon in the loop; the
1312 smallest data-type determines the VF, and as a result, when vectorizing
1313 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1314 vector stmt (each computing a vector of 'nunits' results, and together
1315 computing 'VF' results in each iteration). This function is called when
1316 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1317 which VF=16 and nunits=4, so the number of copies required is 4):
1318
1319 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1320
1321 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1322 VS1.1: vx.1 = memref1 VS1.2
1323 VS1.2: vx.2 = memref2 VS1.3
1324 VS1.3: vx.3 = memref3
1325
1326 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1327 VSnew.1: vz1 = vx.1 + ... VSnew.2
1328 VSnew.2: vz2 = vx.2 + ... VSnew.3
1329 VSnew.3: vz3 = vx.3 + ...
1330
1331 The vectorization of S1 is explained in vectorizable_load.
1332 The vectorization of S2:
1333 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1334 the function 'vect_get_vec_def_for_operand' is called to
1335 get the relevant vector-def for each operand of S2. For operand x it
1336 returns the vector-def 'vx.0'.
1337
1338 To create the remaining copies of the vector-stmt (VSnew.j), this
1339 function is called to get the relevant vector-def for each operand. It is
1340 obtained from the respective VS1.j stmt, which is recorded in the
1341 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1342
1343 For example, to obtain the vector-def 'vx.1' in order to create the
1344 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1345 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1346 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1347 and return its def ('vx.1').
1348 Overall, to create the above sequence this function will be called 3 times:
1349 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1350 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1351 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1352
1353 tree
1354 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1355 {
1356 gimple vec_stmt_for_operand;
1357 stmt_vec_info def_stmt_info;
1358
1359 /* Do nothing; can reuse same def. */
1360 if (dt == vect_external_def || dt == vect_constant_def )
1361 return vec_oprnd;
1362
1363 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1364 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1365 gcc_assert (def_stmt_info);
1366 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1367 gcc_assert (vec_stmt_for_operand);
1368 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1369 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1370 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1371 else
1372 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1373 return vec_oprnd;
1374 }
1375
1376
1377 /* Get vectorized definitions for the operands to create a copy of an original
1378 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1379
1380 static void
1381 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1382 VEC(tree,heap) **vec_oprnds0,
1383 VEC(tree,heap) **vec_oprnds1)
1384 {
1385 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1386
1387 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1388 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1389
1390 if (vec_oprnds1 && *vec_oprnds1)
1391 {
1392 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1393 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1394 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1395 }
1396 }
1397
1398
1399 /* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not
1400 NULL. */
1401
1402 static void
1403 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1404 VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
1405 slp_tree slp_node)
1406 {
1407 if (slp_node)
1408 vect_get_slp_defs (op0, op1, slp_node, vec_oprnds0, vec_oprnds1, -1);
1409 else
1410 {
1411 tree vec_oprnd;
1412
1413 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1414 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1415 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1416
1417 if (op1)
1418 {
1419 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1420 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1421 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1422 }
1423 }
1424 }
1425
1426
1427 /* Function vect_finish_stmt_generation.
1428
1429 Insert a new stmt. */
1430
1431 void
1432 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1433 gimple_stmt_iterator *gsi)
1434 {
1435 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1436 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1437 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1438
1439 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1440
1441 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1442
1443 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1444 bb_vinfo));
1445
1446 if (vect_print_dump_info (REPORT_DETAILS))
1447 {
1448 fprintf (vect_dump, "add new stmt: ");
1449 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1450 }
1451
1452 gimple_set_location (vec_stmt, gimple_location (stmt));
1453 }
1454
1455 /* Checks if CALL can be vectorized with output vector type VECTYPE_OUT
1456 and input vector type VECTYPE_IN. Returns a function declaration if the
1457 target has a vectorized version of the function, or NULL_TREE otherwise. */
1458
1459 tree
1460 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1461 {
1462 tree fndecl = gimple_call_fndecl (call);
1463
1464 /* We only handle functions that do not read or clobber memory -- i.e.
1465 const or novops ones. */
1466 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1467 return NULL_TREE;
1468
1469 if (!fndecl
1470 || TREE_CODE (fndecl) != FUNCTION_DECL
1471 || !DECL_BUILT_IN (fndecl))
1472 return NULL_TREE;
1473
1474 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1475 vectype_in);
1476 }
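/* Hedged example: for a call to a const math builtin such as sqrt, a
   target that provides a vector variant can return its decl from
   targetm.vectorize.builtin_vectorized_function, so that (with hypothetical
   vector types)

     fndecl = vectorizable_function (call, v2df_type, v2df_type);

   yields a decl which vectorizable_call below then calls in place of the
   scalar builtin.  */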
1477
1478 /* Function vectorizable_call.
1479
1480 Check if STMT performs a function call that can be vectorized.
1481 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1482 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1483 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1484
1485 static bool
1486 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
1487 {
1488 tree vec_dest;
1489 tree scalar_dest;
1490 tree op, type;
1491 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1492 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1493 tree vectype_out, vectype_in;
1494 int nunits_in;
1495 int nunits_out;
1496 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1497 tree fndecl, new_temp, def, rhs_type;
1498 gimple def_stmt;
1499 enum vect_def_type dt[3]
1500 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1501 gimple new_stmt = NULL;
1502 int ncopies, j;
1503 VEC(tree, heap) *vargs = NULL;
1504 enum { NARROW, NONE, WIDEN } modifier;
1505 size_t i, nargs;
1506 tree lhs;
1507
1508 /* FORNOW: unsupported in basic block SLP. */
1509 gcc_assert (loop_vinfo);
1510
1511 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1512 return false;
1513
1514 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1515 return false;
1516
1517 /* FORNOW: SLP not supported. */
1518 if (STMT_SLP_TYPE (stmt_info))
1519 return false;
1520
1521 /* Is STMT a vectorizable call? */
1522 if (!is_gimple_call (stmt))
1523 return false;
1524
1525 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1526 return false;
1527
1528 if (stmt_can_throw_internal (stmt))
1529 return false;
1530
1531 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1532
1533 /* Process function arguments. */
1534 rhs_type = NULL_TREE;
1535 vectype_in = NULL_TREE;
1536 nargs = gimple_call_num_args (stmt);
1537
1538 /* Bail out if the function has more than three arguments; we do not have
1539 interesting builtin functions to vectorize with more than two arguments,
1540 except for fma. A call with no arguments is not supported either. */
1541 if (nargs == 0 || nargs > 3)
1542 return false;
1543
1544 for (i = 0; i < nargs; i++)
1545 {
1546 tree opvectype;
1547
1548 op = gimple_call_arg (stmt, i);
1549
1550 /* We can only handle calls with arguments of the same type. */
1551 if (rhs_type
1552 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1553 {
1554 if (vect_print_dump_info (REPORT_DETAILS))
1555 fprintf (vect_dump, "argument types differ.");
1556 return false;
1557 }
1558 if (!rhs_type)
1559 rhs_type = TREE_TYPE (op);
1560
1561 if (!vect_is_simple_use_1 (op, loop_vinfo, NULL,
1562 &def_stmt, &def, &dt[i], &opvectype))
1563 {
1564 if (vect_print_dump_info (REPORT_DETAILS))
1565 fprintf (vect_dump, "use not simple.");
1566 return false;
1567 }
1568
1569 if (!vectype_in)
1570 vectype_in = opvectype;
1571 else if (opvectype
1572 && opvectype != vectype_in)
1573 {
1574 if (vect_print_dump_info (REPORT_DETAILS))
1575 fprintf (vect_dump, "argument vector types differ.");
1576 return false;
1577 }
1578 }
1579 /* If all arguments are external or constant defs, use a vector type with
1580 the same size as the output vector type. */
1581 if (!vectype_in)
1582 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1583 if (vec_stmt)
1584 gcc_assert (vectype_in);
1585 if (!vectype_in)
1586 {
1587 if (vect_print_dump_info (REPORT_DETAILS))
1588 {
1589 fprintf (vect_dump, "no vectype for scalar type ");
1590 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1591 }
1592
1593 return false;
1594 }
1595
1596 /* FORNOW */
1597 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1598 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1599 if (nunits_in == nunits_out / 2)
1600 modifier = NARROW;
1601 else if (nunits_out == nunits_in)
1602 modifier = NONE;
1603 else if (nunits_out == nunits_in / 2)
1604 modifier = WIDEN;
1605 else
1606 return false;
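      /* Illustration of the classification above (numbers illustrative only):
         nunits_in == 4, nunits_out == 4 -> NONE (one call per copy);
         nunits_in == 2, nunits_out == 4 -> NARROW (two input vectors are
         consumed per output vector, see the NARROW case below);
         nunits_in == 4, nunits_out == 2 -> WIDEN (rejected below; no target
         implements it yet).  */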
1607
1608 /* For now, we only vectorize functions if a target specific builtin
1609 is available. TODO -- in some cases, it might be profitable to
1610 insert the calls for pieces of the vector, in order to be able
1611 to vectorize other operations in the loop. */
1612 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1613 if (fndecl == NULL_TREE)
1614 {
1615 if (vect_print_dump_info (REPORT_DETAILS))
1616 fprintf (vect_dump, "function is not vectorizable.");
1617
1618 return false;
1619 }
1620
1621 gcc_assert (!gimple_vuse (stmt));
1622
1623 if (modifier == NARROW)
1624 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1625 else
1626 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1627
1628 /* Sanity check: make sure that at least one copy of the vectorized stmt
1629 needs to be generated. */
1630 gcc_assert (ncopies >= 1);
1631
1632 if (!vec_stmt) /* transformation not required. */
1633 {
1634 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1635 if (vect_print_dump_info (REPORT_DETAILS))
1636 fprintf (vect_dump, "=== vectorizable_call ===");
1637 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1638 return true;
1639 }
1640
1641 /** Transform. **/
1642
1643 if (vect_print_dump_info (REPORT_DETAILS))
1644 fprintf (vect_dump, "transform call.");
1645
1646 /* Handle def. */
1647 scalar_dest = gimple_call_lhs (stmt);
1648 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1649
1650 prev_stmt_info = NULL;
1651 switch (modifier)
1652 {
1653 case NONE:
1654 for (j = 0; j < ncopies; ++j)
1655 {
1656 /* Build argument list for the vectorized call. */
1657 if (j == 0)
1658 vargs = VEC_alloc (tree, heap, nargs);
1659 else
1660 VEC_truncate (tree, vargs, 0);
1661
1662 for (i = 0; i < nargs; i++)
1663 {
1664 op = gimple_call_arg (stmt, i);
1665 if (j == 0)
1666 vec_oprnd0
1667 = vect_get_vec_def_for_operand (op, stmt, NULL);
1668 else
1669 {
1670 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1671 vec_oprnd0
1672 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1673 }
1674
1675 VEC_quick_push (tree, vargs, vec_oprnd0);
1676 }
1677
1678 new_stmt = gimple_build_call_vec (fndecl, vargs);
1679 new_temp = make_ssa_name (vec_dest, new_stmt);
1680 gimple_call_set_lhs (new_stmt, new_temp);
1681
1682 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1683 mark_symbols_for_renaming (new_stmt);
1684
1685 if (j == 0)
1686 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1687 else
1688 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1689
1690 prev_stmt_info = vinfo_for_stmt (new_stmt);
1691 }
1692
1693 break;
1694
1695 case NARROW:
1696 for (j = 0; j < ncopies; ++j)
1697 {
1698 /* Build argument list for the vectorized call. */
1699 if (j == 0)
1700 vargs = VEC_alloc (tree, heap, nargs * 2);
1701 else
1702 VEC_truncate (tree, vargs, 0);
1703
1704 for (i = 0; i < nargs; i++)
1705 {
1706 op = gimple_call_arg (stmt, i);
1707 if (j == 0)
1708 {
1709 vec_oprnd0
1710 = vect_get_vec_def_for_operand (op, stmt, NULL);
1711 vec_oprnd1
1712 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1713 }
1714 else
1715 {
1716 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
1717 vec_oprnd0
1718 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1719 vec_oprnd1
1720 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1721 }
1722
1723 VEC_quick_push (tree, vargs, vec_oprnd0);
1724 VEC_quick_push (tree, vargs, vec_oprnd1);
1725 }
1726
1727 new_stmt = gimple_build_call_vec (fndecl, vargs);
1728 new_temp = make_ssa_name (vec_dest, new_stmt);
1729 gimple_call_set_lhs (new_stmt, new_temp);
1730
1731 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1732 mark_symbols_for_renaming (new_stmt);
1733
1734 if (j == 0)
1735 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1736 else
1737 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1738
1739 prev_stmt_info = vinfo_for_stmt (new_stmt);
1740 }
1741
1742 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1743
1744 break;
1745
1746 case WIDEN:
1747 /* No current target implements this case. */
1748 return false;
1749 }
1750
1751 VEC_free (tree, heap, vargs);
1752
1753 /* Update the exception handling table with the vector stmt if necessary. */
1754 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1755 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1756
1757 /* The call in STMT might prevent it from being removed in dce.
1758 However, we cannot remove it here, due to the way the ssa name
1759 it defines is mapped to the new definition. So just replace the
1760 rhs of the statement with something harmless. */
1761
1762 type = TREE_TYPE (scalar_dest);
1763 if (is_pattern_stmt_p (stmt_info))
1764 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
1765 else
1766 lhs = gimple_call_lhs (stmt);
1767 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
1768 set_vinfo_for_stmt (new_stmt, stmt_info);
1769 set_vinfo_for_stmt (stmt, NULL);
1770 STMT_VINFO_STMT (stmt_info) = new_stmt;
1771 gsi_replace (gsi, new_stmt, false);
1772 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1773
1774 return true;
1775 }
1776
1777
1778 /* Function vect_gen_widened_results_half
1779
1780 Create a vector stmt whose code is CODE, whose number of operands is
1781 OP_TYPE, and whose result variable is VEC_DEST, and whose arguments are
1782 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
1783 In the case that CODE is a CALL_EXPR, this means that a call to DECL
1784 needs to be created (DECL is a function-decl of a target-builtin).
1785 STMT is the original scalar stmt that we are vectorizing. */
1786
1787 static gimple
1788 vect_gen_widened_results_half (enum tree_code code,
1789 tree decl,
1790 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1791 tree vec_dest, gimple_stmt_iterator *gsi,
1792 gimple stmt)
1793 {
1794 gimple new_stmt;
1795 tree new_temp;
1796
1797 /* Generate half of the widened result: */
1798 if (code == CALL_EXPR)
1799 {
1800 /* Target specific support */
1801 if (op_type == binary_op)
1802 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1803 else
1804 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1805 new_temp = make_ssa_name (vec_dest, new_stmt);
1806 gimple_call_set_lhs (new_stmt, new_temp);
1807 }
1808 else
1809 {
1810 /* Generic support */
1811 gcc_assert (op_type == TREE_CODE_LENGTH (code));
1812 if (op_type != binary_op)
1813 vec_oprnd1 = NULL;
1814 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1815 vec_oprnd1);
1816 new_temp = make_ssa_name (vec_dest, new_stmt);
1817 gimple_assign_set_lhs (new_stmt, new_temp);
1818 }
1819 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1820
1821 return new_stmt;
1822 }
1823
1824
1825 /* Check if STMT performs a conversion operation that can be vectorized.
1826 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1827 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1828 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1829
1830 static bool
1831 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
1832 gimple *vec_stmt, slp_tree slp_node)
1833 {
1834 tree vec_dest;
1835 tree scalar_dest;
1836 tree op0;
1837 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1838 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1839 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1840 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
1841 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
1842 tree new_temp;
1843 tree def;
1844 gimple def_stmt;
1845 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1846 gimple new_stmt = NULL;
1847 stmt_vec_info prev_stmt_info;
1848 int nunits_in;
1849 int nunits_out;
1850 tree vectype_out, vectype_in;
1851 int ncopies, j;
1852 tree rhs_type;
1853 tree builtin_decl;
1854 enum { NARROW, NONE, WIDEN } modifier;
1855 int i;
1856 VEC(tree,heap) *vec_oprnds0 = NULL;
1857 tree vop0;
1858 VEC(tree,heap) *dummy = NULL;
1859 int dummy_int;
1860
1861 /* Is STMT a vectorizable conversion? */
1862
1863 /* FORNOW: unsupported in basic block SLP. */
1864 gcc_assert (loop_vinfo);
1865
1866 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1867 return false;
1868
1869 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1870 return false;
1871
1872 if (!is_gimple_assign (stmt))
1873 return false;
1874
1875 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1876 return false;
1877
1878 code = gimple_assign_rhs_code (stmt);
1879 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
1880 return false;
1881
1882 /* Check types of lhs and rhs. */
1883 scalar_dest = gimple_assign_lhs (stmt);
1884 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1885
1886 op0 = gimple_assign_rhs1 (stmt);
1887 rhs_type = TREE_TYPE (op0);
1888 /* Check the operands of the operation. */
1889 if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
1890 &def_stmt, &def, &dt[0], &vectype_in))
1891 {
1892 if (vect_print_dump_info (REPORT_DETAILS))
1893 fprintf (vect_dump, "use not simple.");
1894 return false;
1895 }
1896 /* If op0 is an external or constant def, use a vector type with
1897 the same size as the output vector type.  */
1898 if (!vectype_in)
1899 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
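   /* For instance (illustrative only): when OP0 is a loop invariant of type
      int and the output vector type is V4SF, get_same_sized_vectype picks
      V4SI as the input vector type here.  */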
1900 if (vec_stmt)
1901 gcc_assert (vectype_in);
1902 if (!vectype_in)
1903 {
1904 if (vect_print_dump_info (REPORT_DETAILS))
1905 {
1906 fprintf (vect_dump, "no vectype for scalar type ");
1907 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1908 }
1909
1910 return false;
1911 }
1912
1913 /* FORNOW */
1914 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1915 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1916 if (nunits_in == nunits_out / 2)
1917 modifier = NARROW;
1918 else if (nunits_out == nunits_in)
1919 modifier = NONE;
1920 else if (nunits_out == nunits_in / 2)
1921 modifier = WIDEN;
1922 else
1923 return false;
1924
1925 if (modifier == NARROW)
1926 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1927 else
1928 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
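   /* A worked example (illustrative only): for a FLOAT_EXPR from int to float
      with vectype_in == V4SI and vectype_out == V4SF, nunits_in == nunits_out
      == 4, so the modifier is NONE; with a vectorization factor of 8 this
      gives ncopies == 8 / 4 == 2.  When the input vector holds twice as many
      elements as the output (nunits_out == nunits_in / 2) the modifier is
      WIDEN, and NARROW in the opposite case.  */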
1929
1930 /* Multiple types in SLP are handled by creating the appropriate number of
1931 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1932 case of SLP. */
1933 if (slp_node || PURE_SLP_STMT (stmt_info))
1934 ncopies = 1;
1935
1936 /* Sanity check: make sure that at least one copy of the vectorized stmt
1937 needs to be generated. */
1938 gcc_assert (ncopies >= 1);
1939
1940 /* Supportable by target? */
1941 if ((modifier == NONE
1942 && !targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
1943 || (modifier == WIDEN
1944 && !supportable_widening_operation (code, stmt,
1945 vectype_out, vectype_in,
1946 &decl1, &decl2,
1947 &code1, &code2,
1948 &dummy_int, &dummy))
1949 || (modifier == NARROW
1950 && !supportable_narrowing_operation (code, vectype_out, vectype_in,
1951 &code1, &dummy_int, &dummy)))
1952 {
1953 if (vect_print_dump_info (REPORT_DETAILS))
1954 fprintf (vect_dump, "conversion not supported by target.");
1955 return false;
1956 }
1957
1958 if (modifier != NONE)
1959 {
1960 /* FORNOW: SLP not supported. */
1961 if (STMT_SLP_TYPE (stmt_info))
1962 return false;
1963 }
1964
1965 if (!vec_stmt) /* transformation not required. */
1966 {
1967 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
1968 return true;
1969 }
1970
1971 /** Transform. **/
1972 if (vect_print_dump_info (REPORT_DETAILS))
1973 fprintf (vect_dump, "transform conversion.");
1974
1975 /* Handle def. */
1976 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1977
1978 if (modifier == NONE && !slp_node)
1979 vec_oprnds0 = VEC_alloc (tree, heap, 1);
1980
1981 prev_stmt_info = NULL;
1982 switch (modifier)
1983 {
1984 case NONE:
1985 for (j = 0; j < ncopies; j++)
1986 {
1987 if (j == 0)
1988 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
1989 else
1990 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
1991
1992 builtin_decl =
1993 targetm.vectorize.builtin_conversion (code,
1994 vectype_out, vectype_in);
1995 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
1996 {
1997 /* Arguments are ready.  Create the new vector stmt.  */
1998 new_stmt = gimple_build_call (builtin_decl, 1, vop0);
1999 new_temp = make_ssa_name (vec_dest, new_stmt);
2000 gimple_call_set_lhs (new_stmt, new_temp);
2001 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2002 if (slp_node)
2003 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2004 }
2005
2006 if (j == 0)
2007 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2008 else
2009 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2010 prev_stmt_info = vinfo_for_stmt (new_stmt);
2011 }
2012 break;
2013
2014 case WIDEN:
2015 /* In case the vectorization factor (VF) is bigger than the number
2016 of elements that we can fit in a vectype (nunits), we have to
2017 generate more than one vector stmt - i.e - we need to "unroll"
2018 the vector stmt by a factor VF/nunits. */
2019 for (j = 0; j < ncopies; j++)
2020 {
2021 if (j == 0)
2022 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2023 else
2024 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2025
2026 /* Generate first half of the widened result: */
2027 new_stmt
2028 = vect_gen_widened_results_half (code1, decl1,
2029 vec_oprnd0, vec_oprnd1,
2030 unary_op, vec_dest, gsi, stmt);
2031 if (j == 0)
2032 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2033 else
2034 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2035 prev_stmt_info = vinfo_for_stmt (new_stmt);
2036
2037 /* Generate second half of the widened result: */
2038 new_stmt
2039 = vect_gen_widened_results_half (code2, decl2,
2040 vec_oprnd0, vec_oprnd1,
2041 unary_op, vec_dest, gsi, stmt);
2042 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2043 prev_stmt_info = vinfo_for_stmt (new_stmt);
2044 }
2045 break;
2046
2047 case NARROW:
2048 /* In case the vectorization factor (VF) is bigger than the number
2049 of elements that we can fit in a vectype (nunits), we have to
2050 generate more than one vector stmt - i.e - we need to "unroll"
2051 the vector stmt by a factor VF/nunits. */
2052 for (j = 0; j < ncopies; j++)
2053 {
2054 /* Handle uses. */
2055 if (j == 0)
2056 {
2057 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2058 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2059 }
2060 else
2061 {
2062 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
2063 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2064 }
2065
2066 /* Arguments are ready. Create the new vector stmt. */
2067 new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
2068 vec_oprnd1);
2069 new_temp = make_ssa_name (vec_dest, new_stmt);
2070 gimple_assign_set_lhs (new_stmt, new_temp);
2071 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2072
2073 if (j == 0)
2074 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2075 else
2076 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2077
2078 prev_stmt_info = vinfo_for_stmt (new_stmt);
2079 }
2080
2081 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2082 }
2083
2084 if (vec_oprnds0)
2085 VEC_free (tree, heap, vec_oprnds0);
2086
2087 return true;
2088 }
2089
2090
2091 /* Function vectorizable_assignment.
2092
2093 Check if STMT performs an assignment (copy) that can be vectorized.
2094 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2095 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2096 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2097
2098 static bool
2099 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2100 gimple *vec_stmt, slp_tree slp_node)
2101 {
2102 tree vec_dest;
2103 tree scalar_dest;
2104 tree op;
2105 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2106 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2107 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2108 tree new_temp;
2109 tree def;
2110 gimple def_stmt;
2111 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2112 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2113 int ncopies;
2114 int i, j;
2115 VEC(tree,heap) *vec_oprnds = NULL;
2116 tree vop;
2117 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2118 gimple new_stmt = NULL;
2119 stmt_vec_info prev_stmt_info = NULL;
2120 enum tree_code code;
2121 tree vectype_in;
2122
2123 /* Multiple types in SLP are handled by creating the appropriate number of
2124 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2125 case of SLP. */
2126 if (slp_node || PURE_SLP_STMT (stmt_info))
2127 ncopies = 1;
2128 else
2129 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2130
2131 gcc_assert (ncopies >= 1);
2132
2133 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2134 return false;
2135
2136 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2137 return false;
2138
2139 /* Is vectorizable assignment? */
2140 if (!is_gimple_assign (stmt))
2141 return false;
2142
2143 scalar_dest = gimple_assign_lhs (stmt);
2144 if (TREE_CODE (scalar_dest) != SSA_NAME)
2145 return false;
2146
2147 code = gimple_assign_rhs_code (stmt);
2148 if (gimple_assign_single_p (stmt)
2149 || code == PAREN_EXPR
2150 || CONVERT_EXPR_CODE_P (code))
2151 op = gimple_assign_rhs1 (stmt);
2152 else
2153 return false;
2154
2155 if (code == VIEW_CONVERT_EXPR)
2156 op = TREE_OPERAND (op, 0);
2157
2158 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
2159 &def_stmt, &def, &dt[0], &vectype_in))
2160 {
2161 if (vect_print_dump_info (REPORT_DETAILS))
2162 fprintf (vect_dump, "use not simple.");
2163 return false;
2164 }
2165
2166 /* We can handle NOP_EXPR conversions and VIEW_CONVERT_EXPRs that do not
2167 change the number of elements or the vector size.  */
2168 if ((CONVERT_EXPR_CODE_P (code)
2169 || code == VIEW_CONVERT_EXPR)
2170 && (!vectype_in
2171 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2172 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2173 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2174 return false;
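   /* E.g. (illustration): a conversion from a vector of int to a vector of
      unsigned int keeps both the element count and the vector size, so it is
      handled below as a VIEW_CONVERT_EXPR of the vector operand; an
      int -> short conversion changes the element count and is rejected by
      this check (it is vectorized as a type demotion instead).  */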
2175
2176 if (!vec_stmt) /* transformation not required. */
2177 {
2178 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2179 if (vect_print_dump_info (REPORT_DETAILS))
2180 fprintf (vect_dump, "=== vectorizable_assignment ===");
2181 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2182 return true;
2183 }
2184
2185 /** Transform. **/
2186 if (vect_print_dump_info (REPORT_DETAILS))
2187 fprintf (vect_dump, "transform assignment.");
2188
2189 /* Handle def. */
2190 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2191
2192 /* Handle use. */
2193 for (j = 0; j < ncopies; j++)
2194 {
2195 /* Handle uses. */
2196 if (j == 0)
2197 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2198 else
2199 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2200
2201 /* Arguments are ready.  Create the new vector stmt.  */
2202 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
2203 {
2204 if (CONVERT_EXPR_CODE_P (code)
2205 || code == VIEW_CONVERT_EXPR)
2206 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2207 new_stmt = gimple_build_assign (vec_dest, vop);
2208 new_temp = make_ssa_name (vec_dest, new_stmt);
2209 gimple_assign_set_lhs (new_stmt, new_temp);
2210 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2211 if (slp_node)
2212 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2213 }
2214
2215 if (slp_node)
2216 continue;
2217
2218 if (j == 0)
2219 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2220 else
2221 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2222
2223 prev_stmt_info = vinfo_for_stmt (new_stmt);
2224 }
2225
2226 VEC_free (tree, heap, vec_oprnds);
2227 return true;
2228 }
2229
2230
2231 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2232 either as shift by a scalar or by a vector. */
2233
2234 bool
2235 vect_supportable_shift (enum tree_code code, tree scalar_type)
2236 {
2237
2238 enum machine_mode vec_mode;
2239 optab optab;
2240 int icode;
2241 tree vectype;
2242
2243 vectype = get_vectype_for_scalar_type (scalar_type);
2244 if (!vectype)
2245 return false;
2246
2247 optab = optab_for_tree_code (code, vectype, optab_scalar);
2248 if (!optab
2249 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2250 {
2251 optab = optab_for_tree_code (code, vectype, optab_vector);
2252 if (!optab
2253 || (optab_handler (optab, TYPE_MODE (vectype))
2254 == CODE_FOR_nothing))
2255 return false;
2256 }
2257
2258 vec_mode = TYPE_MODE (vectype);
2259 icode = (int) optab_handler (optab, vec_mode);
2260 if (icode == CODE_FOR_nothing)
2261 return false;
2262
2263 return true;
2264 }
2265
2266
2267 /* Function vectorizable_shift.
2268
2269 Check if STMT performs a shift operation that can be vectorized.
2270 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2271 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2272 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2273
2274 static bool
2275 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
2276 gimple *vec_stmt, slp_tree slp_node)
2277 {
2278 tree vec_dest;
2279 tree scalar_dest;
2280 tree op0, op1 = NULL;
2281 tree vec_oprnd1 = NULL_TREE;
2282 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2283 tree vectype;
2284 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2285 enum tree_code code;
2286 enum machine_mode vec_mode;
2287 tree new_temp;
2288 optab optab;
2289 int icode;
2290 enum machine_mode optab_op2_mode;
2291 tree def;
2292 gimple def_stmt;
2293 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2294 gimple new_stmt = NULL;
2295 stmt_vec_info prev_stmt_info;
2296 int nunits_in;
2297 int nunits_out;
2298 tree vectype_out;
2299 int ncopies;
2300 int j, i;
2301 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2302 tree vop0, vop1;
2303 unsigned int k;
2304 bool scalar_shift_arg = true;
2305 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2306 int vf;
2307
2308 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2309 return false;
2310
2311 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2312 return false;
2313
2314 /* Is STMT a vectorizable binary/unary operation? */
2315 if (!is_gimple_assign (stmt))
2316 return false;
2317
2318 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2319 return false;
2320
2321 code = gimple_assign_rhs_code (stmt);
2322
2323 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2324 || code == RROTATE_EXPR))
2325 return false;
2326
2327 scalar_dest = gimple_assign_lhs (stmt);
2328 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2329
2330 op0 = gimple_assign_rhs1 (stmt);
2331 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2332 &def_stmt, &def, &dt[0], &vectype))
2333 {
2334 if (vect_print_dump_info (REPORT_DETAILS))
2335 fprintf (vect_dump, "use not simple.");
2336 return false;
2337 }
2338 /* If op0 is an external or constant def use a vector type with
2339 the same size as the output vector type. */
2340 if (!vectype)
2341 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2342 if (vec_stmt)
2343 gcc_assert (vectype);
2344 if (!vectype)
2345 {
2346 if (vect_print_dump_info (REPORT_DETAILS))
2347 {
2348 fprintf (vect_dump, "no vectype for scalar type ");
2349 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2350 }
2351
2352 return false;
2353 }
2354
2355 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2356 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2357 if (nunits_out != nunits_in)
2358 return false;
2359
2360 op1 = gimple_assign_rhs2 (stmt);
2361 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[1]))
2362 {
2363 if (vect_print_dump_info (REPORT_DETAILS))
2364 fprintf (vect_dump, "use not simple.");
2365 return false;
2366 }
2367
2368 if (loop_vinfo)
2369 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2370 else
2371 vf = 1;
2372
2373 /* Multiple types in SLP are handled by creating the appropriate number of
2374 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2375 case of SLP. */
2376 if (slp_node || PURE_SLP_STMT (stmt_info))
2377 ncopies = 1;
2378 else
2379 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2380
2381 gcc_assert (ncopies >= 1);
2382
2383 /* Determine whether the shift amount is a vector or a scalar.  If the
2384 shift/rotate amount is a vector, use the vector/vector shift optabs. */
2385
2386 if (dt[1] == vect_internal_def && !slp_node)
2387 scalar_shift_arg = false;
2388 else if (dt[1] == vect_constant_def
2389 || dt[1] == vect_external_def
2390 || dt[1] == vect_internal_def)
2391 {
2392 /* In SLP we need to check whether the shift count is the same for
2393 all statements; in loops, a constant or invariant shift count is
2394 always a scalar shift.  */
2395 if (slp_node)
2396 {
2397 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2398 gimple slpstmt;
2399
2400 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
2401 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
2402 scalar_shift_arg = false;
2403 }
2404 }
2405 else
2406 {
2407 if (vect_print_dump_info (REPORT_DETAILS))
2408 fprintf (vect_dump, "operand mode requires invariant argument.");
2409 return false;
2410 }
2411
2412 /* Vector shifted by vector. */
2413 if (!scalar_shift_arg)
2414 {
2415 optab = optab_for_tree_code (code, vectype, optab_vector);
2416 if (vect_print_dump_info (REPORT_DETAILS))
2417 fprintf (vect_dump, "vector/vector shift/rotate found.");
2418 }
2419 /* See if the machine has a vector shifted by scalar insn and if not
2420 then see if it has a vector shifted by vector insn. */
2421 else
2422 {
2423 optab = optab_for_tree_code (code, vectype, optab_scalar);
2424 if (optab
2425 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2426 {
2427 if (vect_print_dump_info (REPORT_DETAILS))
2428 fprintf (vect_dump, "vector/scalar shift/rotate found.");
2429 }
2430 else
2431 {
2432 optab = optab_for_tree_code (code, vectype, optab_vector);
2433 if (optab
2434 && (optab_handler (optab, TYPE_MODE (vectype))
2435 != CODE_FOR_nothing))
2436 {
2437 scalar_shift_arg = false;
2438
2439 if (vect_print_dump_info (REPORT_DETAILS))
2440 fprintf (vect_dump, "vector/vector shift/rotate found.");
2441
2442 /* Unlike the other binary operators, shifts/rotates have an rhs of
2443 type int rather than the same type as the lhs, so make sure the
2444 scalar is of the right type if we are dealing with vectors of
2445 short/char.  */
2446 if (dt[1] == vect_constant_def)
2447 op1 = fold_convert (TREE_TYPE (vectype), op1);
2448 }
2449 }
2450 }
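   /* To illustrate (hypothetical scalar code): for  a[i] << 3  or  a[i] << n
      with loop-invariant n, the shift amount is a scalar and the vector/scalar
      optab is tried first; for  a[i] << b[i]  the amount is vect_internal_def,
      so the vector/vector optab must be used.  If only the vector/vector form
      exists, a constant scalar amount is converted to the vector element type
      (e.g. the int constant 3 for a vector of shorts) and then used for every
      element.  */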
2451
2452 /* Supportable by target? */
2453 if (!optab)
2454 {
2455 if (vect_print_dump_info (REPORT_DETAILS))
2456 fprintf (vect_dump, "no optab.");
2457 return false;
2458 }
2459 vec_mode = TYPE_MODE (vectype);
2460 icode = (int) optab_handler (optab, vec_mode);
2461 if (icode == CODE_FOR_nothing)
2462 {
2463 if (vect_print_dump_info (REPORT_DETAILS))
2464 fprintf (vect_dump, "op not supported by target.");
2465 /* Check only during analysis. */
2466 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2467 || (vf < vect_min_worthwhile_factor (code)
2468 && !vec_stmt))
2469 return false;
2470 if (vect_print_dump_info (REPORT_DETAILS))
2471 fprintf (vect_dump, "proceeding using word mode.");
2472 }
2473
2474 /* Worthwhile without SIMD support? Check only during analysis. */
2475 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2476 && vf < vect_min_worthwhile_factor (code)
2477 && !vec_stmt)
2478 {
2479 if (vect_print_dump_info (REPORT_DETAILS))
2480 fprintf (vect_dump, "not worthwhile without SIMD support.");
2481 return false;
2482 }
2483
2484 if (!vec_stmt) /* transformation not required. */
2485 {
2486 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
2487 if (vect_print_dump_info (REPORT_DETAILS))
2488 fprintf (vect_dump, "=== vectorizable_shift ===");
2489 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2490 return true;
2491 }
2492
2493 /** Transform. **/
2494
2495 if (vect_print_dump_info (REPORT_DETAILS))
2496 fprintf (vect_dump, "transform binary/unary operation.");
2497
2498 /* Handle def. */
2499 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2500
2501 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2502 created in the previous stages of the recursion, so no allocation is
2503 needed, except for the case of shift with scalar shift argument. In that
2504 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2505 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2506 In case of loop-based vectorization we allocate VECs of size 1. We
2507 allocate VEC_OPRNDS1 only in case of binary operation. */
2508 if (!slp_node)
2509 {
2510 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2511 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2512 }
2513 else if (scalar_shift_arg)
2514 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2515
2516 prev_stmt_info = NULL;
2517 for (j = 0; j < ncopies; j++)
2518 {
2519 /* Handle uses. */
2520 if (j == 0)
2521 {
2522 if (scalar_shift_arg)
2523 {
2524 /* Vector shl and shr insn patterns can be defined with scalar
2525 operand 2 (shift operand). In this case, use constant or loop
2526 invariant op1 directly, without extending it to vector mode
2527 first. */
2528 optab_op2_mode = insn_data[icode].operand[2].mode;
2529 if (!VECTOR_MODE_P (optab_op2_mode))
2530 {
2531 if (vect_print_dump_info (REPORT_DETAILS))
2532 fprintf (vect_dump, "operand 1 using scalar mode.");
2533 vec_oprnd1 = op1;
2534 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2535 if (slp_node)
2536 {
2537 /* Store vec_oprnd1 for every vector stmt to be created
2538 for SLP_NODE. We check during the analysis that all
2539 the shift arguments are the same.
2540 TODO: Allow different constants for different vector
2541 stmts generated for an SLP instance. */
2542 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2543 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2544 }
2545 }
2546 }
2547
2548 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
2549 (a special case for certain kinds of vector shifts); otherwise,
2550 operand 1 should be of a vector type (the usual case). */
2551 if (vec_oprnd1)
2552 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2553 slp_node);
2554 else
2555 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2556 slp_node);
2557 }
2558 else
2559 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2560
2561 /* Arguments are ready. Create the new vector stmt. */
2562 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2563 {
2564 vop1 = VEC_index (tree, vec_oprnds1, i);
2565 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2566 new_temp = make_ssa_name (vec_dest, new_stmt);
2567 gimple_assign_set_lhs (new_stmt, new_temp);
2568 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2569 if (slp_node)
2570 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2571 }
2572
2573 if (slp_node)
2574 continue;
2575
2576 if (j == 0)
2577 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2578 else
2579 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2580 prev_stmt_info = vinfo_for_stmt (new_stmt);
2581 }
2582
2583 VEC_free (tree, heap, vec_oprnds0);
2584 VEC_free (tree, heap, vec_oprnds1);
2585
2586 return true;
2587 }
2588
2589
2590 /* Function vectorizable_operation.
2591
2592 Check if STMT performs a binary, unary or ternary operation that can
2593 be vectorized.
2594 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2595 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2596 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2597
2598 static bool
2599 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
2600 gimple *vec_stmt, slp_tree slp_node)
2601 {
2602 tree vec_dest;
2603 tree scalar_dest;
2604 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
2605 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2606 tree vectype;
2607 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2608 enum tree_code code;
2609 enum machine_mode vec_mode;
2610 tree new_temp;
2611 int op_type;
2612 optab optab;
2613 int icode;
2614 tree def;
2615 gimple def_stmt;
2616 enum vect_def_type dt[3]
2617 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2618 gimple new_stmt = NULL;
2619 stmt_vec_info prev_stmt_info;
2620 int nunits_in;
2621 int nunits_out;
2622 tree vectype_out;
2623 int ncopies;
2624 int j, i;
2625 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
2626 tree vop0, vop1, vop2;
2627 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2628 int vf;
2629
2630 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2631 return false;
2632
2633 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2634 return false;
2635
2636 /* Is STMT a vectorizable binary/unary operation? */
2637 if (!is_gimple_assign (stmt))
2638 return false;
2639
2640 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2641 return false;
2642
2643 code = gimple_assign_rhs_code (stmt);
2644
2645 /* For pointer addition, we should use the normal plus for
2646 the vector addition. */
2647 if (code == POINTER_PLUS_EXPR)
2648 code = PLUS_EXPR;
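   /* E.g. (hypothetical GIMPLE): a scalar stmt  q_1 = p_2 p+ 4  is a
      POINTER_PLUS_EXPR, but the vectorized stmt simply adds the offset
      vector to the pointer vector element-wise with PLUS_EXPR.  */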
2649
2650 /* Support only unary, binary and ternary operations.  */
2651 op_type = TREE_CODE_LENGTH (code);
2652 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
2653 {
2654 if (vect_print_dump_info (REPORT_DETAILS))
2655 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
2656 op_type);
2657 return false;
2658 }
2659
2660 scalar_dest = gimple_assign_lhs (stmt);
2661 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2662
2663 op0 = gimple_assign_rhs1 (stmt);
2664 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2665 &def_stmt, &def, &dt[0], &vectype))
2666 {
2667 if (vect_print_dump_info (REPORT_DETAILS))
2668 fprintf (vect_dump, "use not simple.");
2669 return false;
2670 }
2671 /* If op0 is an external or constant def use a vector type with
2672 the same size as the output vector type. */
2673 if (!vectype)
2674 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2675 if (vec_stmt)
2676 gcc_assert (vectype);
2677 if (!vectype)
2678 {
2679 if (vect_print_dump_info (REPORT_DETAILS))
2680 {
2681 fprintf (vect_dump, "no vectype for scalar type ");
2682 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2683 }
2684
2685 return false;
2686 }
2687
2688 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2689 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2690 if (nunits_out != nunits_in)
2691 return false;
2692
2693 if (op_type == binary_op || op_type == ternary_op)
2694 {
2695 op1 = gimple_assign_rhs2 (stmt);
2696 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2697 &dt[1]))
2698 {
2699 if (vect_print_dump_info (REPORT_DETAILS))
2700 fprintf (vect_dump, "use not simple.");
2701 return false;
2702 }
2703 }
2704 if (op_type == ternary_op)
2705 {
2706 op2 = gimple_assign_rhs3 (stmt);
2707 if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
2708 &dt[2]))
2709 {
2710 if (vect_print_dump_info (REPORT_DETAILS))
2711 fprintf (vect_dump, "use not simple.");
2712 return false;
2713 }
2714 }
2715
2716 if (loop_vinfo)
2717 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2718 else
2719 vf = 1;
2720
2721 /* Multiple types in SLP are handled by creating the appropriate number of
2722 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2723 case of SLP. */
2724 if (slp_node || PURE_SLP_STMT (stmt_info))
2725 ncopies = 1;
2726 else
2727 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2728
2729 gcc_assert (ncopies >= 1);
2730
2731 /* Shifts are handled in vectorizable_shift (). */
2732 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2733 || code == RROTATE_EXPR)
2734 return false;
2735
2736 optab = optab_for_tree_code (code, vectype, optab_default);
2737
2738 /* Supportable by target? */
2739 if (!optab)
2740 {
2741 if (vect_print_dump_info (REPORT_DETAILS))
2742 fprintf (vect_dump, "no optab.");
2743 return false;
2744 }
2745 vec_mode = TYPE_MODE (vectype);
2746 icode = (int) optab_handler (optab, vec_mode);
2747 if (icode == CODE_FOR_nothing)
2748 {
2749 if (vect_print_dump_info (REPORT_DETAILS))
2750 fprintf (vect_dump, "op not supported by target.");
2751 /* Check only during analysis. */
2752 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2753 || (vf < vect_min_worthwhile_factor (code)
2754 && !vec_stmt))
2755 return false;
2756 if (vect_print_dump_info (REPORT_DETAILS))
2757 fprintf (vect_dump, "proceeding using word mode.");
2758 }
2759
2760 /* Worthwhile without SIMD support? Check only during analysis. */
2761 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2762 && vf < vect_min_worthwhile_factor (code)
2763 && !vec_stmt)
2764 {
2765 if (vect_print_dump_info (REPORT_DETAILS))
2766 fprintf (vect_dump, "not worthwhile without SIMD support.");
2767 return false;
2768 }
2769
2770 if (!vec_stmt) /* transformation not required. */
2771 {
2772 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2773 if (vect_print_dump_info (REPORT_DETAILS))
2774 fprintf (vect_dump, "=== vectorizable_operation ===");
2775 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2776 return true;
2777 }
2778
2779 /** Transform. **/
2780
2781 if (vect_print_dump_info (REPORT_DETAILS))
2782 fprintf (vect_dump, "transform binary/unary operation.");
2783
2784 /* Handle def. */
2785 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2786
2787 /* Allocate VECs for vector operands.  In case of SLP, vector operands are
2788 created in the previous stages of the recursion, so no allocation is
2789 needed here.  In case of loop-based vectorization we allocate VECs of
2790 size 1.  VEC_OPRNDS0 is always needed; VEC_OPRNDS1 is allocated only for
2791 a binary or ternary operation, and VEC_OPRNDS2 only for a ternary
2792 operation.  (Shift operations are handled in vectorizable_shift, not
2793 here.)  */
2794 if (!slp_node)
2795 {
2796 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2797 if (op_type == binary_op || op_type == ternary_op)
2798 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2799 if (op_type == ternary_op)
2800 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2801 }
2802
2803 /* In case the vectorization factor (VF) is bigger than the number
2804 of elements that we can fit in a vectype (nunits), we have to generate
2805 more than one vector stmt - i.e - we need to "unroll" the
2806 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2807 from one copy of the vector stmt to the next, in the field
2808 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2809 stages to find the correct vector defs to be used when vectorizing
2810 stmts that use the defs of the current stmt. The example below
2811 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
2812 we need to create 4 vectorized stmts):
2813
2814 before vectorization:
2815 RELATED_STMT VEC_STMT
2816 S1: x = memref - -
2817 S2: z = x + 1 - -
2818
2819 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2820 there):
2821 RELATED_STMT VEC_STMT
2822 VS1_0: vx0 = memref0 VS1_1 -
2823 VS1_1: vx1 = memref1 VS1_2 -
2824 VS1_2: vx2 = memref2 VS1_3 -
2825 VS1_3: vx3 = memref3 - -
2826 S1: x = load - VS1_0
2827 S2: z = x + 1 - -
2828
2829 step2: vectorize stmt S2 (done here):
2830 To vectorize stmt S2 we first need to find the relevant vector
2831 def for the first operand 'x'. This is, as usual, obtained from
2832 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2833 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2834 relevant vector def 'vx0'. Having found 'vx0' we can generate
2835 the vector stmt VS2_0, and as usual, record it in the
2836 STMT_VINFO_VEC_STMT of stmt S2.
2837 When creating the second copy (VS2_1), we obtain the relevant vector
2838 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2839 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2840 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2841 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2842 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2843 chain of stmts and pointers:
2844 RELATED_STMT VEC_STMT
2845 VS1_0: vx0 = memref0 VS1_1 -
2846 VS1_1: vx1 = memref1 VS1_2 -
2847 VS1_2: vx2 = memref2 VS1_3 -
2848 VS1_3: vx3 = memref3 - -
2849 S1: x = load - VS1_0
2850 VS2_0: vz0 = vx0 + v1 VS2_1 -
2851 VS2_1: vz1 = vx1 + v1 VS2_2 -
2852 VS2_2: vz2 = vx2 + v1 VS2_3 -
2853 VS2_3: vz3 = vx3 + v1 - -
2854 S2: z = x + 1 - VS2_0 */
2855
2856 prev_stmt_info = NULL;
2857 for (j = 0; j < ncopies; j++)
2858 {
2859 /* Handle uses. */
2860 if (j == 0)
2861 {
2862 if (op_type == binary_op || op_type == ternary_op)
2863 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2864 slp_node);
2865 else
2866 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2867 slp_node);
2868 if (op_type == ternary_op)
2869 {
2870 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2871 VEC_quick_push (tree, vec_oprnds2,
2872 vect_get_vec_def_for_operand (op2, stmt, NULL));
2873 }
2874 }
2875 else
2876 {
2877 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2878 if (op_type == ternary_op)
2879 {
2880 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
2881 VEC_quick_push (tree, vec_oprnds2,
2882 vect_get_vec_def_for_stmt_copy (dt[2],
2883 vec_oprnd));
2884 }
2885 }
2886
2887 /* Arguments are ready. Create the new vector stmt. */
2888 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2889 {
2890 vop1 = ((op_type == binary_op || op_type == ternary_op)
2891 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
2892 vop2 = ((op_type == ternary_op)
2893 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
2894 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
2895 vop0, vop1, vop2);
2896 new_temp = make_ssa_name (vec_dest, new_stmt);
2897 gimple_assign_set_lhs (new_stmt, new_temp);
2898 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2899 if (slp_node)
2900 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2901 }
2902
2903 if (slp_node)
2904 continue;
2905
2906 if (j == 0)
2907 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2908 else
2909 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2910 prev_stmt_info = vinfo_for_stmt (new_stmt);
2911 }
2912
2913 VEC_free (tree, heap, vec_oprnds0);
2914 if (vec_oprnds1)
2915 VEC_free (tree, heap, vec_oprnds1);
2916 if (vec_oprnds2)
2917 VEC_free (tree, heap, vec_oprnds2);
2918
2919 return true;
2920 }
2921
2922
2923 /* Get vectorized definitions for loop-based vectorization. For the first
2924 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2925 the scalar operand), and for the rest we get a copy with
2926 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2927 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2928 The vectors are collected into VEC_OPRNDS. */
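/* For example (an illustration): for a two-step demotion the caller passes
   MULTI_STEP_CVT == vect_pow2 (1) - 1 == 1, so this function recurses once
   and the two levels push 2 + 2 == 4 vector defs into VEC_OPRNDS - e.g. the
   four V4SI inputs needed to produce a single V16QI result via V8HI.  */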
2929
2930 static void
2931 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2932 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2933 {
2934 tree vec_oprnd;
2935
2936 /* Get first vector operand. */
2937 /* All the vector operands except the very first one (which is the scalar
2938 operand) are stmt copies.  */
2939 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2940 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2941 else
2942 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2943
2944 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2945
2946 /* Get second vector operand. */
2947 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2948 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2949
2950 *oprnd = vec_oprnd;
2951
2952 /* For conversion in multiple steps, continue to get operands
2953 recursively. */
2954 if (multi_step_cvt)
2955 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2956 }
2957
2958
2959 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2960 For multi-step conversions store the resulting vectors and call the function
2961 recursively. */
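/* A small illustration (not from the source): given four input vectors
   {v0, v1, v2, v3}, the first pass below emits two demotions, on (v0,v1) and
   (v2,v3), storing the two intermediate results at indices 0 and 1 of
   VEC_OPRNDS; the recursive call then combines those two into the final
   narrowed vector.  */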
2962
2963 static void
2964 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2965 int multi_step_cvt, gimple stmt,
2966 VEC (tree, heap) *vec_dsts,
2967 gimple_stmt_iterator *gsi,
2968 slp_tree slp_node, enum tree_code code,
2969 stmt_vec_info *prev_stmt_info)
2970 {
2971 unsigned int i;
2972 tree vop0, vop1, new_tmp, vec_dest;
2973 gimple new_stmt;
2974 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2975
2976 vec_dest = VEC_pop (tree, vec_dsts);
2977
2978 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2979 {
2980 /* Create demotion operation. */
2981 vop0 = VEC_index (tree, *vec_oprnds, i);
2982 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2983 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2984 new_tmp = make_ssa_name (vec_dest, new_stmt);
2985 gimple_assign_set_lhs (new_stmt, new_tmp);
2986 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2987
2988 if (multi_step_cvt)
2989 /* Store the resulting vector for next recursive call. */
2990 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2991 else
2992 {
2993 /* This is the last step of the conversion sequence. Store the
2994 vectors in SLP_NODE or in vector info of the scalar statement
2995 (or in STMT_VINFO_RELATED_STMT chain). */
2996 if (slp_node)
2997 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2998 else
2999 {
3000 if (!*prev_stmt_info)
3001 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3002 else
3003 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3004
3005 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3006 }
3007 }
3008 }
3009
3010 /* For multi-step demotion operations we first generate demotion operations
3011 from the source type to the intermediate types, and then combine the
3012 results (stored in VEC_OPRNDS) with a demotion operation to the
3013 destination type.  */
3014 if (multi_step_cvt)
3015 {
3016 /* At each level of recursion we have half of the operands we had at the
3017 previous level.  */
3018 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
3019 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3020 stmt, vec_dsts, gsi, slp_node,
3021 code, prev_stmt_info);
3022 }
3023 }
3024
3025
3026 /* Function vectorizable_type_demotion
3027
3028 Check if STMT performs a binary or unary operation that involves
3029 type demotion, and if it can be vectorized.
3030 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3031 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3032 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3033
3034 static bool
3035 vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
3036 gimple *vec_stmt, slp_tree slp_node)
3037 {
3038 tree vec_dest;
3039 tree scalar_dest;
3040 tree op0;
3041 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3042 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3043 enum tree_code code, code1 = ERROR_MARK;
3044 tree def;
3045 gimple def_stmt;
3046 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3047 stmt_vec_info prev_stmt_info;
3048 int nunits_in;
3049 int nunits_out;
3050 tree vectype_out;
3051 int ncopies;
3052 int j, i;
3053 tree vectype_in;
3054 int multi_step_cvt = 0;
3055 VEC (tree, heap) *vec_oprnds0 = NULL;
3056 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
3057 tree last_oprnd, intermediate_type;
3058 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3059
3060 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3061 return false;
3062
3063 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3064 return false;
3065
3066 /* Is STMT a vectorizable type-demotion operation? */
3067 if (!is_gimple_assign (stmt))
3068 return false;
3069
3070 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3071 return false;
3072
3073 code = gimple_assign_rhs_code (stmt);
3074 if (!CONVERT_EXPR_CODE_P (code))
3075 return false;
3076
3077 scalar_dest = gimple_assign_lhs (stmt);
3078 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3079
3080 /* Check the operands of the operation. */
3081 op0 = gimple_assign_rhs1 (stmt);
3082 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3083 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3084 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3085 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3086 && CONVERT_EXPR_CODE_P (code))))
3087 return false;
3088 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
3089 &def_stmt, &def, &dt[0], &vectype_in))
3090 {
3091 if (vect_print_dump_info (REPORT_DETAILS))
3092 fprintf (vect_dump, "use not simple.");
3093 return false;
3094 }
3095 /* If op0 is an external def use a vector type with the
3096 same size as the output vector type if possible. */
3097 if (!vectype_in)
3098 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3099 if (vec_stmt)
3100 gcc_assert (vectype_in);
3101 if (!vectype_in)
3102 {
3103 if (vect_print_dump_info (REPORT_DETAILS))
3104 {
3105 fprintf (vect_dump, "no vectype for scalar type ");
3106 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3107 }
3108
3109 return false;
3110 }
3111
3112 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3113 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3114 if (nunits_in >= nunits_out)
3115 return false;
3116
3117 /* Multiple types in SLP are handled by creating the appropriate number of
3118 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3119 case of SLP. */
3120 if (slp_node || PURE_SLP_STMT (stmt_info))
3121 ncopies = 1;
3122 else
3123 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3124 gcc_assert (ncopies >= 1);
3125
3126 /* Supportable by target? */
3127 if (!supportable_narrowing_operation (code, vectype_out, vectype_in,
3128 &code1, &multi_step_cvt, &interm_types))
3129 return false;
3130
3131 if (!vec_stmt) /* transformation not required. */
3132 {
3133 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3134 if (vect_print_dump_info (REPORT_DETAILS))
3135 fprintf (vect_dump, "=== vectorizable_demotion ===");
3136 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3137 return true;
3138 }
3139
3140 /** Transform. **/
3141 if (vect_print_dump_info (REPORT_DETAILS))
3142 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
3143 ncopies);
3144
3145 /* In case of multi-step demotion, we first generate demotion operations to
3146 the intermediate types, and then from those types to the final one.
3147 We create vector destinations for the intermediate types (TYPES) received
3148 from supportable_narrowing_operation, and store them in the correct order
3149 for future use in vect_create_vectorized_demotion_stmts().  */
3150 if (multi_step_cvt)
3151 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3152 else
3153 vec_dsts = VEC_alloc (tree, heap, 1);
3154
3155 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3156 VEC_quick_push (tree, vec_dsts, vec_dest);
3157
3158 if (multi_step_cvt)
3159 {
3160 for (i = VEC_length (tree, interm_types) - 1;
3161 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
3162 {
3163 vec_dest = vect_create_destination_var (scalar_dest,
3164 intermediate_type);
3165 VEC_quick_push (tree, vec_dsts, vec_dest);
3166 }
3167 }
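   /* Illustration: for an int -> char demotion done as V4SI -> V8HI -> V16QI,
      VEC_DSTS now holds the V16QI destination (pushed first) and the V8HI
      destination (pushed last), so vect_create_vectorized_demotion_stmts pops
      the intermediate V8HI destination first and uses the final V16QI one for
      the last step.  */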
3168
3169 /* In case the vectorization factor (VF) is bigger than the number
3170 of elements that we can fit in a vectype (nunits), we have to generate
3171 more than one vector stmt - i.e - we need to "unroll" the
3172 vector stmt by a factor VF/nunits. */
3173 last_oprnd = op0;
3174 prev_stmt_info = NULL;
3175 for (j = 0; j < ncopies; j++)
3176 {
3177 /* Handle uses. */
3178 if (slp_node)
3179 vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, -1);
3180 else
3181 {
3182 VEC_free (tree, heap, vec_oprnds0);
3183 vec_oprnds0 = VEC_alloc (tree, heap,
3184 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
3185 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3186 vect_pow2 (multi_step_cvt) - 1);
3187 }
3188
3189 /* Arguments are ready. Create the new vector stmts. */
3190 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3191 vect_create_vectorized_demotion_stmts (&vec_oprnds0,
3192 multi_step_cvt, stmt, tmp_vec_dsts,
3193 gsi, slp_node, code1,
3194 &prev_stmt_info);
3195 }
3196
3197 VEC_free (tree, heap, vec_oprnds0);
3198 VEC_free (tree, heap, vec_dsts);
3199 VEC_free (tree, heap, tmp_vec_dsts);
3200 VEC_free (tree, heap, interm_types);
3201
3202 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3203 return true;
3204 }
3205
3206
3207 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3208 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3209 the resulting vectors and call the function recursively. */
3210
3211 static void
3212 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
3213 VEC (tree, heap) **vec_oprnds1,
3214 int multi_step_cvt, gimple stmt,
3215 VEC (tree, heap) *vec_dsts,
3216 gimple_stmt_iterator *gsi,
3217 slp_tree slp_node, enum tree_code code1,
3218 enum tree_code code2, tree decl1,
3219 tree decl2, int op_type,
3220 stmt_vec_info *prev_stmt_info)
3221 {
3222 int i;
3223 tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
3224 gimple new_stmt1, new_stmt2;
3225 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3226 VEC (tree, heap) *vec_tmp;
3227
3228 vec_dest = VEC_pop (tree, vec_dsts);
3229 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
3230
3231 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
3232 {
3233 if (op_type == binary_op)
3234 vop1 = VEC_index (tree, *vec_oprnds1, i);
3235 else
3236 vop1 = NULL_TREE;
3237
3238 /* Generate the two halves of promotion operation. */
3239 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3240 op_type, vec_dest, gsi, stmt);
3241 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3242 op_type, vec_dest, gsi, stmt);
3243 if (is_gimple_call (new_stmt1))
3244 {
3245 new_tmp1 = gimple_call_lhs (new_stmt1);
3246 new_tmp2 = gimple_call_lhs (new_stmt2);
3247 }
3248 else
3249 {
3250 new_tmp1 = gimple_assign_lhs (new_stmt1);
3251 new_tmp2 = gimple_assign_lhs (new_stmt2);
3252 }
3253
3254 if (multi_step_cvt)
3255 {
3256 /* Store the results for the recursive call. */
3257 VEC_quick_push (tree, vec_tmp, new_tmp1);
3258 VEC_quick_push (tree, vec_tmp, new_tmp2);
3259 }
3260 else
3261 {
3262 /* Last step of the promotion sequence - store the results.  */
3263 if (slp_node)
3264 {
3265 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
3266 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
3267 }
3268 else
3269 {
3270 if (!*prev_stmt_info)
3271 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
3272 else
3273 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
3274
3275 *prev_stmt_info = vinfo_for_stmt (new_stmt1);
3276 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
3277 *prev_stmt_info = vinfo_for_stmt (new_stmt2);
3278 }
3279 }
3280 }
3281
3282 if (multi_step_cvt)
3283 {
3284 /* For a multi-step promotion operation we call the function recursively
3285 for every stage.  We start from the input type, create promotion
3286 operations to the intermediate types, and then create promotions
3287 to the output type.  */
3288 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
3289 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
3290 multi_step_cvt - 1, stmt,
3291 vec_dsts, gsi, slp_node, code1,
3292 code2, decl1, decl2, op_type,
3293 prev_stmt_info);
3294 }
3295
3296 VEC_free (tree, heap, vec_tmp);
3297 }
3298
3299
3300 /* Function vectorizable_type_promotion
3301
3302 Check if STMT performs a binary or unary operation that involves
3303 type promotion, and if it can be vectorized.
3304 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3305 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3306 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3307
3308 static bool
3309 vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
3310 gimple *vec_stmt, slp_tree slp_node)
3311 {
3312 tree vec_dest;
3313 tree scalar_dest;
3314 tree op0, op1 = NULL;
3315 tree vec_oprnd0 = NULL, vec_oprnd1 = NULL;
3316 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3317 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3318 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3319 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3320 int op_type;
3321 tree def;
3322 gimple def_stmt;
3323 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3324 stmt_vec_info prev_stmt_info;
3325 int nunits_in;
3326 int nunits_out;
3327 tree vectype_out;
3328 int ncopies;
3329 int j, i;
3330 tree vectype_in;
3331 tree intermediate_type = NULL_TREE;
3332 int multi_step_cvt = 0;
3333 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3334 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
3335 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3336
3337 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3338 return false;
3339
3340 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3341 return false;
3342
3343 /* Is STMT a vectorizable type-promotion operation? */
3344 if (!is_gimple_assign (stmt))
3345 return false;
3346
3347 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3348 return false;
3349
3350 code = gimple_assign_rhs_code (stmt);
3351 if (!CONVERT_EXPR_CODE_P (code)
3352 && code != WIDEN_MULT_EXPR)
3353 return false;
3354
3355 scalar_dest = gimple_assign_lhs (stmt);
3356 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3357
3358 /* Check the operands of the operation. */
3359 op0 = gimple_assign_rhs1 (stmt);
3360 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3361 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3362 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3363 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3364 && CONVERT_EXPR_CODE_P (code))))
3365 return false;
3366 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
3367 &def_stmt, &def, &dt[0], &vectype_in))
3368 {
3369 if (vect_print_dump_info (REPORT_DETAILS))
3370 fprintf (vect_dump, "use not simple.");
3371 return false;
3372 }
3373
3374 op_type = TREE_CODE_LENGTH (code);
3375 if (op_type == binary_op)
3376 {
3377 bool ok;
3378
3379 op1 = gimple_assign_rhs2 (stmt);
3380 if (code == WIDEN_MULT_EXPR)
3381 {
3382 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3383 OP1. */
3384 if (CONSTANT_CLASS_P (op0))
3385 ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
3386 &def_stmt, &def, &dt[1], &vectype_in);
3387 else
3388 ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
3389 &dt[1]);
3390
3391 if (!ok)
3392 {
3393 if (vect_print_dump_info (REPORT_DETAILS))
3394 fprintf (vect_dump, "use not simple.");
3395 return false;
3396 }
3397 }
3398 }
3399
3400 /* If op0 is an external or constant def use a vector type with
3401 the same size as the output vector type. */
3402 if (!vectype_in)
3403 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3404 if (vec_stmt)
3405 gcc_assert (vectype_in);
3406 if (!vectype_in)
3407 {
3408 if (vect_print_dump_info (REPORT_DETAILS))
3409 {
3410 fprintf (vect_dump, "no vectype for scalar type ");
3411 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3412 }
3413
3414 return false;
3415 }
3416
3417 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3418 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3419 if (nunits_in <= nunits_out)
3420 return false;
3421
3422 /* Multiple types in SLP are handled by creating the appropriate number of
3423 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3424 case of SLP. */
3425 if (slp_node || PURE_SLP_STMT (stmt_info))
3426 ncopies = 1;
3427 else
3428 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3429
3430 gcc_assert (ncopies >= 1);
3431
3432 /* Supportable by target? */
3433 if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3434 &decl1, &decl2, &code1, &code2,
3435 &multi_step_cvt, &interm_types))
3436 return false;
3437
3438 /* A binary widening operation can only be supported directly by the
3439 target; it cannot be done in multiple steps.  */
3440 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3441
3442 if (!vec_stmt) /* transformation not required. */
3443 {
3444 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3445 if (vect_print_dump_info (REPORT_DETAILS))
3446 fprintf (vect_dump, "=== vectorizable_promotion ===");
3447 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
3448 return true;
3449 }
3450
3451 /** Transform. **/
3452
3453 if (vect_print_dump_info (REPORT_DETAILS))
3454 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
3455 ncopies);
3456
3457 if (code == WIDEN_MULT_EXPR)
3458 {
3459 if (CONSTANT_CLASS_P (op0))
3460 op0 = fold_convert (TREE_TYPE (op1), op0);
3461 else if (CONSTANT_CLASS_P (op1))
3462 op1 = fold_convert (TREE_TYPE (op0), op1);
3463 }
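   /* E.g. (hypothetical source): in  int_res = s_1 * 3  where s_1 is a short,
      the int constant 3 is folded to short here so that both operands of the
      widening multiplication have the narrower input type.  */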
3464
3465 /* Handle def. */
3466 /* In case of multi-step promotion, we first generate promotion operations
3467 to the intermediate types, and then from those types to the final one.
3468 We store the vector destinations in VEC_DSTS in the correct order for
3469 recursive creation of promotion operations in
3470 vect_create_vectorized_promotion_stmts().  Vector destinations are created
3471 according to TYPES received from supportable_widening_operation().  */
3472 if (multi_step_cvt)
3473 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3474 else
3475 vec_dsts = VEC_alloc (tree, heap, 1);
3476
3477 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3478 VEC_quick_push (tree, vec_dsts, vec_dest);
3479
3480 if (multi_step_cvt)
3481 {
3482 for (i = VEC_length (tree, interm_types) - 1;
3483 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
3484 {
3485 vec_dest = vect_create_destination_var (scalar_dest,
3486 intermediate_type);
3487 VEC_quick_push (tree, vec_dsts, vec_dest);
3488 }
3489 }
3490
3491 if (!slp_node)
3492 {
3493 vec_oprnds0 = VEC_alloc (tree, heap,
3494 (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3495 if (op_type == binary_op)
3496 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3497 }
3498
3499 /* In case the vectorization factor (VF) is bigger than the number
3500 of elements that we can fit in a vectype (nunits), we have to generate
3501 more than one vector stmt - i.e - we need to "unroll" the
3502 vector stmt by a factor VF/nunits. */
3503
3504 prev_stmt_info = NULL;
3505 for (j = 0; j < ncopies; j++)
3506 {
3507 /* Handle uses. */
3508 if (j == 0)
3509 {
3510 if (slp_node)
3511 vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0,
3512 &vec_oprnds1, -1);
3513 else
3514 {
3515 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3516 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
3517 if (op_type == binary_op)
3518 {
3519 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3520 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3521 }
3522 }
3523 }
3524 else
3525 {
3526 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3527 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
3528 if (op_type == binary_op)
3529 {
3530 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
3531 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
3532 }
3533 }
3534
3535 /* Arguments are ready. Create the new vector stmts. */
3536 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3537 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
3538 multi_step_cvt, stmt,
3539 tmp_vec_dsts,
3540 gsi, slp_node, code1, code2,
3541 decl1, decl2, op_type,
3542 &prev_stmt_info);
3543 }
3544
3545 VEC_free (tree, heap, vec_dsts);
3546 VEC_free (tree, heap, tmp_vec_dsts);
3547 VEC_free (tree, heap, interm_types);
3548 VEC_free (tree, heap, vec_oprnds0);
3549 VEC_free (tree, heap, vec_oprnds1);
3550
3551 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3552 return true;
3553 }
3554
3555
3556 /* Function vectorizable_store.
3557
3558 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
3559 can be vectorized.
3560 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3561 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3562 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3563
3564 static bool
3565 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3566 slp_tree slp_node)
3567 {
3568 tree scalar_dest;
3569 tree data_ref;
3570 tree op;
3571 tree vec_oprnd = NULL_TREE;
3572 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3573 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3574 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3575 tree elem_type;
3576 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3577 struct loop *loop = NULL;
3578 enum machine_mode vec_mode;
3579 tree dummy;
3580 enum dr_alignment_support alignment_support_scheme;
3581 tree def;
3582 gimple def_stmt;
3583 enum vect_def_type dt;
3584 stmt_vec_info prev_stmt_info = NULL;
3585 tree dataref_ptr = NULL_TREE;
3586 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3587 int ncopies;
3588 int j;
3589 gimple next_stmt, first_stmt = NULL;
3590 bool strided_store = false;
3591 bool store_lanes_p = false;
3592 unsigned int group_size, i;
3593 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3594 bool inv_p;
3595 VEC(tree,heap) *vec_oprnds = NULL;
3596 bool slp = (slp_node != NULL);
3597 unsigned int vec_num;
3598 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3599 tree aggr_type;
3600
3601 if (loop_vinfo)
3602 loop = LOOP_VINFO_LOOP (loop_vinfo);
3603
3604 /* Multiple types in SLP are handled by creating the appropriate number of
3605 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3606 case of SLP. */
3607 if (slp || PURE_SLP_STMT (stmt_info))
3608 ncopies = 1;
3609 else
3610 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3611
3612 gcc_assert (ncopies >= 1);
3613
3614 /* FORNOW. This restriction should be relaxed. */
3615 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3616 {
3617 if (vect_print_dump_info (REPORT_DETAILS))
3618 fprintf (vect_dump, "multiple types in nested loop.");
3619 return false;
3620 }
3621
3622 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3623 return false;
3624
3625 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3626 return false;
3627
3628 /* Is vectorizable store? */
3629
3630 if (!is_gimple_assign (stmt))
3631 return false;
3632
3633 scalar_dest = gimple_assign_lhs (stmt);
3634 if (TREE_CODE (scalar_dest) != ARRAY_REF
3635 && TREE_CODE (scalar_dest) != INDIRECT_REF
3636 && TREE_CODE (scalar_dest) != COMPONENT_REF
3637 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3638 && TREE_CODE (scalar_dest) != REALPART_EXPR
3639 && TREE_CODE (scalar_dest) != MEM_REF)
3640 return false;
3641
3642 gcc_assert (gimple_assign_single_p (stmt));
3643 op = gimple_assign_rhs1 (stmt);
3644 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
3645 {
3646 if (vect_print_dump_info (REPORT_DETAILS))
3647 fprintf (vect_dump, "use not simple.");
3648 return false;
3649 }
3650
3651 /* The scalar rhs type needs to be trivially convertible to the vector
3652 component type. This should always be the case. */
3653 elem_type = TREE_TYPE (vectype);
3654 if (!useless_type_conversion_p (elem_type, TREE_TYPE (op)))
3655 {
3656 if (vect_print_dump_info (REPORT_DETAILS))
3657 fprintf (vect_dump, "??? operands of different types");
3658 return false;
3659 }
3660
3661 vec_mode = TYPE_MODE (vectype);
3662 /* FORNOW. In some cases can vectorize even if data-type not supported
3663 (e.g. - array initialization with 0). */
3664 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3665 return false;
3666
3667 if (!STMT_VINFO_DATA_REF (stmt_info))
3668 return false;
3669
3670 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
3671 {
3672 if (vect_print_dump_info (REPORT_DETAILS))
3673 fprintf (vect_dump, "negative step for store.");
3674 return false;
3675 }
3676
3677 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3678 {
3679 strided_store = true;
3680 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3681 if (!slp && !PURE_SLP_STMT (stmt_info))
3682 {
3683 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3684 if (vect_store_lanes_supported (vectype, group_size))
3685 store_lanes_p = true;
3686 else if (!vect_strided_store_supported (vectype, group_size))
3687 return false;
3688 }
3689
3690 if (first_stmt == stmt)
3691 {
3692 /* STMT is the leader of the group. Check the operands of all the
3693 stmts of the group. */
3694 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3695 while (next_stmt)
3696 {
3697 gcc_assert (gimple_assign_single_p (next_stmt));
3698 op = gimple_assign_rhs1 (next_stmt);
3699 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
3700 &def, &dt))
3701 {
3702 if (vect_print_dump_info (REPORT_DETAILS))
3703 fprintf (vect_dump, "use not simple.");
3704 return false;
3705 }
3706 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3707 }
3708 }
3709 }
3710
3711 if (!vec_stmt) /* transformation not required. */
3712 {
3713 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3714 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
3715 return true;
3716 }
3717
3718 /** Transform. **/
3719
3720 if (strided_store)
3721 {
3722 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3723 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3724
3725 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3726
3727 /* FORNOW */
3728 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3729
3730 /* We vectorize all the stmts of the interleaving group when we
3731 reach the last stmt in the group. */
3732 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3733 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3734 && !slp)
3735 {
3736 *vec_stmt = NULL;
3737 return true;
3738 }
3739
3740 if (slp)
3741 {
3742 strided_store = false;
3743 /* VEC_NUM is the number of vect stmts to be created for this
3744 group. */
3745 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3746 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3747 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3748 }
3749 else
3750 /* VEC_NUM is the number of vect stmts to be created for this
3751 group. */
3752 vec_num = group_size;
3753 }
3754 else
3755 {
3756 first_stmt = stmt;
3757 first_dr = dr;
3758 group_size = vec_num = 1;
3759 }
3760
3761 if (vect_print_dump_info (REPORT_DETAILS))
3762 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3763
3764 dr_chain = VEC_alloc (tree, heap, group_size);
3765 oprnds = VEC_alloc (tree, heap, group_size);
3766
3767 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3768 gcc_assert (alignment_support_scheme);
3769 /* Targets with store-lane instructions must not require explicit
3770 realignment. */
3771 gcc_assert (!store_lanes_p
3772 || alignment_support_scheme == dr_aligned
3773 || alignment_support_scheme == dr_unaligned_supported);
3774
3775 if (store_lanes_p)
3776 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3777 else
3778 aggr_type = vectype;
3779
3780 /* In case the vectorization factor (VF) is bigger than the number
3781 of elements that we can fit in a vectype (nunits), we have to generate
3782 more than one vector stmt - i.e - we need to "unroll" the
3783 vector stmt by a factor VF/nunits. For more details see documentation in
3784 vect_get_vec_def_for_copy_stmt. */
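/* For example (hypothetical numbers): with VF = 8 and a vectype holding
   4 elements (nunits = 4), NCOPIES = 8 / 4 = 2 and two copies of each
   vector store are generated below.  */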
3785
3786 /* In case of interleaving (non-unit strided access):
3787
3788 S1: &base + 2 = x2
3789 S2: &base = x0
3790 S3: &base + 1 = x1
3791 S4: &base + 3 = x3
3792
3793 We create vectorized stores starting from base address (the access of the
3794 first stmt in the chain (S2 in the above example), when the last store stmt
3795 of the chain (S4) is reached:
3796
3797 VS1: &base = vx2
3798 VS2: &base + vec_size*1 = vx0
3799 VS3: &base + vec_size*2 = vx1
3800 VS4: &base + vec_size*3 = vx3
3801
3802 Then permutation statements are generated:
3803
3804 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3805 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3806 ...
3807
3808 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3809 (the order of the data-refs in the output of vect_permute_store_chain
3810 corresponds to the order of scalar stmts in the interleaving chain - see
3811 the documentation of vect_permute_store_chain()).
3812
3813 In case of both multiple types and interleaving, above vector stores and
3814 permutation stmts are created for every copy. The result vector stmts are
3815 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3816 STMT_VINFO_RELATED_STMT for the next copies.
3817 */
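/* A hypothetical source-level example of such an interleaving group
   (group_size 2, non-unit stride):

     for (i = 0; i < n; i++)
       {
         out[2*i]     = a[i];
         out[2*i + 1] = b[i];
       }

   The two stores form one chain; their vectorized defs are interleaved
   by vect_permute_store_chain () before being written out contiguously.  */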
3818
3819 prev_stmt_info = NULL;
3820 for (j = 0; j < ncopies; j++)
3821 {
3822 gimple new_stmt;
3823 gimple ptr_incr;
3824
3825 if (j == 0)
3826 {
3827 if (slp)
3828 {
3829 /* Get vectorized arguments for SLP_NODE. */
3830 vect_get_slp_defs (NULL_TREE, NULL_TREE, slp_node, &vec_oprnds,
3831 NULL, -1);
3832
3833 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3834 }
3835 else
3836 {
3837 /* For interleaved stores we collect vectorized defs for all the
3838 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3839 used as an input to vect_permute_store_chain(), and OPRNDS as
3840 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3841
3842 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3843 OPRNDS are of size 1. */
3844 next_stmt = first_stmt;
3845 for (i = 0; i < group_size; i++)
3846 {
3847 /* Since gaps are not supported for interleaved stores,
3848 GROUP_SIZE is the exact number of stmts in the chain.
3849 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3850 there is no interleaving, GROUP_SIZE is 1, and only one
3851 iteration of the loop will be executed. */
3852 gcc_assert (next_stmt
3853 && gimple_assign_single_p (next_stmt));
3854 op = gimple_assign_rhs1 (next_stmt);
3855
3856 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3857 NULL);
3858 VEC_quick_push(tree, dr_chain, vec_oprnd);
3859 VEC_quick_push(tree, oprnds, vec_oprnd);
3860 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3861 }
3862 }
3863
3864 /* We should have caught mismatched types earlier. */
3865 gcc_assert (useless_type_conversion_p (vectype,
3866 TREE_TYPE (vec_oprnd)));
3867 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
3868 NULL_TREE, &dummy, gsi,
3869 &ptr_incr, false, &inv_p);
3870 gcc_assert (bb_vinfo || !inv_p);
3871 }
3872 else
3873 {
3874 /* For interleaved stores we created vectorized defs for all the
3875 defs stored in OPRNDS in the previous iteration (previous copy).
3876 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3877 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3878 next copy.
3879 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3880 OPRNDS are of size 1. */
3881 for (i = 0; i < group_size; i++)
3882 {
3883 op = VEC_index (tree, oprnds, i);
3884 vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
3885 &dt);
3886 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3887 VEC_replace(tree, dr_chain, i, vec_oprnd);
3888 VEC_replace(tree, oprnds, i, vec_oprnd);
3889 }
3890 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3891 TYPE_SIZE_UNIT (aggr_type));
3892 }
3893
3894 if (store_lanes_p)
3895 {
3896 tree vec_array;
3897
3898 /* Combine all the vectors into an array. */
3899 vec_array = create_vector_array (vectype, vec_num);
3900 for (i = 0; i < vec_num; i++)
3901 {
3902 vec_oprnd = VEC_index (tree, dr_chain, i);
3903 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
3904 }
3905
3906 /* Emit:
3907 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
3908 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
3909 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
3910 gimple_call_set_lhs (new_stmt, data_ref);
3911 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3912 mark_symbols_for_renaming (new_stmt);
3913 }
3914 else
3915 {
3916 new_stmt = NULL;
3917 if (strided_store)
3918 {
3919 result_chain = VEC_alloc (tree, heap, group_size);
3920 /* Permute. */
3921 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
3922 &result_chain);
3923 }
3924
3925 next_stmt = first_stmt;
3926 for (i = 0; i < vec_num; i++)
3927 {
3928 struct ptr_info_def *pi;
3929
3930 if (i > 0)
3931 /* Bump the vector pointer. */
3932 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
3933 stmt, NULL_TREE);
3934
3935 if (slp)
3936 vec_oprnd = VEC_index (tree, vec_oprnds, i);
3937 else if (strided_store)
3938 /* For strided stores vectorized defs are interleaved in
3939 vect_permute_store_chain(). */
3940 vec_oprnd = VEC_index (tree, result_chain, i);
3941
3942 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
3943 build_int_cst (reference_alias_ptr_type
3944 (DR_REF (first_dr)), 0));
3945 pi = get_ptr_info (dataref_ptr);
3946 pi->align = TYPE_ALIGN_UNIT (vectype);
3947 if (aligned_access_p (first_dr))
3948 pi->misalign = 0;
3949 else if (DR_MISALIGNMENT (first_dr) == -1)
3950 {
3951 TREE_TYPE (data_ref)
3952 = build_aligned_type (TREE_TYPE (data_ref),
3953 TYPE_ALIGN (elem_type));
3954 pi->align = TYPE_ALIGN_UNIT (elem_type);
3955 pi->misalign = 0;
3956 }
3957 else
3958 {
3959 TREE_TYPE (data_ref)
3960 = build_aligned_type (TREE_TYPE (data_ref),
3961 TYPE_ALIGN (elem_type));
3962 pi->misalign = DR_MISALIGNMENT (first_dr);
3963 }
3964
3965 /* Arguments are ready. Create the new vector stmt. */
3966 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
3967 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3968 mark_symbols_for_renaming (new_stmt);
3969
3970 if (slp)
3971 continue;
3972
3973 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3974 if (!next_stmt)
3975 break;
3976 }
3977 }
3978 if (!slp)
3979 {
3980 if (j == 0)
3981 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3982 else
3983 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3984 prev_stmt_info = vinfo_for_stmt (new_stmt);
3985 }
3986 }
3987
3988 VEC_free (tree, heap, dr_chain);
3989 VEC_free (tree, heap, oprnds);
3990 if (result_chain)
3991 VEC_free (tree, heap, result_chain);
3992 if (vec_oprnds)
3993 VEC_free (tree, heap, vec_oprnds);
3994
3995 return true;
3996 }
3997
3998 /* Given a vector type VECTYPE, return the permutation mask that
3999 implements reversal of the vector elements, for use in a
4000 VEC_PERM_EXPR. If such a permutation is not supported by the
4001 target, return NULL. */
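/* For example, for a four-element vector the effective selector is
   { 3, 2, 1, 0 }, so that
     VEC_PERM_EXPR <x, x, mask>
   (see reverse_vec_elements) yields X with its elements reversed.  */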
4002
4003 static tree
4004 perm_mask_for_reverse (tree vectype)
4005 {
4006 tree mask_element_type, mask_type, mask_vec = NULL;
4007 int i, nunits;
4008
4009 if (!can_vec_perm_expr_p (vectype, NULL_TREE))
4010 return NULL;
4011
4012 mask_element_type
4013 = lang_hooks.types.type_for_size
4014 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
4015 mask_type = get_vectype_for_scalar_type (mask_element_type);
4016 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4017
4018 for (i = 0; i < nunits; i++)
4019 mask_vec = tree_cons (NULL, build_int_cst (mask_element_type, i), mask_vec);
4020 mask_vec = build_vector (mask_type, mask_vec);
4021
4022 if (!can_vec_perm_expr_p (vectype, mask_vec))
4023 return NULL;
4024
4025 return mask_vec;
4026 }
4027
4028 /* Given a vector variable X that was generated for the scalar LHS of
4029 STMT, generate instructions to reverse the vector elements of X,
4030 insert them at *GSI and return the permuted vector variable. */
4031
4032 static tree
4033 reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
4034 {
4035 tree vectype = TREE_TYPE (x);
4036 tree mask_vec, perm_dest, data_ref;
4037 gimple perm_stmt;
4038
4039 mask_vec = perm_mask_for_reverse (vectype);
4040
4041 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4042
4043 /* Generate the permute statement. */
4044 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
4045 x, x, mask_vec);
4046 data_ref = make_ssa_name (perm_dest, perm_stmt);
4047 gimple_set_lhs (perm_stmt, data_ref);
4048 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4049
4050 return data_ref;
4051 }
4052
4053 /* vectorizable_load.
4054
4055 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
4056 can be vectorized.
4057 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4058 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4059 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4060
4061 static bool
4062 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4063 slp_tree slp_node, slp_instance slp_node_instance)
4064 {
4065 tree scalar_dest;
4066 tree vec_dest = NULL;
4067 tree data_ref = NULL;
4068 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4069 stmt_vec_info prev_stmt_info;
4070 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4071 struct loop *loop = NULL;
4072 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4073 bool nested_in_vect_loop = false;
4074 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4075 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4076 tree elem_type;
4077 tree new_temp;
4078 enum machine_mode mode;
4079 gimple new_stmt = NULL;
4080 tree dummy;
4081 enum dr_alignment_support alignment_support_scheme;
4082 tree dataref_ptr = NULL_TREE;
4083 gimple ptr_incr;
4084 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4085 int ncopies;
4086 int i, j, group_size;
4087 tree msq = NULL_TREE, lsq;
4088 tree offset = NULL_TREE;
4089 tree realignment_token = NULL_TREE;
4090 gimple phi = NULL;
4091 VEC(tree,heap) *dr_chain = NULL;
4092 bool strided_load = false;
4093 bool load_lanes_p = false;
4094 gimple first_stmt;
4095 tree scalar_type;
4096 bool inv_p;
4097 bool negative;
4098 bool compute_in_loop = false;
4099 struct loop *at_loop;
4100 int vec_num;
4101 bool slp = (slp_node != NULL);
4102 bool slp_perm = false;
4103 enum tree_code code;
4104 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4105 int vf;
4106 tree aggr_type;
4107
4108 if (loop_vinfo)
4109 {
4110 loop = LOOP_VINFO_LOOP (loop_vinfo);
4111 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4112 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4113 }
4114 else
4115 vf = 1;
4116
4117 /* Multiple types in SLP are handled by creating the appropriate number of
4118 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4119 case of SLP. */
4120 if (slp || PURE_SLP_STMT (stmt_info))
4121 ncopies = 1;
4122 else
4123 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4124
4125 gcc_assert (ncopies >= 1);
4126
4127 /* FORNOW. This restriction should be relaxed. */
4128 if (nested_in_vect_loop && ncopies > 1)
4129 {
4130 if (vect_print_dump_info (REPORT_DETAILS))
4131 fprintf (vect_dump, "multiple types in nested loop.");
4132 return false;
4133 }
4134
4135 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4136 return false;
4137
4138 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4139 return false;
4140
4141 /* Is vectorizable load? */
4142 if (!is_gimple_assign (stmt))
4143 return false;
4144
4145 scalar_dest = gimple_assign_lhs (stmt);
4146 if (TREE_CODE (scalar_dest) != SSA_NAME)
4147 return false;
4148
4149 code = gimple_assign_rhs_code (stmt);
4150 if (code != ARRAY_REF
4151 && code != INDIRECT_REF
4152 && code != COMPONENT_REF
4153 && code != IMAGPART_EXPR
4154 && code != REALPART_EXPR
4155 && code != MEM_REF
4156 && TREE_CODE_CLASS (code) != tcc_declaration)
4157 return false;
4158
4159 if (!STMT_VINFO_DATA_REF (stmt_info))
4160 return false;
4161
4162 negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
4163 if (negative && ncopies > 1)
4164 {
4165 if (vect_print_dump_info (REPORT_DETAILS))
4166 fprintf (vect_dump, "multiple types with negative step.");
4167 return false;
4168 }
4169
4170 scalar_type = TREE_TYPE (DR_REF (dr));
4171 mode = TYPE_MODE (vectype);
4172
4173 /* FORNOW. In some cases can vectorize even if data-type not supported
4174 (e.g. - data copies). */
4175 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4176 {
4177 if (vect_print_dump_info (REPORT_DETAILS))
4178 fprintf (vect_dump, "Aligned load, but unsupported type.");
4179 return false;
4180 }
4181
4182 /* The vector component type needs to be trivially convertible to the
4183 scalar lhs. This should always be the case. */
4184 elem_type = TREE_TYPE (vectype);
4185 if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), elem_type))
4186 {
4187 if (vect_print_dump_info (REPORT_DETAILS))
4188 fprintf (vect_dump, "??? operands of different types");
4189 return false;
4190 }
4191
4192 /* Check if the load is a part of an interleaving chain. */
4193 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4194 {
4195 strided_load = true;
4196 /* FORNOW */
4197 gcc_assert (! nested_in_vect_loop);
4198
4199 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4200 if (!slp && !PURE_SLP_STMT (stmt_info))
4201 {
4202 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4203 if (vect_load_lanes_supported (vectype, group_size))
4204 load_lanes_p = true;
4205 else if (!vect_strided_load_supported (vectype, group_size))
4206 return false;
4207 }
4208 }
4209
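/* A hypothetical example of a load with negative step: in a loop such as
     for (i = n - 1; i >= 0; i--)
       s += a[i];
   the data-ref A[i] has a negative DR_STEP; the vector loads are then
   element-reversed using the permutation mask checked below.  */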
4210 if (negative)
4211 {
4212 gcc_assert (!strided_load);
4213 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4214 if (alignment_support_scheme != dr_aligned
4215 && alignment_support_scheme != dr_unaligned_supported)
4216 {
4217 if (vect_print_dump_info (REPORT_DETAILS))
4218 fprintf (vect_dump, "negative step but alignment required.");
4219 return false;
4220 }
4221 if (!perm_mask_for_reverse (vectype))
4222 {
4223 if (vect_print_dump_info (REPORT_DETAILS))
4224 fprintf (vect_dump, "negative step and reversing not supported.");
4225 return false;
4226 }
4227 }
4228
4229 if (!vec_stmt) /* transformation not required. */
4230 {
4231 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4232 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
4233 return true;
4234 }
4235
4236 if (vect_print_dump_info (REPORT_DETAILS))
4237 fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
4238
4239 /** Transform. **/
4240
4241 if (strided_load)
4242 {
4243 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4244 if (slp
4245 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
4246 && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
4247 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
4248
4249 /* Check if the chain of loads is already vectorized. */
4250 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4251 {
4252 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4253 return true;
4254 }
4255 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4256 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4257
4258 /* VEC_NUM is the number of vect stmts to be created for this group. */
4259 if (slp)
4260 {
4261 strided_load = false;
4262 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4263 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4264 slp_perm = true;
4265 }
4266 else
4267 vec_num = group_size;
4268 }
4269 else
4270 {
4271 first_stmt = stmt;
4272 first_dr = dr;
4273 group_size = vec_num = 1;
4274 }
4275
4276 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4277 gcc_assert (alignment_support_scheme);
4278 /* Targets with load-lane instructions must not require explicit
4279 realignment. */
4280 gcc_assert (!load_lanes_p
4281 || alignment_support_scheme == dr_aligned
4282 || alignment_support_scheme == dr_unaligned_supported);
4283
4284 /* In case the vectorization factor (VF) is bigger than the number
4285 of elements that we can fit in a vectype (nunits), we have to generate
4286 more than one vector stmt - i.e - we need to "unroll" the
4287 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4288 from one copy of the vector stmt to the next, in the field
4289 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4290 stages to find the correct vector defs to be used when vectorizing
4291 stmts that use the defs of the current stmt. The example below
4292 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4293 need to create 4 vectorized stmts):
4294
4295 before vectorization:
4296 RELATED_STMT VEC_STMT
4297 S1: x = memref - -
4298 S2: z = x + 1 - -
4299
4300 step 1: vectorize stmt S1:
4301 We first create the vector stmt VS1_0, and, as usual, record a
4302 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4303 Next, we create the vector stmt VS1_1, and record a pointer to
4304 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4305 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4306 stmts and pointers:
4307 RELATED_STMT VEC_STMT
4308 VS1_0: vx0 = memref0 VS1_1 -
4309 VS1_1: vx1 = memref1 VS1_2 -
4310 VS1_2: vx2 = memref2 VS1_3 -
4311 VS1_3: vx3 = memref3 - -
4312 S1: x = load - VS1_0
4313 S2: z = x + 1 - -
4314
4315 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4316 information we recorded in RELATED_STMT field is used to vectorize
4317 stmt S2. */
4318
4319 /* In case of interleaving (non-unit strided access):
4320
4321 S1: x2 = &base + 2
4322 S2: x0 = &base
4323 S3: x1 = &base + 1
4324 S4: x3 = &base + 3
4325
4326 Vectorized loads are created in the order of memory accesses
4327 starting from the access of the first stmt of the chain:
4328
4329 VS1: vx0 = &base
4330 VS2: vx1 = &base + vec_size*1
4331 VS3: vx2 = &base + vec_size*2
4332 VS4: vx3 = &base + vec_size*3
4333
4334 Then permutation statements are generated:
4335
4336 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4337 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4338 ...
4339
4340 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4341 (the order of the data-refs in the output of vect_permute_load_chain
4342 corresponds to the order of scalar stmts in the interleaving chain - see
4343 the documentation of vect_permute_load_chain()).
4344 The generation of permutation stmts and recording them in
4345 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4346
4347 In case of both multiple types and interleaving, the vector loads and
4348 permutation stmts above are created for every copy. The result vector
4349 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4350 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
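/* A hypothetical source-level example of such an interleaved load group
   (group_size 2), e.g. de-interleaving real/imaginary pairs:

     for (i = 0; i < n; i++)
       {
         re[i] = c[2*i];
         im[i] = c[2*i + 1];
       }

   Both loads belong to one chain; the contiguous vector loads are split
   back into the RE and IM streams by the extract-even/odd permutations.  */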
4351
4352 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4353 on a target that supports unaligned accesses (dr_unaligned_supported)
4354 we generate the following code:
4355 p = initial_addr;
4356 indx = 0;
4357 loop {
4358 p = p + indx * vectype_size;
4359 vec_dest = *(p);
4360 indx = indx + 1;
4361 }
4362
4363 Otherwise, the data reference is potentially unaligned on a target that
4364 does not support unaligned accesses (dr_explicit_realign_optimized) -
4365 then generate the following code, in which the data in each iteration is
4366 obtained by two vector loads, one from the previous iteration, and one
4367 from the current iteration:
4368 p1 = initial_addr;
4369 msq_init = *(floor(p1))
4370 p2 = initial_addr + VS - 1;
4371 realignment_token = call target_builtin;
4372 indx = 0;
4373 loop {
4374 p2 = p2 + indx * vectype_size
4375 lsq = *(floor(p2))
4376 vec_dest = realign_load (msq, lsq, realignment_token)
4377 indx = indx + 1;
4378 msq = lsq;
4379 } */
4380
4381 /* If the misalignment remains the same throughout the execution of the
4382 loop, we can create the init_addr and permutation mask at the loop
4383 preheader. Otherwise, it needs to be created inside the loop.
4384 This can only occur when vectorizing memory accesses in the inner-loop
4385 nested within an outer-loop that is being vectorized. */
4386
4387 if (loop && nested_in_vect_loop_p (loop, stmt)
4388 && (TREE_INT_CST_LOW (DR_STEP (dr))
4389 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4390 {
4391 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4392 compute_in_loop = true;
4393 }
4394
4395 if ((alignment_support_scheme == dr_explicit_realign_optimized
4396 || alignment_support_scheme == dr_explicit_realign)
4397 && !compute_in_loop)
4398 {
4399 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4400 alignment_support_scheme, NULL_TREE,
4401 &at_loop);
4402 if (alignment_support_scheme == dr_explicit_realign_optimized)
4403 {
4404 phi = SSA_NAME_DEF_STMT (msq);
4405 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4406 }
4407 }
4408 else
4409 at_loop = loop;
4410
4411 if (negative)
4412 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4413
4414 if (load_lanes_p)
4415 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4416 else
4417 aggr_type = vectype;
4418
4419 prev_stmt_info = NULL;
4420 for (j = 0; j < ncopies; j++)
4421 {
4422 /* 1. Create the vector or array pointer update chain. */
4423 if (j == 0)
4424 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4425 offset, &dummy, gsi,
4426 &ptr_incr, false, &inv_p);
4427 else
4428 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4429 TYPE_SIZE_UNIT (aggr_type));
4430
4431 if (strided_load || slp_perm)
4432 dr_chain = VEC_alloc (tree, heap, vec_num);
4433
4434 if (load_lanes_p)
4435 {
4436 tree vec_array;
4437
4438 vec_array = create_vector_array (vectype, vec_num);
4439
4440 /* Emit:
4441 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4442 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4443 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4444 gimple_call_set_lhs (new_stmt, vec_array);
4445 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4446 mark_symbols_for_renaming (new_stmt);
4447
4448 /* Extract each vector into an SSA_NAME. */
4449 for (i = 0; i < vec_num; i++)
4450 {
4451 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4452 vec_array, i);
4453 VEC_quick_push (tree, dr_chain, new_temp);
4454 }
4455
4456 /* Record the mapping between SSA_NAMEs and statements. */
4457 vect_record_strided_load_vectors (stmt, dr_chain);
4458 }
4459 else
4460 {
4461 for (i = 0; i < vec_num; i++)
4462 {
4463 if (i > 0)
4464 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4465 stmt, NULL_TREE);
4466
4467 /* 2. Create the vector-load in the loop. */
4468 switch (alignment_support_scheme)
4469 {
4470 case dr_aligned:
4471 case dr_unaligned_supported:
4472 {
4473 struct ptr_info_def *pi;
4474 data_ref
4475 = build2 (MEM_REF, vectype, dataref_ptr,
4476 build_int_cst (reference_alias_ptr_type
4477 (DR_REF (first_dr)), 0));
4478 pi = get_ptr_info (dataref_ptr);
4479 pi->align = TYPE_ALIGN_UNIT (vectype);
4480 if (alignment_support_scheme == dr_aligned)
4481 {
4482 gcc_assert (aligned_access_p (first_dr));
4483 pi->misalign = 0;
4484 }
4485 else if (DR_MISALIGNMENT (first_dr) == -1)
4486 {
4487 TREE_TYPE (data_ref)
4488 = build_aligned_type (TREE_TYPE (data_ref),
4489 TYPE_ALIGN (elem_type));
4490 pi->align = TYPE_ALIGN_UNIT (elem_type);
4491 pi->misalign = 0;
4492 }
4493 else
4494 {
4495 TREE_TYPE (data_ref)
4496 = build_aligned_type (TREE_TYPE (data_ref),
4497 TYPE_ALIGN (elem_type));
4498 pi->misalign = DR_MISALIGNMENT (first_dr);
4499 }
4500 break;
4501 }
4502 case dr_explicit_realign:
4503 {
4504 tree ptr, bump;
4505 tree vs_minus_1;
4506
4507 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4508
4509 if (compute_in_loop)
4510 msq = vect_setup_realignment (first_stmt, gsi,
4511 &realignment_token,
4512 dr_explicit_realign,
4513 dataref_ptr, NULL);
4514
4515 new_stmt = gimple_build_assign_with_ops
4516 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4517 build_int_cst
4518 (TREE_TYPE (dataref_ptr),
4519 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4520 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4521 gimple_assign_set_lhs (new_stmt, ptr);
4522 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4523 data_ref
4524 = build2 (MEM_REF, vectype, ptr,
4525 build_int_cst (reference_alias_ptr_type
4526 (DR_REF (first_dr)), 0));
4527 vec_dest = vect_create_destination_var (scalar_dest,
4528 vectype);
4529 new_stmt = gimple_build_assign (vec_dest, data_ref);
4530 new_temp = make_ssa_name (vec_dest, new_stmt);
4531 gimple_assign_set_lhs (new_stmt, new_temp);
4532 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4533 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4534 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4535 msq = new_temp;
4536
4537 bump = size_binop (MULT_EXPR, vs_minus_1,
4538 TYPE_SIZE_UNIT (scalar_type));
4539 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4540 new_stmt = gimple_build_assign_with_ops
4541 (BIT_AND_EXPR, NULL_TREE, ptr,
4542 build_int_cst
4543 (TREE_TYPE (ptr),
4544 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4545 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4546 gimple_assign_set_lhs (new_stmt, ptr);
4547 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4548 data_ref
4549 = build2 (MEM_REF, vectype, ptr,
4550 build_int_cst (reference_alias_ptr_type
4551 (DR_REF (first_dr)), 0));
4552 break;
4553 }
4554 case dr_explicit_realign_optimized:
4555 new_stmt = gimple_build_assign_with_ops
4556 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4557 build_int_cst
4558 (TREE_TYPE (dataref_ptr),
4559 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4560 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
4561 new_stmt);
4562 gimple_assign_set_lhs (new_stmt, new_temp);
4563 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4564 data_ref
4565 = build2 (MEM_REF, vectype, new_temp,
4566 build_int_cst (reference_alias_ptr_type
4567 (DR_REF (first_dr)), 0));
4568 break;
4569 default:
4570 gcc_unreachable ();
4571 }
4572 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4573 new_stmt = gimple_build_assign (vec_dest, data_ref);
4574 new_temp = make_ssa_name (vec_dest, new_stmt);
4575 gimple_assign_set_lhs (new_stmt, new_temp);
4576 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4577 mark_symbols_for_renaming (new_stmt);
4578
4579 /* 3. Handle explicit realignment if necessary/supported.
4580 Create in loop:
4581 vec_dest = realign_load (msq, lsq, realignment_token) */
4582 if (alignment_support_scheme == dr_explicit_realign_optimized
4583 || alignment_support_scheme == dr_explicit_realign)
4584 {
4585 lsq = gimple_assign_lhs (new_stmt);
4586 if (!realignment_token)
4587 realignment_token = dataref_ptr;
4588 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4589 new_stmt
4590 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
4591 vec_dest, msq, lsq,
4592 realignment_token);
4593 new_temp = make_ssa_name (vec_dest, new_stmt);
4594 gimple_assign_set_lhs (new_stmt, new_temp);
4595 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4596
4597 if (alignment_support_scheme == dr_explicit_realign_optimized)
4598 {
4599 gcc_assert (phi);
4600 if (i == vec_num - 1 && j == ncopies - 1)
4601 add_phi_arg (phi, lsq,
4602 loop_latch_edge (containing_loop),
4603 UNKNOWN_LOCATION);
4604 msq = lsq;
4605 }
4606 }
4607
4608 /* 4. Handle invariant-load. */
4609 if (inv_p && !bb_vinfo)
4610 {
4611 tree vec_inv;
4612 gimple_stmt_iterator gsi2 = *gsi;
4613 gcc_assert (!strided_load);
4614 gsi_next (&gsi2);
4615 vec_inv = build_vector_from_val (vectype, scalar_dest);
4616 new_temp = vect_init_vector (stmt, vec_inv,
4617 vectype, &gsi2);
4618 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4619 }
4620
4621 if (negative)
4622 {
4623 new_temp = reverse_vec_elements (new_temp, stmt, gsi);
4624 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4625 }
4626
4627 /* Collect vector loads and later create their permutation in
4628 vect_transform_strided_load (). */
4629 if (strided_load || slp_perm)
4630 VEC_quick_push (tree, dr_chain, new_temp);
4631
4632 /* Store vector loads in the corresponding SLP_NODE. */
4633 if (slp && !slp_perm)
4634 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
4635 new_stmt);
4636 }
4637 }
4638
4639 if (slp && !slp_perm)
4640 continue;
4641
4642 if (slp_perm)
4643 {
4644 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
4645 slp_node_instance, false))
4646 {
4647 VEC_free (tree, heap, dr_chain);
4648 return false;
4649 }
4650 }
4651 else
4652 {
4653 if (strided_load)
4654 {
4655 if (!load_lanes_p)
4656 vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
4657 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4658 }
4659 else
4660 {
4661 if (j == 0)
4662 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4663 else
4664 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4665 prev_stmt_info = vinfo_for_stmt (new_stmt);
4666 }
4667 }
4668 if (dr_chain)
4669 VEC_free (tree, heap, dr_chain);
4670 }
4671
4672 return true;
4673 }
4674
4675 /* Function vect_is_simple_cond.
4676
4677 Input:
4678 LOOP - the loop that is being vectorized.
4679 COND - Condition that is checked for simple use.
4680
4681 Output:
4682 *COMP_VECTYPE - the vector type for the comparison.
4683
4684 Returns whether a COND can be vectorized. Checks whether the
4685 condition operands are supportable using vect_is_simple_use_1. */
4686
4687 static bool
4688 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, tree *comp_vectype)
4689 {
4690 tree lhs, rhs;
4691 tree def;
4692 enum vect_def_type dt;
4693 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
4694
4695 if (!COMPARISON_CLASS_P (cond))
4696 return false;
4697
4698 lhs = TREE_OPERAND (cond, 0);
4699 rhs = TREE_OPERAND (cond, 1);
4700
4701 if (TREE_CODE (lhs) == SSA_NAME)
4702 {
4703 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4704 if (!vect_is_simple_use_1 (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
4705 &dt, &vectype1))
4706 return false;
4707 }
4708 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
4709 && TREE_CODE (lhs) != FIXED_CST)
4710 return false;
4711
4712 if (TREE_CODE (rhs) == SSA_NAME)
4713 {
4714 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4715 if (!vect_is_simple_use_1 (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
4716 &dt, &vectype2))
4717 return false;
4718 }
4719 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
4720 && TREE_CODE (rhs) != FIXED_CST)
4721 return false;
4722
4723 *comp_vectype = vectype1 ? vectype1 : vectype2;
4724 return true;
4725 }
4726
4727 /* vectorizable_condition.
4728
4729 Check if STMT is conditional modify expression that can be vectorized.
4730 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4731 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4732 at GSI.
4733
4734 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
4735 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
4736 the else clause if it is 2).
4737
4738 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
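/* A sketch of the transformation (hypothetical SSA names): a scalar
   conditional such as

     x_5 = a_1 < b_2 ? c_3 : d_4;

   is replaced by a vector statement of the form

     vx_5 = VEC_COND_EXPR <va_1 < vb_2, vc_3, vd_4>;

   built from the vectorized defs of the comparison operands and of the
   then/else clauses.  */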
4739
4740 bool
4741 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
4742 gimple *vec_stmt, tree reduc_def, int reduc_index)
4743 {
4744 tree scalar_dest = NULL_TREE;
4745 tree vec_dest = NULL_TREE;
4746 tree cond_expr, then_clause, else_clause;
4747 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4748 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4749 tree comp_vectype;
4750 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
4751 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
4752 tree vec_compare, vec_cond_expr;
4753 tree new_temp;
4754 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4755 tree def;
4756 enum vect_def_type dt, dts[4];
4757 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4758 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4759 enum tree_code code;
4760 stmt_vec_info prev_stmt_info = NULL;
4761 int j;
4762
4763 /* FORNOW: unsupported in basic block SLP. */
4764 gcc_assert (loop_vinfo);
4765
4766 /* FORNOW: SLP not supported. */
4767 if (STMT_SLP_TYPE (stmt_info))
4768 return false;
4769
4770 gcc_assert (ncopies >= 1);
4771 if (reduc_index && ncopies > 1)
4772 return false; /* FORNOW */
4773
4774 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4775 return false;
4776
4777 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4778 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
4779 && reduc_def))
4780 return false;
4781
4782 /* FORNOW: not yet supported. */
4783 if (STMT_VINFO_LIVE_P (stmt_info))
4784 {
4785 if (vect_print_dump_info (REPORT_DETAILS))
4786 fprintf (vect_dump, "value used after loop.");
4787 return false;
4788 }
4789
4790 /* Is vectorizable conditional operation? */
4791 if (!is_gimple_assign (stmt))
4792 return false;
4793
4794 code = gimple_assign_rhs_code (stmt);
4795
4796 if (code != COND_EXPR)
4797 return false;
4798
4799 cond_expr = gimple_assign_rhs1 (stmt);
4800 then_clause = gimple_assign_rhs2 (stmt);
4801 else_clause = gimple_assign_rhs3 (stmt);
4802
4803 if (!vect_is_simple_cond (cond_expr, loop_vinfo, &comp_vectype)
4804 || !comp_vectype)
4805 return false;
4806
4807 if (TREE_CODE (then_clause) == SSA_NAME)
4808 {
4809 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
4810 if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
4811 &then_def_stmt, &def, &dt))
4812 return false;
4813 }
4814 else if (TREE_CODE (then_clause) != INTEGER_CST
4815 && TREE_CODE (then_clause) != REAL_CST
4816 && TREE_CODE (then_clause) != FIXED_CST)
4817 return false;
4818
4819 if (TREE_CODE (else_clause) == SSA_NAME)
4820 {
4821 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
4822 if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
4823 &else_def_stmt, &def, &dt))
4824 return false;
4825 }
4826 else if (TREE_CODE (else_clause) != INTEGER_CST
4827 && TREE_CODE (else_clause) != REAL_CST
4828 && TREE_CODE (else_clause) != FIXED_CST)
4829 return false;
4830
4831 if (!vec_stmt)
4832 {
4833 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
4834 return expand_vec_cond_expr_p (vectype, comp_vectype);
4835 }
4836
4837 /* Transform */
4838
4839 /* Handle def. */
4840 scalar_dest = gimple_assign_lhs (stmt);
4841 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4842
4843 /* Handle cond expr. */
4844 for (j = 0; j < ncopies; j++)
4845 {
4846 gimple new_stmt;
4847 if (j == 0)
4848 {
4849 gimple gtemp;
4850 vec_cond_lhs =
4851 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
4852 stmt, NULL);
4853 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
4854 NULL, &gtemp, &def, &dts[0]);
4855 vec_cond_rhs =
4856 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
4857 stmt, NULL);
4858 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
4859 NULL, &gtemp, &def, &dts[1]);
4860 if (reduc_index == 1)
4861 vec_then_clause = reduc_def;
4862 else
4863 {
4864 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
4865 stmt, NULL);
4866 vect_is_simple_use (then_clause, loop_vinfo,
4867 NULL, &gtemp, &def, &dts[2]);
4868 }
4869 if (reduc_index == 2)
4870 vec_else_clause = reduc_def;
4871 else
4872 {
4873 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
4874 stmt, NULL);
4875 vect_is_simple_use (else_clause, loop_vinfo,
4876 NULL, &gtemp, &def, &dts[3]);
4877 }
4878 }
4879 else
4880 {
4881 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], vec_cond_lhs);
4882 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], vec_cond_rhs);
4883 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
4884 vec_then_clause);
4885 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
4886 vec_else_clause);
4887 }
4888
4889 /* Arguments are ready. Create the new vector stmt. */
4890 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
4891 vec_cond_lhs, vec_cond_rhs);
4892 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
4893 vec_compare, vec_then_clause, vec_else_clause);
4894
4895 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
4896 new_temp = make_ssa_name (vec_dest, new_stmt);
4897 gimple_assign_set_lhs (new_stmt, new_temp);
4898 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4899 if (j == 0)
4900 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4901 else
4902 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4903
4904 prev_stmt_info = vinfo_for_stmt (new_stmt);
4905 }
4906
4907 return true;
4908 }
4909
4910
4911 /* Make sure the statement is vectorizable. */
4912
4913 bool
4914 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
4915 {
4916 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4917 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4918 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
4919 bool ok;
4920 tree scalar_type, vectype;
4921 gimple pattern_stmt, pattern_def_stmt;
4922
4923 if (vect_print_dump_info (REPORT_DETAILS))
4924 {
4925 fprintf (vect_dump, "==> examining statement: ");
4926 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4927 }
4928
4929 if (gimple_has_volatile_ops (stmt))
4930 {
4931 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
4932 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
4933
4934 return false;
4935 }
4936
4937 /* Skip stmts that do not need to be vectorized. In loops this is expected
4938 to include:
4939 - the COND_EXPR which is the loop exit condition
4940 - any LABEL_EXPRs in the loop
4941 - computations that are used only for array indexing or loop control.
4942 In basic blocks we only analyze statements that are a part of some SLP
4943 instance; therefore, all the statements are relevant.
4944
4945 Pattern statements need to be analyzed instead of the original statement
4946 if the original statement is not relevant. Otherwise, we analyze both
4947 statements. */
4948
4949 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4950 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4951 && !STMT_VINFO_LIVE_P (stmt_info))
4952 {
4953 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
4954 && pattern_stmt
4955 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
4956 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
4957 {
4958 /* Analyze PATTERN_STMT instead of the original stmt. */
4959 stmt = pattern_stmt;
4960 stmt_info = vinfo_for_stmt (pattern_stmt);
4961 if (vect_print_dump_info (REPORT_DETAILS))
4962 {
4963 fprintf (vect_dump, "==> examining pattern statement: ");
4964 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4965 }
4966 }
4967 else
4968 {
4969 if (vect_print_dump_info (REPORT_DETAILS))
4970 fprintf (vect_dump, "irrelevant.");
4971
4972 return true;
4973 }
4974 }
4975 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
4976 && pattern_stmt
4977 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
4978 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
4979 {
4980 /* Analyze PATTERN_STMT too. */
4981 if (vect_print_dump_info (REPORT_DETAILS))
4982 {
4983 fprintf (vect_dump, "==> examining pattern statement: ");
4984 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4985 }
4986
4987 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
4988 return false;
4989 }
4990
4991 if (is_pattern_stmt_p (stmt_info)
4992 && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
4993 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
4994 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
4995 {
4996 /* Analyze def stmt of STMT if it's a pattern stmt. */
4997 if (vect_print_dump_info (REPORT_DETAILS))
4998 {
4999 fprintf (vect_dump, "==> examining pattern def statement: ");
5000 print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
5001 }
5002
5003 if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node))
5004 return false;
5005 }
5006
5007
5008 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5009 {
5010 case vect_internal_def:
5011 break;
5012
5013 case vect_reduction_def:
5014 case vect_nested_cycle:
5015 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5016 || relevance == vect_used_in_outer_by_reduction
5017 || relevance == vect_unused_in_scope));
5018 break;
5019
5020 case vect_induction_def:
5021 case vect_constant_def:
5022 case vect_external_def:
5023 case vect_unknown_def_type:
5024 default:
5025 gcc_unreachable ();
5026 }
5027
5028 if (bb_vinfo)
5029 {
5030 gcc_assert (PURE_SLP_STMT (stmt_info));
5031
5032 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5033 if (vect_print_dump_info (REPORT_DETAILS))
5034 {
5035 fprintf (vect_dump, "get vectype for scalar type: ");
5036 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5037 }
5038
5039 vectype = get_vectype_for_scalar_type (scalar_type);
5040 if (!vectype)
5041 {
5042 if (vect_print_dump_info (REPORT_DETAILS))
5043 {
5044 fprintf (vect_dump, "not SLPed: unsupported data-type ");
5045 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5046 }
5047 return false;
5048 }
5049
5050 if (vect_print_dump_info (REPORT_DETAILS))
5051 {
5052 fprintf (vect_dump, "vectype: ");
5053 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5054 }
5055
5056 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5057 }
5058
5059 if (STMT_VINFO_RELEVANT_P (stmt_info))
5060 {
5061 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5062 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5063 *need_to_vectorize = true;
5064 }
5065
5066 ok = true;
5067 if (!bb_vinfo
5068 && (STMT_VINFO_RELEVANT_P (stmt_info)
5069 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5070 ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
5071 || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
5072 || vectorizable_conversion (stmt, NULL, NULL, NULL)
5073 || vectorizable_shift (stmt, NULL, NULL, NULL)
5074 || vectorizable_operation (stmt, NULL, NULL, NULL)
5075 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5076 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5077 || vectorizable_call (stmt, NULL, NULL)
5078 || vectorizable_store (stmt, NULL, NULL, NULL)
5079 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5080 || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
5081 else
5082 {
5083 if (bb_vinfo)
5084 ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
5085 || vectorizable_type_demotion (stmt, NULL, NULL, node)
5086 || vectorizable_shift (stmt, NULL, NULL, node)
5087 || vectorizable_operation (stmt, NULL, NULL, node)
5088 || vectorizable_assignment (stmt, NULL, NULL, node)
5089 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5090 || vectorizable_store (stmt, NULL, NULL, node));
5091 }
5092
5093 if (!ok)
5094 {
5095 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5096 {
5097 fprintf (vect_dump, "not vectorized: relevant stmt not ");
5098 fprintf (vect_dump, "supported: ");
5099 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5100 }
5101
5102 return false;
5103 }
5104
5105 if (bb_vinfo)
5106 return true;
5107
5108 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5109 need extra handling, except for vectorizable reductions. */
5110 if (STMT_VINFO_LIVE_P (stmt_info)
5111 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5112 ok = vectorizable_live_operation (stmt, NULL, NULL);
5113
5114 if (!ok)
5115 {
5116 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5117 {
5118 fprintf (vect_dump, "not vectorized: live stmt not ");
5119 fprintf (vect_dump, "supported: ");
5120 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5121 }
5122
5123 return false;
5124 }
5125
5126 return true;
5127 }
5128
5129
5130 /* Function vect_transform_stmt.
5131
5132 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5133
5134 bool
5135 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5136 bool *strided_store, slp_tree slp_node,
5137 slp_instance slp_node_instance)
5138 {
5139 bool is_store = false;
5140 gimple vec_stmt = NULL;
5141 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5142 bool done;
5143
5144 switch (STMT_VINFO_TYPE (stmt_info))
5145 {
5146 case type_demotion_vec_info_type:
5147 done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
5148 gcc_assert (done);
5149 break;
5150
5151 case type_promotion_vec_info_type:
5152 done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
5153 gcc_assert (done);
5154 break;
5155
5156 case type_conversion_vec_info_type:
5157 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5158 gcc_assert (done);
5159 break;
5160
5161 case induc_vec_info_type:
5162 gcc_assert (!slp_node);
5163 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5164 gcc_assert (done);
5165 break;
5166
5167 case shift_vec_info_type:
5168 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5169 gcc_assert (done);
5170 break;
5171
5172 case op_vec_info_type:
5173 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5174 gcc_assert (done);
5175 break;
5176
5177 case assignment_vec_info_type:
5178 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5179 gcc_assert (done);
5180 break;
5181
5182 case load_vec_info_type:
5183 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5184 slp_node_instance);
5185 gcc_assert (done);
5186 break;
5187
5188 case store_vec_info_type:
5189 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5190 gcc_assert (done);
5191 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
5192 {
5193 /* In case of interleaving, the whole chain is vectorized when the
5194 last store in the chain is reached. Store stmts before the last
5195 one are skipped, and their vec_stmt_info shouldn't be freed
5196 meanwhile. */
5197 *strided_store = true;
5198 if (STMT_VINFO_VEC_STMT (stmt_info))
5199 is_store = true;
5200 }
5201 else
5202 is_store = true;
5203 break;
5204
5205 case condition_vec_info_type:
5206 gcc_assert (!slp_node);
5207 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
5208 gcc_assert (done);
5209 break;
5210
5211 case call_vec_info_type:
5212 gcc_assert (!slp_node);
5213 done = vectorizable_call (stmt, gsi, &vec_stmt);
5214 stmt = gsi_stmt (*gsi);
5215 break;
5216
5217 case reduc_vec_info_type:
5218 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5219 gcc_assert (done);
5220 break;
5221
5222 default:
5223 if (!STMT_VINFO_LIVE_P (stmt_info))
5224 {
5225 if (vect_print_dump_info (REPORT_DETAILS))
5226 fprintf (vect_dump, "stmt not supported.");
5227 gcc_unreachable ();
5228 }
5229 }
5230
5231 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5232 is being vectorized, but outside the immediately enclosing loop. */
5233 if (vec_stmt
5234 && STMT_VINFO_LOOP_VINFO (stmt_info)
5235 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5236 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5237 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5238 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5239 || STMT_VINFO_RELEVANT (stmt_info) ==
5240 vect_used_in_outer_by_reduction))
5241 {
5242 struct loop *innerloop = LOOP_VINFO_LOOP (
5243 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5244 imm_use_iterator imm_iter;
5245 use_operand_p use_p;
5246 tree scalar_dest;
5247 gimple exit_phi;
5248
5249 if (vect_print_dump_info (REPORT_DETAILS))
5250 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5251
5252 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5253 (to be used when vectorizing outer-loop stmts that use the DEF of
5254 STMT). */
5255 if (gimple_code (stmt) == GIMPLE_PHI)
5256 scalar_dest = PHI_RESULT (stmt);
5257 else
5258 scalar_dest = gimple_assign_lhs (stmt);
5259
5260 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5261 {
5262 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5263 {
5264 exit_phi = USE_STMT (use_p);
5265 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5266 }
5267 }
5268 }
5269
5270 /* Handle stmts whose DEF is used outside the loop-nest that is
5271 being vectorized. */
5272 if (STMT_VINFO_LIVE_P (stmt_info)
5273 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5274 {
5275 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5276 gcc_assert (done);
5277 }
5278
5279 if (vec_stmt)
5280 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5281
5282 return is_store;
5283 }
5284
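/* A minimal standalone sketch (not using the GCC data structures) of the
   store handling above: for an interleaved store group that is not
   vectorized with SLP, every member sets the strided-store flag, but only
   the member whose whole chain has already been vectorized (modelled here
   by a non-NULL vec_stmt field) reports itself as a store, so the earlier
   members of the chain keep their stmt info alive.  The struct and field
   names below are hypothetical.  */

#include <stdbool.h>
#include <stddef.h>

struct toy_stmt_info
{
  bool strided_access;   /* part of an interleaved access group?  */
  void *vec_stmt;        /* non-NULL once the whole chain is vectorized  */
};

static bool
toy_handle_store (const struct toy_stmt_info *info, bool slp,
                  bool *strided_store)
{
  bool is_store = false;

  if (info->strided_access && !slp)
    {
      /* Only the store that triggered vectorization of the whole group
         counts as a store here.  */
      *strided_store = true;
      if (info->vec_stmt != NULL)
        is_store = true;
    }
  else
    is_store = true;

  return is_store;
}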
5285
5286 /* Remove a group of stores (for SLP or interleaving), free their
5287 stmt_vec_info. */
5288
5289 void
5290 vect_remove_stores (gimple first_stmt)
5291 {
5292 gimple next = first_stmt;
5293 gimple tmp;
5294 gimple_stmt_iterator next_si;
5295
5296 while (next)
5297 {
5298 /* Free the attached stmt_vec_info and remove the stmt. */
5299 next_si = gsi_for_stmt (next);
5300 gsi_remove (&next_si, true);
5301 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
5302 free_stmt_vec_info (next);
5303 next = tmp;
5304 }
5305 }
5306
5307
5308 /* Function new_stmt_vec_info.
5309
5310 Create and initialize a new stmt_vec_info struct for STMT. */
5311
5312 stmt_vec_info
5313 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5314 bb_vec_info bb_vinfo)
5315 {
5316 stmt_vec_info res;
5317 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5318
5319 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5320 STMT_VINFO_STMT (res) = stmt;
5321 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5322 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5323 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5324 STMT_VINFO_LIVE_P (res) = false;
5325 STMT_VINFO_VECTYPE (res) = NULL;
5326 STMT_VINFO_VEC_STMT (res) = NULL;
5327 STMT_VINFO_VECTORIZABLE (res) = true;
5328 STMT_VINFO_IN_PATTERN_P (res) = false;
5329 STMT_VINFO_RELATED_STMT (res) = NULL;
5330 STMT_VINFO_PATTERN_DEF_STMT (res) = NULL;
5331 STMT_VINFO_DATA_REF (res) = NULL;
5332
5333 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5334 STMT_VINFO_DR_OFFSET (res) = NULL;
5335 STMT_VINFO_DR_INIT (res) = NULL;
5336 STMT_VINFO_DR_STEP (res) = NULL;
5337 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5338
5339 if (gimple_code (stmt) == GIMPLE_PHI
5340 && is_loop_header_bb_p (gimple_bb (stmt)))
5341 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5342 else
5343 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5344
5345 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
5346 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
5347 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
5348 STMT_SLP_TYPE (res) = loop_vect;
5349 GROUP_FIRST_ELEMENT (res) = NULL;
5350 GROUP_NEXT_ELEMENT (res) = NULL;
5351 GROUP_SIZE (res) = 0;
5352 GROUP_STORE_COUNT (res) = 0;
5353 GROUP_GAP (res) = 0;
5354 GROUP_SAME_DR_STMT (res) = NULL;
5355 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5356
5357 return res;
5358 }
5359
5360
5361 /* Create the vector that holds stmt_vec_info structs. */
5362
5363 void
5364 init_stmt_vec_info_vec (void)
5365 {
5366 gcc_assert (!stmt_vec_info_vec);
5367 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5368 }
5369
5370
5371 /* Free the vector that holds stmt_vec_info structs. */
5372
5373 void
5374 free_stmt_vec_info_vec (void)
5375 {
5376 gcc_assert (stmt_vec_info_vec);
5377 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5378 }
5379
5380
5381 /* Free stmt vectorization related info. */
5382
5383 void
5384 free_stmt_vec_info (gimple stmt)
5385 {
5386 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5387
5388 if (!stmt_info)
5389 return;
5390
5391 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5392 set_vinfo_for_stmt (stmt, NULL);
5393 free (stmt_info);
5394 }
5395
5396
5397 /* Function get_vectype_for_scalar_type_and_size.
5398
5399 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
5400 by the target. */
5401
5402 static tree
5403 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
5404 {
5405 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5406 enum machine_mode simd_mode;
5407 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5408 int nunits;
5409 tree vectype;
5410
5411 if (nbytes == 0)
5412 return NULL_TREE;
5413
5414 /* We can't build a vector type of elements with alignment bigger than
5415 their size. */
5416 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5417 return NULL_TREE;
5418
5419 /* If we'd build a vector type of elements whose mode precision doesn't
5420 match their type's precision we'll get mismatched types on vector
5421 extracts via BIT_FIELD_REFs. This effectively means we disable
5422 vectorization of bool and/or enum types in some languages. */
5423 if (INTEGRAL_TYPE_P (scalar_type)
5424 && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
5425 return NULL_TREE;
5426
5427 if (GET_MODE_CLASS (inner_mode) != MODE_INT
5428 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5429 return NULL_TREE;
5430
5431 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5432 When the component mode passes the above test simply use a type
5433 corresponding to that mode. The theory is that any use that
5434 would cause problems with this will disable vectorization anyway. */
5435 if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5436 && !INTEGRAL_TYPE_P (scalar_type)
5437 && !POINTER_TYPE_P (scalar_type))
5438 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5439
5440 /* If no size was supplied, use the mode the target prefers. Otherwise
5441 look up a vector mode of the specified size. */
5442 if (size == 0)
5443 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5444 else
5445 simd_mode = mode_for_vector (inner_mode, size / nbytes);
5446 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
5447 if (nunits <= 1)
5448 return NULL_TREE;
5449
5450 vectype = build_vector_type (scalar_type, nunits);
5451 if (vect_print_dump_info (REPORT_DETAILS))
5452 {
5453 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
5454 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5455 }
5456
5457 if (!vectype)
5458 return NULL_TREE;
5459
5460 if (vect_print_dump_info (REPORT_DETAILS))
5461 {
5462 fprintf (vect_dump, "vectype: ");
5463 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5464 }
5465
5466 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5467 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
5468 {
5469 if (vect_print_dump_info (REPORT_DETAILS))
5470 fprintf (vect_dump, "mode not supported by target.");
5471 return NULL_TREE;
5472 }
5473
5474 return vectype;
5475 }
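
/* A standalone sketch of the size arithmetic used above, outside of the
   GCC type machinery.  With a hypothetical 16-byte SIMD mode and a 4-byte
   scalar element the routine would pick 16 / 4 = 4 units per vector;
   anything that yields fewer than two units is rejected, mirroring the
   "nunits <= 1" check.  The function name and parameters are
   hypothetical.  */

static int
toy_nunits_for_vector (unsigned simd_mode_size, unsigned elem_size)
{
  int nunits;

  if (elem_size == 0)
    return 0;                          /* cf. the "nbytes == 0" bail-out  */

  nunits = simd_mode_size / elem_size; /* e.g. 16 / 4 == 4 for int  */
  if (nunits <= 1)
    return 0;                          /* no useful vector type  */

  return nunits;
}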
5476
5477 unsigned int current_vector_size;
5478
5479 /* Function get_vectype_for_scalar_type.
5480
5481 Returns the vector type corresponding to SCALAR_TYPE as supported
5482 by the target. */
5483
5484 tree
5485 get_vectype_for_scalar_type (tree scalar_type)
5486 {
5487 tree vectype;
5488 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
5489 current_vector_size);
5490 if (vectype
5491 && current_vector_size == 0)
5492 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
5493 return vectype;
5494 }
5495
5496 /* Function get_same_sized_vectype
5497
5498 Returns a vector type corresponding to SCALAR_TYPE with the same
5499 size as VECTOR_TYPE, if supported by the target. */
5500
5501 tree
5502 get_same_sized_vectype (tree scalar_type, tree vector_type)
5503 {
5504 return get_vectype_for_scalar_type_and_size
5505 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
5506 }
5507
5508 /* Function vect_is_simple_use.
5509
5510 Input:
5511 LOOP_VINFO - the vect info of the loop that is being vectorized.
5512 BB_VINFO - the vect info of the basic block that is being vectorized.
5513 OPERAND - operand of a stmt in the loop or bb.
5514 DEF - the defining stmt in case OPERAND is an SSA_NAME.
5515
5516 Returns whether a stmt with OPERAND can be vectorized.
5517 For loops, supportable operands are constants, loop invariants, and operands
5518 that are defined by the current iteration of the loop. Unsupportable
5519 operands are those that are defined by a previous iteration of the loop (as
5520 is the case in reduction/induction computations).
5521 For basic blocks, supportable operands are constants and bb invariants.
5522 For now, operands defined outside the basic block are not supported. */
5523
5524 bool
5525 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
5526 bb_vec_info bb_vinfo, gimple *def_stmt,
5527 tree *def, enum vect_def_type *dt)
5528 {
5529 basic_block bb;
5530 stmt_vec_info stmt_vinfo;
5531 struct loop *loop = NULL;
5532
5533 if (loop_vinfo)
5534 loop = LOOP_VINFO_LOOP (loop_vinfo);
5535
5536 *def_stmt = NULL;
5537 *def = NULL_TREE;
5538
5539 if (vect_print_dump_info (REPORT_DETAILS))
5540 {
5541 fprintf (vect_dump, "vect_is_simple_use: operand ");
5542 print_generic_expr (vect_dump, operand, TDF_SLIM);
5543 }
5544
5545 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
5546 {
5547 *dt = vect_constant_def;
5548 return true;
5549 }
5550
5551 if (is_gimple_min_invariant (operand))
5552 {
5553 *def = operand;
5554 *dt = vect_external_def;
5555 return true;
5556 }
5557
5558 if (TREE_CODE (operand) == PAREN_EXPR)
5559 {
5560 if (vect_print_dump_info (REPORT_DETAILS))
5561 fprintf (vect_dump, "non-associatable copy.");
5562 operand = TREE_OPERAND (operand, 0);
5563 }
5564
5565 if (TREE_CODE (operand) != SSA_NAME)
5566 {
5567 if (vect_print_dump_info (REPORT_DETAILS))
5568 fprintf (vect_dump, "not ssa-name.");
5569 return false;
5570 }
5571
5572 *def_stmt = SSA_NAME_DEF_STMT (operand);
5573 if (*def_stmt == NULL)
5574 {
5575 if (vect_print_dump_info (REPORT_DETAILS))
5576 fprintf (vect_dump, "no def_stmt.");
5577 return false;
5578 }
5579
5580 if (vect_print_dump_info (REPORT_DETAILS))
5581 {
5582 fprintf (vect_dump, "def_stmt: ");
5583 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
5584 }
5585
5586 /* An empty stmt is expected only in the case of a function argument
5587 (otherwise we expect a PHI node or a GIMPLE_ASSIGN). */
5588 if (gimple_nop_p (*def_stmt))
5589 {
5590 *def = operand;
5591 *dt = vect_external_def;
5592 return true;
5593 }
5594
5595 bb = gimple_bb (*def_stmt);
5596
5597 if ((loop && !flow_bb_inside_loop_p (loop, bb))
5598 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
5599 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
5600 *dt = vect_external_def;
5601 else
5602 {
5603 stmt_vinfo = vinfo_for_stmt (*def_stmt);
5604 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
5605 }
5606
5607 if (*dt == vect_unknown_def_type)
5608 {
5609 if (vect_print_dump_info (REPORT_DETAILS))
5610 fprintf (vect_dump, "Unsupported pattern.");
5611 return false;
5612 }
5613
5614 if (vect_print_dump_info (REPORT_DETAILS))
5615 fprintf (vect_dump, "type of def: %d.", *dt);
5616
5617 switch (gimple_code (*def_stmt))
5618 {
5619 case GIMPLE_PHI:
5620 *def = gimple_phi_result (*def_stmt);
5621 break;
5622
5623 case GIMPLE_ASSIGN:
5624 *def = gimple_assign_lhs (*def_stmt);
5625 break;
5626
5627 case GIMPLE_CALL:
5628 *def = gimple_call_lhs (*def_stmt);
5629 if (*def != NULL)
5630 break;
5631 /* FALLTHRU */
5632 default:
5633 if (vect_print_dump_info (REPORT_DETAILS))
5634 fprintf (vect_dump, "unsupported defining stmt: ");
5635 return false;
5636 }
5637
5638 return true;
5639 }
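
/* A standalone sketch of the operand classification done above, using a
   toy operand description instead of GCC trees: constants classify as
   constant defs, values defined outside the region being vectorized as
   external defs, and names defined inside it take whatever def-type was
   recorded on their defining statement.  All names below are
   hypothetical.  */

enum toy_def_type
{
  TOY_CONSTANT_DEF,   /* cf. vect_constant_def  */
  TOY_EXTERNAL_DEF,   /* cf. vect_external_def  */
  TOY_INTERNAL_DEF    /* cf. vect_internal_def  */
};

struct toy_operand
{
  int is_constant;                      /* literal constant?  */
  int defined_in_region;                /* defining stmt inside the loop/bb?  */
  enum toy_def_type recorded_def_type;  /* def-type on the defining stmt  */
};

static enum toy_def_type
toy_classify_use (const struct toy_operand *op)
{
  if (op->is_constant)
    return TOY_CONSTANT_DEF;
  if (!op->defined_in_region)
    return TOY_EXTERNAL_DEF;
  return op->recorded_def_type;
}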
5640
5641 /* Function vect_is_simple_use_1.
5642
5643 Same as vect_is_simple_use but also determines the vector operand
5644 type of OPERAND and stores it to *VECTYPE. If the definition of
5645 OPERAND is vect_uninitialized_def, vect_constant_def or
5646 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
5647 is responsible for computing the best suited vector type for the
5648 scalar operand. */
5649
5650 bool
5651 vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
5652 bb_vec_info bb_vinfo, gimple *def_stmt,
5653 tree *def, enum vect_def_type *dt, tree *vectype)
5654 {
5655 if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
5656 return false;
5657
5658 /* Now get a vector type if the def is internal, otherwise supply
5659 NULL_TREE and leave it up to the caller to figure out a proper
5660 type for the use stmt. */
5661 if (*dt == vect_internal_def
5662 || *dt == vect_induction_def
5663 || *dt == vect_reduction_def
5664 || *dt == vect_double_reduction_def
5665 || *dt == vect_nested_cycle)
5666 {
5667 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
5668
5669 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5670 && !STMT_VINFO_RELEVANT (stmt_info)
5671 && !STMT_VINFO_LIVE_P (stmt_info))
5672 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5673
5674 *vectype = STMT_VINFO_VECTYPE (stmt_info);
5675 gcc_assert (*vectype != NULL_TREE);
5676 }
5677 else if (*dt == vect_uninitialized_def
5678 || *dt == vect_constant_def
5679 || *dt == vect_external_def)
5680 *vectype = NULL_TREE;
5681 else
5682 gcc_unreachable ();
5683
5684 return true;
5685 }
5686
5687
5688 /* Function supportable_widening_operation
5689
5690 Check whether an operation represented by the code CODE is a
5691 widening operation that is supported by the target platform in
5692 vector form (i.e., when operating on arguments of type VECTYPE_IN
5693 producing a result of type VECTYPE_OUT).
5694
5695 Widening operations we currently support are NOP (CONVERT), FLOAT
5696 and WIDEN_MULT. This function checks if these operations are supported
5697 by the target platform either directly (via vector tree-codes), or via
5698 target builtins.
5699
5700 Output:
5701 - CODE1 and CODE2 are codes of vector operations to be used when
5702 vectorizing the operation, if available.
5703 - DECL1 and DECL2 are decls of target builtin functions to be used
5704 when vectorizing the operation, if available. In this case,
5705 CODE1 and CODE2 are CALL_EXPR.
5706 - MULTI_STEP_CVT determines the number of required intermediate steps in
5707 case of multi-step conversion (like char->short->int - in that case
5708 MULTI_STEP_CVT will be 1).
5709 - INTERM_TYPES contains the intermediate type required to perform the
5710 widening operation (short in the above example). */
5711
5712 bool
5713 supportable_widening_operation (enum tree_code code, gimple stmt,
5714 tree vectype_out, tree vectype_in,
5715 tree *decl1, tree *decl2,
5716 enum tree_code *code1, enum tree_code *code2,
5717 int *multi_step_cvt,
5718 VEC (tree, heap) **interm_types)
5719 {
5720 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5721 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5722 struct loop *vect_loop = NULL;
5723 bool ordered_p;
5724 enum machine_mode vec_mode;
5725 enum insn_code icode1, icode2;
5726 optab optab1, optab2;
5727 tree vectype = vectype_in;
5728 tree wide_vectype = vectype_out;
5729 enum tree_code c1, c2;
5730
5731 if (loop_info)
5732 vect_loop = LOOP_VINFO_LOOP (loop_info);
5733
5734 /* The result of a vectorized widening operation usually requires two vectors
5735 (because the widened results do not fit in one vector). The generated
5736 vector results would normally be expected to be generated in the same
5737 order as in the original scalar computation, i.e. if 8 results are
5738 generated in each vector iteration, they are to be organized as follows:
5739 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
5740
5741 However, in the special case that the result of the widening operation is
5742 used in a reduction computation only, the order doesn't matter (because
5743 when vectorizing a reduction we change the order of the computation).
5744 Some targets can take advantage of this and generate more efficient code.
5745 For example, targets like Altivec, which support widen_mult using a sequence
5746 of {mult_even,mult_odd}, generate the following vectors:
5747 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
5748
5749 When vectorizing outer-loops, we execute the inner-loop sequentially
5750 (each vectorized inner-loop iteration contributes to VF outer-loop
5751 iterations in parallel). We therefore don't allow changing the order
5752 of the computation in the inner-loop during outer-loop vectorization. */
5753
5754 if (vect_loop
5755 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
5756 && !nested_in_vect_loop_p (vect_loop, stmt))
5757 ordered_p = false;
5758 else
5759 ordered_p = true;
5760
5761 if (!ordered_p
5762 && code == WIDEN_MULT_EXPR
5763 && targetm.vectorize.builtin_mul_widen_even
5764 && targetm.vectorize.builtin_mul_widen_even (vectype)
5765 && targetm.vectorize.builtin_mul_widen_odd
5766 && targetm.vectorize.builtin_mul_widen_odd (vectype))
5767 {
5768 if (vect_print_dump_info (REPORT_DETAILS))
5769 fprintf (vect_dump, "Unordered widening operation detected.");
5770
5771 *code1 = *code2 = CALL_EXPR;
5772 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
5773 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
5774 return true;
5775 }
5776
5777 switch (code)
5778 {
5779 case WIDEN_MULT_EXPR:
5780 if (BYTES_BIG_ENDIAN)
5781 {
5782 c1 = VEC_WIDEN_MULT_HI_EXPR;
5783 c2 = VEC_WIDEN_MULT_LO_EXPR;
5784 }
5785 else
5786 {
5787 c2 = VEC_WIDEN_MULT_HI_EXPR;
5788 c1 = VEC_WIDEN_MULT_LO_EXPR;
5789 }
5790 break;
5791
5792 CASE_CONVERT:
5793 if (BYTES_BIG_ENDIAN)
5794 {
5795 c1 = VEC_UNPACK_HI_EXPR;
5796 c2 = VEC_UNPACK_LO_EXPR;
5797 }
5798 else
5799 {
5800 c2 = VEC_UNPACK_HI_EXPR;
5801 c1 = VEC_UNPACK_LO_EXPR;
5802 }
5803 break;
5804
5805 case FLOAT_EXPR:
5806 if (BYTES_BIG_ENDIAN)
5807 {
5808 c1 = VEC_UNPACK_FLOAT_HI_EXPR;
5809 c2 = VEC_UNPACK_FLOAT_LO_EXPR;
5810 }
5811 else
5812 {
5813 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
5814 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
5815 }
5816 break;
5817
5818 case FIX_TRUNC_EXPR:
5819 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
5820 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
5821 computing the operation. */
5822 return false;
5823
5824 default:
5825 gcc_unreachable ();
5826 }
5827
5828 if (code == FIX_TRUNC_EXPR)
5829 {
5830 /* The signedness is determined from the output operand. */
5831 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
5832 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
5833 }
5834 else
5835 {
5836 optab1 = optab_for_tree_code (c1, vectype, optab_default);
5837 optab2 = optab_for_tree_code (c2, vectype, optab_default);
5838 }
5839
5840 if (!optab1 || !optab2)
5841 return false;
5842
5843 vec_mode = TYPE_MODE (vectype);
5844 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
5845 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
5846 return false;
5847
5848 /* Check if it's a multi-step conversion that can be done using intermediate
5849 types. */
5850 if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
5851 || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
5852 {
5853 int i;
5854 tree prev_type = vectype, intermediate_type;
5855 enum machine_mode intermediate_mode, prev_mode = vec_mode;
5856 optab optab3, optab4;
5857
5858 if (!CONVERT_EXPR_CODE_P (code))
5859 return false;
5860
5861 *code1 = c1;
5862 *code2 = c2;
5863
5864 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
5865 intermediate steps in the promotion sequence. We try
5866 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
5867 not. */
5868 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
5869 for (i = 0; i < 3; i++)
5870 {
5871 intermediate_mode = insn_data[icode1].operand[0].mode;
5872 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
5873 TYPE_UNSIGNED (prev_type));
5874 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
5875 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
5876
5877 if (!optab3 || !optab4
5878 || ((icode1 = optab_handler (optab1, prev_mode))
5879 == CODE_FOR_nothing)
5880 || insn_data[icode1].operand[0].mode != intermediate_mode
5881 || ((icode2 = optab_handler (optab2, prev_mode))
5882 == CODE_FOR_nothing)
5883 || insn_data[icode2].operand[0].mode != intermediate_mode
5884 || ((icode1 = optab_handler (optab3, intermediate_mode))
5885 == CODE_FOR_nothing)
5886 || ((icode2 = optab_handler (optab4, intermediate_mode))
5887 == CODE_FOR_nothing))
5888 return false;
5889
5890 VEC_quick_push (tree, *interm_types, intermediate_type);
5891 (*multi_step_cvt)++;
5892
5893 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
5894 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5895 return true;
5896
5897 prev_type = intermediate_type;
5898 prev_mode = intermediate_mode;
5899 }
5900
5901 return false;
5902 }
5903
5904 *code1 = c1;
5905 *code2 = c2;
5906 return true;
5907 }
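
/* A standalone sketch of what the VEC_WIDEN_MULT_HI/LO pair chosen by the
   routine above computes, written as plain scalar C.  For two input
   vectors of 8 short elements each, the "lo" operation multiplies one
   half of the elements into a vector of 4 ints and the "hi" operation
   the other half, so together they cover all 8 widened products (which
   half is "hi" depends on endianness, as handled above).  The array
   sizes are illustrative only.  */

static void
toy_widen_mult (const short a[8], const short b[8],
                int lo[4], int hi[4])
{
  int i;

  for (i = 0; i < 4; i++)
    lo[i] = (int) a[i] * b[i];          /* cf. VEC_WIDEN_MULT_LO_EXPR  */
  for (i = 0; i < 4; i++)
    hi[i] = (int) a[i + 4] * b[i + 4];  /* cf. VEC_WIDEN_MULT_HI_EXPR  */
}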
5908
5909
5910 /* Function supportable_narrowing_operation
5911
5912 Check whether an operation represented by the code CODE is a
5913 narrowing operation that is supported by the target platform in
5914 vector form (i.e., when operating on arguments of type VECTYPE_IN
5915 and producing a result of type VECTYPE_OUT).
5916
5917 Narrowing operations we currently support are NOP (CONVERT) and
5918 FIX_TRUNC. This function checks if these operations are supported by
5919 the target platform directly via vector tree-codes.
5920
5921 Output:
5922 - CODE1 is the code of a vector operation to be used when
5923 vectorizing the operation, if available.
5924 - MULTI_STEP_CVT determines the number of required intermediate steps in
5925 case of multi-step conversion (like int->short->char - in that case
5926 MULTI_STEP_CVT will be 1).
5927 - INTERM_TYPES contains the intermediate type required to perform the
5928 narrowing operation (short in the above example). */
5929
5930 bool
5931 supportable_narrowing_operation (enum tree_code code,
5932 tree vectype_out, tree vectype_in,
5933 enum tree_code *code1, int *multi_step_cvt,
5934 VEC (tree, heap) **interm_types)
5935 {
5936 enum machine_mode vec_mode;
5937 enum insn_code icode1;
5938 optab optab1, interm_optab;
5939 tree vectype = vectype_in;
5940 tree narrow_vectype = vectype_out;
5941 enum tree_code c1;
5942 tree intermediate_type, prev_type;
5943 int i;
5944
5945 switch (code)
5946 {
5947 CASE_CONVERT:
5948 c1 = VEC_PACK_TRUNC_EXPR;
5949 break;
5950
5951 case FIX_TRUNC_EXPR:
5952 c1 = VEC_PACK_FIX_TRUNC_EXPR;
5953 break;
5954
5955 case FLOAT_EXPR:
5956 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
5957 tree code and optabs used for computing the operation. */
5958 return false;
5959
5960 default:
5961 gcc_unreachable ();
5962 }
5963
5964 if (code == FIX_TRUNC_EXPR)
5965 /* The signedness is determined from the output operand. */
5966 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
5967 else
5968 optab1 = optab_for_tree_code (c1, vectype, optab_default);
5969
5970 if (!optab1)
5971 return false;
5972
5973 vec_mode = TYPE_MODE (vectype);
5974 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
5975 return false;
5976
5977 /* Check if it's a multi-step conversion that can be done using intermediate
5978 types. */
5979 if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
5980 {
5981 enum machine_mode intermediate_mode, prev_mode = vec_mode;
5982
5983 *code1 = c1;
5984 prev_type = vectype;
5985 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
5986 intermediate steps in the narrowing sequence. We try
5987 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
5988 not. */
5989 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
5990 for (i = 0; i < 3; i++)
5991 {
5992 intermediate_mode = insn_data[icode1].operand[0].mode;
5993 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
5994 TYPE_UNSIGNED (prev_type));
5995 interm_optab = optab_for_tree_code (c1, intermediate_type,
5996 optab_default);
5997 if (!interm_optab
5998 || ((icode1 = optab_handler (optab1, prev_mode))
5999 == CODE_FOR_nothing)
6000 || insn_data[icode1].operand[0].mode != intermediate_mode
6001 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6002 == CODE_FOR_nothing))
6003 return false;
6004
6005 VEC_quick_push (tree, *interm_types, intermediate_type);
6006 (*multi_step_cvt)++;
6007
6008 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6009 return true;
6010
6011 prev_type = intermediate_type;
6012 prev_mode = intermediate_mode;
6013 }
6014
6015 return false;
6016 }
6017
6018 *code1 = c1;
6019 return true;
6020 }
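
/* A standalone sketch of the multi-step narrowing the routine above
   checks for (the int -> short -> char case, for which MULTI_STEP_CVT
   becomes 1), written as plain scalar C: two truncating pack steps, each
   halving the element width, with the intermediate "short" type playing
   the role of the entry pushed onto INTERM_TYPES.  The element counts
   are illustrative only.  */

static void
toy_two_step_narrow (const int src[8], signed char dst[8])
{
  short intermediate[8];   /* cf. intermediate_type above  */
  int i;

  for (i = 0; i < 8; i++)
    intermediate[i] = (short) src[i];        /* first VEC_PACK_TRUNC step  */
  for (i = 0; i < 8; i++)
    dst[i] = (signed char) intermediate[i];  /* second VEC_PACK_TRUNC step  */
}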