1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "cfgloop.h"
36 #include "cfglayout.h"
37 #include "expr.h"
38 #include "recog.h"
39 #include "optabs.h"
40 #include "diagnostic-core.h"
41 #include "tree-vectorizer.h"
42 #include "langhooks.h"
43
44
45 /* Return a variable of type ELEM_TYPE[NELEMS]. */
46
47 static tree
48 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
49 {
50 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
51 "vect_array");
52 }
53
54 /* ARRAY is an array of vectors created by create_vector_array.
55 Return an SSA_NAME for the vector in index N. The reference
56 is part of the vectorization of STMT and the vector is associated
57 with scalar destination SCALAR_DEST. */
58
59 static tree
60 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
61 tree array, unsigned HOST_WIDE_INT n)
62 {
63 tree vect_type, vect, vect_name, array_ref;
64 gimple new_stmt;
65
66 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
67 vect_type = TREE_TYPE (TREE_TYPE (array));
68 vect = vect_create_destination_var (scalar_dest, vect_type);
69 array_ref = build4 (ARRAY_REF, vect_type, array,
70 build_int_cst (size_type_node, n),
71 NULL_TREE, NULL_TREE);
72
73 new_stmt = gimple_build_assign (vect, array_ref);
74 vect_name = make_ssa_name (vect, new_stmt);
75 gimple_assign_set_lhs (new_stmt, vect_name);
76 vect_finish_stmt_generation (stmt, new_stmt, gsi);
77 mark_symbols_for_renaming (new_stmt);
78
79 return vect_name;
80 }
81
82 /* ARRAY is an array of vectors created by create_vector_array.
83 Emit code to store SSA_NAME VECT in index N of the array.
84 The store is part of the vectorization of STMT. */
85
86 static void
87 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
88 tree array, unsigned HOST_WIDE_INT n)
89 {
90 tree array_ref;
91 gimple new_stmt;
92
93 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
94 build_int_cst (size_type_node, n),
95 NULL_TREE, NULL_TREE);
96
97 new_stmt = gimple_build_assign (array_ref, vect);
98 vect_finish_stmt_generation (stmt, new_stmt, gsi);
99 mark_symbols_for_renaming (new_stmt);
100 }
101
102 /* PTR is a pointer to an array of type TYPE. Return a representation
103 of *PTR. The memory reference replaces those in FIRST_DR
104 (and its group). */
105
106 static tree
107 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
108 {
109 struct ptr_info_def *pi;
110 tree mem_ref, alias_ptr_type;
111
112 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
113 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
114 /* Arrays have the same alignment as their type. */
115 pi = get_ptr_info (ptr);
116 pi->align = TYPE_ALIGN_UNIT (type);
117 pi->misalign = 0;
118 return mem_ref;
119 }
120
121 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
122
123 /* Function vect_mark_relevant.
124
125 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
126
127 static void
128 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
129 enum vect_relevant relevant, bool live_p,
130 bool used_in_pattern)
131 {
132 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
133 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
134 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
135 gimple pattern_stmt;
136
137 if (vect_print_dump_info (REPORT_DETAILS))
138 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
139
140 /* If this stmt is an original stmt in a pattern, we might need to mark its
141 related pattern stmt instead of the original stmt. However, such stmts
142 may have their own uses that are not in any pattern; in such cases the
143 stmt itself should be marked. */
144 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
145 {
146 bool found = false;
147 if (!used_in_pattern)
148 {
149 imm_use_iterator imm_iter;
150 use_operand_p use_p;
151 gimple use_stmt;
152 tree lhs;
153
154 if (is_gimple_assign (stmt))
155 lhs = gimple_assign_lhs (stmt);
156 else
157 lhs = gimple_call_lhs (stmt);
158
159 /* This use is outside of any pattern. If LHS has other uses that are
160 pattern uses, we should mark the stmt itself, and not the pattern
161 stmt. */
162 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
163 {
164 if (is_gimple_debug (USE_STMT (use_p)))
165 continue;
166 use_stmt = USE_STMT (use_p);
167
168 if (vinfo_for_stmt (use_stmt)
169 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
170 {
171 found = true;
172 break;
173 }
174 }
175 }
176
177 if (!found)
178 {
179 /* This is the last stmt in a sequence that was detected as a
180 pattern that can potentially be vectorized. Don't mark the stmt
181 as relevant/live because it's not going to be vectorized.
182 Instead mark the pattern-stmt that replaces it. */
183
184 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
185
186 if (vect_print_dump_info (REPORT_DETAILS))
187 fprintf (vect_dump, "last stmt in pattern. don't mark"
188 " relevant/live.");
189 stmt_info = vinfo_for_stmt (pattern_stmt);
190 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
191 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
192 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
193 stmt = pattern_stmt;
194 }
195 }
196
197 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
198 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
199 STMT_VINFO_RELEVANT (stmt_info) = relevant;
200
201 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
202 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
203 {
204 if (vect_print_dump_info (REPORT_DETAILS))
205 fprintf (vect_dump, "already marked relevant/live.");
206 return;
207 }
208
209 VEC_safe_push (gimple, heap, *worklist, stmt);
210 }
211
212
213 /* Function vect_stmt_relevant_p.
214
215 Return true if STMT, in the loop that is represented by LOOP_VINFO, is
216 "relevant for vectorization".
217
218 A stmt is considered "relevant for vectorization" if:
219 - it has uses outside the loop.
220 - it has vdefs (it alters memory).
221 - it is a control stmt in the loop (except for the exit condition).
222
223 CHECKME: what other side effects would the vectorizer allow? */
224
225 static bool
226 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
227 enum vect_relevant *relevant, bool *live_p)
228 {
229 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
230 ssa_op_iter op_iter;
231 imm_use_iterator imm_iter;
232 use_operand_p use_p;
233 def_operand_p def_p;
234
235 *relevant = vect_unused_in_scope;
236 *live_p = false;
237
238 /* cond stmt other than loop exit cond. */
239 if (is_ctrl_stmt (stmt)
240 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
241 != loop_exit_ctrl_vec_info_type)
242 *relevant = vect_used_in_scope;
243
244 /* changing memory. */
245 if (gimple_code (stmt) != GIMPLE_PHI)
246 if (gimple_vdef (stmt))
247 {
248 if (vect_print_dump_info (REPORT_DETAILS))
249 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
250 *relevant = vect_used_in_scope;
251 }
252
253 /* uses outside the loop. */
254 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
255 {
256 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
257 {
258 basic_block bb = gimple_bb (USE_STMT (use_p));
259 if (!flow_bb_inside_loop_p (loop, bb))
260 {
261 if (vect_print_dump_info (REPORT_DETAILS))
262 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
263
264 if (is_gimple_debug (USE_STMT (use_p)))
265 continue;
266
267 /* We expect all such uses to be in the loop exit phis
268 (because of loop-closed SSA form). */
269 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
270 gcc_assert (bb == single_exit (loop)->dest);
271
272 *live_p = true;
273 }
274 }
275 }
276
277 return (*live_p || *relevant);
278 }
279
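/* Illustrative example (editorial sketch, not from the original sources):
   in the scalar loop

        for (i = 0; i < n; i++)
          sum_1 = sum_0 + a[i];
        ... = sum_1;   <-- use after the loop

   the stmt defining sum_1 is reached through a loop-closed exit phi, so
   vect_stmt_relevant_p above sets *live_p for it, whereas a store such as
   'a[i] = x' is marked relevant because it has a vdef.  */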
280
281 /* Function exist_non_indexing_operands_for_use_p
282
283 USE is one of the uses attached to STMT. Check if USE is
284 used in STMT for anything other than indexing an array. */
285
286 static bool
287 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
288 {
289 tree operand;
290 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
291
292 /* USE corresponds to some operand in STMT. If there is no data
293 reference in STMT, then any operand that corresponds to USE
294 is not indexing an array. */
295 if (!STMT_VINFO_DATA_REF (stmt_info))
296 return true;
297
298 /* STMT has a data_ref. FORNOW this means that it is of one of
299 the following forms:
300 -1- ARRAY_REF = var
301 -2- var = ARRAY_REF
302 (This should have been verified in analyze_data_refs).
303
304 'var' in the second case corresponds to a def, not a use,
305 so USE cannot correspond to any operands that are not used
306 for array indexing.
307
308 Therefore, all we need to check is if STMT falls into the
309 first case, and whether var corresponds to USE. */
310
311 if (!gimple_assign_copy_p (stmt))
312 return false;
313 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
314 return false;
315 operand = gimple_assign_rhs1 (stmt);
316 if (TREE_CODE (operand) != SSA_NAME)
317 return false;
318
319 if (operand == use)
320 return true;
321
322 return false;
323 }
324
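/* Illustrative example (editorial sketch, not from the original sources):
   for the store 'a[i_1] = x_2', exist_non_indexing_operands_for_use_p
   returns true for the use x_2 (it is the copied rhs) and false for the
   index i_1, which is only used to compute the address.  */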
325
326 /*
327 Function process_use.
328
329 Inputs:
330 - a USE in STMT in a loop represented by LOOP_VINFO
331 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
332 that defined USE. This is done by calling mark_relevant and passing it
333 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
334
335 Outputs:
336 Generally, LIVE_P and RELEVANT are used to define the liveness and
337 relevance info of the DEF_STMT of this USE:
338 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
339 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
340 Exceptions:
341 - case 1: If USE is used only for address computations (e.g. array indexing),
342 which does not need to be directly vectorized, then the liveness/relevance
343 of the respective DEF_STMT is left unchanged.
344 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
345 skip DEF_STMT because it has already been processed.
346 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
347 be modified accordingly.
348
349 Return true if everything is as expected. Return false otherwise. */
350
351 static bool
352 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
353 enum vect_relevant relevant, VEC(gimple,heap) **worklist)
354 {
355 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
356 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
357 stmt_vec_info dstmt_vinfo;
358 basic_block bb, def_bb;
359 tree def;
360 gimple def_stmt;
361 enum vect_def_type dt;
362
363 /* case 1: we are only interested in uses that need to be vectorized. Uses
364 that are used for address computation are not considered relevant. */
365 if (!exist_non_indexing_operands_for_use_p (use, stmt))
366 return true;
367
368 if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
369 {
370 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
371 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
372 return false;
373 }
374
375 if (!def_stmt || gimple_nop_p (def_stmt))
376 return true;
377
378 def_bb = gimple_bb (def_stmt);
379 if (!flow_bb_inside_loop_p (loop, def_bb))
380 {
381 if (vect_print_dump_info (REPORT_DETAILS))
382 fprintf (vect_dump, "def_stmt is out of loop.");
383 return true;
384 }
385
386 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
387 DEF_STMT must have already been processed, because this should be the
388 only way that STMT, which is a reduction-phi, was put in the worklist,
389 as there should be no other uses for DEF_STMT in the loop. So we just
390 check that everything is as expected, and we are done. */
391 dstmt_vinfo = vinfo_for_stmt (def_stmt);
392 bb = gimple_bb (stmt);
393 if (gimple_code (stmt) == GIMPLE_PHI
394 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
395 && gimple_code (def_stmt) != GIMPLE_PHI
396 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
397 && bb->loop_father == def_bb->loop_father)
398 {
399 if (vect_print_dump_info (REPORT_DETAILS))
400 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
401 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
402 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
403 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
404 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
405 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
406 return true;
407 }
408
409 /* case 3a: outer-loop stmt defining an inner-loop stmt:
410 outer-loop-header-bb:
411 d = def_stmt
412 inner-loop:
413 stmt # use (d)
414 outer-loop-tail-bb:
415 ... */
416 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
417 {
418 if (vect_print_dump_info (REPORT_DETAILS))
419 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
420
421 switch (relevant)
422 {
423 case vect_unused_in_scope:
424 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
425 vect_used_in_scope : vect_unused_in_scope;
426 break;
427
428 case vect_used_in_outer_by_reduction:
429 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
430 relevant = vect_used_by_reduction;
431 break;
432
433 case vect_used_in_outer:
434 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
435 relevant = vect_used_in_scope;
436 break;
437
438 case vect_used_in_scope:
439 break;
440
441 default:
442 gcc_unreachable ();
443 }
444 }
445
446 /* case 3b: inner-loop stmt defining an outer-loop stmt:
447 outer-loop-header-bb:
448 ...
449 inner-loop:
450 d = def_stmt
451 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
452 stmt # use (d) */
453 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
454 {
455 if (vect_print_dump_info (REPORT_DETAILS))
456 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
457
458 switch (relevant)
459 {
460 case vect_unused_in_scope:
461 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
462 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
463 vect_used_in_outer_by_reduction : vect_unused_in_scope;
464 break;
465
466 case vect_used_by_reduction:
467 relevant = vect_used_in_outer_by_reduction;
468 break;
469
470 case vect_used_in_scope:
471 relevant = vect_used_in_outer;
472 break;
473
474 default:
475 gcc_unreachable ();
476 }
477 }
478
479 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
480 is_pattern_stmt_p (stmt_vinfo));
481 return true;
482 }
483
484
485 /* Function vect_mark_stmts_to_be_vectorized.
486
487 Not all stmts in the loop need to be vectorized. For example:
488
489 for i...
490 for j...
491 1. T0 = i + j
492 2. T1 = a[T0]
493
494 3. j = j + 1
495
496 Stmts 1 and 3 do not need to be vectorized, because loop control and
497 addressing of vectorized data-refs are handled differently.
498
499 This pass detects such stmts. */
500
501 bool
502 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
503 {
504 VEC(gimple,heap) *worklist;
505 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
506 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
507 unsigned int nbbs = loop->num_nodes;
508 gimple_stmt_iterator si;
509 gimple stmt;
510 unsigned int i;
511 stmt_vec_info stmt_vinfo;
512 basic_block bb;
513 gimple phi;
514 bool live_p;
515 enum vect_relevant relevant, tmp_relevant;
516 enum vect_def_type def_type;
517
518 if (vect_print_dump_info (REPORT_DETAILS))
519 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
520
521 worklist = VEC_alloc (gimple, heap, 64);
522
523 /* 1. Init worklist. */
524 for (i = 0; i < nbbs; i++)
525 {
526 bb = bbs[i];
527 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
528 {
529 phi = gsi_stmt (si);
530 if (vect_print_dump_info (REPORT_DETAILS))
531 {
532 fprintf (vect_dump, "init: phi relevant? ");
533 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
534 }
535
536 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
537 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
538 }
539 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
540 {
541 stmt = gsi_stmt (si);
542 if (vect_print_dump_info (REPORT_DETAILS))
543 {
544 fprintf (vect_dump, "init: stmt relevant? ");
545 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
546 }
547
548 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
549 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
550 }
551 }
552
553 /* 2. Process_worklist */
554 while (VEC_length (gimple, worklist) > 0)
555 {
556 use_operand_p use_p;
557 ssa_op_iter iter;
558
559 stmt = VEC_pop (gimple, worklist);
560 if (vect_print_dump_info (REPORT_DETAILS))
561 {
562 fprintf (vect_dump, "worklist: examine stmt: ");
563 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
564 }
565
566 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
567 (DEF_STMT) as relevant/irrelevant and live/dead according to the
568 liveness and relevance properties of STMT. */
569 stmt_vinfo = vinfo_for_stmt (stmt);
570 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
571 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
572
573 /* Generally, the liveness and relevance properties of STMT are
574 propagated as is to the DEF_STMTs of its USEs:
575 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
576 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
577
578 One exception is when STMT has been identified as defining a reduction
579 variable; in this case we set the liveness/relevance as follows:
580 live_p = false
581 relevant = vect_used_by_reduction
582 This is because we distinguish between two kinds of relevant stmts -
583 those that are used by a reduction computation, and those that are
584 (also) used by a regular computation. This allows us later on to
585 identify stmts that are used solely by a reduction, and therefore the
586 order of the results that they produce does not have to be kept. */
587
588 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
589 tmp_relevant = relevant;
590 switch (def_type)
591 {
592 case vect_reduction_def:
593 switch (tmp_relevant)
594 {
595 case vect_unused_in_scope:
596 relevant = vect_used_by_reduction;
597 break;
598
599 case vect_used_by_reduction:
600 if (gimple_code (stmt) == GIMPLE_PHI)
601 break;
602 /* fall through */
603
604 default:
605 if (vect_print_dump_info (REPORT_DETAILS))
606 fprintf (vect_dump, "unsupported use of reduction.");
607
608 VEC_free (gimple, heap, worklist);
609 return false;
610 }
611
612 live_p = false;
613 break;
614
615 case vect_nested_cycle:
616 if (tmp_relevant != vect_unused_in_scope
617 && tmp_relevant != vect_used_in_outer_by_reduction
618 && tmp_relevant != vect_used_in_outer)
619 {
620 if (vect_print_dump_info (REPORT_DETAILS))
621 fprintf (vect_dump, "unsupported use of nested cycle.");
622
623 VEC_free (gimple, heap, worklist);
624 return false;
625 }
626
627 live_p = false;
628 break;
629
630 case vect_double_reduction_def:
631 if (tmp_relevant != vect_unused_in_scope
632 && tmp_relevant != vect_used_by_reduction)
633 {
634 if (vect_print_dump_info (REPORT_DETAILS))
635 fprintf (vect_dump, "unsupported use of double reduction.");
636
637 VEC_free (gimple, heap, worklist);
638 return false;
639 }
640
641 live_p = false;
642 break;
643
644 default:
645 break;
646 }
647
648 if (is_pattern_stmt_p (vinfo_for_stmt (stmt)))
649 {
650 /* Pattern statements are not inserted into the code, so
651 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
652 have to scan the RHS or function arguments instead. */
653 if (is_gimple_assign (stmt))
654 {
655 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
656 tree op = gimple_assign_rhs1 (stmt);
657
658 i = 1;
659 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
660 {
661 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
662 live_p, relevant, &worklist)
663 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
664 live_p, relevant, &worklist))
665 {
666 VEC_free (gimple, heap, worklist);
667 return false;
668 }
669 i = 2;
670 }
671 for (; i < gimple_num_ops (stmt); i++)
672 {
673 op = gimple_op (stmt, i);
674 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
675 &worklist))
676 {
677 VEC_free (gimple, heap, worklist);
678 return false;
679 }
680 }
681 }
682 else if (is_gimple_call (stmt))
683 {
684 for (i = 0; i < gimple_call_num_args (stmt); i++)
685 {
686 tree arg = gimple_call_arg (stmt, i);
687 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
688 &worklist))
689 {
690 VEC_free (gimple, heap, worklist);
691 return false;
692 }
693 }
694 }
695 }
696 else
697 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
698 {
699 tree op = USE_FROM_PTR (use_p);
700 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
701 &worklist))
702 {
703 VEC_free (gimple, heap, worklist);
704 return false;
705 }
706 }
707 } /* while worklist */
708
709 VEC_free (gimple, heap, worklist);
710 return true;
711 }
712
713
714 /* Get the cost by calling the target's cost builtin. */
715
716 static inline
717 int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
718 {
719 tree dummy_type = NULL;
720 int dummy = 0;
721
722 return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
723 dummy_type, dummy);
724 }
725
726
727 /* Get cost for STMT. */
728
729 int
730 cost_for_stmt (gimple stmt)
731 {
732 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
733
734 switch (STMT_VINFO_TYPE (stmt_info))
735 {
736 case load_vec_info_type:
737 return vect_get_stmt_cost (scalar_load);
738 case store_vec_info_type:
739 return vect_get_stmt_cost (scalar_store);
740 case op_vec_info_type:
741 case condition_vec_info_type:
742 case assignment_vec_info_type:
743 case reduc_vec_info_type:
744 case induc_vec_info_type:
745 case type_promotion_vec_info_type:
746 case type_demotion_vec_info_type:
747 case type_conversion_vec_info_type:
748 case call_vec_info_type:
749 return vect_get_stmt_cost (scalar_stmt);
750 case undef_vec_info_type:
751 default:
752 gcc_unreachable ();
753 }
754 }
755
756 /* Function vect_model_simple_cost.
757
758 Models cost for simple operations, i.e. those that only emit ncopies of a
759 single op. Right now, this does not account for multiple insns that could
760 be generated for the single vector op. We will handle that shortly. */
761
762 void
763 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
764 enum vect_def_type *dt, slp_tree slp_node)
765 {
766 int i;
767 int inside_cost = 0, outside_cost = 0;
768
769 /* The SLP costs were already calculated during SLP tree build. */
770 if (PURE_SLP_STMT (stmt_info))
771 return;
772
773 inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);
774
775 /* FORNOW: Assuming a maximum of 2 args per stmt. */
776 for (i = 0; i < 2; i++)
777 {
778 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
779 outside_cost += vect_get_stmt_cost (vector_stmt);
780 }
781
782 if (vect_print_dump_info (REPORT_COST))
783 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
784 "outside_cost = %d .", inside_cost, outside_cost);
785
786 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
787 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
788 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
789 }
790
791
792 /* Function vect_cost_strided_group_size
793
794 For strided load or store, return the group_size only if it is the first
795 load or store of a group, else return 1. This ensures that group size is
796 only returned once per group. */
797
798 static int
799 vect_cost_strided_group_size (stmt_vec_info stmt_info)
800 {
801 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
802
803 if (first_stmt == STMT_VINFO_STMT (stmt_info))
804 return GROUP_SIZE (stmt_info);
805
806 return 1;
807 }
808
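/* Illustrative example (editorial sketch, not from the original sources):
   for an interleaved group of four accesses a[4*i], a[4*i+1], a[4*i+2]
   and a[4*i+3], the call on the first group element returns 4 while the
   calls on the remaining three return 1, so the group overhead is
   charged exactly once.  */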
809
810 /* Function vect_model_store_cost
811
812 Models cost for stores. In the case of strided accesses, one access
813 has the overhead of the strided access attributed to it. */
814
815 void
816 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
817 bool store_lanes_p, enum vect_def_type dt,
818 slp_tree slp_node)
819 {
820 int group_size;
821 unsigned int inside_cost = 0, outside_cost = 0;
822 struct data_reference *first_dr;
823 gimple first_stmt;
824
825 /* The SLP costs were already calculated during SLP tree build. */
826 if (PURE_SLP_STMT (stmt_info))
827 return;
828
829 if (dt == vect_constant_def || dt == vect_external_def)
830 outside_cost = vect_get_stmt_cost (scalar_to_vec);
831
832 /* Strided access? */
833 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
834 {
835 if (slp_node)
836 {
837 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
838 group_size = 1;
839 }
840 else
841 {
842 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
843 group_size = vect_cost_strided_group_size (stmt_info);
844 }
845
846 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
847 }
848 /* Not a strided access. */
849 else
850 {
851 group_size = 1;
852 first_dr = STMT_VINFO_DATA_REF (stmt_info);
853 }
854
855 /* We assume that the cost of a single store-lanes instruction is
856 equivalent to the cost of GROUP_SIZE separate stores. If a strided
857 access is instead being provided by a permute-and-store operation,
858 include the cost of the permutes. */
859 if (!store_lanes_p && group_size > 1)
860 {
861 /* Uses a high and low interleave operation for each needed permute. */
862 inside_cost = ncopies * exact_log2(group_size) * group_size
863 * vect_get_stmt_cost (vector_stmt);
864
865 if (vect_print_dump_info (REPORT_COST))
866 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
867 group_size);
868
869 }
870
871 /* Costs of the stores. */
872 vect_get_store_cost (first_dr, ncopies, &inside_cost);
873
874 if (vect_print_dump_info (REPORT_COST))
875 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
876 "outside_cost = %d .", inside_cost, outside_cost);
877
878 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
879 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
880 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
881 }
882
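/* Illustrative example (editorial sketch, hypothetical cost values): with
   ncopies = 1, group_size = 4 and a vector_stmt cost of 1, the
   permute-and-store path in vect_model_store_cost charges
   1 * log2(4) * 4 = 8 inside the loop for the interleaving, on top of the
   per-vector store costs added by vect_get_store_cost.  */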
883
884 /* Calculate cost of DR's memory access. */
885 void
886 vect_get_store_cost (struct data_reference *dr, int ncopies,
887 unsigned int *inside_cost)
888 {
889 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
890
891 switch (alignment_support_scheme)
892 {
893 case dr_aligned:
894 {
895 *inside_cost += ncopies * vect_get_stmt_cost (vector_store);
896
897 if (vect_print_dump_info (REPORT_COST))
898 fprintf (vect_dump, "vect_model_store_cost: aligned.");
899
900 break;
901 }
902
903 case dr_unaligned_supported:
904 {
905 gimple stmt = DR_STMT (dr);
906 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
907 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
908
909 /* Here, we assign an additional cost for the unaligned store. */
910 *inside_cost += ncopies
911 * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
912 vectype, DR_MISALIGNMENT (dr));
913
914 if (vect_print_dump_info (REPORT_COST))
915 fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
916 "hardware.");
917
918 break;
919 }
920
921 default:
922 gcc_unreachable ();
923 }
924 }
925
926
927 /* Function vect_model_load_cost
928
929 Models cost for loads. In the case of strided accesses, the last access
930 has the overhead of the strided access attributed to it. Since unaligned
931 accesses are supported for loads, we also account for the costs of the
932 access scheme chosen. */
933
934 void
935 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
936 slp_tree slp_node)
937 {
938 int group_size;
939 gimple first_stmt;
940 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
941 unsigned int inside_cost = 0, outside_cost = 0;
942
943 /* The SLP costs were already calculated during SLP tree build. */
944 if (PURE_SLP_STMT (stmt_info))
945 return;
946
947 /* Strided accesses? */
948 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
949 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
950 {
951 group_size = vect_cost_strided_group_size (stmt_info);
952 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
953 }
954 /* Not a strided access. */
955 else
956 {
957 group_size = 1;
958 first_dr = dr;
959 }
960
961 /* We assume that the cost of a single load-lanes instruction is
962 equivalent to the cost of GROUP_SIZE separate loads. If a strided
963 access is instead being provided by a load-and-permute operation,
964 include the cost of the permutes. */
965 if (!load_lanes_p && group_size > 1)
966 {
967 /* Uses even and odd extract operations for each needed permute. */
968 inside_cost = ncopies * exact_log2(group_size) * group_size
969 * vect_get_stmt_cost (vector_stmt);
970
971 if (vect_print_dump_info (REPORT_COST))
972 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
973 group_size);
974 }
975
976 /* The loads themselves. */
977 vect_get_load_cost (first_dr, ncopies,
978 ((!STMT_VINFO_STRIDED_ACCESS (stmt_info)) || group_size > 1
979 || slp_node),
980 &inside_cost, &outside_cost);
981
982 if (vect_print_dump_info (REPORT_COST))
983 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
984 "outside_cost = %d .", inside_cost, outside_cost);
985
986 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
987 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
988 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
989 }
990
991
992 /* Calculate cost of DR's memory access. */
993 void
994 vect_get_load_cost (struct data_reference *dr, int ncopies,
995 bool add_realign_cost, unsigned int *inside_cost,
996 unsigned int *outside_cost)
997 {
998 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
999
1000 switch (alignment_support_scheme)
1001 {
1002 case dr_aligned:
1003 {
1004 *inside_cost += ncopies * vect_get_stmt_cost (vector_load);
1005
1006 if (vect_print_dump_info (REPORT_COST))
1007 fprintf (vect_dump, "vect_model_load_cost: aligned.");
1008
1009 break;
1010 }
1011 case dr_unaligned_supported:
1012 {
1013 gimple stmt = DR_STMT (dr);
1014 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1015 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1016
1017 /* Here, we assign an additional cost for the unaligned load. */
1018 *inside_cost += ncopies
1019 * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
1020 vectype, DR_MISALIGNMENT (dr));
1021 if (vect_print_dump_info (REPORT_COST))
1022 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
1023 "hardware.");
1024
1025 break;
1026 }
1027 case dr_explicit_realign:
1028 {
1029 *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
1030 + vect_get_stmt_cost (vector_stmt));
1031
1032 /* FIXME: If the misalignment remains fixed across the iterations of
1033 the containing loop, the following cost should be added to the
1034 outside costs. */
1035 if (targetm.vectorize.builtin_mask_for_load)
1036 *inside_cost += vect_get_stmt_cost (vector_stmt);
1037
1038 break;
1039 }
1040 case dr_explicit_realign_optimized:
1041 {
1042 if (vect_print_dump_info (REPORT_COST))
1043 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
1044 "pipelined.");
1045
1046 /* Unaligned software pipeline has a load of an address, an initial
1047 load, and possibly a mask operation to "prime" the loop. However,
1048 if this is an access in a group of loads, which provide strided
1049 access, then the above cost should only be considered for one
1050 access in the group. Inside the loop, there is a load op
1051 and a realignment op. */
1052
1053 if (add_realign_cost)
1054 {
1055 *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
1056 if (targetm.vectorize.builtin_mask_for_load)
1057 *outside_cost += vect_get_stmt_cost (vector_stmt);
1058 }
1059
1060 *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
1061 + vect_get_stmt_cost (vector_stmt));
1062 break;
1063 }
1064
1065 default:
1066 gcc_unreachable ();
1067 }
1068 }
1069
1070
1071 /* Function vect_init_vector.
1072
1073 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1074 the vector elements of VECTOR_VAR. Place the initialization at GSI if it
1075 is not NULL. Otherwise, place the initialization at the loop preheader.
1076 Return the DEF of INIT_STMT.
1077 It will be used in the vectorization of STMT. */
1078
1079 tree
1080 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
1081 gimple_stmt_iterator *gsi)
1082 {
1083 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1084 tree new_var;
1085 gimple init_stmt;
1086 tree vec_oprnd;
1087 edge pe;
1088 tree new_temp;
1089 basic_block new_bb;
1090
1091 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
1092 add_referenced_var (new_var);
1093 init_stmt = gimple_build_assign (new_var, vector_var);
1094 new_temp = make_ssa_name (new_var, init_stmt);
1095 gimple_assign_set_lhs (init_stmt, new_temp);
1096
1097 if (gsi)
1098 vect_finish_stmt_generation (stmt, init_stmt, gsi);
1099 else
1100 {
1101 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1102
1103 if (loop_vinfo)
1104 {
1105 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1106
1107 if (nested_in_vect_loop_p (loop, stmt))
1108 loop = loop->inner;
1109
1110 pe = loop_preheader_edge (loop);
1111 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
1112 gcc_assert (!new_bb);
1113 }
1114 else
1115 {
1116 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1117 basic_block bb;
1118 gimple_stmt_iterator gsi_bb_start;
1119
1120 gcc_assert (bb_vinfo);
1121 bb = BB_VINFO_BB (bb_vinfo);
1122 gsi_bb_start = gsi_after_labels (bb);
1123 gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
1124 }
1125 }
1126
1127 if (vect_print_dump_info (REPORT_DETAILS))
1128 {
1129 fprintf (vect_dump, "created new init_stmt: ");
1130 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
1131 }
1132
1133 vec_oprnd = gimple_assign_lhs (init_stmt);
1134 return vec_oprnd;
1135 }
1136
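/* Illustrative example (editorial sketch, hypothetical SSA names): for a
   V4SI constant operand 3, the caller passes VECTOR_VAR = {3, 3, 3, 3}
   and vect_init_vector emits

        cst_.5 = { 3, 3, 3, 3 };

   on the loop preheader edge (when GSI is NULL), so the vector def is
   computed once before the loop.  */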
1137
1138 /* Function vect_get_vec_def_for_operand.
1139
1140 OP is an operand in STMT. This function returns a (vector) def that will be
1141 used in the vectorized stmt for STMT.
1142
1143 In the case that OP is an SSA_NAME which is defined in the loop, then
1144 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1145
1146 In case OP is an invariant or constant, a new stmt that creates a vector def
1147 needs to be introduced. */
1148
1149 tree
1150 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1151 {
1152 tree vec_oprnd;
1153 gimple vec_stmt;
1154 gimple def_stmt;
1155 stmt_vec_info def_stmt_info = NULL;
1156 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1157 unsigned int nunits;
1158 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1159 tree vec_inv;
1160 tree vec_cst;
1161 tree t = NULL_TREE;
1162 tree def;
1163 int i;
1164 enum vect_def_type dt;
1165 bool is_simple_use;
1166 tree vector_type;
1167
1168 if (vect_print_dump_info (REPORT_DETAILS))
1169 {
1170 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1171 print_generic_expr (vect_dump, op, TDF_SLIM);
1172 }
1173
1174 is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
1175 &dt);
1176 gcc_assert (is_simple_use);
1177 if (vect_print_dump_info (REPORT_DETAILS))
1178 {
1179 if (def)
1180 {
1181 fprintf (vect_dump, "def = ");
1182 print_generic_expr (vect_dump, def, TDF_SLIM);
1183 }
1184 if (def_stmt)
1185 {
1186 fprintf (vect_dump, " def_stmt = ");
1187 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1188 }
1189 }
1190
1191 switch (dt)
1192 {
1193 /* Case 1: operand is a constant. */
1194 case vect_constant_def:
1195 {
1196 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1197 gcc_assert (vector_type);
1198 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1199
1200 if (scalar_def)
1201 *scalar_def = op;
1202
1203 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1204 if (vect_print_dump_info (REPORT_DETAILS))
1205 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1206
1207 vec_cst = build_vector_from_val (vector_type,
1208 fold_convert (TREE_TYPE (vector_type),
1209 op));
1210 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
1211 }
1212
1213 /* Case 2: operand is defined outside the loop - loop invariant. */
1214 case vect_external_def:
1215 {
1216 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1217 gcc_assert (vector_type);
1218 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1219
1220 if (scalar_def)
1221 *scalar_def = def;
1222
1223 /* Create 'vec_inv = {inv,inv,..,inv}' */
1224 if (vect_print_dump_info (REPORT_DETAILS))
1225 fprintf (vect_dump, "Create vector_inv.");
1226
1227 for (i = nunits - 1; i >= 0; --i)
1228 {
1229 t = tree_cons (NULL_TREE, def, t);
1230 }
1231
1232 /* FIXME: use build_constructor directly. */
1233 vec_inv = build_constructor_from_list (vector_type, t);
1234 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
1235 }
1236
1237 /* Case 3: operand is defined inside the loop. */
1238 case vect_internal_def:
1239 {
1240 if (scalar_def)
1241 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1242
1243 /* Get the def from the vectorized stmt. */
1244 def_stmt_info = vinfo_for_stmt (def_stmt);
1245
1246 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1247 /* Get vectorized pattern statement. */
1248 if (!vec_stmt
1249 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1250 && !STMT_VINFO_RELEVANT (def_stmt_info))
1251 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1252 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1253 gcc_assert (vec_stmt);
1254 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1255 vec_oprnd = PHI_RESULT (vec_stmt);
1256 else if (is_gimple_call (vec_stmt))
1257 vec_oprnd = gimple_call_lhs (vec_stmt);
1258 else
1259 vec_oprnd = gimple_assign_lhs (vec_stmt);
1260 return vec_oprnd;
1261 }
1262
1263 /* Case 4: operand is defined by a loop header phi - reduction */
1264 case vect_reduction_def:
1265 case vect_double_reduction_def:
1266 case vect_nested_cycle:
1267 {
1268 struct loop *loop;
1269
1270 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1271 loop = (gimple_bb (def_stmt))->loop_father;
1272
1273 /* Get the def before the loop */
1274 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1275 return get_initial_def_for_reduction (stmt, op, scalar_def);
1276 }
1277
1278 /* Case 5: operand is defined by loop-header phi - induction. */
1279 case vect_induction_def:
1280 {
1281 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1282
1283 /* Get the def from the vectorized stmt. */
1284 def_stmt_info = vinfo_for_stmt (def_stmt);
1285 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1286 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1287 vec_oprnd = PHI_RESULT (vec_stmt);
1288 else
1289 vec_oprnd = gimple_get_lhs (vec_stmt);
1290 return vec_oprnd;
1291 }
1292
1293 default:
1294 gcc_unreachable ();
1295 }
1296 }
1297
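/* Illustrative example (editorial sketch, hypothetical SSA names): when
   vectorizing 'x_1 = y_2 * 3' with V4SI, asking for the def of operand 3
   (vect_constant_def) builds {3, 3, 3, 3} via vect_init_vector, while
   asking for the def of y_2 (vect_internal_def) simply returns the lhs of
   the STMT_VINFO_VEC_STMT already recorded for its defining stmt.  */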
1298
1299 /* Function vect_get_vec_def_for_stmt_copy
1300
1301 Return a vector-def for an operand. This function is used when the
1302 vectorized stmt to be created (by the caller to this function) is a "copy"
1303 created in case the vectorized result cannot fit in one vector, and several
1304 copies of the vector-stmt are required. In this case the vector-def is
1305 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1306 of the stmt that defines VEC_OPRND.
1307 DT is the type of the vector def VEC_OPRND.
1308
1309 Context:
1310 In case the vectorization factor (VF) is bigger than the number
1311 of elements that can fit in a vectype (nunits), we have to generate
1312 more than one vector stmt to vectorize the scalar stmt. This situation
1313 arises when there are multiple data-types operated upon in the loop; the
1314 smallest data-type determines the VF, and as a result, when vectorizing
1315 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1316 vector stmt (each computing a vector of 'nunits' results, and together
1317 computing 'VF' results in each iteration). This function is called when
1318 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1319 which VF=16 and nunits=4, so the number of copies required is 4):
1320
1321 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1322
1323 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1324 VS1.1: vx.1 = memref1 VS1.2
1325 VS1.2: vx.2 = memref2 VS1.3
1326 VS1.3: vx.3 = memref3
1327
1328 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1329 VSnew.1: vz1 = vx.1 + ... VSnew.2
1330 VSnew.2: vz2 = vx.2 + ... VSnew.3
1331 VSnew.3: vz3 = vx.3 + ...
1332
1333 The vectorization of S1 is explained in vectorizable_load.
1334 The vectorization of S2:
1335 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1336 the function 'vect_get_vec_def_for_operand' is called to
1337 get the relevant vector-def for each operand of S2. For operand x it
1338 returns the vector-def 'vx.0'.
1339
1340 To create the remaining copies of the vector-stmt (VSnew.j), this
1341 function is called to get the relevant vector-def for each operand. It is
1342 obtained from the respective VS1.j stmt, which is recorded in the
1343 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1344
1345 For example, to obtain the vector-def 'vx.1' in order to create the
1346 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1347 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1348 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1349 and return its def ('vx.1').
1350 Overall, to create the above sequence this function will be called 3 times:
1351 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1352 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1353 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1354
1355 tree
1356 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1357 {
1358 gimple vec_stmt_for_operand;
1359 stmt_vec_info def_stmt_info;
1360
1361 /* Do nothing; can reuse same def. */
1362 if (dt == vect_external_def || dt == vect_constant_def )
1363 return vec_oprnd;
1364
1365 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1366 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1367 gcc_assert (def_stmt_info);
1368 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1369 gcc_assert (vec_stmt_for_operand);
1370 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1371 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1372 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1373 else
1374 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1375 return vec_oprnd;
1376 }
1377
1378
1379 /* Get vectorized definitions for the operands to create a copy of an original
1380 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1381
1382 static void
1383 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1384 VEC(tree,heap) **vec_oprnds0,
1385 VEC(tree,heap) **vec_oprnds1)
1386 {
1387 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1388
1389 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1390 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1391
1392 if (vec_oprnds1 && *vec_oprnds1)
1393 {
1394 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1395 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1396 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1397 }
1398 }
1399
1400
1401 /* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not
1402 NULL. */
1403
1404 static void
1405 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1406 VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
1407 slp_tree slp_node)
1408 {
1409 if (slp_node)
1410 vect_get_slp_defs (op0, op1, slp_node, vec_oprnds0, vec_oprnds1, -1);
1411 else
1412 {
1413 tree vec_oprnd;
1414
1415 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1416 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1417 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1418
1419 if (op1)
1420 {
1421 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1422 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1423 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1424 }
1425 }
1426 }
1427
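/* Illustrative example (editorial sketch, hypothetical SSA names): for
   'z_3 = x_1 + y_2' outside of SLP, vect_get_vec_defs returns one-element
   vectors holding vx.0 and vy.0; the defs for later copies of the vector
   stmt are then obtained through vect_get_vec_defs_for_stmt_copy
   (vx.1/vy.1 and so on).  */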
1428
1429 /* Function vect_finish_stmt_generation.
1430
1431 Insert a new stmt. */
1432
1433 void
1434 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1435 gimple_stmt_iterator *gsi)
1436 {
1437 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1438 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1439 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1440
1441 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1442
1443 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1444
1445 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1446 bb_vinfo));
1447
1448 if (vect_print_dump_info (REPORT_DETAILS))
1449 {
1450 fprintf (vect_dump, "add new stmt: ");
1451 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1452 }
1453
1454 gimple_set_location (vec_stmt, gimple_location (stmt));
1455 }
1456
1457 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1458 a function declaration if the target has a vectorized version
1459 of the function, or NULL_TREE if the function cannot be vectorized. */
1460
1461 tree
1462 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1463 {
1464 tree fndecl = gimple_call_fndecl (call);
1465
1466 /* We only handle functions that do not read or clobber memory -- i.e.
1467 const or novops ones. */
1468 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1469 return NULL_TREE;
1470
1471 if (!fndecl
1472 || TREE_CODE (fndecl) != FUNCTION_DECL
1473 || !DECL_BUILT_IN (fndecl))
1474 return NULL_TREE;
1475
1476 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1477 vectype_in);
1478 }
1479
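/* Illustrative example (editorial sketch): a target's
   builtin_vectorized_function hook may map, say, BUILT_IN_SQRT with a
   V2DF output type to its own vector square-root builtin; when no such
   mapping exists the hook returns NULL_TREE and vectorizable_call below
   rejects the stmt.  */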
1480 /* Function vectorizable_call.
1481
1482 Check if STMT performs a function call that can be vectorized.
1483 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1484 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1485 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1486
1487 static bool
1488 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
1489 {
1490 tree vec_dest;
1491 tree scalar_dest;
1492 tree op, type;
1493 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1494 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1495 tree vectype_out, vectype_in;
1496 int nunits_in;
1497 int nunits_out;
1498 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1499 tree fndecl, new_temp, def, rhs_type;
1500 gimple def_stmt;
1501 enum vect_def_type dt[3]
1502 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1503 gimple new_stmt = NULL;
1504 int ncopies, j;
1505 VEC(tree, heap) *vargs = NULL;
1506 enum { NARROW, NONE, WIDEN } modifier;
1507 size_t i, nargs;
1508 tree lhs;
1509
1510 /* FORNOW: unsupported in basic block SLP. */
1511 gcc_assert (loop_vinfo);
1512
1513 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1514 return false;
1515
1516 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1517 return false;
1518
1519 /* FORNOW: SLP not supported. */
1520 if (STMT_SLP_TYPE (stmt_info))
1521 return false;
1522
1523 /* Is STMT a vectorizable call? */
1524 if (!is_gimple_call (stmt))
1525 return false;
1526
1527 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1528 return false;
1529
1530 if (stmt_can_throw_internal (stmt))
1531 return false;
1532
1533 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1534
1535 /* Process function arguments. */
1536 rhs_type = NULL_TREE;
1537 vectype_in = NULL_TREE;
1538 nargs = gimple_call_num_args (stmt);
1539
1540 /* Bail out if the function has more than three arguments; we do not have
1541 interesting builtin functions to vectorize with more than two arguments
1542 except for fma. Zero arguments is not supported either. */
1543 if (nargs == 0 || nargs > 3)
1544 return false;
1545
1546 for (i = 0; i < nargs; i++)
1547 {
1548 tree opvectype;
1549
1550 op = gimple_call_arg (stmt, i);
1551
1552 /* We can only handle calls with arguments of the same type. */
1553 if (rhs_type
1554 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1555 {
1556 if (vect_print_dump_info (REPORT_DETAILS))
1557 fprintf (vect_dump, "argument types differ.");
1558 return false;
1559 }
1560 if (!rhs_type)
1561 rhs_type = TREE_TYPE (op);
1562
1563 if (!vect_is_simple_use_1 (op, loop_vinfo, NULL,
1564 &def_stmt, &def, &dt[i], &opvectype))
1565 {
1566 if (vect_print_dump_info (REPORT_DETAILS))
1567 fprintf (vect_dump, "use not simple.");
1568 return false;
1569 }
1570
1571 if (!vectype_in)
1572 vectype_in = opvectype;
1573 else if (opvectype
1574 && opvectype != vectype_in)
1575 {
1576 if (vect_print_dump_info (REPORT_DETAILS))
1577 fprintf (vect_dump, "argument vector types differ.");
1578 return false;
1579 }
1580 }
1581 /* If all arguments are external or constant defs use a vector type with
1582 the same size as the output vector type. */
1583 if (!vectype_in)
1584 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1585 if (vec_stmt)
1586 gcc_assert (vectype_in);
1587 if (!vectype_in)
1588 {
1589 if (vect_print_dump_info (REPORT_DETAILS))
1590 {
1591 fprintf (vect_dump, "no vectype for scalar type ");
1592 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1593 }
1594
1595 return false;
1596 }
1597
1598 /* FORNOW */
1599 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1600 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1601 if (nunits_in == nunits_out / 2)
1602 modifier = NARROW;
1603 else if (nunits_out == nunits_in)
1604 modifier = NONE;
1605 else if (nunits_out == nunits_in / 2)
1606 modifier = WIDEN;
1607 else
1608 return false;
1609
1610 /* For now, we only vectorize functions if a target specific builtin
1611 is available. TODO -- in some cases, it might be profitable to
1612 insert the calls for pieces of the vector, in order to be able
1613 to vectorize other operations in the loop. */
1614 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1615 if (fndecl == NULL_TREE)
1616 {
1617 if (vect_print_dump_info (REPORT_DETAILS))
1618 fprintf (vect_dump, "function is not vectorizable.");
1619
1620 return false;
1621 }
1622
1623 gcc_assert (!gimple_vuse (stmt));
1624
1625 if (modifier == NARROW)
1626 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1627 else
1628 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1629
1630 /* Sanity check: make sure that at least one copy of the vectorized stmt
1631 needs to be generated. */
1632 gcc_assert (ncopies >= 1);
1633
1634 if (!vec_stmt) /* transformation not required. */
1635 {
1636 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1637 if (vect_print_dump_info (REPORT_DETAILS))
1638 fprintf (vect_dump, "=== vectorizable_call ===");
1639 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1640 return true;
1641 }
1642
1643 /** Transform. **/
1644
1645 if (vect_print_dump_info (REPORT_DETAILS))
1646 fprintf (vect_dump, "transform call.");
1647
1648 /* Handle def. */
1649 scalar_dest = gimple_call_lhs (stmt);
1650 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1651
1652 prev_stmt_info = NULL;
1653 switch (modifier)
1654 {
1655 case NONE:
1656 for (j = 0; j < ncopies; ++j)
1657 {
1658 /* Build argument list for the vectorized call. */
1659 if (j == 0)
1660 vargs = VEC_alloc (tree, heap, nargs);
1661 else
1662 VEC_truncate (tree, vargs, 0);
1663
1664 for (i = 0; i < nargs; i++)
1665 {
1666 op = gimple_call_arg (stmt, i);
1667 if (j == 0)
1668 vec_oprnd0
1669 = vect_get_vec_def_for_operand (op, stmt, NULL);
1670 else
1671 {
1672 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1673 vec_oprnd0
1674 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1675 }
1676
1677 VEC_quick_push (tree, vargs, vec_oprnd0);
1678 }
1679
1680 new_stmt = gimple_build_call_vec (fndecl, vargs);
1681 new_temp = make_ssa_name (vec_dest, new_stmt);
1682 gimple_call_set_lhs (new_stmt, new_temp);
1683
1684 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1685 mark_symbols_for_renaming (new_stmt);
1686
1687 if (j == 0)
1688 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1689 else
1690 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1691
1692 prev_stmt_info = vinfo_for_stmt (new_stmt);
1693 }
1694
1695 break;
1696
1697 case NARROW:
1698 for (j = 0; j < ncopies; ++j)
1699 {
1700 /* Build argument list for the vectorized call. */
1701 if (j == 0)
1702 vargs = VEC_alloc (tree, heap, nargs * 2);
1703 else
1704 VEC_truncate (tree, vargs, 0);
1705
1706 for (i = 0; i < nargs; i++)
1707 {
1708 op = gimple_call_arg (stmt, i);
1709 if (j == 0)
1710 {
1711 vec_oprnd0
1712 = vect_get_vec_def_for_operand (op, stmt, NULL);
1713 vec_oprnd1
1714 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1715 }
1716 else
1717 {
1718 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
1719 vec_oprnd0
1720 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1721 vec_oprnd1
1722 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1723 }
1724
1725 VEC_quick_push (tree, vargs, vec_oprnd0);
1726 VEC_quick_push (tree, vargs, vec_oprnd1);
1727 }
1728
1729 new_stmt = gimple_build_call_vec (fndecl, vargs);
1730 new_temp = make_ssa_name (vec_dest, new_stmt);
1731 gimple_call_set_lhs (new_stmt, new_temp);
1732
1733 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1734 mark_symbols_for_renaming (new_stmt);
1735
1736 if (j == 0)
1737 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1738 else
1739 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1740
1741 prev_stmt_info = vinfo_for_stmt (new_stmt);
1742 }
1743
1744 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1745
1746 break;
1747
1748 case WIDEN:
1749 /* No current target implements this case. */
1750 return false;
1751 }
1752
1753 VEC_free (tree, heap, vargs);
1754
1755 /* Update the exception handling table with the vector stmt if necessary. */
1756 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1757 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1758
1759 /* The call in STMT might prevent it from being removed in DCE.
1760 However, we cannot remove it here, due to the way the SSA name
1761 it defines is mapped to the new definition. So just replace the
1762 rhs of the statement with something harmless. */
1763
1764 type = TREE_TYPE (scalar_dest);
1765 if (is_pattern_stmt_p (stmt_info))
1766 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
1767 else
1768 lhs = gimple_call_lhs (stmt);
1769 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
1770 set_vinfo_for_stmt (new_stmt, stmt_info);
1771 set_vinfo_for_stmt (stmt, NULL);
1772 STMT_VINFO_STMT (stmt_info) = new_stmt;
1773 gsi_replace (gsi, new_stmt, false);
1774 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1775
1776 return true;
1777 }
1778
1779
1780 /* Function vect_gen_widened_results_half
1781
1782 Create a vector stmt whose code and number of arguments are given by CODE
1783 and OP_TYPE, whose result variable is VEC_DEST, and whose arguments are
1784 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
1785 In the case that CODE is a CALL_EXPR, this means that a call to DECL
1786 needs to be created (DECL is a function-decl of a target-builtin).
1787 STMT is the original scalar stmt that we are vectorizing. */
1788
1789 static gimple
1790 vect_gen_widened_results_half (enum tree_code code,
1791 tree decl,
1792 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1793 tree vec_dest, gimple_stmt_iterator *gsi,
1794 gimple stmt)
1795 {
1796 gimple new_stmt;
1797 tree new_temp;
1798
1799 /* Generate half of the widened result: */
1800 if (code == CALL_EXPR)
1801 {
1802 /* Target specific support */
1803 if (op_type == binary_op)
1804 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1805 else
1806 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1807 new_temp = make_ssa_name (vec_dest, new_stmt);
1808 gimple_call_set_lhs (new_stmt, new_temp);
1809 }
1810 else
1811 {
1812 /* Generic support */
1813 gcc_assert (op_type == TREE_CODE_LENGTH (code));
1814 if (op_type != binary_op)
1815 vec_oprnd1 = NULL;
1816 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1817 vec_oprnd1);
1818 new_temp = make_ssa_name (vec_dest, new_stmt);
1819 gimple_assign_set_lhs (new_stmt, new_temp);
1820 }
1821 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1822
1823 return new_stmt;
1824 }
1825
1826
1827 /* Check if STMT performs a conversion operation that can be vectorized.
1828 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1829 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1830 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
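/* For example (an illustration, assuming a target that provides the
   corresponding conversion support), loops such as
       for (i = 0; i < n; i++)  f[i] = (float) a[i];   <-- FLOAT_EXPR
       for (i = 0; i < n; i++)  a[i] = (int) f[i];     <-- FIX_TRUNC_EXPR
   contain the kind of conversion statements handled by this function. */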
1831
1832 static bool
1833 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
1834 gimple *vec_stmt, slp_tree slp_node)
1835 {
1836 tree vec_dest;
1837 tree scalar_dest;
1838 tree op0;
1839 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1840 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1841 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1842 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
1843 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
1844 tree new_temp;
1845 tree def;
1846 gimple def_stmt;
1847 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1848 gimple new_stmt = NULL;
1849 stmt_vec_info prev_stmt_info;
1850 int nunits_in;
1851 int nunits_out;
1852 tree vectype_out, vectype_in;
1853 int ncopies, j;
1854 tree rhs_type;
1855 tree builtin_decl;
1856 enum { NARROW, NONE, WIDEN } modifier;
1857 int i;
1858 VEC(tree,heap) *vec_oprnds0 = NULL;
1859 tree vop0;
1860 VEC(tree,heap) *dummy = NULL;
1861 int dummy_int;
1862
1863 /* Is STMT a vectorizable conversion? */
1864
1865 /* FORNOW: unsupported in basic block SLP. */
1866 gcc_assert (loop_vinfo);
1867
1868 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1869 return false;
1870
1871 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1872 return false;
1873
1874 if (!is_gimple_assign (stmt))
1875 return false;
1876
1877 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1878 return false;
1879
1880 code = gimple_assign_rhs_code (stmt);
1881 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
1882 return false;
1883
1884 /* Check types of lhs and rhs. */
1885 scalar_dest = gimple_assign_lhs (stmt);
1886 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1887
1888 op0 = gimple_assign_rhs1 (stmt);
1889 rhs_type = TREE_TYPE (op0);
1890 /* Check the operands of the operation. */
1891 if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
1892 &def_stmt, &def, &dt[0], &vectype_in))
1893 {
1894 if (vect_print_dump_info (REPORT_DETAILS))
1895 fprintf (vect_dump, "use not simple.");
1896 return false;
1897 }
1898 /* If op0 is an external or constant def, use a vector type of
1899 the same size as the output vector type. */
1900 if (!vectype_in)
1901 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1902 if (vec_stmt)
1903 gcc_assert (vectype_in);
1904 if (!vectype_in)
1905 {
1906 if (vect_print_dump_info (REPORT_DETAILS))
1907 {
1908 fprintf (vect_dump, "no vectype for scalar type ");
1909 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1910 }
1911
1912 return false;
1913 }
1914
1915 /* FORNOW */
1916 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1917 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1918 if (nunits_in == nunits_out / 2)
1919 modifier = NARROW;
1920 else if (nunits_out == nunits_in)
1921 modifier = NONE;
1922 else if (nunits_out == nunits_in / 2)
1923 modifier = WIDEN;
1924 else
1925 return false;
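/* E.g., on a target with 128-bit vectors: converting int to float
   (V4SI -> V4SF) has nunits_in == nunits_out and is a NONE conversion;
   converting double to int (V2DF -> V4SI) is a NARROW conversion, and
   converting int to double (V4SI -> V2DF) is a WIDEN conversion. */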
1926
1927 if (modifier == NARROW)
1928 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1929 else
1930 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1931
1932 /* Multiple types in SLP are handled by creating the appropriate number of
1933 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1934 case of SLP. */
1935 if (slp_node || PURE_SLP_STMT (stmt_info))
1936 ncopies = 1;
1937
1938 /* Sanity check: make sure that at least one copy of the vectorized stmt
1939 needs to be generated. */
1940 gcc_assert (ncopies >= 1);
1941
1942 /* Supportable by target? */
1943 if ((modifier == NONE
1944 && !targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
1945 || (modifier == WIDEN
1946 && !supportable_widening_operation (code, stmt,
1947 vectype_out, vectype_in,
1948 &decl1, &decl2,
1949 &code1, &code2,
1950 &dummy_int, &dummy))
1951 || (modifier == NARROW
1952 && !supportable_narrowing_operation (code, vectype_out, vectype_in,
1953 &code1, &dummy_int, &dummy)))
1954 {
1955 if (vect_print_dump_info (REPORT_DETAILS))
1956 fprintf (vect_dump, "conversion not supported by target.");
1957 return false;
1958 }
1959
1960 if (modifier != NONE)
1961 {
1962 /* FORNOW: SLP not supported. */
1963 if (STMT_SLP_TYPE (stmt_info))
1964 return false;
1965 }
1966
1967 if (!vec_stmt) /* transformation not required. */
1968 {
1969 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
1970 return true;
1971 }
1972
1973 /** Transform. **/
1974 if (vect_print_dump_info (REPORT_DETAILS))
1975 fprintf (vect_dump, "transform conversion.");
1976
1977 /* Handle def. */
1978 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1979
1980 if (modifier == NONE && !slp_node)
1981 vec_oprnds0 = VEC_alloc (tree, heap, 1);
1982
1983 prev_stmt_info = NULL;
1984 switch (modifier)
1985 {
1986 case NONE:
1987 for (j = 0; j < ncopies; j++)
1988 {
1989 if (j == 0)
1990 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
1991 else
1992 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
1993
1994 builtin_decl =
1995 targetm.vectorize.builtin_conversion (code,
1996 vectype_out, vectype_in);
1997 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
1998 {
1999 /* Arguments are ready. Create the new vector stmt. */
2000 new_stmt = gimple_build_call (builtin_decl, 1, vop0);
2001 new_temp = make_ssa_name (vec_dest, new_stmt);
2002 gimple_call_set_lhs (new_stmt, new_temp);
2003 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2004 if (slp_node)
2005 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2006 }
2007
2008 if (j == 0)
2009 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2010 else
2011 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2012 prev_stmt_info = vinfo_for_stmt (new_stmt);
2013 }
2014 break;
2015
2016 case WIDEN:
2017 /* In case the vectorization factor (VF) is bigger than the number
2018 of elements that we can fit in a vectype (nunits), we have to
2019 generate more than one vector stmt - i.e - we need to "unroll"
2020 the vector stmt by a factor VF/nunits. */
2021 for (j = 0; j < ncopies; j++)
2022 {
2023 if (j == 0)
2024 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2025 else
2026 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2027
2028 /* Generate first half of the widened result: */
2029 new_stmt
2030 = vect_gen_widened_results_half (code1, decl1,
2031 vec_oprnd0, vec_oprnd1,
2032 unary_op, vec_dest, gsi, stmt);
2033 if (j == 0)
2034 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2035 else
2036 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2037 prev_stmt_info = vinfo_for_stmt (new_stmt);
2038
2039 /* Generate second half of the widened result: */
2040 new_stmt
2041 = vect_gen_widened_results_half (code2, decl2,
2042 vec_oprnd0, vec_oprnd1,
2043 unary_op, vec_dest, gsi, stmt);
2044 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2045 prev_stmt_info = vinfo_for_stmt (new_stmt);
2046 }
2047 break;
2048
2049 case NARROW:
2050 /* In case the vectorization factor (VF) is bigger than the number
2051 of elements that we can fit in a vectype (nunits), we have to
2052 generate more than one vector stmt - i.e - we need to "unroll"
2053 the vector stmt by a factor VF/nunits. */
2054 for (j = 0; j < ncopies; j++)
2055 {
2056 /* Handle uses. */
2057 if (j == 0)
2058 {
2059 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2060 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2061 }
2062 else
2063 {
2064 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
2065 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2066 }
2067
2068 /* Arguments are ready. Create the new vector stmt. */
2069 new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
2070 vec_oprnd1);
2071 new_temp = make_ssa_name (vec_dest, new_stmt);
2072 gimple_assign_set_lhs (new_stmt, new_temp);
2073 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2074
2075 if (j == 0)
2076 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2077 else
2078 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2079
2080 prev_stmt_info = vinfo_for_stmt (new_stmt);
2081 }
2082
2083 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2084 }
2085
2086 if (vec_oprnds0)
2087 VEC_free (tree, heap, vec_oprnds0);
2088
2089 return true;
2090 }
2091
2092
2093 /* Function vectorizable_assignment.
2094
2095 Check if STMT performs an assignment (copy) that can be vectorized.
2096 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2097 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2098 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
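/* For example, a loop like
       for (i = 0; i < n; i++)  b[i] = a[i];
   or one whose body is a conversion that only reinterprets the bits
   (e.g. int to unsigned int) is vectorized here as a plain vector copy,
   possibly wrapped in a VIEW_CONVERT_EXPR. */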
2099
2100 static bool
2101 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2102 gimple *vec_stmt, slp_tree slp_node)
2103 {
2104 tree vec_dest;
2105 tree scalar_dest;
2106 tree op;
2107 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2108 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2109 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2110 tree new_temp;
2111 tree def;
2112 gimple def_stmt;
2113 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2114 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2115 int ncopies;
2116 int i, j;
2117 VEC(tree,heap) *vec_oprnds = NULL;
2118 tree vop;
2119 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2120 gimple new_stmt = NULL;
2121 stmt_vec_info prev_stmt_info = NULL;
2122 enum tree_code code;
2123 tree vectype_in;
2124
2125 /* Multiple types in SLP are handled by creating the appropriate number of
2126 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2127 case of SLP. */
2128 if (slp_node || PURE_SLP_STMT (stmt_info))
2129 ncopies = 1;
2130 else
2131 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2132
2133 gcc_assert (ncopies >= 1);
2134
2135 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2136 return false;
2137
2138 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2139 return false;
2140
2141 /* Is vectorizable assignment? */
2142 if (!is_gimple_assign (stmt))
2143 return false;
2144
2145 scalar_dest = gimple_assign_lhs (stmt);
2146 if (TREE_CODE (scalar_dest) != SSA_NAME)
2147 return false;
2148
2149 code = gimple_assign_rhs_code (stmt);
2150 if (gimple_assign_single_p (stmt)
2151 || code == PAREN_EXPR
2152 || CONVERT_EXPR_CODE_P (code))
2153 op = gimple_assign_rhs1 (stmt);
2154 else
2155 return false;
2156
2157 if (code == VIEW_CONVERT_EXPR)
2158 op = TREE_OPERAND (op, 0);
2159
2160 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
2161 &def_stmt, &def, &dt[0], &vectype_in))
2162 {
2163 if (vect_print_dump_info (REPORT_DETAILS))
2164 fprintf (vect_dump, "use not simple.");
2165 return false;
2166 }
2167
2168 /* We can handle NOP_EXPR conversions that do not change the number
2169 of elements or the vector size. */
2170 if ((CONVERT_EXPR_CODE_P (code)
2171 || code == VIEW_CONVERT_EXPR)
2172 && (!vectype_in
2173 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2174 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2175 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2176 return false;
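/* E.g. a conversion from int to unsigned int keeps both the number of
   elements and the vector size, so it passes the check above; a
   conversion from int to short changes the number of elements per
   vector and is handled as a type demotion instead. */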
2177
2178 /* We do not handle bit-precision changes. */
2179 if ((CONVERT_EXPR_CODE_P (code)
2180 || code == VIEW_CONVERT_EXPR)
2181 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2182 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2183 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2184 || ((TYPE_PRECISION (TREE_TYPE (op))
2185 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2186 /* But a conversion that does not change the bit-pattern is ok. */
2187 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2188 > TYPE_PRECISION (TREE_TYPE (op)))
2189 && TYPE_UNSIGNED (TREE_TYPE (op))))
2190 {
2191 if (vect_print_dump_info (REPORT_DETAILS))
2192 fprintf (vect_dump, "type conversion to/from bit-precision "
2193 "unsupported.");
2194 return false;
2195 }
2196
2197 if (!vec_stmt) /* transformation not required. */
2198 {
2199 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2200 if (vect_print_dump_info (REPORT_DETAILS))
2201 fprintf (vect_dump, "=== vectorizable_assignment ===");
2202 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2203 return true;
2204 }
2205
2206 /** Transform. **/
2207 if (vect_print_dump_info (REPORT_DETAILS))
2208 fprintf (vect_dump, "transform assignment.");
2209
2210 /* Handle def. */
2211 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2212
2213 /* Handle use. */
2214 for (j = 0; j < ncopies; j++)
2215 {
2216 /* Handle uses. */
2217 if (j == 0)
2218 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2219 else
2220 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2221
2222 /* Arguments are ready. Create the new vector stmt. */
2223 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
2224 {
2225 if (CONVERT_EXPR_CODE_P (code)
2226 || code == VIEW_CONVERT_EXPR)
2227 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2228 new_stmt = gimple_build_assign (vec_dest, vop);
2229 new_temp = make_ssa_name (vec_dest, new_stmt);
2230 gimple_assign_set_lhs (new_stmt, new_temp);
2231 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2232 if (slp_node)
2233 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2234 }
2235
2236 if (slp_node)
2237 continue;
2238
2239 if (j == 0)
2240 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2241 else
2242 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2243
2244 prev_stmt_info = vinfo_for_stmt (new_stmt);
2245 }
2246
2247 VEC_free (tree, heap, vec_oprnds);
2248 return true;
2249 }
2250
2251
2252 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2253 either as shift by a scalar or by a vector. */
2254
2255 bool
2256 vect_supportable_shift (enum tree_code code, tree scalar_type)
2257 {
2258
2259 enum machine_mode vec_mode;
2260 optab optab;
2261 int icode;
2262 tree vectype;
2263
2264 vectype = get_vectype_for_scalar_type (scalar_type);
2265 if (!vectype)
2266 return false;
2267
2268 optab = optab_for_tree_code (code, vectype, optab_scalar);
2269 if (!optab
2270 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2271 {
2272 optab = optab_for_tree_code (code, vectype, optab_vector);
2273 if (!optab
2274 || (optab_handler (optab, TYPE_MODE (vectype))
2275 == CODE_FOR_nothing))
2276 return false;
2277 }
2278
2279 vec_mode = TYPE_MODE (vectype);
2280 icode = (int) optab_handler (optab, vec_mode);
2281 if (icode == CODE_FOR_nothing)
2282 return false;
2283
2284 return true;
2285 }
2286
2287
2288 /* Function vectorizable_shift.
2289
2290 Check if STMT performs a shift operation that can be vectorized.
2291 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2292 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2293 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
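/* For example,
       for (i = 0; i < n; i++)  a[i] = b[i] << c[i];
   uses a vector/vector shift (the shift amount varies per element), while
       for (i = 0; i < n; i++)  a[i] = b[i] << k;
   uses a vector/scalar shift (the invariant amount K is used for all
   elements), provided the target supports the corresponding optab. */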
2294
2295 static bool
2296 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
2297 gimple *vec_stmt, slp_tree slp_node)
2298 {
2299 tree vec_dest;
2300 tree scalar_dest;
2301 tree op0, op1 = NULL;
2302 tree vec_oprnd1 = NULL_TREE;
2303 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2304 tree vectype;
2305 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2306 enum tree_code code;
2307 enum machine_mode vec_mode;
2308 tree new_temp;
2309 optab optab;
2310 int icode;
2311 enum machine_mode optab_op2_mode;
2312 tree def;
2313 gimple def_stmt;
2314 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2315 gimple new_stmt = NULL;
2316 stmt_vec_info prev_stmt_info;
2317 int nunits_in;
2318 int nunits_out;
2319 tree vectype_out;
2320 int ncopies;
2321 int j, i;
2322 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2323 tree vop0, vop1;
2324 unsigned int k;
2325 bool scalar_shift_arg = true;
2326 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2327 int vf;
2328
2329 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2330 return false;
2331
2332 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2333 return false;
2334
2335 /* Is STMT a vectorizable binary/unary operation? */
2336 if (!is_gimple_assign (stmt))
2337 return false;
2338
2339 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2340 return false;
2341
2342 code = gimple_assign_rhs_code (stmt);
2343
2344 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2345 || code == RROTATE_EXPR))
2346 return false;
2347
2348 scalar_dest = gimple_assign_lhs (stmt);
2349 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2350 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
2351 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2352 {
2353 if (vect_print_dump_info (REPORT_DETAILS))
2354 fprintf (vect_dump, "bit-precision shifts not supported.");
2355 return false;
2356 }
2357
2358 op0 = gimple_assign_rhs1 (stmt);
2359 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2360 &def_stmt, &def, &dt[0], &vectype))
2361 {
2362 if (vect_print_dump_info (REPORT_DETAILS))
2363 fprintf (vect_dump, "use not simple.");
2364 return false;
2365 }
2366 /* If op0 is an external or constant def, use a vector type with
2367 the same size as the output vector type. */
2368 if (!vectype)
2369 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2370 if (vec_stmt)
2371 gcc_assert (vectype);
2372 if (!vectype)
2373 {
2374 if (vect_print_dump_info (REPORT_DETAILS))
2375 {
2376 fprintf (vect_dump, "no vectype for scalar type ");
2377 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2378 }
2379
2380 return false;
2381 }
2382
2383 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2384 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2385 if (nunits_out != nunits_in)
2386 return false;
2387
2388 op1 = gimple_assign_rhs2 (stmt);
2389 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[1]))
2390 {
2391 if (vect_print_dump_info (REPORT_DETAILS))
2392 fprintf (vect_dump, "use not simple.");
2393 return false;
2394 }
2395
2396 if (loop_vinfo)
2397 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2398 else
2399 vf = 1;
2400
2401 /* Multiple types in SLP are handled by creating the appropriate number of
2402 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2403 case of SLP. */
2404 if (slp_node || PURE_SLP_STMT (stmt_info))
2405 ncopies = 1;
2406 else
2407 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2408
2409 gcc_assert (ncopies >= 1);
2410
2411 /* Determine whether the shift amount is a vector, or scalar. If the
2412 shift/rotate amount is a vector, use the vector/vector shift optabs. */
2413
2414 if (dt[1] == vect_internal_def && !slp_node)
2415 scalar_shift_arg = false;
2416 else if (dt[1] == vect_constant_def
2417 || dt[1] == vect_external_def
2418 || dt[1] == vect_internal_def)
2419 {
2420 /* In SLP, we need to check whether the shift count is the same
2421 for all the statements; in loops, if it is a constant or
2422 invariant, it is always a scalar shift. */
2423 if (slp_node)
2424 {
2425 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2426 gimple slpstmt;
2427
2428 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
2429 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
2430 scalar_shift_arg = false;
2431 }
2432 }
2433 else
2434 {
2435 if (vect_print_dump_info (REPORT_DETAILS))
2436 fprintf (vect_dump, "operand mode requires invariant argument.");
2437 return false;
2438 }
2439
2440 /* Vector shifted by vector. */
2441 if (!scalar_shift_arg)
2442 {
2443 optab = optab_for_tree_code (code, vectype, optab_vector);
2444 if (vect_print_dump_info (REPORT_DETAILS))
2445 fprintf (vect_dump, "vector/vector shift/rotate found.");
2446 }
2447 /* See if the machine has a vector shifted by scalar insn, and if not,
2448 then see if it has a vector shifted by vector insn. */
2449 else
2450 {
2451 optab = optab_for_tree_code (code, vectype, optab_scalar);
2452 if (optab
2453 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2454 {
2455 if (vect_print_dump_info (REPORT_DETAILS))
2456 fprintf (vect_dump, "vector/scalar shift/rotate found.");
2457 }
2458 else
2459 {
2460 optab = optab_for_tree_code (code, vectype, optab_vector);
2461 if (optab
2462 && (optab_handler (optab, TYPE_MODE (vectype))
2463 != CODE_FOR_nothing))
2464 {
2465 scalar_shift_arg = false;
2466
2467 if (vect_print_dump_info (REPORT_DETAILS))
2468 fprintf (vect_dump, "vector/vector shift/rotate found.");
2469
2470 /* Unlike the other binary operators, shifts/rotates have
2471 the rhs being int, instead of the same type as the lhs,
2472 so make sure the scalar is the right type if we are
2473 dealing with vectors of short/char. */
2474 if (dt[1] == vect_constant_def)
2475 op1 = fold_convert (TREE_TYPE (vectype), op1);
2476 }
2477 }
2478 }
2479
2480 /* Supportable by target? */
2481 if (!optab)
2482 {
2483 if (vect_print_dump_info (REPORT_DETAILS))
2484 fprintf (vect_dump, "no optab.");
2485 return false;
2486 }
2487 vec_mode = TYPE_MODE (vectype);
2488 icode = (int) optab_handler (optab, vec_mode);
2489 if (icode == CODE_FOR_nothing)
2490 {
2491 if (vect_print_dump_info (REPORT_DETAILS))
2492 fprintf (vect_dump, "op not supported by target.");
2493 /* Check only during analysis. */
2494 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2495 || (vf < vect_min_worthwhile_factor (code)
2496 && !vec_stmt))
2497 return false;
2498 if (vect_print_dump_info (REPORT_DETAILS))
2499 fprintf (vect_dump, "proceeding using word mode.");
2500 }
2501
2502 /* Worthwhile without SIMD support? Check only during analysis. */
2503 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2504 && vf < vect_min_worthwhile_factor (code)
2505 && !vec_stmt)
2506 {
2507 if (vect_print_dump_info (REPORT_DETAILS))
2508 fprintf (vect_dump, "not worthwhile without SIMD support.");
2509 return false;
2510 }
2511
2512 if (!vec_stmt) /* transformation not required. */
2513 {
2514 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
2515 if (vect_print_dump_info (REPORT_DETAILS))
2516 fprintf (vect_dump, "=== vectorizable_shift ===");
2517 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2518 return true;
2519 }
2520
2521 /** Transform. **/
2522
2523 if (vect_print_dump_info (REPORT_DETAILS))
2524 fprintf (vect_dump, "transform binary/unary operation.");
2525
2526 /* Handle def. */
2527 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2528
2529 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2530 created in the previous stages of the recursion, so no allocation is
2531 needed, except for the case of shift with scalar shift argument. In that
2532 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2533 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2534 In case of loop-based vectorization we allocate VECs of size 1. We
2535 allocate VEC_OPRNDS1 only in case of binary operation. */
2536 if (!slp_node)
2537 {
2538 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2539 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2540 }
2541 else if (scalar_shift_arg)
2542 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2543
2544 prev_stmt_info = NULL;
2545 for (j = 0; j < ncopies; j++)
2546 {
2547 /* Handle uses. */
2548 if (j == 0)
2549 {
2550 if (scalar_shift_arg)
2551 {
2552 /* Vector shl and shr insn patterns can be defined with scalar
2553 operand 2 (shift operand). In this case, use constant or loop
2554 invariant op1 directly, without extending it to vector mode
2555 first. */
2556 optab_op2_mode = insn_data[icode].operand[2].mode;
2557 if (!VECTOR_MODE_P (optab_op2_mode))
2558 {
2559 if (vect_print_dump_info (REPORT_DETAILS))
2560 fprintf (vect_dump, "operand 1 using scalar mode.");
2561 vec_oprnd1 = op1;
2562 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2563 if (slp_node)
2564 {
2565 /* Store vec_oprnd1 for every vector stmt to be created
2566 for SLP_NODE. We check during the analysis that all
2567 the shift arguments are the same.
2568 TODO: Allow different constants for different vector
2569 stmts generated for an SLP instance. */
2570 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2571 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2572 }
2573 }
2574 }
2575
2576 /* vec_oprnd1 is available if operand 1 should be of a scalar type
2577 (a special case for certain kinds of vector shifts); otherwise,
2578 operand 1 should be of a vector type (the usual case). */
2579 if (vec_oprnd1)
2580 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2581 slp_node);
2582 else
2583 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2584 slp_node);
2585 }
2586 else
2587 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2588
2589 /* Arguments are ready. Create the new vector stmt. */
2590 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2591 {
2592 vop1 = VEC_index (tree, vec_oprnds1, i);
2593 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2594 new_temp = make_ssa_name (vec_dest, new_stmt);
2595 gimple_assign_set_lhs (new_stmt, new_temp);
2596 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2597 if (slp_node)
2598 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2599 }
2600
2601 if (slp_node)
2602 continue;
2603
2604 if (j == 0)
2605 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2606 else
2607 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2608 prev_stmt_info = vinfo_for_stmt (new_stmt);
2609 }
2610
2611 VEC_free (tree, heap, vec_oprnds0);
2612 VEC_free (tree, heap, vec_oprnds1);
2613
2614 return true;
2615 }
2616
2617
2618 /* Function vectorizable_operation.
2619
2620 Check if STMT performs a binary, unary or ternary operation that can
2621 be vectorized.
2622 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2623 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2624 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
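/* Typical examples (an illustration only):
       for (i = 0; i < n; i++)  c[i] = a[i] + b[i];         <-- binary_op
       for (i = 0; i < n; i++)  b[i] = -a[i];               <-- unary_op
       for (i = 0; i < n; i++)  d[i] = a[i] * b[i] + c[i];  <-- ternary_op
                                                                (e.g. FMA_EXPR)
   Shifts and rotates are excluded; they go through vectorizable_shift. */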
2625
2626 static bool
2627 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
2628 gimple *vec_stmt, slp_tree slp_node)
2629 {
2630 tree vec_dest;
2631 tree scalar_dest;
2632 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
2633 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2634 tree vectype;
2635 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2636 enum tree_code code;
2637 enum machine_mode vec_mode;
2638 tree new_temp;
2639 int op_type;
2640 optab optab;
2641 int icode;
2642 tree def;
2643 gimple def_stmt;
2644 enum vect_def_type dt[3]
2645 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2646 gimple new_stmt = NULL;
2647 stmt_vec_info prev_stmt_info;
2648 int nunits_in;
2649 int nunits_out;
2650 tree vectype_out;
2651 int ncopies;
2652 int j, i;
2653 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
2654 tree vop0, vop1, vop2;
2655 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2656 int vf;
2657
2658 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2659 return false;
2660
2661 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2662 return false;
2663
2664 /* Is STMT a vectorizable binary/unary operation? */
2665 if (!is_gimple_assign (stmt))
2666 return false;
2667
2668 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2669 return false;
2670
2671 code = gimple_assign_rhs_code (stmt);
2672
2673 /* For pointer addition, we should use the normal plus for
2674 the vector addition. */
2675 if (code == POINTER_PLUS_EXPR)
2676 code = PLUS_EXPR;
2677
2678 /* Support only unary or binary operations. */
2679 op_type = TREE_CODE_LENGTH (code);
2680 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
2681 {
2682 if (vect_print_dump_info (REPORT_DETAILS))
2683 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
2684 op_type);
2685 return false;
2686 }
2687
2688 scalar_dest = gimple_assign_lhs (stmt);
2689 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2690
2691 /* Most operations cannot handle bit-precision types without extra
2692 truncations. */
2693 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2694 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2695 /* Exceptions are bitwise binary operations. */
2696 && code != BIT_IOR_EXPR
2697 && code != BIT_XOR_EXPR
2698 && code != BIT_AND_EXPR)
2699 {
2700 if (vect_print_dump_info (REPORT_DETAILS))
2701 fprintf (vect_dump, "bit-precision arithmetic not supported.");
2702 return false;
2703 }
2704
2705 op0 = gimple_assign_rhs1 (stmt);
2706 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2707 &def_stmt, &def, &dt[0], &vectype))
2708 {
2709 if (vect_print_dump_info (REPORT_DETAILS))
2710 fprintf (vect_dump, "use not simple.");
2711 return false;
2712 }
2713 /* If op0 is an external or constant def, use a vector type with
2714 the same size as the output vector type. */
2715 if (!vectype)
2716 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2717 if (vec_stmt)
2718 gcc_assert (vectype);
2719 if (!vectype)
2720 {
2721 if (vect_print_dump_info (REPORT_DETAILS))
2722 {
2723 fprintf (vect_dump, "no vectype for scalar type ");
2724 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2725 }
2726
2727 return false;
2728 }
2729
2730 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2731 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2732 if (nunits_out != nunits_in)
2733 return false;
2734
2735 if (op_type == binary_op || op_type == ternary_op)
2736 {
2737 op1 = gimple_assign_rhs2 (stmt);
2738 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2739 &dt[1]))
2740 {
2741 if (vect_print_dump_info (REPORT_DETAILS))
2742 fprintf (vect_dump, "use not simple.");
2743 return false;
2744 }
2745 }
2746 if (op_type == ternary_op)
2747 {
2748 op2 = gimple_assign_rhs3 (stmt);
2749 if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
2750 &dt[2]))
2751 {
2752 if (vect_print_dump_info (REPORT_DETAILS))
2753 fprintf (vect_dump, "use not simple.");
2754 return false;
2755 }
2756 }
2757
2758 if (loop_vinfo)
2759 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2760 else
2761 vf = 1;
2762
2763 /* Multiple types in SLP are handled by creating the appropriate number of
2764 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2765 case of SLP. */
2766 if (slp_node || PURE_SLP_STMT (stmt_info))
2767 ncopies = 1;
2768 else
2769 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2770
2771 gcc_assert (ncopies >= 1);
2772
2773 /* Shifts are handled in vectorizable_shift (). */
2774 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2775 || code == RROTATE_EXPR)
2776 return false;
2777
2778 optab = optab_for_tree_code (code, vectype, optab_default);
2779
2780 /* Supportable by target? */
2781 if (!optab)
2782 {
2783 if (vect_print_dump_info (REPORT_DETAILS))
2784 fprintf (vect_dump, "no optab.");
2785 return false;
2786 }
2787 vec_mode = TYPE_MODE (vectype);
2788 icode = (int) optab_handler (optab, vec_mode);
2789 if (icode == CODE_FOR_nothing)
2790 {
2791 if (vect_print_dump_info (REPORT_DETAILS))
2792 fprintf (vect_dump, "op not supported by target.");
2793 /* Check only during analysis. */
2794 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2795 || (vf < vect_min_worthwhile_factor (code)
2796 && !vec_stmt))
2797 return false;
2798 if (vect_print_dump_info (REPORT_DETAILS))
2799 fprintf (vect_dump, "proceeding using word mode.");
2800 }
2801
2802 /* Worthwhile without SIMD support? Check only during analysis. */
2803 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2804 && vf < vect_min_worthwhile_factor (code)
2805 && !vec_stmt)
2806 {
2807 if (vect_print_dump_info (REPORT_DETAILS))
2808 fprintf (vect_dump, "not worthwhile without SIMD support.");
2809 return false;
2810 }
2811
2812 if (!vec_stmt) /* transformation not required. */
2813 {
2814 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2815 if (vect_print_dump_info (REPORT_DETAILS))
2816 fprintf (vect_dump, "=== vectorizable_operation ===");
2817 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2818 return true;
2819 }
2820
2821 /** Transform. **/
2822
2823 if (vect_print_dump_info (REPORT_DETAILS))
2824 fprintf (vect_dump, "transform binary/unary operation.");
2825
2826 /* Handle def. */
2827 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2828
2829 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2830 created in the previous stages of the recursion, so no allocation is
2831 needed (shifts, which would need the scalar shift argument handling,
2832 are vectorized in vectorizable_shift instead). In case of loop-based
2833 vectorization we allocate VECs of size 1. We allocate VEC_OPRNDS1
2834 only in case of a binary or ternary operation, and VEC_OPRNDS2 only
2835 in case of a ternary operation. */
2836 if (!slp_node)
2837 {
2838 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2839 if (op_type == binary_op || op_type == ternary_op)
2840 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2841 if (op_type == ternary_op)
2842 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2843 }
2844
2845 /* In case the vectorization factor (VF) is bigger than the number
2846 of elements that we can fit in a vectype (nunits), we have to generate
2847 more than one vector stmt - i.e - we need to "unroll" the
2848 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2849 from one copy of the vector stmt to the next, in the field
2850 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2851 stages to find the correct vector defs to be used when vectorizing
2852 stmts that use the defs of the current stmt. The example below
2853 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
2854 we need to create 4 vectorized stmts):
2855
2856 before vectorization:
2857 RELATED_STMT VEC_STMT
2858 S1: x = memref - -
2859 S2: z = x + 1 - -
2860
2861 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2862 there):
2863 RELATED_STMT VEC_STMT
2864 VS1_0: vx0 = memref0 VS1_1 -
2865 VS1_1: vx1 = memref1 VS1_2 -
2866 VS1_2: vx2 = memref2 VS1_3 -
2867 VS1_3: vx3 = memref3 - -
2868 S1: x = load - VS1_0
2869 S2: z = x + 1 - -
2870
2871 step2: vectorize stmt S2 (done here):
2872 To vectorize stmt S2 we first need to find the relevant vector
2873 def for the first operand 'x'. This is, as usual, obtained from
2874 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2875 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2876 relevant vector def 'vx0'. Having found 'vx0' we can generate
2877 the vector stmt VS2_0, and as usual, record it in the
2878 STMT_VINFO_VEC_STMT of stmt S2.
2879 When creating the second copy (VS2_1), we obtain the relevant vector
2880 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2881 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2882 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2883 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2884 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2885 chain of stmts and pointers:
2886 RELATED_STMT VEC_STMT
2887 VS1_0: vx0 = memref0 VS1_1 -
2888 VS1_1: vx1 = memref1 VS1_2 -
2889 VS1_2: vx2 = memref2 VS1_3 -
2890 VS1_3: vx3 = memref3 - -
2891 S1: x = load - VS1_0
2892 VS2_0: vz0 = vx0 + v1 VS2_1 -
2893 VS2_1: vz1 = vx1 + v1 VS2_2 -
2894 VS2_2: vz2 = vx2 + v1 VS2_3 -
2895 VS2_3: vz3 = vx3 + v1 - -
2896 S2: z = x + 1 - VS2_0 */
2897
2898 prev_stmt_info = NULL;
2899 for (j = 0; j < ncopies; j++)
2900 {
2901 /* Handle uses. */
2902 if (j == 0)
2903 {
2904 if (op_type == binary_op || op_type == ternary_op)
2905 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2906 slp_node);
2907 else
2908 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2909 slp_node);
2910 if (op_type == ternary_op)
2911 {
2912 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2913 VEC_quick_push (tree, vec_oprnds2,
2914 vect_get_vec_def_for_operand (op2, stmt, NULL));
2915 }
2916 }
2917 else
2918 {
2919 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2920 if (op_type == ternary_op)
2921 {
2922 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
2923 VEC_quick_push (tree, vec_oprnds2,
2924 vect_get_vec_def_for_stmt_copy (dt[2],
2925 vec_oprnd));
2926 }
2927 }
2928
2929 /* Arguments are ready. Create the new vector stmt. */
2930 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2931 {
2932 vop1 = ((op_type == binary_op || op_type == ternary_op)
2933 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
2934 vop2 = ((op_type == ternary_op)
2935 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
2936 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
2937 vop0, vop1, vop2);
2938 new_temp = make_ssa_name (vec_dest, new_stmt);
2939 gimple_assign_set_lhs (new_stmt, new_temp);
2940 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2941 if (slp_node)
2942 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2943 }
2944
2945 if (slp_node)
2946 continue;
2947
2948 if (j == 0)
2949 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2950 else
2951 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2952 prev_stmt_info = vinfo_for_stmt (new_stmt);
2953 }
2954
2955 VEC_free (tree, heap, vec_oprnds0);
2956 if (vec_oprnds1)
2957 VEC_free (tree, heap, vec_oprnds1);
2958 if (vec_oprnds2)
2959 VEC_free (tree, heap, vec_oprnds2);
2960
2961 return true;
2962 }
2963
2964
2965 /* Get vectorized definitions for loop-based vectorization. For the first
2966 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2967 scalar operand), and for the rest we get a copy with
2968 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2969 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2970 The vectors are collected into VEC_OPRNDS. */
2971
2972 static void
2973 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2974 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2975 {
2976 tree vec_oprnd;
2977
2978 /* Get first vector operand. */
2979 /* All the vector operands except the very first one (which is the scalar
2980 operand) are stmt copies. */
2981 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2982 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2983 else
2984 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2985
2986 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2987
2988 /* Get second vector operand. */
2989 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2990 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2991
2992 *oprnd = vec_oprnd;
2993
2994 /* For conversion in multiple steps, continue to get operands
2995 recursively. */
2996 if (multi_step_cvt)
2997 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2998 }
2999
3000
3001 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3002 For multi-step conversions store the resulting vectors and call the function
3003 recursively. */
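/* For instance, when demoting int to short on a 128-bit target, every
   two V4SI operands from VEC_OPRNDS are combined by one vector pack
   statement into a single V8HI result; with a multi-step conversion the
   intermediate results are stored back into VEC_OPRNDS and the function
   recurses on them. */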
3004
3005 static void
3006 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
3007 int multi_step_cvt, gimple stmt,
3008 VEC (tree, heap) *vec_dsts,
3009 gimple_stmt_iterator *gsi,
3010 slp_tree slp_node, enum tree_code code,
3011 stmt_vec_info *prev_stmt_info)
3012 {
3013 unsigned int i;
3014 tree vop0, vop1, new_tmp, vec_dest;
3015 gimple new_stmt;
3016 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3017
3018 vec_dest = VEC_pop (tree, vec_dsts);
3019
3020 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
3021 {
3022 /* Create demotion operation. */
3023 vop0 = VEC_index (tree, *vec_oprnds, i);
3024 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
3025 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3026 new_tmp = make_ssa_name (vec_dest, new_stmt);
3027 gimple_assign_set_lhs (new_stmt, new_tmp);
3028 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3029
3030 if (multi_step_cvt)
3031 /* Store the resulting vector for the next recursive call. */
3032 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
3033 else
3034 {
3035 /* This is the last step of the conversion sequence. Store the
3036 vectors in SLP_NODE or in vector info of the scalar statement
3037 (or in STMT_VINFO_RELATED_STMT chain). */
3038 if (slp_node)
3039 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3040 else
3041 {
3042 if (!*prev_stmt_info)
3043 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3044 else
3045 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3046
3047 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3048 }
3049 }
3050 }
3051
3052 /* For multi-step demotion operations we first generate demotion operations
3053 from the source type to the intermediate types, and then combine the
3054 results (stored in VEC_OPRNDS) in demotion operation to the destination
3055 type. */
3056 if (multi_step_cvt)
3057 {
3058 /* At each level of recursion we have half of the operands we had at the
3059 previous level. */
3060 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
3061 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3062 stmt, vec_dsts, gsi, slp_node,
3063 code, prev_stmt_info);
3064 }
3065 }
3066
3067
3068 /* Function vectorizable_type_demotion
3069
3070 Check if STMT performs a binary or unary operation that involves
3071 type demotion, and if it can be vectorized.
3072 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3073 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3074 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
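/* For example,
       for (i = 0; i < n; i++)  s[i] = (short) x[i];
   where X is int and S is short is a type demotion; each pair of input
   vectors is narrowed into a single output vector, possibly through
   intermediate types when the difference in precision is larger
   (e.g. long to char). */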
3075
3076 static bool
3077 vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
3078 gimple *vec_stmt, slp_tree slp_node)
3079 {
3080 tree vec_dest;
3081 tree scalar_dest;
3082 tree op0;
3083 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3084 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3085 enum tree_code code, code1 = ERROR_MARK;
3086 tree def;
3087 gimple def_stmt;
3088 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3089 stmt_vec_info prev_stmt_info;
3090 int nunits_in;
3091 int nunits_out;
3092 tree vectype_out;
3093 int ncopies;
3094 int j, i;
3095 tree vectype_in;
3096 int multi_step_cvt = 0;
3097 VEC (tree, heap) *vec_oprnds0 = NULL;
3098 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
3099 tree last_oprnd, intermediate_type;
3100 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3101
3102 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3103 return false;
3104
3105 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3106 return false;
3107
3108 /* Is STMT a vectorizable type-demotion operation? */
3109 if (!is_gimple_assign (stmt))
3110 return false;
3111
3112 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3113 return false;
3114
3115 code = gimple_assign_rhs_code (stmt);
3116 if (!CONVERT_EXPR_CODE_P (code))
3117 return false;
3118
3119 scalar_dest = gimple_assign_lhs (stmt);
3120 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3121
3122 /* Check the operands of the operation. */
3123 op0 = gimple_assign_rhs1 (stmt);
3124 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3125 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3126 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3127 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)))))
3128 return false;
3129
3130 if (INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3131 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3132 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3133 || ((TYPE_PRECISION (TREE_TYPE (op0))
3134 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op0)))))))
3135 {
3136 if (vect_print_dump_info (REPORT_DETAILS))
3137 fprintf (vect_dump, "type demotion to/from bit-precision unsupported.");
3138 return false;
3139 }
3140
3141 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
3142 &def_stmt, &def, &dt[0], &vectype_in))
3143 {
3144 if (vect_print_dump_info (REPORT_DETAILS))
3145 fprintf (vect_dump, "use not simple.");
3146 return false;
3147 }
3148 /* If op0 is an external def, use a vector type with the
3149 same size as the output vector type if possible. */
3150 if (!vectype_in)
3151 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3152 if (vec_stmt)
3153 gcc_assert (vectype_in);
3154 if (!vectype_in)
3155 {
3156 if (vect_print_dump_info (REPORT_DETAILS))
3157 {
3158 fprintf (vect_dump, "no vectype for scalar type ");
3159 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3160 }
3161
3162 return false;
3163 }
3164
3165 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3166 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3167 if (nunits_in >= nunits_out)
3168 return false;
3169
3170 /* Multiple types in SLP are handled by creating the appropriate number of
3171 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3172 case of SLP. */
3173 if (slp_node || PURE_SLP_STMT (stmt_info))
3174 ncopies = 1;
3175 else
3176 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3177 gcc_assert (ncopies >= 1);
3178
3179 /* Supportable by target? */
3180 if (!supportable_narrowing_operation (code, vectype_out, vectype_in,
3181 &code1, &multi_step_cvt, &interm_types))
3182 return false;
3183
3184 if (!vec_stmt) /* transformation not required. */
3185 {
3186 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3187 if (vect_print_dump_info (REPORT_DETAILS))
3188 fprintf (vect_dump, "=== vectorizable_demotion ===");
3189 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3190 return true;
3191 }
3192
3193 /** Transform. **/
3194 if (vect_print_dump_info (REPORT_DETAILS))
3195 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
3196 ncopies);
3197
3198 /* In case of multi-step demotion, we first generate demotion operations to
3199 the intermediate types, and then from those types to the final one.
3200 We create vector destinations for the intermediate type (TYPES) received
3201 from supportable_narrowing_operation, and store them in the correct order
3202 for future use in vect_create_vectorized_demotion_stmts(). */
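/* E.g. for a multi-step demotion from int to char via short, VEC_DSTS
   ends up holding the char vector destination first and the short vector
   destination last, so that the pops in
   vect_create_vectorized_demotion_stmts() yield the short destination for
   the first (int -> short) step and the char destination for the final
   (short -> char) step. */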
3203 if (multi_step_cvt)
3204 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3205 else
3206 vec_dsts = VEC_alloc (tree, heap, 1);
3207
3208 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3209 VEC_quick_push (tree, vec_dsts, vec_dest);
3210
3211 if (multi_step_cvt)
3212 {
3213 for (i = VEC_length (tree, interm_types) - 1;
3214 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
3215 {
3216 vec_dest = vect_create_destination_var (scalar_dest,
3217 intermediate_type);
3218 VEC_quick_push (tree, vec_dsts, vec_dest);
3219 }
3220 }
3221
3222 /* In case the vectorization factor (VF) is bigger than the number
3223 of elements that we can fit in a vectype (nunits), we have to generate
3224 more than one vector stmt - i.e - we need to "unroll" the
3225 vector stmt by a factor VF/nunits. */
3226 last_oprnd = op0;
3227 prev_stmt_info = NULL;
3228 for (j = 0; j < ncopies; j++)
3229 {
3230 /* Handle uses. */
3231 if (slp_node)
3232 vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, -1);
3233 else
3234 {
3235 VEC_free (tree, heap, vec_oprnds0);
3236 vec_oprnds0 = VEC_alloc (tree, heap,
3237 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
3238 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3239 vect_pow2 (multi_step_cvt) - 1);
3240 }
3241
3242 /* Arguments are ready. Create the new vector stmts. */
3243 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3244 vect_create_vectorized_demotion_stmts (&vec_oprnds0,
3245 multi_step_cvt, stmt, tmp_vec_dsts,
3246 gsi, slp_node, code1,
3247 &prev_stmt_info);
3248 }
3249
3250 VEC_free (tree, heap, vec_oprnds0);
3251 VEC_free (tree, heap, vec_dsts);
3252 VEC_free (tree, heap, tmp_vec_dsts);
3253 VEC_free (tree, heap, interm_types);
3254
3255 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3256 return true;
3257 }
3258
3259
3260 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3261 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3262 the resulting vectors and call the function recursively. */
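/* For instance, when promoting short to int on a 128-bit target, each
   V8HI operand in VEC_OPRNDS0 produces two V4SI results (the two halves
   generated by vect_gen_widened_results_half); for multi-step conversions
   those halves are collected in VEC_TMP and the function recurses on
   them. */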
3263
3264 static void
3265 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
3266 VEC (tree, heap) **vec_oprnds1,
3267 int multi_step_cvt, gimple stmt,
3268 VEC (tree, heap) *vec_dsts,
3269 gimple_stmt_iterator *gsi,
3270 slp_tree slp_node, enum tree_code code1,
3271 enum tree_code code2, tree decl1,
3272 tree decl2, int op_type,
3273 stmt_vec_info *prev_stmt_info)
3274 {
3275 int i;
3276 tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
3277 gimple new_stmt1, new_stmt2;
3278 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3279 VEC (tree, heap) *vec_tmp;
3280
3281 vec_dest = VEC_pop (tree, vec_dsts);
3282 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
3283
3284 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
3285 {
3286 if (op_type == binary_op)
3287 vop1 = VEC_index (tree, *vec_oprnds1, i);
3288 else
3289 vop1 = NULL_TREE;
3290
3291 /* Generate the two halves of promotion operation. */
3292 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3293 op_type, vec_dest, gsi, stmt);
3294 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3295 op_type, vec_dest, gsi, stmt);
3296 if (is_gimple_call (new_stmt1))
3297 {
3298 new_tmp1 = gimple_call_lhs (new_stmt1);
3299 new_tmp2 = gimple_call_lhs (new_stmt2);
3300 }
3301 else
3302 {
3303 new_tmp1 = gimple_assign_lhs (new_stmt1);
3304 new_tmp2 = gimple_assign_lhs (new_stmt2);
3305 }
3306
3307 if (multi_step_cvt)
3308 {
3309 /* Store the results for the recursive call. */
3310 VEC_quick_push (tree, vec_tmp, new_tmp1);
3311 VEC_quick_push (tree, vec_tmp, new_tmp2);
3312 }
3313 else
3314 {
3315 /* Last step of the promotion sequence - store the results. */
3316 if (slp_node)
3317 {
3318 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
3319 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
3320 }
3321 else
3322 {
3323 if (!*prev_stmt_info)
3324 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
3325 else
3326 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
3327
3328 *prev_stmt_info = vinfo_for_stmt (new_stmt1);
3329 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
3330 *prev_stmt_info = vinfo_for_stmt (new_stmt2);
3331 }
3332 }
3333 }
3334
3335 if (multi_step_cvt)
3336 {
3337 /* For a multi-step promotion operation we call this function
3338 recursively for every stage. We start from the input type,
3339 create promotion operations to the intermediate types, and then
3340 create promotions to the output type. */
3341 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
3342 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
3343 multi_step_cvt - 1, stmt,
3344 vec_dsts, gsi, slp_node, code1,
3345 code2, decl1, decl2, op_type,
3346 prev_stmt_info);
3347 }
3348
3349 VEC_free (tree, heap, vec_tmp);
3350 }
3351
3352
3353 /* Function vectorizable_type_promotion
3354
3355 Check if STMT performs a binary or unary operation that involves
3356 type promotion, and if it can be vectorized.
3357 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3358 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3359 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
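/* For example,
       for (i = 0; i < n; i++)  y[i] = (int) s[i];                <-- widening conversion
       for (i = 0; i < n; i++)  y[i] = (int) a[i] * (int) b[i];   <-- WIDEN_MULT_EXPR
                                                                      (if recognized)
   where S, A and B are short and Y is int, are type promotions; each
   input vector is widened into two output vectors. */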
3360
3361 static bool
3362 vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
3363 gimple *vec_stmt, slp_tree slp_node)
3364 {
3365 tree vec_dest;
3366 tree scalar_dest;
3367 tree op0, op1 = NULL;
3368 tree vec_oprnd0 = NULL, vec_oprnd1 = NULL;
3369 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3370 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3371 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3372 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3373 int op_type;
3374 tree def;
3375 gimple def_stmt;
3376 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3377 stmt_vec_info prev_stmt_info;
3378 int nunits_in;
3379 int nunits_out;
3380 tree vectype_out;
3381 int ncopies;
3382 int j, i;
3383 tree vectype_in;
3384 tree intermediate_type = NULL_TREE;
3385 int multi_step_cvt = 0;
3386 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3387 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
3388 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3389 unsigned int k;
3390
3391 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3392 return false;
3393
3394 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3395 return false;
3396
3397 /* Is STMT a vectorizable type-promotion operation? */
3398 if (!is_gimple_assign (stmt))
3399 return false;
3400
3401 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3402 return false;
3403
3404 code = gimple_assign_rhs_code (stmt);
3405 if (!CONVERT_EXPR_CODE_P (code)
3406 && code != WIDEN_MULT_EXPR
3407 && code != WIDEN_LSHIFT_EXPR)
3408 return false;
3409
3410 scalar_dest = gimple_assign_lhs (stmt);
3411 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3412
3413 /* Check the operands of the operation. */
3414 op0 = gimple_assign_rhs1 (stmt);
3415 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3416 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3417 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3418 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3419 && CONVERT_EXPR_CODE_P (code))))
3420 return false;
3421
3422 if (INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3423 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3424 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3425 || ((TYPE_PRECISION (TREE_TYPE (op0))
3426 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op0)))))))
3427 {
3428 if (vect_print_dump_info (REPORT_DETAILS))
3429 fprintf (vect_dump, "type promotion to/from bit-precision "
3430 "unsupported.");
3431 return false;
3432 }
3433
3434 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
3435 &def_stmt, &def, &dt[0], &vectype_in))
3436 {
3437 if (vect_print_dump_info (REPORT_DETAILS))
3438 fprintf (vect_dump, "use not simple.");
3439 return false;
3440 }
3441
3442 op_type = TREE_CODE_LENGTH (code);
3443 if (op_type == binary_op)
3444 {
3445 bool ok;
3446
3447 op1 = gimple_assign_rhs2 (stmt);
3448 if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR)
3449 {
3450 /* For WIDEN_MULT_EXPR and WIDEN_LSHIFT_EXPR, if OP0 is a
3451 constant, use the type of OP1. */
3452 if (CONSTANT_CLASS_P (op0))
3453 ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
3454 &def_stmt, &def, &dt[1], &vectype_in);
3455 else
3456 ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
3457 &dt[1]);
3458
3459 if (!ok)
3460 {
3461 if (vect_print_dump_info (REPORT_DETAILS))
3462 fprintf (vect_dump, "use not simple.");
3463 return false;
3464 }
3465 }
3466 }
3467
3468 /* If op0 is an external or constant def, use a vector type with
3469 the same size as the output vector type. */
3470 if (!vectype_in)
3471 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3472 if (vec_stmt)
3473 gcc_assert (vectype_in);
3474 if (!vectype_in)
3475 {
3476 if (vect_print_dump_info (REPORT_DETAILS))
3477 {
3478 fprintf (vect_dump, "no vectype for scalar type ");
3479 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3480 }
3481
3482 return false;
3483 }
3484
3485 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3486 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3487 if (nunits_in <= nunits_out)
3488 return false;
3489
3490 /* Multiple types in SLP are handled by creating the appropriate number of
3491 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3492 case of SLP. */
3493 if (slp_node || PURE_SLP_STMT (stmt_info))
3494 ncopies = 1;
3495 else
3496 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3497
3498 gcc_assert (ncopies >= 1);
3499
3500 /* Supportable by target? */
3501 if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3502 &decl1, &decl2, &code1, &code2,
3503 &multi_step_cvt, &interm_types))
3504 return false;
3505
3506   /* Binary widening operations can only be supported directly by the
3507      architecture.  */
3508 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3509
3510 if (!vec_stmt) /* transformation not required. */
3511 {
3512 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3513 if (vect_print_dump_info (REPORT_DETAILS))
3514 fprintf (vect_dump, "=== vectorizable_promotion ===");
3515 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
3516 return true;
3517 }
3518
3519 /** Transform. **/
3520
3521 if (vect_print_dump_info (REPORT_DETAILS))
3522 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
3523 ncopies);
3524
3525 if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR)
3526 {
3527 if (CONSTANT_CLASS_P (op0))
3528 op0 = fold_convert (TREE_TYPE (op1), op0);
3529 else if (CONSTANT_CLASS_P (op1))
3530 op1 = fold_convert (TREE_TYPE (op0), op1);
3531 }
3532
3533 /* Handle def. */
3534 /* In case of multi-step promotion, we first generate promotion operations
3535      to the intermediate types, and then from those types to the final one.
3536      We store the vector destinations in VEC_DSTS in the correct order for
3537      recursive creation of promotion operations in
3538      vect_create_vectorized_promotion_stmts(). Vector destinations are created
3539      according to TYPES received from supportable_widening_operation(). */
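  /* Illustrative sketch, not taken from the sources: assume a loop that
     widens a signed char to an int,

       int out[N]; signed char in[N];
       for (i = 0; i < N; i++)
         out[i] = in[i];

     With char/short/int vector types this needs two promotion steps
     (char -> short -> int), so MULTI_STEP_CVT is non-zero and VEC_DSTS
     ends up holding the int destination first and the short (intermediate)
     destination after it.  */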
3540 if (multi_step_cvt)
3541 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3542 else
3543 vec_dsts = VEC_alloc (tree, heap, 1);
3544
3545 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3546 VEC_quick_push (tree, vec_dsts, vec_dest);
3547
3548 if (multi_step_cvt)
3549 {
3550 for (i = VEC_length (tree, interm_types) - 1;
3551 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
3552 {
3553 vec_dest = vect_create_destination_var (scalar_dest,
3554 intermediate_type);
3555 VEC_quick_push (tree, vec_dsts, vec_dest);
3556 }
3557 }
3558
3559 if (!slp_node)
3560 {
3561 vec_oprnds0 = VEC_alloc (tree, heap,
3562 (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3563 if (op_type == binary_op)
3564 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3565 }
3566 else if (code == WIDEN_LSHIFT_EXPR)
3567 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
3568
3569 /* In case the vectorization factor (VF) is bigger than the number
3570 of elements that we can fit in a vectype (nunits), we have to generate
3571      more than one vector stmt, i.e., we need to "unroll" the
3572 vector stmt by a factor VF/nunits. */
3573
3574 prev_stmt_info = NULL;
3575 for (j = 0; j < ncopies; j++)
3576 {
3577 /* Handle uses. */
3578 if (j == 0)
3579 {
3580 if (slp_node)
3581 {
3582 if (code == WIDEN_LSHIFT_EXPR)
3583 {
3584 vec_oprnd1 = op1;
3585 /* Store vec_oprnd1 for every vector stmt to be created
3586 for SLP_NODE. We check during the analysis that all
3587 the shift arguments are the same. */
3588 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3589 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3590
3591 vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL,
3592 -1);
3593 }
3594 else
3595 vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0,
3596 &vec_oprnds1, -1);
3597 }
3598 else
3599 {
3600 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3601 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
3602 if (op_type == binary_op)
3603 {
3604 if (code == WIDEN_LSHIFT_EXPR)
3605 vec_oprnd1 = op1;
3606 else
3607 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3608 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3609 }
3610 }
3611 }
3612 else
3613 {
3614 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3615 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
3616 if (op_type == binary_op)
3617 {
3618 if (code == WIDEN_LSHIFT_EXPR)
3619 vec_oprnd1 = op1;
3620 else
3621 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
3622 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
3623 }
3624 }
3625
3626 /* Arguments are ready. Create the new vector stmts. */
3627 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3628 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
3629 multi_step_cvt, stmt,
3630 tmp_vec_dsts,
3631 gsi, slp_node, code1, code2,
3632 decl1, decl2, op_type,
3633 &prev_stmt_info);
3634 }
3635
3636 VEC_free (tree, heap, vec_dsts);
3637 VEC_free (tree, heap, tmp_vec_dsts);
3638 VEC_free (tree, heap, interm_types);
3639 VEC_free (tree, heap, vec_oprnds0);
3640 VEC_free (tree, heap, vec_oprnds1);
3641
3642 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3643 return true;
3644 }
3645
3646
3647 /* Function vectorizable_store.
3648
3649    Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3650 can be vectorized.
3651 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3652 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3653 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
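/* A hedged illustration, not part of the original comment: the typical
   scalar statement handled here is the store in

     for (i = 0; i < n; i++)
       a[i] = b[i] + 1;

   i.e. an assignment whose LHS is a memory reference (ARRAY_REF, MEM_REF,
   COMPONENT_REF, ...).  On the transformation pass it is replaced by vector
   stores through a data-ref pointer that is bumped by the vector size
   between copies.  */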
3654
3655 static bool
3656 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3657 slp_tree slp_node)
3658 {
3659 tree scalar_dest;
3660 tree data_ref;
3661 tree op;
3662 tree vec_oprnd = NULL_TREE;
3663 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3664 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3665 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3666 tree elem_type;
3667 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3668 struct loop *loop = NULL;
3669 enum machine_mode vec_mode;
3670 tree dummy;
3671 enum dr_alignment_support alignment_support_scheme;
3672 tree def;
3673 gimple def_stmt;
3674 enum vect_def_type dt;
3675 stmt_vec_info prev_stmt_info = NULL;
3676 tree dataref_ptr = NULL_TREE;
3677 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3678 int ncopies;
3679 int j;
3680 gimple next_stmt, first_stmt = NULL;
3681 bool strided_store = false;
3682 bool store_lanes_p = false;
3683 unsigned int group_size, i;
3684 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3685 bool inv_p;
3686 VEC(tree,heap) *vec_oprnds = NULL;
3687 bool slp = (slp_node != NULL);
3688 unsigned int vec_num;
3689 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3690 tree aggr_type;
3691
3692 if (loop_vinfo)
3693 loop = LOOP_VINFO_LOOP (loop_vinfo);
3694
3695 /* Multiple types in SLP are handled by creating the appropriate number of
3696 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3697 case of SLP. */
3698 if (slp || PURE_SLP_STMT (stmt_info))
3699 ncopies = 1;
3700 else
3701 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3702
3703 gcc_assert (ncopies >= 1);
3704
3705 /* FORNOW. This restriction should be relaxed. */
3706 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3707 {
3708 if (vect_print_dump_info (REPORT_DETAILS))
3709 fprintf (vect_dump, "multiple types in nested loop.");
3710 return false;
3711 }
3712
3713 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3714 return false;
3715
3716 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3717 return false;
3718
3719 /* Is vectorizable store? */
3720
3721 if (!is_gimple_assign (stmt))
3722 return false;
3723
3724 scalar_dest = gimple_assign_lhs (stmt);
3725 if (TREE_CODE (scalar_dest) != ARRAY_REF
3726 && TREE_CODE (scalar_dest) != INDIRECT_REF
3727 && TREE_CODE (scalar_dest) != COMPONENT_REF
3728 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3729 && TREE_CODE (scalar_dest) != REALPART_EXPR
3730 && TREE_CODE (scalar_dest) != MEM_REF)
3731 return false;
3732
3733 gcc_assert (gimple_assign_single_p (stmt));
3734 op = gimple_assign_rhs1 (stmt);
3735 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
3736 {
3737 if (vect_print_dump_info (REPORT_DETAILS))
3738 fprintf (vect_dump, "use not simple.");
3739 return false;
3740 }
3741
3742 elem_type = TREE_TYPE (vectype);
3743 vec_mode = TYPE_MODE (vectype);
3744
3745 /* FORNOW. In some cases can vectorize even if data-type not supported
3746 (e.g. - array initialization with 0). */
3747 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3748 return false;
3749
3750 if (!STMT_VINFO_DATA_REF (stmt_info))
3751 return false;
3752
3753 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
3754 {
3755 if (vect_print_dump_info (REPORT_DETAILS))
3756 fprintf (vect_dump, "negative step for store.");
3757 return false;
3758 }
3759
3760 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3761 {
3762 strided_store = true;
3763 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3764 if (!slp && !PURE_SLP_STMT (stmt_info))
3765 {
3766 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3767 if (vect_store_lanes_supported (vectype, group_size))
3768 store_lanes_p = true;
3769 else if (!vect_strided_store_supported (vectype, group_size))
3770 return false;
3771 }
3772
3773 if (first_stmt == stmt)
3774 {
3775 /* STMT is the leader of the group. Check the operands of all the
3776 stmts of the group. */
3777 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3778 while (next_stmt)
3779 {
3780 gcc_assert (gimple_assign_single_p (next_stmt));
3781 op = gimple_assign_rhs1 (next_stmt);
3782 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
3783 &def, &dt))
3784 {
3785 if (vect_print_dump_info (REPORT_DETAILS))
3786 fprintf (vect_dump, "use not simple.");
3787 return false;
3788 }
3789 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3790 }
3791 }
3792 }
3793
3794 if (!vec_stmt) /* transformation not required. */
3795 {
3796 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3797 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
3798 return true;
3799 }
3800
3801 /** Transform. **/
3802
3803 if (strided_store)
3804 {
3805 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3806 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3807
3808 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3809
3810 /* FORNOW */
3811 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3812
3813 /* We vectorize all the stmts of the interleaving group when we
3814 reach the last stmt in the group. */
3815 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3816 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3817 && !slp)
3818 {
3819 *vec_stmt = NULL;
3820 return true;
3821 }
3822
3823 if (slp)
3824 {
3825 strided_store = false;
3826 /* VEC_NUM is the number of vect stmts to be created for this
3827 group. */
3828 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3829 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3830 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3831 }
3832 else
3833 /* VEC_NUM is the number of vect stmts to be created for this
3834 group. */
3835 vec_num = group_size;
3836 }
3837 else
3838 {
3839 first_stmt = stmt;
3840 first_dr = dr;
3841 group_size = vec_num = 1;
3842 }
3843
3844 if (vect_print_dump_info (REPORT_DETAILS))
3845     fprintf (vect_dump, "transform store. ncopies = %d", ncopies);
3846
3847 dr_chain = VEC_alloc (tree, heap, group_size);
3848 oprnds = VEC_alloc (tree, heap, group_size);
3849
3850 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3851 gcc_assert (alignment_support_scheme);
3852 /* Targets with store-lane instructions must not require explicit
3853 realignment. */
3854 gcc_assert (!store_lanes_p
3855 || alignment_support_scheme == dr_aligned
3856 || alignment_support_scheme == dr_unaligned_supported);
3857
3858 if (store_lanes_p)
3859 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3860 else
3861 aggr_type = vectype;
3862
3863 /* In case the vectorization factor (VF) is bigger than the number
3864 of elements that we can fit in a vectype (nunits), we have to generate
3865      more than one vector stmt, i.e., we need to "unroll" the
3866 vector stmt by a factor VF/nunits. For more details see documentation in
3867 vect_get_vec_def_for_copy_stmt. */
3868
3869 /* In case of interleaving (non-unit strided access):
3870
3871 S1: &base + 2 = x2
3872 S2: &base = x0
3873 S3: &base + 1 = x1
3874 S4: &base + 3 = x3
3875
3876 We create vectorized stores starting from base address (the access of the
3877 first stmt in the chain (S2 in the above example), when the last store stmt
3878 of the chain (S4) is reached:
3879
3880 VS1: &base = vx2
3881 VS2: &base + vec_size*1 = vx0
3882 VS3: &base + vec_size*2 = vx1
3883 VS4: &base + vec_size*3 = vx3
3884
3885 Then permutation statements are generated:
3886
3887 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3888 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3889 ...
3890
3891 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3892 (the order of the data-refs in the output of vect_permute_store_chain
3893 corresponds to the order of scalar stmts in the interleaving chain - see
3894 the documentation of vect_permute_store_chain()).
3895
3896 In case of both multiple types and interleaving, above vector stores and
3897 permutation stmts are created for every copy. The result vector stmts are
3898 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3899 STMT_VINFO_RELATED_STMT for the next copies.
3900 */
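  /* For illustration only (an assumed source form): such an interleaving
     group commonly comes from scalar code like

       for (i = 0; i < n; i++)
         {
           a[4*i + 2] = x2;     (S1)
           a[4*i]     = x0;     (S2)
           a[4*i + 1] = x1;     (S3)
           a[4*i + 3] = x3;     (S4)
         }

     where GROUP_SIZE is 4 and S2, the access with the lowest offset, is
     the first statement of the chain.  */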
3901
3902 prev_stmt_info = NULL;
3903 for (j = 0; j < ncopies; j++)
3904 {
3905 gimple new_stmt;
3906 gimple ptr_incr;
3907
3908 if (j == 0)
3909 {
3910 if (slp)
3911 {
3912 /* Get vectorized arguments for SLP_NODE. */
3913 vect_get_slp_defs (NULL_TREE, NULL_TREE, slp_node, &vec_oprnds,
3914 NULL, -1);
3915
3916 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3917 }
3918 else
3919 {
3920 /* For interleaved stores we collect vectorized defs for all the
3921 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3922 used as an input to vect_permute_store_chain(), and OPRNDS as
3923 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3924
3925 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3926 OPRNDS are of size 1. */
3927 next_stmt = first_stmt;
3928 for (i = 0; i < group_size; i++)
3929 {
3930 /* Since gaps are not supported for interleaved stores,
3931 GROUP_SIZE is the exact number of stmts in the chain.
3932 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3933 there is no interleaving, GROUP_SIZE is 1, and only one
3934 iteration of the loop will be executed. */
3935 gcc_assert (next_stmt
3936 && gimple_assign_single_p (next_stmt));
3937 op = gimple_assign_rhs1 (next_stmt);
3938
3939 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3940 NULL);
3941 VEC_quick_push(tree, dr_chain, vec_oprnd);
3942 VEC_quick_push(tree, oprnds, vec_oprnd);
3943 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3944 }
3945 }
3946
3947          /* We should have caught mismatched types earlier. */
3948 gcc_assert (useless_type_conversion_p (vectype,
3949 TREE_TYPE (vec_oprnd)));
3950 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
3951 NULL_TREE, &dummy, gsi,
3952 &ptr_incr, false, &inv_p);
3953 gcc_assert (bb_vinfo || !inv_p);
3954 }
3955 else
3956 {
3957 /* For interleaved stores we created vectorized defs for all the
3958 defs stored in OPRNDS in the previous iteration (previous copy).
3959 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3960 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3961 next copy.
3962 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3963 OPRNDS are of size 1. */
3964 for (i = 0; i < group_size; i++)
3965 {
3966 op = VEC_index (tree, oprnds, i);
3967 vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
3968 &dt);
3969 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3970 VEC_replace(tree, dr_chain, i, vec_oprnd);
3971 VEC_replace(tree, oprnds, i, vec_oprnd);
3972 }
3973 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3974 TYPE_SIZE_UNIT (aggr_type));
3975 }
3976
3977 if (store_lanes_p)
3978 {
3979 tree vec_array;
3980
3981 /* Combine all the vectors into an array. */
3982 vec_array = create_vector_array (vectype, vec_num);
3983 for (i = 0; i < vec_num; i++)
3984 {
3985 vec_oprnd = VEC_index (tree, dr_chain, i);
3986 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
3987 }
3988
3989 /* Emit:
3990 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
3991 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
3992 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
3993 gimple_call_set_lhs (new_stmt, data_ref);
3994 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3995 mark_symbols_for_renaming (new_stmt);
3996 }
3997 else
3998 {
3999 new_stmt = NULL;
4000 if (strided_store)
4001 {
4002 result_chain = VEC_alloc (tree, heap, group_size);
4003 /* Permute. */
4004 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4005 &result_chain);
4006 }
4007
4008 next_stmt = first_stmt;
4009 for (i = 0; i < vec_num; i++)
4010 {
4011 struct ptr_info_def *pi;
4012
4013 if (i > 0)
4014 /* Bump the vector pointer. */
4015 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4016 stmt, NULL_TREE);
4017
4018 if (slp)
4019 vec_oprnd = VEC_index (tree, vec_oprnds, i);
4020 else if (strided_store)
4021 /* For strided stores vectorized defs are interleaved in
4022 vect_permute_store_chain(). */
4023 vec_oprnd = VEC_index (tree, result_chain, i);
4024
4025 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4026 build_int_cst (reference_alias_ptr_type
4027 (DR_REF (first_dr)), 0));
4028 pi = get_ptr_info (dataref_ptr);
4029 pi->align = TYPE_ALIGN_UNIT (vectype);
4030 if (aligned_access_p (first_dr))
4031 pi->misalign = 0;
4032 else if (DR_MISALIGNMENT (first_dr) == -1)
4033 {
4034 TREE_TYPE (data_ref)
4035 = build_aligned_type (TREE_TYPE (data_ref),
4036 TYPE_ALIGN (elem_type));
4037 pi->align = TYPE_ALIGN_UNIT (elem_type);
4038 pi->misalign = 0;
4039 }
4040 else
4041 {
4042 TREE_TYPE (data_ref)
4043 = build_aligned_type (TREE_TYPE (data_ref),
4044 TYPE_ALIGN (elem_type));
4045 pi->misalign = DR_MISALIGNMENT (first_dr);
4046 }
4047
4048 /* Arguments are ready. Create the new vector stmt. */
4049 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4050 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4051 mark_symbols_for_renaming (new_stmt);
4052
4053 if (slp)
4054 continue;
4055
4056 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4057 if (!next_stmt)
4058 break;
4059 }
4060 }
4061 if (!slp)
4062 {
4063 if (j == 0)
4064 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4065 else
4066 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4067 prev_stmt_info = vinfo_for_stmt (new_stmt);
4068 }
4069 }
4070
4071 VEC_free (tree, heap, dr_chain);
4072 VEC_free (tree, heap, oprnds);
4073 if (result_chain)
4074 VEC_free (tree, heap, result_chain);
4075 if (vec_oprnds)
4076 VEC_free (tree, heap, vec_oprnds);
4077
4078 return true;
4079 }
4080
4081 /* Given a vector type VECTYPE returns a builtin DECL to be used
4082 for vector permutation and returns the mask that implements
4083 reversal of the vector elements. If that is impossible to do,
4084 returns NULL. */
4085
4086 static tree
4087 perm_mask_for_reverse (tree vectype)
4088 {
4089 tree mask_element_type, mask_type, mask_vec = NULL;
4090 int i, nunits;
4091
4092 if (!can_vec_perm_expr_p (vectype, NULL_TREE))
4093 return NULL;
4094
4095 mask_element_type
4096 = lang_hooks.types.type_for_size
4097 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
4098 mask_type = get_vectype_for_scalar_type (mask_element_type);
4099 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4100
4101 for (i = 0; i < nunits; i++)
4102 mask_vec = tree_cons (NULL, build_int_cst (mask_element_type, i), mask_vec);
4103 mask_vec = build_vector (mask_type, mask_vec);
4104
4105 if (!can_vec_perm_expr_p (vectype, mask_vec))
4106 return NULL;
4107
4108 return mask_vec;
4109 }
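/* Example, for illustration only: a mask that reverses a four-element
   vector selects the input elements in the order { 3, 2, 1, 0 }, so a
   VEC_PERM_EXPR using it (see reverse_vec_elements below) writes the last
   input element first.  */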
4110
4111 /* Given a vector variable X that was generated for the scalar LHS of
4112    STMT, generate instructions to reverse the vector elements of X,
4113    insert them at *GSI and return the permuted vector variable. */
4114
4115 static tree
4116 reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
4117 {
4118 tree vectype = TREE_TYPE (x);
4119 tree mask_vec, perm_dest, data_ref;
4120 gimple perm_stmt;
4121
4122 mask_vec = perm_mask_for_reverse (vectype);
4123
4124 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4125
4126 /* Generate the permute statement. */
4127 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
4128 x, x, mask_vec);
4129 data_ref = make_ssa_name (perm_dest, perm_stmt);
4130 gimple_set_lhs (perm_stmt, data_ref);
4131 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4132
4133 return data_ref;
4134 }
4135
4136 /* vectorizable_load.
4137
4138    Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4139 can be vectorized.
4140 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4141 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4142 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4143
4144 static bool
4145 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4146 slp_tree slp_node, slp_instance slp_node_instance)
4147 {
4148 tree scalar_dest;
4149 tree vec_dest = NULL;
4150 tree data_ref = NULL;
4151 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4152 stmt_vec_info prev_stmt_info;
4153 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4154 struct loop *loop = NULL;
4155 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4156 bool nested_in_vect_loop = false;
4157 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4158 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4159 tree elem_type;
4160 tree new_temp;
4161 enum machine_mode mode;
4162 gimple new_stmt = NULL;
4163 tree dummy;
4164 enum dr_alignment_support alignment_support_scheme;
4165 tree dataref_ptr = NULL_TREE;
4166 gimple ptr_incr;
4167 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4168 int ncopies;
4169 int i, j, group_size;
4170 tree msq = NULL_TREE, lsq;
4171 tree offset = NULL_TREE;
4172 tree realignment_token = NULL_TREE;
4173 gimple phi = NULL;
4174 VEC(tree,heap) *dr_chain = NULL;
4175 bool strided_load = false;
4176 bool load_lanes_p = false;
4177 gimple first_stmt;
4178 bool inv_p;
4179 bool negative;
4180 bool compute_in_loop = false;
4181 struct loop *at_loop;
4182 int vec_num;
4183 bool slp = (slp_node != NULL);
4184 bool slp_perm = false;
4185 enum tree_code code;
4186 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4187 int vf;
4188 tree aggr_type;
4189
4190 if (loop_vinfo)
4191 {
4192 loop = LOOP_VINFO_LOOP (loop_vinfo);
4193 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4194 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4195 }
4196 else
4197 vf = 1;
4198
4199 /* Multiple types in SLP are handled by creating the appropriate number of
4200 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4201 case of SLP. */
4202 if (slp || PURE_SLP_STMT (stmt_info))
4203 ncopies = 1;
4204 else
4205 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4206
4207 gcc_assert (ncopies >= 1);
4208
4209 /* FORNOW. This restriction should be relaxed. */
4210 if (nested_in_vect_loop && ncopies > 1)
4211 {
4212 if (vect_print_dump_info (REPORT_DETAILS))
4213 fprintf (vect_dump, "multiple types in nested loop.");
4214 return false;
4215 }
4216
4217 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4218 return false;
4219
4220 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4221 return false;
4222
4223 /* Is vectorizable load? */
4224 if (!is_gimple_assign (stmt))
4225 return false;
4226
4227 scalar_dest = gimple_assign_lhs (stmt);
4228 if (TREE_CODE (scalar_dest) != SSA_NAME)
4229 return false;
4230
4231 code = gimple_assign_rhs_code (stmt);
4232 if (code != ARRAY_REF
4233 && code != INDIRECT_REF
4234 && code != COMPONENT_REF
4235 && code != IMAGPART_EXPR
4236 && code != REALPART_EXPR
4237 && code != MEM_REF
4238 && TREE_CODE_CLASS (code) != tcc_declaration)
4239 return false;
4240
4241 if (!STMT_VINFO_DATA_REF (stmt_info))
4242 return false;
4243
4244 negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
4245 if (negative && ncopies > 1)
4246 {
4247 if (vect_print_dump_info (REPORT_DETAILS))
4248 fprintf (vect_dump, "multiple types with negative step.");
4249 return false;
4250 }
4251
4252 elem_type = TREE_TYPE (vectype);
4253 mode = TYPE_MODE (vectype);
4254
4255 /* FORNOW. In some cases can vectorize even if data-type not supported
4256 (e.g. - data copies). */
4257 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4258 {
4259 if (vect_print_dump_info (REPORT_DETAILS))
4260 fprintf (vect_dump, "Aligned load, but unsupported type.");
4261 return false;
4262 }
4263
4264 /* Check if the load is a part of an interleaving chain. */
4265 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4266 {
4267 strided_load = true;
4268 /* FORNOW */
4269 gcc_assert (! nested_in_vect_loop);
4270
4271 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4272 if (!slp && !PURE_SLP_STMT (stmt_info))
4273 {
4274 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4275 if (vect_load_lanes_supported (vectype, group_size))
4276 load_lanes_p = true;
4277 else if (!vect_strided_load_supported (vectype, group_size))
4278 return false;
4279 }
4280 }
4281
4282 if (negative)
4283 {
4284 gcc_assert (!strided_load);
4285 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4286 if (alignment_support_scheme != dr_aligned
4287 && alignment_support_scheme != dr_unaligned_supported)
4288 {
4289 if (vect_print_dump_info (REPORT_DETAILS))
4290 fprintf (vect_dump, "negative step but alignment required.");
4291 return false;
4292 }
4293 if (!perm_mask_for_reverse (vectype))
4294 {
4295 if (vect_print_dump_info (REPORT_DETAILS))
4296 fprintf (vect_dump, "negative step and reversing not supported.");
4297 return false;
4298 }
4299 }
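  /* Illustration (an assumed scenario): a scalar loop such as

       for (i = 0; i < n; i++)
         s += a[n - 1 - i];

     accesses A with a negative DR_STEP.  The vector load is then generated
     with an OFFSET of -(nunits - 1) elements (set in the transform code
     below) so that one supported vector load covers the required elements,
     and the loaded vector is reversed with reverse_vec_elements before
     use.  */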
4300
4301 if (!vec_stmt) /* transformation not required. */
4302 {
4303 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4304 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
4305 return true;
4306 }
4307
4308 if (vect_print_dump_info (REPORT_DETAILS))
4309 fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
4310
4311 /** Transform. **/
4312
4313 if (strided_load)
4314 {
4315 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4316 if (slp
4317 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
4318 && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
4319 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
4320
4321 /* Check if the chain of loads is already vectorized. */
4322 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4323 {
4324 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4325 return true;
4326 }
4327 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4328 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4329
4330 /* VEC_NUM is the number of vect stmts to be created for this group. */
4331 if (slp)
4332 {
4333 strided_load = false;
4334 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4335 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4336 slp_perm = true;
4337 }
4338 else
4339 vec_num = group_size;
4340 }
4341 else
4342 {
4343 first_stmt = stmt;
4344 first_dr = dr;
4345 group_size = vec_num = 1;
4346 }
4347
4348 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4349 gcc_assert (alignment_support_scheme);
4350 /* Targets with load-lane instructions must not require explicit
4351 realignment. */
4352 gcc_assert (!load_lanes_p
4353 || alignment_support_scheme == dr_aligned
4354 || alignment_support_scheme == dr_unaligned_supported);
4355
4356 /* In case the vectorization factor (VF) is bigger than the number
4357 of elements that we can fit in a vectype (nunits), we have to generate
4358      more than one vector stmt, i.e., we need to "unroll" the
4359 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4360 from one copy of the vector stmt to the next, in the field
4361 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4362 stages to find the correct vector defs to be used when vectorizing
4363 stmts that use the defs of the current stmt. The example below
4364 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4365 need to create 4 vectorized stmts):
4366
4367 before vectorization:
4368 RELATED_STMT VEC_STMT
4369 S1: x = memref - -
4370 S2: z = x + 1 - -
4371
4372 step 1: vectorize stmt S1:
4373 We first create the vector stmt VS1_0, and, as usual, record a
4374 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4375 Next, we create the vector stmt VS1_1, and record a pointer to
4376 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4377 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4378 stmts and pointers:
4379 RELATED_STMT VEC_STMT
4380 VS1_0: vx0 = memref0 VS1_1 -
4381 VS1_1: vx1 = memref1 VS1_2 -
4382 VS1_2: vx2 = memref2 VS1_3 -
4383 VS1_3: vx3 = memref3 - -
4384 S1: x = load - VS1_0
4385 S2: z = x + 1 - -
4386
4387 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4388 information we recorded in RELATED_STMT field is used to vectorize
4389 stmt S2. */
4390
4391 /* In case of interleaving (non-unit strided access):
4392
4393 S1: x2 = &base + 2
4394 S2: x0 = &base
4395 S3: x1 = &base + 1
4396 S4: x3 = &base + 3
4397
4398 Vectorized loads are created in the order of memory accesses
4399 starting from the access of the first stmt of the chain:
4400
4401 VS1: vx0 = &base
4402 VS2: vx1 = &base + vec_size*1
4403 VS3: vx3 = &base + vec_size*2
4404 VS4: vx4 = &base + vec_size*3
4405
4406 Then permutation statements are generated:
4407
4408 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4409 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4410 ...
4411
4412 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4413 (the order of the data-refs in the output of vect_permute_load_chain
4414 corresponds to the order of scalar stmts in the interleaving chain - see
4415 the documentation of vect_permute_load_chain()).
4416 The generation of permutation stmts and recording them in
4417 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4418
4419 In case of both multiple types and interleaving, the vector loads and
4420 permutation stmts above are created for every copy. The result vector
4421 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4422 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
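  /* A small worked case, for illustration: with GROUP_SIZE 2 the memory
     holds interleaved elements a0 b0 a1 b1 a2 b2 a3 b3.  Two vector loads
     give vx0 = { a0, b0, a1, b1 } and vx1 = { a2, b2, a3, b3 }, and

       VEC_EXTRACT_EVEN_EXPR <vx0, vx1> = { a0, a1, a2, a3 }
       VEC_EXTRACT_ODD_EXPR  <vx0, vx1> = { b0, b1, b2, b3 }

     recover the two original scalar access streams.  */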
4423
4424 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4425 on a target that supports unaligned accesses (dr_unaligned_supported)
4426 we generate the following code:
4427 p = initial_addr;
4428 indx = 0;
4429 loop {
4430 p = p + indx * vectype_size;
4431 vec_dest = *(p);
4432 indx = indx + 1;
4433 }
4434
4435 Otherwise, the data reference is potentially unaligned on a target that
4436 does not support unaligned accesses (dr_explicit_realign_optimized) -
4437 then generate the following code, in which the data in each iteration is
4438 obtained by two vector loads, one from the previous iteration, and one
4439 from the current iteration:
4440 p1 = initial_addr;
4441 msq_init = *(floor(p1))
4442 p2 = initial_addr + VS - 1;
4443 realignment_token = call target_builtin;
4444 indx = 0;
4445 loop {
4446 p2 = p2 + indx * vectype_size
4447 lsq = *(floor(p2))
4448 vec_dest = realign_load (msq, lsq, realignment_token)
4449 indx = indx + 1;
4450 msq = lsq;
4451 } */
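  /* Worked illustration with assumed numbers: for 16-byte vectors and an
     initial_addr that is misaligned by 4 bytes, floor(p1) is
     initial_addr - 4 and floor(p2) is initial_addr + 12, so MSQ and LSQ
     are the two aligned vectors surrounding the wanted data; REALIGN_LOAD
     then combines them, guided by the realignment token, into the 16 bytes
     that start at the unaligned address.  */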
4452
4453 /* If the misalignment remains the same throughout the execution of the
4454 loop, we can create the init_addr and permutation mask at the loop
4455 preheader. Otherwise, it needs to be created inside the loop.
4456 This can only occur when vectorizing memory accesses in the inner-loop
4457 nested within an outer-loop that is being vectorized. */
4458
4459 if (loop && nested_in_vect_loop_p (loop, stmt)
4460 && (TREE_INT_CST_LOW (DR_STEP (dr))
4461 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4462 {
4463 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4464 compute_in_loop = true;
4465 }
4466
4467 if ((alignment_support_scheme == dr_explicit_realign_optimized
4468 || alignment_support_scheme == dr_explicit_realign)
4469 && !compute_in_loop)
4470 {
4471 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4472 alignment_support_scheme, NULL_TREE,
4473 &at_loop);
4474 if (alignment_support_scheme == dr_explicit_realign_optimized)
4475 {
4476 phi = SSA_NAME_DEF_STMT (msq);
4477 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4478 }
4479 }
4480 else
4481 at_loop = loop;
4482
4483 if (negative)
4484 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4485
4486 if (load_lanes_p)
4487 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4488 else
4489 aggr_type = vectype;
4490
4491 prev_stmt_info = NULL;
4492 for (j = 0; j < ncopies; j++)
4493 {
4494 /* 1. Create the vector or array pointer update chain. */
4495 if (j == 0)
4496 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4497 offset, &dummy, gsi,
4498 &ptr_incr, false, &inv_p);
4499 else
4500 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4501 TYPE_SIZE_UNIT (aggr_type));
4502
4503 if (strided_load || slp_perm)
4504 dr_chain = VEC_alloc (tree, heap, vec_num);
4505
4506 if (load_lanes_p)
4507 {
4508 tree vec_array;
4509
4510 vec_array = create_vector_array (vectype, vec_num);
4511
4512 /* Emit:
4513 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4514 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4515 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4516 gimple_call_set_lhs (new_stmt, vec_array);
4517 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4518 mark_symbols_for_renaming (new_stmt);
4519
4520 /* Extract each vector into an SSA_NAME. */
4521 for (i = 0; i < vec_num; i++)
4522 {
4523 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4524 vec_array, i);
4525 VEC_quick_push (tree, dr_chain, new_temp);
4526 }
4527
4528 /* Record the mapping between SSA_NAMEs and statements. */
4529 vect_record_strided_load_vectors (stmt, dr_chain);
4530 }
4531 else
4532 {
4533 for (i = 0; i < vec_num; i++)
4534 {
4535 if (i > 0)
4536 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4537 stmt, NULL_TREE);
4538
4539 /* 2. Create the vector-load in the loop. */
4540 switch (alignment_support_scheme)
4541 {
4542 case dr_aligned:
4543 case dr_unaligned_supported:
4544 {
4545 struct ptr_info_def *pi;
4546 data_ref
4547 = build2 (MEM_REF, vectype, dataref_ptr,
4548 build_int_cst (reference_alias_ptr_type
4549 (DR_REF (first_dr)), 0));
4550 pi = get_ptr_info (dataref_ptr);
4551 pi->align = TYPE_ALIGN_UNIT (vectype);
4552 if (alignment_support_scheme == dr_aligned)
4553 {
4554 gcc_assert (aligned_access_p (first_dr));
4555 pi->misalign = 0;
4556 }
4557 else if (DR_MISALIGNMENT (first_dr) == -1)
4558 {
4559 TREE_TYPE (data_ref)
4560 = build_aligned_type (TREE_TYPE (data_ref),
4561 TYPE_ALIGN (elem_type));
4562 pi->align = TYPE_ALIGN_UNIT (elem_type);
4563 pi->misalign = 0;
4564 }
4565 else
4566 {
4567 TREE_TYPE (data_ref)
4568 = build_aligned_type (TREE_TYPE (data_ref),
4569 TYPE_ALIGN (elem_type));
4570 pi->misalign = DR_MISALIGNMENT (first_dr);
4571 }
4572 break;
4573 }
4574 case dr_explicit_realign:
4575 {
4576 tree ptr, bump;
4577 tree vs_minus_1;
4578
4579 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4580
4581 if (compute_in_loop)
4582 msq = vect_setup_realignment (first_stmt, gsi,
4583 &realignment_token,
4584 dr_explicit_realign,
4585 dataref_ptr, NULL);
4586
4587 new_stmt = gimple_build_assign_with_ops
4588 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4589 build_int_cst
4590 (TREE_TYPE (dataref_ptr),
4591 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4592 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4593 gimple_assign_set_lhs (new_stmt, ptr);
4594 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4595 data_ref
4596 = build2 (MEM_REF, vectype, ptr,
4597 build_int_cst (reference_alias_ptr_type
4598 (DR_REF (first_dr)), 0));
4599 vec_dest = vect_create_destination_var (scalar_dest,
4600 vectype);
4601 new_stmt = gimple_build_assign (vec_dest, data_ref);
4602 new_temp = make_ssa_name (vec_dest, new_stmt);
4603 gimple_assign_set_lhs (new_stmt, new_temp);
4604 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4605 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4606 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4607 msq = new_temp;
4608
4609 bump = size_binop (MULT_EXPR, vs_minus_1,
4610 TYPE_SIZE_UNIT (elem_type));
4611 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4612 new_stmt = gimple_build_assign_with_ops
4613 (BIT_AND_EXPR, NULL_TREE, ptr,
4614 build_int_cst
4615 (TREE_TYPE (ptr),
4616 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4617 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4618 gimple_assign_set_lhs (new_stmt, ptr);
4619 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4620 data_ref
4621 = build2 (MEM_REF, vectype, ptr,
4622 build_int_cst (reference_alias_ptr_type
4623 (DR_REF (first_dr)), 0));
4624 break;
4625 }
4626 case dr_explicit_realign_optimized:
4627 new_stmt = gimple_build_assign_with_ops
4628 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4629 build_int_cst
4630 (TREE_TYPE (dataref_ptr),
4631 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4632 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
4633 new_stmt);
4634 gimple_assign_set_lhs (new_stmt, new_temp);
4635 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4636 data_ref
4637 = build2 (MEM_REF, vectype, new_temp,
4638 build_int_cst (reference_alias_ptr_type
4639 (DR_REF (first_dr)), 0));
4640 break;
4641 default:
4642 gcc_unreachable ();
4643 }
4644 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4645 new_stmt = gimple_build_assign (vec_dest, data_ref);
4646 new_temp = make_ssa_name (vec_dest, new_stmt);
4647 gimple_assign_set_lhs (new_stmt, new_temp);
4648 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4649 mark_symbols_for_renaming (new_stmt);
4650
4651 /* 3. Handle explicit realignment if necessary/supported.
4652 Create in loop:
4653 vec_dest = realign_load (msq, lsq, realignment_token) */
4654 if (alignment_support_scheme == dr_explicit_realign_optimized
4655 || alignment_support_scheme == dr_explicit_realign)
4656 {
4657 lsq = gimple_assign_lhs (new_stmt);
4658 if (!realignment_token)
4659 realignment_token = dataref_ptr;
4660 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4661 new_stmt
4662 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
4663 vec_dest, msq, lsq,
4664 realignment_token);
4665 new_temp = make_ssa_name (vec_dest, new_stmt);
4666 gimple_assign_set_lhs (new_stmt, new_temp);
4667 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4668
4669 if (alignment_support_scheme == dr_explicit_realign_optimized)
4670 {
4671 gcc_assert (phi);
4672 if (i == vec_num - 1 && j == ncopies - 1)
4673 add_phi_arg (phi, lsq,
4674 loop_latch_edge (containing_loop),
4675 UNKNOWN_LOCATION);
4676 msq = lsq;
4677 }
4678 }
4679
4680 /* 4. Handle invariant-load. */
4681 if (inv_p && !bb_vinfo)
4682 {
4683 tree vec_inv;
4684 gimple_stmt_iterator gsi2 = *gsi;
4685 gcc_assert (!strided_load);
4686 gsi_next (&gsi2);
4687 vec_inv = build_vector_from_val (vectype, scalar_dest);
4688 new_temp = vect_init_vector (stmt, vec_inv,
4689 vectype, &gsi2);
4690 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4691 }
4692
4693 if (negative)
4694 {
4695 new_temp = reverse_vec_elements (new_temp, stmt, gsi);
4696 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4697 }
4698
4699 /* Collect vector loads and later create their permutation in
4700 vect_transform_strided_load (). */
4701 if (strided_load || slp_perm)
4702 VEC_quick_push (tree, dr_chain, new_temp);
4703
4704 /* Store vector loads in the corresponding SLP_NODE. */
4705 if (slp && !slp_perm)
4706 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
4707 new_stmt);
4708 }
4709 }
4710
4711 if (slp && !slp_perm)
4712 continue;
4713
4714 if (slp_perm)
4715 {
4716 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
4717 slp_node_instance, false))
4718 {
4719 VEC_free (tree, heap, dr_chain);
4720 return false;
4721 }
4722 }
4723 else
4724 {
4725 if (strided_load)
4726 {
4727 if (!load_lanes_p)
4728 vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
4729 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4730 }
4731 else
4732 {
4733 if (j == 0)
4734 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4735 else
4736 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4737 prev_stmt_info = vinfo_for_stmt (new_stmt);
4738 }
4739 }
4740 if (dr_chain)
4741 VEC_free (tree, heap, dr_chain);
4742 }
4743
4744 return true;
4745 }
4746
4747 /* Function vect_is_simple_cond.
4748
4749 Input:
4750 LOOP - the loop that is being vectorized.
4751 COND - Condition that is checked for simple use.
4752
4753 Output:
4754 *COMP_VECTYPE - the vector type for the comparison.
4755
4756 Returns whether a COND can be vectorized. Checks whether
4757    condition operands are supportable using vect_is_simple_use. */
4758
4759 static bool
4760 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, tree *comp_vectype)
4761 {
4762 tree lhs, rhs;
4763 tree def;
4764 enum vect_def_type dt;
4765 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
4766
4767 if (!COMPARISON_CLASS_P (cond))
4768 return false;
4769
4770 lhs = TREE_OPERAND (cond, 0);
4771 rhs = TREE_OPERAND (cond, 1);
4772
4773 if (TREE_CODE (lhs) == SSA_NAME)
4774 {
4775 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4776 if (!vect_is_simple_use_1 (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
4777 &dt, &vectype1))
4778 return false;
4779 }
4780 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
4781 && TREE_CODE (lhs) != FIXED_CST)
4782 return false;
4783
4784 if (TREE_CODE (rhs) == SSA_NAME)
4785 {
4786 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4787 if (!vect_is_simple_use_1 (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
4788 &dt, &vectype2))
4789 return false;
4790 }
4791 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
4792 && TREE_CODE (rhs) != FIXED_CST)
4793 return false;
4794
4795 *comp_vectype = vectype1 ? vectype1 : vectype2;
4796 return true;
4797 }
4798
4799 /* vectorizable_condition.
4800
4801 Check if STMT is conditional modify expression that can be vectorized.
4802 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4803 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4804 at GSI.
4805
4806    When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
4807    to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
4808    the else clause if it is 2).
4809
4810 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
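/* Sketch, for illustration only: the scalar statement

     x_5 = a_1 < b_2 ? c_3 : d_4;

   is vectorized here into

     vx = VEC_COND_EXPR <va < vb, vc, vd>;

   where va, vb, vc and vd are the vector defs of the four operands; when
   NCOPIES > 1 the same statement is repeated per copy using
   vect_get_vec_def_for_stmt_copy.  */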
4811
4812 bool
4813 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
4814 gimple *vec_stmt, tree reduc_def, int reduc_index)
4815 {
4816 tree scalar_dest = NULL_TREE;
4817 tree vec_dest = NULL_TREE;
4818 tree cond_expr, then_clause, else_clause;
4819 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4820 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4821 tree comp_vectype;
4822 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
4823 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
4824 tree vec_compare, vec_cond_expr;
4825 tree new_temp;
4826 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4827 tree def;
4828 enum vect_def_type dt, dts[4];
4829 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4830 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4831 enum tree_code code;
4832 stmt_vec_info prev_stmt_info = NULL;
4833 int j;
4834
4835 /* FORNOW: unsupported in basic block SLP. */
4836 gcc_assert (loop_vinfo);
4837
4838 /* FORNOW: SLP not supported. */
4839 if (STMT_SLP_TYPE (stmt_info))
4840 return false;
4841
4842 gcc_assert (ncopies >= 1);
4843 if (reduc_index && ncopies > 1)
4844 return false; /* FORNOW */
4845
4846 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4847 return false;
4848
4849 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4850 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
4851 && reduc_def))
4852 return false;
4853
4854 /* FORNOW: not yet supported. */
4855 if (STMT_VINFO_LIVE_P (stmt_info))
4856 {
4857 if (vect_print_dump_info (REPORT_DETAILS))
4858 fprintf (vect_dump, "value used after loop.");
4859 return false;
4860 }
4861
4862 /* Is vectorizable conditional operation? */
4863 if (!is_gimple_assign (stmt))
4864 return false;
4865
4866 code = gimple_assign_rhs_code (stmt);
4867
4868 if (code != COND_EXPR)
4869 return false;
4870
4871 cond_expr = gimple_assign_rhs1 (stmt);
4872 then_clause = gimple_assign_rhs2 (stmt);
4873 else_clause = gimple_assign_rhs3 (stmt);
4874
4875 if (!vect_is_simple_cond (cond_expr, loop_vinfo, &comp_vectype)
4876 || !comp_vectype)
4877 return false;
4878
4879 if (TREE_CODE (then_clause) == SSA_NAME)
4880 {
4881 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
4882 if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
4883 &then_def_stmt, &def, &dt))
4884 return false;
4885 }
4886 else if (TREE_CODE (then_clause) != INTEGER_CST
4887 && TREE_CODE (then_clause) != REAL_CST
4888 && TREE_CODE (then_clause) != FIXED_CST)
4889 return false;
4890
4891 if (TREE_CODE (else_clause) == SSA_NAME)
4892 {
4893 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
4894 if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
4895 &else_def_stmt, &def, &dt))
4896 return false;
4897 }
4898 else if (TREE_CODE (else_clause) != INTEGER_CST
4899 && TREE_CODE (else_clause) != REAL_CST
4900 && TREE_CODE (else_clause) != FIXED_CST)
4901 return false;
4902
4903 if (!vec_stmt)
4904 {
4905 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
4906 return expand_vec_cond_expr_p (vectype, comp_vectype);
4907 }
4908
4909 /* Transform */
4910
4911 /* Handle def. */
4912 scalar_dest = gimple_assign_lhs (stmt);
4913 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4914
4915 /* Handle cond expr. */
4916 for (j = 0; j < ncopies; j++)
4917 {
4918 gimple new_stmt;
4919 if (j == 0)
4920 {
4921 gimple gtemp;
4922 vec_cond_lhs =
4923 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
4924 stmt, NULL);
4925 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
4926 NULL, &gtemp, &def, &dts[0]);
4927 vec_cond_rhs =
4928 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
4929 stmt, NULL);
4930 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
4931 NULL, &gtemp, &def, &dts[1]);
4932 if (reduc_index == 1)
4933 vec_then_clause = reduc_def;
4934 else
4935 {
4936 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
4937 stmt, NULL);
4938 vect_is_simple_use (then_clause, loop_vinfo,
4939 NULL, &gtemp, &def, &dts[2]);
4940 }
4941 if (reduc_index == 2)
4942 vec_else_clause = reduc_def;
4943 else
4944 {
4945 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
4946 stmt, NULL);
4947 vect_is_simple_use (else_clause, loop_vinfo,
4948 NULL, &gtemp, &def, &dts[3]);
4949 }
4950 }
4951 else
4952 {
4953 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], vec_cond_lhs);
4954 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], vec_cond_rhs);
4955 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
4956 vec_then_clause);
4957 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
4958 vec_else_clause);
4959 }
4960
4961 /* Arguments are ready. Create the new vector stmt. */
4962 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
4963 vec_cond_lhs, vec_cond_rhs);
4964 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
4965 vec_compare, vec_then_clause, vec_else_clause);
4966
4967 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
4968 new_temp = make_ssa_name (vec_dest, new_stmt);
4969 gimple_assign_set_lhs (new_stmt, new_temp);
4970 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4971 if (j == 0)
4972 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4973 else
4974 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4975
4976 prev_stmt_info = vinfo_for_stmt (new_stmt);
4977 }
4978
4979 return true;
4980 }
4981
4982
4983 /* Make sure the statement is vectorizable. */
4984
4985 bool
4986 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
4987 {
4988 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4989 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4990 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
4991 bool ok;
4992 tree scalar_type, vectype;
4993 gimple pattern_stmt, pattern_def_stmt;
4994
4995 if (vect_print_dump_info (REPORT_DETAILS))
4996 {
4997 fprintf (vect_dump, "==> examining statement: ");
4998 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4999 }
5000
5001 if (gimple_has_volatile_ops (stmt))
5002 {
5003 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5004 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
5005
5006 return false;
5007 }
5008
5009 /* Skip stmts that do not need to be vectorized. In loops this is expected
5010 to include:
5011 - the COND_EXPR which is the loop exit condition
5012 - any LABEL_EXPRs in the loop
5013 - computations that are used only for array indexing or loop control.
5014 In basic blocks we only analyze statements that are a part of some SLP
5015 instance, therefore, all the statements are relevant.
5016
5017    A pattern statement needs to be analyzed instead of the original statement
5018 if the original statement is not relevant. Otherwise, we analyze both
5019 statements. */
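/* For instance (an assumed example): for the scalar code

     short a, b;
     int prod;
     ...
     prod = (int) a * (int) b;

   the pattern detector may replace the multiply, for analysis purposes,
   with a pattern statement computing WIDEN_MULT_EXPR <a, b>; when the
   original statement is not relevant by itself, it is that pattern
   statement which is examined below.  */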
5020
5021 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5022 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5023 && !STMT_VINFO_LIVE_P (stmt_info))
5024 {
5025 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5026 && pattern_stmt
5027 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5028 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5029 {
5030 /* Analyze PATTERN_STMT instead of the original stmt. */
5031 stmt = pattern_stmt;
5032 stmt_info = vinfo_for_stmt (pattern_stmt);
5033 if (vect_print_dump_info (REPORT_DETAILS))
5034 {
5035 fprintf (vect_dump, "==> examining pattern statement: ");
5036 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5037 }
5038 }
5039 else
5040 {
5041 if (vect_print_dump_info (REPORT_DETAILS))
5042 fprintf (vect_dump, "irrelevant.");
5043
5044 return true;
5045 }
5046 }
5047 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5048 && pattern_stmt
5049 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5050 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5051 {
5052 /* Analyze PATTERN_STMT too. */
5053 if (vect_print_dump_info (REPORT_DETAILS))
5054 {
5055 fprintf (vect_dump, "==> examining pattern statement: ");
5056 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5057 }
5058
5059 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5060 return false;
5061 }
5062
5063 if (is_pattern_stmt_p (stmt_info)
5064 && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
5065 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5066 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
5067 {
5068 /* Analyze def stmt of STMT if it's a pattern stmt. */
5069 if (vect_print_dump_info (REPORT_DETAILS))
5070 {
5071 fprintf (vect_dump, "==> examining pattern def statement: ");
5072 print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
5073 }
5074
5075 if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node))
5076 return false;
5077 }
5078
5079
5080 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5081 {
5082 case vect_internal_def:
5083 break;
5084
5085 case vect_reduction_def:
5086 case vect_nested_cycle:
5087 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5088 || relevance == vect_used_in_outer_by_reduction
5089 || relevance == vect_unused_in_scope));
5090 break;
5091
5092 case vect_induction_def:
5093 case vect_constant_def:
5094 case vect_external_def:
5095 case vect_unknown_def_type:
5096 default:
5097 gcc_unreachable ();
5098 }
5099
5100 if (bb_vinfo)
5101 {
5102 gcc_assert (PURE_SLP_STMT (stmt_info));
5103
5104 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5105 if (vect_print_dump_info (REPORT_DETAILS))
5106 {
5107 fprintf (vect_dump, "get vectype for scalar type: ");
5108 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5109 }
5110
5111 vectype = get_vectype_for_scalar_type (scalar_type);
5112 if (!vectype)
5113 {
5114 if (vect_print_dump_info (REPORT_DETAILS))
5115 {
5116 fprintf (vect_dump, "not SLPed: unsupported data-type ");
5117 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5118 }
5119 return false;
5120 }
5121
5122 if (vect_print_dump_info (REPORT_DETAILS))
5123 {
5124 fprintf (vect_dump, "vectype: ");
5125 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5126 }
5127
5128 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5129 }
5130
5131 if (STMT_VINFO_RELEVANT_P (stmt_info))
5132 {
5133 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5134 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5135 *need_to_vectorize = true;
5136 }
5137
5138 ok = true;
5139 if (!bb_vinfo
5140 && (STMT_VINFO_RELEVANT_P (stmt_info)
5141 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5142 ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
5143 || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
5144 || vectorizable_conversion (stmt, NULL, NULL, NULL)
5145 || vectorizable_shift (stmt, NULL, NULL, NULL)
5146 || vectorizable_operation (stmt, NULL, NULL, NULL)
5147 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5148 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5149 || vectorizable_call (stmt, NULL, NULL)
5150 || vectorizable_store (stmt, NULL, NULL, NULL)
5151 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5152 || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
5153 else
5154 {
5155 if (bb_vinfo)
5156 ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
5157 || vectorizable_type_demotion (stmt, NULL, NULL, node)
5158 || vectorizable_shift (stmt, NULL, NULL, node)
5159 || vectorizable_operation (stmt, NULL, NULL, node)
5160 || vectorizable_assignment (stmt, NULL, NULL, node)
5161 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5162 || vectorizable_store (stmt, NULL, NULL, node));
5163 }
5164
5165 if (!ok)
5166 {
5167 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5168 {
5169 fprintf (vect_dump, "not vectorized: relevant stmt not ");
5170 fprintf (vect_dump, "supported: ");
5171 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5172 }
5173
5174 return false;
5175 }
5176
5177 if (bb_vinfo)
5178 return true;
5179
5180   /* Stmts that are (also) "live" (i.e., used outside of the loop)
5181 need extra handling, except for vectorizable reductions. */
5182 if (STMT_VINFO_LIVE_P (stmt_info)
5183 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5184 ok = vectorizable_live_operation (stmt, NULL, NULL);
5185
5186 if (!ok)
5187 {
5188 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5189 {
5190 fprintf (vect_dump, "not vectorized: live stmt not ");
5191 fprintf (vect_dump, "supported: ");
5192 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5193 }
5194
5195 return false;
5196 }
5197
5198 return true;
5199 }
5200
5201
5202 /* Function vect_transform_stmt.
5203
5204 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5205
5206 bool
5207 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5208 bool *strided_store, slp_tree slp_node,
5209 slp_instance slp_node_instance)
5210 {
5211 bool is_store = false;
5212 gimple vec_stmt = NULL;
5213 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5214 bool done;
5215
5216 switch (STMT_VINFO_TYPE (stmt_info))
5217 {
5218 case type_demotion_vec_info_type:
5219 done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
5220 gcc_assert (done);
5221 break;
5222
5223 case type_promotion_vec_info_type:
5224 done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
5225 gcc_assert (done);
5226 break;
5227
5228 case type_conversion_vec_info_type:
5229 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5230 gcc_assert (done);
5231 break;
5232
5233 case induc_vec_info_type:
5234 gcc_assert (!slp_node);
5235 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5236 gcc_assert (done);
5237 break;
5238
5239 case shift_vec_info_type:
5240 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5241 gcc_assert (done);
5242 break;
5243
5244 case op_vec_info_type:
5245 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5246 gcc_assert (done);
5247 break;
5248
5249 case assignment_vec_info_type:
5250 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5251 gcc_assert (done);
5252 break;
5253
5254 case load_vec_info_type:
5255 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5256 slp_node_instance);
5257 gcc_assert (done);
5258 break;
5259
5260 case store_vec_info_type:
5261 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5262 gcc_assert (done);
5263 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
5264 {
5265 /* In case of interleaving, the whole chain is vectorized when the
5266 last store in the chain is reached. Store stmts before the last
5267 one are skipped, and their vec_stmt_info shouldn't be freed
5268 meanwhile. */
5269 *strided_store = true;
5270 if (STMT_VINFO_VEC_STMT (stmt_info))
5271 is_store = true;
5272 }
5273 else
5274 is_store = true;
5275 break;
5276
5277 case condition_vec_info_type:
5278 gcc_assert (!slp_node);
5279 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
5280 gcc_assert (done);
5281 break;
5282
5283 case call_vec_info_type:
5284 gcc_assert (!slp_node);
5285 done = vectorizable_call (stmt, gsi, &vec_stmt);
5286 stmt = gsi_stmt (*gsi);
5287 break;
5288
5289 case reduc_vec_info_type:
5290 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5291 gcc_assert (done);
5292 break;
5293
5294 default:
5295 if (!STMT_VINFO_LIVE_P (stmt_info))
5296 {
5297 if (vect_print_dump_info (REPORT_DETAILS))
5298 fprintf (vect_dump, "stmt not supported.");
5299 gcc_unreachable ();
5300 }
5301 }
5302
5303 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5304 is being vectorized, but outside the immediately enclosing loop. */
5305 if (vec_stmt
5306 && STMT_VINFO_LOOP_VINFO (stmt_info)
5307 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5308 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5309 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5310 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5311 || STMT_VINFO_RELEVANT (stmt_info) ==
5312 vect_used_in_outer_by_reduction))
5313 {
5314 struct loop *innerloop = LOOP_VINFO_LOOP (
5315 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5316 imm_use_iterator imm_iter;
5317 use_operand_p use_p;
5318 tree scalar_dest;
5319 gimple exit_phi;
5320
5321 if (vect_print_dump_info (REPORT_DETAILS))
5322 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5323
5324 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5325 (to be used when vectorizing outer-loop stmts that use the DEF of
5326 STMT). */
5327 if (gimple_code (stmt) == GIMPLE_PHI)
5328 scalar_dest = PHI_RESULT (stmt);
5329 else
5330 scalar_dest = gimple_assign_lhs (stmt);
5331
5332 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5333 {
5334 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5335 {
5336 exit_phi = USE_STMT (use_p);
5337 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5338 }
5339 }
5340 }
5341
5342 /* Handle stmts whose DEF is used outside the loop-nest that is
5343 being vectorized. */
5344 if (STMT_VINFO_LIVE_P (stmt_info)
5345 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5346 {
5347 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5348 gcc_assert (done);
5349 }
5350
5351 if (vec_stmt)
5352 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5353
5354 return is_store;
5355 }
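
/* Illustrative sketch, not part of the original file: a driver loop (in
   the spirit of vect_transform_loop) is expected to use the return value
   and STRIDED_STORE roughly as follows.  The variable names and the
   clean-up policy shown here are hypothetical.

     bool strided_store = false;
     bool is_store = vect_transform_stmt (stmt, &gsi, &strided_store,
                                          NULL, NULL);
     if (is_store)
       {
         if (strided_store)
           vect_remove_stores (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
         else
           {
             gimple_stmt_iterator del_gsi = gsi_for_stmt (stmt);
             gsi_remove (&del_gsi, true);
           }
       }
*/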
5356
5357
5358 /* Remove a group of stores (for SLP or interleaving), free their
5359 stmt_vec_info. */
5360
5361 void
5362 vect_remove_stores (gimple first_stmt)
5363 {
5364 gimple next = first_stmt;
5365 gimple tmp;
5366 gimple_stmt_iterator next_si;
5367
5368 while (next)
5369 {
5370 /* Free the attached stmt_vec_info and remove the stmt. */
5371 next_si = gsi_for_stmt (next);
5372 gsi_remove (&next_si, true);
5373 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
5374 free_stmt_vec_info (next);
5375 next = tmp;
5376 }
5377 }
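
/* Worked example (hypothetical): an interleaved group of three scalar
   stores S1 -> S2 -> S3 linked through GROUP_NEXT_ELEMENT is removed by
   a single call vect_remove_stores (S1); each statement is taken out of
   its basic block and its stmt_vec_info is freed, so the group must not
   be referenced afterwards.  */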
5378
5379
5380 /* Function new_stmt_vec_info.
5381
5382 Create and initialize a new stmt_vec_info struct for STMT. */
5383
5384 stmt_vec_info
5385 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5386 bb_vec_info bb_vinfo)
5387 {
5388 stmt_vec_info res;
5389 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5390
5391 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5392 STMT_VINFO_STMT (res) = stmt;
5393 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5394 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5395 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5396 STMT_VINFO_LIVE_P (res) = false;
5397 STMT_VINFO_VECTYPE (res) = NULL;
5398 STMT_VINFO_VEC_STMT (res) = NULL;
5399 STMT_VINFO_VECTORIZABLE (res) = true;
5400 STMT_VINFO_IN_PATTERN_P (res) = false;
5401 STMT_VINFO_RELATED_STMT (res) = NULL;
5402 STMT_VINFO_PATTERN_DEF_STMT (res) = NULL;
5403 STMT_VINFO_DATA_REF (res) = NULL;
5404
5405 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5406 STMT_VINFO_DR_OFFSET (res) = NULL;
5407 STMT_VINFO_DR_INIT (res) = NULL;
5408 STMT_VINFO_DR_STEP (res) = NULL;
5409 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5410
5411 if (gimple_code (stmt) == GIMPLE_PHI
5412 && is_loop_header_bb_p (gimple_bb (stmt)))
5413 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5414 else
5415 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5416
5417 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
5418 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
5419 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
5420 STMT_SLP_TYPE (res) = loop_vect;
5421 GROUP_FIRST_ELEMENT (res) = NULL;
5422 GROUP_NEXT_ELEMENT (res) = NULL;
5423 GROUP_SIZE (res) = 0;
5424 GROUP_STORE_COUNT (res) = 0;
5425 GROUP_GAP (res) = 0;
5426 GROUP_SAME_DR_STMT (res) = NULL;
5427 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5428
5429 return res;
5430 }
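
/* A minimal usage sketch (hypothetical, not from the original source):
   stmt_vec_infos are normally created and attached when a loop or
   basic-block vec-info is built, and later released through
   free_stmt_vec_info, e.g.

     set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, loop_vinfo, NULL));
     ...
     free_stmt_vec_info (stmt);
*/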
5431
5432
5433 /* Create a vector for stmt_vec_info. */
5434
5435 void
5436 init_stmt_vec_info_vec (void)
5437 {
5438 gcc_assert (!stmt_vec_info_vec);
5439 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5440 }
5441
5442
5443 /* Free the vector of stmt_vec_info. */
5444
5445 void
5446 free_stmt_vec_info_vec (void)
5447 {
5448 gcc_assert (stmt_vec_info_vec);
5449 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5450 }
5451
5452
5453 /* Free stmt vectorization related info. */
5454
5455 void
5456 free_stmt_vec_info (gimple stmt)
5457 {
5458 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5459
5460 if (!stmt_info)
5461 return;
5462
5463 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5464 set_vinfo_for_stmt (stmt, NULL);
5465 free (stmt_info);
5466 }
5467
5468
5469 /* Function get_vectype_for_scalar_type_and_size.
5470
5471 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
5472 by the target. */
5473
5474 static tree
5475 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
5476 {
5477 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5478 enum machine_mode simd_mode;
5479 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5480 int nunits;
5481 tree vectype;
5482
5483 if (nbytes == 0)
5484 return NULL_TREE;
5485
5486 /* We can't build a vector type of elements with alignment bigger than
5487 their size. */
5488 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5489 return NULL_TREE;
5490
5491 /* For vector types of elements whose mode precision doesn't
5492 match their type's precision we use an element type of mode
5493 precision. The vectorization routines will have to make sure
5494 they support the proper result truncation/extension. */
5495 if (INTEGRAL_TYPE_P (scalar_type)
5496 && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
5497 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
5498 TYPE_UNSIGNED (scalar_type));
5499
5500 if (GET_MODE_CLASS (inner_mode) != MODE_INT
5501 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5502 return NULL_TREE;
5503
5504 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5505 When the component mode passes the above test, simply use a type
5506 corresponding to that mode. The theory is that any use that
5507 would cause problems with this will disable vectorization anyway. */
5508 if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5509 && !INTEGRAL_TYPE_P (scalar_type)
5510 && !POINTER_TYPE_P (scalar_type))
5511 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5512
5513 /* If no size was supplied, use the mode the target prefers. Otherwise
5514 look up a vector mode of the specified size. */
5515 if (size == 0)
5516 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5517 else
5518 simd_mode = mode_for_vector (inner_mode, size / nbytes);
5519 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
5520 if (nunits <= 1)
5521 return NULL_TREE;
5522
5523 vectype = build_vector_type (scalar_type, nunits);
5524 if (vect_print_dump_info (REPORT_DETAILS))
5525 {
5526 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
5527 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5528 }
5529
5530 if (!vectype)
5531 return NULL_TREE;
5532
5533 if (vect_print_dump_info (REPORT_DETAILS))
5534 {
5535 fprintf (vect_dump, "vectype: ");
5536 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5537 }
5538
5539 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5540 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
5541 {
5542 if (vect_print_dump_info (REPORT_DETAILS))
5543 fprintf (vect_dump, "mode not supported by target.");
5544 return NULL_TREE;
5545 }
5546
5547 return vectype;
5548 }
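
/* Worked example (illustrative, target-dependent): with SIZE == 16 bytes
   and a 4-byte SCALAR_TYPE such as int, nbytes is 4, so
   nunits = 16 / 4 = 4 and the function builds a 4-element integer vector
   type (V4SImode on targets that support it).  With SIZE == 0 the
   preferred SIMD mode of the target is used instead.  */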
5549
5550 unsigned int current_vector_size;
5551
5552 /* Function get_vectype_for_scalar_type.
5553
5554 Returns the vector type corresponding to SCALAR_TYPE as supported
5555 by the target. */
5556
5557 tree
5558 get_vectype_for_scalar_type (tree scalar_type)
5559 {
5560 tree vectype;
5561 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
5562 current_vector_size);
5563 if (vectype
5564 && current_vector_size == 0)
5565 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
5566 return vectype;
5567 }
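
/* Illustrative caller sketch (hypothetical names, not from the original
   source): analysis code that needs the vector type for a statement's
   scalar result typically does something along these lines.

     tree scalar_type = TREE_TYPE (gimple_assign_lhs (stmt));
     tree vectype = get_vectype_for_scalar_type (scalar_type);
     if (!vectype)
       return false;
     STMT_VINFO_VECTYPE (stmt_info) = vectype;
*/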
5568
5569 /* Function get_same_sized_vectype
5570
5571 Returns a vector type corresponding to SCALAR_TYPE with the same size
5572 as VECTOR_TYPE, if supported by the target. */
5573
5574 tree
5575 get_same_sized_vectype (tree scalar_type, tree vector_type)
5576 {
5577 return get_vectype_for_scalar_type_and_size
5578 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
5579 }
5580
5581 /* Function vect_is_simple_use.
5582
5583 Input:
5584 LOOP_VINFO - the vect info of the loop that is being vectorized.
5585 BB_VINFO - the vect info of the basic block that is being vectorized.
5586 OPERAND - operand of a stmt in the loop or bb.
5587 DEF - the defining stmt in case OPERAND is an SSA_NAME.
5588
5589 Returns whether a stmt with OPERAND can be vectorized.
5590 For loops, supportable operands are constants, loop invariants, and operands
5591 that are defined by the current iteration of the loop. Unsupportable
5592 operands are those that are defined by a previous iteration of the loop (as
5593 is the case in reduction/induction computations).
5594 For basic blocks, supportable operands are constants and bb invariants.
5595 For now, operands defined outside the basic block are not supported. */
5596
5597 bool
5598 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
5599 bb_vec_info bb_vinfo, gimple *def_stmt,
5600 tree *def, enum vect_def_type *dt)
5601 {
5602 basic_block bb;
5603 stmt_vec_info stmt_vinfo;
5604 struct loop *loop = NULL;
5605
5606 if (loop_vinfo)
5607 loop = LOOP_VINFO_LOOP (loop_vinfo);
5608
5609 *def_stmt = NULL;
5610 *def = NULL_TREE;
5611
5612 if (vect_print_dump_info (REPORT_DETAILS))
5613 {
5614 fprintf (vect_dump, "vect_is_simple_use: operand ");
5615 print_generic_expr (vect_dump, operand, TDF_SLIM);
5616 }
5617
5618 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
5619 {
5620 *dt = vect_constant_def;
5621 return true;
5622 }
5623
5624 if (is_gimple_min_invariant (operand))
5625 {
5626 *def = operand;
5627 *dt = vect_external_def;
5628 return true;
5629 }
5630
5631 if (TREE_CODE (operand) == PAREN_EXPR)
5632 {
5633 if (vect_print_dump_info (REPORT_DETAILS))
5634 fprintf (vect_dump, "non-associatable copy.");
5635 operand = TREE_OPERAND (operand, 0);
5636 }
5637
5638 if (TREE_CODE (operand) != SSA_NAME)
5639 {
5640 if (vect_print_dump_info (REPORT_DETAILS))
5641 fprintf (vect_dump, "not ssa-name.");
5642 return false;
5643 }
5644
5645 *def_stmt = SSA_NAME_DEF_STMT (operand);
5646 if (*def_stmt == NULL)
5647 {
5648 if (vect_print_dump_info (REPORT_DETAILS))
5649 fprintf (vect_dump, "no def_stmt.");
5650 return false;
5651 }
5652
5653 if (vect_print_dump_info (REPORT_DETAILS))
5654 {
5655 fprintf (vect_dump, "def_stmt: ");
5656 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
5657 }
5658
5659 /* An empty stmt is expected only in case of a function argument
5660 (otherwise we expect a phi_node or a GIMPLE_ASSIGN). */
5661 if (gimple_nop_p (*def_stmt))
5662 {
5663 *def = operand;
5664 *dt = vect_external_def;
5665 return true;
5666 }
5667
5668 bb = gimple_bb (*def_stmt);
5669
5670 if ((loop && !flow_bb_inside_loop_p (loop, bb))
5671 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
5672 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
5673 *dt = vect_external_def;
5674 else
5675 {
5676 stmt_vinfo = vinfo_for_stmt (*def_stmt);
5677 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
5678 }
5679
5680 if (*dt == vect_unknown_def_type)
5681 {
5682 if (vect_print_dump_info (REPORT_DETAILS))
5683 fprintf (vect_dump, "Unsupported pattern.");
5684 return false;
5685 }
5686
5687 if (vect_print_dump_info (REPORT_DETAILS))
5688 fprintf (vect_dump, "type of def: %d.",*dt);
5689
5690 switch (gimple_code (*def_stmt))
5691 {
5692 case GIMPLE_PHI:
5693 *def = gimple_phi_result (*def_stmt);
5694 break;
5695
5696 case GIMPLE_ASSIGN:
5697 *def = gimple_assign_lhs (*def_stmt);
5698 break;
5699
5700 case GIMPLE_CALL:
5701 *def = gimple_call_lhs (*def_stmt);
5702 if (*def != NULL)
5703 break;
5704 /* FALLTHRU */
5705 default:
5706 if (vect_print_dump_info (REPORT_DETAILS))
5707 fprintf (vect_dump, "unsupported defining stmt: ");
5708 return false;
5709 }
5710
5711 return true;
5712 }
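
/* Illustrative caller sketch (hypothetical): operands of a statement
   under analysis are usually validated like this; OP names the operand
   being checked.

     gimple def_stmt;
     tree def;
     enum vect_def_type dt;

     if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
                              &def, &dt))
       {
         if (vect_print_dump_info (REPORT_DETAILS))
           fprintf (vect_dump, "use not simple.");
         return false;
       }
*/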
5713
5714 /* Function vect_is_simple_use_1.
5715
5716 Same as vect_is_simple_use but also determines the vector operand
5717 type of OPERAND and stores it to *VECTYPE. If the definition of
5718 OPERAND is vect_uninitialized_def, vect_constant_def or
5719 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
5720 is responsible for computing the best suited vector type for the
5721 scalar operand. */
5722
5723 bool
5724 vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
5725 bb_vec_info bb_vinfo, gimple *def_stmt,
5726 tree *def, enum vect_def_type *dt, tree *vectype)
5727 {
5728 if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
5729 return false;
5730
5731 /* Now get a vector type if the def is internal, otherwise supply
5732 NULL_TREE and leave it up to the caller to figure out a proper
5733 type for the use stmt. */
5734 if (*dt == vect_internal_def
5735 || *dt == vect_induction_def
5736 || *dt == vect_reduction_def
5737 || *dt == vect_double_reduction_def
5738 || *dt == vect_nested_cycle)
5739 {
5740 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
5741
5742 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5743 && !STMT_VINFO_RELEVANT (stmt_info)
5744 && !STMT_VINFO_LIVE_P (stmt_info))
5745 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5746
5747 *vectype = STMT_VINFO_VECTYPE (stmt_info);
5748 gcc_assert (*vectype != NULL_TREE);
5749 }
5750 else if (*dt == vect_uninitialized_def
5751 || *dt == vect_constant_def
5752 || *dt == vect_external_def)
5753 *vectype = NULL_TREE;
5754 else
5755 gcc_unreachable ();
5756
5757 return true;
5758 }
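
/* Illustrative sketch (hypothetical): when vect_is_simple_use_1 leaves
   *VECTYPE as NULL_TREE (constant or external def), the caller derives a
   type from the scalar operand itself, e.g.

     gimple def_stmt;
     tree def, vectype;
     enum vect_def_type dt;

     if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo, &def_stmt,
                                &def, &dt, &vectype))
       return false;
     if (!vectype)
       vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
*/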
5759
5760
5761 /* Function supportable_widening_operation
5762
5763 Check whether an operation represented by the code CODE is a
5764 widening operation that is supported by the target platform in
5765 vector form (i.e., when operating on arguments of type VECTYPE_IN
5766 producing a result of type VECTYPE_OUT).
5767
5768 Widening operations we currently support are NOP (CONVERT), FLOAT,
5769 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these operations are supported
5770 by the target platform either directly (via vector tree-codes), or via
5771 target builtins.
5772
5773 Output:
5774 - CODE1 and CODE2 are codes of vector operations to be used when
5775 vectorizing the operation, if available.
5776 - DECL1 and DECL2 are decls of target builtin functions to be used
5777 when vectorizing the operation, if available. In this case,
5778 CODE1 and CODE2 are CALL_EXPR.
5779 - MULTI_STEP_CVT determines the number of required intermediate steps in
5780 case of multi-step conversion (like char->short->int - in that case
5781 MULTI_STEP_CVT will be 1).
5782 - INTERM_TYPES contains the intermediate type required to perform the
5783 widening operation (short in the above example). */
5784
5785 bool
5786 supportable_widening_operation (enum tree_code code, gimple stmt,
5787 tree vectype_out, tree vectype_in,
5788 tree *decl1, tree *decl2,
5789 enum tree_code *code1, enum tree_code *code2,
5790 int *multi_step_cvt,
5791 VEC (tree, heap) **interm_types)
5792 {
5793 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5794 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5795 struct loop *vect_loop = NULL;
5796 bool ordered_p;
5797 enum machine_mode vec_mode;
5798 enum insn_code icode1, icode2;
5799 optab optab1, optab2;
5800 tree vectype = vectype_in;
5801 tree wide_vectype = vectype_out;
5802 enum tree_code c1, c2;
5803
5804 if (loop_info)
5805 vect_loop = LOOP_VINFO_LOOP (loop_info);
5806
5807 /* The result of a vectorized widening operation usually requires two vectors
5808 (because the widened results do not fit into one vector). The generated
5809 vector results would normally be expected to be generated in the same
5810 order as in the original scalar computation, i.e. if 8 results are
5811 generated in each vector iteration, they are to be organized as follows:
5812 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
5813
5814 However, in the special case that the result of the widening operation is
5815 used in a reduction computation only, the order doesn't matter (because
5816 when vectorizing a reduction we change the order of the computation).
5817 Some targets can take advantage of this and generate more efficient code.
5818 For example, targets like Altivec that support widen_mult using a sequence
5819 of {mult_even,mult_odd} generate the following vectors:
5820 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
5821
5822 When vectorizing outer-loops, we execute the inner-loop sequentially
5823 (each vectorized inner-loop iteration contributes to VF outer-loop
5824 iterations in parallel). We therefore don't allow changing the order
5825 of the computation in the inner-loop during outer-loop vectorization. */
5826
5827 if (vect_loop
5828 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
5829 && !nested_in_vect_loop_p (vect_loop, stmt))
5830 ordered_p = false;
5831 else
5832 ordered_p = true;
5833
5834 if (!ordered_p
5835 && code == WIDEN_MULT_EXPR
5836 && targetm.vectorize.builtin_mul_widen_even
5837 && targetm.vectorize.builtin_mul_widen_even (vectype)
5838 && targetm.vectorize.builtin_mul_widen_odd
5839 && targetm.vectorize.builtin_mul_widen_odd (vectype))
5840 {
5841 if (vect_print_dump_info (REPORT_DETAILS))
5842 fprintf (vect_dump, "Unordered widening operation detected.");
5843
5844 *code1 = *code2 = CALL_EXPR;
5845 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
5846 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
5847 return true;
5848 }
5849
5850 switch (code)
5851 {
5852 case WIDEN_MULT_EXPR:
5853 if (BYTES_BIG_ENDIAN)
5854 {
5855 c1 = VEC_WIDEN_MULT_HI_EXPR;
5856 c2 = VEC_WIDEN_MULT_LO_EXPR;
5857 }
5858 else
5859 {
5860 c2 = VEC_WIDEN_MULT_HI_EXPR;
5861 c1 = VEC_WIDEN_MULT_LO_EXPR;
5862 }
5863 break;
5864
5865 case WIDEN_LSHIFT_EXPR:
5866 if (BYTES_BIG_ENDIAN)
5867 {
5868 c1 = VEC_WIDEN_LSHIFT_HI_EXPR;
5869 c2 = VEC_WIDEN_LSHIFT_LO_EXPR;
5870 }
5871 else
5872 {
5873 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
5874 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
5875 }
5876 break;
5877
5878 CASE_CONVERT:
5879 if (BYTES_BIG_ENDIAN)
5880 {
5881 c1 = VEC_UNPACK_HI_EXPR;
5882 c2 = VEC_UNPACK_LO_EXPR;
5883 }
5884 else
5885 {
5886 c2 = VEC_UNPACK_HI_EXPR;
5887 c1 = VEC_UNPACK_LO_EXPR;
5888 }
5889 break;
5890
5891 case FLOAT_EXPR:
5892 if (BYTES_BIG_ENDIAN)
5893 {
5894 c1 = VEC_UNPACK_FLOAT_HI_EXPR;
5895 c2 = VEC_UNPACK_FLOAT_LO_EXPR;
5896 }
5897 else
5898 {
5899 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
5900 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
5901 }
5902 break;
5903
5904 case FIX_TRUNC_EXPR:
5905 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
5906 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
5907 computing the operation. */
5908 return false;
5909
5910 default:
5911 gcc_unreachable ();
5912 }
5913
5914 if (code == FIX_TRUNC_EXPR)
5915 {
5916 /* The signedness is determined from the output operand. */
5917 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
5918 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
5919 }
5920 else
5921 {
5922 optab1 = optab_for_tree_code (c1, vectype, optab_default);
5923 optab2 = optab_for_tree_code (c2, vectype, optab_default);
5924 }
5925
5926 if (!optab1 || !optab2)
5927 return false;
5928
5929 vec_mode = TYPE_MODE (vectype);
5930 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
5931 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
5932 return false;
5933
5934 /* Check if it's a multi-step conversion that can be done using intermediate
5935 types. */
5936 if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
5937 || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
5938 {
5939 int i;
5940 tree prev_type = vectype, intermediate_type;
5941 enum machine_mode intermediate_mode, prev_mode = vec_mode;
5942 optab optab3, optab4;
5943
5944 if (!CONVERT_EXPR_CODE_P (code))
5945 return false;
5946
5947 *code1 = c1;
5948 *code2 = c2;
5949
5950 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
5951 intermediate steps in the promotion sequence. We try
5952 MAX_INTERM_CVT_STEPS steps to get to WIDE_VECTYPE, and fail if we do
5953 not. */
5954 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
5955 for (i = 0; i < 3; i++)
5956 {
5957 intermediate_mode = insn_data[icode1].operand[0].mode;
5958 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
5959 TYPE_UNSIGNED (prev_type));
5960 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
5961 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
5962
5963 if (!optab3 || !optab4
5964 || ((icode1 = optab_handler (optab1, prev_mode))
5965 == CODE_FOR_nothing)
5966 || insn_data[icode1].operand[0].mode != intermediate_mode
5967 || ((icode2 = optab_handler (optab2, prev_mode))
5968 == CODE_FOR_nothing)
5969 || insn_data[icode2].operand[0].mode != intermediate_mode
5970 || ((icode1 = optab_handler (optab3, intermediate_mode))
5971 == CODE_FOR_nothing)
5972 || ((icode2 = optab_handler (optab4, intermediate_mode))
5973 == CODE_FOR_nothing))
5974 return false;
5975
5976 VEC_quick_push (tree, *interm_types, intermediate_type);
5977 (*multi_step_cvt)++;
5978
5979 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
5980 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5981 return true;
5982
5983 prev_type = intermediate_type;
5984 prev_mode = intermediate_mode;
5985 }
5986
5987 return false;
5988 }
5989
5990 *code1 = c1;
5991 *code2 = c2;
5992 return true;
5993 }
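
/* Illustrative caller sketch (hypothetical): a promotion/conversion
   routine would typically query the helper like this before generating
   any vector statements; STMT, VECTYPE_OUT and VECTYPE_IN come from the
   caller's context.

     enum tree_code code1, code2;
     tree decl1 = NULL_TREE, decl2 = NULL_TREE;
     int multi_step_cvt = 0;
     VEC (tree, heap) *interm_types = NULL;

     if (!supportable_widening_operation (code, stmt, vectype_out,
                                          vectype_in, &decl1, &decl2,
                                          &code1, &code2, &multi_step_cvt,
                                          &interm_types))
       return false;

   For a char -> int conversion on a target whose unpacking widens one
   step at a time, MULTI_STEP_CVT comes back as 1 and INTERM_TYPES holds
   the short-based intermediate vector type.  */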
5994
5995
5996 /* Function supportable_narrowing_operation
5997
5998 Check whether an operation represented by the code CODE is a
5999 narrowing operation that is supported by the target platform in
6000 vector form (i.e., when operating on arguments of type VECTYPE_IN
6001 and producing a result of type VECTYPE_OUT).
6002
6003 Narrowing operations we currently support are NOP (CONVERT) and
6004 FIX_TRUNC. This function checks if these operations are supported by
6005 the target platform directly via vector tree-codes.
6006
6007 Output:
6008 - CODE1 is the code of a vector operation to be used when
6009 vectorizing the operation, if available.
6010 - MULTI_STEP_CVT determines the number of required intermediate steps in
6011 case of multi-step conversion (like int->short->char - in that case
6012 MULTI_STEP_CVT will be 1).
6013 - INTERM_TYPES contains the intermediate type required to perform the
6014 narrowing operation (short in the above example). */
6015
6016 bool
6017 supportable_narrowing_operation (enum tree_code code,
6018 tree vectype_out, tree vectype_in,
6019 enum tree_code *code1, int *multi_step_cvt,
6020 VEC (tree, heap) **interm_types)
6021 {
6022 enum machine_mode vec_mode;
6023 enum insn_code icode1;
6024 optab optab1, interm_optab;
6025 tree vectype = vectype_in;
6026 tree narrow_vectype = vectype_out;
6027 enum tree_code c1;
6028 tree intermediate_type, prev_type;
6029 int i;
6030
6031 switch (code)
6032 {
6033 CASE_CONVERT:
6034 c1 = VEC_PACK_TRUNC_EXPR;
6035 break;
6036
6037 case FIX_TRUNC_EXPR:
6038 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6039 break;
6040
6041 case FLOAT_EXPR:
6042 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6043 tree code and optabs used for computing the operation. */
6044 return false;
6045
6046 default:
6047 gcc_unreachable ();
6048 }
6049
6050 if (code == FIX_TRUNC_EXPR)
6051 /* The signedness is determined from the output operand. */
6052 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6053 else
6054 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6055
6056 if (!optab1)
6057 return false;
6058
6059 vec_mode = TYPE_MODE (vectype);
6060 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6061 return false;
6062
6063 /* Check if it's a multi-step conversion that can be done using intermediate
6064 types. */
6065 if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
6066 {
6067 enum machine_mode intermediate_mode, prev_mode = vec_mode;
6068
6069 *code1 = c1;
6070 prev_type = vectype;
6071 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6072 intermediate steps in the narrowing sequence. We try
6073 MAX_INTERM_CVT_STEPS steps to get to NARROW_VECTYPE, and fail if we do
6074 not. */
6075 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6076 for (i = 0; i < 3; i++)
6077 {
6078 intermediate_mode = insn_data[icode1].operand[0].mode;
6079 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
6080 TYPE_UNSIGNED (prev_type));
6081 interm_optab = optab_for_tree_code (c1, intermediate_type,
6082 optab_default);
6083 if (!interm_optab
6084 || ((icode1 = optab_handler (optab1, prev_mode))
6085 == CODE_FOR_nothing)
6086 || insn_data[icode1].operand[0].mode != intermediate_mode
6087 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6088 == CODE_FOR_nothing))
6089 return false;
6090
6091 VEC_quick_push (tree, *interm_types, intermediate_type);
6092 (*multi_step_cvt)++;
6093
6094 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6095 return true;
6096
6097 prev_type = intermediate_type;
6098 prev_mode = intermediate_mode;
6099 }
6100
6101 return false;
6102 }
6103
6104 *code1 = c1;
6105 return true;
6106 }
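
/* Illustrative caller sketch (hypothetical), mirroring the widening case:
   a demotion routine checks support before emitting packing statements;
   VECTYPE_OUT and VECTYPE_IN come from the caller's context.

     enum tree_code code1;
     int multi_step_cvt = 0;
     VEC (tree, heap) *interm_types = NULL;

     if (!supportable_narrowing_operation (code, vectype_out, vectype_in,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
       return false;

   For an int -> char conversion needing two packing steps,
   MULTI_STEP_CVT comes back as 1 and INTERM_TYPES holds the short-based
   intermediate vector type.  */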